Java Code Examples for org.apache.hadoop.io.Text#toString()
The following examples show how to use
org.apache.hadoop.io.Text#toString().
Each example is taken from an open-source project; the source file, project, and license are noted above the code.
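As a quick orientation before the project examples, the minimal sketch below round-trips a String through Text; the class and variable names here are ours, not taken from any project on this page.

import org.apache.hadoop.io.Text;

public class TextToStringDemo {
    public static void main(String[] args) {
        Text text = new Text("hello hadoop"); // Text stores the string as UTF-8 bytes
        String decoded = text.toString();     // toString() decodes those bytes back into a String
        System.out.println(decoded);          // prints: hello hadoop
    }
}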
Example 1
Source File: ST_Polygon.java From spatial-framework-for-hadoop with Apache License 2.0
public BytesWritable evaluate(Text wkwrap) throws UDFArgumentException {
    String wkt = wkwrap.toString();
    try {
        OGCGeometry ogcObj = OGCGeometry.fromText(wkt);
        ogcObj.setSpatialReference(null);
        if (ogcObj.geometryType().equals("Polygon")) {
            return GeometryUtils.geometryToEsriShapeBytesWritable(ogcObj);
        } else {
            LogUtils.Log_InvalidType(LOG, GeometryUtils.OGCType.ST_POLYGON, GeometryUtils.OGCType.UNKNOWN);
            return null;
        }
    } catch (Exception e) { // IllegalArgumentException, GeometryException
        LogUtils.Log_InvalidText(LOG, wkt);
        return null;
    }
}
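For illustration, a direct call to this UDF might look like the hedged sketch below; in Hive the method is invoked for you as ST_Polygon(...), and the WKT literal here is our own.

// Hypothetical standalone invocation of the UDF above.
Text wkt = new Text("POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0))");
BytesWritable shape = new ST_Polygon().evaluate(wkt); // returns null if the WKT is not a polygon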
Example 2
Source File: PeopleRank2.java From MapReduce-Demo with MIT License
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    String pid = key.toString(); // key: "a"  value: "d c b"
    // Fetch runCount from the job configuration, defaulting to 1 if unset.
    int runCount = context.getConfiguration().getInt("runCount", 1);
    People people = null;
    if (runCount == 1) {
        // On the first pass the map input carries no PageRank value, so seed it with 1.0.
        // Argument format: "rank u1 u2..."
        people = People.fromMR("1.0" + People.fieldSeparator + value.toString());
    } else {
        // Later iterations already carry the rank in the input value.
        people = People.fromMR(value.toString());
    }
    // Map output format: "userid rank userlist"
    context.write(new Text(pid), new Text(people.toString()));
    if (people.containsAttentionPeoples()) {
        // If this user follows anyone, give each followee an equal share of the rank.
        // For example, if a follows b, c and d, each of them receives 1.0 / 3.
        double outValue = people.getPeopleRank() / people.getAttentionPeoples().length;
        for (int i = 0; i < people.getAttentionPeoples().length; i++) {
            // Map output format: "followee-id share-from-this-follower"
            context.write(new Text(people.getAttentionPeoples()[i]), new Text(outValue + ""));
        }
    }
}
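Reading the comments, a first-pass trace for the sample record might look roughly like this (the exact separator and toString() output live in the People class, so treat this as our interpretation, not verified output):

input:  key "a", value "d c b"
emits:  ("a", "1.0 d c b")                                       // the record itself, seeded with rank 1.0
        ("d", "0.333..."), ("c", "0.333..."), ("b", "0.333...")  // 1.0 / 3 to each followee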
Example 3
Source File: SnapshotIndexDeletionPolicy.java From incubator-retired-blur with Apache License 2.0
private void loadGenerations() throws IOException {
    FileSystem fileSystem = _path.getFileSystem(_configuration);
    FileStatus[] listStatus = fileSystem.listStatus(_path);
    SortedSet<FileStatus> existing = new TreeSet<FileStatus>(Arrays.asList(listStatus));
    if (existing.isEmpty()) {
        return;
    }
    FileStatus last = existing.last();
    Reader reader = new SequenceFile.Reader(fileSystem, last.getPath(), _configuration);
    Text key = new Text();
    LongWritable value = new LongWritable();
    while (reader.next(key, value)) {
        String name = key.toString();
        long gen = value.get();
        _namesToGenerations.put(name, gen);
        Set<String> names = _generationsToNames.get(gen);
        if (names == null) {
            names = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
            _generationsToNames.put(gen, names);
        }
        names.add(name);
    }
    reader.close();
    existing.remove(last);
    cleanupOldFiles(fileSystem, existing);
}
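For context, a snapshot file this method can read could be produced with something like the following sketch; the snapshot name and generation value are made up, and the Blur project has its own writer code.

// Hedged sketch: write a Text -> LongWritable SequenceFile of the kind loadGenerations() reads.
SequenceFile.Writer writer =
    SequenceFile.createWriter(fileSystem, configuration, path, Text.class, LongWritable.class);
writer.append(new Text("snapshot-1"), new LongWritable(42L)); // snapshot name -> index generation
writer.close();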
Example 4
Source File: FrameReaderJSONL.java From systemds with Apache License 2.0
protected static int readJSONLFrameFromInputSplit(InputSplit split, InputFormat<LongWritable, Text> inputFormat,
        JobConf jobConf, Types.ValueType[] schema, Map<String, Integer> schemaMap, FrameBlock dest, int currentRow)
        throws IOException, JSONException {
    RecordReader<LongWritable, Text> reader = inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
    LongWritable key = new LongWritable();
    Text value = new Text();
    int row = currentRow;
    try {
        while (reader.next(key, value)) {
            // Potential problem if the JSON/JSONL object is very large.
            JSONObject jsonObject = new JSONObject(value.toString());
            for (Map.Entry<String, Integer> entry : schemaMap.entrySet()) {
                String strCellValue = getStringFromJSONPath(jsonObject, entry.getKey());
                dest.set(row, entry.getValue(), UtilFunctions.stringToObject(schema[entry.getValue()], strCellValue));
            }
            row++;
        }
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }
    return row;
}
Example 5
Source File: NewOldCustomElement.java From WIFIProbe with Apache License 2.0
public void readFields(DataInput dataInput) throws IOException {
    Text text = new Text();
    text.readFields(dataInput);
    wifiProb = text.toString();

    LongWritable reader = new LongWritable();
    reader.readFields(dataInput);
    hour = reader.get();

    reader.readFields(dataInput);
    newCustomer = (int) reader.get();

    reader.readFields(dataInput);
    oldCustomer = (int) reader.get();
}
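The snippet omits the matching write() half of the Writable contract; assuming the same field order, a symmetric implementation would look roughly like:

public void write(DataOutput dataOutput) throws IOException {
    new Text(wifiProb).write(dataOutput);            // Text first, mirroring readFields()
    new LongWritable(hour).write(dataOutput);
    new LongWritable(newCustomer).write(dataOutput); // the ints are widened back to longs
    new LongWritable(oldCustomer).write(dataOutput);
}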
Example 6
Source File: MRCaching.java From hadoop with Apache License 2.0
public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output,
                Reporter reporter) throws IOException {
    String line = value.toString();
    StringTokenizer itr = new StringTokenizer(line);
    while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        output.collect(word, one);
    }
}
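This mapper relies on two fields declared elsewhere on the class; in the standard Hadoop word-count pattern they look like the lines below (our reconstruction, not shown in the snippet):

private final static IntWritable one = new IntWritable(1);
private Text word = new Text();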
Example 7
Source File: P1Q3.java From IntroToHadoopAndMR__Udacity_Course with Apache License 2.0
public final void map(final LongWritable key, final Text value, final Context context)
        throws IOException, InterruptedException {
    final String line = value.toString();
    final String[] data = line.trim().split("\t");
    if (data.length == 6) {
        final String product = "JustADummyKey";
        final Double sales = Double.parseDouble(data[4]);
        word.set(product);
        context.write(word, new DoubleWritable(sales));
    }
}
Example 8
Source File: TermVectorPerHost.java From big-c with Apache License 2.0
@Override
public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    Map<String, IntWritable> vectorTerms = new HashMap<String, IntWritable>();
    for (Text value : values) {
        // Each value has the form "word:freq"; split on the last ':'.
        String termVector = value.toString();
        int index = termVector.lastIndexOf(":");
        String word = termVector.substring(0, index);
        int freq = Integer.parseInt(termVector.substring(index + 1));
        if (vectorTerms.containsKey(word)) {
            freq += vectorTerms.get(word).get();
        }
        vectorTerms.put(word, new IntWritable(freq));
    }
    Map<String, IntWritable> vectorTermsSorted = sortByValue(vectorTerms);
    for (Map.Entry<String, IntWritable> me : vectorTermsSorted.entrySet()) {
        if (me.getValue().get() >= CUTOFF) {
            context.write(key, new Text(me.getKey() + ":" + me.getValue()));
        }
    }
}
Example 9
Source File: DecomposeFriendsMapper.java From MapReduce-Demo with MIT License
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    String strs = value.toString(); // e.g. "A:B,C,D,F,E,O"
    Text uString = new Text(strs.substring(0, 1));
    String[] friends = strs.substring(2).split(",");
    for (int i = 0; i < friends.length; i++) {
        // Emit in the form <B,A>, <C,A>, ...
        context.write(new Text(friends[i]), uString);
    }
}
Example 10
Source File: TokenCountMapper.java From RDFS with Apache License 2.0
public void map(K key, Text value, OutputCollector<Text, LongWritable> output,
                Reporter reporter) throws IOException {
    // Get the input text; the value is one line of text.
    String text = value.toString();
    // Tokenize the value and output <token, 1> pairs.
    StringTokenizer st = new StringTokenizer(text);
    while (st.hasMoreTokens()) {
        output.collect(new Text(st.nextToken()), new LongWritable(1));
    }
}
Example 11
Source File: JHLogAnalyzer.java From hadoop with Apache License 2.0
public int getPartition(Text key, Text value, int numPartitions) {
    IntervalKey intKey = new IntervalKey(key.toString());
    if (intKey.statName.equals(StatSeries.STAT_ALL_SLOT_TIME.toString())) {
        if (intKey.taskType.equals("MAP"))
            return 0;
        else if (intKey.taskType.equals("REDUCE"))
            return 1;
    } else if (intKey.statName.equals(StatSeries.STAT_SUBMIT_PENDING_SLOT_TIME.toString())) {
        if (intKey.taskType.equals("MAP"))
            return 2;
        else if (intKey.taskType.equals("REDUCE"))
            return 3;
    } else if (intKey.statName.equals(StatSeries.STAT_LAUNCHED_PENDING_SLOT_TIME.toString())) {
        if (intKey.taskType.equals("MAP"))
            return 4;
        else if (intKey.taskType.equals("REDUCE"))
            return 5;
    } else if (intKey.statName.equals(StatSeries.STAT_FAILED_SLOT_TIME.toString())) {
        if (intKey.taskType.equals("MAP"))
            return 6;
        else if (intKey.taskType.equals("REDUCE"))
            return 7;
    }
    return 8;
}
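Because getPartition() can return any value from 0 through 8, the job needs at least nine reduce tasks for the buckets to stay separate; with the classic mapred API that would be configured along these lines (a sketch, not the project's actual driver code):

JobConf conf = new JobConf(JHLogAnalyzer.class);
conf.setNumReduceTasks(9); // one reducer per partition index returned above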
Example 12
Source File: TestOrderedWordCount.java From tez with Apache License 2.0
public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
    }
}
Example 13
Source File: MoreIndexingFilter.java From anthelion with Apache License 2.0
public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
                            CrawlDatum datum, Inlinks inlinks) throws IndexingException {
    String url_s = url.toString();
    addTime(doc, parse.getData(), url_s, datum);
    addLength(doc, parse.getData(), url_s);
    addType(doc, parse.getData(), url_s, datum);
    resetTitle(doc, parse.getData(), url_s);
    return doc;
}
Example 14
Source File: RegexMapper.java From RDFS with Apache License 2.0
public void map(K key, Text value, OutputCollector<Text, LongWritable> output,
                Reporter reporter) throws IOException {
    String text = value.toString();
    Matcher matcher = pattern.matcher(text);
    while (matcher.find()) {
        output.collect(new Text(matcher.group(group)), new LongWritable(1));
    }
}
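The pattern and group fields are initialized elsewhere in the class; stock Hadoop's RegexMapper does this in configure(), and we assume this RDFS fork follows suit, roughly:

public void configure(JobConf job) {
    pattern = Pattern.compile(job.get("mapred.mapper.regex")); // assumed property name
    group = job.getInt("mapred.mapper.regex.group", 0);        // capture group to emit
}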
Example 15
Source File: MapCommunity.java From hadoop-louvain-community with Apache License 2.0
@Override
protected void map(Text key, BytesWritable value, Context context) throws IOException, InterruptedException {
    /**
     * File format: Metis format.
     */
    // Example file name: 4elt_0.part
    String fileName = key.toString();
    String _parts[] = fileName.split("_");
    String dotParts[] = _parts[1].split("\\.");
    InputStream inputStream = new ByteArrayInputStream(value.getBytes());
    int rank = Integer.parseInt(dotParts[0]);
    if (verbose) {
        System.out.println("Begin");
    }
    try {
        Community c = new Community(inputStream, -1, nb_pass, precision);
        Graph g = null;
        boolean improvement = true;
        double mod = c.modularity(), new_mod;
        int level = 0;
        if (verbose) {
            System.out.print("" + rank + ":" + "level " + level);
            System.out.print(" start computation");
            System.out.println(" network size: "
                + c.getG().getNb_nodes() + " nodes, "
                + c.getG().getNb_links() + " links, "
                + c.getG().getTotal_weight() + " weight.");
        }
        improvement = c.one_level();
        new_mod = c.modularity();
        // Build the coarsened graph before the display check; in the original code g was
        // still null at this point, which would throw a NullPointerException whenever
        // level == display_level.
        g = c.partition2graph_binary();
        if (++level == display_level)
            g.display();
        if (display_level == -1) {
            String filepath = outpath + File.separator + "out_" + level + "_" + rank + ".txt";
            c.display_partition(filepath);
        }
        if (verbose) {
            System.out.println(" network size: "
                + c.getG().getNb_nodes() + " nodes, "
                + c.getG().getNb_links() + " links, "
                + c.getG().getTotal_weight() + " weight.");
        }
        GraphMessage msg = createGraphMessage(g, c, rank);
        // Send to reducer.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        ObjectOutputStream oo = new ObjectOutputStream(bos);
        oo.writeObject(msg);
        context.write(new Text("one"), new BytesWritable(bos.toByteArray()));
    } catch (Exception e) {
        e.printStackTrace();
        throw new InterruptedException(e.toString());
    }
}
Example 16
Source File: OperationOutput.java From hadoop with Apache License 2.0
OperationOutput(Text key, Object value) {
    this(key.toString(), value);
}
Example 17
Source File: DGAEdgeTTTOutputFormat.java From distributed-graph-analytics with Apache License 2.0
@Override
public String getVertexValueAsString(Text vertexValue) {
    return vertexValue.toString();
}
Example 18
Source File: AegisthusLoader.java From aegisthus with Apache License 2.0
@Override
public Tuple getNext() throws IOException {
    if (!mRequiredColumnsInitialized) {
        if (signature != null) {
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(getUdfProperty(REQUIRED_COLUMNS));
        }
        mRequiredColumnsInitialized = true;
    }
    if (reader == null) {
        return null;
    }
    if (serializer == null) {
        serializer = new AegisthusSerializer();
    }
    try {
        while (reader.nextKeyValue()) {
            Text value = (Text) reader.getCurrentValue();
            String s = value.toString();
            if (s.contains("\t")) {
                s = s.split("\t")[1];
            }
            Map<String, Object> map = serializer.deserialize(s);
            if (clean) {
                cleanse(map);
                // When cleaning, ignore an empty row. The map will have size 2
                // because it only holds the key and the deleted ts.
                // TODO: only remove the row if it is empty and is deleted.
                if (map.size() == 2) {
                    continue;
                }
            }
            return tuple(map);
        }
    } catch (InterruptedException e) {
        // ignore
    }
    return null;
}
Example 19
Source File: KMeansDistortionMapReduce.java From geowave with Apache License 2.0
@Override
public void reduce(final Text key, final Iterable<CountofDoubleWritable> values,
        final Reducer<Text, CountofDoubleWritable, GeoWaveOutputKey, DistortionEntry>.Context context)
        throws IOException, InterruptedException {
    double expectation = 0.0;
    final List<AnalyticItemWrapper<Object>> centroids = centroidManager.getCentroidsForGroup(key.toString());
    // It is possible that the number of items in a group is smaller than the cluster.
    final Integer kCount;
    if (expectedK == null) {
        kCount = centroids.size();
    } else {
        kCount = expectedK;
    }
    if (centroids.size() == 0) {
        return;
    }
    final double numDimensions = 2 + centroids.get(0).getExtraDimensions().length;
    double ptCount = 0;
    for (final CountofDoubleWritable value : values) {
        expectation += value.getValue();
        ptCount += value.getCount();
    }
    if (ptCount > 0) {
        expectation /= ptCount;
        final Double distortion = Math.pow(expectation / numDimensions, -(numDimensions / 2));
        final DistortionEntry entry = new DistortionEntry(key.toString(), batchId, kCount, distortion);
        context.write(
            new GeoWaveOutputKey(
                DistortionDataAdapter.ADAPTER_TYPE_NAME,
                DistortionGroupManagement.DISTORTIONS_INDEX_ARRAY),
            entry);
    }
}
Example 20
Source File: Logalyzer.java From hadoop-gpu with Apache License 2.0
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    if (sortSpec == null) {
        return super.compare(b1, s1, l1, b2, s2, l2);
    }
    try {
        Text logline1 = new Text();
        logline1.readFields(new DataInputStream(new ByteArrayInputStream(b1, s1, l1)));
        String line1 = logline1.toString();
        String[] logColumns1 = line1.split(columnSeparator);

        Text logline2 = new Text();
        logline2.readFields(new DataInputStream(new ByteArrayInputStream(b2, s2, l2)));
        String line2 = logline2.toString();
        String[] logColumns2 = line2.split(columnSeparator);

        if (logColumns1 == null || logColumns2 == null) {
            return super.compare(b1, s1, l1, b2, s2, l2);
        }

        // Compare column-wise according to *sortSpec*.
        for (int i = 0; i < sortSpec.length; ++i) {
            int column = Integer.parseInt(sortSpec[i]);
            String c1 = logColumns1[column];
            String c2 = logColumns2[column];

            // Compare the columns; if they differ, we are done.
            int comparison = super.compareBytes(c1.getBytes(), 0, c1.length(),
                                                c2.getBytes(), 0, c2.length());
            if (comparison != 0) {
                return comparison;
            }
        }
    } catch (IOException ioe) {
        LOG.fatal("Caught " + ioe);
        return 0;
    }
    return 0;
}