Java Code Examples for org.apache.hadoop.io.Text#set()
The following examples show how to use org.apache.hadoop.io.Text#set(). Each example notes its original project and source file.
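Before the project examples, here is a minimal standalone sketch of the main set() overloads. This block was written for this page (TextSetDemo is a made-up class name) and is not taken from any of the projects below:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextSetDemo {
    public static void main(String[] args) {
        Text t = new Text();
        t.set("hello");                           // set(String)
        byte[] utf8 = "world".getBytes(StandardCharsets.UTF_8);
        t.set(utf8);                              // set(byte[]): copies the whole array
        t.set(utf8, 0, 3);                        // set(byte[], int, int): copies a slice, here "wor"
        t.set(new Text("other"));                 // set(Text): copies the other instance's bytes
        System.out.println(t);                    // prints "other"
    }
}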
Example 1
Source File: SparkFactDistinct.java From kylin-on-parquet-v2 with Apache License 2.0
private void addFieldValue(DataType type, Integer colIndex, String value,
        List<Tuple2<SelfDefineSortableKey, Text>> result) {
    int reducerIndex = reducerMapping.getReducerIdForCol(colIndex, value);

    tmpbuf.clear();
    byte[] valueBytes = Bytes.toBytes(value);
    int size = valueBytes.length + 1;
    if (size >= tmpbuf.capacity()) {
        tmpbuf = ByteBuffer.allocate(countNewSize(tmpbuf.capacity(), size));
    }
    tmpbuf.put(Bytes.toBytes(reducerIndex)[3]);
    tmpbuf.put(valueBytes);

    Text outputKey = new Text();
    SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();
    outputKey.set(tmpbuf.array(), 0, tmpbuf.position());
    sortableKey.init(outputKey, type);

    result.add(new Tuple2<SelfDefineSortableKey, Text>(sortableKey, new Text()));

    // log a few rows for troubleshooting
    if (result.size() < 10) {
        logger.info("Sample output: {} '{}' => reducer {}", allCols.get(colIndex), value, reducerIndex);
    }
}
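Example 1 builds a composite key in a ByteBuffer and then calls set(array, 0, position) to copy exactly the bytes written so far; the same pattern appears again in Examples 4 and 10. A stripped-down sketch of that step, with a hypothetical helper makeKey that is not part of Kylin:

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

// prefix a value with a one-byte reducer id, as Example 1 does with Bytes.toBytes(reducerIndex)[3]
static Text makeKey(int reducerIndex, String value) {
    byte[] utf8 = value.getBytes(StandardCharsets.UTF_8);
    ByteBuffer buf = ByteBuffer.allocate(1 + utf8.length);
    buf.put((byte) reducerIndex);    // the cast keeps the low byte, matching the [3] index above
    buf.put(utf8);
    Text key = new Text();
    // set(byte[], int, int) copies the range into Text's own buffer,
    // so the ByteBuffer can be cleared and reused afterwards
    key.set(buf.array(), 0, buf.position());
    return key;
}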
Example 2
Source File: CSVOutputUtils.java From incubator-retired-pirk with Apache License 2.0
public static void extractCSVOutputIdentityStripFirstField(Text value, Text input) {
    String csvOut = input.toString();
    String[] tokens = csvOut.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)", -1);
    if (tokens.length > 4) {
        setCSVOutput(value, tokens[1], tokens[2], tokens[3], tokens[4]);
    } else if (tokens.length == 4) {
        setCSVOutput(value, tokens[1], tokens[2], tokens[3]);
    } else {
        logger.info("WARN: tokens.length = " + tokens.length + " != 4 for input = " + csvOut);
        value.set(input.toString());
    }
}
Example 3
Source File: TestContainerLocalizer.java From hadoop with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" }) static DataInputBuffer createFakeCredentials(Random r, int nTok) throws IOException { Credentials creds = new Credentials(); byte[] password = new byte[20]; Text kind = new Text(); Text service = new Text(); Text alias = new Text(); for (int i = 0; i < nTok; ++i) { byte[] identifier = ("idef" + i).getBytes(); r.nextBytes(password); kind.set("kind" + i); service.set("service" + i); alias.set("token" + i); Token token = new Token(identifier, password, kind, service); creds.addToken(alias, token); } DataOutputBuffer buf = new DataOutputBuffer(); creds.writeTokenStorageToStream(buf); DataInputBuffer ret = new DataInputBuffer(); ret.reset(buf.getData(), 0, buf.getLength()); return ret; }
Example 4
Source File: SelfDefineSortableKeyTest.java From kylin with Apache License 2.0
private ArrayList<SelfDefineSortableKey> createKeyList(List<String> strNumList, byte typeFlag) {
    int partationId = 0;
    ArrayList<SelfDefineSortableKey> keyList = new ArrayList<>();
    for (String str : strNumList) {
        ByteBuffer keyBuffer = ByteBuffer.allocate(4096);
        int offset = keyBuffer.position();
        keyBuffer.put(Bytes.toBytes(partationId)[3]);
        keyBuffer.put(Bytes.toBytes(str));
        Bytes.copy(keyBuffer.array(), 1, keyBuffer.position() - offset - 1); // return value unused
        Text outputKey = new Text();
        outputKey.set(keyBuffer.array(), offset, keyBuffer.position() - offset);
        SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();
        sortableKey.init(outputKey, typeFlag);
        keyList.add(sortableKey);
    }
    return keyList;
}
Example 5
Source File: OutputToTextMapper.java From wikireverse with MIT License
public void map(Text lowerCaseKey, LinkArrayWritable value,
        OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
    try {
        int recordCount = 0;
        Text outputValue = new Text();

        String correctCase = value.getMostUsedArticleCasing();
        String correctKey = lowerCaseKey.toString().replace(correctCase.toLowerCase(), correctCase);

        if (correctKey.endsWith("//")) {
            // note: replaceAll() takes a regex, so "////" matches four literal '/' characters
            correctKey = correctKey.replaceAll("////", "");
        }

        Text key = new Text(correctKey);

        for (Writable rawValue : value.get()) {
            LinkWritable link = (LinkWritable) rawValue;
            outputValue.set(link.toString());
            output.collect(key, outputValue);
            recordCount++;
        }

        reporter.incrCounter(COUNTER_GROUP, RECORDS_FETCHED, 1);
        reporter.incrCounter(COUNTER_GROUP, RESULTS_OUTPUT, recordCount);
    } catch (Exception e) {
        reporter.incrCounter(COUNTER_GROUP, MAP_EXCEPTION, 1);
        LOG.error(StringUtils.stringifyException(e));
    }
}
Example 6
Source File: GenericMRLoadGenerator.java From hadoop with Apache License 2.0
private int generateSentence(Text t, int noWords) {
    sentence.setLength(0);
    --noWords;
    for (int i = 0; i < noWords; ++i) {
        sentence.append(words[r.nextInt(words.length)]);
        sentence.append(" ");
    }
    if (noWords >= 0) {
        sentence.append(words[r.nextInt(words.length)]);
    }
    t.set(sentence.toString());
    return sentence.length();
}
Example 7
Source File: RandomWriter.java From big-c with Apache License 2.0
public boolean nextKeyValue() {
    if (name != null) {
        key = new Text();
        key.set(name.getName());
        name = null;
        return true;
    }
    return false;
}
Example 8
Source File: TestMapRed.java From RDFS with Apache License 2.0
public void testNullKeys() throws Exception {
    JobConf conf = new JobConf(TestMapRed.class);
    FileSystem fs = FileSystem.getLocal(conf);
    Path testdir = new Path(
        System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
    fs.delete(testdir, true);
    Path inFile = new Path(testdir, "nullin/blah");
    SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, inFile,
        NullWritable.class, Text.class, SequenceFile.CompressionType.NONE);
    Text t = new Text();
    t.set("AAAAAAAAAAAAAA"); w.append(NullWritable.get(), t);
    t.set("BBBBBBBBBBBBBB"); w.append(NullWritable.get(), t);
    t.set("CCCCCCCCCCCCCC"); w.append(NullWritable.get(), t);
    t.set("DDDDDDDDDDDDDD"); w.append(NullWritable.get(), t);
    t.set("EEEEEEEEEEEEEE"); w.append(NullWritable.get(), t);
    t.set("FFFFFFFFFFFFFF"); w.append(NullWritable.get(), t);
    t.set("GGGGGGGGGGGGGG"); w.append(NullWritable.get(), t);
    t.set("HHHHHHHHHHHHHH"); w.append(NullWritable.get(), t);
    w.close();

    FileInputFormat.setInputPaths(conf, inFile);
    FileOutputFormat.setOutputPath(conf, new Path(testdir, "nullout"));
    conf.setMapperClass(NullMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setNumReduceTasks(1);

    JobClient.runJob(conf);

    SequenceFile.Reader r = new SequenceFile.Reader(fs,
        new Path(testdir, "nullout/part-00000"), conf);
    String m = "AAAAAAAAAAAAAA";
    for (int i = 1; r.next(NullWritable.get(), t); ++i) {
        assertTrue(t.toString() + " doesn't match " + m, m.equals(t.toString()));
        m = m.replace((char) ('A' + i - 1), (char) ('A' + i));
    }
}
Example 9
Source File: EsInputFormat.java From elasticsearch-hadoop with Apache License 2.0
@Override
protected Text setCurrentKey(Text hadoopKey, Object object) {
    if (hadoopKey != null) {
        hadoopKey.set(object.toString());
    }
    return hadoopKey;
}
Example 10
Source File: FactDistinctColumnsReducerTest.java From kylin with Apache License 2.0
@Test
public void testReducerStatistics() throws IOException {
    setConfigurations();
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_STATISTICS, reduceDriver.getConfiguration(),
        SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_PARTITION, reduceDriver.getConfiguration(),
        TextOutputFormat.class, NullWritable.class, LongWritable.class);

    // override the task id
    int dimColsSize = cubeDesc.getRowkey().getRowKeyColumns().length;
    int uhcSize = cubeDesc.getAllUHCColumns().size();
    final int targetTaskId = (dimColsSize - uhcSize) + uhcSize * cubeDesc.getConfig().getUHCReducerCount();
    setContextTaskId(targetTaskId);

    ByteBuffer tmpBuf = ByteBuffer.allocate(4096);
    tmpBuf.put((byte) FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER); // one byte
    tmpBuf.putLong(100);
    Text outputKey1 = new Text();
    outputKey1.set(tmpBuf.array(), 0, tmpBuf.position());
    SelfDefineSortableKey key1 = new SelfDefineSortableKey();
    key1.init(outputKey1, (byte) 0);

    HLLCounter hll = createMockHLLCounter();
    ByteBuffer hllBuf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
    hllBuf.clear();
    hll.writeRegisters(hllBuf);
    Text value1 = new Text();
    value1.set(hllBuf.array(), 0, hllBuf.position());

    reduceDriver.setInput(key1, ImmutableList.of(value1));
    List<Pair<NullWritable, Text>> result = reduceDriver.run();
    assertEquals(0, result.size()); // the reducer writes the statistics to a sequence file
}
Example 11
Source File: MockRecordReader.java From pentaho-hadoop-shims with Apache License 2.0
@Override
public boolean next( Text key, Text value ) throws IOException {
  if ( !rowIter.hasNext() ) {
    return false;
  }
  rowNum++;
  key.set( String.valueOf( rowNum ) );
  value.set( rowIter.next() );
  return true;
}
Example 12
Source File: SequenceFileAsTextRecordReader.java From big-c with Apache License 2.0
/** Read key/value pair in a line. */
public synchronized boolean next(Text key, Text value) throws IOException {
    Text tKey = key;
    Text tValue = value;
    if (!sequenceFileRecordReader.next(innerKey, innerValue)) {
        return false;
    }
    tKey.set(innerKey.toString());
    tValue.set(innerValue.toString());
    return true;
}
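Examples 11 and 12 (and the identical Example 15 below) call set() on Text instances supplied by the caller instead of allocating new objects; Hadoop record readers reuse key/value instances across records for exactly this reason. One related gotcha, shown here in a generic sketch (TextReuseDemo is made up, not from the project above): getBytes() returns the backing array, which set() does not shrink, so it must always be paired with getLength():

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextReuseDemo {
    public static void main(String[] args) {
        Text t = new Text();
        t.set("a longer first value");
        t.set("short");               // reuses the old, larger backing array
        byte[] raw = t.getBytes();    // backing array; may be longer than the contents
        // wrong: may decode stale trailing bytes left over from the longer value
        System.out.println(new String(raw, StandardCharsets.UTF_8));
        // right: respect getLength()
        System.out.println(new String(raw, 0, t.getLength(), StandardCharsets.UTF_8));
    }
}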
Example 13
Source File: PatternMatcherTest.java From jumbune with GNU Lesser General Public License v3.0
@Test
public void matchTestAgainstStringNull() {
    Text value = new Text();
    value.set("null");
    boolean check = PatternMatcher.match(value);
    assertFalse(check);
}
Example 14
Source File: TestIndexedSort.java From hadoop with Apache License 2.0
private static void genRandom(Text t, int len, StringBuilder sb) {
    sb.setLength(0);
    for (int i = 0; i < len; ++i) {
        sb.append(Integer.toString(r.nextInt(26) + 10, 36));
    }
    t.set(sb.toString());
}
Example 15
Source File: SequenceFileAsTextRecordReader.java From hadoop-gpu with Apache License 2.0
/** Read key/value pair in a line. */
public synchronized boolean next(Text key, Text value) throws IOException {
    Text tKey = key;
    Text tValue = value;
    if (!sequenceFileRecordReader.next(innerKey, innerValue)) {
        return false;
    }
    tKey.set(innerKey.toString());
    tValue.set(innerValue.toString());
    return true;
}
Example 16
Source File: SequenceFileProtobufWriter.java From hiped2 with Apache License 2.0
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();

    if (result != 0) {
        return result;
    }

    File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();
    ProtobufSerialization.register(conf);

    SequenceFile.Writer writer =
        SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(outputPath),
            SequenceFile.Writer.keyClass(Text.class),
            SequenceFile.Writer.valueClass(Stock.class),
            SequenceFile.Writer.compression(
                SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
    try {
        Text key = new Text();
        for (Stock stock : StockUtils.fromCsvFile(inputFile)) {
            key.set(stock.getSymbol());
            writer.append(key, stock);
        }
    } finally {
        writer.close();
    }
    return 0;
}
Example 17
Source File: KettleTypeToTextConverter.java From pentaho-hadoop-shims with Apache License 2.0
@Override
public Text convert( ValueMetaInterface meta, Object obj ) throws TypeConversionException {
  try {
    Text text = new Text();
    text.set( meta.getString( obj ) );
    return text;
  } catch ( KettleValueException ex ) {
    throw new TypeConversionException(
      BaseMessages.getString( TypeConverterFactory.class, "ErrorConverting",
        Text.class.getSimpleName(), obj ), ex );
  }
}
Example 18
Source File: FileBench.java From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked") // OutputFormat instantiation static long writeBench(JobConf conf) throws IOException { long filelen = conf.getLong("filebench.file.bytes", 5 * 1024 * 1024 * 1024); Text key = new Text(); Text val = new Text(); final String fn = conf.get("test.filebench.name", ""); final Path outd = FileOutputFormat.getOutputPath(conf); conf.set("mapred.work.output.dir", outd.toString()); OutputFormat outf = conf.getOutputFormat(); RecordWriter<Text,Text> rw = outf.getRecordWriter(outd.getFileSystem(conf), conf, fn, Reporter.NULL); try { long acc = 0L; Date start = new Date(); for (int i = 0; acc < filelen; ++i) { i %= keys.length; key.set(keys[i]); val.set(values[i]); rw.write(key, val); acc += keys[i].length(); acc += values[i].length(); } Date end = new Date(); return end.getTime() - start.getTime(); } finally { rw.close(Reporter.NULL); } }
Example 19
Source File: LexicoderRowSerializer.java From presto with Apache License 2.0
@Override
public void setDouble(Text text, Double value) {
    text.set(encode(DOUBLE, value));
}
Example 20
Source File: CSVOutputUtils.java From incubator-retired-pirk with Apache License 2.0
public static Text setCSVOutput(String domain, String ip, String timestamp, String generic) {
    Text value = new Text();
    String csvOut = domain + "," + ip + "," + timestamp + "," + generic;
    value.set(csvOut);
    return value;
}