Java Code Examples for org.apache.hadoop.io.Text#set()
The following examples show how to use org.apache.hadoop.io.Text#set(). Each example notes its original project and source file.
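Before the project examples, here is a minimal standalone sketch of the main set() overloads. This block was written for this page (TextSetDemo is a made-up class name) and is not taken from any of the projects below:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextSetDemo {
    public static void main(String[] args) {
        Text t = new Text();
        t.set("hello");                           // set(String)
        byte[] utf8 = "world".getBytes(StandardCharsets.UTF_8);
        t.set(utf8);                              // set(byte[]): copies the whole array
        t.set(utf8, 0, 3);                        // set(byte[], int, int): copies a slice, here "wor"
        t.set(new Text("other"));                 // set(Text): copies the other instance's bytes
        System.out.println(t);                    // prints "other"
    }
}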
Example 1
Source File: SparkFactDistinct.java From kylin-on-parquet-v2 with Apache License 2.0
private void addFieldValue(DataType type, Integer colIndex, String value,
        List<Tuple2<SelfDefineSortableKey, Text>> result) {
    int reducerIndex = reducerMapping.getReducerIdForCol(colIndex, value);

    tmpbuf.clear();
    byte[] valueBytes = Bytes.toBytes(value);
    int size = valueBytes.length + 1;
    if (size >= tmpbuf.capacity()) {
        tmpbuf = ByteBuffer.allocate(countNewSize(tmpbuf.capacity(), size));
    }
    tmpbuf.put(Bytes.toBytes(reducerIndex)[3]);
    tmpbuf.put(valueBytes);

    Text outputKey = new Text();
    SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();
    outputKey.set(tmpbuf.array(), 0, tmpbuf.position());
    sortableKey.init(outputKey, type);

    result.add(new Tuple2<SelfDefineSortableKey, Text>(sortableKey, new Text()));

    // log a few rows for troubleshooting
    if (result.size() < 10) {
        logger.info("Sample output: {} '{}' => reducer {}", allCols.get(colIndex), value, reducerIndex);
    }
}
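Example 1 builds a composite key in a ByteBuffer and then calls set(array, 0, position) to copy exactly the bytes written so far; the same pattern appears again in Examples 4 and 10. A stripped-down sketch of that step, with a hypothetical helper makeKey that is not part of Kylin:

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

// prefix a value with a one-byte reducer id, as Example 1 does with Bytes.toBytes(reducerIndex)[3]
static Text makeKey(int reducerIndex, String value) {
    byte[] utf8 = value.getBytes(StandardCharsets.UTF_8);
    ByteBuffer buf = ByteBuffer.allocate(1 + utf8.length);
    buf.put((byte) reducerIndex);    // the cast keeps the low byte, matching the [3] index above
    buf.put(utf8);
    Text key = new Text();
    // set(byte[], int, int) copies the range into Text's own buffer,
    // so the ByteBuffer can be cleared and reused afterwards
    key.set(buf.array(), 0, buf.position());
    return key;
}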
Example 2
Source File: CSVOutputUtils.java From incubator-retired-pirk with Apache License 2.0
public static void extractCSVOutputIdentityStripFirstField(Text value, Text input) {
    String csvOut = input.toString();
    String[] tokens = csvOut.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)", -1);
    if (tokens.length > 4) {
        setCSVOutput(value, tokens[1], tokens[2], tokens[3], tokens[4]);
    } else if (tokens.length == 4) {
        setCSVOutput(value, tokens[1], tokens[2], tokens[3]);
    } else {
        logger.info("WARN: tokens.length = " + tokens.length + " != 4 for input = " + csvOut);
        value.set(input.toString());
    }
}
Example 3
Source File: TestContainerLocalizer.java From hadoop with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" }) static DataInputBuffer createFakeCredentials(Random r, int nTok) throws IOException { Credentials creds = new Credentials(); byte[] password = new byte[20]; Text kind = new Text(); Text service = new Text(); Text alias = new Text(); for (int i = 0; i < nTok; ++i) { byte[] identifier = ("idef" + i).getBytes(); r.nextBytes(password); kind.set("kind" + i); service.set("service" + i); alias.set("token" + i); Token token = new Token(identifier, password, kind, service); creds.addToken(alias, token); } DataOutputBuffer buf = new DataOutputBuffer(); creds.writeTokenStorageToStream(buf); DataInputBuffer ret = new DataInputBuffer(); ret.reset(buf.getData(), 0, buf.getLength()); return ret; }
Example 4
Source File: SelfDefineSortableKeyTest.java From kylin with Apache License 2.0
private ArrayList<SelfDefineSortableKey> createKeyList(List<String> strNumList, byte typeFlag) {
    int partationId = 0;
    ArrayList<SelfDefineSortableKey> keyList = new ArrayList<>();
    for (String str : strNumList) {
        ByteBuffer keyBuffer = ByteBuffer.allocate(4096);
        int offset = keyBuffer.position();
        keyBuffer.put(Bytes.toBytes(partationId)[3]);
        keyBuffer.put(Bytes.toBytes(str));
        Bytes.copy(keyBuffer.array(), 1, keyBuffer.position() - offset - 1); // return value unused
        Text outputKey = new Text();
        outputKey.set(keyBuffer.array(), offset, keyBuffer.position() - offset);
        SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();
        sortableKey.init(outputKey, typeFlag);
        keyList.add(sortableKey);
    }
    return keyList;
}
Example 5
Source File: OutputToTextMapper.java From wikireverse with MIT License
public void map(Text lowerCaseKey, LinkArrayWritable value,
        OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
    try {
        int recordCount = 0;
        Text outputValue = new Text();

        String correctCase = value.getMostUsedArticleCasing();
        String correctKey = lowerCaseKey.toString().replace(correctCase.toLowerCase(), correctCase);

        if (correctKey.endsWith("//")) {
            // note: replaceAll() takes a regex, so "////" matches four literal '/' characters
            correctKey = correctKey.replaceAll("////", "");
        }

        Text key = new Text(correctKey);

        for (Writable rawValue : value.get()) {
            LinkWritable link = (LinkWritable) rawValue;
            outputValue.set(link.toString());
            output.collect(key, outputValue);
            recordCount++;
        }

        reporter.incrCounter(COUNTER_GROUP, RECORDS_FETCHED, 1);
        reporter.incrCounter(COUNTER_GROUP, RESULTS_OUTPUT, recordCount);
    } catch (Exception e) {
        reporter.incrCounter(COUNTER_GROUP, MAP_EXCEPTION, 1);
        LOG.error(StringUtils.stringifyException(e));
    }
}
Example 6
Source File: GenericMRLoadGenerator.java From hadoop with Apache License 2.0
private int generateSentence(Text t, int noWords) {
    sentence.setLength(0);
    --noWords;
    for (int i = 0; i < noWords; ++i) {
        sentence.append(words[r.nextInt(words.length)]);
        sentence.append(" ");
    }
    if (noWords >= 0) {
        sentence.append(words[r.nextInt(words.length)]);
    }
    t.set(sentence.toString());
    return sentence.length();
}
Example 7
Source File: RandomWriter.java From big-c with Apache License 2.0
public boolean nextKeyValue() {
    if (name != null) {
        key = new Text();
        key.set(name.getName());
        name = null;
        return true;
    }
    return false;
}
Example 8
Source File: TestMapRed.java From RDFS with Apache License 2.0
public void testNullKeys() throws Exception {
    JobConf conf = new JobConf(TestMapRed.class);
    FileSystem fs = FileSystem.getLocal(conf);
    Path testdir = new Path(
        System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
    fs.delete(testdir, true);
    Path inFile = new Path(testdir, "nullin/blah");
    SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, inFile,
        NullWritable.class, Text.class, SequenceFile.CompressionType.NONE);
    Text t = new Text();
    t.set("AAAAAAAAAAAAAA"); w.append(NullWritable.get(), t);
    t.set("BBBBBBBBBBBBBB"); w.append(NullWritable.get(), t);
    t.set("CCCCCCCCCCCCCC"); w.append(NullWritable.get(), t);
    t.set("DDDDDDDDDDDDDD"); w.append(NullWritable.get(), t);
    t.set("EEEEEEEEEEEEEE"); w.append(NullWritable.get(), t);
    t.set("FFFFFFFFFFFFFF"); w.append(NullWritable.get(), t);
    t.set("GGGGGGGGGGGGGG"); w.append(NullWritable.get(), t);
    t.set("HHHHHHHHHHHHHH"); w.append(NullWritable.get(), t);
    w.close();

    FileInputFormat.setInputPaths(conf, inFile);
    FileOutputFormat.setOutputPath(conf, new Path(testdir, "nullout"));
    conf.setMapperClass(NullMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setNumReduceTasks(1);

    JobClient.runJob(conf);

    SequenceFile.Reader r = new SequenceFile.Reader(fs,
        new Path(testdir, "nullout/part-00000"), conf);
    String m = "AAAAAAAAAAAAAA";
    for (int i = 1; r.next(NullWritable.get(), t); ++i) {
        assertTrue(t.toString() + " doesn't match " + m, m.equals(t.toString()));
        m = m.replace((char) ('A' + i - 1), (char) ('A' + i));
    }
}
Example 9
Source File: EsInputFormat.java From elasticsearch-hadoop with Apache License 2.0
@Override
protected Text setCurrentKey(Text hadoopKey, Object object) {
    if (hadoopKey != null) {
        hadoopKey.set(object.toString());
    }
    return hadoopKey;
}
Example 10
Source File: FactDistinctColumnsReducerTest.java From kylin with Apache License 2.0
@Test
public void testReducerStatistics() throws IOException {
    setConfigurations();
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_STATISTICS, reduceDriver.getConfiguration(),
        SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_PARTITION, reduceDriver.getConfiguration(),
        TextOutputFormat.class, NullWritable.class, LongWritable.class);

    // override the task id
    int dimColsSize = cubeDesc.getRowkey().getRowKeyColumns().length;
    int uhcSize = cubeDesc.getAllUHCColumns().size();
    final int targetTaskId = (dimColsSize - uhcSize) + uhcSize * cubeDesc.getConfig().getUHCReducerCount();
    setContextTaskId(targetTaskId);

    ByteBuffer tmpBuf = ByteBuffer.allocate(4096);
    tmpBuf.put((byte) FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER); // one byte
    tmpBuf.putLong(100);
    Text outputKey1 = new Text();
    outputKey1.set(tmpBuf.array(), 0, tmpBuf.position());
    SelfDefineSortableKey key1 = new SelfDefineSortableKey();
    key1.init(outputKey1, (byte) 0);

    HLLCounter hll = createMockHLLCounter();
    ByteBuffer hllBuf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
    hllBuf.clear();
    hll.writeRegisters(hllBuf);
    Text value1 = new Text();
    value1.set(hllBuf.array(), 0, hllBuf.position());

    reduceDriver.setInput(key1, ImmutableList.of(value1));
    List<Pair<NullWritable, Text>> result = reduceDriver.run();
    assertEquals(0, result.size()); // the reducer writes the statistics to a sequence file
}
Example 11
Source File: MockRecordReader.java From pentaho-hadoop-shims with Apache License 2.0
@Override
public boolean next( Text key, Text value ) throws IOException {
  if ( !rowIter.hasNext() ) {
    return false;
  }
  rowNum++;
  key.set( String.valueOf( rowNum ) );
  value.set( rowIter.next() );
  return true;
}
Example 12
Source File: SequenceFileAsTextRecordReader.java From big-c with Apache License 2.0
/** Read key/value pair in a line. */
public synchronized boolean next(Text key, Text value) throws IOException {
    Text tKey = key;
    Text tValue = value;
    if (!sequenceFileRecordReader.next(innerKey, innerValue)) {
        return false;
    }
    tKey.set(innerKey.toString());
    tValue.set(innerValue.toString());
    return true;
}
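Examples 11 and 12 (and the identical Example 15 below) call set() on Text instances supplied by the caller instead of allocating new objects; Hadoop record readers reuse key/value instances across records for exactly this reason. One related gotcha, shown here in a generic sketch (TextReuseDemo is made up, not from the project above): getBytes() returns the backing array, which set() does not shrink, so it must always be paired with getLength():

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextReuseDemo {
    public static void main(String[] args) {
        Text t = new Text();
        t.set("a longer first value");
        t.set("short");               // reuses the old, larger backing array
        byte[] raw = t.getBytes();    // backing array; may be longer than the contents
        // wrong: may decode stale trailing bytes left over from the longer value
        System.out.println(new String(raw, StandardCharsets.UTF_8));
        // right: respect getLength()
        System.out.println(new String(raw, 0, t.getLength(), StandardCharsets.UTF_8));
    }
}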
Example 13
Source File: PatternMatcherTest.java From jumbune with GNU Lesser General Public License v3.0
@Test
public void matchTestAgainstStringNull() {
    Text value = new Text();
    value.set("null");
    boolean check = PatternMatcher.match(value);
    assertFalse(check);
}
Example 14
Source File: TestIndexedSort.java From hadoop with Apache License 2.0
private static void genRandom(Text t, int len, StringBuilder sb) {
    sb.setLength(0);
    for (int i = 0; i < len; ++i) {
        sb.append(Integer.toString(r.nextInt(26) + 10, 36));
    }
    t.set(sb.toString());
}
Example 15
Source File: SequenceFileAsTextRecordReader.java From hadoop-gpu with Apache License 2.0
/** Read key/value pair in a line. */
public synchronized boolean next(Text key, Text value) throws IOException {
    Text tKey = key;
    Text tValue = value;
    if (!sequenceFileRecordReader.next(innerKey, innerValue)) {
        return false;
    }
    tKey.set(innerKey.toString());
    tValue.set(innerValue.toString());
    return true;
}
Example 16
Source File: SequenceFileProtobufWriter.java From hiped2 with Apache License 2.0
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();

    if (result != 0) {
        return result;
    }

    File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();
    ProtobufSerialization.register(conf);

    SequenceFile.Writer writer =
        SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(outputPath),
            SequenceFile.Writer.keyClass(Text.class),
            SequenceFile.Writer.valueClass(Stock.class),
            SequenceFile.Writer.compression(
                SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
    try {
        Text key = new Text();
        for (Stock stock : StockUtils.fromCsvFile(inputFile)) {
            key.set(stock.getSymbol());
            writer.append(key, stock);
        }
    } finally {
        writer.close();
    }
    return 0;
}
Example 17
Source File: KettleTypeToTextConverter.java From pentaho-hadoop-shims with Apache License 2.0
@Override
public Text convert( ValueMetaInterface meta, Object obj ) throws TypeConversionException {
  try {
    Text text = new Text();
    text.set( meta.getString( obj ) );
    return text;
  } catch ( KettleValueException ex ) {
    throw new TypeConversionException(
      BaseMessages.getString( TypeConverterFactory.class, "ErrorConverting",
        Text.class.getSimpleName(), obj ), ex );
  }
}
Example 18
Source File: FileBench.java From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked") // OutputFormat instantiation static long writeBench(JobConf conf) throws IOException { long filelen = conf.getLong("filebench.file.bytes", 5 * 1024 * 1024 * 1024); Text key = new Text(); Text val = new Text(); final String fn = conf.get("test.filebench.name", ""); final Path outd = FileOutputFormat.getOutputPath(conf); conf.set("mapred.work.output.dir", outd.toString()); OutputFormat outf = conf.getOutputFormat(); RecordWriter<Text,Text> rw = outf.getRecordWriter(outd.getFileSystem(conf), conf, fn, Reporter.NULL); try { long acc = 0L; Date start = new Date(); for (int i = 0; acc < filelen; ++i) { i %= keys.length; key.set(keys[i]); val.set(values[i]); rw.write(key, val); acc += keys[i].length(); acc += values[i].length(); } Date end = new Date(); return end.getTime() - start.getTime(); } finally { rw.close(Reporter.NULL); } }
Example 19
Source File: LexicoderRowSerializer.java From presto with Apache License 2.0
@Override
public void setDouble(Text text, Double value) {
    text.set(encode(DOUBLE, value));
}
Example 20
Source File: CSVOutputUtils.java From incubator-retired-pirk with Apache License 2.0
public static Text setCSVOutput(String domain, String ip, String timestamp, String generic) {
    Text value = new Text();
    String csvOut = domain + "," + ip + "," + timestamp + "," + generic;
    value.set(csvOut);
    return value;
}