org.apache.hadoop.io.UTF8 Java Examples
The following examples show how to use org.apache.hadoop.io.UTF8. They are drawn from open source projects; each example is attributed to its original project, source file, and license. Note that UTF8 is deprecated in Hadoop in favor of org.apache.hadoop.io.Text and survives mainly for backward compatibility with older serialized formats.
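Before the project examples, here is a minimal round-trip sketch (not taken from any of the projects below) showing the two static helpers, UTF8.writeString and UTF8.readString, that many of the examples rely on. It assumes only that hadoop-common is on the classpath.

import java.io.*;
import org.apache.hadoop.io.UTF8;

public class UTF8RoundTrip {
  public static void main(String[] args) throws IOException {
    // Write a string as a length-prefixed run of UTF-8 bytes.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    UTF8.writeString(out, "hello, hadoop");

    // Read the same encoding back.
    DataInputStream in = new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray()));
    String back = UTF8.readString(in);
    System.out.println(back); // prints: hello, hadoop
  }
}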
Example #1
Source File: TestFileSystem.java From RDFS with Apache License 2.0 | 6 votes |
public static void readTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(ReadMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #2
Source File: MRBench.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Create the job configuration.
 */
private JobConf setupJob(int numMaps, int numReduces, String jarFile) {
  JobConf jobConf = new JobConf(getConf());
  jobConf.setJarByClass(MRBench.class);

  FileInputFormat.addInputPath(jobConf, INPUT_DIR);

  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);

  jobConf.setOutputValueClass(UTF8.class);
  jobConf.setMapOutputKeyClass(UTF8.class);
  jobConf.setMapOutputValueClass(UTF8.class);

  if (null != jarFile) {
    jobConf.setJar(jarFile);
  }
  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);

  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numReduces);
  jobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
  return jobConf;
}
Example #3
Source File: TestFileSystem.java From RDFS with Apache License 2.0 | 6 votes |
public static void seekTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(SeekMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #4
Source File: TestFileSystem.java From RDFS with Apache License 2.0 | 6 votes |
public static void writeTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(DATA_DIR, true);
  fs.delete(WRITE_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(WriteMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, WRITE_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #5
Source File: MRBench.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Create the job configuration.
 */
private JobConf setupJob(int numMaps, int numReduces, String jarFile) {
  JobConf jobConf = new JobConf(getConf());
  jobConf.setJarByClass(MRBench.class);

  FileInputFormat.addInputPath(jobConf, INPUT_DIR);

  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);

  jobConf.setOutputValueClass(UTF8.class);
  jobConf.setMapOutputKeyClass(UTF8.class);
  jobConf.setMapOutputValueClass(UTF8.class);

  if (null != jarFile) {
    jobConf.setJar(jarFile);
  }
  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);

  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numReduces);
  jobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
  return jobConf;
}
Example #6
Source File: TestFileSystem.java From hadoop-gpu with Apache License 2.0 | 6 votes |
public static void writeTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(DATA_DIR, true);
  fs.delete(WRITE_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(WriteMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, WRITE_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #7
Source File: MRBench.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Create the job configuration.
 */
private static JobConf setupJob(int numMaps, int numReduces, String jarFile) {
  JobConf jobConf = new JobConf(MRBench.class);

  FileInputFormat.addInputPath(jobConf, INPUT_DIR);

  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);

  jobConf.setOutputValueClass(UTF8.class);
  jobConf.setMapOutputKeyClass(UTF8.class);
  jobConf.setMapOutputValueClass(UTF8.class);

  if (null != jarFile) {
    jobConf.setJar(jarFile);
  }
  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);

  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numReduces);
  return jobConf;
}
Example #8
Source File: KafkaKey.java From HiveKa with Apache License 2.0 | 6 votes |
@Override
public void readFields(DataInput in) throws IOException {
  this.leaderId = UTF8.readString(in);
  this.partition = in.readInt();
  this.beginOffset = in.readLong();
  this.offset = in.readLong();
  this.checksum = in.readLong();
  this.topic = in.readUTF();
  this.time = in.readLong();
  this.server = in.readUTF();  // left for legacy
  this.service = in.readUTF(); // left for legacy
  this.partitionMap = new MapWritable();
  try {
    this.partitionMap.readFields(in);
  } catch (IOException e) {
    this.setServer(this.server);
    this.setService(this.service);
  }
}
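The Writable contract pairs readFields with a write(DataOutput) that emits the same fields in the same order. The actual KafkaKey.write is not shown on this page; a hypothetical counterpart consistent with the read order above might look like this (note it always writes partitionMap, whereas the try/catch above tolerates legacy records that lack it):

@Override
public void write(DataOutput out) throws IOException {
  // Hypothetical sketch mirroring readFields above; not the HiveKa source.
  UTF8.writeString(out, this.leaderId);
  out.writeInt(this.partition);
  out.writeLong(this.beginOffset);
  out.writeLong(this.offset);
  out.writeLong(this.checksum);
  out.writeUTF(this.topic);
  out.writeLong(this.time);
  out.writeUTF(this.server);   // left for legacy
  out.writeUTF(this.service);  // left for legacy
  this.partitionMap.write(out);
}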
Example #9
Source File: MRBench.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Create the job configuration.
 */
private JobConf setupJob(int numMaps, int numReduces, String jarFile) {
  JobConf jobConf = new JobConf(getConf());
  jobConf.setJarByClass(MRBench.class);

  FileInputFormat.addInputPath(jobConf, INPUT_DIR);

  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);

  jobConf.setOutputValueClass(UTF8.class);
  jobConf.setMapOutputKeyClass(UTF8.class);
  jobConf.setMapOutputValueClass(UTF8.class);

  if (null != jarFile) {
    jobConf.setJar(jarFile);
  }
  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);

  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numReduces);
  return jobConf;
}
Example #10
Source File: TestFileSystem.java From hadoop-gpu with Apache License 2.0 | 6 votes |
public static void readTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(ReadMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #11
Source File: TestParquetMetadataConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
Note: OriginalType.UTF8 in this example and the next is Parquet's logical type annotation for binary columns, not org.apache.hadoop.io.UTF8; it is included here because it shares the simple name.

@Test
public void testIgnoreStatsWithSignedSortOrder() {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("z"));
  stats.incrementNumNulls();

  PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY)
      .as(OriginalType.UTF8).named("b");
  Statistics convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      StatsHelper.V1.toParquetStatistics(stats),
      binaryType);

  Assert.assertFalse("Stats should not include min/max: " + convertedStats,
      convertedStats.hasNonNullValue());
  Assert.assertTrue("Stats should have null count: " + convertedStats,
      convertedStats.isNumNullsSet());
  Assert.assertEquals("Stats should have 3 nulls: " + convertedStats,
      3L, convertedStats.getNumNulls());
}
Example #12
Source File: TestParquetMetadataConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helper) {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();

  PrimitiveType binaryType =
      Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");
  Statistics convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      ParquetMetadataConverter.toParquetStatistics(stats),
      binaryType);

  Assert.assertFalse("Stats should not be empty: " + convertedStats,
      convertedStats.isEmpty());
  Assert.assertArrayEquals("min == max: " + convertedStats,
      convertedStats.getMaxBytes(),
      convertedStats.getMinBytes());
}
Example #13
Source File: IOMapperBase.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Map file name and offset into statistical data.
 * <p>
 * The map task is to get the <tt>key</tt>, which contains the file name,
 * and the <tt>value</tt>, which is the offset within the file.
 *
 * The parameters are passed to the abstract method
 * {@link #doIO(Reporter,String,long)}, which performs the io operation,
 * usually read or write data, and then
 * {@link #collectStats(OutputCollector,String,long,Object)}
 * is called to prepare stat data for a subsequent reducer.
 */
public void map(UTF8 key, LongWritable value,
                OutputCollector<UTF8, UTF8> output,
                Reporter reporter) throws IOException {
  String name = key.toString();
  long longValue = value.get();

  reporter.setStatus("starting " + name + " ::host = " + hostName);

  long tStart = System.currentTimeMillis();
  Object statValue = doIO(reporter, name, longValue);
  long tEnd = System.currentTimeMillis();
  long execTime = tEnd - tStart;
  collectStats(output, name, execTime, statValue);

  reporter.setStatus("finished " + name + " ::host = " + hostName);
}
Example #14
Source File: TestFileSystem.java From hadoop-gpu with Apache License 2.0 | 6 votes |
public static void seekTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(SeekMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #15
Source File: NamespaceInfo.java From RDFS with Apache License 2.0 | 5 votes |
public void readFields(DataInput in) throws IOException {
  buildVersion = UTF8.readString(in);
  layoutVersion = in.readInt();
  namespaceID = in.readInt();
  cTime = in.readLong();
  distributedUpgradeVersion = in.readInt();
}
Example #16
Source File: TestFileSystem.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public static void createControlFile(FileSystem fs,
                                     long megaBytes, int numFiles,
                                     long seed) throws Exception {
  LOG.info("creating control file: " + megaBytes + " bytes, " + numFiles + " files");

  Path controlFile = new Path(CONTROL_DIR, "files");
  fs.delete(controlFile, true);
  Random random = new Random(seed);

  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, controlFile,
                                UTF8.class, LongWritable.class, CompressionType.NONE);

  long totalSize = 0;
  long maxSize = ((megaBytes / numFiles) * 2) + 1;
  try {
    while (totalSize < megaBytes) {
      UTF8 name = new UTF8(Long.toString(random.nextLong()));

      long size = random.nextLong();
      if (size < 0)
        size = -size;
      size = size % maxSize;

      //LOG.info(" adding: name="+name+" size="+size);

      writer.append(name, new LongWritable(size));

      totalSize += size;
    }
  } finally {
    writer.close();
  }
  LOG.info("created control file for: " + totalSize + " bytes");
}
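As a companion to the writer above, here is a minimal sketch (assumed, not part of the test suite) of reading those UTF8/LongWritable pairs back with the old SequenceFile.Reader API:

SequenceFile.Reader reader = new SequenceFile.Reader(fs, controlFile, conf);
try {
  UTF8 name = new UTF8();
  LongWritable size = new LongWritable();
  while (reader.next(name, size)) { // fills key and value in place
    System.out.println(name + " -> " + size.get());
  }
} finally {
  reader.close();
}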
Example #17
Source File: AccumulatingReducer.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void reduce(UTF8 key,
                   Iterator<UTF8> values,
                   OutputCollector<UTF8, UTF8> output,
                   Reporter reporter) throws IOException {
  String field = key.toString();

  reporter.setStatus("starting " + field + " ::host = " + hostName);

  // concatenate strings
  if (field.startsWith("s:")) {
    String sSum = "";
    while (values.hasNext())
      sSum += values.next().toString() + ";";
    output.collect(key, new UTF8(sSum));
    reporter.setStatus("finished " + field + " ::host = " + hostName);
    return;
  }
  // sum float values
  if (field.startsWith("f:")) {
    float fSum = 0;
    while (values.hasNext())
      fSum += Float.parseFloat(values.next().toString());
    output.collect(key, new UTF8(String.valueOf(fSum)));
    reporter.setStatus("finished " + field + " ::host = " + hostName);
    return;
  }
  // sum long values
  if (field.startsWith("l:")) {
    long lSum = 0;
    while (values.hasNext()) {
      lSum += Long.parseLong(values.next().toString());
    }
    output.collect(key, new UTF8(String.valueOf(lSum)));
  }
  reporter.setStatus("finished " + field + " ::host = " + hostName);
}
Example #18
Source File: NamespaceInfo.java From RDFS with Apache License 2.0 | 5 votes |
public void write(DataOutput out) throws IOException {
  UTF8.writeString(out, getBuildVersion());
  out.writeInt(getLayoutVersion());
  out.writeInt(getNamespaceID());
  out.writeLong(getCTime());
  out.writeInt(getDistributedUpgradeVersion());
}
Example #19
Source File: DatanodeDescriptor.java From RDFS with Apache License 2.0 | 5 votes |
/** Serialization for FSEditLog */
void readFieldsFromFSEditLog(DataInput in) throws IOException {
  this.name = UTF8.readString(in);
  this.storageID = UTF8.readString(in);
  this.infoPort = in.readShort() & 0x0000ffff;
  this.capacity = in.readLong();
  this.dfsUsed = in.readLong();
  this.remaining = in.readLong();
  this.lastUpdate = in.readLong();
  this.xceiverCount = in.readInt();
  this.location = Text.readString(in);
  this.hostName = Text.readString(in);
  setAdminState(WritableUtils.readEnum(in, AdminStates.class));
}
Example #20
Source File: FSImageSerialization.java From RDFS with Apache License 2.0 | 5 votes |
/**
 * Read the path from the image and convert it to byte[][] directly;
 * this saves an array copy and conversions to and from String.
 * @param in
 * @return the array, each element of which is a byte[] representation
 *         of a path component
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public static byte[][] readPathComponents(DataInputStream in)
    throws IOException {
  UTF8 ustr = TL_DATA.get().U_STR;
  ustr.readFields(in);
  return DFSUtil.bytes2byteArray(ustr.getBytes(),
      ustr.getLength(), (byte) Path.SEPARATOR_CHAR);
}
Example #21
Source File: FSImageSerialization.java From RDFS with Apache License 2.0 | 5 votes |
@SuppressWarnings("deprecation") public static byte[] readBytes(DataInputStream in) throws IOException { UTF8 ustr = TL_DATA.get().U_STR; ustr.readFields(in); int len = ustr.getLength(); byte[] bytes = new byte[len]; System.arraycopy(ustr.getBytes(), 0, bytes, 0, len); return bytes; }
Example #22
Source File: TestFileSystem.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void map(UTF8 key, LongWritable value,
                OutputCollector<UTF8, LongWritable> collector,
                Reporter reporter) throws IOException {
  String name = key.toString();
  long size = value.get();
  long seed = Long.parseLong(name);

  random.setSeed(seed);
  reporter.setStatus("creating " + name);

  // write to temp file initially to permit parallel execution
  Path tempFile = new Path(DATA_DIR, name + suffix);
  OutputStream out = fs.create(tempFile);

  long written = 0;
  try {
    while (written < size) {
      if (fastCheck) {
        Arrays.fill(buffer, (byte) random.nextInt(Byte.MAX_VALUE));
      } else {
        random.nextBytes(buffer);
      }
      long remains = size - written;
      int length = (remains <= buffer.length) ? (int) remains : buffer.length;
      out.write(buffer, 0, length);
      written += length;
      reporter.setStatus("writing " + name + "@" + written + "/" + size);
    }
  } finally {
    out.close();
  }
  // rename to final location
  fs.rename(tempFile, new Path(DATA_DIR, name));

  collector.collect(new UTF8("bytes"), new LongWritable(written));

  reporter.setStatus("wrote " + name);
}
Example #23
Source File: MRBench.java From hadoop with Apache License 2.0 | 5 votes |
public void reduce(UTF8 key, Iterator<UTF8> values,
                   OutputCollector<UTF8, UTF8> output,
                   Reporter reporter) throws IOException {
  while (values.hasNext()) {
    output.collect(key, new UTF8(values.next().toString()));
  }
}
Example #24
Source File: MRBench.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void map(WritableComparable key, Text value,
                OutputCollector<UTF8, UTF8> output,
                Reporter reporter) throws IOException {
  String line = value.toString();
  output.collect(new UTF8(process(line)), new UTF8(""));
}
Example #25
Source File: MRBench.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void reduce(UTF8 key, Iterator<UTF8> values,
                   OutputCollector<UTF8, UTF8> output,
                   Reporter reporter) throws IOException {
  while (values.hasNext()) {
    output.collect(key, new UTF8(values.next().toString()));
  }
}
Example #26
Source File: DatanodeID.java From hadoop-gpu with Apache License 2.0 | 5 votes |
/** {@inheritDoc} */
public void readFields(DataInput in) throws IOException {
  name = UTF8.readString(in);
  storageID = UTF8.readString(in);
  // The infoPort read could be negative if the port is a large number
  // (more than 15 bits in storage size, but less than 16 bits), so chop
  // off the upper two bytes (and hence the sign bits) before setting the field.
  this.infoPort = in.readShort() & 0x0000ffff;
}
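The masking idiom above deserves a quick standalone illustration (a sketch, not Hadoop code): writeShort truncates an int to 16 bits, and readShort sign-extends on the way back, so any port above 32767 comes back negative until the mask strips the sign extension.

public class UnsignedShortDemo {
  public static void main(String[] args) {
    int port = 50070;                    // a typical HDFS info port, above Short.MAX_VALUE
    short stored = (short) port;         // what DataOutput.writeShort effectively keeps
    System.out.println(stored);          // prints -15466: the sign bit is now set
    int recovered = stored & 0x0000ffff; // chop off the sign-extended high bytes
    System.out.println(recovered);       // prints 50070: the original port restored
  }
}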
Example #27
Source File: DatanodeDescriptor.java From hadoop-gpu with Apache License 2.0 | 5 votes |
/** Serialization for FSEditLog */
void readFieldsFromFSEditLog(DataInput in) throws IOException {
  this.name = UTF8.readString(in);
  this.storageID = UTF8.readString(in);
  this.infoPort = in.readShort() & 0x0000ffff;
  this.capacity = in.readLong();
  this.dfsUsed = in.readLong();
  this.remaining = in.readLong();
  this.lastUpdate = in.readLong();
  this.xceiverCount = in.readInt();
  this.location = Text.readString(in);
  this.hostName = Text.readString(in);
  setAdminState(WritableUtils.readEnum(in, AdminStates.class));
}
Example #28
Source File: NamespaceInfo.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void write(DataOutput out) throws IOException {
  UTF8.writeString(out, getBuildVersion());
  out.writeInt(getLayoutVersion());
  out.writeInt(getNamespaceID());
  out.writeLong(getCTime());
  out.writeInt(getDistributedUpgradeVersion());
}
Example #29
Source File: NamespaceInfo.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void readFields(DataInput in) throws IOException {
  buildVersion = UTF8.readString(in);
  layoutVersion = in.readInt();
  namespaceID = in.readInt();
  cTime = in.readLong();
  distributedUpgradeVersion = in.readInt();
}
Example #30
Source File: MRBench.java From RDFS with Apache License 2.0 | 5 votes |
public void reduce(UTF8 key, Iterator<UTF8> values,
                   OutputCollector<UTF8, UTF8> output,
                   Reporter reporter) throws IOException {
  while (values.hasNext()) {
    output.collect(key, new UTF8(values.next().toString()));
  }
}