org.apache.hadoop.io.serializer.Serializer Java Examples
The following examples show how to use
org.apache.hadoop.io.serializer.Serializer.
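All of the snippets below share the same lifecycle: obtain a Serializer from a SerializationFactory, open() it on an output stream, call serialize() one or more times, then close(). The round trip below is a minimal sketch of that lifecycle — it is not taken from any of the projects listed — assuming the default io.serializations setting, which registers WritableSerialization for Writable types such as Text.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

public class SerializerRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    SerializationFactory factory = new SerializationFactory(conf);

    // Serialize: open a serializer on an output stream, write, close.
    Serializer<Text> serializer = factory.getSerializer(Text.class);
    DataOutputBuffer out = new DataOutputBuffer();
    serializer.open(out);
    serializer.serialize(new Text("hello"));
    serializer.close();

    // Deserialize: open a deserializer over the same bytes and read back.
    Deserializer<Text> deserializer = factory.getDeserializer(Text.class);
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    deserializer.open(in);
    Text copy = deserializer.deserialize(null); // null => allocate a new object
    deserializer.close();

    System.out.println(copy); // prints "hello"
  }
}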
Example #1
Source File: TestMerge.java From hadoop with Apache License 2.0
public KeyValueWriter(Configuration conf, OutputStream output,
                      Class<K> kyClass, Class<V> valClass) throws IOException {
  keyClass = kyClass;
  valueClass = valClass;
  dataBuffer = new DataOutputBuffer();
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  keySerializer = (Serializer<K>) serializationFactory.getSerializer(keyClass);
  keySerializer.open(dataBuffer);
  valueSerializer = (Serializer<V>) serializationFactory.getSerializer(valueClass);
  valueSerializer.open(dataBuffer);
  outputStream = new DataOutputStream(output);
}
Example #2
Source File: ReflectionUtils.java From hadoop-gpu with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
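For context, a hedged sketch of how this copy(...) helper is typically called follows; the IntWritable values and variable names are illustrative, not from the original test suite.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.util.ReflectionUtils;

// Hypothetical usage of ReflectionUtils.copy(...): clone src into dst.
Configuration conf = new Configuration();
IntWritable src = new IntWritable(42);
IntWritable dst = new IntWritable();        // existing contents are destroyed
dst = ReflectionUtils.copy(conf, src, dst); // dst is now an independent copy of src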
Example #3
Source File: TestWritableJobConf.java From hadoop-gpu with Apache License 2.0
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory factory = new SerializationFactory(CONF);
  Serializer<K> serializer =
      factory.getSerializer(GenericsUtil.getClass(conf));
  Deserializer<K> deserializer =
      factory.getDeserializer(GenericsUtil.getClass(conf));
  DataOutputBuffer out = new DataOutputBuffer();
  serializer.open(out);
  serializer.serialize(conf);
  serializer.close();
  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());
  deserializer.open(in);
  K after = deserializer.deserialize(null);
  deserializer.close();
  return after;
}
Example #4
Source File: Chain.java From hadoop-gpu with Apache License 2.0
private <E> E makeCopyForPassByValue(Serialization<E> serialization, E obj)
    throws IOException {
  Serializer<E> ser =
      serialization.getSerializer(GenericsUtil.getClass(obj));
  Deserializer<E> deser =
      serialization.getDeserializer(GenericsUtil.getClass(obj));
  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();
  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();
  obj = ReflectionUtils.newInstance(GenericsUtil.getClass(obj), getChainJobConf());
  ByteArrayInputStream bais =
      new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  deser.deserialize(obj);
  deser.close();
  return obj;
}
Example #5
Source File: MRInputHelpers.java From tez with Apache License 2.0
@InterfaceStability.Evolving
public static <T extends org.apache.hadoop.mapreduce.InputSplit>
    MRRuntimeProtos.MRSplitProto createSplitProto(
        T newSplit, SerializationFactory serializationFactory)
    throws IOException, InterruptedException {
  MRRuntimeProtos.MRSplitProto.Builder builder =
      MRRuntimeProtos.MRSplitProto.newBuilder();
  builder.setSplitClassName(newSplit.getClass().getName());
  @SuppressWarnings("unchecked")
  Serializer<T> serializer =
      serializationFactory.getSerializer((Class<T>) newSplit.getClass());
  ByteString.Output out =
      ByteString.newOutput(SPLIT_SERIALIZED_LENGTH_ESTIMATE);
  serializer.open(out);
  serializer.serialize(newSplit);
  // TODO MR Compat: Check against max block locations per split.
  ByteString splitBs = out.toByteString();
  builder.setSplitBytes(splitBs);
  return builder.build();
}
Example #6
Source File: MRHelpers.java From incubator-tez with Apache License 2.0
@Private
public static <T extends org.apache.hadoop.mapreduce.InputSplit>
    MRSplitProto createSplitProto(
        T newSplit, SerializationFactory serializationFactory)
    throws IOException, InterruptedException {
  MRSplitProto.Builder builder = MRSplitProto.newBuilder();
  builder.setSplitClassName(newSplit.getClass().getName());
  @SuppressWarnings("unchecked")
  Serializer<T> serializer =
      serializationFactory.getSerializer((Class<T>) newSplit.getClass());
  ByteString.Output out =
      ByteString.newOutput(SPLIT_SERIALIZED_LENGTH_ESTIMATE);
  serializer.open(out);
  serializer.serialize(newSplit);
  // TODO MR Compat: Check against max block locations per split.
  ByteString splitBs = out.toByteString();
  builder.setSplitBytes(splitBs);
  return builder.build();
}
Example #7
Source File: ReflectionUtils.java From RDFS with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
Example #8
Source File: TestWritableJobConf.java From RDFS with Apache License 2.0
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory factory = new SerializationFactory(CONF);
  Serializer<K> serializer =
      factory.getSerializer(GenericsUtil.getClass(conf));
  Deserializer<K> deserializer =
      factory.getDeserializer(GenericsUtil.getClass(conf));
  DataOutputBuffer out = new DataOutputBuffer();
  serializer.open(out);
  serializer.serialize(conf);
  serializer.close();
  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());
  deserializer.open(in);
  K after = deserializer.deserialize(null);
  deserializer.close();
  return after;
}
Example #9
Source File: Chain.java From RDFS with Apache License 2.0
private <E> E makeCopyForPassByValue(Serialization<E> serialization, E obj)
    throws IOException {
  Serializer<E> ser =
      serialization.getSerializer(GenericsUtil.getClass(obj));
  Deserializer<E> deser =
      serialization.getDeserializer(GenericsUtil.getClass(obj));
  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();
  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();
  obj = ReflectionUtils.newInstance(GenericsUtil.getClass(obj), getChainJobConf());
  ByteArrayInputStream bais =
      new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  deser.deserialize(obj);
  deser.close();
  return obj;
}
Example #10
Source File: OutputCollectorImpl.java From attic-apex-malhar with Apache License 2.0
private <T> T cloneObj(T t) throws IOException {
  Serializer<T> keySerializer;
  Class<T> keyClass;
  PipedInputStream pis = new PipedInputStream();
  PipedOutputStream pos = new PipedOutputStream(pis);
  keyClass = (Class<T>) t.getClass();
  keySerializer = serializationFactory.getSerializer(keyClass);
  keySerializer.open(pos);
  keySerializer.serialize(t);
  Deserializer<T> keyDeserializer = serializationFactory.getDeserializer(keyClass);
  keyDeserializer.open(pis);
  T clonedArg0 = keyDeserializer.deserialize(null);
  pos.close();
  pis.close();
  keySerializer.close();
  keyDeserializer.close();
  return clonedArg0;
}
Example #11
Source File: ReflectionUtils.java From big-c with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
Example #12
Source File: ReduceContextImpl.java From big-c with Apache License 2.0
/**
 * This method is called to write the record that was most recently
 * served (before a call to the mark). Since the framework reads one
 * record in advance, to get this record, we serialize the current key
 * and value
 * @param out
 * @throws IOException
 */
private void writeFirstKeyValueBytes(DataOutputStream out) throws IOException {
  assert (getCurrentKey() != null && getCurrentValue() != null);
  WritableUtils.writeVInt(out, currentKeyLength);
  WritableUtils.writeVInt(out, currentValueLength);
  Serializer<KEYIN> keySerializer =
      serializationFactory.getSerializer(keyClass);
  keySerializer.open(out);
  keySerializer.serialize(getCurrentKey());
  Serializer<VALUEIN> valueSerializer =
      serializationFactory.getSerializer(valueClass);
  valueSerializer.open(out);
  valueSerializer.serialize(getCurrentValue());
}
Example #13
Source File: Chain.java From big-c with Apache License 2.0
private <E> E makeCopyForPassByValue(Serialization<E> serialization, E obj)
    throws IOException {
  Serializer<E> ser =
      serialization.getSerializer(GenericsUtil.getClass(obj));
  Deserializer<E> deser =
      serialization.getDeserializer(GenericsUtil.getClass(obj));
  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();
  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();
  obj = ReflectionUtils.newInstance(GenericsUtil.getClass(obj), getChainJobConf());
  ByteArrayInputStream bais =
      new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  deser.deserialize(obj);
  deser.close();
  return obj;
}
Example #14
Source File: ReflectionUtils.java From hadoop with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
Example #15
Source File: ReduceContextImpl.java From hadoop with Apache License 2.0
/**
 * This method is called to write the record that was most recently
 * served (before a call to the mark). Since the framework reads one
 * record in advance, to get this record, we serialize the current key
 * and value
 * @param out
 * @throws IOException
 */
private void writeFirstKeyValueBytes(DataOutputStream out) throws IOException {
  assert (getCurrentKey() != null && getCurrentValue() != null);
  WritableUtils.writeVInt(out, currentKeyLength);
  WritableUtils.writeVInt(out, currentValueLength);
  Serializer<KEYIN> keySerializer =
      serializationFactory.getSerializer(keyClass);
  keySerializer.open(out);
  keySerializer.serialize(getCurrentKey());
  Serializer<VALUEIN> valueSerializer =
      serializationFactory.getSerializer(valueClass);
  valueSerializer.open(out);
  valueSerializer.serialize(getCurrentValue());
}
Example #16
Source File: Chain.java From hadoop with Apache License 2.0
private <E> E makeCopyForPassByValue(Serialization<E> serialization, E obj)
    throws IOException {
  Serializer<E> ser =
      serialization.getSerializer(GenericsUtil.getClass(obj));
  Deserializer<E> deser =
      serialization.getDeserializer(GenericsUtil.getClass(obj));
  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();
  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();
  obj = ReflectionUtils.newInstance(GenericsUtil.getClass(obj), getChainJobConf());
  ByteArrayInputStream bais =
      new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  deser.deserialize(obj);
  deser.close();
  return obj;
}
Example #17
Source File: TestMerge.java From big-c with Apache License 2.0
public KeyValueWriter(Configuration conf, OutputStream output,
                      Class<K> kyClass, Class<V> valClass) throws IOException {
  keyClass = kyClass;
  valueClass = valClass;
  dataBuffer = new DataOutputBuffer();
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  keySerializer = (Serializer<K>) serializationFactory.getSerializer(keyClass);
  keySerializer.open(dataBuffer);
  valueSerializer = (Serializer<V>) serializationFactory.getSerializer(valueClass);
  valueSerializer.open(dataBuffer);
  outputStream = new DataOutputStream(output);
}
Example #18
Source File: TestWritableJobConf.java From hadoop with Apache License 2.0
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory factory = new SerializationFactory(CONF);
  Serializer<K> serializer =
      factory.getSerializer(GenericsUtil.getClass(conf));
  Deserializer<K> deserializer =
      factory.getDeserializer(GenericsUtil.getClass(conf));
  DataOutputBuffer out = new DataOutputBuffer();
  serializer.open(out);
  serializer.serialize(conf);
  serializer.close();
  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());
  deserializer.open(in);
  K after = deserializer.deserialize(null);
  deserializer.close();
  return after;
}
Example #19
Source File: TestWritableJobConf.java From big-c with Apache License 2.0
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory factory = new SerializationFactory(CONF);
  Serializer<K> serializer =
      factory.getSerializer(GenericsUtil.getClass(conf));
  Deserializer<K> deserializer =
      factory.getDeserializer(GenericsUtil.getClass(conf));
  DataOutputBuffer out = new DataOutputBuffer();
  serializer.open(out);
  serializer.serialize(conf);
  serializer.close();
  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());
  deserializer.open(in);
  K after = deserializer.deserialize(null);
  deserializer.close();
  return after;
}
Example #20
Source File: TaggedInputSplit.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked") public void write(DataOutput out) throws IOException { Text.writeString(out, inputSplitClass.getName()); Text.writeString(out, inputFormatClass.getName()); Text.writeString(out, mapperClass.getName()); SerializationFactory factory = new SerializationFactory(conf); Serializer serializer = factory.getSerializer(inputSplitClass); serializer.open((DataOutputStream)out); serializer.serialize(inputSplit); }
Example #21
Source File: MapOperatorTest.java From attic-apex-malhar with Apache License 2.0
public void testNodeProcessingSchema(
    MapOperator<LongWritable, Text, Text, IntWritable> oper) throws IOException {
  CollectorTestSink sortSink = new CollectorTestSink();
  oper.output.setSink(sortSink);
  oper.setMapClass(WordCount.Map.class);
  oper.setCombineClass(WordCount.Reduce.class);
  oper.setDirName(testMeta.testDir);
  oper.setConfigFile(null);
  oper.setInputFormatClass(TextInputFormat.class);

  Configuration conf = new Configuration();
  JobConf jobConf = new JobConf(conf);
  FileInputFormat.setInputPaths(jobConf, new Path(testMeta.testDir));
  TextInputFormat inputFormat = new TextInputFormat();
  inputFormat.configure(jobConf);
  InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass());
  keySerializer.open(oper.getOutstream());
  keySerializer.serialize(splits[0]);
  oper.setInputSplitClass(splits[0].getClass());
  keySerializer.close();

  oper.setup(null);
  oper.beginWindow(0);
  oper.emitTuples();
  oper.emitTuples();
  oper.endWindow();
  oper.beginWindow(1);
  oper.emitTuples();
  oper.endWindow();

  Assert.assertEquals("number emitted tuples", 3, sortSink.collectedTuples.size());
  for (Object o : sortSink.collectedTuples) {
    LOG.debug(o.toString());
  }
  LOG.debug("Done testing round\n");
  oper.teardown();
}
Example #22
Source File: TaggedInputSplit.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked") public void write(DataOutput out) throws IOException { Text.writeString(out, inputSplitClass.getName()); Text.writeString(out, inputFormatClass.getName()); Text.writeString(out, mapperClass.getName()); SerializationFactory factory = new SerializationFactory(conf); Serializer serializer = factory.getSerializer(inputSplitClass); serializer.open((DataOutputStream)out); serializer.serialize(inputSplit); }
Example #23
Source File: PigSplit.java From spork with Apache License 2.0
@SuppressWarnings("unchecked") public void write(DataOutput os) throws IOException { os.writeBoolean(disableCounter); os.writeBoolean(isMultiInputs); os.writeInt(totalSplits); os.writeInt(splitIndex); os.writeInt(inputIndex); writeObject(targetOps, os); os.writeInt(wrappedSplits.length); Set<String> splitClassNameSet = new HashSet<String>(); //first get the distinct split class name set for ( int i= 0; i < wrappedSplits.length; i++) { splitClassNameSet.add(wrappedSplits[i].getClass().getName()); } List<String> distinctSplitClassList = new ArrayList<String>(); distinctSplitClassList.addAll(splitClassNameSet); //write the distinct number of split class name os.writeInt(distinctSplitClassList.size()); //write each classname once for (int i = 0 ; i < distinctSplitClassList.size(); i++) { os.writeUTF(distinctSplitClassList.get(i)); } SerializationFactory sf = new SerializationFactory(conf); for (int i = 0; i < wrappedSplits.length; i++) { //find out the index of the split class name int index = distinctSplitClassList.indexOf(wrappedSplits[i].getClass().getName()); os.writeInt(index); Serializer s = sf.getSerializer(wrappedSplits[i].getClass()); //Checks if Serializer is NULL or not before calling open() method on it. if (s == null) { throw new IllegalArgumentException("Could not find Serializer for class "+wrappedSplits[i].getClass()+". InputSplits must implement Writable."); } s.open((OutputStream) os); // The correct call sequence for Serializer is, we shall open, then serialize, but we shall not close s.serialize(wrappedSplits[i]); } }
Example #24
Source File: JobSplitWriter.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked") private static <T extends InputSplit> SplitMetaInfo[] writeNewSplits(Configuration conf, T[] array, FSDataOutputStream out) throws IOException, InterruptedException { SplitMetaInfo[] info = new SplitMetaInfo[array.length]; if (array.length != 0) { SerializationFactory factory = new SerializationFactory(conf); int i = 0; int maxBlockLocations = conf.getInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY, MRConfig.MAX_BLOCK_LOCATIONS_DEFAULT); long offset = out.getPos(); for(T split: array) { long prevCount = out.getPos(); Text.writeString(out, split.getClass().getName()); Serializer<T> serializer = factory.getSerializer((Class<T>) split.getClass()); serializer.open(out); serializer.serialize(split); long currCount = out.getPos(); String[] locations = split.getLocations(); if (locations.length > maxBlockLocations) { LOG.warn("Max block location exceeded for split: " + split + " splitsize: " + locations.length + " maxsize: " + maxBlockLocations); locations = Arrays.copyOf(locations, maxBlockLocations); } info[i++] = new JobSplit.SplitMetaInfo( locations, offset, split.getLength()); offset += currCount - prevCount; } } return info; }
Example #25
Source File: JobSplitWriter.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked") private static <T extends InputSplit> SplitMetaInfo[] writeNewSplits(Configuration conf, T[] array, FSDataOutputStream out) throws IOException, InterruptedException { SplitMetaInfo[] info = new SplitMetaInfo[array.length]; if (array.length != 0) { SerializationFactory factory = new SerializationFactory(conf); int i = 0; int maxBlockLocations = conf.getInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY, MRConfig.MAX_BLOCK_LOCATIONS_DEFAULT); long offset = out.getPos(); for(T split: array) { long prevCount = out.getPos(); Text.writeString(out, split.getClass().getName()); Serializer<T> serializer = factory.getSerializer((Class<T>) split.getClass()); serializer.open(out); serializer.serialize(split); long currCount = out.getPos(); String[] locations = split.getLocations(); if (locations.length > maxBlockLocations) { LOG.warn("Max block location exceeded for split: " + split + " splitsize: " + locations.length + " maxsize: " + maxBlockLocations); locations = Arrays.copyOf(locations, maxBlockLocations); } info[i++] = new JobSplit.SplitMetaInfo( locations, offset, split.getLength()); offset += currCount - prevCount; } } return info; }
Example #26
Source File: JobClient.java From RDFS with Apache License 2.0
@SuppressWarnings("unchecked") private <T extends org.apache.hadoop.mapreduce.InputSplit> List<RawSplit> computeNewSplits(JobContext job) throws IOException, InterruptedException, ClassNotFoundException { JobConf conf = job.getJobConf(); org.apache.hadoop.mapreduce.InputFormat<?,?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getJobConf()); List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(job); T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]); // sort the splits into order based on size, so that the biggest // go first Arrays.sort(array, new NewSplitComparator()); List<RawSplit> rawSplits = new ArrayList<RawSplit>(); if (array.length != 0) { DataOutputBuffer buffer = new DataOutputBuffer(); SerializationFactory factory = new SerializationFactory(conf); Serializer<T> serializer = factory.getSerializer((Class<T>) array[0].getClass()); serializer.open(buffer); for(T split: array) { RawSplit rawSplit = new RawSplit(); rawSplit.setClassName(split.getClass().getName()); buffer.reset(); serializer.serialize(split); rawSplit.setDataLength(split.getLength()); rawSplit.setBytes(buffer.getData(), 0, buffer.getLength()); rawSplit.setLocations(split.getLocations()); rawSplits.add(rawSplit); } serializer.close(); } return rawSplits; }
Example #27
Source File: JobClient.java From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked") private <T extends org.apache.hadoop.mapreduce.InputSplit> int writeNewSplits(JobContext job, Path submitSplitFile ) throws IOException, InterruptedException, ClassNotFoundException { JobConf conf = job.getJobConf(); org.apache.hadoop.mapreduce.InputFormat<?,?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getJobConf()); List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(job); T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]); // sort the splits into order based on size, so that the biggest // go first Arrays.sort(array, new NewSplitComparator()); DataOutputStream out = writeSplitsFileHeader(conf, submitSplitFile, array.length); try { if (array.length != 0) { DataOutputBuffer buffer = new DataOutputBuffer(); RawSplit rawSplit = new RawSplit(); SerializationFactory factory = new SerializationFactory(conf); Serializer<T> serializer = factory.getSerializer((Class<T>) array[0].getClass()); serializer.open(buffer); for(T split: array) { rawSplit.setClassName(split.getClass().getName()); buffer.reset(); serializer.serialize(split); rawSplit.setDataLength(split.getLength()); rawSplit.setBytes(buffer.getData(), 0, buffer.getLength()); rawSplit.setLocations(split.getLocations()); rawSplit.write(out); } serializer.close(); } } finally { out.close(); } return array.length; }
Example #28
Source File: TestValuesIterator.java From tez with Apache License 2.0
/**
 * create in-memory segments
 *
 * @return
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public List<TezMerger.Segment> createInMemStreams() throws IOException {
  int numberOfStreams = Math.max(2, rnd.nextInt(10));
  LOG.info("No of streams : " + numberOfStreams);

  SerializationFactory serializationFactory = new SerializationFactory(conf);
  Serializer keySerializer = serializationFactory.getSerializer(keyClass);
  Serializer valueSerializer = serializationFactory.getSerializer(valClass);

  LocalDirAllocator localDirAllocator =
      new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
  InputContext context = createTezInputContext();
  MergeManager mergeManager = new MergeManager(conf, fs, localDirAllocator,
      context, null, null, null, null, null, 1024 * 1024 * 10, null, false, -1);

  DataOutputBuffer keyBuf = new DataOutputBuffer();
  DataOutputBuffer valBuf = new DataOutputBuffer();
  DataInputBuffer keyIn = new DataInputBuffer();
  DataInputBuffer valIn = new DataInputBuffer();

  keySerializer.open(keyBuf);
  valueSerializer.open(valBuf);

  List<TezMerger.Segment> segments = new LinkedList<TezMerger.Segment>();
  for (int i = 0; i < numberOfStreams; i++) {
    BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
    InMemoryWriter writer = new InMemoryWriter(bout);
    Map<Writable, Writable> data = createData();
    // write data
    for (Map.Entry<Writable, Writable> entry : data.entrySet()) {
      keySerializer.serialize(entry.getKey());
      valueSerializer.serialize(entry.getValue());
      keyIn.reset(keyBuf.getData(), 0, keyBuf.getLength());
      valIn.reset(valBuf.getData(), 0, valBuf.getLength());
      writer.append(keyIn, valIn);
      originalData.put(entry.getKey(), entry.getValue());
      keyBuf.reset();
      valBuf.reset();
      keyIn.reset();
      valIn.reset();
    }
    IFile.Reader reader = new InMemoryReader(mergeManager, null,
        bout.getBuffer(), 0, bout.getBuffer().length);
    segments.add(new TezMerger.Segment(reader, null));
    data.clear();
    writer.close();
  }
  return segments;
}
Example #29
Source File: TezBytesWritableSerialization.java From tez with Apache License 2.0
@Override
public Serializer<Writable> getSerializer(Class<Writable> c) {
  return new TezBytesWritableSerializer();
}
Example #30
Source File: BinaryBlockSerialization.java From systemds with Apache License 2.0
@Override
public Serializer<MatrixBlock> getSerializer(Class arg0) {
  return new MatrixBlockSerializer();
}
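Examples #29 and #30 return custom Serializer implementations. For reference, a Serializer implementation only has to provide open, serialize, and close; the sketch below does so for a hypothetical Point(x, y) value type and is not taken from either project.

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.io.serializer.Serializer;

// Hypothetical value type, for illustration only.
class Point {
  int x, y;
}

// Minimal Serializer sketch: open() binds the stream, serialize() writes
// one record, close() releases the stream.
class PointSerializer implements Serializer<Point> {
  private DataOutputStream dataOut;

  @Override
  public void open(OutputStream out) {
    dataOut = (out instanceof DataOutputStream)
        ? (DataOutputStream) out
        : new DataOutputStream(out);
  }

  @Override
  public void serialize(Point p) throws IOException {
    dataOut.writeInt(p.x);
    dataOut.writeInt(p.y);
  }

  @Override
  public void close() throws IOException {
    dataOut.close();
  }
}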