Java Code Examples for org.apache.hadoop.io.serializer.SerializationFactory#getDeserializer()
The following examples show how to use org.apache.hadoop.io.serializer.SerializationFactory#getDeserializer(). They are taken from open source projects; the source file, originating project, and license are noted above each example.
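Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: obtain a matching Serializer/Deserializer pair from the factory, open them over in-memory buffers, and round-trip a value. This is a sketch, assuming the default WritableSerialization (which handles Writable types such as Text); the class name RoundTripSketch is illustrative, not from any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

public class RoundTripSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    SerializationFactory factory = new SerializationFactory(conf);

    // Serialize a Text value into an in-memory buffer.
    Serializer<Text> serializer = factory.getSerializer(Text.class);
    DataOutputBuffer out = new DataOutputBuffer();
    serializer.open(out);
    serializer.serialize(new Text("hello"));
    serializer.close();

    // Deserialize it back with the matching Deserializer.
    Deserializer<Text> deserializer = factory.getDeserializer(Text.class);
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    deserializer.open(in);
    Text copy = deserializer.deserialize(null); // null requests a new instance
    deserializer.close();

    System.out.println(copy); // prints "hello"
  }
}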
Example 1
Source File: Task.java From hadoop with Apache License 2.0

public ValuesIterator (RawKeyValueIterator in, RawComparator<KEY> comparator,
                       Class<KEY> keyClass, Class<VALUE> valClass,
                       Configuration conf, Progressable reporter)
    throws IOException {
  this.in = in;
  this.comparator = comparator;
  this.reporter = reporter;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(keyIn);
  this.valDeserializer = serializationFactory.getDeserializer(valClass);
  this.valDeserializer.open(this.valueIn);
  readNextKey();
  key = nextKey;
  nextKey = null; // force new instance creation
  hasNext = more;
}
Example 2
Source File: ReflectionUtils.java From big-c with Apache License 2.0

/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
Example 3
Source File: Task.java From RDFS with Apache License 2.0

public ValuesIterator (RawKeyValueIterator in, RawComparator<KEY> comparator,
                       Class<KEY> keyClass, Class<VALUE> valClass,
                       Configuration conf, Progressable reporter)
    throws IOException {
  this.in = in;
  this.comparator = comparator;
  this.reporter = reporter;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(keyIn);
  this.valDeserializer = serializationFactory.getDeserializer(valClass);
  this.valDeserializer.open(this.valueIn);
  readNextKey();
  key = nextKey;
  nextKey = null; // force new instance creation
  hasNext = more;
}
Example 4
Source File: Task.java From big-c with Apache License 2.0

public ValuesIterator (RawKeyValueIterator in, RawComparator<KEY> comparator,
                       Class<KEY> keyClass, Class<VALUE> valClass,
                       Configuration conf, Progressable reporter)
    throws IOException {
  this.in = in;
  this.comparator = comparator;
  this.reporter = reporter;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(keyIn);
  this.valDeserializer = serializationFactory.getDeserializer(valClass);
  this.valDeserializer.open(this.valueIn);
  readNextKey();
  key = nextKey;
  nextKey = null; // force new instance creation
  hasNext = more;
}
Example 5
Source File: TestWritableJobConf.java From big-c with Apache License 2.0

private <K> K serDeser(K conf) throws Exception {
  SerializationFactory factory = new SerializationFactory(CONF);
  Serializer<K> serializer =
      factory.getSerializer(GenericsUtil.getClass(conf));
  Deserializer<K> deserializer =
      factory.getDeserializer(GenericsUtil.getClass(conf));

  DataOutputBuffer out = new DataOutputBuffer();
  serializer.open(out);
  serializer.serialize(conf);
  serializer.close();

  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());
  deserializer.open(in);
  K after = deserializer.deserialize(null);
  deserializer.close();
  return after;
}
Example 6
Source File: ReflectionUtils.java From hadoop-gpu with Apache License 2.0

/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
Example 7
Source File: MRHelpers.java From incubator-tez with Apache License 2.0

@SuppressWarnings("unchecked")
public static org.apache.hadoop.mapreduce.InputSplit createNewFormatSplitFromUserPayload(
    MRSplitProto splitProto, SerializationFactory serializationFactory)
    throws IOException {
  Preconditions.checkNotNull(splitProto, "splitProto must be specified");
  String className = splitProto.getSplitClassName();
  Class<org.apache.hadoop.mapreduce.InputSplit> clazz;
  try {
    clazz = (Class<org.apache.hadoop.mapreduce.InputSplit>) Class
        .forName(className);
  } catch (ClassNotFoundException e) {
    throw new IOException("Failed to load InputSplit class: [" + className + "]", e);
  }
  Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer =
      serializationFactory.getDeserializer(clazz);
  deserializer.open(splitProto.getSplitBytes().newInput());
  org.apache.hadoop.mapreduce.InputSplit inputSplit = deserializer.deserialize(null);
  deserializer.close();
  return inputSplit;
}
Example 8
Source File: ValuesIterator.java From incubator-tez with Apache License 2.0

public ValuesIterator (TezRawKeyValueIterator in, RawComparator<KEY> comparator,
                       Class<KEY> keyClass, Class<VALUE> valClass,
                       Configuration conf, Progressable reporter)
    throws IOException {
  this.in = in;
  this.comparator = comparator;
  this.reporter = reporter;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(keyIn);
  this.valDeserializer = serializationFactory.getDeserializer(valClass);
  this.valDeserializer.open(this.valueIn);
  readNextKey();
  key = nextKey;
  nextKey = null; // force new instance creation
  hasNext = more;
}
Example 9
Source File: ReflectionUtils.java From hadoop with Apache License 2.0

/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
Example 10
Source File: MRInputHelpers.java From tez with Apache License 2.0

/**
 * Create an instance of {@link org.apache.hadoop.mapreduce.InputSplit} from the
 * {@link org.apache.tez.mapreduce.input.MRInput} representation of a split.
 *
 * @param splitProto The {@link org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto}
 *                   instance representing the split
 * @param serializationFactory the serialization mechanism used to write out the split
 * @return an instance of the split
 * @throws IOException
 */
@InterfaceStability.Evolving
@SuppressWarnings("unchecked")
public static org.apache.hadoop.mapreduce.InputSplit createNewFormatSplitFromUserPayload(
    MRRuntimeProtos.MRSplitProto splitProto, SerializationFactory serializationFactory)
    throws IOException {
  Objects.requireNonNull(splitProto, "splitProto must be specified");
  String className = splitProto.getSplitClassName();
  Class<org.apache.hadoop.mapreduce.InputSplit> clazz;
  try {
    clazz = (Class<org.apache.hadoop.mapreduce.InputSplit>) Class
        .forName(className);
  } catch (ClassNotFoundException e) {
    throw new IOException("Failed to load InputSplit class: [" + className + "]", e);
  }
  Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer =
      serializationFactory.getDeserializer(clazz);
  deserializer.open(splitProto.getSplitBytes().newInput());
  org.apache.hadoop.mapreduce.InputSplit inputSplit = deserializer.deserialize(null);
  deserializer.close();
  return inputSplit;
}
Example 11
Source File: TestWritableJobConf.java From hadoop with Apache License 2.0

private <K> K serDeser(K conf) throws Exception {
  SerializationFactory factory = new SerializationFactory(CONF);
  Serializer<K> serializer =
      factory.getSerializer(GenericsUtil.getClass(conf));
  Deserializer<K> deserializer =
      factory.getDeserializer(GenericsUtil.getClass(conf));

  DataOutputBuffer out = new DataOutputBuffer();
  serializer.open(out);
  serializer.serialize(conf);
  serializer.close();

  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());
  deserializer.open(in);
  K after = deserializer.deserialize(null);
  deserializer.close();
  return after;
}
Example 12
Source File: DefaultStringifier.java From RDFS with Apache License 2.0

public DefaultStringifier(Configuration conf, Class<T> c) {
  SerializationFactory factory = new SerializationFactory(conf);
  this.serializer = factory.getSerializer(c);
  this.deserializer = factory.getDeserializer(c);
  this.inBuf = new DataInputBuffer();
  this.outBuf = new DataOutputBuffer();
  try {
    serializer.open(outBuf);
    deserializer.open(inBuf);
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
Example 13
Source File: DefaultStringifier.java From big-c with Apache License 2.0

public DefaultStringifier(Configuration conf, Class<T> c) {
  SerializationFactory factory = new SerializationFactory(conf);
  this.serializer = factory.getSerializer(c);
  this.deserializer = factory.getDeserializer(c);
  this.inBuf = new DataInputBuffer();
  this.outBuf = new DataOutputBuffer();
  try {
    serializer.open(outBuf);
    deserializer.open(inBuf);
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
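To complement Examples 12 and 13, here is a hedged usage sketch (not from either project) of how DefaultStringifier is typically driven through its static helpers, which rely on the serializer/deserializer pair opened in the constructor above to round-trip an object through a string stored in the Configuration. The key name "my.example.key" is illustrative.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.Text;

public class StringifierUsageSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Serialize the Text value and store it in the conf under the given key.
    DefaultStringifier.store(conf, new Text("payload"), "my.example.key");
    // Later (e.g. in a task that receives the conf), load and deserialize it.
    Text restored = DefaultStringifier.load(conf, "my.example.key", Text.class);
    System.out.println(restored); // prints "payload"
  }
}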
Example 14
Source File: TestIFile.java From tez with Apache License 2.0

/**
 * Data verification
 *
 * @param reader
 * @param data
 * @throws IOException
 */
private void verifyData(Reader reader, List<KVPair> data) throws IOException {
  LOG.info("Data verification");
  Text readKey = new Text();
  IntWritable readValue = new IntWritable();
  DataInputBuffer keyIn = new DataInputBuffer();
  DataInputBuffer valIn = new DataInputBuffer();
  Deserializer<Text> keyDeserializer;
  Deserializer<IntWritable> valDeserializer;
  SerializationFactory serializationFactory = new SerializationFactory(
      defaultConf);
  keyDeserializer = serializationFactory.getDeserializer(Text.class);
  valDeserializer = serializationFactory.getDeserializer(IntWritable.class);
  keyDeserializer.open(keyIn);
  valDeserializer.open(valIn);

  int numRecordsRead = 0;
  while (reader.nextRawKey(keyIn)) {
    reader.nextRawValue(valIn);
    readKey = keyDeserializer.deserialize(readKey);
    readValue = valDeserializer.deserialize(readValue);

    KVPair expected = data.get(numRecordsRead);
    assertEquals("Key does not match: Expected: " + expected.getKey()
        + ", Read: " + readKey, expected.getKey(), readKey);
    assertEquals("Value does not match: Expected: " + expected.getvalue()
        + ", Read: " + readValue, expected.getvalue(), readValue);
    numRecordsRead++;
  }
  assertEquals("Expected: " + data.size() + " records, but found: "
      + numRecordsRead, data.size(), numRecordsRead);
  LOG.info("Found: " + numRecordsRead + " records");
}
Example 15
Source File: MRInputUtils.java From incubator-tez with Apache License 2.0

@SuppressWarnings("unchecked")
public static InputSplit getOldSplitDetailsFromDisk(TaskSplitIndex splitMetaInfo,
    JobConf jobConf, TezCounter splitBytesCounter) throws IOException {
  Path file = new Path(splitMetaInfo.getSplitLocation());
  FileSystem fs = FileSystem.getLocal(jobConf);
  file = fs.makeQualified(file);
  LOG.info("Reading input split file from : " + file);
  long offset = splitMetaInfo.getStartOffset();

  FSDataInputStream inFile = fs.open(file);
  inFile.seek(offset);
  String className = Text.readString(inFile);
  Class<org.apache.hadoop.mapred.InputSplit> cls;
  try {
    cls = (Class<org.apache.hadoop.mapred.InputSplit>) jobConf.getClassByName(className);
  } catch (ClassNotFoundException ce) {
    IOException wrap = new IOException("Split class " + className + " not found");
    wrap.initCause(ce);
    throw wrap;
  }
  SerializationFactory factory = new SerializationFactory(jobConf);
  Deserializer<org.apache.hadoop.mapred.InputSplit> deserializer =
      (Deserializer<org.apache.hadoop.mapred.InputSplit>) factory.getDeserializer(cls);
  deserializer.open(inFile);
  org.apache.hadoop.mapred.InputSplit split = deserializer.deserialize(null);
  long pos = inFile.getPos();
  if (splitBytesCounter != null) {
    splitBytesCounter.increment(pos - offset);
  }
  inFile.close();
  return split;
}
Example 16
Source File: RubixInputSplit.java From Cubert with Apache License 2.0

@SuppressWarnings("unchecked")
@Override
public void readFields(DataInput in) throws IOException {
  int keyBytesLen = in.readInt();
  byte[] keyBytes = new byte[keyBytesLen];
  in.readFully(keyBytes, 0, keyBytesLen);

  filename = new Path(in.readUTF());
  offset = in.readLong();
  length = in.readLong();
  blockId = in.readLong();
  numRecords = in.readLong();
  try {
    keyClass = (Class<K>) ClassCache.forName(in.readUTF());
    valueClass = (Class<V>) ClassCache.forName(in.readUTF());

    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Deserializer<K> keyDeserializer = serializationFactory.getDeserializer(keyClass);

    ByteArrayInputStream bis = new ByteArrayInputStream(keyBytes);
    keyDeserializer.open(bis);
    key = keyDeserializer.deserialize(null);

    ObjectMapper mapper = new ObjectMapper();
    schema = new BlockSchema(mapper.readValue(in.readUTF(), JsonNode.class));
    blockSerializationType = BlockSerializationType.values()[in.readInt()];
  } catch (ClassNotFoundException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
}
Example 17
Source File: MRInputUtils.java From tez with Apache License 2.0

@SuppressWarnings("unchecked")
public static InputSplit getOldSplitDetailsFromDisk(TaskSplitIndex splitMetaInfo,
    JobConf jobConf, TezCounter splitBytesCounter) throws IOException {
  Path file = new Path(splitMetaInfo.getSplitLocation());
  FileSystem fs = FileSystem.getLocal(jobConf);
  file = fs.makeQualified(file);
  LOG.info("Reading input split file from : " + file);
  long offset = splitMetaInfo.getStartOffset();

  FSDataInputStream inFile = fs.open(file);
  inFile.seek(offset);
  String className = Text.readString(inFile);
  Class<org.apache.hadoop.mapred.InputSplit> cls;
  try {
    cls = (Class<org.apache.hadoop.mapred.InputSplit>) jobConf.getClassByName(className);
  } catch (ClassNotFoundException ce) {
    IOException wrap = new IOException("Split class " + className + " not found");
    wrap.initCause(ce);
    throw wrap;
  }
  SerializationFactory factory = new SerializationFactory(jobConf);
  Deserializer<org.apache.hadoop.mapred.InputSplit> deserializer =
      (Deserializer<org.apache.hadoop.mapred.InputSplit>) factory.getDeserializer(cls);
  deserializer.open(inFile);
  org.apache.hadoop.mapred.InputSplit split = deserializer.deserialize(null);
  long pos = inFile.getPos();
  if (splitBytesCounter != null) {
    splitBytesCounter.increment(pos - offset);
  }
  inFile.close();
  return split;
}
Example 18
Source File: WALFile.java From streamx with Apache License 2.0

private <T> Deserializer<T> getDeserializer(SerializationFactory sf, Class<T> c) {
  return sf.getDeserializer(c);
}
Example 19
Source File: RubixRecordReader.java From Cubert with Apache License 2.0

public void initialize(InputSplit split, Configuration conf) throws IOException,
    InterruptedException {
  @SuppressWarnings("unchecked")
  RubixInputSplit<K, V> rsplit = (RubixInputSplit<K, V>) split;

  SerializationFactory serializationFactory = new SerializationFactory(conf);
  switch (rsplit.getBlockSerializationType()) {
  case DEFAULT:
    valueDeserializer = serializationFactory.getDeserializer(rsplit.getValueClass());
    break;
  case COMPACT:
    BlockSchema schema = rsplit.getSchema();
    valueDeserializer = new CompactDeserializer<V>(schema);
    break;
  }

  key = rsplit.getKey();

  // store the blockid and partition key in the conf
  conf.setLong("MY_BLOCK_ID", rsplit.getBlockId());
  conf.setLong("MY_NUM_RECORDS", rsplit.getNumRecords());
  ByteArrayOutputStream tmpOut = new ByteArrayOutputStream();
  ((Tuple) key).write(new DataOutputStream(tmpOut));
  String keySerialized = SerializerUtils.serializeToString(tmpOut.toByteArray());
  conf.set("MY_PARTITION_KEY", keySerialized);

  Path path = rsplit.getFilename();
  offset = rsplit.getOffset();
  length = rsplit.getLength();

  FileSystem fs = path.getFileSystem(conf);
  FSDataInputStream fsin = fs.open(path);
  fsin.seek(offset);

  blockInputStream = new BlockInputStream(fsin, length);
  in = blockInputStream;

  CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
  if (codec != null) {
    print.f("codec is not null and it is %s", codec.getClass().toString());
    in = codec.createInputStream(in);
  } else {
    print.f("codec is null");
  }

  valueDeserializer.open(in);
}
Example 20
Source File: SequenceFile.java From RDFS with Apache License 2.0

@SuppressWarnings("unchecked")
private Deserializer getDeserializer(SerializationFactory sf, Class c) {
  return sf.getDeserializer(c);
}