Java Code Examples for org.apache.hadoop.io.serializer.Deserializer#open()
The following examples show how to use
org.apache.hadoop.io.serializer.Deserializer#open().
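Before the project examples, here is a minimal, self-contained sketch of the pattern they all share. This sketch is not taken from any of the projects listed; it assumes only a stock Hadoop Configuration, whose default io.serializations setting registers WritableSerialization, so a Writable type such as Text can be handled. The idea: obtain a Serializer and a Deserializer from a SerializationFactory, call open() to bind each one to a stream, serialize or deserialize, then close().

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

public class DeserializerOpenSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    SerializationFactory factory = new SerializationFactory(conf);

    // Serialize a value into an in-memory buffer.
    Serializer<Text> serializer = factory.getSerializer(Text.class);
    DataOutputBuffer out = new DataOutputBuffer();
    serializer.open(out);                        // bind the serializer to the output stream
    serializer.serialize(new Text("hello"));
    serializer.close();

    // Deserializer#open(InputStream) binds the deserializer to its input stream;
    // subsequent deserialize() calls read from that stream until close().
    Deserializer<Text> deserializer = factory.getDeserializer(Text.class);
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    deserializer.open(in);
    Text copy = deserializer.deserialize(null);  // null asks for a freshly allocated object
    deserializer.close();

    System.out.println(copy);                    // prints "hello"
  }
}

Passing null to deserialize() asks the deserializer to allocate a new object; passing an existing instance lets it be reused, as several of the examples below do.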
Example 1
Source File: Chain.java From hadoop with Apache License 2.0
private <E> E makeCopyForPassByValue(Serialization<E> serialization,
    E obj) throws IOException {
  Serializer<E> ser =
      serialization.getSerializer(GenericsUtil.getClass(obj));
  Deserializer<E> deser =
      serialization.getDeserializer(GenericsUtil.getClass(obj));

  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();

  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();
  obj = ReflectionUtils.newInstance(GenericsUtil.getClass(obj),
      getChainJobConf());
  ByteArrayInputStream bais =
      new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  deser.deserialize(obj);
  deser.close();
  return obj;
}
Example 2
Source File: ReflectionUtils.java From hadoop with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
Example 3
Source File: TestWritableJobConf.java From big-c with Apache License 2.0
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory factory = new SerializationFactory(CONF);
  Serializer<K> serializer =
      factory.getSerializer(GenericsUtil.getClass(conf));
  Deserializer<K> deserializer =
      factory.getDeserializer(GenericsUtil.getClass(conf));

  DataOutputBuffer out = new DataOutputBuffer();
  serializer.open(out);
  serializer.serialize(conf);
  serializer.close();

  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());
  deserializer.open(in);
  K after = deserializer.deserialize(null);
  deserializer.close();

  return after;
}
Example 4
Source File: ReflectionUtils.java From hadoop-gpu with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
Example 5
Source File: ReflectionUtils.java From big-c with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
Example 6
Source File: OutputCollectorImpl.java From attic-apex-malhar with Apache License 2.0
private <T> T cloneObj(T t) throws IOException {
  Serializer<T> keySerializer;
  Class<T> keyClass;
  PipedInputStream pis = new PipedInputStream();
  PipedOutputStream pos = new PipedOutputStream(pis);
  keyClass = (Class<T>) t.getClass();
  keySerializer = serializationFactory.getSerializer(keyClass);
  keySerializer.open(pos);
  keySerializer.serialize(t);
  Deserializer<T> keyDeserializer = serializationFactory.getDeserializer(keyClass);
  keyDeserializer.open(pis);
  T clonedArg0 = keyDeserializer.deserialize(null);
  pos.close();
  pis.close();
  keySerializer.close();
  keyDeserializer.close();
  return clonedArg0;
}
Example 7
Source File: MRInputHelpers.java From tez with Apache License 2.0
/**
 * Create an instance of {@link org.apache.hadoop.mapred.InputSplit} from the {@link
 * org.apache.tez.mapreduce.input.MRInput} representation of a split.
 *
 * @param splitProto The {@link org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto}
 *                   instance representing the split
 * @param serializationFactory the serialization mechanism used to write out the split
 * @return an instance of the split
 * @throws java.io.IOException
 */
@SuppressWarnings("unchecked")
@InterfaceStability.Evolving
@InterfaceAudience.LimitedPrivate({"hive, pig"})
public static InputSplit createOldFormatSplitFromUserPayload(
    MRRuntimeProtos.MRSplitProto splitProto, SerializationFactory serializationFactory)
    throws IOException {
  // This may not need to use serialization factory, since OldFormat
  // always uses Writable to write splits.
  Objects.requireNonNull(splitProto, "splitProto cannot be null");
  String className = splitProto.getSplitClassName();
  Class<InputSplit> clazz;

  try {
    clazz = (Class<InputSplit>) Class.forName(className);
  } catch (ClassNotFoundException e) {
    throw new IOException("Failed to load InputSplit class: [" + className + "]", e);
  }

  Deserializer<InputSplit> deserializer = serializationFactory
      .getDeserializer(clazz);
  deserializer.open(splitProto.getSplitBytes().newInput());
  InputSplit inputSplit = deserializer.deserialize(null);
  deserializer.close();
  return inputSplit;
}
Example 8
Source File: ReflectionUtils.java From RDFS with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
Example 9
Source File: Chain.java From RDFS with Apache License 2.0
private <E> E makeCopyForPassByValue(Serialization<E> serialization,
    E obj) throws IOException {
  Serializer<E> ser =
      serialization.getSerializer(GenericsUtil.getClass(obj));
  Deserializer<E> deser =
      serialization.getDeserializer(GenericsUtil.getClass(obj));

  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();

  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();
  obj = ReflectionUtils.newInstance(GenericsUtil.getClass(obj),
      getChainJobConf());
  ByteArrayInputStream bais =
      new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  deser.deserialize(obj);
  deser.close();
  return obj;
}
Example 10
Source File: TestWritableJobConf.java From RDFS with Apache License 2.0
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory factory = new SerializationFactory(CONF);
  Serializer<K> serializer =
      factory.getSerializer(GenericsUtil.getClass(conf));
  Deserializer<K> deserializer =
      factory.getDeserializer(GenericsUtil.getClass(conf));

  DataOutputBuffer out = new DataOutputBuffer();
  serializer.open(out);
  serializer.serialize(conf);
  serializer.close();

  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());
  deserializer.open(in);
  K after = deserializer.deserialize(null);
  deserializer.close();

  return after;
}
Example 11
Source File: MRInputUtils.java From incubator-tez with Apache License 2.0
@SuppressWarnings("unchecked") public static InputSplit getOldSplitDetailsFromDisk(TaskSplitIndex splitMetaInfo, JobConf jobConf, TezCounter splitBytesCounter) throws IOException { Path file = new Path(splitMetaInfo.getSplitLocation()); FileSystem fs = FileSystem.getLocal(jobConf); file = fs.makeQualified(file); LOG.info("Reading input split file from : " + file); long offset = splitMetaInfo.getStartOffset(); FSDataInputStream inFile = fs.open(file); inFile.seek(offset); String className = Text.readString(inFile); Class<org.apache.hadoop.mapred.InputSplit> cls; try { cls = (Class<org.apache.hadoop.mapred.InputSplit>) jobConf.getClassByName(className); } catch (ClassNotFoundException ce) { IOException wrap = new IOException("Split class " + className + " not found"); wrap.initCause(ce); throw wrap; } SerializationFactory factory = new SerializationFactory(jobConf); Deserializer<org.apache.hadoop.mapred.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapred.InputSplit>) factory .getDeserializer(cls); deserializer.open(inFile); org.apache.hadoop.mapred.InputSplit split = deserializer.deserialize(null); long pos = inFile.getPos(); if (splitBytesCounter != null) { splitBytesCounter.increment(pos - offset); } inFile.close(); return split; }
Example 12
Source File: TestPipelinedSorter.java From tez with Apache License 2.0
private void verifyData(IFile.Reader reader) throws IOException {
  Text readKey = new Text();
  Text readValue = new Text();
  DataInputBuffer keyIn = new DataInputBuffer();
  DataInputBuffer valIn = new DataInputBuffer();
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  Deserializer<Text> keyDeserializer = serializationFactory.getDeserializer(Text.class);
  Deserializer<Text> valDeserializer = serializationFactory.getDeserializer(Text.class);
  keyDeserializer.open(keyIn);
  valDeserializer.open(valIn);

  int numRecordsRead = 0;

  for (Map.Entry<String, String> entry : sortedDataMap.entrySet()) {
    String key = entry.getKey();
    String val = entry.getValue();
    if (reader.nextRawKey(keyIn)) {
      reader.nextRawValue(valIn);
      readKey = keyDeserializer.deserialize(readKey);
      readValue = valDeserializer.deserialize(readValue);

      Assert.assertTrue(key.equalsIgnoreCase(readKey.toString()));
      Assert.assertTrue(val.equalsIgnoreCase(readValue.toString()));
      numRecordsRead++;
    }
  }
  Assert.assertTrue(numRecordsRead == sortedDataMap.size());
}
Example 13
Source File: MRInputUtils.java From incubator-tez with Apache License 2.0
@SuppressWarnings("unchecked") public static org.apache.hadoop.mapreduce.InputSplit getNewSplitDetailsFromDisk( TaskSplitIndex splitMetaInfo, JobConf jobConf, TezCounter splitBytesCounter) throws IOException { Path file = new Path(splitMetaInfo.getSplitLocation()); long offset = splitMetaInfo.getStartOffset(); // Split information read from local filesystem. FileSystem fs = FileSystem.getLocal(jobConf); file = fs.makeQualified(file); LOG.info("Reading input split file from : " + file); FSDataInputStream inFile = fs.open(file); inFile.seek(offset); String className = Text.readString(inFile); Class<org.apache.hadoop.mapreduce.InputSplit> cls; try { cls = (Class<org.apache.hadoop.mapreduce.InputSplit>) jobConf.getClassByName(className); } catch (ClassNotFoundException ce) { IOException wrap = new IOException("Split class " + className + " not found"); wrap.initCause(ce); throw wrap; } SerializationFactory factory = new SerializationFactory(jobConf); Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapreduce.InputSplit>) factory .getDeserializer(cls); deserializer.open(inFile); org.apache.hadoop.mapreduce.InputSplit split = deserializer.deserialize(null); long pos = inFile.getPos(); if (splitBytesCounter != null) { splitBytesCounter.increment(pos - offset); } inFile.close(); return split; }
Example 14
Source File: TestIFile.java From incubator-tez with Apache License 2.0
/**
 * Data verification
 *
 * @param reader
 * @param data
 * @throws IOException
 */
private void verifyData(Reader reader, List<KVPair> data) throws IOException {
  LOG.info("Data verification");
  Text readKey = new Text();
  IntWritable readValue = new IntWritable();
  DataInputBuffer keyIn = new DataInputBuffer();
  DataInputBuffer valIn = new DataInputBuffer();
  Deserializer<Text> keyDeserializer;
  Deserializer<IntWritable> valDeserializer;
  SerializationFactory serializationFactory = new SerializationFactory(
      defaultConf);
  keyDeserializer = serializationFactory.getDeserializer(Text.class);
  valDeserializer = serializationFactory.getDeserializer(IntWritable.class);
  keyDeserializer.open(keyIn);
  valDeserializer.open(valIn);

  int numRecordsRead = 0;

  while (reader.nextRawKey(keyIn)) {
    reader.nextRawValue(valIn);
    readKey = keyDeserializer.deserialize(readKey);
    readValue = valDeserializer.deserialize(readValue);

    KVPair expected = data.get(numRecordsRead);
    assertEquals("Key does not match: Expected: " + expected.getKey()
        + ", Read: " + readKey, expected.getKey(), readKey);
    assertEquals("Value does not match: Expected: " + expected.getvalue()
        + ", Read: " + readValue, expected.getvalue(), readValue);

    numRecordsRead++;
  }
  assertEquals("Expected: " + data.size() + " records, but found: "
      + numRecordsRead, data.size(), numRecordsRead);
  LOG.info("Found: " + numRecordsRead + " records");
}
Example 15
Source File: PigSplit.java From spork with Apache License 2.0
@SuppressWarnings("unchecked") public void readFields(DataInput is) throws IOException { disableCounter = is.readBoolean(); isMultiInputs = is.readBoolean(); totalSplits = is.readInt(); splitIndex = is.readInt(); inputIndex = is.readInt(); targetOps = (ArrayList<OperatorKey>) readObject(is); int splitLen = is.readInt(); int distinctSplitClassCount = is.readInt(); //construct the input split class name list String[] distinctSplitClassName = new String[distinctSplitClassCount]; for (int i = 0; i < distinctSplitClassCount; i++) { distinctSplitClassName[i] = is.readUTF(); } try { SerializationFactory sf = new SerializationFactory(conf); // The correct call sequence for Deserializer is, we shall open, then deserialize, but we shall not close wrappedSplits = new InputSplit[splitLen]; for (int i = 0; i < splitLen; i++) { //read the className index int index = is.readInt(); //get the split class name String splitClassName = distinctSplitClassName[index]; Class splitClass = conf.getClassByName(splitClassName); Deserializer d = sf.getDeserializer(splitClass); d.open((InputStream) is); wrappedSplits[i] = (InputSplit)ReflectionUtils.newInstance(splitClass, conf); d.deserialize(wrappedSplits[i]); } } catch (ClassNotFoundException e) { throw new IOException(e); } }
Example 16
Source File: MapTask.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked") private <T> T getSplitDetails(Path file, long offset) throws IOException { FileSystem fs = file.getFileSystem(conf); FSDataInputStream inFile = fs.open(file); inFile.seek(offset); String className = StringInterner.weakIntern(Text.readString(inFile)); Class<T> cls; try { cls = (Class<T>) conf.getClassByName(className); } catch (ClassNotFoundException ce) { IOException wrap = new IOException("Split class " + className + " not found"); wrap.initCause(ce); throw wrap; } SerializationFactory factory = new SerializationFactory(conf); Deserializer<T> deserializer = (Deserializer<T>) factory.getDeserializer(cls); deserializer.open(inFile); T split = deserializer.deserialize(null); long pos = inFile.getPos(); getCounters().findCounter( TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset); inFile.close(); return split; }
Example 17
Source File: MRInputUtils.java From tez with Apache License 2.0
@SuppressWarnings("unchecked") public static InputSplit getOldSplitDetailsFromDisk(TaskSplitIndex splitMetaInfo, JobConf jobConf, TezCounter splitBytesCounter) throws IOException { Path file = new Path(splitMetaInfo.getSplitLocation()); FileSystem fs = FileSystem.getLocal(jobConf); file = fs.makeQualified(file); LOG.info("Reading input split file from : " + file); long offset = splitMetaInfo.getStartOffset(); FSDataInputStream inFile = fs.open(file); inFile.seek(offset); String className = Text.readString(inFile); Class<org.apache.hadoop.mapred.InputSplit> cls; try { cls = (Class<org.apache.hadoop.mapred.InputSplit>) jobConf.getClassByName(className); } catch (ClassNotFoundException ce) { IOException wrap = new IOException("Split class " + className + " not found"); wrap.initCause(ce); throw wrap; } SerializationFactory factory = new SerializationFactory(jobConf); Deserializer<org.apache.hadoop.mapred.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapred.InputSplit>) factory .getDeserializer(cls); deserializer.open(inFile); org.apache.hadoop.mapred.InputSplit split = deserializer.deserialize(null); long pos = inFile.getPos(); if (splitBytesCounter != null) { splitBytesCounter.increment(pos - offset); } inFile.close(); return split; }
Example 18
Source File: MapTask.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked") private <T> T getSplitDetails(Path file, long offset) throws IOException { FileSystem fs = file.getFileSystem(conf); FSDataInputStream inFile = fs.open(file); inFile.seek(offset); String className = StringInterner.weakIntern(Text.readString(inFile)); Class<T> cls; try { cls = (Class<T>) conf.getClassByName(className); } catch (ClassNotFoundException ce) { IOException wrap = new IOException("Split class " + className + " not found"); wrap.initCause(ce); throw wrap; } SerializationFactory factory = new SerializationFactory(conf); Deserializer<T> deserializer = (Deserializer<T>) factory.getDeserializer(cls); deserializer.open(inFile); T split = deserializer.deserialize(null); long pos = inFile.getPos(); getCounters().findCounter( TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset); inFile.close(); return split; }
Example 19
Source File: RubixFile.java From Cubert with Apache License 2.0
@SuppressWarnings("unchecked") public List<KeyData<K>> getKeyData() throws IOException, ClassNotFoundException { if (keyData != null) return keyData; final FileSystem fs = FileSystem.get(conf); keyData = new ArrayList<KeyData<K>>(); final long filesize = fs.getFileStatus(path).getLen(); FSDataInputStream in = fs.open(path); /* The last long in the file is the start position of the trailer section */ in.seek(filesize - 8); long metaDataStartPos = in.readLong(); in.seek(metaDataStartPos); ObjectMapper mapper = new ObjectMapper(); metadataJson = mapper.readValue(in.readUTF(), JsonNode.class); int keySectionSize = in.readInt(); // load the key section byte[] keySection = new byte[keySectionSize]; in.seek(filesize - keySectionSize - 8); in.read(keySection, 0, keySectionSize); in.close(); ByteArrayInputStream bis = new ByteArrayInputStream(keySection); DataInput dataInput = new DataInputStream(bis); int numberOfBlocks = metadataJson.get("numberOfBlocks").getIntValue(); // load the key section keyClass = (Class<K>) ClassCache.forName(JsonUtils.getText(metadataJson, "keyClass")); valueClass = (Class<V>) ClassCache.forName(JsonUtils.getText(metadataJson, "valueClass")); SerializationFactory serializationFactory = new SerializationFactory(conf); Deserializer<K> deserializer = serializationFactory.getDeserializer(keyClass); deserializer.open(bis); while (bis.available() > 0 && numberOfBlocks > 0) { K key = deserializer.deserialize(null); long offset = dataInput.readLong(); long blockId = dataInput.readLong(); long numRecords = dataInput.readLong(); keyData.add(new KeyData<K>(key, offset, 0, numRecords, blockId)); numberOfBlocks--; } // Assign length to each keydata entry int numEntries = keyData.size(); for (int i = 1; i < numEntries; i++) { KeyData<K> prev = keyData.get(i - 1); KeyData<K> current = keyData.get(i); prev.setLength(current.getOffset() - prev.getOffset()); } if (numEntries > 0) { KeyData<K> last = keyData.get(numEntries - 1); last.setLength(metaDataStartPos - last.offset); } return keyData; }
Example 20
Source File: TestIFile.java From tez with Apache License 2.0
@Test(timeout = 5000)
public void testConcatenatedZlibPadding()
    throws IOException, URISyntaxException {
  byte[] bytes;
  long compTotal = 0;
  // Known raw and compressed lengths of input
  long raws[] = { 2392, 102314, 42576, 31432, 25090 };
  long compressed[] = { 723, 25396, 10926, 8203, 6665 };

  CompressionCodecFactory codecFactory = new CompressionCodecFactory(new Configuration());
  codec = codecFactory.getCodecByClassName("org.apache.hadoop.io.compress.DefaultCodec");

  URL url = getClass().getClassLoader()
      .getResource("TestIFile_concatenated_compressed.bin");
  assertNotEquals("IFileinput file must exist", null, url);
  Path p = new Path(url.toURI());
  FSDataInputStream inStream = localFs.open(p);

  for (int i = 0; i < 5; i++) {
    bytes = new byte[(int) raws[i]];
    assertEquals("Compressed stream out-of-sync", inStream.getPos(), compTotal);
    IFile.Reader.readToMemory(bytes, inStream, (int) compressed[i], codec, false, -1);
    compTotal += compressed[i];

    // Now read the data
    InMemoryReader inMemReader = new InMemoryReader(null,
        new InputAttemptIdentifier(0, 0), bytes, 0, bytes.length);

    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    Deserializer<Text> keyDeserializer;
    Deserializer<IntWritable> valDeserializer;
    SerializationFactory serializationFactory = new SerializationFactory(defaultConf);
    keyDeserializer = serializationFactory.getDeserializer(Text.class);
    valDeserializer = serializationFactory.getDeserializer(IntWritable.class);
    keyDeserializer.open(keyIn);
    valDeserializer.open(valIn);

    while (inMemReader.nextRawKey(keyIn)) {
      inMemReader.nextRawValue(valIn);
    }
  }
  inStream.close();
}