org.apache.hadoop.io.serializer.Serializer Java Examples
The following examples show how to use
org.apache.hadoop.io.serializer.Serializer.
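All of the snippets below share the same lifecycle: obtain a Serializer from a SerializationFactory, open() it on an output stream, call serialize() one or more times, then close(). The round trip below is a minimal sketch of that lifecycle — it is not taken from any of the projects listed — assuming the default io.serializations setting, which registers WritableSerialization for Writable types such as Text.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

public class SerializerRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    SerializationFactory factory = new SerializationFactory(conf);

    // Serialize: open a serializer on an output stream, write, close.
    Serializer<Text> serializer = factory.getSerializer(Text.class);
    DataOutputBuffer out = new DataOutputBuffer();
    serializer.open(out);
    serializer.serialize(new Text("hello"));
    serializer.close();

    // Deserialize: open a deserializer over the same bytes and read back.
    Deserializer<Text> deserializer = factory.getDeserializer(Text.class);
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    deserializer.open(in);
    Text copy = deserializer.deserialize(null); // null => allocate a new object
    deserializer.close();

    System.out.println(copy); // prints "hello"
  }
}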
Example #1
Source File: TestMerge.java From hadoop with Apache License 2.0
public KeyValueWriter(Configuration conf, OutputStream output,
                      Class<K> kyClass, Class<V> valClass) throws IOException {
  keyClass = kyClass;
  valueClass = valClass;
  dataBuffer = new DataOutputBuffer();
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  keySerializer = (Serializer<K>) serializationFactory.getSerializer(keyClass);
  keySerializer.open(dataBuffer);
  valueSerializer = (Serializer<V>) serializationFactory.getSerializer(valueClass);
  valueSerializer.open(dataBuffer);
  outputStream = new DataOutputStream(output);
}
Example #2
Source File: ReflectionUtils.java From hadoop-gpu with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
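For context, a hedged sketch of how this copy(...) helper is typically called follows; the IntWritable values and variable names are illustrative, not from the original test suite.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.util.ReflectionUtils;

// Hypothetical usage of ReflectionUtils.copy(...): clone src into dst.
Configuration conf = new Configuration();
IntWritable src = new IntWritable(42);
IntWritable dst = new IntWritable();        // existing contents are destroyed
dst = ReflectionUtils.copy(conf, src, dst); // dst is now an independent copy of src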
Example #3
Source File: TestWritableJobConf.java From hadoop-gpu with Apache License 2.0
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory factory = new SerializationFactory(CONF);
  Serializer<K> serializer =
      factory.getSerializer(GenericsUtil.getClass(conf));
  Deserializer<K> deserializer =
      factory.getDeserializer(GenericsUtil.getClass(conf));
  DataOutputBuffer out = new DataOutputBuffer();
  serializer.open(out);
  serializer.serialize(conf);
  serializer.close();
  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());
  deserializer.open(in);
  K after = deserializer.deserialize(null);
  deserializer.close();
  return after;
}
Example #4
Source File: Chain.java From hadoop-gpu with Apache License 2.0
private <E> E makeCopyForPassByValue(Serialization<E> serialization, E obj)
    throws IOException {
  Serializer<E> ser =
      serialization.getSerializer(GenericsUtil.getClass(obj));
  Deserializer<E> deser =
      serialization.getDeserializer(GenericsUtil.getClass(obj));
  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();
  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();
  obj = ReflectionUtils.newInstance(GenericsUtil.getClass(obj), getChainJobConf());
  ByteArrayInputStream bais =
      new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  deser.deserialize(obj);
  deser.close();
  return obj;
}
Example #5
Source File: MRInputHelpers.java From tez with Apache License 2.0
@InterfaceStability.Evolving
public static <T extends org.apache.hadoop.mapreduce.InputSplit>
    MRRuntimeProtos.MRSplitProto createSplitProto(
        T newSplit, SerializationFactory serializationFactory)
    throws IOException, InterruptedException {
  MRRuntimeProtos.MRSplitProto.Builder builder =
      MRRuntimeProtos.MRSplitProto.newBuilder();
  builder.setSplitClassName(newSplit.getClass().getName());
  @SuppressWarnings("unchecked")
  Serializer<T> serializer =
      serializationFactory.getSerializer((Class<T>) newSplit.getClass());
  ByteString.Output out =
      ByteString.newOutput(SPLIT_SERIALIZED_LENGTH_ESTIMATE);
  serializer.open(out);
  serializer.serialize(newSplit);
  // TODO MR Compat: Check against max block locations per split.
  ByteString splitBs = out.toByteString();
  builder.setSplitBytes(splitBs);
  return builder.build();
}
Example #6
Source File: MRHelpers.java From incubator-tez with Apache License 2.0
@Private
public static <T extends org.apache.hadoop.mapreduce.InputSplit>
    MRSplitProto createSplitProto(
        T newSplit, SerializationFactory serializationFactory)
    throws IOException, InterruptedException {
  MRSplitProto.Builder builder = MRSplitProto.newBuilder();
  builder.setSplitClassName(newSplit.getClass().getName());
  @SuppressWarnings("unchecked")
  Serializer<T> serializer =
      serializationFactory.getSerializer((Class<T>) newSplit.getClass());
  ByteString.Output out =
      ByteString.newOutput(SPLIT_SERIALIZED_LENGTH_ESTIMATE);
  serializer.open(out);
  serializer.serialize(newSplit);
  // TODO MR Compat: Check against max block locations per split.
  ByteString splitBs = out.toByteString();
  builder.setSplitBytes(splitBs);
  return builder.build();
}
Example #7
Source File: ReflectionUtils.java From RDFS with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
Example #8
Source File: TestWritableJobConf.java From RDFS with Apache License 2.0
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory factory = new SerializationFactory(CONF);
  Serializer<K> serializer =
      factory.getSerializer(GenericsUtil.getClass(conf));
  Deserializer<K> deserializer =
      factory.getDeserializer(GenericsUtil.getClass(conf));
  DataOutputBuffer out = new DataOutputBuffer();
  serializer.open(out);
  serializer.serialize(conf);
  serializer.close();
  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());
  deserializer.open(in);
  K after = deserializer.deserialize(null);
  deserializer.close();
  return after;
}
Example #9
Source File: Chain.java From RDFS with Apache License 2.0
private <E> E makeCopyForPassByValue(Serialization<E> serialization, E obj)
    throws IOException {
  Serializer<E> ser =
      serialization.getSerializer(GenericsUtil.getClass(obj));
  Deserializer<E> deser =
      serialization.getDeserializer(GenericsUtil.getClass(obj));
  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();
  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();
  obj = ReflectionUtils.newInstance(GenericsUtil.getClass(obj), getChainJobConf());
  ByteArrayInputStream bais =
      new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  deser.deserialize(obj);
  deser.close();
  return obj;
}
Example #10
Source File: OutputCollectorImpl.java From attic-apex-malhar with Apache License 2.0
private <T> T cloneObj(T t) throws IOException {
  Serializer<T> keySerializer;
  Class<T> keyClass;
  PipedInputStream pis = new PipedInputStream();
  PipedOutputStream pos = new PipedOutputStream(pis);
  keyClass = (Class<T>) t.getClass();
  keySerializer = serializationFactory.getSerializer(keyClass);
  keySerializer.open(pos);
  keySerializer.serialize(t);
  Deserializer<T> keyDeserializer = serializationFactory.getDeserializer(keyClass);
  keyDeserializer.open(pis);
  T clonedArg0 = keyDeserializer.deserialize(null);
  pos.close();
  pis.close();
  keySerializer.close();
  keyDeserializer.close();
  return clonedArg0;
}
Example #11
Source File: ReflectionUtils.java From big-c with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
Example #12
Source File: ReduceContextImpl.java From big-c with Apache License 2.0
/**
 * This method is called to write the record that was most recently
 * served (before a call to the mark). Since the framework reads one
 * record in advance, to get this record, we serialize the current key
 * and value
 * @param out
 * @throws IOException
 */
private void writeFirstKeyValueBytes(DataOutputStream out) throws IOException {
  assert (getCurrentKey() != null && getCurrentValue() != null);
  WritableUtils.writeVInt(out, currentKeyLength);
  WritableUtils.writeVInt(out, currentValueLength);
  Serializer<KEYIN> keySerializer =
      serializationFactory.getSerializer(keyClass);
  keySerializer.open(out);
  keySerializer.serialize(getCurrentKey());
  Serializer<VALUEIN> valueSerializer =
      serializationFactory.getSerializer(valueClass);
  valueSerializer.open(out);
  valueSerializer.serialize(getCurrentValue());
}
Example #13
Source File: Chain.java From big-c with Apache License 2.0
private <E> E makeCopyForPassByValue(Serialization<E> serialization, E obj)
    throws IOException {
  Serializer<E> ser =
      serialization.getSerializer(GenericsUtil.getClass(obj));
  Deserializer<E> deser =
      serialization.getDeserializer(GenericsUtil.getClass(obj));
  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();
  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();
  obj = ReflectionUtils.newInstance(GenericsUtil.getClass(obj), getChainJobConf());
  ByteArrayInputStream bais =
      new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  deser.deserialize(obj);
  deser.close();
  return obj;
}
Example #14
Source File: ReflectionUtils.java From hadoop with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
Example #15
Source File: ReduceContextImpl.java From hadoop with Apache License 2.0
/**
 * This method is called to write the record that was most recently
 * served (before a call to the mark). Since the framework reads one
 * record in advance, to get this record, we serialize the current key
 * and value
 * @param out
 * @throws IOException
 */
private void writeFirstKeyValueBytes(DataOutputStream out) throws IOException {
  assert (getCurrentKey() != null && getCurrentValue() != null);
  WritableUtils.writeVInt(out, currentKeyLength);
  WritableUtils.writeVInt(out, currentValueLength);
  Serializer<KEYIN> keySerializer =
      serializationFactory.getSerializer(keyClass);
  keySerializer.open(out);
  keySerializer.serialize(getCurrentKey());
  Serializer<VALUEIN> valueSerializer =
      serializationFactory.getSerializer(valueClass);
  valueSerializer.open(out);
  valueSerializer.serialize(getCurrentValue());
}
Example #16
Source File: Chain.java From hadoop with Apache License 2.0
private <E> E makeCopyForPassByValue(Serialization<E> serialization, E obj)
    throws IOException {
  Serializer<E> ser =
      serialization.getSerializer(GenericsUtil.getClass(obj));
  Deserializer<E> deser =
      serialization.getDeserializer(GenericsUtil.getClass(obj));
  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();
  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();
  obj = ReflectionUtils.newInstance(GenericsUtil.getClass(obj), getChainJobConf());
  ByteArrayInputStream bais =
      new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  deser.deserialize(obj);
  deser.close();
  return obj;
}
Example #17
Source File: TestMerge.java From big-c with Apache License 2.0
public KeyValueWriter(Configuration conf, OutputStream output,
                      Class<K> kyClass, Class<V> valClass) throws IOException {
  keyClass = kyClass;
  valueClass = valClass;
  dataBuffer = new DataOutputBuffer();
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  keySerializer = (Serializer<K>) serializationFactory.getSerializer(keyClass);
  keySerializer.open(dataBuffer);
  valueSerializer = (Serializer<V>) serializationFactory.getSerializer(valueClass);
  valueSerializer.open(dataBuffer);
  outputStream = new DataOutputStream(output);
}
Example #18
Source File: TestWritableJobConf.java From hadoop with Apache License 2.0
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory factory = new SerializationFactory(CONF);
  Serializer<K> serializer =
      factory.getSerializer(GenericsUtil.getClass(conf));
  Deserializer<K> deserializer =
      factory.getDeserializer(GenericsUtil.getClass(conf));
  DataOutputBuffer out = new DataOutputBuffer();
  serializer.open(out);
  serializer.serialize(conf);
  serializer.close();
  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());
  deserializer.open(in);
  K after = deserializer.deserialize(null);
  deserializer.close();
  return after;
}
Example #19
Source File: TestWritableJobConf.java From big-c with Apache License 2.0
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory factory = new SerializationFactory(CONF);
  Serializer<K> serializer =
      factory.getSerializer(GenericsUtil.getClass(conf));
  Deserializer<K> deserializer =
      factory.getDeserializer(GenericsUtil.getClass(conf));
  DataOutputBuffer out = new DataOutputBuffer();
  serializer.open(out);
  serializer.serialize(conf);
  serializer.close();
  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());
  deserializer.open(in);
  K after = deserializer.deserialize(null);
  deserializer.close();
  return after;
}
Example #20
Source File: TaggedInputSplit.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked") public void write(DataOutput out) throws IOException { Text.writeString(out, inputSplitClass.getName()); Text.writeString(out, inputFormatClass.getName()); Text.writeString(out, mapperClass.getName()); SerializationFactory factory = new SerializationFactory(conf); Serializer serializer = factory.getSerializer(inputSplitClass); serializer.open((DataOutputStream)out); serializer.serialize(inputSplit); }
Example #21
Source File: MapOperatorTest.java From attic-apex-malhar with Apache License 2.0
public void testNodeProcessingSchema(
    MapOperator<LongWritable, Text, Text, IntWritable> oper) throws IOException {
  CollectorTestSink sortSink = new CollectorTestSink();
  oper.output.setSink(sortSink);
  oper.setMapClass(WordCount.Map.class);
  oper.setCombineClass(WordCount.Reduce.class);
  oper.setDirName(testMeta.testDir);
  oper.setConfigFile(null);
  oper.setInputFormatClass(TextInputFormat.class);

  Configuration conf = new Configuration();
  JobConf jobConf = new JobConf(conf);
  FileInputFormat.setInputPaths(jobConf, new Path(testMeta.testDir));
  TextInputFormat inputFormat = new TextInputFormat();
  inputFormat.configure(jobConf);
  InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass());
  keySerializer.open(oper.getOutstream());
  keySerializer.serialize(splits[0]);
  oper.setInputSplitClass(splits[0].getClass());
  keySerializer.close();

  oper.setup(null);
  oper.beginWindow(0);
  oper.emitTuples();
  oper.emitTuples();
  oper.endWindow();
  oper.beginWindow(1);
  oper.emitTuples();
  oper.endWindow();

  Assert.assertEquals("number emitted tuples", 3, sortSink.collectedTuples.size());
  for (Object o : sortSink.collectedTuples) {
    LOG.debug(o.toString());
  }
  LOG.debug("Done testing round\n");
  oper.teardown();
}
Example #22
Source File: TaggedInputSplit.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked") public void write(DataOutput out) throws IOException { Text.writeString(out, inputSplitClass.getName()); Text.writeString(out, inputFormatClass.getName()); Text.writeString(out, mapperClass.getName()); SerializationFactory factory = new SerializationFactory(conf); Serializer serializer = factory.getSerializer(inputSplitClass); serializer.open((DataOutputStream)out); serializer.serialize(inputSplit); }
Example #23
Source File: PigSplit.java From spork with Apache License 2.0
@SuppressWarnings("unchecked") public void write(DataOutput os) throws IOException { os.writeBoolean(disableCounter); os.writeBoolean(isMultiInputs); os.writeInt(totalSplits); os.writeInt(splitIndex); os.writeInt(inputIndex); writeObject(targetOps, os); os.writeInt(wrappedSplits.length); Set<String> splitClassNameSet = new HashSet<String>(); //first get the distinct split class name set for ( int i= 0; i < wrappedSplits.length; i++) { splitClassNameSet.add(wrappedSplits[i].getClass().getName()); } List<String> distinctSplitClassList = new ArrayList<String>(); distinctSplitClassList.addAll(splitClassNameSet); //write the distinct number of split class name os.writeInt(distinctSplitClassList.size()); //write each classname once for (int i = 0 ; i < distinctSplitClassList.size(); i++) { os.writeUTF(distinctSplitClassList.get(i)); } SerializationFactory sf = new SerializationFactory(conf); for (int i = 0; i < wrappedSplits.length; i++) { //find out the index of the split class name int index = distinctSplitClassList.indexOf(wrappedSplits[i].getClass().getName()); os.writeInt(index); Serializer s = sf.getSerializer(wrappedSplits[i].getClass()); //Checks if Serializer is NULL or not before calling open() method on it. if (s == null) { throw new IllegalArgumentException("Could not find Serializer for class "+wrappedSplits[i].getClass()+". InputSplits must implement Writable."); } s.open((OutputStream) os); // The correct call sequence for Serializer is, we shall open, then serialize, but we shall not close s.serialize(wrappedSplits[i]); } }
Example #24
Source File: JobSplitWriter.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked") private static <T extends InputSplit> SplitMetaInfo[] writeNewSplits(Configuration conf, T[] array, FSDataOutputStream out) throws IOException, InterruptedException { SplitMetaInfo[] info = new SplitMetaInfo[array.length]; if (array.length != 0) { SerializationFactory factory = new SerializationFactory(conf); int i = 0; int maxBlockLocations = conf.getInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY, MRConfig.MAX_BLOCK_LOCATIONS_DEFAULT); long offset = out.getPos(); for(T split: array) { long prevCount = out.getPos(); Text.writeString(out, split.getClass().getName()); Serializer<T> serializer = factory.getSerializer((Class<T>) split.getClass()); serializer.open(out); serializer.serialize(split); long currCount = out.getPos(); String[] locations = split.getLocations(); if (locations.length > maxBlockLocations) { LOG.warn("Max block location exceeded for split: " + split + " splitsize: " + locations.length + " maxsize: " + maxBlockLocations); locations = Arrays.copyOf(locations, maxBlockLocations); } info[i++] = new JobSplit.SplitMetaInfo( locations, offset, split.getLength()); offset += currCount - prevCount; } } return info; }
Example #25
Source File: JobSplitWriter.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked") private static <T extends InputSplit> SplitMetaInfo[] writeNewSplits(Configuration conf, T[] array, FSDataOutputStream out) throws IOException, InterruptedException { SplitMetaInfo[] info = new SplitMetaInfo[array.length]; if (array.length != 0) { SerializationFactory factory = new SerializationFactory(conf); int i = 0; int maxBlockLocations = conf.getInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY, MRConfig.MAX_BLOCK_LOCATIONS_DEFAULT); long offset = out.getPos(); for(T split: array) { long prevCount = out.getPos(); Text.writeString(out, split.getClass().getName()); Serializer<T> serializer = factory.getSerializer((Class<T>) split.getClass()); serializer.open(out); serializer.serialize(split); long currCount = out.getPos(); String[] locations = split.getLocations(); if (locations.length > maxBlockLocations) { LOG.warn("Max block location exceeded for split: " + split + " splitsize: " + locations.length + " maxsize: " + maxBlockLocations); locations = Arrays.copyOf(locations, maxBlockLocations); } info[i++] = new JobSplit.SplitMetaInfo( locations, offset, split.getLength()); offset += currCount - prevCount; } } return info; }
Example #26
Source File: JobClient.java From RDFS with Apache License 2.0
@SuppressWarnings("unchecked") private <T extends org.apache.hadoop.mapreduce.InputSplit> List<RawSplit> computeNewSplits(JobContext job) throws IOException, InterruptedException, ClassNotFoundException { JobConf conf = job.getJobConf(); org.apache.hadoop.mapreduce.InputFormat<?,?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getJobConf()); List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(job); T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]); // sort the splits into order based on size, so that the biggest // go first Arrays.sort(array, new NewSplitComparator()); List<RawSplit> rawSplits = new ArrayList<RawSplit>(); if (array.length != 0) { DataOutputBuffer buffer = new DataOutputBuffer(); SerializationFactory factory = new SerializationFactory(conf); Serializer<T> serializer = factory.getSerializer((Class<T>) array[0].getClass()); serializer.open(buffer); for(T split: array) { RawSplit rawSplit = new RawSplit(); rawSplit.setClassName(split.getClass().getName()); buffer.reset(); serializer.serialize(split); rawSplit.setDataLength(split.getLength()); rawSplit.setBytes(buffer.getData(), 0, buffer.getLength()); rawSplit.setLocations(split.getLocations()); rawSplits.add(rawSplit); } serializer.close(); } return rawSplits; }
Example #27
Source File: JobClient.java From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked") private <T extends org.apache.hadoop.mapreduce.InputSplit> int writeNewSplits(JobContext job, Path submitSplitFile ) throws IOException, InterruptedException, ClassNotFoundException { JobConf conf = job.getJobConf(); org.apache.hadoop.mapreduce.InputFormat<?,?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getJobConf()); List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(job); T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]); // sort the splits into order based on size, so that the biggest // go first Arrays.sort(array, new NewSplitComparator()); DataOutputStream out = writeSplitsFileHeader(conf, submitSplitFile, array.length); try { if (array.length != 0) { DataOutputBuffer buffer = new DataOutputBuffer(); RawSplit rawSplit = new RawSplit(); SerializationFactory factory = new SerializationFactory(conf); Serializer<T> serializer = factory.getSerializer((Class<T>) array[0].getClass()); serializer.open(buffer); for(T split: array) { rawSplit.setClassName(split.getClass().getName()); buffer.reset(); serializer.serialize(split); rawSplit.setDataLength(split.getLength()); rawSplit.setBytes(buffer.getData(), 0, buffer.getLength()); rawSplit.setLocations(split.getLocations()); rawSplit.write(out); } serializer.close(); } } finally { out.close(); } return array.length; }
Example #28
Source File: TestValuesIterator.java From tez with Apache License 2.0
/**
 * create in-memory segments
 *
 * @return
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public List<TezMerger.Segment> createInMemStreams() throws IOException {
  int numberOfStreams = Math.max(2, rnd.nextInt(10));
  LOG.info("No of streams : " + numberOfStreams);

  SerializationFactory serializationFactory = new SerializationFactory(conf);
  Serializer keySerializer = serializationFactory.getSerializer(keyClass);
  Serializer valueSerializer = serializationFactory.getSerializer(valClass);

  LocalDirAllocator localDirAllocator =
      new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
  InputContext context = createTezInputContext();
  MergeManager mergeManager = new MergeManager(conf, fs, localDirAllocator,
      context, null, null, null, null, null, 1024 * 1024 * 10, null, false, -1);

  DataOutputBuffer keyBuf = new DataOutputBuffer();
  DataOutputBuffer valBuf = new DataOutputBuffer();
  DataInputBuffer keyIn = new DataInputBuffer();
  DataInputBuffer valIn = new DataInputBuffer();

  keySerializer.open(keyBuf);
  valueSerializer.open(valBuf);

  List<TezMerger.Segment> segments = new LinkedList<TezMerger.Segment>();
  for (int i = 0; i < numberOfStreams; i++) {
    BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
    InMemoryWriter writer = new InMemoryWriter(bout);
    Map<Writable, Writable> data = createData();
    // write data
    for (Map.Entry<Writable, Writable> entry : data.entrySet()) {
      keySerializer.serialize(entry.getKey());
      valueSerializer.serialize(entry.getValue());
      keyIn.reset(keyBuf.getData(), 0, keyBuf.getLength());
      valIn.reset(valBuf.getData(), 0, valBuf.getLength());
      writer.append(keyIn, valIn);
      originalData.put(entry.getKey(), entry.getValue());
      keyBuf.reset();
      valBuf.reset();
      keyIn.reset();
      valIn.reset();
    }
    IFile.Reader reader = new InMemoryReader(mergeManager, null,
        bout.getBuffer(), 0, bout.getBuffer().length);
    segments.add(new TezMerger.Segment(reader, null));
    data.clear();
    writer.close();
  }
  return segments;
}
Example #29
Source File: TezBytesWritableSerialization.java From tez with Apache License 2.0
@Override
public Serializer<Writable> getSerializer(Class<Writable> c) {
  return new TezBytesWritableSerializer();
}
Example #30
Source File: BinaryBlockSerialization.java From systemds with Apache License 2.0
@Override
public Serializer<MatrixBlock> getSerializer(Class arg0) {
  return new MatrixBlockSerializer();
}
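Examples #29 and #30 return custom Serializer implementations. For reference, a Serializer implementation only has to provide open, serialize, and close; the sketch below does so for a hypothetical Point(x, y) value type and is not taken from either project.

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.io.serializer.Serializer;

// Hypothetical value type, for illustration only.
class Point {
  int x, y;
}

// Minimal Serializer sketch: open() binds the stream, serialize() writes
// one record, close() releases the stream.
class PointSerializer implements Serializer<Point> {
  private DataOutputStream dataOut;

  @Override
  public void open(OutputStream out) {
    dataOut = (out instanceof DataOutputStream)
        ? (DataOutputStream) out
        : new DataOutputStream(out);
  }

  @Override
  public void serialize(Point p) throws IOException {
    dataOut.writeInt(p.x);
    dataOut.writeInt(p.y);
  }

  @Override
  public void close() throws IOException {
    dataOut.close();
  }
}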