com.google.cloud.dataflow.sdk.coders.Coder Java Examples
The following examples show how to use com.google.cloud.dataflow.sdk.coders.Coder. They are drawn from open-source projects; each example lists its source file and license.
Example #1
Source File: StateCheckpointUtils.java, from flink-dataflow (Apache License 2.0)
public static <K> void encodeState(Map<K, FlinkStateInternals<K>> perKeyStateInternals,
                                   StateCheckpointWriter writer, Coder<K> keyCoder) throws IOException {
    CoderTypeSerializer<K> keySerializer = new CoderTypeSerializer<>(keyCoder);

    int noOfKeys = perKeyStateInternals.size();
    writer.writeInt(noOfKeys);
    for (Map.Entry<K, FlinkStateInternals<K>> keyStatePair : perKeyStateInternals.entrySet()) {
        K key = keyStatePair.getKey();
        FlinkStateInternals<K> state = keyStatePair.getValue();

        // encode the key
        writer.serializeKey(key, keySerializer);

        // write the associated state
        state.persistState(writer);
    }
}
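The CoderTypeSerializer used above ultimately delegates to the Coder's encode and decode methods. A minimal, self-contained sketch of that round trip using the SDK's built-in StringUtf8Coder (the key value here is made up for illustration):

import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

public class CoderRoundTrip {
    public static void main(String[] args) throws Exception {
        Coder<String> keyCoder = StringUtf8Coder.of();

        // encode: OUTER context means the value owns the rest of the stream.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        keyCoder.encode("user-42", out, Coder.Context.OUTER);

        // decode: must use the same coder and the same context.
        String decoded = keyCoder.decode(
                new ByteArrayInputStream(out.toByteArray()), Coder.Context.OUTER);
        System.out.println(decoded); // prints: user-42
    }
}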
Example #2
Source File: KvCoderComperator.java, from flink-dataflow (Apache License 2.0)
@Override
public void putNormalizedKey(KV<K, V> record, MemorySegment target, int offset, int numBytes) {
    buffer1.reset();
    try {
        keyCoder.encode(record.getKey(), buffer1, Coder.Context.NESTED);
    } catch (IOException e) {
        throw new RuntimeException("Could not serialize " + record + " using coder " + keyCoder + ": " + e);
    }
    final byte[] data = buffer1.getBuffer();
    final int limit = offset + numBytes;

    int numBytesPut = Math.min(numBytes, buffer1.size());

    // copy as many encoded key bytes as fit, then zero-pad up to numBytes
    target.put(offset, data, 0, numBytesPut);
    offset += numBytesPut;
    while (offset < limit) {
        target.put(offset++, (byte) 0);
    }
}
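Note the NESTED context here, unlike the OUTER context used in the comparison methods below. For coders that are not self-delimiting (such as StringUtf8Coder), NESTED adds a length prefix so the value can be followed by more data in the same stream, while OUTER writes the raw bytes only. A small sketch of the difference, assuming the pre-Beam Dataflow SDK's StringUtf8Coder:

// fragment; assumes imports of StringUtf8Coder, Coder, and ByteArrayOutputStream
StringUtf8Coder c = StringUtf8Coder.of();

ByteArrayOutputStream nested = new ByteArrayOutputStream();
c.encode("abc", nested, Coder.Context.NESTED); // VarInt length prefix + "abc": 4 bytes

ByteArrayOutputStream outer = new ByteArrayOutputStream();
c.encode("abc", outer, Coder.Context.OUTER);   // raw UTF-8 only: 3 bytes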
Example #3
Source File: KvCoderComperator.java, from flink-dataflow (Apache License 2.0)
@Override
public int compare(KV<K, V> first, KV<K, V> second) {
    try {
        buffer1.reset();
        buffer2.reset();
        keyCoder.encode(first.getKey(), buffer1, Coder.Context.OUTER);
        keyCoder.encode(second.getKey(), buffer2, Coder.Context.OUTER);
        byte[] arr = buffer1.getBuffer();
        byte[] arrOther = buffer2.getBuffer();
        if (buffer1.size() != buffer2.size()) {
            return buffer1.size() - buffer2.size();
        }
        int len = buffer1.size();
        for (int i = 0; i < len; i++) {
            if (arr[i] != arrOther[i]) {
                return arr[i] - arrOther[i];
            }
        }
        return 0;
    } catch (IOException e) {
        throw new RuntimeException("Could not compare reference.", e);
    }
}
Example #4
Source File: KvCoderComperator.java, from flink-dataflow (Apache License 2.0)
@Override
public boolean equalToReference(KV<K, V> candidate) {
    try {
        buffer2.reset();
        keyCoder.encode(candidate.getKey(), buffer2, Coder.Context.OUTER);
        byte[] arr = referenceBuffer.getBuffer();
        byte[] arrOther = buffer2.getBuffer();
        if (referenceBuffer.size() != buffer2.size()) {
            return false;
        }
        int len = buffer2.size();
        for (int i = 0; i < len; i++) {
            if (arr[i] != arrOther[i]) {
                return false;
            }
        }
        return true;
    } catch (IOException e) {
        throw new RuntimeException("Could not compare reference.", e);
    }
}
Example #5
Source File: FlinkStreamingCreateFunction.java, from flink-dataflow (Apache License 2.0)
@Override
public void flatMap(IN value, Collector<WindowedValue<OUT>> out) throws Exception {
    @SuppressWarnings("unchecked")
    OUT voidValue = (OUT) VoidCoderTypeSerializer.VoidValue.INSTANCE;
    for (byte[] element : elements) {
        ByteArrayInputStream bai = new ByteArrayInputStream(element);
        OUT outValue = coder.decode(bai, Coder.Context.OUTER);
        if (outValue == null) {
            out.collect(WindowedValue.of(voidValue, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
        } else {
            out.collect(WindowedValue.of(outValue, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
        }
    }
    out.close();
}
Example #6
Source File: CoderComparator.java, from flink-dataflow (Apache License 2.0)
@Override
public int compare(T first, T second) {
    try {
        buffer1.reset();
        buffer2.reset();
        coder.encode(first, buffer1, Coder.Context.OUTER);
        coder.encode(second, buffer2, Coder.Context.OUTER);
        byte[] arr = buffer1.getBuffer();
        byte[] arrOther = buffer2.getBuffer();
        if (buffer1.size() != buffer2.size()) {
            return buffer1.size() - buffer2.size();
        }
        int len = buffer1.size();
        for (int i = 0; i < len; i++) {
            if (arr[i] != arrOther[i]) {
                return arr[i] - arrOther[i];
            }
        }
        return 0;
    } catch (IOException e) {
        throw new RuntimeException("Could not compare: ", e);
    }
}
Example #7
Source File: CoderComparator.java, from flink-dataflow (Apache License 2.0)
@Override
public boolean equalToReference(T candidate) {
    try {
        buffer2.reset();
        coder.encode(candidate, buffer2, Coder.Context.OUTER);
        byte[] arr = referenceBuffer.getBuffer();
        byte[] arrOther = buffer2.getBuffer();
        if (referenceBuffer.size() != buffer2.size()) {
            return false;
        }
        int len = buffer2.size();
        for (int i = 0; i < len; i++) {
            if (arr[i] != arrOther[i]) {
                return false;
            }
        }
        return true;
    } catch (IOException e) {
        throw new RuntimeException("Could not compare reference.", e);
    }
}
Example #8
Source File: CoderComparator.java, from flink-dataflow (Apache License 2.0)
@Override
public void putNormalizedKey(T record, MemorySegment target, int offset, int numBytes) {
    buffer1.reset();
    try {
        coder.encode(record, buffer1, Coder.Context.OUTER);
    } catch (IOException e) {
        throw new RuntimeException("Could not serialize " + record + " using coder " + coder + ": " + e);
    }
    final byte[] data = buffer1.getBuffer();
    final int limit = offset + numBytes;

    // copy as many encoded bytes as fit, then zero-pad up to numBytes
    target.put(offset, data, 0, Math.min(numBytes, buffer1.size()));
    offset += buffer1.size();
    while (offset < limit) {
        target.put(offset++, (byte) 0);
    }
}
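Flink sorters use these normalized-key bytes for fast byte-wise comparisons before falling back to the full compare() above. A hypothetical call site (comparator and record are illustrative names, and the sketch assumes Flink's MemorySegmentFactory is available in the targeted Flink version):

// write the first 16 bytes of the encoded record (zero-padded) into a segment
MemorySegment segment = MemorySegmentFactory.wrap(new byte[16]);
comparator.putNormalizedKey(record, segment, 0, 16);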
Example #9
Source File: FlinkGroupByKeyWrapper.java, from flink-dataflow (Apache License 2.0)
public static <K, V> KeyedStream<WindowedValue<KV<K, V>>, K> groupStreamByKey(
        DataStream<WindowedValue<KV<K, V>>> inputDataStream, KvCoder<K, V> inputKvCoder) {
    final Coder<K> keyCoder = inputKvCoder.getKeyCoder();
    final TypeInformation<K> keyTypeInfo = new CoderTypeInformation<>(keyCoder);
    final boolean isKeyVoid = keyCoder instanceof VoidCoder;

    return inputDataStream.keyBy(
            new KeySelectorWithQueryableResultType<K, V>() {

                @Override
                public K getKey(WindowedValue<KV<K, V>> value) throws Exception {
                    return isKeyVoid ? (K) VoidCoderTypeSerializer.VoidValue.INSTANCE :
                            value.getValue().getKey();
                }

                @Override
                public TypeInformation<K> getProducedType() {
                    return keyTypeInfo;
                }
            });
}
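A hedged usage sketch: keying a stream of KV<String, Integer> values by their String keys. The method name and types here are illustrative, not from the project:

import com.google.cloud.dataflow.sdk.coders.KvCoder;
import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
import com.google.cloud.dataflow.sdk.coders.VarIntCoder;

static KeyedStream<WindowedValue<KV<String, Integer>>, String> keyByWord(
        DataStream<WindowedValue<KV<String, Integer>>> input) {
    // the KvCoder tells the wrapper how to derive type information for the key
    KvCoder<String, Integer> kvCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
    return FlinkGroupByKeyWrapper.groupStreamByKey(input, kvCoder);
}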
Example #10
Source File: StateCheckpointUtils.java, from flink-dataflow (Apache License 2.0)
public static <K> Map<K, FlinkStateInternals<K>> decodeState(
        StateCheckpointReader reader,
        OutputTimeFn<? super BoundedWindow> outputTimeFn,
        Coder<K> keyCoder,
        Coder<? extends BoundedWindow> windowCoder,
        ClassLoader classLoader) throws IOException, ClassNotFoundException {

    int noOfKeys = reader.getInt();
    Map<K, FlinkStateInternals<K>> perKeyStateInternals = new HashMap<>(noOfKeys);

    CoderTypeSerializer<K> keySerializer = new CoderTypeSerializer<>(keyCoder);
    for (int i = 0; i < noOfKeys; i++) {
        // decode the key
        K key = reader.deserializeKey(keySerializer);

        // decode the state associated with the key
        FlinkStateInternals<K> stateForKey =
                new FlinkStateInternals<>(key, keyCoder, windowCoder, outputTimeFn);
        stateForKey.restoreState(reader, classLoader);
        perKeyStateInternals.put(key, stateForKey);
    }
    return perKeyStateInternals;
}
Example #11
Source File: StateCheckpointUtils.java, from flink-dataflow (Apache License 2.0)
public static <K> void encodeTimers(Map<K, Set<TimerInternals.TimerData>> allTimers,
                                    StateCheckpointWriter writer,
                                    Coder<K> keyCoder) throws IOException {
    CoderTypeSerializer<K> keySerializer = new CoderTypeSerializer<>(keyCoder);

    int noOfKeys = allTimers.size();
    writer.writeInt(noOfKeys);
    for (Map.Entry<K, Set<TimerInternals.TimerData>> timersPerKey : allTimers.entrySet()) {
        K key = timersPerKey.getKey();

        // encode the key
        writer.serializeKey(key, keySerializer);

        // write the associated timers
        Set<TimerInternals.TimerData> timers = timersPerKey.getValue();
        encodeTimerDataForKey(writer, timers);
    }
}
Example #12
Source File: StateCheckpointUtils.java, from flink-dataflow (Apache License 2.0)
public static <K> Map<K, Set<TimerInternals.TimerData>> decodeTimers(
        StateCheckpointReader reader,
        Coder<? extends BoundedWindow> windowCoder,
        Coder<K> keyCoder) throws IOException {

    int noOfKeys = reader.getInt();
    Map<K, Set<TimerInternals.TimerData>> activeTimers = new HashMap<>(noOfKeys);

    CoderTypeSerializer<K> keySerializer = new CoderTypeSerializer<>(keyCoder);
    for (int i = 0; i < noOfKeys; i++) {
        // decode the key
        K key = reader.deserializeKey(keySerializer);

        // decode the timers associated with the key
        Set<TimerInternals.TimerData> timers = decodeTimerDataForKey(reader, windowCoder);
        activeTimers.put(key, timers);
    }
    return activeTimers;
}
Example #13
Source File: StateCheckpointUtils.java, from flink-dataflow (Apache License 2.0)
private static Set<TimerInternals.TimerData> decodeTimerDataForKey(
        StateCheckpointReader reader, Coder<? extends BoundedWindow> windowCoder) throws IOException {

    // decode the timers: first their number and then the content itself.
    int noOfTimers = reader.getInt();
    Set<TimerInternals.TimerData> timers = new HashSet<>(noOfTimers);
    for (int i = 0; i < noOfTimers; i++) {
        String stringKey = reader.getTagToString();
        Instant instant = reader.getTimestamp();
        TimeDomain domain = TimeDomain.values()[reader.getInt()];
        StateNamespace namespace = StateNamespaces.fromString(stringKey, windowCoder);
        timers.add(TimerInternals.TimerData.of(namespace, instant, domain));
    }
    return timers;
}
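The encodeTimerDataForKey helper called in Example #11 does not appear on this page. A plausible sketch, inferred from the decode side above; the writer method names (setTag, setTimestamp) are assumptions mirroring the reader's getTagToString and getTimestamp:

private static void encodeTimerDataForKey(StateCheckpointWriter writer,
                                          Set<TimerInternals.TimerData> timers) throws IOException {
    // encode the timers: first their number, then each timer's content.
    writer.writeInt(timers.size());
    for (TimerInternals.TimerData timer : timers) {
        writer.setTag(timer.getNamespace().stringKey()); // assumed writer API
        writer.setTimestamp(timer.getTimestamp());       // assumed writer API
        writer.writeInt(timer.getDomain().ordinal());
    }
}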
Example #14
Source File: FlinkStateInternals.java, from flink-dataflow (Apache License 2.0)
@Override
public void persistState(StateCheckpointWriter checkpointBuilder) throws IOException {
    if (value != null) {
        // serialize the coder.
        byte[] coder = InstantiationUtil.serializeObject(elemCoder);

        // encode the value into a ByteString
        ByteString.Output stream = ByteString.newOutput();
        elemCoder.encode(value, stream, Coder.Context.OUTER);
        ByteString data = stream.toByteString();

        checkpointBuilder.addValueBuilder()
                .setTag(stateKey)
                .setData(coder)
                .setData(data);
    }
}
Example #15
Source File: FlinkCreateFunction.java, from flink-dataflow (Apache License 2.0)
@Override
@SuppressWarnings("unchecked")
public void flatMap(IN value, Collector<OUT> out) throws Exception {
    for (byte[] element : elements) {
        ByteArrayInputStream bai = new ByteArrayInputStream(element);
        OUT outValue = coder.decode(bai, Coder.Context.OUTER);
        if (outValue == null) {
            // TODO Flink doesn't allow null values in records
            out.collect((OUT) VoidCoderTypeSerializer.VoidValue.INSTANCE);
        } else {
            out.collect(outValue);
        }
    }
    out.close();
}
Example #16
Source File: FlinkStateInternals.java, from flink-dataflow (Apache License 2.0)
private FlinkInMemoryKeyedCombiningValue(
        ByteString stateKey,
        CombineWithContext.KeyedCombineFnWithContext<? super K, InputT, AccumT, OutputT> combineFn,
        Coder<AccumT> accumCoder,
        final StateContext<?> stateContext) {
    Preconditions.checkNotNull(combineFn);
    Preconditions.checkNotNull(accumCoder);

    this.stateKey = stateKey;
    this.combineFn = combineFn;
    this.accumCoder = accumCoder;
    this.context = new CombineWithContext.Context() {
        @Override
        public PipelineOptions getPipelineOptions() {
            return stateContext.getPipelineOptions();
        }

        @Override
        public <T> T sideInput(PCollectionView<T> view) {
            return stateContext.sideInput(view);
        }
    };
    accum = combineFn.createAccumulator(key, context);
}
Example #17
Source File: FlinkStateInternals.java, from flink-dataflow (Apache License 2.0)
@Override
public void persistState(StateCheckpointWriter checkpointBuilder) throws IOException {
    if (!isClear) {
        // serialize the coder.
        byte[] coder = InstantiationUtil.serializeObject(accumCoder);

        // serialize the combiner.
        byte[] combiner = InstantiationUtil.serializeObject(combineFn);

        // encode the accumulator into a ByteString
        ByteString.Output stream = ByteString.newOutput();
        accumCoder.encode(accum, stream, Coder.Context.OUTER);
        ByteString data = stream.toByteString();

        // put the flag that the next serialized element is an accumulator
        checkpointBuilder.addAccumulatorBuilder()
                .setTag(stateKey)
                .setData(coder)
                .setData(combiner)
                .setData(data);
    }
}
Example #18
Source File: FlinkStateInternals.java, from flink-dataflow (Apache License 2.0)
@Override
public void persistState(StateCheckpointWriter checkpointBuilder) throws IOException {
    if (!contents.isEmpty()) {
        // serialize the coder.
        byte[] coder = InstantiationUtil.serializeObject(elemCoder);

        checkpointBuilder.addListUpdatesBuilder()
                .setTag(stateKey)
                .setData(coder)
                .writeInt(contents.size());

        for (T item : contents) {
            // encode the element
            ByteString.Output stream = ByteString.newOutput();
            elemCoder.encode(item, stream, Coder.Context.OUTER);
            ByteString data = stream.toByteString();

            // add the data to the checkpoint.
            checkpointBuilder.setData(data);
        }
    }
}
Example #19
Source File: UnionCoder.java, from flink-dataflow (Apache License 2.0)
@SuppressWarnings("unchecked") @Override public void encode( RawUnionValue union, OutputStream outStream, Context context) throws IOException { int index = getIndexForEncoding(union); // Write out the union tag. VarInt.encode(index, outStream); // Write out the actual value. Coder<Object> coder = (Coder<Object>) elementCoders.get(index); coder.encode( union.getValue(), outStream, context); }
Example #20
Source File: UnboundedSocketSource.java, from flink-dataflow (Apache License 2.0)
@Nullable
@Override
public Coder getCheckpointMarkCoder() {
    // Flink and Dataflow have different checkpointing mechanisms.
    // In our case we do not need a coder.
    return null;
}
Example #21
Source File: FlinkDoFnFunction.java, from flink-dataflow (Apache License 2.0)
@Override
public WindowingInternals<IN, OUT> windowingInternals() {
    return new WindowingInternals<IN, OUT>() {
        @Override
        public StateInternals stateInternals() {
            return null;
        }

        @Override
        public void outputWindowedValue(OUT output, Instant timestamp,
                                        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
        }

        @Override
        public TimerInternals timerInternals() {
            return null;
        }

        @Override
        public Collection<? extends BoundedWindow> windows() {
            return ImmutableList.of(GlobalWindow.INSTANCE);
        }

        @Override
        public PaneInfo pane() {
            return PaneInfo.NO_FIRING;
        }

        @Override
        public <T> void writePCollectionViewData(TupleTag<?> tag,
                                                 Iterable<WindowedValue<T>> data,
                                                 Coder<T> elemCoder) throws IOException {
        }

        @Override
        public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
            throw new RuntimeException("sideInput() not implemented.");
        }
    };
}
Example #22
Source File: FlinkStreamingTransformTranslators.java, from flink-dataflow (Apache License 2.0)
@Override
public void translateNode(Create.Values<OUT> transform, FlinkStreamingTranslationContext context) {
    PCollection<OUT> output = context.getOutput(transform);
    Iterable<OUT> elements = transform.getElements();

    // we need to serialize the elements to byte arrays, since they might contain
    // elements that are not serializable by Java serialization. We deserialize them
    // in the FlatMap function using the Coder.
    List<byte[]> serializedElements = Lists.newArrayList();
    Coder<OUT> elementCoder = context.getOutput(transform).getCoder();
    for (OUT element : elements) {
        ByteArrayOutputStream bao = new ByteArrayOutputStream();
        try {
            elementCoder.encode(element, bao, Coder.Context.OUTER);
            serializedElements.add(bao.toByteArray());
        } catch (IOException e) {
            throw new RuntimeException("Could not serialize Create elements using Coder: " + e);
        }
    }

    DataStream<Integer> initDataSet = context.getExecutionEnvironment().fromElements(1);

    FlinkStreamingCreateFunction<Integer, OUT> createFunction =
            new FlinkStreamingCreateFunction<>(serializedElements, elementCoder);

    WindowedValue.ValueOnlyWindowedValueCoder<OUT> windowCoder = WindowedValue.getValueOnlyCoder(elementCoder);
    TypeInformation<WindowedValue<OUT>> outputType = new CoderTypeInformation<>(windowCoder);

    DataStream<WindowedValue<OUT>> outputDataStream = initDataSet.flatMap(createFunction)
            .returns(outputType);

    context.setOutputDataStream(context.getOutput(transform), outputDataStream);
}
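For context, a hypothetical pipeline fragment that this translator would handle; the Create.Values transform becomes the Flink flatMap shown in Example #5:

// options: an existing PipelineOptions instance (assumed to be configured elsewhere)
Pipeline p = Pipeline.create(options);
PCollection<String> greetings = p.apply(Create.of("hello", "world"));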
Example #23
Source File: UnionCoder.java, from flink-dataflow (Apache License 2.0)
/**
 * Notifies ElementByteSizeObserver about the byte size of the encoded value using this coder.
 */
@Override
public void registerByteSizeObserver(RawUnionValue union, ElementByteSizeObserver observer, Context context)
        throws Exception {
    int index = getIndexForEncoding(union);
    // Write out the union tag.
    observer.update(VarInt.getLength(index));

    // Write out the actual value.
    @SuppressWarnings("unchecked")
    Coder<Object> coder = (Coder<Object>) elementCoders.get(index);
    coder.registerByteSizeObserver(union.getValue(), observer, context);
}
Example #24
Source File: UnionCoder.java, from flink-dataflow (Apache License 2.0)
/**
 * Since this coder uses elementCoders.get(index) and coders that are known to run in constant
 * time, we defer the return value to that coder.
 */
@Override
public boolean isRegisterByteSizeObserverCheap(RawUnionValue union, Context context) {
    int index = getIndexForEncoding(union);
    @SuppressWarnings("unchecked")
    Coder<Object> coder = (Coder<Object>) elementCoders.get(index);
    return coder.isRegisterByteSizeObserverCheap(union.getValue(), context);
}
Example #25
Source File: CompleteTimeSeriesAggCombiner.java, from data-timeseries-java (Apache License 2.0)
@Override
public Accum decode(InputStream inStream, com.google.cloud.dataflow.sdk.coders.Coder.Context context)
        throws CoderException, IOException {
    Accum accum = new Accum();
    accum.lastCandle = TSPROTO_CODER.decode(inStream, context.nested());
    accum.candles = LIST_CODER.decode(inStream, context.nested());
    return accum;
}
Example #26
Source File: CompleteTimeSeriesAggCombiner.java, from data-timeseries-java (Apache License 2.0)
@Override
public void encode(Accum value, OutputStream outStream, com.google.cloud.dataflow.sdk.coders.Coder.Context context)
        throws CoderException, IOException {
    TSPROTO_CODER.encode(value.lastCandle, outStream, context.nested());
    LIST_CODER.encode(value.candles, outStream, context.nested());
}
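Both fields are encoded with context.nested(), which is what lets the decode in Example #25 find the boundary between lastCandle and candles: in the nested context each component stays self-delimiting. A small fragment illustrating the same idea with SDK built-ins (the values are made up):

import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.ListCoder;
import com.google.cloud.dataflow.sdk.coders.VarIntCoder;

import java.io.ByteArrayOutputStream;
import java.util.Arrays;

// each component is encoded in the nested context so the next one can follow it
ByteArrayOutputStream out = new ByteArrayOutputStream();
VarIntCoder.of().encode(7, out, Coder.Context.NESTED);
ListCoder.of(VarIntCoder.of()).encode(Arrays.asList(1, 2, 3), out, Coder.Context.NESTED);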
Example #27
Source File: FlinkStateInternals.java, from flink-dataflow (Apache License 2.0)
public void restoreState(StateCheckpointReader checkpointReader) throws IOException {
    int noOfValues = checkpointReader.getInt();
    for (int j = 0; j < noOfValues; j++) {
        ByteString valueContent = checkpointReader.getData();
        T outValue = elemCoder.decode(
                new ByteArrayInputStream(valueContent.toByteArray()), Coder.Context.OUTER);
        add(outValue);
    }
}
Example #28
Source File: FlinkParDoBoundWrapper.java, from flink-dataflow (Apache License 2.0)
@Override
public WindowingInternals<IN, OUT> windowingInternalsHelper(final WindowedValue<IN> inElement,
                                                            final Collector<WindowedValue<OUT>> collector) {
    return new WindowingInternals<IN, OUT>() {
        @Override
        public StateInternals stateInternals() {
            throw new NullPointerException("StateInternals are not available for ParDo.Bound().");
        }

        @Override
        public void outputWindowedValue(OUT output, Instant timestamp,
                                        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            collector.collect(makeWindowedValue(output, timestamp, windows, pane));
        }

        @Override
        public TimerInternals timerInternals() {
            throw new NullPointerException("TimerInternals are not available for ParDo.Bound().");
        }

        @Override
        public Collection<? extends BoundedWindow> windows() {
            return inElement.getWindows();
        }

        @Override
        public PaneInfo pane() {
            return inElement.getPane();
        }

        @Override
        public <T> void writePCollectionViewData(TupleTag<?> tag,
                                                 Iterable<WindowedValue<T>> data,
                                                 Coder<T> elemCoder) throws IOException {
            throw new RuntimeException("writePCollectionViewData() not supported in Streaming mode.");
        }

        @Override
        public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
            throw new RuntimeException("sideInput() not implemented.");
        }
    };
}
Example #29
Source File: FlinkBatchTransformTranslators.java, from flink-dataflow (Apache License 2.0)
@Override
public void translateNode(Read.Bounded<T> transform, FlinkBatchTranslationContext context) {
    String name = transform.getName();
    BoundedSource<T> source = transform.getSource();
    PCollection<T> output = context.getOutput(transform);
    Coder<T> coder = output.getCoder();

    TypeInformation<T> typeInformation = context.getTypeInfo(output);

    DataSource<T> dataSource = new DataSource<>(
            context.getExecutionEnvironment(),
            new SourceInputFormat<>(source, context.getPipelineOptions()),
            typeInformation,
            name);

    context.setOutputDataSet(output, dataSource);
}
Example #30
Source File: FlinkBatchTranslationContext.java, from flink-dataflow (Apache License 2.0)
@SuppressWarnings("unchecked") public <T> TypeInformation<T> getTypeInfo(PInput output) { if (output instanceof TypedPValue) { Coder<?> outputCoder = ((TypedPValue) output).getCoder(); if (outputCoder instanceof KvCoder) { return new KvCoderTypeInformation((KvCoder) outputCoder); } else { return new CoderTypeInformation(outputCoder); } } return new GenericTypeInfo<>((Class<T>)Object.class); }