Java Code Examples for org.apache.beam.sdk.util.WindowedValue#WindowedValueCoder
The following examples show how to use org.apache.beam.sdk.util.WindowedValue#WindowedValueCoder.
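For orientation, here is a minimal, self-contained round-trip sketch (not taken from the examples below; the class name and sample value are purely illustrative): it builds a WindowedValue.FullWindowedValueCoder, encodes a WindowedValue<String> in the global window to bytes, and decodes it back. The runner translators in the examples do essentially the same thing when shipping windowed elements between workers as byte arrays.

import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.util.CoderUtils;
import org.apache.beam.sdk.util.WindowedValue;

// Illustrative sketch only; class name and sample value are not from the examples below.
public class WindowedValueCoderRoundTrip {
  public static void main(String[] args) throws Exception {
    // A WindowedValueCoder that encodes the value together with its window, timestamp and pane.
    WindowedValue.WindowedValueCoder<String> coder =
        WindowedValue.FullWindowedValueCoder.of(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);

    WindowedValue<String> original = WindowedValue.valueInGlobalWindow("hello");

    // Encode to a byte array and decode back with the same coder.
    byte[] bytes = CoderUtils.encodeToByteArray(coder, original);
    WindowedValue<String> decoded = CoderUtils.decodeFromByteArray(coder, bytes);

    System.out.println(decoded.getValue()); // prints "hello"
  }
}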
Example 1
Source File: BoundedDataset.java From beam with Apache License 2.0 | 6 votes |
Iterable<WindowedValue<T>> getValues(PCollection<T> pcollection) {
  if (windowedValues == null) {
    WindowFn<?, ?> windowFn = pcollection.getWindowingStrategy().getWindowFn();
    Coder<? extends BoundedWindow> windowCoder = windowFn.windowCoder();
    final WindowedValue.WindowedValueCoder<T> windowedValueCoder;
    if (windowFn instanceof GlobalWindows) {
      windowedValueCoder = WindowedValue.ValueOnlyWindowedValueCoder.of(pcollection.getCoder());
    } else {
      windowedValueCoder =
          WindowedValue.FullWindowedValueCoder.of(pcollection.getCoder(), windowCoder);
    }
    JavaRDDLike<byte[], ?> bytesRDD = rdd.map(CoderHelpers.toByteFunction(windowedValueCoder));
    List<byte[]> clientBytes = bytesRDD.collect();
    windowedValues =
        clientBytes.stream()
            .map(bytes -> CoderHelpers.fromByteArray(bytes, windowedValueCoder))
            .collect(Collectors.toList());
  }
  return windowedValues;
}
Example 2
Source File: JetTransformTranslators.java From beam with Apache License 2.0 | 5 votes |
@Override
public Vertex translate(
    Pipeline pipeline,
    AppliedPTransform<?, ?, ?> appliedTransform,
    Node node,
    JetTranslationContext context) {
  String transformName = appliedTransform.getFullName();

  PCollection<KV<K, InputT>> input =
      (PCollection<KV<K, InputT>>) Utils.getInput(appliedTransform);
  WindowedValue.WindowedValueCoder<KV<K, InputT>> inputCoder = Utils.getWindowedValueCoder(input);

  Map.Entry<TupleTag<?>, PValue> output = Utils.getOutput(appliedTransform);
  Coder outputCoder = Utils.getCoder((PCollection) output.getValue());

  WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();

  DAGBuilder dagBuilder = context.getDagBuilder();
  String vertexId = dagBuilder.newVertexId(transformName);

  Vertex vertex =
      dagBuilder.addVertex(
          vertexId,
          WindowGroupP.supplier(
              context.getOptions(), inputCoder, outputCoder, windowingStrategy, vertexId));

  dagBuilder.registerEdgeEndPoint(Utils.getTupleTagId(input), vertex);

  String outputEdgeId = Utils.getTupleTagId(output.getValue());
  dagBuilder.registerCollectionOfEdge(outputEdgeId, output.getKey().getId());
  dagBuilder.registerEdgeStartPoint(outputEdgeId, vertex, outputCoder);

  return vertex;
}
Example 3
Source File: StreamingTransformTranslator.java From beam with Apache License 2.0 | 5 votes |
private static <K, V, W extends BoundedWindow> TransformEvaluator<GroupByKey<K, V>> groupByKey() {
  return new TransformEvaluator<GroupByKey<K, V>>() {
    @Override
    public void evaluate(GroupByKey<K, V> transform, EvaluationContext context) {
      @SuppressWarnings("unchecked")
      UnboundedDataset<KV<K, V>> inputDataset =
          (UnboundedDataset<KV<K, V>>) context.borrowDataset(transform);
      List<Integer> streamSources = inputDataset.getStreamSources();
      JavaDStream<WindowedValue<KV<K, V>>> dStream = inputDataset.getDStream();
      final KvCoder<K, V> coder = (KvCoder<K, V>) context.getInput(transform).getCoder();
      @SuppressWarnings("unchecked")
      final WindowingStrategy<?, W> windowingStrategy =
          (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy();
      @SuppressWarnings("unchecked")
      final WindowFn<Object, W> windowFn = (WindowFn<Object, W>) windowingStrategy.getWindowFn();

      // --- coders.
      final WindowedValue.WindowedValueCoder<V> wvCoder =
          WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());

      JavaDStream<WindowedValue<KV<K, Iterable<V>>>> outStream =
          SparkGroupAlsoByWindowViaWindowSet.groupByKeyAndWindow(
              dStream,
              coder.getKeyCoder(),
              wvCoder,
              windowingStrategy,
              context.getSerializableOptions(),
              streamSources,
              context.getCurrentTransform().getFullName());

      context.putDataset(transform, new UnboundedDataset<>(outStream, streamSources));
    }

    @Override
    public String toNativeString() {
      return "groupByKey()";
    }
  };
}
Example 4
Source File: TransformTranslator.java From beam with Apache License 2.0 | 5 votes |
private static <K, V, OutputT> JavaPairRDD<TupleTag<?>, WindowedValue<?>> statefulParDoTransform(
    KvCoder<K, V> kvCoder,
    Coder<? extends BoundedWindow> windowCoder,
    JavaRDD<WindowedValue<KV<K, V>>> kvInRDD,
    Partitioner partitioner,
    MultiDoFnFunction<KV<K, V>, OutputT> doFnFunction,
    boolean requiresSortedInput) {
  Coder<K> keyCoder = kvCoder.getKeyCoder();

  final WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(kvCoder.getValueCoder(), windowCoder);

  if (!requiresSortedInput) {
    return GroupCombineFunctions.groupByKeyOnly(kvInRDD, keyCoder, wvCoder, partitioner)
        .map(
            input -> {
              final K key = input.getKey();
              Iterable<WindowedValue<V>> value = input.getValue();
              return FluentIterable.from(value)
                  .transform(
                      windowedValue ->
                          windowedValue.withValue(KV.of(key, windowedValue.getValue())))
                  .iterator();
            })
        .flatMapToPair(doFnFunction);
  }

  JavaPairRDD<ByteArray, byte[]> pairRDD =
      kvInRDD
          .map(new ReifyTimestampsAndWindowsFunction<>())
          .mapToPair(TranslationUtils.toPairFunction())
          .mapToPair(
              CoderHelpers.toByteFunctionWithTs(keyCoder, wvCoder, in -> in._2().getTimestamp()));

  JavaPairRDD<ByteArray, byte[]> sorted =
      pairRDD.repartitionAndSortWithinPartitions(keyPrefixPartitionerFrom(partitioner));

  return sorted.mapPartitionsToPair(wrapDoFnFromSortedRDD(doFnFunction, keyCoder, wvCoder));
}
Example 5
Source File: RowHelpers.java From beam with Apache License 2.0 | 5 votes |
/**
 * A Spark {@link MapFunction} for extracting a {@link WindowedValue} from a Row in which the
 * {@link WindowedValue} was serialized to bytes using its {@link
 * WindowedValue.WindowedValueCoder}.
 *
 * @param <T> The type of the object.
 * @return A {@link MapFunction} that accepts a {@link Row} and returns its {@link WindowedValue}.
 */
public static <T> MapFunction<Row, WindowedValue<T>> extractWindowedValueFromRowMapFunction(
    WindowedValue.WindowedValueCoder<T> windowedValueCoder) {
  return (MapFunction<Row, WindowedValue<T>>)
      value -> {
        // there is only one value put in each Row by the InputPartitionReader
        byte[] bytes = (byte[]) value.get(0);
        return windowedValueCoder.decode(new ByteArrayInputStream(bytes));
      };
}
Example 6
Source File: Utils.java From beam with Apache License 2.0 | 5 votes |
static boolean isKeyedValueCoder(Coder coder) {
  if (coder instanceof KvCoder) {
    return true;
  } else if (coder instanceof WindowedValue.WindowedValueCoder) {
    return ((WindowedValue.WindowedValueCoder) coder).getValueCoder() instanceof KvCoder;
  }
  return false;
}
Example 7
Source File: GroupByKeyTranslatorBatch.java From twister2 with Apache License 2.0 | 5 votes |
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
  PCollection<KV<K, V>> input = context.getInput(transform);
  BatchTSetImpl<WindowedValue<KV<K, V>>> inputTTSet = context.getInputDataSet(input);
  final KvCoder<K, V> coder = (KvCoder<K, V>) context.getInput(transform).getCoder();
  Coder<K> inputKeyCoder = ((KvCoder<K, V>) input.getCoder()).getKeyCoder();
  WindowingStrategy windowingStrategy = input.getWindowingStrategy();
  WindowFn<KV<K, V>, BoundedWindow> windowFn =
      (WindowFn<KV<K, V>, BoundedWindow>) windowingStrategy.getWindowFn();
  final WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());

  KeyedTSet<byte[], byte[]> keyedTSet =
      inputTTSet.mapToTuple(new MapToTupleFunction<K, V>(inputKeyCoder, wvCoder));

  // todo add support for a partition function to be specified, this would use
  // todo keyedPartition function instead of KeyedGather
  ComputeTSet<KV<K, Iterable<WindowedValue<V>>>, Iterator<Tuple<byte[], Iterator<byte[]>>>>
      groupedbyKeyTset =
          keyedTSet.keyedGather().map(new ByteToWindowFunction(inputKeyCoder, wvCoder));

  // --- now group also by window.
  ComputeTSet<WindowedValue<KV<K, Iterable<V>>>, Iterable<KV<K, Iterator<WindowedValue<V>>>>>
      outputTset =
          groupedbyKeyTset
              .direct()
              .<WindowedValue<KV<K, Iterable<V>>>>flatmap(
                  new GroupByWindowFunction(
                      windowingStrategy, SystemReduceFn.buffering(coder.getValueCoder())));
  PCollection output = context.getOutput(transform);
  context.setOutputDataSet(output, outputTset);
}
Example 8
Source File: BoundedDataset.java From beam with Apache License 2.0 | 5 votes |
List<byte[]> getBytes(WindowedValue.WindowedValueCoder<T> wvCoder) {
  if (clientBytes == null) {
    JavaRDDLike<byte[], ?> bytesRDD = rdd.map(CoderHelpers.toByteFunction(wvCoder));
    clientBytes = bytesRDD.collect();
  }
  return clientBytes;
}
Example 9
Source File: BufferedElementsTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testCoder() throws IOException {
  StringUtf8Coder elementCoder = StringUtf8Coder.of();
  // Generics fail to see here that this is Coder<BoundedWindow>
  org.apache.beam.sdk.coders.Coder windowCoder = GlobalWindow.Coder.INSTANCE;
  WindowedValue.WindowedValueCoder windowedValueCoder =
      WindowedValue.FullWindowedValueCoder.of(elementCoder, windowCoder);
  KV<String, Integer> key = KV.of("one", 1);
  BufferedElements.Coder coder = new BufferedElements.Coder(windowedValueCoder, windowCoder, key);

  BufferedElement element =
      new BufferedElements.Element(
          WindowedValue.of("test", new Instant(2), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
  BufferedElement timerElement =
      new BufferedElements.Timer(
          "timerId",
          "timerId",
          key,
          GlobalWindow.INSTANCE,
          new Instant(1),
          new Instant(1),
          TimeDomain.EVENT_TIME);

  testRoundTrip(ImmutableList.of(element), coder);
  testRoundTrip(ImmutableList.of(timerElement), coder);
  testRoundTrip(ImmutableList.of(element, timerElement), coder);
  testRoundTrip(ImmutableList.of(element, timerElement, element), coder);
  testRoundTrip(ImmutableList.of(element, element, element, timerElement, timerElement), coder);
}
Example 10
Source File: TranslationUtils.java From beam with Apache License 2.0 | 5 votes |
/**
 * Utility method for deserializing a byte array using the specified coder. (From spark code)
 *
 * @param <T> Type of object to be returned.
 * @param serialized bytearray to be deserialized.
 * @param coder Coder to deserialize with.
 * @return Deserialized object.
 */
public static <T> WindowedValue<T> fromByteArray(
    byte[] serialized, WindowedValue.WindowedValueCoder<T> coder) {
  try {
    return CoderUtils.decodeFromByteArray(coder, serialized);
  } catch (CoderException e) {
    LOG.log(Level.SEVERE, "Error while decoding message", e);
  }
  return null;
}
Example 11
Source File: MapToTupleFunction.java From beam with Apache License 2.0 | 5 votes |
/**
 * Method used to initialize the transient variables that were sent over as byte arrays or proto
 * buffers.
 */
private void initTransient() {
  if (isInitialized) {
    return;
  }
  keyCoder = (Coder<K>) SerializableUtils.deserializeFromByteArray(keyCoderBytes, "Coder");
  wvCoder =
      (WindowedValue.WindowedValueCoder<V>)
          SerializableUtils.deserializeFromByteArray(wvCoderBytes, "Coder");
  this.isInitialized = true;
}
Example 12
Source File: PCollectionViewTranslatorBatch.java From beam with Apache License 2.0 | 5 votes |
@Override
public void translateNode(
    View.CreatePCollectionView<ElemT, ViewT> transform, Twister2BatchTranslationContext context) {
  BatchTSet<WindowedValue<ElemT>> inputDataSet =
      context.getInputDataSet(context.getInput(transform));

  @SuppressWarnings("unchecked")
  AppliedPTransform<
          PCollection<ElemT>,
          PCollection<ElemT>,
          PTransform<PCollection<ElemT>, PCollection<ElemT>>>
      application =
          (AppliedPTransform<
                  PCollection<ElemT>,
                  PCollection<ElemT>,
                  PTransform<PCollection<ElemT>, PCollection<ElemT>>>)
              context.getCurrentTransform();
  org.apache.beam.sdk.values.PCollectionView<ViewT> input;
  PCollection<ElemT> inputPCol = context.getInput(transform);
  final KvCoder coder = (KvCoder) inputPCol.getCoder();
  Coder inputKeyCoder = coder.getKeyCoder();
  WindowingStrategy windowingStrategy = inputPCol.getWindowingStrategy();
  WindowFn windowFn = windowingStrategy.getWindowFn();
  final WindowedValue.WindowedValueCoder wvCoder =
      WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());
  BatchTSet<WindowedValue<ElemT>> inputGathered =
      inputDataSet
          .direct()
          .map(new MapToTupleFunction<>(inputKeyCoder, wvCoder))
          .allGather()
          .map(new ByteToWindowFunctionPrimitive(inputKeyCoder, wvCoder));
  try {
    input = CreatePCollectionViewTranslation.getView(application);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  context.setSideInputDataSet(input.getTagInternal().getId(), inputGathered);
}
Example 13
Source File: SparkBatchPortablePipelineTranslator.java From beam with Apache License 2.0 | 5 votes |
private static <K, V> void translateGroupByKey(
    PTransformNode transformNode, RunnerApi.Pipeline pipeline, SparkTranslationContext context) {

  RunnerApi.Components components = pipeline.getComponents();
  String inputId = getInputId(transformNode);
  Dataset inputDataset = context.popDataset(inputId);
  JavaRDD<WindowedValue<KV<K, V>>> inputRdd = ((BoundedDataset<KV<K, V>>) inputDataset).getRDD();
  WindowedValueCoder<KV<K, V>> inputCoder = getWindowedValueCoder(inputId, components);
  KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) inputCoder.getValueCoder();
  Coder<K> inputKeyCoder = inputKvCoder.getKeyCoder();
  Coder<V> inputValueCoder = inputKvCoder.getValueCoder();
  WindowingStrategy windowingStrategy = getWindowingStrategy(inputId, components);
  WindowFn<Object, BoundedWindow> windowFn = windowingStrategy.getWindowFn();
  WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(inputValueCoder, windowFn.windowCoder());

  JavaRDD<WindowedValue<KV<K, Iterable<V>>>> groupedByKeyAndWindow;
  Partitioner partitioner = getPartitioner(context);
  if (GroupNonMergingWindowsFunctions.isEligibleForGroupByWindow(windowingStrategy)) {
    // we can have a memory sensitive translation for non-merging windows
    groupedByKeyAndWindow =
        GroupNonMergingWindowsFunctions.groupByKeyAndWindow(
            inputRdd, inputKeyCoder, inputValueCoder, windowingStrategy, partitioner);
  } else {
    JavaRDD<KV<K, Iterable<WindowedValue<V>>>> groupedByKeyOnly =
        GroupCombineFunctions.groupByKeyOnly(inputRdd, inputKeyCoder, wvCoder, partitioner);
    // for batch, GroupAlsoByWindow uses an in-memory StateInternals.
    groupedByKeyAndWindow =
        groupedByKeyOnly.flatMap(
            new SparkGroupAlsoByWindowViaOutputBufferFn<>(
                windowingStrategy,
                new TranslationUtils.InMemoryStateInternalsFactory<>(),
                SystemReduceFn.buffering(inputValueCoder),
                context.serializablePipelineOptions));
  }
  context.pushDataset(getOutputId(transformNode), new BoundedDataset<>(groupedByKeyAndWindow));
}
Example 14
Source File: StreamingTransformTranslator.java From beam with Apache License 2.0 | 5 votes |
private static <K, V, W extends BoundedWindow> TransformEvaluator<Reshuffle<K, V>> reshuffle() {
  return new TransformEvaluator<Reshuffle<K, V>>() {
    @Override
    public void evaluate(Reshuffle<K, V> transform, EvaluationContext context) {
      @SuppressWarnings("unchecked")
      UnboundedDataset<KV<K, V>> inputDataset =
          (UnboundedDataset<KV<K, V>>) context.borrowDataset(transform);
      List<Integer> streamSources = inputDataset.getStreamSources();
      JavaDStream<WindowedValue<KV<K, V>>> dStream = inputDataset.getDStream();
      final KvCoder<K, V> coder = (KvCoder<K, V>) context.getInput(transform).getCoder();
      @SuppressWarnings("unchecked")
      final WindowingStrategy<?, W> windowingStrategy =
          (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy();
      @SuppressWarnings("unchecked")
      final WindowFn<Object, W> windowFn = (WindowFn<Object, W>) windowingStrategy.getWindowFn();

      final WindowedValue.WindowedValueCoder<KV<K, V>> wvCoder =
          WindowedValue.FullWindowedValueCoder.of(coder, windowFn.windowCoder());

      JavaDStream<WindowedValue<KV<K, V>>> reshuffledStream =
          dStream.transform(rdd -> GroupCombineFunctions.reshuffle(rdd, wvCoder));

      context.putDataset(transform, new UnboundedDataset<>(reshuffledStream, streamSources));
    }

    @Override
    public String toNativeString() {
      return "repartition(...)";
    }
  };
}
Example 15
Source File: DAGBuilder.java From beam with Apache License 2.0 | 4 votes |
PartitionedKeyExtractor(Coder coder) {
  this.coder =
      Utils.isKeyedValueCoder(coder) ? (WindowedValue.WindowedValueCoder<KV<K, V>>) coder : null;
}
Example 16
Source File: MapToTupleFunction.java From beam with Apache License 2.0 | 4 votes |
public MapToTupleFunction(Coder<K> inputKeyCoder, WindowedValue.WindowedValueCoder<V> wvCoder) {
  this.keyCoder = inputKeyCoder;
  this.wvCoder = wvCoder;
  keyCoderBytes = SerializableUtils.serializeToByteArray(keyCoder);
  wvCoderBytes = SerializableUtils.serializeToByteArray(wvCoder);
}
Example 17
Source File: Utils.java From beam with Apache License 2.0 | 4 votes |
static <T> WindowedValue.WindowedValueCoder<T> getWindowedValueCoder(PCollection<T> pCollection) {
  return WindowedValue.FullWindowedValueCoder.of(
      pCollection.getCoder(), pCollection.getWindowingStrategy().getWindowFn().windowCoder());
}
Example 18
Source File: CombinePerKeyTranslatorBatch.java From beam with Apache License 2.0 | 4 votes |
@Override
public void translateTransform(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
    TranslationContext context) {

  Combine.PerKey combineTransform = (Combine.PerKey) transform;
  @SuppressWarnings("unchecked")
  final PCollection<KV<K, InputT>> input = (PCollection<KV<K, InputT>>) context.getInput();
  @SuppressWarnings("unchecked")
  final PCollection<KV<K, OutputT>> output = (PCollection<KV<K, OutputT>>) context.getOutput();
  @SuppressWarnings("unchecked")
  final Combine.CombineFn<InputT, AccumT, OutputT> combineFn =
      (Combine.CombineFn<InputT, AccumT, OutputT>) combineTransform.getFn();
  WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();

  Dataset<WindowedValue<KV<K, InputT>>> inputDataset = context.getDataset(input);

  KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) input.getCoder();
  Coder<K> keyCoder = inputCoder.getKeyCoder();
  KvCoder<K, OutputT> outputKVCoder = (KvCoder<K, OutputT>) output.getCoder();
  Coder<OutputT> outputCoder = outputKVCoder.getValueCoder();

  KeyValueGroupedDataset<K, WindowedValue<KV<K, InputT>>> groupedDataset =
      inputDataset.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder));

  Coder<AccumT> accumulatorCoder = null;
  try {
    accumulatorCoder =
        combineFn.getAccumulatorCoder(
            input.getPipeline().getCoderRegistry(), inputCoder.getValueCoder());
  } catch (CannotProvideCoderException e) {
    throw new RuntimeException(e);
  }

  Dataset<Tuple2<K, Iterable<WindowedValue<OutputT>>>> combinedDataset =
      groupedDataset.agg(
          new AggregatorCombiner<K, InputT, AccumT, OutputT, BoundedWindow>(
                  combineFn, windowingStrategy, accumulatorCoder, outputCoder)
              .toColumn());

  // expand the list into separate elements and put the key back into the elements
  WindowedValue.WindowedValueCoder<KV<K, OutputT>> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(
          outputKVCoder, input.getWindowingStrategy().getWindowFn().windowCoder());
  Dataset<WindowedValue<KV<K, OutputT>>> outputDataset =
      combinedDataset.flatMap(
          (FlatMapFunction<
                  Tuple2<K, Iterable<WindowedValue<OutputT>>>, WindowedValue<KV<K, OutputT>>>)
              tuple2 -> {
                K key = tuple2._1();
                Iterable<WindowedValue<OutputT>> windowedValues = tuple2._2();
                List<WindowedValue<KV<K, OutputT>>> result = new ArrayList<>();
                for (WindowedValue<OutputT> windowedValue : windowedValues) {
                  KV<K, OutputT> kv = KV.of(key, windowedValue.getValue());
                  result.add(
                      WindowedValue.of(
                          kv,
                          windowedValue.getTimestamp(),
                          windowedValue.getWindows(),
                          windowedValue.getPane()));
                }
                return result.iterator();
              },
          EncoderHelpers.fromBeamCoder(wvCoder));
  context.putDataset(output, outputDataset);
}
Example 19
Source File: ParDoBoundMultiTranslator.java From beam with Apache License 2.0 | 4 votes |
private static <InT, OutT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  Map<String, String> outputs = transform.getTransform().getOutputsMap();

  final RunnerApi.ExecutableStagePayload stagePayload;
  try {
    stagePayload =
        RunnerApi.ExecutableStagePayload.parseFrom(
            transform.getTransform().getSpec().getPayload());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  String inputId = stagePayload.getInput();
  final MessageStream<OpMessage<InT>> inputStream = ctx.getMessageStreamById(inputId);
  // TODO: support side input
  final List<MessageStream<OpMessage<InT>>> sideInputStreams = Collections.emptyList();

  final Map<TupleTag<?>, Integer> tagToIndexMap = new HashMap<>();
  final Map<String, TupleTag<?>> idToTupleTagMap = new HashMap<>();

  // first output as the main output
  final TupleTag<OutT> mainOutputTag =
      outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next());

  AtomicInteger index = new AtomicInteger(0);
  outputs
      .keySet()
      .iterator()
      .forEachRemaining(
          outputName -> {
            TupleTag<?> tupleTag = new TupleTag<>(outputName);
            tagToIndexMap.put(tupleTag, index.get());
            index.incrementAndGet();
            String collectionId = outputs.get(outputName);
            idToTupleTagMap.put(collectionId, tupleTag);
          });

  WindowedValue.WindowedValueCoder<InT> windowedInputCoder =
      ctx.instantiateCoder(inputId, pipeline.getComponents());

  final DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation = ParDoTranslation.getSchemaInformation(transform.getTransform());

  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(transform.getTransform());

  final RunnerApi.PCollection input = pipeline.getComponents().getPcollectionsOrThrow(inputId);
  final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(input);

  final DoFnOp<InT, OutT, RawUnionValue> op =
      new DoFnOp<>(
          mainOutputTag,
          new NoOpDoFn<>(),
          null, // key coder not in use
          windowedInputCoder.getValueCoder(), // input coder not in use
          windowedInputCoder,
          Collections.emptyMap(), // output coders not in use
          Collections.emptyList(), // sideInputs not in use until side input support
          new ArrayList<>(idToTupleTagMap.values()), // used by java runner only
          SamzaPipelineTranslatorUtils.getPortableWindowStrategy(transform, pipeline),
          Collections.emptyMap(), // idToViewMap not in use until side input support
          new DoFnOp.MultiOutputManagerFactory(tagToIndexMap),
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          isBounded,
          true,
          stagePayload,
          idToTupleTagMap,
          doFnSchemaInformation,
          sideInputMapping);

  final MessageStream<OpMessage<InT>> mergedStreams;
  if (sideInputStreams.isEmpty()) {
    mergedStreams = inputStream;
  } else {
    MessageStream<OpMessage<InT>> mergedSideInputStreams =
        MessageStream.mergeAll(sideInputStreams).flatMap(new SideInputWatermarkFn());
    mergedStreams = inputStream.merge(Collections.singletonList(mergedSideInputStreams));
  }

  final MessageStream<OpMessage<RawUnionValue>> taggedOutputStream =
      mergedStreams.flatMap(OpAdapter.adapt(op));

  for (int outputIndex : tagToIndexMap.values()) {
    final MessageStream<OpMessage<OutT>> outputStream =
        taggedOutputStream
            .filter(
                message ->
                    message.getType() != OpMessage.Type.ELEMENT
                        || message.getElement().getValue().getUnionTag() == outputIndex)
            .flatMap(OpAdapter.adapt(new RawUnionValueToValue()));

    ctx.registerMessageStream(ctx.getOutputId(transform), outputStream);
  }
}
Example 20
Source File: GroupByKeyTranslator.java From beam with Apache License 2.0 | 4 votes |
private static <K, InputT, OutputT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  final MessageStream<OpMessage<KV<K, InputT>>> inputStream =
      ctx.getOneInputMessageStream(transform);
  final boolean needRepartition = ctx.getSamzaPipelineOptions().getMaxSourceParallelism() > 1;
  final WindowingStrategy<?, BoundedWindow> windowingStrategy =
      ctx.getPortableWindowStrategy(transform, pipeline);
  final Coder<BoundedWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();

  final String inputId = ctx.getInputId(transform);
  final WindowedValue.WindowedValueCoder<KV<K, InputT>> windowedInputCoder =
      ctx.instantiateCoder(inputId, pipeline.getComponents());
  final KvCoder<K, InputT> kvInputCoder = (KvCoder<K, InputT>) windowedInputCoder.getValueCoder();
  final Coder<WindowedValue<KV<K, InputT>>> elementCoder =
      WindowedValue.FullWindowedValueCoder.of(kvInputCoder, windowCoder);

  final TupleTag<KV<K, OutputT>> outputTag =
      new TupleTag<>(Iterables.getOnlyElement(transform.getTransform().getOutputsMap().keySet()));

  @SuppressWarnings("unchecked")
  final SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> reduceFn =
      (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
          SystemReduceFn.buffering(kvInputCoder.getValueCoder());
  final RunnerApi.PCollection input = pipeline.getComponents().getPcollectionsOrThrow(inputId);
  final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(input);

  final MessageStream<OpMessage<KV<K, OutputT>>> outputStream =
      doTranslateGBK(
          inputStream,
          needRepartition,
          reduceFn,
          windowingStrategy,
          kvInputCoder,
          elementCoder,
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          outputTag,
          isBounded);

  ctx.registerMessageStream(ctx.getOutputId(transform), outputStream);
}