Java Code Examples for org.apache.beam.runners.core.SystemReduceFn#buffering()
The following examples show how to use
org.apache.beam.runners.core.SystemReduceFn#buffering().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PipelineTranslator.java From incubator-nemo with Apache License 2.0 | 6 votes |
/**
 * Builds the transform used to translate a Beam group-by-key node.
 *
 * <p>When {@code isGlobalWindow} reports true for the node, a plain {@code GroupByKeyTransform}
 * is returned. Otherwise a {@code GroupByKeyAndWindowDoFnTransform} is assembled from the main
 * input's windowing strategy and a buffering {@code SystemReduceFn} over the main input's coder.
 *
 * @param ctx translation context
 * @param beamNode the beam node to be translated
 * @return group by key transform
 */
private static Transform createGBKTransform(
    final PipelineTranslationContext ctx, final TransformHierarchy.Node beamNode) {
  final AppliedPTransform appliedTransform = beamNode.toAppliedPTransform(ctx.getPipeline());
  // The node is expected to have exactly one non-additional input; getOnlyElement enforces it.
  final PCollection<?> primaryInput =
      (PCollection<?>)
          Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(appliedTransform));
  final TupleTag outputTag = new TupleTag<>();

  // Globally windowed input needs no window-aware grouping.
  if (isGlobalWindow(beamNode, ctx.getPipeline())) {
    return new GroupByKeyTransform();
  }

  return new GroupByKeyAndWindowDoFnTransform(
      getOutputCoders(appliedTransform),
      outputTag,
      primaryInput.getWindowingStrategy(),
      ctx.getPipelineOptions(),
      SystemReduceFn.buffering(primaryInput.getCoder()),
      DisplayData.from(beamNode.getTransform()));
}
Example 2
Source File: GroupByKeyTranslator.java From beam with Apache License 2.0 | 6 votes |
/**
 * Picks the {@code SystemReduceFn} matching the kind of grouping transform being translated.
 *
 * <p>A {@code GroupByKey} gets a buffering reduce function over the value coder; a {@code
 * Combine.PerKey} gets a combining reduce function built from its combine fn. Any other
 * transform is rejected.
 *
 * @param transform the transform being translated
 * @param pipeline pipeline whose coder registry is used when the transform is a combine
 * @param kvInputCoder coder of the keyed input elements
 * @return the reduce function for {@code transform}
 * @throws RuntimeException if {@code transform} is neither a GroupByKey nor a Combine.PerKey
 */
@SuppressWarnings("unchecked")
private static <K, InputT, OutputT>
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> getSystemReduceFn(
        PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
        Pipeline pipeline,
        KvCoder<K, InputT> kvInputCoder) {
  if (transform instanceof GroupByKey) {
    return (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
        SystemReduceFn.buffering(kvInputCoder.getValueCoder());
  }

  if (transform instanceof Combine.PerKey) {
    final CombineFnBase.GlobalCombineFn<? super InputT, ?, OutputT> perKeyFn =
        ((Combine.PerKey) transform).getFn();
    return SystemReduceFn.combining(
        kvInputCoder.getKeyCoder(),
        AppliedCombineFn.withInputCoder(perKeyFn, pipeline.getCoderRegistry(), kvInputCoder));
  }

  throw new RuntimeException("Transform " + transform + " cannot be translated as GroupByKey.");
}
Example 3
Source File: BatchGroupAlsoByWindowsDoFns.java From beam with Apache License 2.0 | 5 votes |
/** * Create a {@link BatchGroupAlsoByWindowFn} without a combine function. Depending on the {@code * windowFn} this will either use iterators or window sets to implement the grouping. * * @param windowingStrategy The window function and trigger to use for grouping * @param inputCoder the input coder to use */ public static <K, V, W extends BoundedWindow> BatchGroupAlsoByWindowFn<K, V, Iterable<V>> createForIterable( WindowingStrategy<?, W> windowingStrategy, StateInternalsFactory<K> stateInternalsFactory, Coder<V> inputCoder) { // If the windowing strategy indicates we're doing a reshuffle, use the special-path. if (BatchGroupAlsoByWindowReshuffleFn.isReshuffle(windowingStrategy)) { return new BatchGroupAlsoByWindowReshuffleFn<>(); } else if (BatchGroupAlsoByWindowViaIteratorsFn.isSupported(windowingStrategy)) { return new BatchGroupAlsoByWindowViaIteratorsFn<K, V, W>(windowingStrategy); } return new BatchGroupAlsoByWindowViaOutputBufferFn<>( windowingStrategy, stateInternalsFactory, SystemReduceFn.buffering(inputCoder)); }
Example 4
Source File: GroupAlsoByWindowViaOutputBufferDoFnTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns an output-buffer based grouping function for the given strategy, using a buffering
 * {@code SystemReduceFn} built from {@code inputCoder}.
 *
 * @param windowingStrategy windowing strategy the grouping function should follow
 * @param stateInternalsFactory factory passed through to the grouping function
 * @return a new {@code BatchGroupAlsoByWindowViaOutputBufferFn}
 */
@Override
public <W extends BoundedWindow>
    BatchGroupAlsoByWindowFn<K, InputT, Iterable<InputT>> forStrategy(
        WindowingStrategy<?, W> windowingStrategy,
        StateInternalsFactory<K> stateInternalsFactory) {
  final SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> bufferingFn =
      SystemReduceFn.<K, InputT, W>buffering(inputCoder);

  return new BatchGroupAlsoByWindowViaOutputBufferFn<K, InputT, Iterable<InputT>, W>(
      windowingStrategy, stateInternalsFactory, bufferingFn);
}
Example 5
Source File: GroupAlsoByWindowEvaluatorFactory.java From beam with Apache License 2.0 | 5 votes |
public GroupAlsoByWindowEvaluator( final EvaluationContext evaluationContext, PipelineOptions options, CommittedBundle<KeyedWorkItem<K, V>> inputBundle, final AppliedPTransform< PCollection<KeyedWorkItem<K, V>>, PCollection<KV<K, Iterable<V>>>, DirectGroupAlsoByWindow<K, V>> application) { this.evaluationContext = evaluationContext; this.options = options; this.application = application; structuralKey = inputBundle.getKey(); stepContext = evaluationContext .getExecutionContext(application, inputBundle.getKey()) .getStepContext(evaluationContext.getStepName(application)); windowingStrategy = (WindowingStrategy<?, BoundedWindow>) application.getTransform().getInputWindowingStrategy(); outputBundles = new ArrayList<>(); unprocessedElements = ImmutableList.builder(); Coder<V> valueCoder = application.getTransform().getValueCoder(inputBundle.getPCollection().getCoder()); reduceFn = SystemReduceFn.buffering(valueCoder); droppedDueToLateness = Metrics.counter( GroupAlsoByWindowEvaluator.class, GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_LATENESS_COUNTER); }
Example 6
Source File: WindowGroupP.java From beam with Apache License 2.0 | 4 votes |
/**
 * Creates the per-key state: in-memory timer and state internals plus a {@code ReduceFnRunner}
 * that buffers values and appends each encoded output to {@code appendableTraverser}. After
 * construction the watermark is advanced to {@code latestWatermark}.
 *
 * @param key the key this manager is responsible for
 */
KeyManager(K key) {
  this.timerInternals = new InMemoryTimerInternals();
  this.stateInternals = new InMemoryStateInternalsImpl(key);

  // Trigger state machine derived from the windowing strategy's trigger via its proto form.
  final ExecutableTriggerStateMachine triggerMachine =
      ExecutableTriggerStateMachine.create(
          TriggerStateMachines.stateMachineForTrigger(
              TriggerTranslation.toProto(windowingStrategy.getTrigger())));

  // Sink for grouped output: encode the windowed value and append it; side outputs are rejected.
  final OutputWindowedValue<KV<K, Iterable<V>>> groupedOutputSink =
      new OutputWindowedValue<KV<K, Iterable<V>>>() {
        @Override
        public void outputWindowedValue(
            KV<K, Iterable<V>> output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          WindowedValue<KV<K, Iterable<V>>> wrapped =
              WindowedValue.of(output, timestamp, windows, pane);
          byte[] payload = Utils.encode(wrapped, outputCoder);
          //noinspection ResultOfMethodCallIgnored
          appendableTraverser.append(payload);
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(
            TupleTag<AdditionalOutputT> tag,
            AdditionalOutputT output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          throw new UnsupportedOperationException("Grouping should not use side outputs");
        }
      };

  this.reduceFnRunner =
      new ReduceFnRunner<>(
          key,
          windowingStrategy,
          triggerMachine,
          stateInternals,
          timerInternals,
          groupedOutputSink,
          NullSideInputReader.empty(),
          SystemReduceFn.buffering(inputValueValueCoder),
          pipelineOptions.get());

  advanceWatermark(latestWatermark, Instant.now());
}
Example 7
Source File: SparkGroupAlsoByWindowViaWindowSet.java From beam with Apache License 2.0 | 4 votes |
/**
 * Stateful grouping step applied to one partition.
 *
 * <p>Each input element is a triple of (key, new values, optional previous state). Per the
 * original author's notes the possible shapes of {@code (K, Seq, Option<S>)} are:
 *
 * <ol>
 *   <li>{@code Option<S>.isEmpty}: new data with no previous state;
 *   <li>{@code Seq.isEmpty}: no new data, previous state is evaluated (timer-like behaviour);
 *   <li>{@code Seq.nonEmpty && Option<S>.isDefined}: new data together with previous state.
 * </ol>
 *
 * <p>The result iterates over the output key paired with the updated state.
 *
 * @param input the partition (~bundle) of the co-grouped input and previous state
 * @return a Scala iterator wrapping an {@code UpdateStateByKeyOutputIterator}
 */
@Override
public Iterator<
        Tuple2</*K*/ ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/ List<byte[]>>>>
    apply(
        final Iterator<
                Tuple3<
                    /*K*/ ByteArray,
                    Seq</*WV<I>*/ byte[]>,
                    Option<Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/ List<byte[]>>>>>
            input) {
  final SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> bufferingFn =
      SystemReduceFn.buffering(wvCoder.getValueCoder());

  final MetricsContainerImpl metricCells = new MetricsContainerImpl("cellProvider");

  final MetricName closedWindowMetric =
      MetricName.named(
          SparkGroupAlsoByWindowViaWindowSet.class,
          GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_CLOSED_WINDOW_COUNTER);
  final CounterCell closedWindowCell = metricCells.getCounter(closedWindowMetric);

  final MetricName latenessMetric =
      MetricName.named(
          SparkGroupAlsoByWindowViaWindowSet.class,
          GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_LATENESS_COUNTER);
  final CounterCell latenessCell = metricCells.getCounter(latenessMetric);

  // Report drops, if any, and reset the corresponding counter by subtracting its current value.
  // NOTE(review): both cells are read right after being looked up on a container created in
  // this call, and before the output iterator is built -- confirm the logging can ever fire.
  final long droppedLate = latenessCell.getCumulative();
  if (droppedLate > 0) {
    LOG.info(String.format("Dropped %d elements due to lateness.", droppedLate));
    latenessCell.inc(-latenessCell.getCumulative());
  }

  final long droppedClosed = closedWindowCell.getCumulative();
  if (droppedClosed > 0) {
    LOG.info(String.format("Dropped %d elements due to closed window.", droppedClosed));
    closedWindowCell.inc(-closedWindowCell.getCumulative());
  }

  // NOTE(review): only the lateness counter is handed to the iterator; the closed-window
  // counter is not passed on -- confirm that is intended.
  return scala.collection.JavaConversions.asScalaIterator(
      new UpdateStateByKeyOutputIterator(input, bufferingFn, latenessCell));
}
Example 8
Source File: FlinkStreamingTransformTranslators.java From beam with Apache License 2.0 | 4 votes |
@Override public void translateNode( PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, Iterable<InputT>>>> transform, FlinkStreamingTranslationContext context) { PCollection<KV<K, InputT>> input = context.getInput(transform); @SuppressWarnings("unchecked") WindowingStrategy<?, BoundedWindow> windowingStrategy = (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy(); KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder(); SingletonKeyedWorkItemCoder<K, InputT> workItemCoder = SingletonKeyedWorkItemCoder.of( inputKvCoder.getKeyCoder(), inputKvCoder.getValueCoder(), input.getWindowingStrategy().getWindowFn().windowCoder()); DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input); WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>> windowedWorkItemCoder = WindowedValue.getFullCoder( workItemCoder, input.getWindowingStrategy().getWindowFn().windowCoder()); CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo = new CoderTypeInformation<>(windowedWorkItemCoder); DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream = inputDataStream .flatMap(new ToKeyedWorkItem<>(context.getPipelineOptions())) .returns(workItemTypeInfo) .name("ToKeyedWorkItem"); WorkItemKeySelector keySelector = new WorkItemKeySelector<>(inputKvCoder.getKeyCoder()); KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>, ByteBuffer> keyedWorkItemStream = workItemStream.keyBy(new WorkItemKeySelector<>(inputKvCoder.getKeyCoder())); SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, BoundedWindow> reduceFn = SystemReduceFn.buffering(inputKvCoder.getValueCoder()); Coder<WindowedValue<KV<K, Iterable<InputT>>>> outputCoder = context.getWindowedInputCoder(context.getOutput(transform)); TypeInformation<WindowedValue<KV<K, Iterable<InputT>>>> outputTypeInfo = context.getTypeInfo(context.getOutput(transform)); TupleTag<KV<K, Iterable<InputT>>> mainTag 
= new TupleTag<>("main output"); String fullName = getCurrentTransformName(context); WindowDoFnOperator<K, InputT, Iterable<InputT>> doFnOperator = new WindowDoFnOperator<>( reduceFn, fullName, (Coder) windowedWorkItemCoder, mainTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(mainTag, outputCoder), windowingStrategy, new HashMap<>(), /* side-input mapping */ Collections.emptyList(), /* side inputs */ context.getPipelineOptions(), inputKvCoder.getKeyCoder(), keySelector); // our operator expects WindowedValue<KeyedWorkItem> while our input stream // is WindowedValue<SingletonKeyedWorkItem>, which is fine but Java doesn't like it ... @SuppressWarnings("unchecked") SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<InputT>>>> outDataStream = keyedWorkItemStream .transform(fullName, outputTypeInfo, (OneInputStreamOperator) doFnOperator) .uid(fullName); context.setOutputDataStream(context.getOutput(transform), outDataStream); }
Example 9
Source File: FlinkStreamingPortablePipelineTranslator.java From beam with Apache License 2.0 | 4 votes |
/**
 * Appends a group-by-key stage to {@code inputDataStream}.
 *
 * <p>Elements are wrapped as keyed work items, the stream is keyed with a {@code
 * WorkItemKeySelector}, and a {@code WindowDoFnOperator} driven by a buffering {@code
 * SystemReduceFn} produces the grouped output.
 *
 * @param inputDataStream stream of windowed key/value elements to group
 * @param windowingStrategy strategy supplying the window coder and passed to the operator
 * @param windowedInputCoder coder of the input; its value coder is cast to a {@code KvCoder}
 * @param operatorName name given to the operator and the transform
 * @param context translation context supplying the pipeline options
 * @return the stream of grouped, windowed key/iterable pairs
 */
private <K, V> SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> addGBK(
    DataStream<WindowedValue<KV<K, V>>> inputDataStream,
    WindowingStrategy<?, ?> windowingStrategy,
    WindowedValueCoder<KV<K, V>> windowedInputCoder,
    String operatorName,
    StreamingTranslationContext context) {

  KvCoder<K, V> kvCoder = (KvCoder<K, V>) windowedInputCoder.getValueCoder();

  // Coders and type information for the intermediate work items.
  SingletonKeyedWorkItemCoder<K, V> singletonItemCoder =
      SingletonKeyedWorkItemCoder.of(
          kvCoder.getKeyCoder(),
          kvCoder.getValueCoder(),
          windowingStrategy.getWindowFn().windowCoder());

  WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, V>> fullItemCoder =
      WindowedValue.getFullCoder(
          singletonItemCoder, windowingStrategy.getWindowFn().windowCoder());

  CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, V>>> itemTypeInfo =
      new CoderTypeInformation<>(fullItemCoder);

  // Convert each element into a work item, then key the stream.
  DataStream<WindowedValue<SingletonKeyedWorkItem<K, V>>> workItems =
      inputDataStream
          .flatMap(
              new FlinkStreamingTransformTranslators.ToKeyedWorkItem<>(
                  context.getPipelineOptions()))
          .returns(itemTypeInfo)
          .name("ToKeyedWorkItem");

  WorkItemKeySelector<K, V> selector = new WorkItemKeySelector<>(kvCoder.getKeyCoder());

  KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, V>>, ByteBuffer> keyedItems =
      workItems.keyBy(selector);

  SystemReduceFn<K, V, Iterable<V>, Iterable<V>, BoundedWindow> bufferingFn =
      SystemReduceFn.buffering(kvCoder.getValueCoder());

  // Output side: the grouped values are encoded as an iterable of the input value type.
  Coder<Iterable<V>> groupedValuesCoder = IterableCoder.of(kvCoder.getValueCoder());

  Coder<WindowedValue<KV<K, Iterable<V>>>> windowedOutputCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(kvCoder.getKeyCoder(), groupedValuesCoder),
          windowingStrategy.getWindowFn().windowCoder());

  TypeInformation<WindowedValue<KV<K, Iterable<V>>>> resultTypeInfo =
      new CoderTypeInformation<>(windowedOutputCoder);

  TupleTag<KV<K, Iterable<V>>> mainOutputTag = new TupleTag<>("main output");

  WindowDoFnOperator<K, V, Iterable<V>> windowOperator =
      new WindowDoFnOperator<>(
          bufferingFn,
          operatorName,
          (Coder) fullItemCoder,
          mainOutputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory(mainOutputTag, windowedOutputCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          context.getPipelineOptions(),
          kvCoder.getKeyCoder(),
          (KeySelector) selector /* key selector */);

  return keyedItems.transform(
      operatorName, resultTypeInfo, (OneInputStreamOperator) windowOperator);
}
Example 10
Source File: GroupByKeyTranslator.java From beam with Apache License 2.0 | 4 votes |
/**
 * Translates a portable group-by-key transform node into a Samza message stream.
 *
 * <p>The input stream, windowing strategy and coders are resolved from the translation context
 * and the pipeline components, a buffering {@code SystemReduceFn} is created over the input
 * value coder, and the actual translation is delegated to {@code doTranslateGBK}. The resulting
 * stream is registered under the transform's output id.
 *
 * @param transform the portable transform node to translate
 * @param pipeline the pipeline providing components for coder and PCollection lookup
 * @param ctx the portable translation context
 */
private static <K, InputT, OutputT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {

  final MessageStream<OpMessage<KV<K, InputT>>> sourceStream =
      ctx.getOneInputMessageStream(transform);
  // Repartitioning is requested whenever more than one source partition may exist.
  final boolean repartition = ctx.getSamzaPipelineOptions().getMaxSourceParallelism() > 1;

  final WindowingStrategy<?, BoundedWindow> strategy =
      ctx.getPortableWindowStrategy(transform, pipeline);
  final Coder<BoundedWindow> boundedWindowCoder = strategy.getWindowFn().windowCoder();

  // Resolve the input coders from the pipeline components.
  final String inputPCollectionId = ctx.getInputId(transform);
  final WindowedValue.WindowedValueCoder<KV<K, InputT>> windowedCoder =
      ctx.instantiateCoder(inputPCollectionId, pipeline.getComponents());
  final KvCoder<K, InputT> kvCoder = (KvCoder<K, InputT>) windowedCoder.getValueCoder();
  final Coder<WindowedValue<KV<K, InputT>>> windowedElementCoder =
      WindowedValue.FullWindowedValueCoder.of(kvCoder, boundedWindowCoder);

  // The transform is expected to declare exactly one output; its key becomes the tag id.
  final TupleTag<KV<K, OutputT>> mainOutputTag =
      new TupleTag<>(
          Iterables.getOnlyElement(transform.getTransform().getOutputsMap().keySet()));

  @SuppressWarnings("unchecked")
  final SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> bufferingFn =
      (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
          SystemReduceFn.buffering(kvCoder.getValueCoder());

  final RunnerApi.PCollection inputProto =
      pipeline.getComponents().getPcollectionsOrThrow(inputPCollectionId);
  final PCollection.IsBounded boundedness = SamzaPipelineTranslatorUtils.isBounded(inputProto);

  final MessageStream<OpMessage<KV<K, OutputT>>> groupedStream =
      doTranslateGBK(
          sourceStream,
          repartition,
          bufferingFn,
          strategy,
          kvCoder,
          windowedElementCoder,
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          mainOutputTag,
          boundedness);

  ctx.registerMessageStream(ctx.getOutputId(transform), groupedStream);
}