org.apache.beam.runners.core.SystemReduceFn Java Examples
The following examples show how to use
org.apache.beam.runners.core.SystemReduceFn.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PipelineTranslator.java From incubator-nemo with Apache License 2.0 | 6 votes |
/**
 * Builds the transform used to execute a Beam group-by-key node.
 *
 * <p>When {@code isGlobalWindow} reports true for the node a plain {@link GroupByKeyTransform}
 * is returned; otherwise a {@link GroupByKeyAndWindowDoFnTransform} driven by a buffering
 * {@link SystemReduceFn} over the main input's coder is returned.
 *
 * @param ctx translation context
 * @param beamNode the beam node to be translated
 * @return group by key transform
 */
private static Transform createGBKTransform(
    final PipelineTranslationContext ctx,
    final TransformHierarchy.Node beamNode) {
  final AppliedPTransform appliedTransform = beamNode.toAppliedPTransform(ctx.getPipeline());
  final PCollection<?> inputCollection = (PCollection<?>)
      Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(appliedTransform));
  // Created up front (as before), even though only the windowed path consumes it.
  final TupleTag outputTag = new TupleTag<>();

  if (!isGlobalWindow(beamNode, ctx.getPipeline())) {
    return new GroupByKeyAndWindowDoFnTransform(
        getOutputCoders(appliedTransform),
        outputTag,
        inputCollection.getWindowingStrategy(),
        ctx.getPipelineOptions(),
        SystemReduceFn.buffering(inputCollection.getCoder()),
        DisplayData.from(beamNode.getTransform()));
  }
  return new GroupByKeyTransform();
}
Example #2
Source File: GroupByWindowFunction.java From beam with Apache License 2.0 | 6 votes |
/**
 * Stores the windowing strategy, reduce function and options, and eagerly converts the
 * windowing strategy to its proto message and byte-array forms.
 *
 * @param windowingStrategy windowing strategy to group by
 * @param reduceFn buffering reduce function applied per key and window
 * @param options pipeline options; also kept in string-serialized form
 */
public GroupByWindowFunction(
    WindowingStrategy<?, W> windowingStrategy,
    SystemReduceFn<K, V, Iterable<V>, Iterable<V>, W> reduceFn,
    PipelineOptions options) {
  this.windowingStrategy = windowingStrategy;
  this.reduceFn = reduceFn;
  this.options = options;
  this.serializedOptions = new SerializablePipelineOptions(options).toString();

  // The proto translation needs an SdkComponents with an environment registered.
  final SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(options.as(PortablePipelineOptions.class)));

  try {
    windowStrategyProto =
        WindowingStrategyTranslation.toMessageProto(windowingStrategy, sdkComponents);
    windowBytes = windowStrategyProto.toByteArray();
  } catch (IOException e) {
    // NOTE(review): the failure is only logged at info level with its message, so
    // windowStrategyProto/windowBytes stay unset afterwards - confirm this is intended.
    LOG.info(e.getMessage());
  }
}
Example #3
Source File: GroupByKeyTranslator.java From beam with Apache License 2.0 | 6 votes |
/**
 * Selects the {@link SystemReduceFn} matching the given transform.
 *
 * <p>A {@link GroupByKey} gets a buffering reduce function over the value coder; a
 * {@link Combine.PerKey} gets a combining reduce function built from its combine fn.
 *
 * @param transform the transform being translated
 * @param pipeline pipeline whose coder registry is used for the combine fn
 * @param kvInputCoder coder of the keyed input elements
 * @return the reduce function for the transform
 * @throws RuntimeException if the transform is neither a GroupByKey nor a Combine.PerKey
 */
@SuppressWarnings("unchecked")
private static <K, InputT, OutputT>
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> getSystemReduceFn(
        PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
        Pipeline pipeline,
        KvCoder<K, InputT> kvInputCoder) {
  if (transform instanceof GroupByKey) {
    return (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
        SystemReduceFn.buffering(kvInputCoder.getValueCoder());
  }

  if (transform instanceof Combine.PerKey) {
    final CombineFnBase.GlobalCombineFn<? super InputT, ?, OutputT> perKeyFn =
        ((Combine.PerKey) transform).getFn();
    return SystemReduceFn.combining(
        kvInputCoder.getKeyCoder(),
        AppliedCombineFn.withInputCoder(perKeyFn, pipeline.getCoderRegistry(), kvInputCoder));
  }

  throw new RuntimeException("Transform " + transform + " cannot be translated as GroupByKey.");
}
Example #4
Source File: GroupByKeyOp.java From beam with Apache License 2.0 | 6 votes |
public GroupByKeyOp( TupleTag<KV<K, OutputT>> mainOutputTag, Coder<KeyedWorkItem<K, InputT>> inputCoder, SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> reduceFn, WindowingStrategy<?, BoundedWindow> windowingStrategy, OutputManagerFactory<KV<K, OutputT>> outputManagerFactory, String transformFullName, String transformId, IsBounded isBounded) { this.mainOutputTag = mainOutputTag; this.windowingStrategy = windowingStrategy; this.outputManagerFactory = outputManagerFactory; this.transformFullName = transformFullName; this.transformId = transformId; this.isBounded = isBounded; if (!(inputCoder instanceof KeyedWorkItemCoder)) { throw new IllegalArgumentException( String.format( "GroupByKeyOp requires input to use KeyedWorkItemCoder. Got: %s", inputCoder.getClass())); } this.inputCoder = (KeyedWorkItemCoder<K, InputT>) inputCoder; this.keyCoder = this.inputCoder.getKeyCoder(); this.reduceFn = reduceFn; }
Example #5
Source File: WindowDoFnOperator.java From beam with Apache License 2.0 | 6 votes |
@Override protected DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> getDoFn() { // this will implicitly be keyed by the key of the incoming // element or by the key of a firing timer StateInternalsFactory<K> stateInternalsFactory = key -> (StateInternals) keyedStateInternals; // this will implicitly be keyed like the StateInternalsFactory TimerInternalsFactory<K> timerInternalsFactory = key -> timerInternals; // we have to do the unchecked cast because GroupAlsoByWindowViaWindowSetDoFn.create // has the window type as generic parameter while WindowingStrategy is almost always // untyped. @SuppressWarnings("unchecked") DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFn = GroupAlsoByWindowViaWindowSetNewDoFn.create( windowingStrategy, stateInternalsFactory, timerInternalsFactory, sideInputReader, (SystemReduceFn) systemReduceFn, outputManager, mainOutputTag); return doFn; }
Example #6
Source File: GroupAlsoByWindowEvaluatorFactory.java From beam with Apache License 2.0 | 5 votes |
public GroupAlsoByWindowEvaluator( final EvaluationContext evaluationContext, PipelineOptions options, CommittedBundle<KeyedWorkItem<K, V>> inputBundle, final AppliedPTransform< PCollection<KeyedWorkItem<K, V>>, PCollection<KV<K, Iterable<V>>>, DirectGroupAlsoByWindow<K, V>> application) { this.evaluationContext = evaluationContext; this.options = options; this.application = application; structuralKey = inputBundle.getKey(); stepContext = evaluationContext .getExecutionContext(application, inputBundle.getKey()) .getStepContext(evaluationContext.getStepName(application)); windowingStrategy = (WindowingStrategy<?, BoundedWindow>) application.getTransform().getInputWindowingStrategy(); outputBundles = new ArrayList<>(); unprocessedElements = ImmutableList.builder(); Coder<V> valueCoder = application.getTransform().getValueCoder(inputBundle.getPCollection().getCoder()); reduceFn = SystemReduceFn.buffering(valueCoder); droppedDueToLateness = Metrics.counter( GroupAlsoByWindowEvaluator.class, GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_LATENESS_COUNTER); }
Example #7
Source File: CombiningGroupAlsoByWindowsViaOutputBufferDoFnTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a batch group-also-by-window function that combines values with this factory's
 * {@code combineFn}, keyed using {@code keyCoder}.
 *
 * @param windowingStrategy windowing strategy to group by
 * @param stateInternalsFactory factory for per-key state
 * @return the output-buffer based group-also-by-window function
 */
@Override
public <W extends BoundedWindow> BatchGroupAlsoByWindowFn<K, InputT, OutputT> forStrategy(
    WindowingStrategy<?, W> windowingStrategy,
    StateInternalsFactory<K> stateInternalsFactory) {
  final SystemReduceFn<K, InputT, AccumT, OutputT, W> combiningReduceFn =
      SystemReduceFn.<K, InputT, AccumT, OutputT, W>combining(keyCoder, combineFn);
  return new BatchGroupAlsoByWindowViaOutputBufferFn<>(
      windowingStrategy, stateInternalsFactory, combiningReduceFn);
}
Example #8
Source File: GroupAlsoByWindowViaOutputBufferDoFnTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a batch group-also-by-window function that buffers values using this factory's
 * {@code inputCoder}.
 *
 * @param windowingStrategy windowing strategy to group by
 * @param stateInternalsFactory factory for per-key state
 * @return the output-buffer based group-also-by-window function
 */
@Override
public <W extends BoundedWindow>
    BatchGroupAlsoByWindowFn<K, InputT, Iterable<InputT>> forStrategy(
        WindowingStrategy<?, W> windowingStrategy,
        StateInternalsFactory<K> stateInternalsFactory) {
  final SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> bufferingReduceFn =
      SystemReduceFn.<K, InputT, W>buffering(inputCoder);
  return new BatchGroupAlsoByWindowViaOutputBufferFn<K, InputT, Iterable<InputT>, W>(
      windowingStrategy, stateInternalsFactory, bufferingReduceFn);
}
Example #9
Source File: StreamingGroupAlsoByWindowViaWindowSetFn.java From beam with Apache License 2.0 | 5 votes |
/**
 * Stores the collaborators; the windowing strategy is kept with its element type widened
 * from a wildcard to {@code Object}.
 *
 * @param windowingStrategy windowing strategy to group by
 * @param stateInternalsFactory factory for per-key state
 * @param reduceFn reduce function applied per key and window
 */
private StreamingGroupAlsoByWindowViaWindowSetFn(
    WindowingStrategy<?, W> windowingStrategy,
    StateInternalsFactory<K> stateInternalsFactory,
    SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn) {
  this.stateInternalsFactory = stateInternalsFactory;
  this.reduceFn = reduceFn;

  // Replace the wildcard element type so the field can be declared without one.
  @SuppressWarnings("unchecked")
  WindowingStrategy<Object, W> objectTypedStrategy =
      (WindowingStrategy<Object, W>) windowingStrategy;
  this.windowingStrategy = objectTypedStrategy;
}
Example #10
Source File: StreamingGroupAlsoByWindowViaWindowSetFn.java From beam with Apache License 2.0 | 5 votes |
/**
 * Static factory for {@link StreamingGroupAlsoByWindowViaWindowSetFn}.
 *
 * @param strategy windowing strategy to group by
 * @param stateInternalsFactory factory for per-key state
 * @param reduceFn reduce function applied per key and window
 * @return a new group-also-by-window function
 */
public static <K, InputT, OutputT, W extends BoundedWindow>
    GroupAlsoByWindowFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> create(
        WindowingStrategy<?, W> strategy,
        StateInternalsFactory<K> stateInternalsFactory,
        SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn) {
  final GroupAlsoByWindowFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> windowSetFn =
      new StreamingGroupAlsoByWindowViaWindowSetFn<>(strategy, stateInternalsFactory, reduceFn);
  return windowSetFn;
}
Example #11
Source File: BatchGroupAlsoByWindowViaOutputBufferFn.java From beam with Apache License 2.0 | 5 votes |
/**
 * Stores the windowing strategy, state factory and reduce function for later use.
 *
 * @param windowingStrategy windowing strategy to group by
 * @param stateInternalsFactory factory for per-key state
 * @param reduceFn reduce function applied per key and window
 */
public BatchGroupAlsoByWindowViaOutputBufferFn(
    WindowingStrategy<?, W> windowingStrategy,
    StateInternalsFactory<K> stateInternalsFactory,
    SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn) {
  this.stateInternalsFactory = stateInternalsFactory;
  this.reduceFn = reduceFn;
  this.strategy = windowingStrategy;
}
Example #12
Source File: BatchGroupAlsoByWindowsDoFns.java From beam with Apache License 2.0 | 5 votes |
/** * Create a {@link BatchGroupAlsoByWindowFn} without a combine function. Depending on the {@code * windowFn} this will either use iterators or window sets to implement the grouping. * * @param windowingStrategy The window function and trigger to use for grouping * @param inputCoder the input coder to use */ public static <K, V, W extends BoundedWindow> BatchGroupAlsoByWindowFn<K, V, Iterable<V>> createForIterable( WindowingStrategy<?, W> windowingStrategy, StateInternalsFactory<K> stateInternalsFactory, Coder<V> inputCoder) { // If the windowing strategy indicates we're doing a reshuffle, use the special-path. if (BatchGroupAlsoByWindowReshuffleFn.isReshuffle(windowingStrategy)) { return new BatchGroupAlsoByWindowReshuffleFn<>(); } else if (BatchGroupAlsoByWindowViaIteratorsFn.isSupported(windowingStrategy)) { return new BatchGroupAlsoByWindowViaIteratorsFn<K, V, W>(windowingStrategy); } return new BatchGroupAlsoByWindowViaOutputBufferFn<>( windowingStrategy, stateInternalsFactory, SystemReduceFn.buffering(inputCoder)); }
Example #13
Source File: StreamingGroupAlsoByWindowsDoFns.java From beam with Apache License 2.0 | 5 votes |
public static <K, V, W extends BoundedWindow> GroupAlsoByWindowFn<KeyedWorkItem<K, V>, KV<K, Iterable<V>>> createForIterable( final WindowingStrategy<?, W> windowingStrategy, StateInternalsFactory<K> stateInternalsFactory, Coder<V> inputCoder) { // If the windowing strategy indicates we're doing a reshuffle, use the special-path. if (StreamingGroupAlsoByWindowReshuffleFn.isReshuffle(windowingStrategy)) { return new StreamingGroupAlsoByWindowReshuffleFn<>(); } else { return StreamingGroupAlsoByWindowViaWindowSetFn.create( windowingStrategy, stateInternalsFactory, SystemReduceFn.buffering(inputCoder)); } }
Example #14
Source File: StreamingGroupAlsoByWindowsDoFns.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a streaming group-also-by-window function that combines values with
 * {@code combineFn}.
 *
 * @param windowingStrategy windowing strategy to group by
 * @param stateInternalsFactory factory for per-key state
 * @param combineFn applied combine function; checked for null
 * @param keyCoder coder of the keys
 * @return a window-set based function with a combining {@link SystemReduceFn}
 */
public static <K, InputT, AccumT, OutputT, W extends BoundedWindow>
    GroupAlsoByWindowFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> create(
        final WindowingStrategy<?, W> windowingStrategy,
        StateInternalsFactory<K> stateInternalsFactory,
        final AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn,
        final Coder<K> keyCoder) {
  Preconditions.checkNotNull(combineFn);

  final SystemReduceFn<K, InputT, AccumT, OutputT, W> combiningReduceFn =
      SystemReduceFn.combining(keyCoder, combineFn);
  return StreamingGroupAlsoByWindowViaWindowSetFn.create(
      windowingStrategy, stateInternalsFactory, combiningReduceFn);
}
Example #15
Source File: GroupByKeyTranslator.java From beam with Apache License 2.0 | 5 votes |
/**
 * Translates a keyed grouping transform: resolves its input/output collections, coders and
 * reduce function, delegates to {@code doTranslateGBK}, and registers the resulting
 * message stream for the output collection.
 *
 * @param transform the transform being translated
 * @param node hierarchy node of the transform, used to decide on repartitioning
 * @param ctx translation context
 */
private static <K, InputT, OutputT> void doTranslate(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
    TransformHierarchy.Node node,
    TranslationContext ctx) {
  final PCollection<KV<K, InputT>> input = ctx.getInput(transform);
  final PCollection<KV<K, OutputT>> output = ctx.getOutput(transform);
  final TupleTag<KV<K, OutputT>> outputTag = ctx.getOutputTag(transform);

  @SuppressWarnings("unchecked")
  final WindowingStrategy<?, BoundedWindow> windowingStrategy =
      (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();

  final MessageStream<OpMessage<KV<K, InputT>>> inputStream = ctx.getMessageStream(input);

  // Coders: the KV coder of the raw elements and the coder of the windowed elements.
  final KvCoder<K, InputT> kvInputCoder = (KvCoder<K, InputT>) input.getCoder();
  final Coder<WindowedValue<KV<K, InputT>>> elementCoder = SamzaCoders.of(input);

  final SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> reduceFn =
      getSystemReduceFn(transform, input.getPipeline(), kvInputCoder);

  final boolean repartition = needRepartition(node, ctx);
  final String transformFullName = ctx.getTransformFullName();
  final String transformId = ctx.getTransformId();

  final MessageStream<OpMessage<KV<K, OutputT>>> groupedStream =
      doTranslateGBK(
          inputStream,
          repartition,
          reduceFn,
          windowingStrategy,
          kvInputCoder,
          elementCoder,
          transformFullName,
          transformId,
          outputTag,
          input.isBounded());

  ctx.registerMessageStream(output, groupedStream);
}
Example #16
Source File: WindowDoFnOperatorTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds a {@link WindowDoFnOperator} that sums {@code Long} values per {@code Long} key in
 * one-minute fixed windows, for use by the tests.
 */
private WindowDoFnOperator<Long, Long, Long> getWindowDoFnOperator() {
  final VarLongCoder longCoder = VarLongCoder.of();

  WindowingStrategy<Object, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(FixedWindows.of(standardMinutes(1)));
  TupleTag<KV<Long, Long>> outputTag = new TupleTag<>("main-output");

  // Combining reduce function: sum of longs keyed by a long.
  SystemReduceFn<Long, Long, long[], Long, BoundedWindow> reduceFn =
      SystemReduceFn.combining(
          longCoder,
          AppliedCombineFn.withInputCoder(
              Sum.ofLongs(), CoderRegistry.createDefault(), KvCoder.of(longCoder, longCoder)));

  // Input side: windowed singleton keyed work items.
  Coder<IntervalWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();
  SingletonKeyedWorkItemCoder<Long, Long> workItemCoder =
      SingletonKeyedWorkItemCoder.of(longCoder, longCoder, windowCoder);
  FullWindowedValueCoder<SingletonKeyedWorkItem<Long, Long>> inputCoder =
      WindowedValue.getFullCoder(workItemCoder, windowCoder);

  // Output side: windowed key/sum pairs.
  FullWindowedValueCoder<KV<Long, Long>> outputCoder =
      WindowedValue.getFullCoder(KvCoder.of(longCoder, longCoder), windowCoder);

  return new WindowDoFnOperator<Long, Long, Long>(
      reduceFn,
      "stepName",
      (Coder) inputCoder,
      outputTag,
      emptyList(),
      new MultiOutputOutputManagerFactory<>(outputTag, outputCoder),
      windowingStrategy,
      emptyMap(),
      emptyList(),
      PipelineOptionsFactory.as(FlinkPipelineOptions.class),
      longCoder,
      new WorkItemKeySelector(longCoder));
}
Example #17
Source File: WindowDoFnOperator.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates the operator. Everything except {@code systemReduceFn} is forwarded to the
 * superclass constructor; the reduce function is stored on this instance.
 *
 * @param systemReduceFn reduce function applied per key and window
 * @param stepName name of the step
 * @param windowedInputCoder coder of the windowed keyed work items
 * @param mainOutputTag tag of the main output
 * @param additionalOutputTags tags of additional outputs
 * @param outputManagerFactory factory for the output manager
 * @param windowingStrategy windowing strategy of the input
 * @param sideInputTagMapping mapping from integer tag to side input view
 * @param sideInputs side input views
 * @param options pipeline options
 * @param keyCoder coder of the keys
 * @param keySelector selector extracting the key from a windowed work item
 */
public WindowDoFnOperator(
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> systemReduceFn,
    String stepName,
    Coder<WindowedValue<KeyedWorkItem<K, InputT>>> windowedInputCoder,
    TupleTag<KV<K, OutputT>> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    OutputManagerFactory<KV<K, OutputT>> outputManagerFactory,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<Integer, PCollectionView<?>> sideInputTagMapping,
    Collection<PCollectionView<?>> sideInputs,
    PipelineOptions options,
    Coder<K> keyCoder,
    KeySelector<WindowedValue<KeyedWorkItem<K, InputT>>, ?> keySelector) {
  super(
      // First argument is passed as null; this class overrides getDoFn() elsewhere in the
      // listing - NOTE(review): presumably this slot is the DoFn, confirm against the parent.
      null,
      stepName,
      windowedInputCoder,
      Collections.emptyMap(),
      mainOutputTag,
      additionalOutputTags,
      outputManagerFactory,
      windowingStrategy,
      sideInputTagMapping,
      sideInputs,
      options,
      keyCoder,
      keySelector,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());

  this.systemReduceFn = systemReduceFn;
}
Example #18
Source File: SparkBatchPortablePipelineTranslator.java From beam with Apache License 2.0 | 5 votes |
private static <K, V> void translateGroupByKey( PTransformNode transformNode, RunnerApi.Pipeline pipeline, SparkTranslationContext context) { RunnerApi.Components components = pipeline.getComponents(); String inputId = getInputId(transformNode); Dataset inputDataset = context.popDataset(inputId); JavaRDD<WindowedValue<KV<K, V>>> inputRdd = ((BoundedDataset<KV<K, V>>) inputDataset).getRDD(); WindowedValueCoder<KV<K, V>> inputCoder = getWindowedValueCoder(inputId, components); KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) inputCoder.getValueCoder(); Coder<K> inputKeyCoder = inputKvCoder.getKeyCoder(); Coder<V> inputValueCoder = inputKvCoder.getValueCoder(); WindowingStrategy windowingStrategy = getWindowingStrategy(inputId, components); WindowFn<Object, BoundedWindow> windowFn = windowingStrategy.getWindowFn(); WindowedValue.WindowedValueCoder<V> wvCoder = WindowedValue.FullWindowedValueCoder.of(inputValueCoder, windowFn.windowCoder()); JavaRDD<WindowedValue<KV<K, Iterable<V>>>> groupedByKeyAndWindow; Partitioner partitioner = getPartitioner(context); if (GroupNonMergingWindowsFunctions.isEligibleForGroupByWindow(windowingStrategy)) { // we can have a memory sensitive translation for non-merging windows groupedByKeyAndWindow = GroupNonMergingWindowsFunctions.groupByKeyAndWindow( inputRdd, inputKeyCoder, inputValueCoder, windowingStrategy, partitioner); } else { JavaRDD<KV<K, Iterable<WindowedValue<V>>>> groupedByKeyOnly = GroupCombineFunctions.groupByKeyOnly(inputRdd, inputKeyCoder, wvCoder, partitioner); // for batch, GroupAlsoByWindow uses an in-memory StateInternals. groupedByKeyAndWindow = groupedByKeyOnly.flatMap( new SparkGroupAlsoByWindowViaOutputBufferFn<>( windowingStrategy, new TranslationUtils.InMemoryStateInternalsFactory<>(), SystemReduceFn.buffering(inputValueCoder), context.serializablePipelineOptions)); } context.pushDataset(getOutputId(transformNode), new BoundedDataset<>(groupedByKeyAndWindow)); }
Example #19
Source File: SparkGroupAlsoByWindowViaOutputBufferFn.java From beam with Apache License 2.0 | 5 votes |
/**
 * Stores the collaborators needed to group values by window.
 *
 * @param windowingStrategy windowing strategy to group by
 * @param stateInternalsFactory factory for per-key state
 * @param reduceFn buffering reduce function applied per key and window
 * @param options serializable pipeline options
 */
public SparkGroupAlsoByWindowViaOutputBufferFn(
    WindowingStrategy<?, W> windowingStrategy,
    StateInternalsFactory<K> stateInternalsFactory,
    SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> reduceFn,
    SerializablePipelineOptions options) {
  this.options = options;
  this.reduceFn = reduceFn;
  this.stateInternalsFactory = stateInternalsFactory;
  this.windowingStrategy = windowingStrategy;
}
Example #20
Source File: SparkGroupAlsoByWindowViaWindowSet.java From beam with Apache License 2.0 | 5 votes |
/**
 * Stores the input iterator, the reduce function and the late-data counter.
 *
 * @param input iterator over (key bytes, value byte arrays, optional previous state) tuples
 * @param reduceFn buffering reduce function applied per key and window
 * @param droppedDueToLateness counter cell for elements dropped due to lateness
 */
UpdateStateByKeyOutputIterator(
    final Iterator<Tuple3<ByteArray, Seq<byte[]>, Option<Tuple2<StateAndTimers, List<byte[]>>>>>
        input,
    final SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> reduceFn,
    final CounterCell droppedDueToLateness) {
  this.droppedDueToLateness = droppedDueToLateness;
  this.reduceFn = reduceFn;
  this.input = input;
}
Example #21
Source File: GroupAlsoByWindowViaOutputBufferFn.java From beam with Apache License 2.0 | 5 votes |
/**
 * Stores the collaborators needed to group values by window.
 *
 * @param windowingStrategy windowing strategy to group by
 * @param stateInternalsFactory factory for per-key state
 * @param reduceFn buffering reduce function applied per key and window
 * @param options serializable pipeline options
 */
public GroupAlsoByWindowViaOutputBufferFn(
    WindowingStrategy<?, W> windowingStrategy,
    StateInternalsFactory<K> stateInternalsFactory,
    SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> reduceFn,
    SerializablePipelineOptions options) {
  this.options = options;
  this.reduceFn = reduceFn;
  this.stateInternalsFactory = stateInternalsFactory;
  this.windowingStrategy = windowingStrategy;
}
Example #22
Source File: GroupByKeyTranslatorBatch.java From beam with Apache License 2.0 | 5 votes |
@Override public void translateTransform( PTransform<PCollection<KV<K, V>>, PCollection<KV<K, Iterable<V>>>> transform, TranslationContext context) { @SuppressWarnings("unchecked") final PCollection<KV<K, V>> inputPCollection = (PCollection<KV<K, V>>) context.getInput(); Dataset<WindowedValue<KV<K, V>>> input = context.getDataset(inputPCollection); WindowingStrategy<?, ?> windowingStrategy = inputPCollection.getWindowingStrategy(); KvCoder<K, V> kvCoder = (KvCoder<K, V>) inputPCollection.getCoder(); Coder<V> valueCoder = kvCoder.getValueCoder(); // group by key only Coder<K> keyCoder = kvCoder.getKeyCoder(); KeyValueGroupedDataset<K, WindowedValue<KV<K, V>>> groupByKeyOnly = input.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder)); // group also by windows WindowedValue.FullWindowedValueCoder<KV<K, Iterable<V>>> outputCoder = WindowedValue.FullWindowedValueCoder.of( KvCoder.of(keyCoder, IterableCoder.of(valueCoder)), windowingStrategy.getWindowFn().windowCoder()); Dataset<WindowedValue<KV<K, Iterable<V>>>> output = groupByKeyOnly.flatMapGroups( new GroupAlsoByWindowViaOutputBufferFn<>( windowingStrategy, new InMemoryStateInternalsFactory<>(), SystemReduceFn.buffering(valueCoder), context.getSerializableOptions()), EncoderHelpers.fromBeamCoder(outputCoder)); context.putDataset(context.getOutput(), output); }
Example #23
Source File: GroupByKeyTranslatorBatch.java From twister2 with Apache License 2.0 | 5 votes |
@Override public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) { PCollection<KV<K, V>> input = context.getInput(transform); BatchTSetImpl<WindowedValue<KV<K, V>>> inputTTSet = context.getInputDataSet(input); final KvCoder<K, V> coder = (KvCoder<K, V>) context.getInput(transform).getCoder(); Coder<K> inputKeyCoder = ((KvCoder<K, V>) input.getCoder()).getKeyCoder(); WindowingStrategy windowingStrategy = input.getWindowingStrategy(); WindowFn<KV<K, V>, BoundedWindow> windowFn = (WindowFn<KV<K, V>, BoundedWindow>) windowingStrategy.getWindowFn(); final WindowedValue.WindowedValueCoder<V> wvCoder = WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder()); KeyedTSet<byte[], byte[]> keyedTSet = inputTTSet.mapToTuple(new MapToTupleFunction<K, V>(inputKeyCoder, wvCoder)); // todo add support for a partition function to be specified, this would use // todo keyedPartition function instead of KeyedGather ComputeTSet<KV<K, Iterable<WindowedValue<V>>>, Iterator<Tuple<byte[], Iterator<byte[]>>>> groupedbyKeyTset = keyedTSet.keyedGather().map(new ByteToWindowFunction(inputKeyCoder, wvCoder)); // --- now group also by window. ComputeTSet<WindowedValue<KV<K, Iterable<V>>>, Iterable<KV<K, Iterator<WindowedValue<V>>>>> outputTset = groupedbyKeyTset .direct() .<WindowedValue<KV<K, Iterable<V>>>>flatmap( new GroupByWindowFunction( windowingStrategy, SystemReduceFn.buffering(coder.getValueCoder()))); PCollection output = context.getOutput(transform); context.setOutputDataSet(output, outputTset); }
Example #24
Source File: FlinkStreamingPortablePipelineTranslator.java From beam with Apache License 2.0 | 4 votes |
/**
 * Appends a group-by-key stage to the given stream: elements are converted to keyed work
 * items, keyed by the encoded key, and passed through a {@link WindowDoFnOperator} with a
 * buffering {@link SystemReduceFn}.
 *
 * @param inputDataStream stream of windowed key/value pairs
 * @param windowingStrategy windowing strategy of the input
 * @param windowedInputCoder coder of the windowed input elements
 * @param operatorName name given to the operator
 * @param context streaming translation context
 * @return the stream of grouped values
 */
private <K, V> SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> addGBK(
    DataStream<WindowedValue<KV<K, V>>> inputDataStream,
    WindowingStrategy<?, ?> windowingStrategy,
    WindowedValueCoder<KV<K, V>> windowedInputCoder,
    String operatorName,
    StreamingTranslationContext context) {
  KvCoder<K, V> inputElementCoder = (KvCoder<K, V>) windowedInputCoder.getValueCoder();
  Coder<K> keyCoder = inputElementCoder.getKeyCoder();
  Coder<V> valueCoder = inputElementCoder.getValueCoder();

  // Coders and type information for the intermediate keyed work items.
  SingletonKeyedWorkItemCoder<K, V> workItemCoder =
      SingletonKeyedWorkItemCoder.of(
          keyCoder, valueCoder, windowingStrategy.getWindowFn().windowCoder());
  WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, V>> windowedWorkItemCoder =
      WindowedValue.getFullCoder(workItemCoder, windowingStrategy.getWindowFn().windowCoder());
  CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, V>>> workItemTypeInfo =
      new CoderTypeInformation<>(windowedWorkItemCoder);

  DataStream<WindowedValue<SingletonKeyedWorkItem<K, V>>> workItemStream =
      inputDataStream
          .flatMap(
              new FlinkStreamingTransformTranslators.ToKeyedWorkItem<>(
                  context.getPipelineOptions()))
          .returns(workItemTypeInfo)
          .name("ToKeyedWorkItem");

  // Key the work items with a single selector that is also handed to the operator.
  WorkItemKeySelector<K, V> keySelector = new WorkItemKeySelector<>(keyCoder);
  KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, V>>, ByteBuffer> keyedWorkItemStream =
      workItemStream.keyBy(keySelector);

  SystemReduceFn<K, V, Iterable<V>, Iterable<V>, BoundedWindow> reduceFn =
      SystemReduceFn.buffering(valueCoder);

  // Coders and type information for the grouped output.
  Coder<Iterable<V>> accumulatorCoder = IterableCoder.of(valueCoder);
  Coder<WindowedValue<KV<K, Iterable<V>>>> outputCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(keyCoder, accumulatorCoder),
          windowingStrategy.getWindowFn().windowCoder());
  TypeInformation<WindowedValue<KV<K, Iterable<V>>>> outputTypeInfo =
      new CoderTypeInformation<>(outputCoder);

  TupleTag<KV<K, Iterable<V>>> mainTag = new TupleTag<>("main output");

  WindowDoFnOperator<K, V, Iterable<V>> doFnOperator =
      new WindowDoFnOperator<>(
          reduceFn,
          operatorName,
          (Coder) windowedWorkItemCoder,
          mainTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory(mainTag, outputCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          context.getPipelineOptions(),
          keyCoder,
          (KeySelector) keySelector /* key selector */);

  return keyedWorkItemStream.transform(
      operatorName, outputTypeInfo, (OneInputStreamOperator) doFnOperator);
}
Example #25
Source File: FlinkStreamingTransformTranslators.java From beam with Apache License 2.0 | 4 votes |
@Override public void translateNode( PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, Iterable<InputT>>>> transform, FlinkStreamingTranslationContext context) { PCollection<KV<K, InputT>> input = context.getInput(transform); @SuppressWarnings("unchecked") WindowingStrategy<?, BoundedWindow> windowingStrategy = (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy(); KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder(); SingletonKeyedWorkItemCoder<K, InputT> workItemCoder = SingletonKeyedWorkItemCoder.of( inputKvCoder.getKeyCoder(), inputKvCoder.getValueCoder(), input.getWindowingStrategy().getWindowFn().windowCoder()); DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input); WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>> windowedWorkItemCoder = WindowedValue.getFullCoder( workItemCoder, input.getWindowingStrategy().getWindowFn().windowCoder()); CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo = new CoderTypeInformation<>(windowedWorkItemCoder); DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream = inputDataStream .flatMap(new ToKeyedWorkItem<>(context.getPipelineOptions())) .returns(workItemTypeInfo) .name("ToKeyedWorkItem"); WorkItemKeySelector keySelector = new WorkItemKeySelector<>(inputKvCoder.getKeyCoder()); KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>, ByteBuffer> keyedWorkItemStream = workItemStream.keyBy(new WorkItemKeySelector<>(inputKvCoder.getKeyCoder())); SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, BoundedWindow> reduceFn = SystemReduceFn.buffering(inputKvCoder.getValueCoder()); Coder<WindowedValue<KV<K, Iterable<InputT>>>> outputCoder = context.getWindowedInputCoder(context.getOutput(transform)); TypeInformation<WindowedValue<KV<K, Iterable<InputT>>>> outputTypeInfo = context.getTypeInfo(context.getOutput(transform)); TupleTag<KV<K, Iterable<InputT>>> mainTag 
= new TupleTag<>("main output"); String fullName = getCurrentTransformName(context); WindowDoFnOperator<K, InputT, Iterable<InputT>> doFnOperator = new WindowDoFnOperator<>( reduceFn, fullName, (Coder) windowedWorkItemCoder, mainTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(mainTag, outputCoder), windowingStrategy, new HashMap<>(), /* side-input mapping */ Collections.emptyList(), /* side inputs */ context.getPipelineOptions(), inputKvCoder.getKeyCoder(), keySelector); // our operator expects WindowedValue<KeyedWorkItem> while our input stream // is WindowedValue<SingletonKeyedWorkItem>, which is fine but Java doesn't like it ... @SuppressWarnings("unchecked") SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<InputT>>>> outDataStream = keyedWorkItemStream .transform(fullName, outputTypeInfo, (OneInputStreamOperator) doFnOperator) .uid(fullName); context.setOutputDataStream(context.getOutput(transform), outDataStream); }
Example #26
Source File: GroupByKeyTranslator.java From beam with Apache License 2.0 | 4 votes |
/**
 * Translates a portable GroupByKey node: resolves the input stream, windowing strategy and
 * coders from the portable pipeline, delegates to {@code doTranslateGBK} with a buffering
 * {@link SystemReduceFn}, and registers the resulting stream under the node's output id.
 *
 * @param transform the portable transform node
 * @param pipeline the queryable portable pipeline
 * @param ctx portable translation context
 */
private static <K, InputT, OutputT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  final MessageStream<OpMessage<KV<K, InputT>>> inputStream =
      ctx.getOneInputMessageStream(transform);
  // Repartition only when the configured max source parallelism is above one.
  final boolean needRepartition = ctx.getSamzaPipelineOptions().getMaxSourceParallelism() > 1;

  final WindowingStrategy<?, BoundedWindow> windowingStrategy =
      ctx.getPortableWindowStrategy(transform, pipeline);
  final Coder<BoundedWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();

  // Coders are instantiated from the pipeline components of the input collection.
  final String inputId = ctx.getInputId(transform);
  final WindowedValue.WindowedValueCoder<KV<K, InputT>> windowedInputCoder =
      ctx.instantiateCoder(inputId, pipeline.getComponents());
  final KvCoder<K, InputT> kvInputCoder = (KvCoder<K, InputT>) windowedInputCoder.getValueCoder();
  final Coder<WindowedValue<KV<K, InputT>>> elementCoder =
      WindowedValue.FullWindowedValueCoder.of(kvInputCoder, windowCoder);

  final TupleTag<KV<K, OutputT>> outputTag =
      new TupleTag<>(
          Iterables.getOnlyElement(transform.getTransform().getOutputsMap().keySet()));

  @SuppressWarnings("unchecked")
  final SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> reduceFn =
      (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
          SystemReduceFn.buffering(kvInputCoder.getValueCoder());

  final RunnerApi.PCollection inputProto =
      pipeline.getComponents().getPcollectionsOrThrow(inputId);
  final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(inputProto);

  final MessageStream<OpMessage<KV<K, OutputT>>> groupedStream =
      doTranslateGBK(
          inputStream,
          needRepartition,
          reduceFn,
          windowingStrategy,
          kvInputCoder,
          elementCoder,
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          outputTag,
          isBounded);

  ctx.registerMessageStream(ctx.getOutputId(transform), groupedStream);
}
Example #27
Source File: GroupByKeyTranslator.java From beam with Apache License 2.0 | 4 votes |
/**
 * Builds the message-stream pipeline for a group-by-key: keeps only element messages,
 * optionally repartitions them by key, converts them to keyed work items and feeds them
 * into a {@code GroupByKeyOp}.
 *
 * @param inputStream stream of input messages
 * @param needRepartition whether to repartition the stream by key first
 * @param reduceFn reduce function applied per key and window
 * @param windowingStrategy windowing strategy of the input
 * @param kvInputCoder coder of the key/value input elements
 * @param elementCoder coder of the windowed input elements
 * @param transformFullName full name of the transform
 * @param transformId id of the transform, used in the repartition stage id
 * @param outputTag tag of the main output
 * @param isBounded boundedness of the input
 * @return stream of grouped output messages
 */
private static <K, InputT, OutputT> MessageStream<OpMessage<KV<K, OutputT>>> doTranslateGBK(
    MessageStream<OpMessage<KV<K, InputT>>> inputStream,
    boolean needRepartition,
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> reduceFn,
    WindowingStrategy<?, BoundedWindow> windowingStrategy,
    KvCoder<K, InputT> kvInputCoder,
    Coder<WindowedValue<KV<K, InputT>>> elementCoder,
    String transformFullName,
    String transformId,
    TupleTag<KV<K, OutputT>> outputTag,
    PCollection.IsBounded isBounded) {
  // Only ELEMENT messages take part in the grouping.
  final MessageStream<OpMessage<KV<K, InputT>>> elementsOnly =
      inputStream.filter(msg -> msg.getType() == OpMessage.Type.ELEMENT);

  final MessageStream<OpMessage<KV<K, InputT>>> partitionedInputStream;
  if (needRepartition) {
    partitionedInputStream =
        elementsOnly
            .partitionBy(
                msg -> msg.getElement().getValue().getKey(),
                msg -> msg.getElement(),
                KVSerde.of(
                    SamzaCoders.toSerde(kvInputCoder.getKeyCoder()),
                    SamzaCoders.toSerde(elementCoder)),
                "gbk-" + escape(transformId))
            .map(kv -> OpMessage.ofElement(kv.getValue()));
  } else {
    partitionedInputStream = elementsOnly;
  }

  final Coder<KeyedWorkItem<K, InputT>> keyedWorkItemCoder =
      KeyedWorkItemCoder.of(
          kvInputCoder.getKeyCoder(),
          kvInputCoder.getValueCoder(),
          windowingStrategy.getWindowFn().windowCoder());

  return partitionedInputStream
      .flatMap(OpAdapter.adapt(new KvToKeyedWorkItemOp<>()))
      .flatMap(
          OpAdapter.adapt(
              new GroupByKeyOp<>(
                  outputTag,
                  keyedWorkItemCoder,
                  reduceFn,
                  windowingStrategy,
                  new DoFnOp.SingleOutputManagerFactory<>(),
                  transformFullName,
                  transformId,
                  isBounded)));
}
Example #28
Source File: TransformTranslator.java From beam with Apache License 2.0 | 4 votes |
private static <K, V, W extends BoundedWindow> TransformEvaluator<GroupByKey<K, V>> groupByKey() { return new TransformEvaluator<GroupByKey<K, V>>() { @Override public void evaluate(GroupByKey<K, V> transform, EvaluationContext context) { @SuppressWarnings("unchecked") JavaRDD<WindowedValue<KV<K, V>>> inRDD = ((BoundedDataset<KV<K, V>>) context.borrowDataset(transform)).getRDD(); final KvCoder<K, V> coder = (KvCoder<K, V>) context.getInput(transform).getCoder(); @SuppressWarnings("unchecked") final WindowingStrategy<?, W> windowingStrategy = (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy(); @SuppressWarnings("unchecked") final WindowFn<Object, W> windowFn = (WindowFn<Object, W>) windowingStrategy.getWindowFn(); // --- coders. final Coder<K> keyCoder = coder.getKeyCoder(); final WindowedValue.WindowedValueCoder<V> wvCoder = WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder()); JavaRDD<WindowedValue<KV<K, Iterable<V>>>> groupedByKey; Partitioner partitioner = getPartitioner(context); if (GroupNonMergingWindowsFunctions.isEligibleForGroupByWindow(windowingStrategy)) { // we can have a memory sensitive translation for non-merging windows groupedByKey = GroupNonMergingWindowsFunctions.groupByKeyAndWindow( inRDD, keyCoder, coder.getValueCoder(), windowingStrategy, partitioner); } else { // --- group by key only. JavaRDD<KV<K, Iterable<WindowedValue<V>>>> groupedByKeyOnly = GroupCombineFunctions.groupByKeyOnly(inRDD, keyCoder, wvCoder, partitioner); // --- now group also by window. // for batch, GroupAlsoByWindow uses an in-memory StateInternals. 
groupedByKey = groupedByKeyOnly.flatMap( new SparkGroupAlsoByWindowViaOutputBufferFn<>( windowingStrategy, new TranslationUtils.InMemoryStateInternalsFactory<>(), SystemReduceFn.buffering(coder.getValueCoder()), context.getSerializableOptions())); } context.putDataset(transform, new BoundedDataset<>(groupedByKey)); } @Override public String toNativeString() { return "groupByKey()"; } }; }
Example #29
Source File: SparkGroupAlsoByWindowViaWindowSet.java From beam with Apache License 2.0 | 4 votes |
/**
 * Stateful update step applied to one partition of the co-grouped input and previous state.
 *
 * <p>Each input entry is a (key, new values, optional previous state) triple, which gives three
 * scenarios:
 *
 * <ol>
 *   <li>the previous state is empty: new data with no previous state;
 *   <li>the value sequence is empty: no new data, only the previous state is evaluated
 *       (timer-like behaviour);
 *   <li>both are present: new data on top of previous state.
 * </ol>
 *
 * @param input iterator over the (key, values, optional state) triples of the partition
 * @return iterator over (key, (updated state, encoded outputs)) pairs
 */
@Override
public Iterator<
        Tuple2</*K*/ ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/ List<byte[]>>>>
    apply(
        final Iterator<
                Tuple3<
                    /*K*/ ByteArray,
                    Seq</*WV<I>*/ byte[]>,
                    Option<Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/ List<byte[]>>>>>
            input) {
  final SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> bufferingFn =
      SystemReduceFn.buffering(wvCoder.getValueCoder());

  final MetricsContainerImpl metrics = new MetricsContainerImpl("cellProvider");

  final MetricName closedWindowMetric =
      MetricName.named(
          SparkGroupAlsoByWindowViaWindowSet.class,
          GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_CLOSED_WINDOW_COUNTER);
  final CounterCell closedWindowCounter = metrics.getCounter(closedWindowMetric);

  final MetricName latenessMetric =
      MetricName.named(
          SparkGroupAlsoByWindowViaWindowSet.class,
          GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_LATENESS_COUNTER);
  final CounterCell latenessCounter = metrics.getCounter(latenessMetric);

  // Report and reset any drop counts accumulated so far.
  // NOTE(review): both counters come from a container created just above, so these checks
  // presumably never fire at this point -- confirm whether the logging was meant to run after
  // the output iterator has been consumed.
  final long droppedLate = latenessCounter.getCumulative();
  if (droppedLate > 0) {
    LOG.info(String.format("Dropped %d elements due to lateness.", droppedLate));
    latenessCounter.inc(-latenessCounter.getCumulative());
  }

  final long droppedClosed = closedWindowCounter.getCumulative();
  if (droppedClosed > 0) {
    LOG.info(String.format("Dropped %d elements due to closed window.", droppedClosed));
    closedWindowCounter.inc(-closedWindowCounter.getCumulative());
  }

  // Only the lateness counter is handed to the output iterator.
  final UpdateStateByKeyOutputIterator updated =
      new UpdateStateByKeyOutputIterator(input, bufferingFn, latenessCounter);
  return scala.collection.JavaConversions.asScalaIterator(updated);
}
Example #30
Source File: WindowGroupP.java From beam with Apache License 2.0 | 4 votes |
/**
 * Sets up the per-key machinery: in-memory timer and state internals, and a
 * {@code ReduceFnRunner} that encodes every emitted grouping and appends it to
 * {@code appendableTraverser}. Finishes by calling {@code advanceWatermark} with
 * {@code latestWatermark} and the current instant.
 *
 * @param key the key this manager is responsible for
 */
KeyManager(K key) {
  this.timerInternals = new InMemoryTimerInternals();
  this.stateInternals = new InMemoryStateInternalsImpl(key);

  // Trigger state machine built from the proto form of the strategy's trigger.
  final ExecutableTriggerStateMachine triggerMachine =
      ExecutableTriggerStateMachine.create(
          TriggerStateMachines.stateMachineForTrigger(
              TriggerTranslation.toProto(windowingStrategy.getTrigger())));

  // Receiver for grouped output: the main output is encoded and appended; any tagged
  // (additional) output is rejected.
  final OutputWindowedValue<KV<K, Iterable<V>>> groupedOutput =
      new OutputWindowedValue<KV<K, Iterable<V>>>() {
        @Override
        public void outputWindowedValue(
            KV<K, Iterable<V>> output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          WindowedValue<KV<K, Iterable<V>>> grouped =
              WindowedValue.of(output, timestamp, windows, pane);
          byte[] payload = Utils.encode(grouped, outputCoder);
          //noinspection ResultOfMethodCallIgnored
          appendableTraverser.append(payload);
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(
            TupleTag<AdditionalOutputT> tag,
            AdditionalOutputT output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          throw new UnsupportedOperationException("Grouping should not use side outputs");
        }
      };

  this.reduceFnRunner =
      new ReduceFnRunner<>(
          key,
          windowingStrategy,
          triggerMachine,
          stateInternals,
          timerInternals,
          groupedOutput,
          NullSideInputReader.empty(),
          SystemReduceFn.buffering(inputValueValueCoder),
          pipelineOptions.get());

  advanceWatermark(latestWatermark, Instant.now());
}