Java Code Examples for org.apache.beam.sdk.transforms.Combine#CombineFn
The following examples show how to use org.apache.beam.sdk.transforms.Combine#CombineFn.
Each example links to the original project and source file.
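Most of the examples below are runner or SDK internals that consume a user-supplied CombineFn. As a reminder of the contract they all rely on, here is a minimal sketch of a CombineFn; the AverageFn class and its field names are illustrative and not taken from any example below.

// A minimal sketch (illustrative, not from the examples below): a CombineFn that averages Integers.
import java.io.Serializable;
import org.apache.beam.sdk.transforms.Combine;

public class AverageFn extends Combine.CombineFn<Integer, AverageFn.Accum, Double> {

  // Accumulator: running count and sum. Serializable so a SerializableCoder
  // can be inferred for it by the coder registry.
  public static class Accum implements Serializable {
    long count = 0;
    long sum = 0;
  }

  @Override
  public Accum createAccumulator() {
    return new Accum();
  }

  @Override
  public Accum addInput(Accum accum, Integer input) {
    accum.count++;
    accum.sum += input;
    return accum;
  }

  @Override
  public Accum mergeAccumulators(Iterable<Accum> accums) {
    Accum merged = createAccumulator();
    for (Accum accum : accums) {
      merged.count += accum.count;
      merged.sum += accum.sum;
    }
    return merged;
  }

  @Override
  public Double extractOutput(Accum accum) {
    return accum.count == 0 ? 0.0 : ((double) accum.sum) / accum.count;
  }
}

A function like this is applied with Combine.globally(new AverageFn()) or Combine.perKey(new AverageFn()); the combineFn parameters in the examples below are what those transforms hand to the runner.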
Example 1
Source File: AggregatorCombiner.java (from beam, Apache License 2.0)
public AggregatorCombiner(
    Combine.CombineFn<InputT, AccumT, OutputT> combineFn,
    WindowingStrategy<?, ?> windowingStrategy,
    Coder<AccumT> accumulatorCoder,
    Coder<OutputT> outputCoder) {
  this.combineFn = combineFn;
  this.windowingStrategy = (WindowingStrategy<InputT, W>) windowingStrategy;
  this.timestampCombiner = windowingStrategy.getTimestampCombiner();
  this.accumulatorCoder =
      IterableCoder.of(
          WindowedValue.FullWindowedValueCoder.of(
              accumulatorCoder, windowingStrategy.getWindowFn().windowCoder()));
  this.outputCoder =
      IterableCoder.of(
          WindowedValue.FullWindowedValueCoder.of(
              outputCoder, windowingStrategy.getWindowFn().windowCoder()));
}
Example 2
Source File: FlinkBroadcastStateInternals.java (from beam, Apache License 2.0)
FlinkKeyedCombiningState(
    OperatorStateBackend flinkStateBackend,
    StateTag<CombiningState<InputT, AccumT, OutputT>> address,
    Combine.CombineFn<InputT, AccumT, OutputT> combineFn,
    StateNamespace namespace,
    Coder<AccumT> accumCoder,
    FlinkBroadcastStateInternals<K2> flinkStateInternals) {
  super(flinkStateBackend, address.getId(), namespace, accumCoder);
  this.namespace = namespace;
  this.address = address;
  this.combineFn = combineFn;
  this.flinkStateInternals = flinkStateInternals;
}
Example 3
Source File: FlinkBroadcastStateInternals.java (from beam, Apache License 2.0)
FlinkCombiningState(
    OperatorStateBackend flinkStateBackend,
    StateTag<CombiningState<InputT, AccumT, OutputT>> address,
    Combine.CombineFn<InputT, AccumT, OutputT> combineFn,
    StateNamespace namespace,
    Coder<AccumT> accumCoder) {
  super(flinkStateBackend, address.getId(), namespace, accumCoder);
  this.namespace = namespace;
  this.address = address;
  this.combineFn = combineFn;
}
Example 4
Source File: SamzaStoreStateInternals.java (from beam, Apache License 2.0)
protected SamzaAccumulatorCombiningState(
    StateNamespace namespace,
    StateTag<? extends State> address,
    Coder<AccumT> coder,
    Combine.CombineFn<InT, AccumT, OutT> combineFn) {
  super(namespace, address, coder);
  this.combineFn = combineFn;
}
Example 5
Source File: CombineValuesFnFactoryTest.java (from beam, Apache License 2.0)
@Test
public void testCombineValuesFnAdd() throws Exception {
  TestReceiver receiver = new TestReceiver();
  MeanInts mean = new MeanInts();

  Combine.CombineFn<Integer, CountSum, String> combiner = mean;

  ParDoFn combineParDoFn =
      createCombineValuesFn(
          CombinePhase.ADD,
          combiner,
          StringUtf8Coder.of(),
          BigEndianIntegerCoder.of(),
          new CountSumCoder(),
          WindowingStrategy.globalDefault());

  combineParDoFn.startBundle(receiver);
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(KV.of("a", Arrays.asList(5, 6, 7))));
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(KV.of("b", Arrays.asList(1, 3, 7))));
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(KV.of("c", Arrays.asList(3, 6, 8, 9))));
  combineParDoFn.finishBundle();

  Object[] expectedReceivedElems = {
    WindowedValue.valueInGlobalWindow(KV.of("a", new CountSum(3, 18))),
    WindowedValue.valueInGlobalWindow(KV.of("b", new CountSum(3, 11))),
    WindowedValue.valueInGlobalWindow(KV.of("c", new CountSum(4, 26)))
  };
  assertArrayEquals(expectedReceivedElems, receiver.receivedElems.toArray());
}
Example 6
Source File: CombineValuesFnFactoryTest.java (from beam, Apache License 2.0)
@Test
public void testCombineValuesFnMerge() throws Exception {
  TestReceiver receiver = new TestReceiver();
  MeanInts mean = new MeanInts();

  Combine.CombineFn<Integer, CountSum, String> combiner = mean;

  ParDoFn combineParDoFn =
      createCombineValuesFn(
          CombinePhase.MERGE,
          combiner,
          StringUtf8Coder.of(),
          BigEndianIntegerCoder.of(),
          new CountSumCoder(),
          WindowingStrategy.globalDefault());

  combineParDoFn.startBundle(receiver);
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(
          KV.of(
              "a",
              Arrays.asList(new CountSum(3, 6), new CountSum(2, 9), new CountSum(1, 12)))));
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(
          KV.of("b", Arrays.asList(new CountSum(2, 20), new CountSum(1, 1)))));
  combineParDoFn.finishBundle();

  Object[] expectedReceivedElems = {
    WindowedValue.valueInGlobalWindow(KV.of("a", new CountSum(6, 27))),
    WindowedValue.valueInGlobalWindow(KV.of("b", new CountSum(3, 21))),
  };
  assertArrayEquals(expectedReceivedElems, receiver.receivedElems.toArray());
}
Example 7
Source File: CombineValuesFnFactoryTest.java (from beam, Apache License 2.0)
@Test
public void testCombineValuesFnExtract() throws Exception {
  TestReceiver receiver = new TestReceiver();
  MeanInts mean = new MeanInts();

  Combine.CombineFn<Integer, CountSum, String> combiner = mean;

  ParDoFn combineParDoFn =
      createCombineValuesFn(
          CombinePhase.EXTRACT,
          combiner,
          StringUtf8Coder.of(),
          BigEndianIntegerCoder.of(),
          new CountSumCoder(),
          WindowingStrategy.globalDefault());

  combineParDoFn.startBundle(receiver);
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(KV.of("a", new CountSum(6, 27))));
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(KV.of("b", new CountSum(3, 21))));
  combineParDoFn.finishBundle();

  assertArrayEquals(
      new Object[] {
        WindowedValue.valueInGlobalWindow(KV.of("a", String.format("%.1f", 4.5))),
        WindowedValue.valueInGlobalWindow(KV.of("b", String.format("%.1f", 7.0)))
      },
      receiver.receivedElems.toArray());
}
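The three tests above drive the ADD, MERGE, and EXTRACT phases of a lifted per-key combine (MeanInts and CountSum are test-internal classes not shown here). Roughly, the phases correspond to the following CombineFn calls, sketched with the illustrative AverageFn from the introduction:

// Rough mapping of the phases exercised above onto CombineFn methods,
// using the illustrative AverageFn from the introduction.
AverageFn fn = new AverageFn();

// ADD: fold the raw inputs for one key into an accumulator.
AverageFn.Accum acc = fn.createAccumulator();
for (int v : java.util.Arrays.asList(5, 6, 7)) {
  acc = fn.addInput(acc, v);
}

// MERGE: combine accumulators produced by different bundles or workers.
AverageFn.Accum merged =
    fn.mergeAccumulators(java.util.Arrays.asList(acc, fn.createAccumulator()));

// EXTRACT: turn the final accumulator into the output value.
Double mean = fn.extractOutput(merged); // 6.0 for the inputs above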
Example 8
Source File: StateBinder.java (from beam, Apache License 2.0)
<InputT, AccumT, OutputT> CombiningState<InputT, AccumT, OutputT> bindCombining(
    String id,
    StateSpec<CombiningState<InputT, AccumT, OutputT>> spec,
    Coder<AccumT> accumCoder,
    Combine.CombineFn<InputT, AccumT, OutputT> combineFn);
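bindCombining is the hook a runner implements to back combining state declared by user code. As a hedged sketch of the user-facing side (the state id, DoFn, and AverageFn are illustrative), a stateful DoFn declares such state with StateSpecs.combining and interacts with it through CombiningState:

// Sketch of the user-facing side of combining state (illustrative names);
// the runner's StateBinder.bindCombining supplies the CombiningState instance.
DoFn<KV<String, Integer>, Void> statefulFn =
    new DoFn<KV<String, Integer>, Void>() {

      @StateId("runningAverage")
      private final StateSpec<CombiningState<Integer, AverageFn.Accum, Double>> averageSpec =
          StateSpecs.combining(new AverageFn());

      @ProcessElement
      public void processElement(
          @Element KV<String, Integer> element,
          @StateId("runningAverage")
              CombiningState<Integer, AverageFn.Accum, Double> average) {
        average.add(element.getValue()); // routed to CombineFn.addInput
        Double soFar = average.read();   // merge + extractOutput behind the scenes
      }
    };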
Example 9
Source File: StateSpec.java (from beam, Apache License 2.0)
@Override
public ResultT dispatchCombining(Combine.CombineFn<?, ?, ?> combineFn, Coder<?> accumCoder) {
  return dispatchDefault();
}
Example 10
Source File: ParDoTranslation.java (from beam, Apache License 2.0)
@VisibleForTesting
static StateSpec<?> fromProto(RunnerApi.StateSpec stateSpec, RehydratedComponents components)
    throws IOException {
  switch (stateSpec.getSpecCase()) {
    case READ_MODIFY_WRITE_SPEC:
      return StateSpecs.value(
          components.getCoder(stateSpec.getReadModifyWriteSpec().getCoderId()));
    case BAG_SPEC:
      return StateSpecs.bag(components.getCoder(stateSpec.getBagSpec().getElementCoderId()));
    case COMBINING_SPEC:
      FunctionSpec combineFnSpec = stateSpec.getCombiningSpec().getCombineFn();
      if (!combineFnSpec.getUrn().equals(CombineTranslation.JAVA_SERIALIZED_COMBINE_FN_URN)) {
        throw new UnsupportedOperationException(
            String.format(
                "Cannot create %s from non-Java %s: %s",
                StateSpec.class.getSimpleName(),
                Combine.CombineFn.class.getSimpleName(),
                combineFnSpec.getUrn()));
      }
      Combine.CombineFn<?, ?, ?> combineFn =
          (Combine.CombineFn<?, ?, ?>)
              SerializableUtils.deserializeFromByteArray(
                  combineFnSpec.getPayload().toByteArray(),
                  Combine.CombineFn.class.getSimpleName());
      // Rawtype coder cast because it is required to be a valid accumulator coder
      // for the CombineFn, by construction
      return StateSpecs.combining(
          (Coder) components.getCoder(stateSpec.getCombiningSpec().getAccumulatorCoderId()),
          combineFn);
    case MAP_SPEC:
      return StateSpecs.map(
          components.getCoder(stateSpec.getMapSpec().getKeyCoderId()),
          components.getCoder(stateSpec.getMapSpec().getValueCoderId()));
    case SET_SPEC:
      return StateSpecs.set(components.getCoder(stateSpec.getSetSpec().getElementCoderId()));
    case SPEC_NOT_SET:
    default:
      throw new IllegalArgumentException(
          String.format("Unknown %s: %s", RunnerApi.StateSpec.class.getName(), stateSpec));
  }
}
Example 11
Source File: AsList.java (from DataflowTemplates, Apache License 2.0)
public static <T> Combine.CombineFn<T, List<T>, List<T>> fn() {
  return new Impl<>();
}
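A combiner like this gathers every input element into a single List. As an illustrative usage (the input PCollections below are assumptions, not part of the template project), it can be applied globally or per key; note that the collected list must fit in memory:

// Illustrative application of a list-collecting CombineFn such as AsList.fn();
// "lines" and "scoresPerUser" are assumed PCollections, not from the project.
PCollection<List<String>> allLines =
    lines.apply(Combine.globally(AsList.<String>fn()));

PCollection<KV<String, List<Integer>>> scoresCollected =
    scoresPerUser.apply(Combine.perKey(AsList.<Integer>fn()));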
Example 12
Source File: CombinePerKeyTranslatorBatch.java (from beam, Apache License 2.0)
@Override
public void translateTransform(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
    TranslationContext context) {

  Combine.PerKey combineTransform = (Combine.PerKey) transform;
  @SuppressWarnings("unchecked")
  final PCollection<KV<K, InputT>> input = (PCollection<KV<K, InputT>>) context.getInput();
  @SuppressWarnings("unchecked")
  final PCollection<KV<K, OutputT>> output = (PCollection<KV<K, OutputT>>) context.getOutput();
  @SuppressWarnings("unchecked")
  final Combine.CombineFn<InputT, AccumT, OutputT> combineFn =
      (Combine.CombineFn<InputT, AccumT, OutputT>) combineTransform.getFn();
  WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();

  Dataset<WindowedValue<KV<K, InputT>>> inputDataset = context.getDataset(input);

  KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) input.getCoder();
  Coder<K> keyCoder = inputCoder.getKeyCoder();
  KvCoder<K, OutputT> outputKVCoder = (KvCoder<K, OutputT>) output.getCoder();
  Coder<OutputT> outputCoder = outputKVCoder.getValueCoder();

  KeyValueGroupedDataset<K, WindowedValue<KV<K, InputT>>> groupedDataset =
      inputDataset.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder));

  Coder<AccumT> accumulatorCoder = null;
  try {
    accumulatorCoder =
        combineFn.getAccumulatorCoder(
            input.getPipeline().getCoderRegistry(), inputCoder.getValueCoder());
  } catch (CannotProvideCoderException e) {
    throw new RuntimeException(e);
  }

  Dataset<Tuple2<K, Iterable<WindowedValue<OutputT>>>> combinedDataset =
      groupedDataset.agg(
          new AggregatorCombiner<K, InputT, AccumT, OutputT, BoundedWindow>(
                  combineFn, windowingStrategy, accumulatorCoder, outputCoder)
              .toColumn());

  // expand the list into separate elements and put the key back into the elements
  WindowedValue.WindowedValueCoder<KV<K, OutputT>> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(
          outputKVCoder, input.getWindowingStrategy().getWindowFn().windowCoder());
  Dataset<WindowedValue<KV<K, OutputT>>> outputDataset =
      combinedDataset.flatMap(
          (FlatMapFunction<
                  Tuple2<K, Iterable<WindowedValue<OutputT>>>, WindowedValue<KV<K, OutputT>>>)
              tuple2 -> {
                K key = tuple2._1();
                Iterable<WindowedValue<OutputT>> windowedValues = tuple2._2();
                List<WindowedValue<KV<K, OutputT>>> result = new ArrayList<>();
                for (WindowedValue<OutputT> windowedValue : windowedValues) {
                  KV<K, OutputT> kv = KV.of(key, windowedValue.getValue());
                  result.add(
                      WindowedValue.of(
                          kv,
                          windowedValue.getTimestamp(),
                          windowedValue.getWindows(),
                          windowedValue.getPane()));
                }
                return result.iterator();
              },
          EncoderHelpers.fromBeamCoder(wvCoder));
  context.putDataset(output, outputDataset);
}
Example 13
Source File: SqlTransform.java (from beam, Apache License 2.0)
static UdafDefinition of(String udafName, Combine.CombineFn combineFn) {
  return new AutoValue_SqlTransform_UdafDefinition(udafName, combineFn);
}
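UdafDefinition is the internal holder SqlTransform creates when a CombineFn is registered as a SQL aggregate function. A hedged usage sketch follows; the query, column names, and AverageFn are assumptions, not taken from the source:

// Illustrative registration of a CombineFn as a SQL UDAF; the query,
// column names, and AverageFn are assumptions for this sketch.
PCollection<Row> result =
    input.apply(
        SqlTransform.query("SELECT my_avg(score) AS avg_score FROM PCOLLECTION")
            .registerUdaf("my_avg", new AverageFn()));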
Example 14
Source File: UdfUdafProvider.java (from beam, Apache License 2.0)
default Map<String, Combine.CombineFn> getUdafs() {
  return Collections.emptyMap();
}
Example 15
Source File: FlinkBatchTransformTranslators.java (from beam, Apache License 2.0)
@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, Iterable<InputT>>>> transform,
    FlinkBatchTranslationContext context) {

  // for now, this is copied from the Combine.PerKey translator. Once we have the new runner API
  // we can replace GroupByKey by a Combine.PerKey with the Concatenate CombineFn

  DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
      context.getInputDataSet(context.getInput(transform));

  Combine.CombineFn<InputT, List<InputT>, List<InputT>> combineFn = new Concatenate<>();

  KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) context.getInput(transform).getCoder();

  Coder<List<InputT>> accumulatorCoder;

  try {
    accumulatorCoder =
        combineFn.getAccumulatorCoder(
            context.getInput(transform).getPipeline().getCoderRegistry(),
            inputCoder.getValueCoder());
  } catch (CannotProvideCoderException e) {
    throw new RuntimeException(e);
  }

  WindowingStrategy<?, ?> windowingStrategy =
      context.getInput(transform).getWindowingStrategy();

  TypeInformation<WindowedValue<KV<K, List<InputT>>>> partialReduceTypeInfo =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(
              KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder),
              windowingStrategy.getWindowFn().windowCoder()));

  Grouping<WindowedValue<KV<K, InputT>>> inputGrouping =
      inputDataSet.groupBy(new KvKeySelector<>(inputCoder.getKeyCoder()));

  @SuppressWarnings("unchecked")
  WindowingStrategy<Object, BoundedWindow> boundedStrategy =
      (WindowingStrategy<Object, BoundedWindow>) windowingStrategy;

  FlinkPartialReduceFunction<K, InputT, List<InputT>, ?> partialReduceFunction =
      new FlinkPartialReduceFunction<>(
          combineFn, boundedStrategy, Collections.emptyMap(), context.getPipelineOptions());

  FlinkReduceFunction<K, List<InputT>, List<InputT>, ?> reduceFunction =
      new FlinkReduceFunction<>(
          combineFn, boundedStrategy, Collections.emptyMap(), context.getPipelineOptions());

  // Partially GroupReduce the values into the intermediate format AccumT (combine)
  String fullName = getCurrentTransformName(context);
  GroupCombineOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, List<InputT>>>>
      groupCombine =
          new GroupCombineOperator<>(
              inputGrouping,
              partialReduceTypeInfo,
              partialReduceFunction,
              "GroupCombine: " + fullName);

  Grouping<WindowedValue<KV<K, List<InputT>>>> intermediateGrouping =
      groupCombine.groupBy(new KvKeySelector<>(inputCoder.getKeyCoder()));

  // Fully reduce the values and create output format VO
  GroupReduceOperator<WindowedValue<KV<K, List<InputT>>>, WindowedValue<KV<K, List<InputT>>>>
      outputDataSet =
          new GroupReduceOperator<>(
              intermediateGrouping, partialReduceTypeInfo, reduceFunction, fullName);

  context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}
Example 16
Source File: ReduceByKeyTranslator.java (from beam, Apache License 2.0)
private static <InputT, KeyT, ValueT, AccT, OutputT>
    Combine.CombineFn<ValueT, AccT, OutputT> asCombineFn(
        ReduceByKey<InputT, KeyT, ValueT, AccT, OutputT> operator) {
  @SuppressWarnings("unchecked")
  ReduceByKey<InputT, KeyT, ValueT, AccT, OutputT> cast = (ReduceByKey) operator;
  VoidFunction<AccT> accumulatorFactory = cast.getAccumulatorFactory();
  BinaryFunction<AccT, ValueT, AccT> accumulate = cast.getAccumulate();
  CombinableBinaryFunction<AccT> mergeAccumulators = cast.getMergeAccumulators();
  UnaryFunction<AccT, OutputT> outputFn = cast.getOutputFn();
  TypeDescriptor<AccT> accumulatorType = cast.getAccumulatorType();
  return new Combine.CombineFn<ValueT, AccT, OutputT>() {

    @Override
    public AccT createAccumulator() {
      return accumulatorFactory.apply();
    }

    @Override
    public Coder<AccT> getAccumulatorCoder(CoderRegistry registry, Coder<ValueT> inputCoder)
        throws CannotProvideCoderException {
      return registry.getCoder(accumulatorType);
    }

    @Override
    public AccT addInput(AccT mutableAccumulator, ValueT input) {
      return accumulate.apply(mutableAccumulator, input);
    }

    @Override
    public AccT mergeAccumulators(Iterable<AccT> accumulators) {
      AccT accumulated = null;
      for (AccT o : accumulators) {
        if (accumulated == null) {
          accumulated = o;
        } else {
          accumulated = mergeAccumulators.apply(accumulated, o);
        }
      }
      return accumulated;
    }

    @Override
    public OutputT extractOutput(AccT accumulator) {
      return outputFn.apply(accumulator);
    }
  };
}
Example 17
Source File: StateSpec.java (from beam, Apache License 2.0)
ResultT dispatchCombining(Combine.CombineFn<?, ?, ?> combineFn, Coder<?> accumCoder);
Example 18
Source File: NamedAggregators.java (from beam, Apache License 2.0)
Combine.CombineFn<InputT, InterT, OutputT> getCombineFn();
Example 19
Source File: SqlTransform.java (from beam, Apache License 2.0)
abstract Combine.CombineFn combineFn();