Java Code Examples for com.google.cloud.dataflow.sdk.transforms.Combine#KeyedCombineFn
The following examples show how to use
com.google.cloud.dataflow.sdk.transforms.Combine#KeyedCombineFn.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FlinkGroupAlsoByWindowWrapper.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Creates a {@code DataStream} whose elements are grouped into windows according to the
 * windowing strategy of {@code input}, combining values with the supplied combiner.
 * This method assumes that <b>elements are already grouped by key</b>.
 * <p/>
 * Unlike {@link #createForIterable(PipelineOptions, PCollection, KeyedStream)}, this variant
 * expects a combiner function
 * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
 * A combiner helps to increase speed and, in most cases, to reduce the per-window state.
 *
 * @param options            the general job configuration options; must not be {@code null}.
 * @param input              the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection};
 *                           its coder, pipeline coder registry and windowing strategy are read here.
 * @param groupedStreamByKey the input stream, assumed to be grouped by key already.
 * @param combiner           the combiner to be used.
 * @param outputKvCoder      the coder for the output key/value pairs.
 * @return the stream produced by applying the group-also-by-window operator to
 *         {@code groupedStreamByKey}.
 */
public static <K, VIN, VACC, VOUT> DataStream<WindowedValue<KV<K, VOUT>>> create(
        PipelineOptions options,
        PCollection input,
        KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey,
        Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner,
        KvCoder<K, VOUT> outputKvCoder) {
    Preconditions.checkNotNull(options);

    KvCoder<K, VIN> keyedInputCoder = (KvCoder<K, VIN>) input.getCoder();

    // NOTE(review): deliberately left as a raw type, exactly as before. The type information
    // handed to transform(...) below is for KV<K, VOUT>, while returns(...) sets the windowed
    // type; presumably a parameterized operator would not type-check here - confirm before
    // tightening.
    FlinkGroupAlsoByWindowWrapper groupAlsoByWindow = new FlinkGroupAlsoByWindowWrapper<>(
            options,
            input.getPipeline().getCoderRegistry(),
            input.getWindowingStrategy(),
            keyedInputCoder,
            combiner);

    // Coder for the windowed output elements: the output KV coder plus the window coder of
    // the input's window function.
    Coder<WindowedValue<KV<K, VOUT>>> windowedOutputCoder =
            WindowedValue.FullWindowedValueCoder.of(
                    outputKvCoder,
                    input.getWindowingStrategy().getWindowFn().windowCoder());

    CoderTypeInformation<WindowedValue<KV<K, VOUT>>> windowedOutputTypeInfo =
            new CoderTypeInformation<>(windowedOutputCoder);

    return groupedStreamByKey
            .transform("GroupByWindowWithCombiner",
                    new CoderTypeInformation<>(outputKvCoder),
                    groupAlsoByWindow)
            .returns(windowedOutputTypeInfo);
}
Example 2
Source File: FlinkGroupAlsoByWindowWrapper.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Factory that builds a {@code FlinkGroupAlsoByWindowWrapper} directly from explicitly
 * supplied components. Judging by its name it is meant for tests.
 *
 * @param options           the job configuration options; must not be {@code null}.
 * @param registry          the coder registry handed to the wrapper.
 * @param windowingStrategy the windowing strategy handed to the wrapper.
 * @param inputCoder        the coder of the input key/value pairs.
 * @param combiner          the combiner handed to the wrapper.
 * @return a new wrapper instance (raw type, as declared).
 */
public static <K, VIN, VACC, VOUT> FlinkGroupAlsoByWindowWrapper createForTesting(
        PipelineOptions options,
        CoderRegistry registry,
        WindowingStrategy<KV<K, VIN>, BoundedWindow> windowingStrategy,
        KvCoder<K, VIN> inputCoder,
        Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner) {
    // Reject null options before anything else happens.
    PipelineOptions checkedOptions = Preconditions.checkNotNull(options);
    return new FlinkGroupAlsoByWindowWrapper(
            checkedOptions,
            registry,
            windowingStrategy,
            inputCoder,
            combiner);
}
Example 3
Source File: FlinkGroupAlsoByWindowWrapper.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Stores the supplied components, builds the group-also-by-window operator and sets the
 * chaining strategy to {@code ChainingStrategy.ALWAYS}.
 *
 * @param options           the job configuration options; must not be {@code null}.
 * @param registry          the coder registry; must not be {@code null}.
 * @param windowingStrategy the windowing strategy; must not be {@code null}.
 * @param inputCoder        the coder of the input key/value pairs; must not be {@code null}.
 * @param combiner          the combiner; stored as-is without a null check.
 */
private FlinkGroupAlsoByWindowWrapper(
        PipelineOptions options,
        CoderRegistry registry,
        WindowingStrategy<KV<K, VIN>, BoundedWindow> windowingStrategy,
        KvCoder<K, VIN> inputCoder,
        Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner) {
    // Each required argument is validated exactly once, in the same order as before.
    this.options = Preconditions.checkNotNull(options);
    this.coderRegistry = Preconditions.checkNotNull(registry);
    this.inputKvCoder = Preconditions.checkNotNull(inputCoder);
    this.windowingStrategy = Preconditions.checkNotNull(windowingStrategy);

    // NOTE(review): the combiner is intentionally not null-checked here; presumably a null
    // combiner selects a non-combining code path - confirm against the callers.
    this.combineFn = combiner;

    // Kept after the field assignments above, as in the original ordering, in case the
    // operator factory reads them.
    this.operator = createGroupAlsoByWindowOperator();
    this.chainingStrategy = ChainingStrategy.ALWAYS;
}
Example 4
Source File: FlinkStateInternals.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Convenience constructor that delegates to another constructor of this class, passing
 * {@code withContext(combineFn)} in place of the plain combine function.
 *
 * @param stateKey     the key identifying this state cell.
 * @param combineFn    the keyed combine function; wrapped via {@code withContext} before
 *                     delegation (presumably into a context-aware variant - confirm).
 * @param accumCoder   the coder for accumulator values.
 * @param stateContext the state context forwarded unchanged.
 */
private FlinkInMemoryKeyedCombiningValue(
        ByteString stateKey,
        Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn,
        Coder<AccumT> accumCoder,
        final StateContext<?> stateContext) {
    this(
            stateKey,
            withContext(combineFn),
            accumCoder,
            stateContext);
}
Example 5
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0 | 4 votes |
@Override public void translateNode(Combine.PerKey<K, VI, VO> transform, FlinkBatchTranslationContext context) { DataSet<KV<K, VI>> inputDataSet = context.getInputDataSet(context.getInput(transform)); @SuppressWarnings("unchecked") Combine.KeyedCombineFn<K, VI, VA, VO> keyedCombineFn = (Combine.KeyedCombineFn<K, VI, VA, VO>) transform.getFn(); KvCoder<K, VI> inputCoder = (KvCoder<K, VI>) context.getInput(transform).getCoder(); Coder<VA> accumulatorCoder = null; try { accumulatorCoder = keyedCombineFn.getAccumulatorCoder(context.getInput(transform).getPipeline().getCoderRegistry(), inputCoder.getKeyCoder(), inputCoder.getValueCoder()); } catch (CannotProvideCoderException e) { e.printStackTrace(); // TODO } TypeInformation<KV<K, VI>> kvCoderTypeInformation = new KvCoderTypeInformation<>(inputCoder); TypeInformation<KV<K, VA>> partialReduceTypeInfo = new KvCoderTypeInformation<>(KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder)); Grouping<KV<K, VI>> inputGrouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{"key"}, kvCoderTypeInformation)); FlinkPartialReduceFunction<K, VI, VA> partialReduceFunction = new FlinkPartialReduceFunction<>(keyedCombineFn); // Partially GroupReduce the values into the intermediate format VA (combine) GroupCombineOperator<KV<K, VI>, KV<K, VA>> groupCombine = new GroupCombineOperator<>(inputGrouping, partialReduceTypeInfo, partialReduceFunction, "GroupCombine: " + transform.getName()); // Reduce fully to VO GroupReduceFunction<KV<K, VA>, KV<K, VO>> reduceFunction = new FlinkReduceFunction<>(keyedCombineFn); TypeInformation<KV<K, VO>> reduceTypeInfo = context.getTypeInfo(context.getOutput(transform)); Grouping<KV<K, VA>> intermediateGrouping = new UnsortedGrouping<>(groupCombine, new Keys.ExpressionKeys<>(new String[]{"key"}, groupCombine.getType())); // Fully reduce the values and create output format VO GroupReduceOperator<KV<K, VA>, KV<K, VO>> outputDataSet = new 
GroupReduceOperator<>(intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName()); context.setOutputDataSet(context.getOutput(transform), outputDataSet); }
Example 6
Source File: FlinkPartialReduceFunction.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Creates a partial-reduce function backed by the given keyed combine function.
 *
 * @param keyedCombineFn the combine function to store; its output type is irrelevant here,
 *                       as the wildcard in the signature indicates.
 */
public FlinkPartialReduceFunction(
        Combine.KeyedCombineFn<K, VI, VA, ?> keyedCombineFn) {
    this.keyedCombineFn = keyedCombineFn;
}
Example 7
Source File: FlinkReduceFunction.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Creates a reduce function backed by the given keyed combine function.
 *
 * @param keyedCombineFn the combine function to store; its input type is irrelevant here,
 *                       as the wildcard in the signature indicates.
 */
public FlinkReduceFunction(
        Combine.KeyedCombineFn<K, ?, VA, VO> keyedCombineFn) {
    this.keyedCombineFn = keyedCombineFn;
}