org.apache.flink.api.java.operators.Grouping Java Examples
The following examples show how to use
org.apache.flink.api.java.operators.Grouping.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ScalaAggregateOperator.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Grouped aggregation. * * @param input * @param function * @param field */ public ScalaAggregateOperator(Grouping<IN> input, Aggregations function, int field) { super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType()); Preconditions.checkNotNull(function); if (!input.getInputDataSet().getType().isTupleType()) { throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types."); } TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType(); if (field < 0 || field >= inType.getArity()) { throw new IllegalArgumentException("Aggregation field position is out of range."); } AggregationFunctionFactory factory = function.getFactory(); AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass()); // set the aggregation fields this.aggregationFunctions.add(aggFunct); this.fields.add(field); this.grouping = input; }
Example #2
Source File: ScalaAggregateOperator.java From flink with Apache License 2.0 | 6 votes |
/** * Grouped aggregation. * * @param input * @param function * @param field */ public ScalaAggregateOperator(Grouping<IN> input, Aggregations function, int field) { super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType()); Preconditions.checkNotNull(function); if (!input.getInputDataSet().getType().isTupleType()) { throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types."); } TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType(); if (field < 0 || field >= inType.getArity()) { throw new IllegalArgumentException("Aggregation field position is out of range."); } AggregationFunctionFactory factory = function.getFactory(); AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass()); // set the aggregation fields this.aggregationFunctions.add(aggFunct); this.fields.add(field); this.grouping = input; }
Example #3
Source File: ScalaAggregateOperator.java From flink with Apache License 2.0 | 6 votes |
/** * Grouped aggregation. * * @param input * @param function * @param field */ public ScalaAggregateOperator(Grouping<IN> input, Aggregations function, int field) { super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType()); Preconditions.checkNotNull(function); if (!input.getInputDataSet().getType().isTupleType()) { throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types."); } TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType(); if (field < 0 || field >= inType.getArity()) { throw new IllegalArgumentException("Aggregation field position is out of range."); } AggregationFunctionFactory factory = function.getFactory(); AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass()); // set the aggregation fields this.aggregationFunctions.add(aggFunct); this.fields.add(field); this.grouping = input; }
Example #4
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Translates a {@code GroupByKey.GroupByKeyOnly} transform into a Flink
 * {@code GroupReduceOperator} over an unsorted grouping on the {@code "key"} field,
 * and registers the operator as the data set for the transform's output.
 *
 * @param transform the transform being translated
 * @param context the translation context used to look up the input data set and
 *     output type information, and to register the resulting data set
 */
@Override
public void translateNode(
        GroupByKey.GroupByKeyOnly<K, V> transform,
        FlinkBatchTranslationContext context) {
    final DataSet<KV<K, V>> source = context.getInputDataSet(context.getInput(transform));

    final GroupReduceFunction<KV<K, V>, KV<K, Iterable<V>>> collectValues =
            new FlinkKeyedListAggregationFunction<>();

    final TypeInformation<KV<K, Iterable<V>>> resultType =
            context.getTypeInfo(context.getOutput(transform));

    // Group the input on the "key" field expression of its KV elements.
    final Keys.ExpressionKeys<KV<K, V>> keyField =
            new Keys.ExpressionKeys<>(new String[]{"key"}, source.getType());
    final Grouping<KV<K, V>> byKey = new UnsortedGrouping<>(source, keyField);

    final GroupReduceOperator<KV<K, V>, KV<K, Iterable<V>>> grouped =
            new GroupReduceOperator<>(byKey, resultType, collectValues, transform.getName());

    context.setOutputDataSet(context.getOutput(transform), grouped);
}
Example #5
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Translates a {@code GroupByKey} transform into a Flink {@code GroupReduceOperator}
 * over an unsorted grouping on the {@code "key"} field, and registers the operator
 * as the data set for the transform's output.
 *
 * @param transform the transform being translated
 * @param context the translation context used to look up the input data set and
 *     output type information, and to register the resulting data set
 */
@Override
public void translateNode(
        GroupByKey<K, V> transform,
        FlinkBatchTranslationContext context) {
    final DataSet<KV<K, V>> source = context.getInputDataSet(context.getInput(transform));

    final GroupReduceFunction<KV<K, V>, KV<K, Iterable<V>>> collectValues =
            new FlinkKeyedListAggregationFunction<>();

    final TypeInformation<KV<K, Iterable<V>>> resultType =
            context.getTypeInfo(context.getOutput(transform));

    // Group the input on the "key" field expression of its KV elements.
    final Keys.ExpressionKeys<KV<K, V>> keyField =
            new Keys.ExpressionKeys<>(new String[]{"key"}, source.getType());
    final Grouping<KV<K, V>> byKey = new UnsortedGrouping<>(source, keyField);

    final GroupReduceOperator<KV<K, V>, KV<K, Iterable<V>>> grouped =
            new GroupReduceOperator<>(byKey, resultType, collectValues, transform.getName());

    context.setOutputDataSet(context.getOutput(transform), grouped);
}
Example #6
Source File: FlinkBatchTransformTranslators.java From beam with Apache License 2.0 | 4 votes |
@Override public void translateNode( PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, Iterable<InputT>>>> transform, FlinkBatchTranslationContext context) { // for now, this is copied from the Combine.PerKey translator. Once we have the new runner API // we can replace GroupByKey by a Combine.PerKey with the Concatenate CombineFn DataSet<WindowedValue<KV<K, InputT>>> inputDataSet = context.getInputDataSet(context.getInput(transform)); Combine.CombineFn<InputT, List<InputT>, List<InputT>> combineFn = new Concatenate<>(); KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) context.getInput(transform).getCoder(); Coder<List<InputT>> accumulatorCoder; try { accumulatorCoder = combineFn.getAccumulatorCoder( context.getInput(transform).getPipeline().getCoderRegistry(), inputCoder.getValueCoder()); } catch (CannotProvideCoderException e) { throw new RuntimeException(e); } WindowingStrategy<?, ?> windowingStrategy = context.getInput(transform).getWindowingStrategy(); TypeInformation<WindowedValue<KV<K, List<InputT>>>> partialReduceTypeInfo = new CoderTypeInformation<>( WindowedValue.getFullCoder( KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder), windowingStrategy.getWindowFn().windowCoder())); Grouping<WindowedValue<KV<K, InputT>>> inputGrouping = inputDataSet.groupBy(new KvKeySelector<>(inputCoder.getKeyCoder())); @SuppressWarnings("unchecked") WindowingStrategy<Object, BoundedWindow> boundedStrategy = (WindowingStrategy<Object, BoundedWindow>) windowingStrategy; FlinkPartialReduceFunction<K, InputT, List<InputT>, ?> partialReduceFunction = new FlinkPartialReduceFunction<>( combineFn, boundedStrategy, Collections.emptyMap(), context.getPipelineOptions()); FlinkReduceFunction<K, List<InputT>, List<InputT>, ?> reduceFunction = new FlinkReduceFunction<>( combineFn, boundedStrategy, Collections.emptyMap(), context.getPipelineOptions()); // Partially GroupReduce the values into the intermediate format AccumT (combine) String fullName = 
getCurrentTransformName(context); GroupCombineOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, List<InputT>>>> groupCombine = new GroupCombineOperator<>( inputGrouping, partialReduceTypeInfo, partialReduceFunction, "GroupCombine: " + fullName); Grouping<WindowedValue<KV<K, List<InputT>>>> intermediateGrouping = groupCombine.groupBy(new KvKeySelector<>(inputCoder.getKeyCoder())); // Fully reduce the values and create output format VO GroupReduceOperator<WindowedValue<KV<K, List<InputT>>>, WindowedValue<KV<K, List<InputT>>>> outputDataSet = new GroupReduceOperator<>( intermediateGrouping, partialReduceTypeInfo, reduceFunction, fullName); context.setOutputDataSet(context.getOutput(transform), outputDataSet); }
Example #7
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0 | 4 votes |
@Override public void translateNode(Combine.PerKey<K, VI, VO> transform, FlinkBatchTranslationContext context) { DataSet<KV<K, VI>> inputDataSet = context.getInputDataSet(context.getInput(transform)); @SuppressWarnings("unchecked") Combine.KeyedCombineFn<K, VI, VA, VO> keyedCombineFn = (Combine.KeyedCombineFn<K, VI, VA, VO>) transform.getFn(); KvCoder<K, VI> inputCoder = (KvCoder<K, VI>) context.getInput(transform).getCoder(); Coder<VA> accumulatorCoder = null; try { accumulatorCoder = keyedCombineFn.getAccumulatorCoder(context.getInput(transform).getPipeline().getCoderRegistry(), inputCoder.getKeyCoder(), inputCoder.getValueCoder()); } catch (CannotProvideCoderException e) { e.printStackTrace(); // TODO } TypeInformation<KV<K, VI>> kvCoderTypeInformation = new KvCoderTypeInformation<>(inputCoder); TypeInformation<KV<K, VA>> partialReduceTypeInfo = new KvCoderTypeInformation<>(KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder)); Grouping<KV<K, VI>> inputGrouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{"key"}, kvCoderTypeInformation)); FlinkPartialReduceFunction<K, VI, VA> partialReduceFunction = new FlinkPartialReduceFunction<>(keyedCombineFn); // Partially GroupReduce the values into the intermediate format VA (combine) GroupCombineOperator<KV<K, VI>, KV<K, VA>> groupCombine = new GroupCombineOperator<>(inputGrouping, partialReduceTypeInfo, partialReduceFunction, "GroupCombine: " + transform.getName()); // Reduce fully to VO GroupReduceFunction<KV<K, VA>, KV<K, VO>> reduceFunction = new FlinkReduceFunction<>(keyedCombineFn); TypeInformation<KV<K, VO>> reduceTypeInfo = context.getTypeInfo(context.getOutput(transform)); Grouping<KV<K, VA>> intermediateGrouping = new UnsortedGrouping<>(groupCombine, new Keys.ExpressionKeys<>(new String[]{"key"}, groupCombine.getType())); // Fully reduce the values and create output format VO GroupReduceOperator<KV<K, VA>, KV<K, VO>> outputDataSet = new 
GroupReduceOperator<>(intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName()); context.setOutputDataSet(context.getOutput(transform), outputDataSet); }