org.apache.flink.api.java.operators.Grouping Java Exaples

Source File: ScalaAggregateOperator.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Grouped aggregation.
 *
 * @param input
 * @param function
 * @param field
 */
public ScalaAggregateOperator(Grouping<IN> input, Aggregations function, int field) {
	super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());

	Preconditions.checkNotNull(function);

	if (!input.getInputDataSet().getType().isTupleType()) {
		throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
	}

	TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

	if (field < 0 || field >= inType.getArity()) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	AggregationFunctionFactory factory = function.getFactory();
	AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

	// set the aggregation fields
	this.aggregationFunctions.add(aggFunct);
	this.fields.add(field);
	this.grouping = input;
}

Source File: ScalaAggregateOperator.java From flink with Apache License 2.0

6 votes

/**
 * Grouped aggregation.
 *
 * @param input
 * @param function
 * @param field
 */
public ScalaAggregateOperator(Grouping<IN> input, Aggregations function, int field) {
	super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());

	Preconditions.checkNotNull(function);

	if (!input.getInputDataSet().getType().isTupleType()) {
		throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
	}

	TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

	if (field < 0 || field >= inType.getArity()) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	AggregationFunctionFactory factory = function.getFactory();
	AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

	// set the aggregation fields
	this.aggregationFunctions.add(aggFunct);
	this.fields.add(field);
	this.grouping = input;
}

Source File: ScalaAggregateOperator.java From flink with Apache License 2.0

6 votes

/**
 * Grouped aggregation.
 *
 * @param input
 * @param function
 * @param field
 */
public ScalaAggregateOperator(Grouping<IN> input, Aggregations function, int field) {
	super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());

	Preconditions.checkNotNull(function);

	if (!input.getInputDataSet().getType().isTupleType()) {
		throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
	}

	TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

	if (field < 0 || field >= inType.getArity()) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	AggregationFunctionFactory factory = function.getFactory();
	AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

	// set the aggregation fields
	this.aggregationFunctions.add(aggFunct);
	this.fields.add(field);
	this.grouping = input;
}

Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0

5 votes

@Override
public void translateNode(GroupByKey.GroupByKeyOnly<K, V> transform, FlinkBatchTranslationContext context) {
	DataSet<KV<K, V>> inputDataSet = context.getInputDataSet(context.getInput(transform));
	GroupReduceFunction<KV<K, V>, KV<K, Iterable<V>>> groupReduceFunction = new FlinkKeyedListAggregationFunction<>();

	TypeInformation<KV<K, Iterable<V>>> typeInformation = context.getTypeInfo(context.getOutput(transform));

	Grouping<KV<K, V>> grouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet.getType()));

	GroupReduceOperator<KV<K, V>, KV<K, Iterable<V>>> outputDataSet =
			new GroupReduceOperator<>(grouping, typeInformation, groupReduceFunction, transform.getName());
	context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}

Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0

5 votes

@Override
public void translateNode(GroupByKey<K, V> transform, FlinkBatchTranslationContext context) {
	DataSet<KV<K, V>> inputDataSet = context.getInputDataSet(context.getInput(transform));
	GroupReduceFunction<KV<K, V>, KV<K, Iterable<V>>> groupReduceFunction = new FlinkKeyedListAggregationFunction<>();

	TypeInformation<KV<K, Iterable<V>>> typeInformation = context.getTypeInfo(context.getOutput(transform));

	Grouping<KV<K, V>> grouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet.getType()));

	GroupReduceOperator<KV<K, V>, KV<K, Iterable<V>>> outputDataSet =
			new GroupReduceOperator<>(grouping, typeInformation, groupReduceFunction, transform.getName());

	context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}

Source File: FlinkBatchTransformTranslators.java From beam with Apache License 2.0

4 votes

@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, Iterable<InputT>>>> transform,
    FlinkBatchTranslationContext context) {

  // for now, this is copied from the Combine.PerKey translator. Once we have the new runner API
  // we can replace GroupByKey by a Combine.PerKey with the Concatenate CombineFn

  DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
      context.getInputDataSet(context.getInput(transform));

  Combine.CombineFn<InputT, List<InputT>, List<InputT>> combineFn = new Concatenate<>();

  KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) context.getInput(transform).getCoder();

  Coder<List<InputT>> accumulatorCoder;

  try {
    accumulatorCoder =
        combineFn.getAccumulatorCoder(
            context.getInput(transform).getPipeline().getCoderRegistry(),
            inputCoder.getValueCoder());
  } catch (CannotProvideCoderException e) {
    throw new RuntimeException(e);
  }

  WindowingStrategy<?, ?> windowingStrategy =
      context.getInput(transform).getWindowingStrategy();

  TypeInformation<WindowedValue<KV<K, List<InputT>>>> partialReduceTypeInfo =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(
              KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder),
              windowingStrategy.getWindowFn().windowCoder()));

  Grouping<WindowedValue<KV<K, InputT>>> inputGrouping =
      inputDataSet.groupBy(new KvKeySelector<>(inputCoder.getKeyCoder()));

  @SuppressWarnings("unchecked")
  WindowingStrategy<Object, BoundedWindow> boundedStrategy =
      (WindowingStrategy<Object, BoundedWindow>) windowingStrategy;

  FlinkPartialReduceFunction<K, InputT, List<InputT>, ?> partialReduceFunction =
      new FlinkPartialReduceFunction<>(
          combineFn, boundedStrategy, Collections.emptyMap(), context.getPipelineOptions());

  FlinkReduceFunction<K, List<InputT>, List<InputT>, ?> reduceFunction =
      new FlinkReduceFunction<>(
          combineFn, boundedStrategy, Collections.emptyMap(), context.getPipelineOptions());

  // Partially GroupReduce the values into the intermediate format AccumT (combine)
  String fullName = getCurrentTransformName(context);
  GroupCombineOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, List<InputT>>>>
      groupCombine =
          new GroupCombineOperator<>(
              inputGrouping,
              partialReduceTypeInfo,
              partialReduceFunction,
              "GroupCombine: " + fullName);

  Grouping<WindowedValue<KV<K, List<InputT>>>> intermediateGrouping =
      groupCombine.groupBy(new KvKeySelector<>(inputCoder.getKeyCoder()));

  // Fully reduce the values and create output format VO
  GroupReduceOperator<WindowedValue<KV<K, List<InputT>>>, WindowedValue<KV<K, List<InputT>>>>
      outputDataSet =
          new GroupReduceOperator<>(
              intermediateGrouping, partialReduceTypeInfo, reduceFunction, fullName);

  context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}

Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0

4 votes

@Override
public void translateNode(Combine.PerKey<K, VI, VO> transform, FlinkBatchTranslationContext context) {
	DataSet<KV<K, VI>> inputDataSet = context.getInputDataSet(context.getInput(transform));

	@SuppressWarnings("unchecked")
	Combine.KeyedCombineFn<K, VI, VA, VO> keyedCombineFn = (Combine.KeyedCombineFn<K, VI, VA, VO>) transform.getFn();

	KvCoder<K, VI> inputCoder = (KvCoder<K, VI>) context.getInput(transform).getCoder();

	Coder<VA> accumulatorCoder =
			null;
	try {
		accumulatorCoder = keyedCombineFn.getAccumulatorCoder(context.getInput(transform).getPipeline().getCoderRegistry(), inputCoder.getKeyCoder(), inputCoder.getValueCoder());
	} catch (CannotProvideCoderException e) {
		e.printStackTrace();
		// TODO
	}

	TypeInformation<KV<K, VI>> kvCoderTypeInformation = new KvCoderTypeInformation<>(inputCoder);
	TypeInformation<KV<K, VA>> partialReduceTypeInfo = new KvCoderTypeInformation<>(KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder));

	Grouping<KV<K, VI>> inputGrouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{"key"}, kvCoderTypeInformation));

	FlinkPartialReduceFunction<K, VI, VA> partialReduceFunction = new FlinkPartialReduceFunction<>(keyedCombineFn);

	// Partially GroupReduce the values into the intermediate format VA (combine)
	GroupCombineOperator<KV<K, VI>, KV<K, VA>> groupCombine =
			new GroupCombineOperator<>(inputGrouping, partialReduceTypeInfo, partialReduceFunction,
					"GroupCombine: " + transform.getName());

	// Reduce fully to VO
	GroupReduceFunction<KV<K, VA>, KV<K, VO>> reduceFunction = new FlinkReduceFunction<>(keyedCombineFn);

	TypeInformation<KV<K, VO>> reduceTypeInfo = context.getTypeInfo(context.getOutput(transform));

	Grouping<KV<K, VA>> intermediateGrouping = new UnsortedGrouping<>(groupCombine, new Keys.ExpressionKeys<>(new String[]{"key"}, groupCombine.getType()));

	// Fully reduce the values and create output format VO
	GroupReduceOperator<KV<K, VA>, KV<K, VO>> outputDataSet =
			new GroupReduceOperator<>(intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName());

	context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}

org.apache.flink.api.java.operators.Grouping Java Examples