org.apache.flink.api.common.operators.Keys Java Examples
The following examples show how to use org.apache.flink.api.common.operators.Keys, the class Flink uses internally to describe grouping, join, sort, and partition keys. The source file, originating project, and license are noted above each example.
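Keys is marked @Internal in Flink, so its surface can change between versions. As a quick orientation before the examples, here is a minimal, self-contained sketch, not taken from any of the projects below, that resolves a tuple field expression to flat logical key positions and checks two key sets for compatibility (class and variable names are illustrative):

import java.util.Arrays;
import org.apache.flink.api.common.operators.Keys;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;

public class KeysOrientationSketch {
    public static void main(String[] args) throws Exception {
        TypeInformation<Tuple2<String, Integer>> type =
                TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {});

        // Resolve the field expression "f0" to flat logical key positions.
        Keys.ExpressionKeys<Tuple2<String, Integer>> keys =
                new Keys.ExpressionKeys<>("f0", type);
        System.out.println(Arrays.toString(keys.computeLogicalKeyPositions())); // [0]

        // Two key sets are compatible if their flat key types match pairwise;
        // areCompatible(...) throws IncompatibleKeysException otherwise.
        Keys.ExpressionKeys<Tuple2<String, Integer>> other =
                new Keys.ExpressionKeys<>("f0", type);
        System.out.println(keys.areCompatible(other)); // true
    }
}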
Example #1
Source File: CoGroupOperator.java From flink with Apache License 2.0
/**
 * Intermediate step of a CoGroup transformation.
 *
 * <p>To continue the CoGroup transformation, provide a {@link org.apache.flink.api.common.functions.RichCoGroupFunction} by calling
 * {@link org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate.CoGroupOperatorWithoutFunction#with(org.apache.flink.api.common.functions.CoGroupFunction)}.
 */
private CoGroupOperatorWithoutFunction createCoGroupOperator(Keys<I2> keys2) {
    if (keys2 == null) {
        throw new NullPointerException();
    }

    if (keys2.isEmpty()) {
        throw new InvalidProgramException("The co-group keys must not be empty.");
    }

    try {
        keys1.areCompatible(keys2);
    } catch (IncompatibleKeysException ike) {
        throw new InvalidProgramException("The pair of co-group keys are not compatible with each other.", ike);
    }

    return new CoGroupOperatorWithoutFunction(keys2);
}
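For context, this private method sits behind the public coGroup(...).where(...).equalTo(...) chain: equalTo(...) builds the second key set and passes it in. A minimal sketch of that call path with illustrative data (not part of the original source):

import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class CoGroupPathSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<String, Integer>> left = env.fromElements(Tuple2.of("a", 1), Tuple2.of("b", 2));
        DataSet<Tuple2<String, Integer>> right = env.fromElements(Tuple2.of("a", 10));

        // where(...) builds keys1; equalTo(...) builds keys2 and triggers the
        // null/empty/compatibility checks in createCoGroupOperator(keys2).
        left.coGroup(right)
            .where("f0")
            .equalTo("f0")
            .with(new CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
                @Override
                public void coGroup(Iterable<Tuple2<String, Integer>> first,
                        Iterable<Tuple2<String, Integer>> second, Collector<String> out) {
                    out.collect(first.iterator().hasNext() && second.iterator().hasNext() ? "both" : "one side");
                }
            })
            .print();
    }
}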
Example #2
Source File: UdfAnalyzerTest.java From Flink-CEPplus with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void compareAnalyzerResultWithAnnotationsDualInputWithKeys(Class<?> baseClass, Class<?> clazz,
        TypeInformation<?> in1Type, TypeInformation<?> in2Type, TypeInformation<?> outType,
        String[] keys1, String[] keys2) {
    // expected
    final Set<Annotation> annotations = FunctionAnnotation.readDualForwardAnnotations(clazz);
    final DualInputSemanticProperties expected = SemanticPropUtil.getSemanticPropsDual(annotations, in1Type, in2Type, outType);

    // actual
    final UdfAnalyzer ua = new UdfAnalyzer(baseClass, clazz, "operator", in1Type, in2Type, outType,
            (keys1 == null) ? null : new Keys.ExpressionKeys(keys1, in1Type),
            (keys2 == null) ? null : new Keys.ExpressionKeys(keys2, in2Type), true);
    ua.analyze();
    final DualInputSemanticProperties actual = (DualInputSemanticProperties) ua.getSemanticProperties();

    assertEquals(expected.toString(), actual.toString());
}
Example #3
Source File: PartitionOperator.java From flink with Apache License 2.0
private static <T> Ordering computeOrdering(Keys<T> pKeys, Order[] orders) {
    Ordering ordering = new Ordering();
    final int[] logicalKeyPositions = pKeys.computeLogicalKeyPositions();

    if (orders == null) {
        for (int key : logicalKeyPositions) {
            ordering.appendOrdering(key, null, Order.ASCENDING);
        }
    } else {
        final TypeInformation<?>[] originalKeyFieldTypes = pKeys.getOriginalKeyFieldTypes();
        int index = 0;
        for (int i = 0; i < originalKeyFieldTypes.length; i++) {
            final int typeTotalFields = originalKeyFieldTypes[i].getTotalFields();
            for (int j = index; j < index + typeTotalFields; j++) {
                ordering.appendOrdering(logicalKeyPositions[j], null, orders[i]);
            }
            index += typeTotalFields;
        }
    }

    return ordering;
}
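The nested loop above exists because a single user-specified key field can expand into several flat positions, and its one Order entry must be applied to each of them. A small illustrative sketch of that expansion (not from the original source):

import java.util.Arrays;
import org.apache.flink.api.common.operators.Keys;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;

public class KeyExpansionSketch {
    public static void main(String[] args) {
        TypeInformation<Tuple2<Tuple2<Integer, Integer>, String>> type =
                TypeInformation.of(new TypeHint<Tuple2<Tuple2<Integer, Integer>, String>>() {});

        // "f0" selects the nested tuple: one original key field...
        Keys.ExpressionKeys<Tuple2<Tuple2<Integer, Integer>, String>> keys =
                new Keys.ExpressionKeys<>("f0", type);

        // ...that flattens to two logical positions, so the single Order
        // entry for "f0" has to cover both of them.
        System.out.println(keys.getOriginalKeyFieldTypes().length);             // 1
        System.out.println(Arrays.toString(keys.computeLogicalKeyPositions())); // [0, 1]
    }
}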
Example #4
Source File: CoGroupOperator.java From Flink-CEPplus with Apache License 2.0
/**
 * Intermediate step of a CoGroup transformation.
 *
 * <p>To continue the CoGroup transformation, provide a {@link org.apache.flink.api.common.functions.RichCoGroupFunction} by calling
 * {@link org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate.CoGroupOperatorWithoutFunction#with(org.apache.flink.api.common.functions.CoGroupFunction)}.
 */
private CoGroupOperatorWithoutFunction createCoGroupOperator(Keys<I2> keys2) {
    if (keys2 == null) {
        throw new NullPointerException();
    }

    if (keys2.isEmpty()) {
        throw new InvalidProgramException("The co-group keys must not be empty.");
    }

    try {
        keys1.areCompatible(keys2);
    } catch (IncompatibleKeysException ike) {
        throw new InvalidProgramException("The pair of co-group keys are not compatible with each other.", ike);
    }

    return new CoGroupOperatorWithoutFunction(keys2);
}
Example #5
Source File: DeltaIterationResultSet.java From flink with Apache License 2.0
DeltaIterationResultSet(ExecutionEnvironment context,
        TypeInformation<ST> typeSS,
        TypeInformation<WT> typeWS,
        DeltaIteration<ST, WT> iterationHead,
        DataSet<ST> nextSolutionSet,
        DataSet<WT> nextWorkset,
        Keys<ST> keys,
        int maxIterations) {
    super(context, typeSS);
    this.iterationHead = iterationHead;
    this.nextWorkset = nextWorkset;
    this.nextSolutionSet = nextSolutionSet;
    this.keys = keys;
    this.maxIterations = maxIterations;
    this.typeWS = typeWS;
}
Example #6
Source File: PlanUnwrappingSortedReduceGroupOperator.java From flink with Apache License 2.0
public PlanUnwrappingSortedReduceGroupOperator(
        GroupReduceFunction<IN, OUT> udf,
        Keys.SelectorFunctionKeys<IN, K1> groupingKey,
        Keys.SelectorFunctionKeys<IN, K2> sortingKey,
        String name,
        TypeInformation<OUT> outType,
        TypeInformation<Tuple3<K1, K2, IN>> typeInfoWithKey,
        boolean combinable) {
    super(
        combinable
            ? new TupleUnwrappingGroupCombinableGroupReducer<IN, OUT, K1, K2>(udf)
            : new TupleUnwrappingNonCombinableGroupReducer<IN, OUT, K1, K2>(udf),
        new UnaryOperatorInformation<>(typeInfoWithKey, outType),
        groupingKey.computeLogicalKeyPositions(),
        name);
    super.setCombinable(combinable);
}
Example #7
Source File: PlanUnwrappingSortedReduceGroupOperator.java From Flink-CEPplus with Apache License 2.0
public PlanUnwrappingSortedReduceGroupOperator(
        GroupReduceFunction<IN, OUT> udf,
        Keys.SelectorFunctionKeys<IN, K1> groupingKey,
        Keys.SelectorFunctionKeys<IN, K2> sortingKey,
        String name,
        TypeInformation<OUT> outType,
        TypeInformation<Tuple3<K1, K2, IN>> typeInfoWithKey,
        boolean combinable) {
    super(
        combinable
            ? new TupleUnwrappingGroupCombinableGroupReducer<IN, OUT, K1, K2>(udf)
            : new TupleUnwrappingNonCombinableGroupReducer<IN, OUT, K1, K2>(udf),
        new UnaryOperatorInformation<>(typeInfoWithKey, outType),
        groupingKey.computeLogicalKeyPositions(),
        name);
    super.setCombinable(combinable);
}
Example #8
Source File: PartitionOperator.java From Flink-CEPplus with Apache License 2.0
private <P> PartitionOperator(DataSet<T> input, PartitionMethod pMethod, Keys<T> pKeys,
        Partitioner<P> customPartitioner, TypeInformation<P> partitionerTypeInfo,
        DataDistribution distribution, String partitionLocationName) {
    super(input, input.getType());

    Preconditions.checkNotNull(pMethod);
    Preconditions.checkArgument(pKeys != null || pMethod == PartitionMethod.REBALANCE, "Partitioning requires keys");
    Preconditions.checkArgument(pMethod != PartitionMethod.CUSTOM || customPartitioner != null, "Custom partitioning requires a partitioner.");
    Preconditions.checkArgument(distribution == null || pMethod == PartitionMethod.RANGE, "Customized data distribution is only necessary for range partitioning.");

    if (distribution != null) {
        Preconditions.checkArgument(pKeys.getNumberOfKeyFields() <= distribution.getNumberOfFields(),
                "The distribution must provide at least as many fields as flat key fields are specified.");
        Preconditions.checkArgument(Arrays.equals(pKeys.getKeyFieldTypes(), Arrays.copyOfRange(distribution.getKeyTypes(), 0, pKeys.getNumberOfKeyFields())),
                "The types of the flat key fields must be equal to the types of the fields of the distribution.");
    }

    if (customPartitioner != null) {
        pKeys.validateCustomPartitioner(customPartitioner, partitionerTypeInfo);
    }

    this.pMethod = pMethod;
    this.pKeys = pKeys;
    this.partitionLocationName = partitionLocationName;
    this.customPartitioner = customPartitioner;
    this.distribution = distribution;
}
Example #9
Source File: PlanBothUnwrappingCoGroupOperator.java From Flink-CEPplus with Apache License 2.0
public PlanBothUnwrappingCoGroupOperator(
        CoGroupFunction<I1, I2, OUT> udf,
        Keys.SelectorFunctionKeys<I1, K> key1,
        Keys.SelectorFunctionKeys<I2, K> key2,
        String name,
        TypeInformation<OUT> type,
        TypeInformation<Tuple2<K, I1>> typeInfoWithKey1,
        TypeInformation<Tuple2<K, I2>> typeInfoWithKey2) {
    super(
        new TupleBothUnwrappingCoGrouper<I1, I2, OUT, K>(udf),
        new BinaryOperatorInformation<Tuple2<K, I1>, Tuple2<K, I2>, OUT>(typeInfoWithKey1, typeInfoWithKey2, type),
        key1.computeLogicalKeyPositions(),
        key2.computeLogicalKeyPositions(),
        name);
}
Example #10
Source File: SplitDataProperties.java From Flink-CEPplus with Apache License 2.0
/**
 * Defines that the data within an input split is sorted on the fields defined by the field expressions
 * in the specified orders. Multiple field expressions must be separated by the semicolon ';' character.
 * All records of an input split must be emitted by the input format in the defined order.
 *
 * <p><b>
 * IMPORTANT: Providing wrong information with SplitDataProperties can cause wrong results!
 * </b>
 *
 * @param orderFields The field expressions of the grouping key.
 * @param orders The orders of the fields.
 * @return This SplitDataProperties object.
 */
public SplitDataProperties<T> splitsOrderedBy(String orderFields, Order[] orders) {

    if (orderFields == null || orders == null) {
        throw new InvalidProgramException("OrderFields or Orders may not be null.");
    }

    String[] orderKeysA = orderFields.split(";");
    if (orderKeysA.length == 0) {
        throw new InvalidProgramException("OrderFields may not be empty.");
    } else if (orders.length == 0) {
        throw new InvalidProgramException("Orders may not be empty");
    } else if (orderKeysA.length != orders.length) {
        throw new InvalidProgramException("Number of OrderFields and Orders must match.");
    }

    if (this.splitGroupKeys != null) {
        throw new InvalidProgramException("DataSource may either be grouped or sorted.");
    }

    this.splitOrdering = new Ordering();

    for (int i = 0; i < orderKeysA.length; i++) {
        String keyExp = orderKeysA[i];
        Keys.ExpressionKeys<T> ek = new Keys.ExpressionKeys<>(keyExp, this.type);
        int[] flatKeys = ek.computeLogicalKeyPositions();

        for (int key : flatKeys) {
            // check for duplicates
            for (int okey : splitOrdering.getFieldPositions()) {
                if (key == okey) {
                    throw new InvalidProgramException("Duplicate field in field expression " + keyExp);
                }
            }
            // append key
            this.splitOrdering.appendOrdering(key, null, orders[i]);
        }
    }
    return this;
}
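A hypothetical usage sketch: the properties hang off a DataSource, and the input format must genuinely emit each split's records in the declared order (the file path and schema below are made up):

import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;

public class SplitPropertiesSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSource<Tuple2<String, Integer>> source = env
                .readCsvFile("/path/to/presorted.csv")   // hypothetical, pre-sorted input
                .types(String.class, Integer.class);

        // Declare: within each split, records arrive sorted by f0 ascending, then f1 descending.
        source.getSplitDataProperties()
              .splitsOrderedBy("f0;f1", new Order[]{Order.ASCENDING, Order.DESCENDING});

        source.print();
    }
}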
Example #11
Source File: JoinOperator.java From flink with Apache License 2.0
protected ProjectJoin(DataSet<I1> input1, DataSet<I2> input2, Keys<I1> keys1, Keys<I2> keys2, JoinHint hint,
        int[] fields, boolean[] isFromFirst, TupleTypeInfo<OUT> returnType) {
    super(input1, input2, keys1, keys2,
        new ProjectFlatJoinFunction<I1, I2, OUT>(fields, isFromFirst,
            returnType.createSerializer(input1.getExecutionEnvironment().getConfig()).createInstance()),
        returnType, hint,
        // We need to use the 4th element in the stack because the call comes through .types().
        Utils.getCallLocationName(4));

    joinProj = null;
}
Example #12
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0
@Override
public void translateNode(CoGroupByKey<K> transform, FlinkBatchTranslationContext context) {
    KeyedPCollectionTuple<K> input = context.getInput(transform);

    CoGbkResultSchema schema = input.getCoGbkResultSchema();
    List<KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?>> keyedCollections = input.getKeyedCollections();

    KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> taggedCollection1 = keyedCollections.get(0);
    KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> taggedCollection2 = keyedCollections.get(1);

    TupleTag<?> tupleTag1 = taggedCollection1.getTupleTag();
    TupleTag<?> tupleTag2 = taggedCollection2.getTupleTag();

    PCollection<? extends KV<K, ?>> collection1 = taggedCollection1.getCollection();
    PCollection<? extends KV<K, ?>> collection2 = taggedCollection2.getCollection();

    DataSet<KV<K, V1>> inputDataSet1 = context.getInputDataSet(collection1);
    DataSet<KV<K, V2>> inputDataSet2 = context.getInputDataSet(collection2);

    TypeInformation<KV<K, CoGbkResult>> typeInfo = context.getOutputTypeInfo();

    FlinkCoGroupKeyedListAggregator<K, V1, V2> aggregator = new FlinkCoGroupKeyedListAggregator<>(schema, tupleTag1, tupleTag2);

    Keys.ExpressionKeys<KV<K, V1>> keySelector1 = new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet1.getType());
    Keys.ExpressionKeys<KV<K, V2>> keySelector2 = new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet2.getType());

    DataSet<KV<K, CoGbkResult>> out = new CoGroupOperator<>(inputDataSet1, inputDataSet2,
            keySelector1, keySelector2, aggregator, typeInfo, null, transform.getName());
    context.setOutputDataSet(context.getOutput(transform), out);
}
Example #13
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0
@Override
public void translateNode(GroupByKey.GroupByKeyOnly<K, V> transform, FlinkBatchTranslationContext context) {
    DataSet<KV<K, V>> inputDataSet = context.getInputDataSet(context.getInput(transform));
    GroupReduceFunction<KV<K, V>, KV<K, Iterable<V>>> groupReduceFunction = new FlinkKeyedListAggregationFunction<>();

    TypeInformation<KV<K, Iterable<V>>> typeInformation = context.getTypeInfo(context.getOutput(transform));

    Grouping<KV<K, V>> grouping = new UnsortedGrouping<>(inputDataSet,
            new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet.getType()));

    GroupReduceOperator<KV<K, V>, KV<K, Iterable<V>>> outputDataSet = new GroupReduceOperator<>(
            grouping, typeInformation, groupReduceFunction, transform.getName());
    context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}
Example #14
Source File: DataStream.java From Flink-CEPplus with Apache License 2.0
private <K> DataStream<T> partitionCustom(Partitioner<K> partitioner, Keys<T> keys) {
    KeySelector<T, K> keySelector = KeySelectorUtil.getSelectorForOneKey(keys, partitioner, getType(), getExecutionConfig());

    return setConnectionType(new CustomPartitionerWrapper<>(clean(partitioner), clean(keySelector)));
}
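The field-based public overloads, for example partitionCustom(partitioner, "f0"), build a Keys.ExpressionKeys for the field and delegate to this private method, which turns the key into a KeySelector via KeySelectorUtil.getSelectorForOneKey (the helper expects a single key field). A minimal sketch with an illustrative partitioner:

import org.apache.flink.api.common.functions.Partitioner;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class PartitionCustomSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<Tuple2<String, Integer>> stream =
                env.fromElements(Tuple2.of("a", 1), Tuple2.of("b", 2));

        // "f0" becomes a one-field Keys.ExpressionKeys and ends up in the
        // private partitionCustom(partitioner, keys) shown above.
        stream.partitionCustom(
                new Partitioner<String>() {
                    @Override
                    public int partition(String key, int numPartitions) {
                        return Math.abs(key.hashCode()) % numPartitions;
                    }
                },
                "f0")
              .print();

        env.execute("partitionCustom sketch");
    }
}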
Example #15
Source File: SplitDataProperties.java From flink with Apache License 2.0
private int[] getAllFlatKeys(String[] fieldExpressions) {

    int[] allKeys = null;

    for (String keyExp : fieldExpressions) {
        Keys.ExpressionKeys<T> ek = new Keys.ExpressionKeys<>(keyExp, this.type);
        int[] flatKeys = ek.computeLogicalKeyPositions();

        if (allKeys == null) {
            allKeys = flatKeys;
        } else {
            // check for duplicates
            for (int key1 : flatKeys) {
                for (int key2 : allKeys) {
                    if (key1 == key2) {
                        throw new InvalidProgramException("Duplicate fields in field expression " + keyExp);
                    }
                }
            }
            // append flat keys
            int oldLength = allKeys.length;
            int newLength = oldLength + flatKeys.length;
            allKeys = Arrays.copyOf(allKeys, newLength);
            System.arraycopy(flatKeys, 0, allKeys, oldLength, flatKeys.length);
        }
    }
    return allKeys;
}
Example #16
Source File: SortPartitionOperator.java From flink with Apache License 2.0
/**
 * Appends an additional sort order with the specified field in the specified order to the
 * local partition sorting of the DataSet.
 *
 * @param field The field expression referring to the field of the additional sort order of
 *              the local partition sorting.
 * @param order The order of the additional sort order of the local partition sorting.
 * @return The DataSet with sorted local partitions.
 */
public SortPartitionOperator<T> sortPartition(String field, Order order) {
    if (useKeySelector) {
        throw new InvalidProgramException("Expression keys cannot be appended after a KeySelector");
    }

    ensureSortableKey(field);
    keys.add(new Keys.ExpressionKeys<>(field, getType()));
    orders.add(order);

    return this;
}
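A short usage sketch with illustrative data; each chained call lands in the method above and appends one more Keys.ExpressionKeys plus its Order:

import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple3;

public class SortPartitionChainSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple3<Integer, String, Double>> ds = env.fromElements(
                Tuple3.of(1, "b", 2.0), Tuple3.of(2, "a", 1.0), Tuple3.of(3, "a", 3.0));

        // Sort each local partition by f1 ascending, then f2 descending.
        ds.sortPartition("f1", Order.ASCENDING)
          .sortPartition("f2", Order.DESCENDING)
          .print();
    }
}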
Example #17
Source File: JoinOperatorSetsBase.java From Flink-CEPplus with Apache License 2.0
protected JoinOperatorSetsPredicateBase(Keys<I1> keys1) {
    if (keys1 == null) {
        throw new NullPointerException();
    }

    if (keys1.isEmpty()) {
        throw new InvalidProgramException("The join keys must not be empty.");
    }

    this.keys1 = keys1;
}
Example #18
Source File: DataSink.java From Flink-CEPplus with Apache License 2.0
/**
 * Sorts each local partition of a {@link org.apache.flink.api.java.tuple.Tuple} data set
 * on the specified field in the specified {@link Order} before it is emitted by the output format.
 *
 * <p><b>Note: Only tuple data sets can be sorted using integer field indices.</b>
 *
 * <p>The tuple data set can be sorted on multiple fields in different orders
 * by chaining {@link #sortLocalOutput(int, Order)} calls.
 *
 * @param field The Tuple field on which the data set is locally sorted.
 * @param order The Order in which the specified Tuple field is locally sorted.
 * @return This data sink operator with specified output order.
 *
 * @see org.apache.flink.api.java.tuple.Tuple
 * @see Order
 *
 * @deprecated Use {@link DataSet#sortPartition(int, Order)} instead
 */
@Deprecated
@PublicEvolving
public DataSink<T> sortLocalOutput(int field, Order order) {

    // get flat keys
    Keys.ExpressionKeys<T> ek = new Keys.ExpressionKeys<>(field, this.type);
    int[] flatKeys = ek.computeLogicalKeyPositions();

    if (!Keys.ExpressionKeys.isSortKey(field, this.type)) {
        throw new InvalidProgramException("Selected sort key is not a sortable type");
    }

    if (this.sortKeyPositions == null) {
        // set sorting info
        this.sortKeyPositions = flatKeys;
        this.sortOrders = new Order[flatKeys.length];
        Arrays.fill(this.sortOrders, order);
    } else {
        // append sorting info to existing info
        int oldLength = this.sortKeyPositions.length;
        int newLength = oldLength + flatKeys.length;
        this.sortKeyPositions = Arrays.copyOf(this.sortKeyPositions, newLength);
        this.sortOrders = Arrays.copyOf(this.sortOrders, newLength);

        for (int i = 0; i < flatKeys.length; i++) {
            this.sortKeyPositions[oldLength + i] = flatKeys[i];
            this.sortOrders[oldLength + i] = order;
        }
    }

    return this;
}
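Since sortLocalOutput is deprecated, here is a sketch of the replacement that the @deprecated tag points to (the output path is illustrative):

import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public class SortedOutputSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Integer, String>> ds = env.fromElements(
                Tuple2.of(2, "b"), Tuple2.of(1, "a"));

        // Sort partitions before writing instead of sorting at the sink.
        ds.sortPartition(0, Order.ASCENDING)
          .writeAsText("/tmp/sorted-out");   // illustrative path

        env.execute("sorted output sketch");
    }
}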
Example #19
Source File: SplitDataProperties.java From flink with Apache License 2.0
/**
 * Defines that the data within an input split is sorted on the fields defined by the field expressions
 * in the specified orders. Multiple field expressions must be separated by the semicolon ';' character.
 * All records of an input split must be emitted by the input format in the defined order.
 *
 * <p><b>
 * IMPORTANT: Providing wrong information with SplitDataProperties can cause wrong results!
 * </b>
 *
 * @param orderFields The field expressions of the grouping key.
 * @param orders The orders of the fields.
 * @return This SplitDataProperties object.
 */
public SplitDataProperties<T> splitsOrderedBy(String orderFields, Order[] orders) {

    if (orderFields == null || orders == null) {
        throw new InvalidProgramException("OrderFields or Orders may not be null.");
    }

    String[] orderKeysA = orderFields.split(";");
    if (orderKeysA.length == 0) {
        throw new InvalidProgramException("OrderFields may not be empty.");
    } else if (orders.length == 0) {
        throw new InvalidProgramException("Orders may not be empty");
    } else if (orderKeysA.length != orders.length) {
        throw new InvalidProgramException("Number of OrderFields and Orders must match.");
    }

    if (this.splitGroupKeys != null) {
        throw new InvalidProgramException("DataSource may either be grouped or sorted.");
    }

    this.splitOrdering = new Ordering();

    for (int i = 0; i < orderKeysA.length; i++) {
        String keyExp = orderKeysA[i];
        Keys.ExpressionKeys<T> ek = new Keys.ExpressionKeys<>(keyExp, this.type);
        int[] flatKeys = ek.computeLogicalKeyPositions();

        for (int key : flatKeys) {
            // check for duplicates
            for (int okey : splitOrdering.getFieldPositions()) {
                if (key == okey) {
                    throw new InvalidProgramException("Duplicate field in field expression " + keyExp);
                }
            }
            // append key
            this.splitOrdering.appendOrdering(key, null, orders[i]);
        }
    }
    return this;
}
Example #20
Source File: DeltaIteration.java From flink with Apache License 2.0
public DeltaIteration(ExecutionEnvironment context, TypeInformation<ST> type,
        DataSet<ST> solutionSet, DataSet<WT> workset, Keys<ST> keys, int maxIterations) {
    initialSolutionSet = solutionSet;
    initialWorkset = workset;
    solutionSetPlaceholder = new SolutionSetPlaceHolder<>(context, solutionSet.getType(), this);
    worksetPlaceholder = new WorksetPlaceHolder<>(context, workset.getType());
    this.keys = keys;
    this.maxIterations = maxIterations;
}
Example #21
Source File: OneInputOperatorTransformation.java From flink with Apache License 2.0
/**
 * Partitions the operator state of a {@link OperatorTransformation} by the given key positions.
 *
 * @param fields The position of the fields on which the {@code OperatorTransformation} will be grouped.
 * @return The {@code OperatorTransformation} with partitioned state.
 */
public KeyedOperatorTransformation<Tuple, T> keyBy(int... fields) {
    if (dataSet.getType() instanceof BasicArrayTypeInfo || dataSet.getType() instanceof PrimitiveArrayTypeInfo) {
        return keyBy(KeySelectorUtil.getSelectorForArray(fields, dataSet.getType()));
    } else {
        return keyBy(new Keys.ExpressionKeys<>(fields, dataSet.getType()));
    }
}
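Assuming the State Processor API entry point OperatorTransformation.bootstrapWith, which is where OneInputOperatorTransformation instances come from, a minimal sketch of reaching this method; a Tuple2 input takes the Keys.ExpressionKeys branch because it is neither a basic-array nor a primitive-array type:

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.state.api.KeyedOperatorTransformation;
import org.apache.flink.state.api.OperatorTransformation;

public class KeyByPositionSketch {
    public static void main(String[] args) {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<String, Integer>> ds = env.fromElements(Tuple2.of("a", 1));

        // keyBy(0) builds new Keys.ExpressionKeys<>(new int[]{0}, ds.getType()).
        KeyedOperatorTransformation<Tuple, Tuple2<String, Integer>> keyed =
                OperatorTransformation.bootstrapWith(ds).keyBy(0);
    }
}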
Example #22
Source File: CoGroupOperator.java From Flink-CEPplus with Apache License 2.0
private CoGroupOperatorWithoutFunction(Keys<I2> keys2) {
    if (keys2 == null) {
        throw new NullPointerException();
    }
    if (keys2.isEmpty()) {
        throw new InvalidProgramException("The co-group keys must not be empty.");
    }

    this.keys2 = keys2;

    this.groupSortKeyOrderFirst = new ArrayList<>();
    this.groupSortKeyOrderSecond = new ArrayList<>();
}
Example #23
Source File: SortPartitionOperator.java From Flink-CEPplus with Apache License 2.0
/**
 * Appends an additional sort order with the specified field in the specified order to the
 * local partition sorting of the DataSet.
 *
 * @param field The field index of the additional sort order of the local partition sorting.
 * @param order The order of the additional sort order of the local partition sorting.
 * @return The DataSet with sorted local partitions.
 */
public SortPartitionOperator<T> sortPartition(int field, Order order) {
    if (useKeySelector) {
        throw new InvalidProgramException("Expression keys cannot be appended after a KeySelector");
    }

    ensureSortableKey(field);
    keys.add(new Keys.ExpressionKeys<>(field, getType()));
    orders.add(order);

    return this;
}
Example #24
Source File: SortPartitionOperator.java From Flink-CEPplus with Apache License 2.0
public <K> SortPartitionOperator(DataSet<T> dataSet, Keys.SelectorFunctionKeys<T, K> sortKey, Order sortOrder, String sortLocationName) {
    this(dataSet, sortLocationName);
    this.useKeySelector = true;

    ensureSortableKey(sortKey);

    keys.add(sortKey);
    orders.add(sortOrder);
}
Example #25
Source File: JoinOperatorSetsBase.java From flink with Apache License 2.0
protected JoinOperatorSetsPredicateBase(Keys<I1> keys1) {
    if (keys1 == null) {
        throw new NullPointerException();
    }

    if (keys1.isEmpty()) {
        throw new InvalidProgramException("The join keys must not be empty.");
    }

    this.keys1 = keys1;
}
Example #26
Source File: AggregationFunctionTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void minMaxByTest() throws Exception {
    // Tuples are grouped on field 0, aggregated on field 1

    // preparing expected outputs
    List<Tuple3<Integer, Integer, Integer>> maxByFirstExpected = ImmutableList.of(
            Tuple3.of(0, 0, 0), Tuple3.of(0, 1, 1), Tuple3.of(0, 2, 2),
            Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2),
            Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2));

    List<Tuple3<Integer, Integer, Integer>> maxByLastExpected = ImmutableList.of(
            Tuple3.of(0, 0, 0), Tuple3.of(0, 1, 1), Tuple3.of(0, 2, 2),
            Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 5),
            Tuple3.of(0, 2, 5), Tuple3.of(0, 2, 5), Tuple3.of(0, 2, 8));

    List<Tuple3<Integer, Integer, Integer>> minByFirstExpected = ImmutableList.of(
            Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0),
            Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0),
            Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0));

    List<Tuple3<Integer, Integer, Integer>> minByLastExpected = ImmutableList.of(
            Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0),
            Tuple3.of(0, 0, 3), Tuple3.of(0, 0, 3), Tuple3.of(0, 0, 3),
            Tuple3.of(0, 0, 6), Tuple3.of(0, 0, 6), Tuple3.of(0, 0, 6));

    // some necessary boilerplate
    TypeInformation<Tuple3<Integer, Integer, Integer>> typeInfo = TypeExtractor.getForObject(Tuple3.of(0, 0, 0));
    ExecutionConfig config = new ExecutionConfig();

    KeySelector<Tuple3<Integer, Integer, Integer>, Tuple> keySelector = KeySelectorUtil.getSelectorForKeys(
            new Keys.ExpressionKeys<>(new int[]{0}, typeInfo), typeInfo, config);
    TypeInformation<Tuple> keyType = TypeExtractor.getKeySelectorTypes(keySelector, typeInfo);

    // aggregations tested
    ReduceFunction<Tuple3<Integer, Integer, Integer>> maxByFunctionFirst =
            new ComparableAggregator<>(1, typeInfo, AggregationType.MAXBY, true, config);
    ReduceFunction<Tuple3<Integer, Integer, Integer>> maxByFunctionLast =
            new ComparableAggregator<>(1, typeInfo, AggregationType.MAXBY, false, config);
    ReduceFunction<Tuple3<Integer, Integer, Integer>> minByFunctionFirst =
            new ComparableAggregator<>(1, typeInfo, AggregationType.MINBY, true, config);
    ReduceFunction<Tuple3<Integer, Integer, Integer>> minByFunctionLast =
            new ComparableAggregator<>(1, typeInfo, AggregationType.MINBY, false, config);

    assertEquals(maxByFirstExpected, MockContext.createAndExecuteForKeyedStream(
            new StreamGroupedReduce<>(maxByFunctionFirst, typeInfo.createSerializer(config)),
            getInputByList(), keySelector, keyType));
    assertEquals(maxByLastExpected, MockContext.createAndExecuteForKeyedStream(
            new StreamGroupedReduce<>(maxByFunctionLast, typeInfo.createSerializer(config)),
            getInputByList(), keySelector, keyType));
    assertEquals(minByLastExpected, MockContext.createAndExecuteForKeyedStream(
            new StreamGroupedReduce<>(minByFunctionLast, typeInfo.createSerializer(config)),
            getInputByList(), keySelector, keyType));
    assertEquals(minByFirstExpected, MockContext.createAndExecuteForKeyedStream(
            new StreamGroupedReduce<>(minByFunctionFirst, typeInfo.createSerializer(config)),
            getInputByList(), keySelector, keyType));
}
Example #27
Source File: DataSetUtils.java From flink with Apache License 2.0
/**
 * Range-partitions a DataSet using the specified key selector function.
 */
public static <T, K extends Comparable<K>> PartitionOperator<T> partitionByRange(DataSet<T> input, DataDistribution distribution, KeySelector<T, K> keyExtractor) {
    final TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, input.getType());
    return new PartitionOperator<>(input, PartitionOperatorBase.PartitionMethod.RANGE,
            new Keys.SelectorFunctionKeys<>(input.clean(keyExtractor), input.getType(), keyType),
            distribution, Utils.getCallLocationName());
}
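When no explicit DataDistribution is supplied, the related DataSet.partitionByRange(KeySelector) wraps the selector in Keys.SelectorFunctionKeys the same way and lets Flink sample a distribution at runtime. A minimal sketch with illustrative data:

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;

public class RangePartitionSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<String, Integer>> ds = env.fromElements(
                Tuple2.of("a", 3), Tuple2.of("b", 1), Tuple2.of("c", 2));

        // Range-partition on the Integer field; the selector becomes a
        // Keys.SelectorFunctionKeys internally.
        ds.partitionByRange(new KeySelector<Tuple2<String, Integer>, Integer>() {
              @Override
              public Integer getKey(Tuple2<String, Integer> value) {
                  return value.f1;
              }
          })
          .print();
    }
}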
Example #28
Source File: JoinOperatorSetsBase.java From Flink-CEPplus with Apache License 2.0
protected JoinFunctionAssigner<I1, I2> createJoinFunctionAssigner(Keys<I2> keys2) {
    DefaultJoin<I1, I2> join = createDefaultJoin(keys2);
    return new DefaultJoinFunctionAssigner(join);
}
Example #29
Source File: JoinOperator.java From flink with Apache License 2.0
public EquiJoin(DataSet<I1> input1, DataSet<I2> input2,
        Keys<I1> keys1, Keys<I2> keys2,
        FlatJoinFunction<I1, I2, OUT> generatedFunction, JoinFunction<I1, I2, OUT> function,
        TypeInformation<OUT> returnType, JoinHint hint, String joinLocationName) {
    this(input1, input2, keys1, keys2, generatedFunction, function, returnType, hint, joinLocationName, JoinType.INNER);
}
Example #30
Source File: SortPartitionOperator.java From flink with Apache License 2.0
protected org.apache.flink.api.common.operators.SingleInputOperator<?, T, ?> translateToDataFlow(Operator<T> input) {
    String name = "Sort at " + sortLocationName;

    if (useKeySelector) {
        return translateToDataFlowWithKeyExtractor(input, (Keys.SelectorFunctionKeys<T, ?>) keys.get(0), orders.get(0), name);
    }

    // flatten sort key positions
    List<Integer> allKeyPositions = new ArrayList<>();
    List<Order> allOrders = new ArrayList<>();

    for (int i = 0, length = keys.size(); i < length; i++) {
        int[] sortKeyPositions = keys.get(i).computeLogicalKeyPositions();
        Order order = orders.get(i);

        for (int sortKeyPosition : sortKeyPositions) {
            allKeyPositions.add(sortKeyPosition);
            allOrders.add(order);
        }
    }

    Ordering partitionOrdering = new Ordering();
    for (int i = 0, length = allKeyPositions.size(); i < length; i++) {
        partitionOrdering.appendOrdering(allKeyPositions.get(i), null, allOrders.get(i));
    }

    // distinguish between partition types
    UnaryOperatorInformation<T, T> operatorInfo = new UnaryOperatorInformation<>(getType(), getType());
    SortPartitionOperatorBase<T> noop = new SortPartitionOperatorBase<>(operatorInfo, partitionOrdering, name);
    noop.setInput(input);

    if (this.getParallelism() < 0) {
        // use parallelism of input if not explicitly specified
        noop.setParallelism(input.getParallelism());
    } else {
        // use explicitly specified parallelism
        noop.setParallelism(this.getParallelism());
    }

    return noop;
}