org.apache.flink.api.common.operators.base.ReduceOperatorBase.CombineHint Java Examples

Source File: ReducePerformance.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Runs a grouped reduce over generated records and reports how long the job took.
 *
 * <p>The job groups the records on field {@code "0"}, reduces each group with
 * {@code SumReducer} using the supplied combine hint, and is triggered by
 * {@code count()}, whose result is printed.
 *
 * @param iterator   iterator handed to the {@code SplittableRandomIterator} source
 * @param typeInfo   type information of the generated records
 * @param hint       combine hint applied to the reduce operator
 * @param numRecords record count handed to the {@code SplittableRandomIterator} source
 * @param print      whether the timing line is printed
 * @throws Exception if building or running the job fails
 */
private static <T, B extends CopyableIterator<T>> void testReducePerformance
	(B iterator, TypeInformation<T> typeInfo, CombineHint hint, int numRecords, boolean print) throws Exception {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().enableObjectReuse();

	@SuppressWarnings("unchecked")
	DataSet<T> output =
		env.fromParallelCollection(new SplittableRandomIterator<T, B>(numRecords, iterator), typeInfo)
			.groupBy("0")
			.reduce(new SumReducer()).setCombineHint(hint);

	// System.nanoTime() is monotonic, so the measured interval cannot be skewed
	// (or turn negative) by wall-clock adjustments while the job is running.
	long startNanos = System.nanoTime();

	System.out.println(output.count());

	long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000L;
	if (print) {
		System.out.println("=== Time for " + iterator.getClass().getSimpleName() + " with hint " + hint.toString() + ": " + elapsedMillis + "ms ===");
	}
}

Source File: Simplify.java From flink with Apache License 2.0

6 votes

/**
 * Builds a graph from the input's vertices and a cleaned-up edge set.
 *
 * <p>The edges are first passed through {@code SymmetrizeAndRemoveSelfLoops}
 * and then de-duplicated on fields 0 and 1.
 *
 * @param input graph whose edges are processed
 * @return graph with the input's vertices and context and the processed edges
 * @throws Exception declared by the overridden method
 */
@Override
public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// Stage 1: flat-map every edge through SymmetrizeAndRemoveSelfLoops.
	DataSet<Edge<K, EV>> withoutSelfLoops = input
		.getEdges()
		.flatMap(new SymmetrizeAndRemoveSelfLoops<>(clipAndFlip))
			.setParallelism(parallelism)
			.name("Remove self-loops");

	// Stage 2: keep one edge per distinct (field 0, field 1) pair, using CombineHint.NONE.
	DataSet<Edge<K, EV>> uniqueEdges = withoutSelfLoops
		.distinct(0, 1)
			.setCombineHint(CombineHint.NONE)
			.setParallelism(parallelism)
			.name("Remove duplicate edges");

	// Reassemble the graph around the processed edges.
	return Graph.fromDataSet(input.getVertices(), uniqueEdges, input.getContext());
}

Source File: Simplify.java From flink with Apache License 2.0

6 votes

/**
 * Builds a graph from the input's vertices and a cleaned-up edge set.
 *
 * <p>The edges are first filtered with {@code RemoveSelfLoops} and then
 * de-duplicated on fields 0 and 1.
 *
 * @param input graph whose edges are processed
 * @return graph with the input's vertices and context and the processed edges
 * @throws Exception declared by the overridden method
 */
@Override
public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// Stage 1: filter the edges with RemoveSelfLoops.
	DataSet<Edge<K, EV>> withoutSelfLoops = input
		.getEdges()
		.filter(new RemoveSelfLoops<>())
			.setParallelism(parallelism)
			.name("Remove self-loops");

	// Stage 2: keep one edge per distinct (field 0, field 1) pair, using CombineHint.NONE.
	DataSet<Edge<K, EV>> uniqueEdges = withoutSelfLoops
		.distinct(0, 1)
			.setCombineHint(CombineHint.NONE)
			.setParallelism(parallelism)
			.name("Remove duplicate edges");

	// Reassemble the graph around the processed edges.
	return Graph.fromDataSet(input.getVertices(), uniqueEdges, input.getContext());
}

Source File: DistinctOperator.java From flink with Apache License 2.0

6 votes

/**
 * Translates a distinct on selector-function keys into a chain of operators:
 * a key extractor is appended to the input, a {@code PlanUnwrappingReduceOperator}
 * runs on the keyed tuples, and a key remover is appended to its output.
 *
 * @param rawKeys     selector-function keys with an unbound key type
 * @param function    reduce function handed to the unwrapping reduce operator
 * @param outputType  type information handed to the unwrapping reduce operator
 * @param name        name handed to the unwrapping reduce operator
 * @param input       operator the key extractor is appended to
 * @param parallelism parallelism set on the reduce operator
 * @param hint        combine hint set on the reduce operator
 * @return the operator produced by {@code KeyFunctions.appendKeyRemover}
 */
private static <IN, K> org.apache.flink.api.common.operators.SingleInputOperator<?, IN, ?> translateSelectorFunctionDistinct(
		SelectorFunctionKeys<IN, ?> rawKeys,
		ReduceFunction<IN> function,
		TypeInformation<IN> outputType,
		String name,
		Operator<IN> input,
		int parallelism,
		CombineHint hint) {
	// Callers only know the key type as a wildcard; bind it to K for the rest of the method.
	@SuppressWarnings("unchecked")
	final SelectorFunctionKeys<IN, K> typedKeys = (SelectorFunctionKeys<IN, K>) rawKeys;

	final TypeInformation<Tuple2<K, IN>> keyedType = KeyFunctions.createTypeWithKey(typedKeys);
	final Operator<Tuple2<K, IN>> inputWithKeys = KeyFunctions.appendKeyExtractor(input, typedKeys);

	final PlanUnwrappingReduceOperator<IN, K> distinctReducer =
			new PlanUnwrappingReduceOperator<>(function, typedKeys, name, outputType, keyedType);
	distinctReducer.setInput(inputWithKeys);
	distinctReducer.setCombineHint(hint);
	distinctReducer.setParallelism(parallelism);

	return KeyFunctions.appendKeyRemover(distinctReducer, typedKeys);
}

Source File: ReduceOperator.java From flink with Apache License 2.0

6 votes

/**
 * Translates a reduce on selector-function keys into a chain of operators:
 * a key extractor is appended to the input, a {@code PlanUnwrappingReduceOperator}
 * runs on the keyed tuples, and a key remover is appended to its output.
 *
 * @param rawKeys     selector-function keys with an unbound key type
 * @param function    reduce function handed to the unwrapping reduce operator
 * @param inputType   type information handed to the unwrapping reduce operator
 * @param name        name handed to the unwrapping reduce operator
 * @param input       operator the key extractor is appended to
 * @param parallelism parallelism set on the reduce operator
 * @param hint        combine hint set on the reduce operator
 * @return the operator produced by {@code KeyFunctions.appendKeyRemover}
 */
private static <T, K> org.apache.flink.api.common.operators.SingleInputOperator<?, T, ?> translateSelectorFunctionReducer(
	SelectorFunctionKeys<T, ?> rawKeys,
	ReduceFunction<T> function,
	TypeInformation<T> inputType,
	String name,
	Operator<T> input,
	int parallelism,
	CombineHint hint) {
	// Callers only know the key type as a wildcard; bind it to K for the rest of the method.
	@SuppressWarnings("unchecked")
	final SelectorFunctionKeys<T, K> typedKeys = (SelectorFunctionKeys<T, K>) rawKeys;

	final TypeInformation<Tuple2<K, T>> keyedType = KeyFunctions.createTypeWithKey(typedKeys);
	final Operator<Tuple2<K, T>> inputWithKeys = KeyFunctions.appendKeyExtractor(input, typedKeys);

	final PlanUnwrappingReduceOperator<T, K> keyedReducer =
		new PlanUnwrappingReduceOperator<>(function, typedKeys, name, inputType, keyedType);
	keyedReducer.setInput(inputWithKeys);
	keyedReducer.setParallelism(parallelism);
	keyedReducer.setCombineHint(hint);

	return KeyFunctions.appendKeyRemover(keyedReducer, typedKeys);
}

Source File: ReducePerformance.java From flink with Apache License 2.0

6 votes

/**
 * Runs a grouped reduce over generated records and reports how long the job took.
 *
 * <p>The job groups the records on field {@code "0"}, reduces each group with
 * {@code SumReducer} using the supplied combine hint, and is triggered by
 * {@code count()}, whose result is printed.
 *
 * @param iterator   iterator handed to the {@code SplittableRandomIterator} source
 * @param typeInfo   type information of the generated records
 * @param hint       combine hint applied to the reduce operator
 * @param numRecords record count handed to the {@code SplittableRandomIterator} source
 * @param print      whether the timing line is printed
 * @throws Exception if building or running the job fails
 */
private static <T, B extends CopyableIterator<T>> void testReducePerformance
	(B iterator, TypeInformation<T> typeInfo, CombineHint hint, int numRecords, boolean print) throws Exception {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().enableObjectReuse();

	@SuppressWarnings("unchecked")
	DataSet<T> output =
		env.fromParallelCollection(new SplittableRandomIterator<T, B>(numRecords, iterator), typeInfo)
			.groupBy("0")
			.reduce(new SumReducer()).setCombineHint(hint);

	// System.nanoTime() is monotonic, so the measured interval cannot be skewed
	// (or turn negative) by wall-clock adjustments while the job is running.
	long startNanos = System.nanoTime();

	System.out.println(output.count());

	long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000L;
	if (print) {
		System.out.println("=== Time for " + iterator.getClass().getSimpleName() + " with hint " + hint.toString() + ": " + elapsedMillis + "ms ===");
	}
}

Source File: ReducePerformance.java From flink with Apache License 2.0

6 votes

/**
 * Runs a grouped reduce over generated records and reports how long the job took.
 *
 * <p>The job groups the records on field {@code "0"}, reduces each group with
 * {@code SumReducer} using the supplied combine hint, and is triggered by
 * {@code count()}, whose result is printed.
 *
 * @param iterator   iterator handed to the {@code SplittableRandomIterator} source
 * @param typeInfo   type information of the generated records
 * @param hint       combine hint applied to the reduce operator
 * @param numRecords record count handed to the {@code SplittableRandomIterator} source
 * @param print      whether the timing line is printed
 * @throws Exception if building or running the job fails
 */
private static <T, B extends CopyableIterator<T>> void testReducePerformance
	(B iterator, TypeInformation<T> typeInfo, CombineHint hint, int numRecords, boolean print) throws Exception {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().enableObjectReuse();

	@SuppressWarnings("unchecked")
	DataSet<T> output =
		env.fromParallelCollection(new SplittableRandomIterator<T, B>(numRecords, iterator), typeInfo)
			.groupBy("0")
			.reduce(new SumReducer()).setCombineHint(hint);

	// System.nanoTime() is monotonic, so the measured interval cannot be skewed
	// (or turn negative) by wall-clock adjustments while the job is running.
	long startNanos = System.nanoTime();

	System.out.println(output.count());

	long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000L;
	if (print) {
		System.out.println("=== Time for " + iterator.getClass().getSimpleName() + " with hint " + hint.toString() + ": " + elapsedMillis + "ms ===");
	}
}

Source File: ReduceOperator.java From flink with Apache License 2.0

6 votes

/**
 * Translates a reduce on selector-function keys into a chain of operators:
 * a key extractor is appended to the input, a {@code PlanUnwrappingReduceOperator}
 * runs on the keyed tuples, and a key remover is appended to its output.
 *
 * @param rawKeys     selector-function keys with an unbound key type
 * @param function    reduce function handed to the unwrapping reduce operator
 * @param inputType   type information handed to the unwrapping reduce operator
 * @param name        name handed to the unwrapping reduce operator
 * @param input       operator the key extractor is appended to
 * @param parallelism parallelism set on the reduce operator
 * @param hint        combine hint set on the reduce operator
 * @return the operator produced by {@code KeyFunctions.appendKeyRemover}
 */
private static <T, K> org.apache.flink.api.common.operators.SingleInputOperator<?, T, ?> translateSelectorFunctionReducer(
	SelectorFunctionKeys<T, ?> rawKeys,
	ReduceFunction<T> function,
	TypeInformation<T> inputType,
	String name,
	Operator<T> input,
	int parallelism,
	CombineHint hint) {
	// Callers only know the key type as a wildcard; bind it to K for the rest of the method.
	@SuppressWarnings("unchecked")
	final SelectorFunctionKeys<T, K> typedKeys = (SelectorFunctionKeys<T, K>) rawKeys;

	final TypeInformation<Tuple2<K, T>> keyedType = KeyFunctions.createTypeWithKey(typedKeys);
	final Operator<Tuple2<K, T>> inputWithKeys = KeyFunctions.appendKeyExtractor(input, typedKeys);

	final PlanUnwrappingReduceOperator<T, K> keyedReducer =
		new PlanUnwrappingReduceOperator<>(function, typedKeys, name, inputType, keyedType);
	keyedReducer.setInput(inputWithKeys);
	keyedReducer.setParallelism(parallelism);
	keyedReducer.setCombineHint(hint);

	return KeyFunctions.appendKeyRemover(keyedReducer, typedKeys);
}

Source File: DistinctOperator.java From flink with Apache License 2.0

6 votes

/**
 * Translates a distinct on selector-function keys into a chain of operators:
 * a key extractor is appended to the input, a {@code PlanUnwrappingReduceOperator}
 * runs on the keyed tuples, and a key remover is appended to its output.
 *
 * @param rawKeys     selector-function keys with an unbound key type
 * @param function    reduce function handed to the unwrapping reduce operator
 * @param outputType  type information handed to the unwrapping reduce operator
 * @param name        name handed to the unwrapping reduce operator
 * @param input       operator the key extractor is appended to
 * @param parallelism parallelism set on the reduce operator
 * @param hint        combine hint set on the reduce operator
 * @return the operator produced by {@code KeyFunctions.appendKeyRemover}
 */
private static <IN, K> org.apache.flink.api.common.operators.SingleInputOperator<?, IN, ?> translateSelectorFunctionDistinct(
		SelectorFunctionKeys<IN, ?> rawKeys,
		ReduceFunction<IN> function,
		TypeInformation<IN> outputType,
		String name,
		Operator<IN> input,
		int parallelism,
		CombineHint hint) {
	// Callers only know the key type as a wildcard; bind it to K for the rest of the method.
	@SuppressWarnings("unchecked")
	final SelectorFunctionKeys<IN, K> typedKeys = (SelectorFunctionKeys<IN, K>) rawKeys;

	final TypeInformation<Tuple2<K, IN>> keyedType = KeyFunctions.createTypeWithKey(typedKeys);
	final Operator<Tuple2<K, IN>> inputWithKeys = KeyFunctions.appendKeyExtractor(input, typedKeys);

	final PlanUnwrappingReduceOperator<IN, K> distinctReducer =
			new PlanUnwrappingReduceOperator<>(function, typedKeys, name, outputType, keyedType);
	distinctReducer.setInput(inputWithKeys);
	distinctReducer.setCombineHint(hint);
	distinctReducer.setParallelism(parallelism);

	return KeyFunctions.appendKeyRemover(distinctReducer, typedKeys);
}

Source File: Simplify.java From flink with Apache License 2.0

6 votes

/**
 * Builds a graph from the input's vertices and a cleaned-up edge set.
 *
 * <p>The edges are first filtered with {@code RemoveSelfLoops} and then
 * de-duplicated on fields 0 and 1.
 *
 * @param input graph whose edges are processed
 * @return graph with the input's vertices and context and the processed edges
 * @throws Exception declared by the overridden method
 */
@Override
public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// Stage 1: filter the edges with RemoveSelfLoops.
	DataSet<Edge<K, EV>> withoutSelfLoops = input
		.getEdges()
		.filter(new RemoveSelfLoops<>())
			.setParallelism(parallelism)
			.name("Remove self-loops");

	// Stage 2: keep one edge per distinct (field 0, field 1) pair, using CombineHint.NONE.
	DataSet<Edge<K, EV>> uniqueEdges = withoutSelfLoops
		.distinct(0, 1)
			.setCombineHint(CombineHint.NONE)
			.setParallelism(parallelism)
			.name("Remove duplicate edges");

	// Reassemble the graph around the processed edges.
	return Graph.fromDataSet(input.getVertices(), uniqueEdges, input.getContext());
}

Source File: Simplify.java From flink with Apache License 2.0

6 votes

/**
 * Builds a graph from the input's vertices and a cleaned-up edge set.
 *
 * <p>The edges are first passed through {@code SymmetrizeAndRemoveSelfLoops}
 * and then de-duplicated on fields 0 and 1.
 *
 * @param input graph whose edges are processed
 * @return graph with the input's vertices and context and the processed edges
 * @throws Exception declared by the overridden method
 */
@Override
public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// Stage 1: flat-map every edge through SymmetrizeAndRemoveSelfLoops.
	DataSet<Edge<K, EV>> withoutSelfLoops = input
		.getEdges()
		.flatMap(new SymmetrizeAndRemoveSelfLoops<>(clipAndFlip))
			.setParallelism(parallelism)
			.name("Remove self-loops");

	// Stage 2: keep one edge per distinct (field 0, field 1) pair, using CombineHint.NONE.
	DataSet<Edge<K, EV>> uniqueEdges = withoutSelfLoops
		.distinct(0, 1)
			.setCombineHint(CombineHint.NONE)
			.setParallelism(parallelism)
			.name("Remove duplicate edges");

	// Reassemble the graph around the processed edges.
	return Graph.fromDataSet(input.getVertices(), uniqueEdges, input.getContext());
}

Source File: Simplify.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Builds a graph from the input's vertices and a cleaned-up edge set.
 *
 * <p>The edges are first passed through {@code SymmetrizeAndRemoveSelfLoops}
 * and then de-duplicated on fields 0 and 1.
 *
 * @param input graph whose edges are processed
 * @return graph with the input's vertices and context and the processed edges
 * @throws Exception declared by the overridden method
 */
@Override
public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// Stage 1: flat-map every edge through SymmetrizeAndRemoveSelfLoops.
	DataSet<Edge<K, EV>> withoutSelfLoops = input
		.getEdges()
		.flatMap(new SymmetrizeAndRemoveSelfLoops<>(clipAndFlip))
			.setParallelism(parallelism)
			.name("Remove self-loops");

	// Stage 2: keep one edge per distinct (field 0, field 1) pair, using CombineHint.NONE.
	DataSet<Edge<K, EV>> uniqueEdges = withoutSelfLoops
		.distinct(0, 1)
			.setCombineHint(CombineHint.NONE)
			.setParallelism(parallelism)
			.name("Remove duplicate edges");

	// Reassemble the graph around the processed edges.
	return Graph.fromDataSet(input.getVertices(), uniqueEdges, input.getContext());
}

Source File: Simplify.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Builds a graph from the input's vertices and a cleaned-up edge set.
 *
 * <p>The edges are first filtered with {@code RemoveSelfLoops} and then
 * de-duplicated on fields 0 and 1.
 *
 * @param input graph whose edges are processed
 * @return graph with the input's vertices and context and the processed edges
 * @throws Exception declared by the overridden method
 */
@Override
public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// Stage 1: filter the edges with RemoveSelfLoops.
	DataSet<Edge<K, EV>> withoutSelfLoops = input
		.getEdges()
		.filter(new RemoveSelfLoops<>())
			.setParallelism(parallelism)
			.name("Remove self-loops");

	// Stage 2: keep one edge per distinct (field 0, field 1) pair, using CombineHint.NONE.
	DataSet<Edge<K, EV>> uniqueEdges = withoutSelfLoops
		.distinct(0, 1)
			.setCombineHint(CombineHint.NONE)
			.setParallelism(parallelism)
			.name("Remove duplicate edges");

	// Reassemble the graph around the processed edges.
	return Graph.fromDataSet(input.getVertices(), uniqueEdges, input.getContext());
}

Source File: DistinctOperator.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Translates a distinct on selector-function keys into a chain of operators:
 * a key extractor is appended to the input, a {@code PlanUnwrappingReduceOperator}
 * runs on the keyed tuples, and a key remover is appended to its output.
 *
 * @param rawKeys     selector-function keys with an unbound key type
 * @param function    reduce function handed to the unwrapping reduce operator
 * @param outputType  type information handed to the unwrapping reduce operator
 * @param name        name handed to the unwrapping reduce operator
 * @param input       operator the key extractor is appended to
 * @param parallelism parallelism set on the reduce operator
 * @param hint        combine hint set on the reduce operator
 * @return the operator produced by {@code KeyFunctions.appendKeyRemover}
 */
private static <IN, K> org.apache.flink.api.common.operators.SingleInputOperator<?, IN, ?> translateSelectorFunctionDistinct(
		SelectorFunctionKeys<IN, ?> rawKeys,
		ReduceFunction<IN> function,
		TypeInformation<IN> outputType,
		String name,
		Operator<IN> input,
		int parallelism,
		CombineHint hint) {
	// Callers only know the key type as a wildcard; bind it to K for the rest of the method.
	@SuppressWarnings("unchecked")
	final SelectorFunctionKeys<IN, K> typedKeys = (SelectorFunctionKeys<IN, K>) rawKeys;

	final TypeInformation<Tuple2<K, IN>> keyedType = KeyFunctions.createTypeWithKey(typedKeys);
	final Operator<Tuple2<K, IN>> inputWithKeys = KeyFunctions.appendKeyExtractor(input, typedKeys);

	final PlanUnwrappingReduceOperator<IN, K> distinctReducer =
			new PlanUnwrappingReduceOperator<>(function, typedKeys, name, outputType, keyedType);
	distinctReducer.setInput(inputWithKeys);
	distinctReducer.setCombineHint(hint);
	distinctReducer.setParallelism(parallelism);

	return KeyFunctions.appendKeyRemover(distinctReducer, typedKeys);
}

Source File: ReduceOperator.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Translates a reduce on selector-function keys into a chain of operators:
 * a key extractor is appended to the input, a {@code PlanUnwrappingReduceOperator}
 * runs on the keyed tuples, and a key remover is appended to its output.
 *
 * @param rawKeys     selector-function keys with an unbound key type
 * @param function    reduce function handed to the unwrapping reduce operator
 * @param inputType   type information handed to the unwrapping reduce operator
 * @param name        name handed to the unwrapping reduce operator
 * @param input       operator the key extractor is appended to
 * @param parallelism parallelism set on the reduce operator
 * @param hint        combine hint set on the reduce operator
 * @return the operator produced by {@code KeyFunctions.appendKeyRemover}
 */
private static <T, K> org.apache.flink.api.common.operators.SingleInputOperator<?, T, ?> translateSelectorFunctionReducer(
	SelectorFunctionKeys<T, ?> rawKeys,
	ReduceFunction<T> function,
	TypeInformation<T> inputType,
	String name,
	Operator<T> input,
	int parallelism,
	CombineHint hint) {
	// Callers only know the key type as a wildcard; bind it to K for the rest of the method.
	@SuppressWarnings("unchecked")
	final SelectorFunctionKeys<T, K> typedKeys = (SelectorFunctionKeys<T, K>) rawKeys;

	final TypeInformation<Tuple2<K, T>> keyedType = KeyFunctions.createTypeWithKey(typedKeys);
	final Operator<Tuple2<K, T>> inputWithKeys = KeyFunctions.appendKeyExtractor(input, typedKeys);

	final PlanUnwrappingReduceOperator<T, K> keyedReducer =
		new PlanUnwrappingReduceOperator<>(function, typedKeys, name, inputType, keyedType);
	keyedReducer.setInput(inputWithKeys);
	keyedReducer.setParallelism(parallelism);
	keyedReducer.setCombineHint(hint);

	return KeyFunctions.appendKeyRemover(keyedReducer, typedKeys);
}

Source File: LocalClusteringCoefficient.java From flink with Apache License 2.0

5 votes

/**
 * Joins each vertex's degree with its triangle count.
 *
 * <p>Triangles come from {@code TriangleListing}, are split per vertex and
 * summed per vertex with a hash-based combine hint; degrees come from
 * {@code VertexDegree}. The two are combined with a left outer join on field 0.
 *
 * @param input graph to analyze
 * @return the data set produced by {@code JoinVertexDegreeWithTriangleCount}
 * @throws Exception declared by the overridden method
 */
@Override
public DataSet<Result<K>> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// (u, v, w)
	DataSet<TriangleListing.Result<K>> triangleList = input
		.run(new TriangleListing<K, VV, EV>()
			.setParallelism(parallelism));

	// (u, 1) per triangle vertex, then grouped on u into (u, triangle count)
	DataSet<Tuple2<K, LongValue>> trianglesPerVertex = triangleList
		.flatMap(new SplitTriangles<>())
			.name("Split triangle vertices")
		.groupBy(0)
		.reduce(new CountTriangles<>())
		.setCombineHint(CombineHint.HASH)
			.name("Count triangles")
			.setParallelism(parallelism);

	// (u, deg(u))
	DataSet<Vertex<K, LongValue>> degreePerVertex = input
		.run(new VertexDegree<K, VV, EV>()
			.setIncludeZeroDegreeVertices(includeZeroDegreeVertices.get())
			.setParallelism(parallelism));

	// (u, deg(u), triangle count); the left outer join keeps every vertex of the degree set
	return degreePerVertex
		.leftOuterJoin(trianglesPerVertex)
		.where(0)
		.equalTo(0)
		.with(new JoinVertexDegreeWithTriangleCount<>())
			.setParallelism(parallelism)
			.name("Clustering coefficient");
}

Source File: VertexDegree.java From flink with Apache License 2.0

5 votes

/**
 * Counts, per vertex ID, the edges mapped to that ID.
 *
 * <p>Each edge is mapped to its target or source ID depending on
 * {@code reduceOnTargetId}, and the mapped records are reduced per ID with a
 * hash-based combine hint. When {@code includeZeroDegreeVertices} is set, the
 * result is additionally left-outer-joined onto the graph's vertex set.
 *
 * @param input graph to analyze
 * @return data set of vertices carrying their degree
 * @throws Exception declared by the overridden method
 */
@Override
public DataSet<Vertex<K, LongValue>> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// Pick which edge endpoint is counted.
	final MapFunction<Edge<K, EV>, Vertex<K, LongValue>> edgeToVertexId;
	if (reduceOnTargetId.get()) {
		edgeToVertexId = new MapEdgeToTargetId<>();
	} else {
		edgeToVertexId = new MapEdgeToSourceId<>();
	}

	// (v) per edge, then grouped on v into (v, deg(v))
	DataSet<Vertex<K, LongValue>> degreeByVertex = input
		.getEdges()
		.map(edgeToVertexId)
			.setParallelism(parallelism)
			.name("Edge to vertex ID")
		.groupBy(0)
		.reduce(new DegreeCount<>())
		.setCombineHint(CombineHint.HASH)
			.setParallelism(parallelism)
			.name("Degree count");

	if (!includeZeroDegreeVertices.get()) {
		return degreeByVertex;
	}

	// Join onto the full vertex set so vertices without a counted edge are kept.
	return input
		.getVertices()
		.leftOuterJoin(degreeByVertex)
		.where(0)
		.equalTo(0)
		.with(new JoinVertexWithVertexDegree<>())
			.setParallelism(parallelism)
			.name("Zero degree vertices");
}

Source File: ReduceITCase.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Case 2 with String-based field expressions: groups the 5-tuple data set on
 * "f4" and "f0", reduces with {@code Tuple5Reduce} using the HASH combine hint,
 * and compares the collected tuples against the expected listing.
 */
@Test
public void testReduceOnTupleWithMultipleKeyExpressionsWithHashHint() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Build the whole pipeline in one chain and collect its output.
	List<Tuple5<Integer, Long, Integer, String, Long>> reducedTuples = CollectionDataSets
		.get5TupleDataSet(env)
		.groupBy("f4", "f0")
		.reduce(new Tuple5Reduce())
		.setCombineHint(CombineHint.HASH)
		.collect();

	String expectedTuples = "1,1,0,Hallo,1\n" +
		"2,3,2,Hallo Welt wie,1\n" +
		"2,2,1,Hallo Welt,2\n" +
		"3,9,0,P-),2\n" +
		"3,6,5,BCD,3\n" +
		"4,17,0,P-),1\n" +
		"4,17,0,P-),2\n" +
		"5,11,10,GHI,1\n" +
		"5,29,0,P-),2\n" +
		"5,25,0,P-),3\n";

	compareResultAsTuples(reducedTuples, expectedTuples);
}

Source File: EdgeMetrics.java From flink with Apache License 2.0

5 votes

/**
 * Adds the edge-metrics computation for the given graph to the plan.
 *
 * <p>Edges annotated by {@code EdgeDegreePair} are mapped to per-edge
 * statistics, summed per field 0 with a hash-based combine hint, and written
 * to a fresh {@code EdgeMetricsHelper} stored in {@code edgeMetricsHelper}.
 *
 * @param input graph to analyze
 * @return this instance
 * @throws Exception declared by the overridden method
 */
@Override
public EdgeMetrics<K, VV, EV> run(Graph<K, VV, EV> input)
		throws Exception {
	super.run(input);

	// (s, t, (d(s), d(t)))
	DataSet<Edge<K, Tuple3<EV, LongValue, LongValue>>> degreeAnnotatedEdges = input
		.run(new EdgeDegreePair<K, VV, EV>()
			.setReduceOnTargetId(reduceOnTargetId)
			.setParallelism(parallelism));

	// Per edge: (s, d(s), count of (u, v) where deg(u) < deg(v) or (deg(u) == deg(v) and u < v))
	DataSet<Tuple3<K, LongValue, LongValue>> perEdgeStats = degreeAnnotatedEdges
		.map(new EdgeStats<>())
			.setParallelism(parallelism)
			.name("Edge stats");

	// Summed per s.
	DataSet<Tuple3<K, LongValue, LongValue>> perVertexStats = perEdgeStats
		.groupBy(0)
		.reduce(new SumEdgeStats<>())
		.setCombineHint(CombineHint.HASH)
			.setParallelism(parallelism)
			.name("Sum edge stats");

	// The helper is the sink for the summed statistics.
	edgeMetricsHelper = new EdgeMetricsHelper<>();

	perVertexStats
		.output(edgeMetricsHelper)
			.setParallelism(parallelism)
			.name("Edge metrics");

	return this;
}

Source File: EdgeMetrics.java From flink with Apache License 2.0

5 votes

/**
 * Adds the edge-metrics computation for the given graph to the plan.
 *
 * <p>Edges annotated by {@code EdgeDegreesPair} are flat-mapped to statistics,
 * group-reduced on fields 0 and 1, summed per field 0 with a hash-based
 * combine hint, and written to a fresh {@code EdgeMetricsHelper} stored in
 * {@code edgeMetricsHelper}.
 *
 * @param input graph to analyze
 * @return this instance
 * @throws Exception declared by the overridden method
 */
@Override
public EdgeMetrics<K, VV, EV> run(Graph<K, VV, EV> input)
		throws Exception {
	super.run(input);

	// (s, t, (d(s), d(t)))
	DataSet<Edge<K, Tuple3<EV, Degrees, Degrees>>> degreeAnnotatedEdges = input
		.run(new EdgeDegreesPair<K, VV, EV>()
			.setParallelism(parallelism));

	// Statistics per edge, group-reduced on fields (0, 1).
	DataSet<Tuple3<K, Degrees, LongValue>> groupedStats = degreeAnnotatedEdges
		.flatMap(new EdgeStats<>())
			.setParallelism(parallelism)
			.name("Edge stats")
		.groupBy(0, 1)
		.reduceGroup(new ReduceEdgeStats<>())
			.setParallelism(parallelism)
			.name("Reduce edge stats");

	// (s, d(s), count of (u, v) where deg(u) < deg(v) or (deg(u) == deg(v) and u < v))
	DataSet<Tuple3<K, Degrees, LongValue>> perVertexStats = groupedStats
		.groupBy(0)
		.reduce(new SumEdgeStats<>())
		.setCombineHint(CombineHint.HASH)
			.setParallelism(parallelism)
			.name("Sum edge stats");

	// The helper is the sink for the summed statistics.
	edgeMetricsHelper = new EdgeMetricsHelper<>();

	perVertexStats
		.output(edgeMetricsHelper)
			.setParallelism(parallelism)
			.name("Edge metrics");

	return this;
}

Source File: LocalClusteringCoefficient.java From flink with Apache License 2.0

5 votes

/**
 * Joins each vertex's degrees with its triangle count.
 *
 * <p>Triangles come from {@code TriangleListing}, are split per vertex and
 * summed per vertex with a hash-based combine hint; degrees come from
 * {@code VertexDegrees}. The two are combined with a left outer join on field 0.
 *
 * @param input graph to analyze
 * @return the data set produced by {@code JoinVertexDegreeWithTriangleCount}
 * @throws Exception declared by the overridden method
 */
@Override
public DataSet<Result<K>> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// (u, v, w, bitmask)
	DataSet<TriangleListing.Result<K>> triangleList = input
		.run(new TriangleListing<K, VV, EV>()
			.setParallelism(parallelism));

	// (u, edge count) per triangle vertex, then grouped on u into (u, triangle count)
	DataSet<Tuple2<K, LongValue>> trianglesPerVertex = triangleList
		.flatMap(new SplitTriangles<>())
			.name("Split triangle vertices")
		.groupBy(0)
		.reduce(new CountTriangles<>())
		.setCombineHint(CombineHint.HASH)
			.name("Count triangles")
			.setParallelism(parallelism);

	// (u, deg(u))
	DataSet<Vertex<K, Degrees>> degreesPerVertex = input
		.run(new VertexDegrees<K, VV, EV>()
			.setIncludeZeroDegreeVertices(includeZeroDegreeVertices.get())
			.setParallelism(parallelism));

	// (u, deg(u), triangle count); the left outer join keeps every vertex of the degree set
	return degreesPerVertex
		.leftOuterJoin(trianglesPerVertex)
		.where(0)
		.equalTo(0)
		.with(new JoinVertexDegreeWithTriangleCount<>())
			.setParallelism(parallelism)
			.name("Clustering coefficient");
}

Source File: LocalClusteringCoefficient.java From flink with Apache License 2.0

5 votes

/**
 * Joins each vertex's degree with its triangle count.
 *
 * <p>Triangles come from {@code TriangleListing}, are split per vertex and
 * summed per vertex with a hash-based combine hint; degrees come from
 * {@code VertexDegree}. The two are combined with a left outer join on field 0.
 *
 * @param input graph to analyze
 * @return the data set produced by {@code JoinVertexDegreeWithTriangleCount}
 * @throws Exception declared by the overridden method
 */
@Override
public DataSet<Result<K>> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// (u, v, w)
	DataSet<TriangleListing.Result<K>> triangleList = input
		.run(new TriangleListing<K, VV, EV>()
			.setParallelism(parallelism));

	// (u, 1) per triangle vertex, then grouped on u into (u, triangle count)
	DataSet<Tuple2<K, LongValue>> trianglesPerVertex = triangleList
		.flatMap(new SplitTriangles<>())
			.name("Split triangle vertices")
		.groupBy(0)
		.reduce(new CountTriangles<>())
		.setCombineHint(CombineHint.HASH)
			.name("Count triangles")
			.setParallelism(parallelism);

	// (u, deg(u))
	DataSet<Vertex<K, LongValue>> degreePerVertex = input
		.run(new VertexDegree<K, VV, EV>()
			.setIncludeZeroDegreeVertices(includeZeroDegreeVertices.get())
			.setParallelism(parallelism));

	// (u, deg(u), triangle count); the left outer join keeps every vertex of the degree set
	return degreePerVertex
		.leftOuterJoin(trianglesPerVertex)
		.where(0)
		.equalTo(0)
		.with(new JoinVertexDegreeWithTriangleCount<>())
			.setParallelism(parallelism)
			.name("Clustering coefficient");
}

Source File: EdgeMetrics.java From flink with Apache License 2.0

5 votes

/**
 * Adds the edge-metrics computation for the given graph to the plan.
 *
 * <p>Edges annotated by {@code EdgeDegreesPair} are flat-mapped to statistics,
 * group-reduced on fields 0 and 1, summed per field 0 with a hash-based
 * combine hint, and written to a fresh {@code EdgeMetricsHelper} stored in
 * {@code edgeMetricsHelper}.
 *
 * @param input graph to analyze
 * @return this instance
 * @throws Exception declared by the overridden method
 */
@Override
public EdgeMetrics<K, VV, EV> run(Graph<K, VV, EV> input)
		throws Exception {
	super.run(input);

	// (s, t, (d(s), d(t)))
	DataSet<Edge<K, Tuple3<EV, Degrees, Degrees>>> degreeAnnotatedEdges = input
		.run(new EdgeDegreesPair<K, VV, EV>()
			.setParallelism(parallelism));

	// Statistics per edge, group-reduced on fields (0, 1).
	DataSet<Tuple3<K, Degrees, LongValue>> groupedStats = degreeAnnotatedEdges
		.flatMap(new EdgeStats<>())
			.setParallelism(parallelism)
			.name("Edge stats")
		.groupBy(0, 1)
		.reduceGroup(new ReduceEdgeStats<>())
			.setParallelism(parallelism)
			.name("Reduce edge stats");

	// (s, d(s), count of (u, v) where deg(u) < deg(v) or (deg(u) == deg(v) and u < v))
	DataSet<Tuple3<K, Degrees, LongValue>> perVertexStats = groupedStats
		.groupBy(0)
		.reduce(new SumEdgeStats<>())
		.setCombineHint(CombineHint.HASH)
			.setParallelism(parallelism)
			.name("Sum edge stats");

	// The helper is the sink for the summed statistics.
	edgeMetricsHelper = new EdgeMetricsHelper<>();

	perVertexStats
		.output(edgeMetricsHelper)
			.setParallelism(parallelism)
			.name("Edge metrics");

	return this;
}

Source File: EdgeMetrics.java From flink with Apache License 2.0

5 votes

/**
 * Adds the edge-metrics computation for the given graph to the plan.
 *
 * <p>Edges annotated by {@code EdgeDegreePair} are mapped to per-edge
 * statistics, summed per field 0 with a hash-based combine hint, and written
 * to a fresh {@code EdgeMetricsHelper} stored in {@code edgeMetricsHelper}.
 *
 * @param input graph to analyze
 * @return this instance
 * @throws Exception declared by the overridden method
 */
@Override
public EdgeMetrics<K, VV, EV> run(Graph<K, VV, EV> input)
		throws Exception {
	super.run(input);

	// (s, t, (d(s), d(t)))
	DataSet<Edge<K, Tuple3<EV, LongValue, LongValue>>> degreeAnnotatedEdges = input
		.run(new EdgeDegreePair<K, VV, EV>()
			.setReduceOnTargetId(reduceOnTargetId)
			.setParallelism(parallelism));

	// Per edge: (s, d(s), count of (u, v) where deg(u) < deg(v) or (deg(u) == deg(v) and u < v))
	DataSet<Tuple3<K, LongValue, LongValue>> perEdgeStats = degreeAnnotatedEdges
		.map(new EdgeStats<>())
			.setParallelism(parallelism)
			.name("Edge stats");

	// Summed per s.
	DataSet<Tuple3<K, LongValue, LongValue>> perVertexStats = perEdgeStats
		.groupBy(0)
		.reduce(new SumEdgeStats<>())
		.setCombineHint(CombineHint.HASH)
			.setParallelism(parallelism)
			.name("Sum edge stats");

	// The helper is the sink for the summed statistics.
	edgeMetricsHelper = new EdgeMetricsHelper<>();

	perVertexStats
		.output(edgeMetricsHelper)
			.setParallelism(parallelism)
			.name("Edge metrics");

	return this;
}

Source File: VertexInDegree.java From flink with Apache License 2.0

5 votes

/**
 * Counts, per target vertex ID, the edges pointing at that ID.
 *
 * <p>Each edge is mapped with {@code MapEdgeToTargetId} and the mapped records
 * are reduced per ID with a hash-based combine hint. When
 * {@code includeZeroDegreeVertices} is set, the result is additionally
 * left-outer-joined onto the graph's vertex set.
 *
 * @param input graph to analyze
 * @return data set of vertices carrying their degree
 * @throws Exception declared by the overridden method
 */
@Override
public DataSet<Vertex<K, LongValue>> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// (t) per edge, then grouped on t into (t, d(t))
	DataSet<Vertex<K, LongValue>> inDegreeByVertex = input
		.getEdges()
		.map(new MapEdgeToTargetId<>())
			.setParallelism(parallelism)
			.name("Edge to target ID")
		.groupBy(0)
		.reduce(new DegreeCount<>())
		.setCombineHint(CombineHint.HASH)
			.setParallelism(parallelism)
			.name("Degree count");

	if (!includeZeroDegreeVertices.get()) {
		return inDegreeByVertex;
	}

	// Join onto the full vertex set so vertices without a counted edge are kept.
	return input.getVertices()
		.leftOuterJoin(inDegreeByVertex)
		.where(0)
		.equalTo(0)
		.with(new JoinVertexWithVertexDegree<>())
			.setParallelism(parallelism)
			.name("Zero degree vertices");
}

Source File: ReduceITCase.java From flink with Apache License 2.0

5 votes

/**
 * Case 2 with String-based field expressions: groups the 5-tuple data set on
 * "f4" and "f0", reduces with {@code Tuple5Reduce} using the HASH combine hint,
 * and compares the collected tuples against the expected listing.
 */
@Test
public void testReduceOnTupleWithMultipleKeyExpressionsWithHashHint() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Build the whole pipeline in one chain and collect its output.
	List<Tuple5<Integer, Long, Integer, String, Long>> reducedTuples = CollectionDataSets
		.get5TupleDataSet(env)
		.groupBy("f4", "f0")
		.reduce(new Tuple5Reduce())
		.setCombineHint(CombineHint.HASH)
		.collect();

	String expectedTuples = "1,1,0,Hallo,1\n" +
		"2,3,2,Hallo Welt wie,1\n" +
		"2,2,1,Hallo Welt,2\n" +
		"3,9,0,P-),2\n" +
		"3,6,5,BCD,3\n" +
		"4,17,0,P-),1\n" +
		"4,17,0,P-),2\n" +
		"5,11,10,GHI,1\n" +
		"5,29,0,P-),2\n" +
		"5,25,0,P-),3\n";

	compareResultAsTuples(reducedTuples, expectedTuples);
}

Source File: ReduceOperator.java From flink with Apache License 2.0

5 votes

/**
 * Creates a reduce operator over the data set wrapped by the given grouping.
 *
 * <p>The combine hint starts out as {@code CombineHint.OPTIMIZER_CHOOSES}.
 *
 * @param input       grouping whose input data set (and its type) is handed to the superclass
 * @param function    reduce function stored on this operator
 * @param defaultName default name stored on this operator
 */
public ReduceOperator(Grouping<IN> input, ReduceFunction<IN> function, String defaultName) {
	super(input.getInputDataSet(), input.getInputDataSet().getType());

	// Initial combine hint for a freshly constructed operator.
	this.hint = CombineHint.OPTIMIZER_CHOOSES;

	this.defaultName = defaultName;
	this.grouper = input;
	this.function = function;
}

Source File: LocalClusteringCoefficient.java From flink with Apache License 2.0

5 votes

/**
 * Joins each vertex's degrees with its triangle count.
 *
 * <p>Triangles come from {@code TriangleListing}, are split per vertex and
 * summed per vertex with a hash-based combine hint; degrees come from
 * {@code VertexDegrees}. The two are combined with a left outer join on field 0.
 *
 * @param input graph to analyze
 * @return the data set produced by {@code JoinVertexDegreeWithTriangleCount}
 * @throws Exception declared by the overridden method
 */
@Override
public DataSet<Result<K>> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// (u, v, w, bitmask)
	DataSet<TriangleListing.Result<K>> triangleList = input
		.run(new TriangleListing<K, VV, EV>()
			.setParallelism(parallelism));

	// (u, edge count) per triangle vertex, then grouped on u into (u, triangle count)
	DataSet<Tuple2<K, LongValue>> trianglesPerVertex = triangleList
		.flatMap(new SplitTriangles<>())
			.name("Split triangle vertices")
		.groupBy(0)
		.reduce(new CountTriangles<>())
		.setCombineHint(CombineHint.HASH)
			.name("Count triangles")
			.setParallelism(parallelism);

	// (u, deg(u))
	DataSet<Vertex<K, Degrees>> degreesPerVertex = input
		.run(new VertexDegrees<K, VV, EV>()
			.setIncludeZeroDegreeVertices(includeZeroDegreeVertices.get())
			.setParallelism(parallelism));

	// (u, deg(u), triangle count); the left outer join keeps every vertex of the degree set
	return degreesPerVertex
		.leftOuterJoin(trianglesPerVertex)
		.where(0)
		.equalTo(0)
		.with(new JoinVertexDegreeWithTriangleCount<>())
			.setParallelism(parallelism)
			.name("Clustering coefficient");
}

Source File: VertexDegree.java From flink with Apache License 2.0

5 votes

/**
 * Counts, per vertex ID, the edges mapped to that ID.
 *
 * <p>Each edge is mapped to its target or source ID depending on
 * {@code reduceOnTargetId}, and the mapped records are reduced per ID with a
 * hash-based combine hint. When {@code includeZeroDegreeVertices} is set, the
 * result is additionally left-outer-joined onto the graph's vertex set.
 *
 * @param input graph to analyze
 * @return data set of vertices carrying their degree
 * @throws Exception declared by the overridden method
 */
@Override
public DataSet<Vertex<K, LongValue>> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// Pick which edge endpoint is counted.
	final MapFunction<Edge<K, EV>, Vertex<K, LongValue>> edgeToVertexId;
	if (reduceOnTargetId.get()) {
		edgeToVertexId = new MapEdgeToTargetId<>();
	} else {
		edgeToVertexId = new MapEdgeToSourceId<>();
	}

	// (v) per edge, then grouped on v into (v, deg(v))
	DataSet<Vertex<K, LongValue>> degreeByVertex = input
		.getEdges()
		.map(edgeToVertexId)
			.setParallelism(parallelism)
			.name("Edge to vertex ID")
		.groupBy(0)
		.reduce(new DegreeCount<>())
		.setCombineHint(CombineHint.HASH)
			.setParallelism(parallelism)
			.name("Degree count");

	if (!includeZeroDegreeVertices.get()) {
		return degreeByVertex;
	}

	// Join onto the full vertex set so vertices without a counted edge are kept.
	return input
		.getVertices()
		.leftOuterJoin(degreeByVertex)
		.where(0)
		.equalTo(0)
		.with(new JoinVertexWithVertexDegree<>())
			.setParallelism(parallelism)
			.name("Zero degree vertices");
}

Source File: VertexOutDegree.java From flink with Apache License 2.0

5 votes

/**
 * Counts, per source vertex ID, the edges leaving that ID.
 *
 * <p>Each edge is mapped with {@code MapEdgeToSourceId} and the mapped records
 * are reduced per ID with a hash-based combine hint. When
 * {@code includeZeroDegreeVertices} is set, the result is additionally
 * left-outer-joined onto the graph's vertex set.
 *
 * @param input graph to analyze
 * @return data set of vertices carrying their degree
 * @throws Exception declared by the overridden method
 */
@Override
public DataSet<Vertex<K, LongValue>> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// (s) per edge, then grouped on s into (s, d(s))
	DataSet<Vertex<K, LongValue>> outDegreeByVertex = input
		.getEdges()
		.map(new MapEdgeToSourceId<>())
			.setParallelism(parallelism)
			.name("Edge to source ID")
		.groupBy(0)
		.reduce(new DegreeCount<>())
		.setCombineHint(CombineHint.HASH)
			.setParallelism(parallelism)
			.name("Degree count");

	if (!includeZeroDegreeVertices.get()) {
		return outDegreeByVertex;
	}

	// Join onto the full vertex set so vertices without a counted edge are kept.
	return input.getVertices()
		.leftOuterJoin(outDegreeByVertex)
		.where(0)
		.equalTo(0)
		.with(new JoinVertexWithVertexDegree<>())
			.setParallelism(parallelism)
			.name("Zero degree vertices");
}

org.apache.flink.api.common.operators.base.ReduceOperatorBase.CombineHint Java Examples