org.apache.flink.api.java.operators.CoGroupOperator Java Examples
The following examples show how to use org.apache.flink.api.java.operators.CoGroupOperator.
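Before the examples, here is a minimal sketch (not taken from any of the projects below; the data sets, values, and field names are made up) of the call chain that produces a CoGroupOperator: two DataSets are grouped on a key with where()/equalTo() and combined by a CoGroupFunction passed to with().

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

DataSet<Tuple2<Long, String>> first = env.fromElements(Tuple2.of(1L, "a"), Tuple2.of(2L, "b"));
DataSet<Tuple2<Long, Integer>> second = env.fromElements(Tuple2.of(1L, 10), Tuple2.of(3L, 30));

// where()/equalTo() select the grouping key of each input; with() supplies the CoGroupFunction
CoGroupOperator<Tuple2<Long, String>, Tuple2<Long, Integer>, String> joined =
    first.coGroup(second)
        .where(0)
        .equalTo(0)
        .with(new CoGroupFunction<Tuple2<Long, String>, Tuple2<Long, Integer>, String>() {
            @Override
            public void coGroup(Iterable<Tuple2<Long, String>> left,
                    Iterable<Tuple2<Long, Integer>> right, Collector<String> out) {
                // both groups share the same key; either group may be empty
                for (Tuple2<Long, String> l : left) {
                    for (Tuple2<Long, Integer> r : right) {
                        out.collect(l.f1 + ":" + r.f1);
                    }
                }
            }
        });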
Example #1
Source File: ScatterGatherIteration.java From flink with Apache License 2.0 | 5 votes |
/**
 * Method that builds the scatter function using a coGroup operator for a simple vertex (without
 * degrees).
 * It afterwards configures the function with a custom name and broadcast variables.
 *
 * @param iteration
 * @param messageTypeInfo
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @return the scatter function
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunction(
        DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration,
        TypeInformation<Tuple2<K, Message>> messageTypeInfo, int whereArg, int equalToArg,
        DataSet<LongValue> numberOfVertices) {

    // build the scatter function (co group)
    CoGroupOperator<?, ?, Tuple2<K, Message>> messages;
    ScatterUdfWithEdgeValues<K, VV, VV, Message, EV> messenger =
            new ScatterUdfWithEVsSimpleVV<>(scatterFunction, messageTypeInfo);

    messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(whereArg)
            .equalTo(equalToArg).with(messenger);

    // configure coGroup message function with name and broadcast variables
    messages = messages.name("Messaging");
    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getScatterBcastVars()) {
            messages = messages.withBroadcastSet(e.f1, e.f0);
        }
        if (this.configuration.isOptNumVertices()) {
            messages = messages.withBroadcastSet(numberOfVertices, "number of vertices");
        }
    }

    return messages;
}
Example #2
Source File: ScatterGatherIteration.java From flink with Apache License 2.0 | 5 votes |
private <VVWithDegree> void configureUpdateFunction(CoGroupOperator<?, ?, Vertex<K, VVWithDegree>> updates) {

    // configure coGroup update function with name and broadcast variables
    updates = updates.name("Vertex State Updates");
    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getGatherBcastVars()) {
            updates = updates.withBroadcastSet(e.f1, e.f0);
        }
    }

    // let the operator know that we preserve the key field
    updates.withForwardedFieldsFirst("0").withForwardedFieldsSecond("0");
}
Example #3
Source File: ScatterGatherIteration.java From flink with Apache License 2.0 | 5 votes |
/**
 * Method that builds the scatter function using a coGroup operator for a vertex
 * containing degree information.
 * It afterwards configures the function with a custom name and broadcast variables.
 *
 * @param iteration
 * @param messageTypeInfo
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @return the scatter function
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunctionVerticesWithDegrees(
        DeltaIteration<Vertex<K, Tuple3<VV, LongValue, LongValue>>, Vertex<K, Tuple3<VV, LongValue, LongValue>>> iteration,
        TypeInformation<Tuple2<K, Message>> messageTypeInfo, int whereArg, int equalToArg,
        DataSet<LongValue> numberOfVertices) {

    // build the scatter function (co group)
    CoGroupOperator<?, ?, Tuple2<K, Message>> messages;
    ScatterUdfWithEdgeValues<K, Tuple3<VV, LongValue, LongValue>, VV, Message, EV> messenger =
            new ScatterUdfWithEVsVVWithDegrees<>(scatterFunction, messageTypeInfo);

    messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(whereArg)
            .equalTo(equalToArg).with(messenger);

    // configure coGroup message function with name and broadcast variables
    messages = messages.name("Messaging");
    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getScatterBcastVars()) {
            messages = messages.withBroadcastSet(e.f1, e.f0);
        }
        if (this.configuration.isOptNumVertices()) {
            messages = messages.withBroadcastSet(numberOfVertices, "number of vertices");
        }
    }

    return messages;
}
Example #4
Source File: CoGroupConnectedComponentsITCase.java From flink with Apache License 2.0 | 5 votes |
@Override
protected void testProgram() throws Exception {

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple1<Long>> initialVertices = env.readCsvFile(verticesPath).fieldDelimiter(" ").types(Long.class).name("Vertices");

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class).name("Edges");

    DataSet<Tuple2<Long, Long>> verticesWithId = initialVertices.map(new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
        @Override
        public Tuple2<Long, Long> map(Tuple1<Long> value) throws Exception {
            return new Tuple2<>(value.f0, value.f0);
        }
    }).name("Assign Vertex Ids");

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithId.iterateDelta(verticesWithId, MAX_ITERATIONS, 0);

    JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> joinWithNeighbors = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> join(Tuple2<Long, Long> first, Tuple2<Long, Long> second) throws Exception {
                    return new Tuple2<>(second.f1, first.f1);
                }
            })
            .name("Join Candidate Id With Neighbor");

    CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> minAndUpdate = joinWithNeighbors
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate())
            .name("min Id and Update");

    iteration.closeWith(minAndUpdate, minAndUpdate).writeAsCsv(resultPath, "\n", " ").name("Result");

    env.execute("Workset Connected Components");
}
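The MinIdAndUpdate co-group function used above is defined elsewhere in the test class and is not shown on this page. As an illustration of the kind of function that fits into this coGroup (a sketch, not the actual Flink test code), it would take the minimum candidate component id from the first group and emit an update only when that id improves on the vertex's current component id in the solution set.

// Illustrative sketch only; the real MinIdAndUpdate lives in the Flink test sources.
public static final class MinIdAndUpdate
        implements CoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> {

    @Override
    public void coGroup(Iterable<Tuple2<Long, Long>> candidates,
            Iterable<Tuple2<Long, Long>> current, Collector<Tuple2<Long, Long>> out) {

        Iterator<Tuple2<Long, Long>> currentIt = current.iterator();
        if (!currentIt.hasNext()) {
            // vertex not present in the solution set: nothing to update
            return;
        }
        Tuple2<Long, Long> currentVertex = currentIt.next();

        // find the smallest candidate component id for this vertex
        long minCandidate = Long.MAX_VALUE;
        for (Tuple2<Long, Long> candidate : candidates) {
            minCandidate = Math.min(minCandidate, candidate.f1);
        }

        // emit an update only if it improves on the current component id
        if (minCandidate < currentVertex.f1) {
            out.collect(new Tuple2<>(currentVertex.f0, minCandidate));
        }
    }
}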
Example #5
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0 | 5 votes |
@Override
public void translateNode(CoGroupByKey<K> transform, FlinkBatchTranslationContext context) {
    KeyedPCollectionTuple<K> input = context.getInput(transform);

    CoGbkResultSchema schema = input.getCoGbkResultSchema();
    List<KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?>> keyedCollections = input.getKeyedCollections();

    KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> taggedCollection1 = keyedCollections.get(0);
    KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> taggedCollection2 = keyedCollections.get(1);

    TupleTag<?> tupleTag1 = taggedCollection1.getTupleTag();
    TupleTag<?> tupleTag2 = taggedCollection2.getTupleTag();

    PCollection<? extends KV<K, ?>> collection1 = taggedCollection1.getCollection();
    PCollection<? extends KV<K, ?>> collection2 = taggedCollection2.getCollection();

    DataSet<KV<K, V1>> inputDataSet1 = context.getInputDataSet(collection1);
    DataSet<KV<K, V2>> inputDataSet2 = context.getInputDataSet(collection2);

    TypeInformation<KV<K, CoGbkResult>> typeInfo = context.getOutputTypeInfo();

    FlinkCoGroupKeyedListAggregator<K, V1, V2> aggregator = new FlinkCoGroupKeyedListAggregator<>(schema, tupleTag1, tupleTag2);

    Keys.ExpressionKeys<KV<K, V1>> keySelector1 = new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet1.getType());
    Keys.ExpressionKeys<KV<K, V2>> keySelector2 = new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet2.getType());

    DataSet<KV<K, CoGbkResult>> out = new CoGroupOperator<>(inputDataSet1, inputDataSet2,
            keySelector1, keySelector2, aggregator, typeInfo, null, transform.getName());
    context.setOutputDataSet(context.getOutput(transform), out);
}
Example #6
Source File: CoGroupConnectedComponentsITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Override
protected void testProgram() throws Exception {

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple1<Long>> initialVertices = env.readCsvFile(verticesPath).fieldDelimiter(" ").types(Long.class).name("Vertices");

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class).name("Edges");

    DataSet<Tuple2<Long, Long>> verticesWithId = initialVertices.map(new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
        @Override
        public Tuple2<Long, Long> map(Tuple1<Long> value) throws Exception {
            return new Tuple2<>(value.f0, value.f0);
        }
    }).name("Assign Vertex Ids");

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithId.iterateDelta(verticesWithId, MAX_ITERATIONS, 0);

    JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> joinWithNeighbors = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> join(Tuple2<Long, Long> first, Tuple2<Long, Long> second) throws Exception {
                    return new Tuple2<>(second.f1, first.f1);
                }
            })
            .name("Join Candidate Id With Neighbor");

    CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> minAndUpdate = joinWithNeighbors
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate())
            .name("min Id and Update");

    iteration.closeWith(minAndUpdate, minAndUpdate).writeAsCsv(resultPath, "\n", " ").name("Result");

    env.execute("Workset Connected Components");
}
Example #7
Source File: ScatterGatherIteration.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Method that builds the scatter function using a coGroup operator for a simple vertex (without
 * degrees).
 * It afterwards configures the function with a custom name and broadcast variables.
 *
 * @param iteration
 * @param messageTypeInfo
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @return the scatter function
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunction(
        DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration,
        TypeInformation<Tuple2<K, Message>> messageTypeInfo, int whereArg, int equalToArg,
        DataSet<LongValue> numberOfVertices) {

    // build the scatter function (co group)
    CoGroupOperator<?, ?, Tuple2<K, Message>> messages;
    ScatterUdfWithEdgeValues<K, VV, VV, Message, EV> messenger =
            new ScatterUdfWithEVsSimpleVV<>(scatterFunction, messageTypeInfo);

    messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(whereArg)
            .equalTo(equalToArg).with(messenger);

    // configure coGroup message function with name and broadcast variables
    messages = messages.name("Messaging");
    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getScatterBcastVars()) {
            messages = messages.withBroadcastSet(e.f1, e.f0);
        }
        if (this.configuration.isOptNumVertices()) {
            messages = messages.withBroadcastSet(numberOfVertices, "number of vertices");
        }
    }

    return messages;
}
Example #8
Source File: ScatterGatherIteration.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Method that builds the scatter function using a coGroup operator for a vertex
 * containing degree information.
 * It afterwards configures the function with a custom name and broadcast variables.
 *
 * @param iteration
 * @param messageTypeInfo
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @return the scatter function
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunctionVerticesWithDegrees(
        DeltaIteration<Vertex<K, Tuple3<VV, LongValue, LongValue>>, Vertex<K, Tuple3<VV, LongValue, LongValue>>> iteration,
        TypeInformation<Tuple2<K, Message>> messageTypeInfo, int whereArg, int equalToArg,
        DataSet<LongValue> numberOfVertices) {

    // build the scatter function (co group)
    CoGroupOperator<?, ?, Tuple2<K, Message>> messages;
    ScatterUdfWithEdgeValues<K, Tuple3<VV, LongValue, LongValue>, VV, Message, EV> messenger =
            new ScatterUdfWithEVsVVWithDegrees<>(scatterFunction, messageTypeInfo);

    messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(whereArg)
            .equalTo(equalToArg).with(messenger);

    // configure coGroup message function with name and broadcast variables
    messages = messages.name("Messaging");
    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getScatterBcastVars()) {
            messages = messages.withBroadcastSet(e.f1, e.f0);
        }
        if (this.configuration.isOptNumVertices()) {
            messages = messages.withBroadcastSet(numberOfVertices, "number of vertices");
        }
    }

    return messages;
}
Example #9
Source File: ScatterGatherIteration.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private <VVWithDegree> void configureUpdateFunction(CoGroupOperator<?, ?, Vertex<K, VVWithDegree>> updates) {

    // configure coGroup update function with name and broadcast variables
    updates = updates.name("Vertex State Updates");
    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getGatherBcastVars()) {
            updates = updates.withBroadcastSet(e.f1, e.f0);
        }
    }

    // let the operator know that we preserve the key field
    updates.withForwardedFieldsFirst("0").withForwardedFieldsSecond("0");
}
Example #10
Source File: VertexCentricIteration.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates the operator that represents this vertex-centric graph computation.
 *
 * <p>The Pregel iteration is mapped to delta iteration as follows.
 * The solution set consists of the set of active vertices and the workset contains the set of messages
 * send to vertices during the previous superstep. Initially, the workset contains a null message for each vertex.
 * In the beginning of a superstep, the solution set is joined with the workset to produce
 * a dataset containing tuples of vertex state and messages (vertex inbox).
 * The superstep compute UDF is realized with a coGroup between the vertices with inbox and the graph edges.
 * The output of the compute UDF contains both the new vertex values and the new messages produced.
 * These are directed to the solution set delta and new workset, respectively, with subsequent flatMaps.
 *
 * @return The operator that represents this vertex-centric graph computation.
 */
@Override
public DataSet<Vertex<K, VV>> createResult() {

    if (this.initialVertices == null) {
        throw new IllegalStateException("The input data set has not been set.");
    }

    // prepare the type information
    TypeInformation<K> keyType = ((TupleTypeInfo<?>) initialVertices.getType()).getTypeAt(0);
    TypeInformation<Tuple2<K, Message>> messageTypeInfo = new TupleTypeInfo<>(keyType, messageType);
    TypeInformation<Vertex<K, VV>> vertexType = initialVertices.getType();
    TypeInformation<Either<Vertex<K, VV>, Tuple2<K, Message>>> intermediateTypeInfo =
            new EitherTypeInfo<>(vertexType, messageTypeInfo);
    TypeInformation<Either<NullValue, Message>> nullableMsgTypeInfo =
            new EitherTypeInfo<>(TypeExtractor.getForClass(NullValue.class), messageType);
    TypeInformation<Tuple2<K, Either<NullValue, Message>>> workSetTypeInfo =
            new TupleTypeInfo<>(keyType, nullableMsgTypeInfo);

    DataSet<Tuple2<K, Either<NullValue, Message>>> initialWorkSet = initialVertices.map(
            new InitializeWorkSet<K, VV, Message>()).returns(workSetTypeInfo);

    final DeltaIteration<Vertex<K, VV>, Tuple2<K, Either<NullValue, Message>>> iteration =
            initialVertices.iterateDelta(initialWorkSet, this.maximumNumberOfIterations, 0);
    setUpIteration(iteration);

    // join with the current state to get vertex values
    DataSet<Tuple2<Vertex<K, VV>, Either<NullValue, Message>>> verticesWithMsgs =
            iteration.getSolutionSet().join(iteration.getWorkset())
            .where(0).equalTo(0)
            .with(new AppendVertexState<>())
            .returns(new TupleTypeInfo<>(
                    vertexType, nullableMsgTypeInfo));

    VertexComputeUdf<K, VV, EV, Message> vertexUdf =
            new VertexComputeUdf<>(computeFunction, intermediateTypeInfo);

    CoGroupOperator<?, ?, Either<Vertex<K, VV>, Tuple2<K, Message>>> superstepComputation =
            verticesWithMsgs.coGroup(edgesWithValue)
            .where("f0.f0").equalTo(0)
            .with(vertexUdf);

    // compute the solution set delta
    DataSet<Vertex<K, VV>> solutionSetDelta = superstepComputation.flatMap(
            new ProjectNewVertexValue<>()).returns(vertexType);

    // compute the inbox of each vertex for the next superstep (new workset)
    DataSet<Tuple2<K, Either<NullValue, Message>>> allMessages = superstepComputation.flatMap(
            new ProjectMessages<>()).returns(workSetTypeInfo);

    DataSet<Tuple2<K, Either<NullValue, Message>>> newWorkSet = allMessages;

    // check if a combiner has been provided
    if (combineFunction != null) {

        MessageCombinerUdf<K, Message> combinerUdf =
                new MessageCombinerUdf<>(combineFunction, workSetTypeInfo);

        DataSet<Tuple2<K, Either<NullValue, Message>>> combinedMessages = allMessages
                .groupBy(0).reduceGroup(combinerUdf)
                .setCombinable(true);

        newWorkSet = combinedMessages;
    }

    // configure the compute function
    superstepComputation = superstepComputation.name("Compute Function");
    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getBcastVars()) {
            superstepComputation = superstepComputation.withBroadcastSet(e.f1, e.f0);
        }
    }

    return iteration.closeWith(solutionSetDelta, newWorkSet);
}
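The compute UDF wrapped by VertexComputeUdf and plugged into the coGroup above is a user-supplied ComputeFunction. As an illustration of what such a function might look like (a sketch assuming Gelly's org.apache.flink.graph.pregel.ComputeFunction API, not code from VertexCentricIteration.java), a single-source-shortest-paths compute function could be roughly:

// Hedged sketch of a user ComputeFunction for the Pregel-style coGroup above (SSSP-style).
public static final class SSSPComputeFunction extends ComputeFunction<Long, Double, Double, Double> {

    @Override
    public void compute(Vertex<Long, Double> vertex, MessageIterator<Double> messages) {
        // take the minimum distance received in this superstep
        double minDistance = Double.MAX_VALUE;
        for (Double msg : messages) {
            minDistance = Math.min(minDistance, msg);
        }

        // if the value improves, update it and propagate along the out-edges
        if (minDistance < vertex.getValue()) {
            setNewVertexValue(minDistance);
            for (Edge<Long, Double> edge : getEdges()) {
                sendMessageTo(edge.getTarget(), minDistance + edge.getValue());
            }
        }
    }
}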
Example #11
Source File: CoGroupOperatorTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testSemanticPropsWithKeySelector2() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

    CoGroupOperator<?, ?, ?> coGroupOp = tupleDs1.coGroup(tupleDs2)
            .where(new DummyTestKeySelector()).equalTo(new DummyTestKeySelector())
            .with(new DummyTestCoGroupFunction2())
            .withForwardedFieldsFirst("2;4->0")
            .withForwardedFieldsSecond("0->4;1;1->3");

    SemanticProperties semProps = coGroupOp.getSemanticProperties();

    assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 4).contains(2));
    assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 6).contains(0));

    assertTrue(semProps.getForwardingTargetFields(1, 0).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 1).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 2).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(1, 2).contains(4));
    assertTrue(semProps.getForwardingTargetFields(1, 3).size() == 2);
    assertTrue(semProps.getForwardingTargetFields(1, 3).contains(1));
    assertTrue(semProps.getForwardingTargetFields(1, 3).contains(3));
    assertTrue(semProps.getForwardingTargetFields(1, 4).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 5).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 6).size() == 0);

    assertTrue(semProps.getReadFields(0).size() == 3);
    assertTrue(semProps.getReadFields(0).contains(2));
    assertTrue(semProps.getReadFields(0).contains(3));
    assertTrue(semProps.getReadFields(0).contains(4));

    assertTrue(semProps.getReadFields(1) == null);
}
Example #12
Source File: ScatterGatherIteration.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Creates the operator that represents this scatter-gather graph computation for a simple vertex.
 *
 * @param messagingDirection
 * @param messageTypeInfo
 * @param numberOfVertices
 * @return the operator
 */
private DataSet<Vertex<K, VV>> createResultSimpleVertex(EdgeDirection messagingDirection,
        TypeInformation<Tuple2<K, Message>> messageTypeInfo, DataSet<LongValue> numberOfVertices) {
    DataSet<Tuple2<K, Message>> messages;

    TypeInformation<Vertex<K, VV>> vertexTypes = initialVertices.getType();

    final DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration =
            initialVertices.iterateDelta(initialVertices, this.maximumNumberOfIterations, 0);
    setUpIteration(iteration);

    switch (messagingDirection) {
        case IN:
            messages = buildScatterFunction(iteration, messageTypeInfo, 1, 0, numberOfVertices);
            break;
        case OUT:
            messages = buildScatterFunction(iteration, messageTypeInfo, 0, 0, numberOfVertices);
            break;
        case ALL:
            messages = buildScatterFunction(iteration, messageTypeInfo, 1, 0, numberOfVertices)
                    .union(buildScatterFunction(iteration, messageTypeInfo, 0, 0, numberOfVertices));
            break;
        default:
            throw new IllegalArgumentException("Illegal edge direction");
    }

    GatherUdf<K, VV, Message> updateUdf = new GatherUdfSimpleVV<>(gatherFunction, vertexTypes);

    // build the update function (co group)
    CoGroupOperator<?, ?, Vertex<K, VV>> updates =
            messages.coGroup(iteration.getSolutionSet()).where(0).equalTo(0).with(updateUdf);

    if (this.configuration != null && this.configuration.isOptNumVertices()) {
        updates = updates.withBroadcastSet(numberOfVertices, "number of vertices");
    }

    configureUpdateFunction(updates);

    return iteration.closeWith(updates, updates);
}
Example #13
Source File: ScatterGatherIteration.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates the operator that represents this scatter-gather graph computation for a simple vertex.
 *
 * @param messagingDirection
 * @param messageTypeInfo
 * @param numberOfVertices
 * @return the operator
 */
private DataSet<Vertex<K, VV>> createResultSimpleVertex(EdgeDirection messagingDirection,
        TypeInformation<Tuple2<K, Message>> messageTypeInfo, DataSet<LongValue> numberOfVertices) {
    DataSet<Tuple2<K, Message>> messages;

    TypeInformation<Vertex<K, VV>> vertexTypes = initialVertices.getType();

    final DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration =
            initialVertices.iterateDelta(initialVertices, this.maximumNumberOfIterations, 0);
    setUpIteration(iteration);

    switch (messagingDirection) {
        case IN:
            messages = buildScatterFunction(iteration, messageTypeInfo, 1, 0, numberOfVertices);
            break;
        case OUT:
            messages = buildScatterFunction(iteration, messageTypeInfo, 0, 0, numberOfVertices);
            break;
        case ALL:
            messages = buildScatterFunction(iteration, messageTypeInfo, 1, 0, numberOfVertices)
                    .union(buildScatterFunction(iteration, messageTypeInfo, 0, 0, numberOfVertices));
            break;
        default:
            throw new IllegalArgumentException("Illegal edge direction");
    }

    GatherUdf<K, VV, Message> updateUdf = new GatherUdfSimpleVV<>(gatherFunction, vertexTypes);

    // build the update function (co group)
    CoGroupOperator<?, ?, Vertex<K, VV>> updates =
            messages.coGroup(iteration.getSolutionSet()).where(0).equalTo(0).with(updateUdf);

    if (this.configuration != null && this.configuration.isOptNumVertices()) {
        updates = updates.withBroadcastSet(numberOfVertices, "number of vertices");
    }

    configureUpdateFunction(updates);

    return iteration.closeWith(updates, updates);
}
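The scatterFunction and gatherFunction referenced in the two examples above are user-supplied. As an illustration of the kind of functions that end up inside these coGroups (a sketch assuming Gelly's org.apache.flink.graph.spargel API, not code from ScatterGatherIteration.java), an SSSP-style pair might look roughly like this:

// Hedged sketch of the user functions driving the scatter and gather coGroups (SSSP-style).
public static final class MinDistanceMessenger extends ScatterFunction<Long, Double, Double, Double> {
    @Override
    public void sendMessages(Vertex<Long, Double> vertex) {
        // propose a distance to every neighbor along the out-edges
        for (Edge<Long, Double> edge : getEdges()) {
            sendMessageTo(edge.getTarget(), vertex.getValue() + edge.getValue());
        }
    }
}

public static final class VertexDistanceUpdater extends GatherFunction<Long, Double, Double> {
    @Override
    public void updateVertex(Vertex<Long, Double> vertex, MessageIterator<Double> inMessages) {
        // keep the smallest proposed distance, if it improves the current value
        double minDistance = Double.MAX_VALUE;
        for (Double msg : inMessages) {
            minDistance = Math.min(minDistance, msg);
        }
        if (minDistance < vertex.getValue()) {
            setNewVertexValue(minDistance);
        }
    }
}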
Example #14
Source File: CoGroupOperatorTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testSemanticPropsWithKeySelector1() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

    CoGroupOperator<?, ?, ?> coGroupOp = tupleDs1.coGroup(tupleDs2)
            .where(new DummyTestKeySelector()).equalTo(new DummyTestKeySelector())
            .with(new DummyTestCoGroupFunction1());

    SemanticProperties semProps = coGroupOp.getSemanticProperties();

    assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 2).contains(4));
    assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 2);
    assertTrue(semProps.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(semProps.getForwardingTargetFields(0, 3).contains(3));
    assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 0);

    assertTrue(semProps.getForwardingTargetFields(1, 0).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 1).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 2).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 3).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 4).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(1, 4).contains(2));
    assertTrue(semProps.getForwardingTargetFields(1, 5).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 6).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(1, 6).contains(0));

    assertTrue(semProps.getReadFields(0).size() == 3);
    assertTrue(semProps.getReadFields(0).contains(2));
    assertTrue(semProps.getReadFields(0).contains(4));
    assertTrue(semProps.getReadFields(0).contains(6));

    assertTrue(semProps.getReadFields(1).size() == 2);
    assertTrue(semProps.getReadFields(1).contains(3));
    assertTrue(semProps.getReadFields(1).contains(5));
}
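The semantic properties asserted above are derived from field-forwarding annotations on DummyTestCoGroupFunction1, which is not shown on this page. Because the inputs are keyed with a KeySelector, the extracted key fields (two of them here) are prepended to each input, so the asserted indices are the annotated tuple positions shifted by two. Annotations of roughly the following shape would produce these properties; this is an illustration of the mechanism, not the actual test class:

// Illustrative only: the real DummyTestCoGroupFunction1 lives in the Flink test sources.
@FunctionAnnotation.ForwardedFieldsFirst("0->4;1;1->3")
@FunctionAnnotation.ForwardedFieldsSecond("2;4->0")
@FunctionAnnotation.ReadFieldsFirst("0;2;4")
@FunctionAnnotation.ReadFieldsSecond("1;3")
public static class IllustrativeCoGroupFunction implements
        CoGroupFunction<Tuple5<Integer, Long, String, Long, Integer>,
                Tuple5<Integer, Long, String, Long, Integer>,
                Tuple5<Integer, Long, String, Long, Integer>> {

    @Override
    public void coGroup(Iterable<Tuple5<Integer, Long, String, Long, Integer>> first,
            Iterable<Tuple5<Integer, Long, String, Long, Integer>> second,
            Collector<Tuple5<Integer, Long, String, Long, Integer>> out) {
        // the body is irrelevant here: only the annotations feed getSemanticProperties()
    }
}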
Example #15
Source File: VertexCentricIteration.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Creates the operator that represents this vertex-centric graph computation.
 *
 * <p>The Pregel iteration is mapped to delta iteration as follows.
 * The solution set consists of the set of active vertices and the workset contains the set of messages
 * send to vertices during the previous superstep. Initially, the workset contains a null message for each vertex.
 * In the beginning of a superstep, the solution set is joined with the workset to produce
 * a dataset containing tuples of vertex state and messages (vertex inbox).
 * The superstep compute UDF is realized with a coGroup between the vertices with inbox and the graph edges.
 * The output of the compute UDF contains both the new vertex values and the new messages produced.
 * These are directed to the solution set delta and new workset, respectively, with subsequent flatMaps.
 *
 * @return The operator that represents this vertex-centric graph computation.
 */
@Override
public DataSet<Vertex<K, VV>> createResult() {

    if (this.initialVertices == null) {
        throw new IllegalStateException("The input data set has not been set.");
    }

    // prepare the type information
    TypeInformation<K> keyType = ((TupleTypeInfo<?>) initialVertices.getType()).getTypeAt(0);
    TypeInformation<Tuple2<K, Message>> messageTypeInfo = new TupleTypeInfo<>(keyType, messageType);
    TypeInformation<Vertex<K, VV>> vertexType = initialVertices.getType();
    TypeInformation<Either<Vertex<K, VV>, Tuple2<K, Message>>> intermediateTypeInfo =
            new EitherTypeInfo<>(vertexType, messageTypeInfo);
    TypeInformation<Either<NullValue, Message>> nullableMsgTypeInfo =
            new EitherTypeInfo<>(TypeExtractor.getForClass(NullValue.class), messageType);
    TypeInformation<Tuple2<K, Either<NullValue, Message>>> workSetTypeInfo =
            new TupleTypeInfo<>(keyType, nullableMsgTypeInfo);

    DataSet<Tuple2<K, Either<NullValue, Message>>> initialWorkSet = initialVertices.map(
            new InitializeWorkSet<K, VV, Message>()).returns(workSetTypeInfo);

    final DeltaIteration<Vertex<K, VV>, Tuple2<K, Either<NullValue, Message>>> iteration =
            initialVertices.iterateDelta(initialWorkSet, this.maximumNumberOfIterations, 0);
    setUpIteration(iteration);

    // join with the current state to get vertex values
    DataSet<Tuple2<Vertex<K, VV>, Either<NullValue, Message>>> verticesWithMsgs =
            iteration.getSolutionSet().join(iteration.getWorkset())
            .where(0).equalTo(0)
            .with(new AppendVertexState<>())
            .returns(new TupleTypeInfo<>(
                    vertexType, nullableMsgTypeInfo));

    VertexComputeUdf<K, VV, EV, Message> vertexUdf =
            new VertexComputeUdf<>(computeFunction, intermediateTypeInfo);

    CoGroupOperator<?, ?, Either<Vertex<K, VV>, Tuple2<K, Message>>> superstepComputation =
            verticesWithMsgs.coGroup(edgesWithValue)
            .where("f0.f0").equalTo(0)
            .with(vertexUdf);

    // compute the solution set delta
    DataSet<Vertex<K, VV>> solutionSetDelta = superstepComputation.flatMap(
            new ProjectNewVertexValue<>()).returns(vertexType);

    // compute the inbox of each vertex for the next superstep (new workset)
    DataSet<Tuple2<K, Either<NullValue, Message>>> allMessages = superstepComputation.flatMap(
            new ProjectMessages<>()).returns(workSetTypeInfo);

    DataSet<Tuple2<K, Either<NullValue, Message>>> newWorkSet = allMessages;

    // check if a combiner has been provided
    if (combineFunction != null) {

        MessageCombinerUdf<K, Message> combinerUdf =
                new MessageCombinerUdf<>(combineFunction, workSetTypeInfo);

        DataSet<Tuple2<K, Either<NullValue, Message>>> combinedMessages = allMessages
                .groupBy(0).reduceGroup(combinerUdf)
                .setCombinable(true);

        newWorkSet = combinedMessages;
    }

    // configure the compute function
    superstepComputation = superstepComputation.name("Compute Function");
    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getBcastVars()) {
            superstepComputation = superstepComputation.withBroadcastSet(e.f1, e.f0);
        }
    }

    return iteration.closeWith(solutionSetDelta, newWorkSet);
}
Example #16
Source File: CoGroupOperatorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test
public void testSemanticPropsWithKeySelector2() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

    CoGroupOperator<?, ?, ?> coGroupOp = tupleDs1.coGroup(tupleDs2)
            .where(new DummyTestKeySelector()).equalTo(new DummyTestKeySelector())
            .with(new DummyTestCoGroupFunction2())
            .withForwardedFieldsFirst("2;4->0")
            .withForwardedFieldsSecond("0->4;1;1->3");

    SemanticProperties semProps = coGroupOp.getSemanticProperties();

    assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 4).contains(2));
    assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 6).contains(0));

    assertTrue(semProps.getForwardingTargetFields(1, 0).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 1).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 2).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(1, 2).contains(4));
    assertTrue(semProps.getForwardingTargetFields(1, 3).size() == 2);
    assertTrue(semProps.getForwardingTargetFields(1, 3).contains(1));
    assertTrue(semProps.getForwardingTargetFields(1, 3).contains(3));
    assertTrue(semProps.getForwardingTargetFields(1, 4).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 5).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 6).size() == 0);

    assertTrue(semProps.getReadFields(0).size() == 3);
    assertTrue(semProps.getReadFields(0).contains(2));
    assertTrue(semProps.getReadFields(0).contains(3));
    assertTrue(semProps.getReadFields(0).contains(4));

    assertTrue(semProps.getReadFields(1) == null);
}
Example #17
Source File: CoGroupOperatorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test
public void testSemanticPropsWithKeySelector1() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

    CoGroupOperator<?, ?, ?> coGroupOp = tupleDs1.coGroup(tupleDs2)
            .where(new DummyTestKeySelector()).equalTo(new DummyTestKeySelector())
            .with(new DummyTestCoGroupFunction1());

    SemanticProperties semProps = coGroupOp.getSemanticProperties();

    assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 2).contains(4));
    assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 2);
    assertTrue(semProps.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(semProps.getForwardingTargetFields(0, 3).contains(3));
    assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 0);

    assertTrue(semProps.getForwardingTargetFields(1, 0).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 1).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 2).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 3).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 4).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(1, 4).contains(2));
    assertTrue(semProps.getForwardingTargetFields(1, 5).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(1, 6).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(1, 6).contains(0));

    assertTrue(semProps.getReadFields(0).size() == 3);
    assertTrue(semProps.getReadFields(0).contains(2));
    assertTrue(semProps.getReadFields(0).contains(4));
    assertTrue(semProps.getReadFields(0).contains(6));

    assertTrue(semProps.getReadFields(1).size() == 2);
    assertTrue(semProps.getReadFields(1).contains(3));
    assertTrue(semProps.getReadFields(1).contains(5));
}
Example #18
Source File: DataSet.java From Flink-CEPplus with Apache License 2.0 | 2 votes |
/**
 * Initiates a CoGroup transformation.
 *
 * <p>A CoGroup transformation combines the elements of
 * two {@link DataSet DataSets} into one DataSet. It groups each DataSet individually on a key and
 * gives groups of both DataSets with equal keys together into a {@link org.apache.flink.api.common.functions.RichCoGroupFunction}.
 * If a DataSet has a group with no matching key in the other DataSet, the CoGroupFunction
 * is called with an empty group for the non-existing group.
 *
 * <p>The CoGroupFunction can iterate over the elements of both groups and return any number
 * of elements including none.
 *
 * <p>This method returns a {@link CoGroupOperatorSets} on which one of the {@code where} methods
 * can be called to define the join key of the first joining (i.e., this) DataSet.
 *
 * @param other The other DataSet of the CoGroup transformation.
 * @return A CoGroupOperatorSets to continue the definition of the CoGroup transformation.
 *
 * @see CoGroupOperatorSets
 * @see CoGroupOperator
 * @see DataSet
 */
public <R> CoGroupOperator.CoGroupOperatorSets<T, R> coGroup(DataSet<R> other) {
    return new CoGroupOperator.CoGroupOperatorSets<>(this, other);
}
Example #19
Source File: DataSet.java From flink with Apache License 2.0 | 2 votes |
/**
 * Initiates a CoGroup transformation.
 *
 * <p>A CoGroup transformation combines the elements of
 * two {@link DataSet DataSets} into one DataSet. It groups each DataSet individually on a key and
 * gives groups of both DataSets with equal keys together into a {@link org.apache.flink.api.common.functions.RichCoGroupFunction}.
 * If a DataSet has a group with no matching key in the other DataSet, the CoGroupFunction
 * is called with an empty group for the non-existing group.
 *
 * <p>The CoGroupFunction can iterate over the elements of both groups and return any number
 * of elements including none.
 *
 * <p>This method returns a {@link CoGroupOperatorSets} on which one of the {@code where} methods
 * can be called to define the join key of the first joining (i.e., this) DataSet.
 *
 * @param other The other DataSet of the CoGroup transformation.
 * @return A CoGroupOperatorSets to continue the definition of the CoGroup transformation.
 *
 * @see CoGroupOperatorSets
 * @see CoGroupOperator
 * @see DataSet
 */
public <R> CoGroupOperator.CoGroupOperatorSets<T, R> coGroup(DataSet<R> other) {
    return new CoGroupOperator.CoGroupOperatorSets<>(this, other);
}
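As the Javadoc notes, a key that exists in only one input still invokes the CoGroupFunction, with an empty group for the missing side, which makes coGroup a natural way to express outer-join-style logic. A hedged sketch of this behavior (the orders and payments data sets and their field layout are made up for illustration):

// Emit one record per order key with the number of matching payments
// (0 when the payment group for that key is empty).
DataSet<Tuple2<Long, Integer>> matchCounts = orders.coGroup(payments)
    .where(0)      // order id in the first input
    .equalTo(0)    // order id in the second input
    .with(new CoGroupFunction<Tuple2<Long, String>, Tuple2<Long, Double>, Tuple2<Long, Integer>>() {
        @Override
        public void coGroup(Iterable<Tuple2<Long, String>> orderGroup,
                Iterable<Tuple2<Long, Double>> paymentGroup,
                Collector<Tuple2<Long, Integer>> out) {
            int paymentCount = 0;
            for (Tuple2<Long, Double> payment : paymentGroup) {
                paymentCount++;
            }
            for (Tuple2<Long, String> order : orderGroup) {
                // orderGroup is empty for payments without a matching order,
                // in which case nothing is emitted for that key
                out.collect(new Tuple2<>(order.f0, paymentCount));
            }
        }
    });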