Java Code Examples for org.apache.flink.api.java.operators.DeltaIteration#closeWith()
The following examples show how to use org.apache.flink.api.java.operators.DeltaIteration#closeWith().
All examples are taken from open source projects; the source file, originating project, and license are noted above each example.
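Before the examples, here is a minimal, self-contained sketch of the pattern they all share: iterateDelta(initialWorkset, maxIterations, keyPositions) opens the iteration, the step logic derives a solution-set delta and a new workset from getWorkset() and getSolutionSet(), and closeWith(solutionSetDelta, newWorkset) closes the loop and returns the resulting solution set. The class name, input data, and step function below are illustrative placeholders, not taken from any of the projects that follow.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;

public class CloseWithSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // illustrative input: (key, value) pairs; any DataSet with a key field works
        DataSet<Tuple2<Long, Long>> initial = env.fromElements(
                Tuple2.of(1L, 1L), Tuple2.of(2L, 2L));

        // open a delta iteration: initial solution set and workset are identical,
        // at most 10 supersteps, field 0 is the solution set key
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
                initial.iterateDelta(initial, 10, 0);

        // placeholder step logic: any transformation of the workset, typically
        // joined against iteration.getSolutionSet() as in the examples below
        DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
                .map(t -> Tuple2.of(t.f0, t.f1 + 1))
                .returns(initial.getType());

        // closeWith(solutionSetDelta, newWorkset): the delta's entries replace
        // solution set entries with the same key; the new workset feeds the
        // next superstep
        DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, delta);

        result.print();
    }
}

The iteration terminates when the new workset becomes empty or the maximum number of iterations is reached; in most of the examples below the delta and the new workset are the same data set.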
Example 1
Source File: HighParallelismIterationsTestProgram.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<>());

    env.execute();
}
Example 2
Source File: ConnectedComponentsITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 3
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0
/**
 * <pre>
 *           (SRC A)     (SRC B)          (SRC C)
 *          /       \   /                /       \
 * (SINK 1)   (DELTA ITERATION)        |         (SINK 2)
 *             /   |   \              /
 *     (SINK 3)    |    (CROSS => NEXT WORKSET)
 *                 |             |
 *           (JOIN => SOLUTION SET DELTA)
 * </pre>
 */
@Test
public void testClosureDeltaIteration() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> sourceA = env.generateSequence(0, 1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceB = env.generateSequence(0, 1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceC = env.generateSequence(0, 1).map(new Duplicator<Long>());

    sourceA.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    sourceC.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> loop = sourceA.iterateDelta(sourceB, 10, 0);

    DataSet<Tuple2<Long, Long>> workset = loop.getWorkset().cross(sourceB)
            .with(new IdentityCrosser<Tuple2<Long, Long>>()).name("Next work set");
    DataSet<Tuple2<Long, Long>> delta = workset.join(loop.getSolutionSet()).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>()).name("Solution set delta");

    DataSet<Tuple2<Long, Long>> result = loop.closeWith(delta, workset);
    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example 4
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0
/**
 * <pre>
 *           (SRC A)     (SRC B)          (SRC C)
 *          /       \   /                /       \
 * (SINK 1)   (DELTA ITERATION)        |         (SINK 2)
 *             /   |   \              /
 *     (SINK 3)    |    (CROSS => NEXT WORKSET)
 *                 |             |
 *           (JOIN => SOLUTION SET DELTA)
 * </pre>
 */
@Test
public void testClosureDeltaIteration() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> sourceA = env.generateSequence(0, 1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceB = env.generateSequence(0, 1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceC = env.generateSequence(0, 1).map(new Duplicator<Long>());

    sourceA.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    sourceC.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> loop = sourceA.iterateDelta(sourceB, 10, 0);

    DataSet<Tuple2<Long, Long>> workset = loop.getWorkset().cross(sourceB)
            .with(new IdentityCrosser<Tuple2<Long, Long>>()).name("Next work set");
    DataSet<Tuple2<Long, Long>> delta = workset.join(loop.getSolutionSet()).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>()).name("Solution set delta");

    DataSet<Tuple2<Long, Long>> result = loop.closeWith(delta, workset);
    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example 5
Source File: ConnectedComponentsWithObjectMapITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 6
Source File: CoGroupConnectedComponentsSecondITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Long> vertices = env.fromElements(ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n"))
            .map(new VertexParser());

    DataSet<Tuple2<Long, Long>> edges = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"))
            .flatMap(new EdgeParser());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration
            .getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    // emit result
    List<Tuple2<Long, Long>> resultTuples = new ArrayList<>();
    result.output(new LocalCollectionOutputFormat<>(resultTuples));

    env.execute();
}
Example 7
Source File: ConnectedComponentsWithObjectMapITCase.java From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 8
Source File: ConnectedComponentsWithSolutionSetFirstITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1);

    DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
            .join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 9
Source File: AggregatorsITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
    /*
     * Test aggregator without parameter for iterateDelta
     */

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
            initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
            .where(0).equalTo(0).flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

    assertEquals(expected, result);
}
Example 10
Source File: SuccessAfterNetworkBuffersFailureITCase.java From flink with Apache License 2.0
private static void runConnectedComponents(ExecutionEnvironment env) throws Exception {

    env.setParallelism(PARALLELISM);

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    env.execute();
}
Example 11
Source File: AggregatorsITCase.java From flink with Apache License 2.0
@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
    /*
     * Test aggregator without parameter for iterateDelta
     */

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
            initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
            .where(0).equalTo(0).flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

    assertEquals(expected, result);
}
Example 12
Source File: ConnectedComponents.java From flink with Apache License 2.0
public static void main(String... args) throws Exception {

    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final int maxIterations = params.getInt("iterations", 10);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // read vertex and edge data
    DataSet<Long> vertices = getVertexDataSet(env, params);
    DataSet<Tuple2<Long, Long>> edges = getEdgeDataSet(env, params).flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId =
            vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, maxIterations, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", " ");
        // execute program
        env.execute("Connected Components Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Example 13
Source File: VertexCentricIteration.java From flink with Apache License 2.0
/**
 * Creates the operator that represents this vertex-centric graph computation.
 *
 * <p>The Pregel iteration is mapped to a delta iteration as follows.
 * The solution set consists of the set of active vertices and the workset contains the set of messages
 * sent to vertices during the previous superstep. Initially, the workset contains a null message for each vertex.
 * At the beginning of a superstep, the solution set is joined with the workset to produce
 * a dataset containing tuples of vertex state and messages (vertex inbox).
 * The superstep compute UDF is realized with a coGroup between the vertices with inbox and the graph edges.
 * The output of the compute UDF contains both the new vertex values and the new messages produced.
 * These are directed to the solution set delta and new workset, respectively, with subsequent flatMaps.
 *
 * @return The operator that represents this vertex-centric graph computation.
 */
@Override
public DataSet<Vertex<K, VV>> createResult() {

    if (this.initialVertices == null) {
        throw new IllegalStateException("The input data set has not been set.");
    }

    // prepare the type information
    TypeInformation<K> keyType = ((TupleTypeInfo<?>) initialVertices.getType()).getTypeAt(0);
    TypeInformation<Tuple2<K, Message>> messageTypeInfo =
            new TupleTypeInfo<>(keyType, messageType);
    TypeInformation<Vertex<K, VV>> vertexType = initialVertices.getType();
    TypeInformation<Either<Vertex<K, VV>, Tuple2<K, Message>>> intermediateTypeInfo =
            new EitherTypeInfo<>(vertexType, messageTypeInfo);
    TypeInformation<Either<NullValue, Message>> nullableMsgTypeInfo =
            new EitherTypeInfo<>(TypeExtractor.getForClass(NullValue.class), messageType);
    TypeInformation<Tuple2<K, Either<NullValue, Message>>> workSetTypeInfo =
            new TupleTypeInfo<>(keyType, nullableMsgTypeInfo);

    DataSet<Tuple2<K, Either<NullValue, Message>>> initialWorkSet = initialVertices.map(
            new InitializeWorkSet<K, VV, Message>()).returns(workSetTypeInfo);

    final DeltaIteration<Vertex<K, VV>, Tuple2<K, Either<NullValue, Message>>> iteration =
            initialVertices.iterateDelta(initialWorkSet, this.maximumNumberOfIterations, 0);
    setUpIteration(iteration);

    // join with the current state to get vertex values
    DataSet<Tuple2<Vertex<K, VV>, Either<NullValue, Message>>> verticesWithMsgs =
            iteration.getSolutionSet().join(iteration.getWorkset())
            .where(0).equalTo(0)
            .with(new AppendVertexState<>())
            .returns(new TupleTypeInfo<>(vertexType, nullableMsgTypeInfo));

    VertexComputeUdf<K, VV, EV, Message> vertexUdf =
            new VertexComputeUdf<>(computeFunction, intermediateTypeInfo);

    CoGroupOperator<?, ?, Either<Vertex<K, VV>, Tuple2<K, Message>>> superstepComputation =
            verticesWithMsgs.coGroup(edgesWithValue)
            .where("f0.f0").equalTo(0)
            .with(vertexUdf);

    // compute the solution set delta
    DataSet<Vertex<K, VV>> solutionSetDelta = superstepComputation.flatMap(
            new ProjectNewVertexValue<>()).returns(vertexType);

    // compute the inbox of each vertex for the next superstep (new workset)
    DataSet<Tuple2<K, Either<NullValue, Message>>> allMessages = superstepComputation.flatMap(
            new ProjectMessages<>()).returns(workSetTypeInfo);

    DataSet<Tuple2<K, Either<NullValue, Message>>> newWorkSet = allMessages;

    // check if a combiner has been provided
    if (combineFunction != null) {

        MessageCombinerUdf<K, Message> combinerUdf =
                new MessageCombinerUdf<>(combineFunction, workSetTypeInfo);

        DataSet<Tuple2<K, Either<NullValue, Message>>> combinedMessages = allMessages
                .groupBy(0).reduceGroup(combinerUdf)
                .setCombinable(true);

        newWorkSet = combinedMessages;
    }

    // configure the compute function
    superstepComputation = superstepComputation.name("Compute Function");
    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getBcastVars()) {
            superstepComputation = superstepComputation.withBroadcastSet(e.f1, e.f0);
        }
    }

    return iteration.closeWith(solutionSetDelta, newWorkSet);
}
Example 14
Source File: IterationsCompilerTest.java From flink with Apache License 2.0
public static DataSet<Tuple2<Long, Long>> doDeltaIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> depIteration = vertices.iterateDelta(vertices, 100, 0);

    DataSet<Tuple1<Long>> candidates = depIteration.getWorkset().join(edges).where(0).equalTo(0)
            .projectSecond(1);

    DataSet<Tuple1<Long>> grouped = candidates.groupBy(0).reduceGroup(new Reduce101());

    DataSet<Tuple2<Long, Long>> candidatesDependencies = grouped.join(edges).where(0).equalTo(1).projectSecond(0, 1);

    DataSet<Tuple2<Long, Long>> verticesWithNewComponents = candidatesDependencies.join(depIteration.getSolutionSet()).where(0).equalTo(0)
            .with(new Join222())
            .groupBy(0).aggregate(Aggregations.MIN, 1);

    DataSet<Tuple2<Long, Long>> updatedComponentId = verticesWithNewComponents.join(depIteration.getSolutionSet()).where(0).equalTo(0)
            .flatMap(new FlatMapJoin());

    DataSet<Tuple2<Long, Long>> depResult = depIteration.closeWith(updatedComponentId, updatedComponentId);

    return depResult;
}
Example 15
Source File: ConnectedComponentsWithDeferredUpdateITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new UpdateComponentIdMatchNonPreserving());

    DataSet<Tuple2<Long, Long>> delta;
    if (extraMapper) {
        delta = changes.map(
                // ID Mapper
                new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                    private static final long serialVersionUID = -3929364091829757322L;

                    @Override
                    public Tuple2<Long, Long> map(Tuple2<Long, Long> v) throws Exception {
                        return v;
                    }
                });
    }
    else {
        delta = changes;
    }

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 16
Source File: MultipleJoinsWithSolutionSetCompilerTest.java From Flink-CEPplus with Apache License 2.0
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) {

    DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration = initialData.iterateDelta(initialData, numIterations, 0);

    DataSet<Tuple2<Long, Double>> delta = iteration.getSolutionSet()
            .join(iteration.getWorkset().flatMap(new Duplicator())).where(0).equalTo(0).with(new SummingJoin()).name(JOIN_1)
            .groupBy(0).aggregate(Aggregations.MIN, 1).map(new Expander())
            .join(iteration.getSolutionSet()).where(0).equalTo(0).with(new SummingJoinProject()).name(JOIN_2);

    DataSet<Tuple2<Long, Double>> changes = delta.groupBy(0).aggregate(Aggregations.SUM, 1);

    DataSet<Tuple2<Long, Double>> result = iteration.closeWith(delta, changes);

    return result;
}
Example 17
Source File: MultipleJoinsWithSolutionSetCompilerTest.java From flink with Apache License 2.0
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) {

    DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration = initialData.iterateDelta(initialData, numIterations, 0);

    DataSet<Tuple2<Long, Double>> delta = iteration.getSolutionSet()
            .join(iteration.getWorkset().flatMap(new Duplicator())).where(0).equalTo(0).with(new SummingJoin()).name(JOIN_1)
            .groupBy(0).aggregate(Aggregations.MIN, 1).map(new Expander())
            .join(iteration.getSolutionSet()).where(0).equalTo(0).with(new SummingJoinProject()).name(JOIN_2);

    DataSet<Tuple2<Long, Double>> changes = delta.groupBy(0).aggregate(Aggregations.SUM, 1);

    DataSet<Tuple2<Long, Double>> result = iteration.closeWith(delta, changes);

    return result;
}
Example 18
Source File: NestedIterationsTest.java From flink with Apache License 2.0
@Test
public void testDeltaIterationInClosure() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> data1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
        DataSet<Tuple2<Long, Long>> data2 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> firstIteration = data1.iterateDelta(data1, 100, 0);

        DataSet<Tuple2<Long, Long>> inFirst = firstIteration.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> firstResult = firstIteration.closeWith(inFirst, inFirst).map(new IdentityMapper<Tuple2<Long, Long>>());

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> mainIteration = data2.iterateDelta(data2, 100, 0);

        DataSet<Tuple2<Long, Long>> joined = mainIteration.getWorkset().join(firstResult).where(0).equalTo(0)
                .projectFirst(0).projectSecond(0);

        DataSet<Tuple2<Long, Long>> mainResult = mainIteration.closeWith(joined, joined);

        mainResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();

        // optimizer should be able to translate this
        OptimizedPlan op = compileNoStats(p);

        // job graph generator should be able to translate this
        new JobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 19
Source File: NestedIterationsTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testDeltaIterationInClosure() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> data1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
        DataSet<Tuple2<Long, Long>> data2 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> firstIteration = data1.iterateDelta(data1, 100, 0);

        DataSet<Tuple2<Long, Long>> inFirst = firstIteration.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> firstResult = firstIteration.closeWith(inFirst, inFirst).map(new IdentityMapper<Tuple2<Long, Long>>());

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> mainIteration = data2.iterateDelta(data2, 100, 0);

        DataSet<Tuple2<Long, Long>> joined = mainIteration.getWorkset().join(firstResult).where(0).equalTo(0)
                .projectFirst(0).projectSecond(0);

        DataSet<Tuple2<Long, Long>> mainResult = mainIteration.closeWith(joined, joined);

        mainResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();

        // optimizer should be able to translate this
        OptimizedPlan op = compileNoStats(p);

        // job graph generator should be able to translate this
        new JobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}