org.apache.flink.api.java.operators.DeltaIteration Java Examples
The following examples show how to use org.apache.flink.api.java.operators.DeltaIteration. Each example is taken from an open-source project; its source file and license are noted in the header above the code.
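Before the examples, here is a minimal, self-contained sketch of the DeltaIteration lifecycle: open the iteration with iterateDelta on an initial solution set, derive the solution-set delta and the next workset inside the step function via getWorkset() and getSolutionSet(), and terminate with closeWith. This skeleton is illustrative only and is not drawn from any of the projects below.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;

public class DeltaIterationSkeleton {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // initial solution set and workset: (key, value) pairs
        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 10)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> map(Long v) {
                        return new Tuple2<>(v, v);
                    }
                });

        // iterate at most 10 times, solution set keyed on field 0
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
                input.iterateDelta(input, 10, 0);

        // step function: join the workset with the solution set;
        // here the delta and the next workset are identical
        DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
                .join(iteration.getSolutionSet()).where(0).equalTo(0)
                .projectFirst(0, 1);

        DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, delta);
        result.print();
    }
}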
Example #1
Source File: VertexCentricIteration.java From flink with Apache License 2.0

/**
 * Helper method which sets up an iteration with the given vertex value.
 *
 * @param iteration the delta iteration to configure
 */
private void setUpIteration(DeltaIteration<?, ?> iteration) {

    // set up the iteration operator
    if (this.configuration != null) {

        iteration.name(this.configuration.getName("Vertex-centric iteration (" + computeFunction + ")"));
        iteration.parallelism(this.configuration.getParallelism());
        iteration.setSolutionSetUnManaged(this.configuration.isSolutionSetUnmanagedMemory());

        // register all aggregators
        for (Map.Entry<String, Aggregator<?>> entry : this.configuration.getAggregators().entrySet()) {
            iteration.registerAggregator(entry.getKey(), entry.getValue());
        }
    } else {
        // no configuration provided; set default name
        iteration.name("Vertex-centric iteration (" + computeFunction + ")");
    }
}
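The configuration object consulted by this helper is not shown above. A rough sketch of how a caller might populate it (assuming Gelly's VertexCentricConfiguration, whose setters mirror the getters used in setUpIteration; the name and values here are illustrative):

import org.apache.flink.api.common.aggregators.LongSumAggregator;
import org.apache.flink.graph.pregel.VertexCentricConfiguration;

// hypothetical configuration for a vertex-centric iteration
VertexCentricConfiguration parameters = new VertexCentricConfiguration();
parameters.setName("My vertex-centric iteration");
parameters.setParallelism(16);
parameters.setSolutionSetUnmanagedMemory(true);
parameters.registerAggregator("sumAggregator", new LongSumAggregator());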
Example #2
Source File: ScatterGatherIteration.java From Flink-CEPplus with Apache License 2.0

/**
 * Helper method which sets up an iteration with the given vertex value (either simple or with degrees).
 *
 * @param iteration the delta iteration to configure
 */
private void setUpIteration(DeltaIteration<?, ?> iteration) {

    // set up the iteration operator
    if (this.configuration != null) {

        iteration.name(this.configuration.getName("Scatter-gather iteration (" + gatherFunction + " | " + scatterFunction + ")"));
        iteration.parallelism(this.configuration.getParallelism());
        iteration.setSolutionSetUnManaged(this.configuration.isSolutionSetUnmanagedMemory());

        // register all aggregators
        for (Map.Entry<String, Aggregator<?>> entry : this.configuration.getAggregators().entrySet()) {
            iteration.registerAggregator(entry.getKey(), entry.getValue());
        }
    } else {
        // no configuration provided; set default name
        iteration.name("Scatter-gather iteration (" + gatherFunction + " | " + scatterFunction + ")");
    }
}
Example #3
Source File: DeltaIterationNotDependingOnSolutionSetITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(0, 9).map(new Duplicator<Long>());

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 5, 1);

        iteration.closeWith(iteration.getWorkset(), iteration.getWorkset().map(new TestMapper()))
                .output(new LocalCollectionOutputFormat<Tuple2<Long, Long>>(result));

        env.execute();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
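Duplicator and TestMapper are small helpers defined elsewhere in the test class. A plausible sketch of them follows; the exact bodies are assumptions, since the original file does not appear here:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// turns each element into a (value, value) pair
private static final class Duplicator<T> implements MapFunction<T, Tuple2<T, T>> {
    @Override
    public Tuple2<T, T> map(T value) {
        return new Tuple2<>(value, value);
    }
}

// identity step function: the workset passes through unchanged
private static final class TestMapper implements MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> {
    @Override
    public Tuple2<Long, Long> map(Tuple2<Long, Long> value) {
        return value;
    }
}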
Example #4
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

/**
 * <pre>
 *       (SRC A)       (SRC B)        (SRC C)
 *      /       \     /              /       \
 * (SINK 1)  (DELTA ITERATION)      |      (SINK 2)
 *            /   |   \            /
 *      (SINK 3)  |  (CROSS => NEXT WORKSET)
 *                |        |
 *          (JOIN => SOLUTION SET DELTA)
 * </pre>
 */
@Test
public void testClosureDeltaIteration() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple2<Long, Long>> sourceA = env.generateSequence(0, 1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceB = env.generateSequence(0, 1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceC = env.generateSequence(0, 1).map(new Duplicator<Long>());

    sourceA.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    sourceC.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> loop = sourceA.iterateDelta(sourceB, 10, 0);

    DataSet<Tuple2<Long, Long>> workset = loop.getWorkset().cross(sourceB)
            .with(new IdentityCrosser<Tuple2<Long, Long>>()).name("Next work set");

    DataSet<Tuple2<Long, Long>> delta = workset.join(loop.getSolutionSet()).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>()).name("Solution set delta");

    DataSet<Tuple2<Long, Long>> result = loop.closeWith(delta, workset);
    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #5
Source File: SuccessAfterNetworkBuffersFailureITCase.java From flink with Apache License 2.0

private static void runConnectedComponents(ExecutionEnvironment env) throws Exception {

    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    env.execute();
}
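The step functions referenced above come from Flink's ConnectedComponents example. In outline (a sketch of their logic, not the verbatim source), NeighborWithComponentIDJoin propagates a vertex's current component id to its neighbors, and ComponentIdFilter emits an update only when the candidate id improves on the one stored in the solution set:

import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// sketch: send the vertex's component id to the neighbor at the other end of the edge
public static final class NeighborWithComponentIDJoin
        implements JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> {

    @Override
    public Tuple2<Long, Long> join(Tuple2<Long, Long> vertexWithComponent, Tuple2<Long, Long> edge) {
        return new Tuple2<>(edge.f1, vertexWithComponent.f1);
    }
}

// sketch: keep a candidate only if it lowers the stored component id
public static final class ComponentIdFilter
        implements FlatJoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> {

    @Override
    public void join(Tuple2<Long, Long> candidate, Tuple2<Long, Long> old, Collector<Tuple2<Long, Long>> out) {
        if (candidate.f1 < old.f1) {
            out.collect(candidate);
        }
    }
}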
Example #6
Source File: WorksetIterationCornerCasesTest.java From flink with Apache License 2.0

@Test
public void testWorksetIterationNotDependingOnSolutionSet() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 100).map(new Duplicator<Long>());

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 100, 1);

        DataSet<Tuple2<Long, Long>> iterEnd = iteration.getWorkset().map(new TestMapper<Tuple2<Long, Long>>());
        iteration.closeWith(iterEnd, iterEnd)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        assertTrue(wipn.getSolutionSetPlanNode().getOutgoingChannels().isEmpty());

        JobGraphGenerator jgg = new JobGraphGenerator();
        jgg.compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #7
Source File: WorksetIterationsRecordApiCompilerTest.java From flink with Apache License 2.0

private Plan getTestPlan(boolean joinPreservesSolutionSet, boolean mapBeforeSolutionDelta) {

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple2<Long, Long>> solSetInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Solution Set");
    DataSet<Tuple2<Long, Long>> workSetInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Workset");
    DataSet<Tuple2<Long, Long>> invariantInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Invariant Input");

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIt =
            solSetInput.iterateDelta(workSetInput, 100, 0).name(ITERATION_NAME);

    DataSet<Tuple2<Long, Long>> join1 = deltaIt.getWorkset().join(invariantInput).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>())
            .withForwardedFieldsFirst("*").name(JOIN_WITH_INVARIANT_NAME);

    DataSet<Tuple2<Long, Long>> join2 = deltaIt.getSolutionSet().join(join1).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>())
            .name(JOIN_WITH_SOLUTION_SET);

    if (joinPreservesSolutionSet) {
        ((JoinOperator<?, ?, ?>) join2).withForwardedFieldsFirst("*");
    }

    DataSet<Tuple2<Long, Long>> nextWorkset = join2.groupBy(0)
            .reduceGroup(new IdentityGroupReducer<Tuple2<Long, Long>>())
            .withForwardedFields("*").name(NEXT_WORKSET_REDUCER_NAME);

    if (mapBeforeSolutionDelta) {
        DataSet<Tuple2<Long, Long>> mapper = join2.map(new IdentityMapper<Tuple2<Long, Long>>())
                .withForwardedFields("*").name(SOLUTION_DELTA_MAPPER_NAME);

        deltaIt.closeWith(mapper, nextWorkset)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    } else {
        deltaIt.closeWith(join2, nextWorkset)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    }

    return env.createProgramPlan();
}
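The withForwardedFields annotations in this plan are semantic hints: they tell the optimizer that an operator copies fields unchanged from input to output, which lets partitionings and sort orders survive across operators inside the iteration. The IdentityJoiner helper used above might look like the following sketch (the original lives in the test utilities and is assumed here):

import org.apache.flink.api.common.functions.JoinFunction;

// sketch: forwards the first input unchanged, which is what
// withForwardedFieldsFirst("*") asserts to the optimizer
public static class IdentityJoiner<T> implements JoinFunction<T, T, T> {
    @Override
    public T join(T first, T second) {
        return first;
    }
}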
Example #8
Source File: IterationsCompilerTest.java From flink with Apache License 2.0

@Test
public void testWorksetIterationPipelineBreakerPlacement() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        // the workset (input two of the delta iteration) is the same as what is consumed by the successive join
        DataSet<Tuple2<Long, Long>> initialWorkset = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());

        DataSet<Tuple2<Long, Long>> initialSolutionSet = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());

        // trivial iteration, since we are interested in the inputs to the iteration
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = initialSolutionSet.iterateDelta(initialWorkset, 100, 0);

        DataSet<Tuple2<Long, Long>> next = iteration.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> result = iteration.closeWith(next, next);

        initialWorkset
                .join(result, JoinHint.REPARTITION_HASH_FIRST)
                .where(0).equalTo(0)
                .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());

        Plan p = env.createProgramPlan();
        compileNoStats(p);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #9
Source File: IterationsCompilerTest.java From flink with Apache License 2.0

@Test
public void testSolutionSetDeltaDependsOnBroadcastVariable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> source = env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());

        DataSet<Tuple2<Long, Long>> invariantInput = env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());

        // iteration from here
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = source.iterateDelta(source, 1000, 1);

        DataSet<Tuple2<Long, Long>> result = invariantInput
                .map(new IdentityMapper<Tuple2<Long, Long>>()).withBroadcastSet(iter.getWorkset(), "bc data")
                .join(iter.getSolutionSet()).where(0).equalTo(1).projectFirst(1).projectSecond(1);

        iter.closeWith(result.map(new IdentityMapper<Tuple2<Long, Long>>()), result)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        OptimizedPlan p = compileNoStats(env.createProgramPlan());

        // check that the JSON generator accepts this plan
        new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(p);

        // check that the JobGraphGenerator accepts the plan
        new JobGraphGenerator().compileJobGraph(p);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #10
Source File: TempInIterationsTest.java From flink with Apache License 2.0

@Test
public void testTempInIterationTest() throws Exception {

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input = env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            input.iterateDelta(input, 1, 0);

    DataSet<Tuple2<Long, Long>> update = iteration.getWorkset()
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

    iteration.closeWith(update, update)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = (new Optimizer(new Configuration())).compile(plan);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jg = jgg.compileJobGraph(oPlan);

    boolean solutionSetUpdateChecked = false;
    for (JobVertex v : jg.getVertices()) {
        if (v.getName().equals("SolutionSet Delta")) {
            // check if input of solution set delta is temped
            TaskConfig tc = new TaskConfig(v.getConfiguration());
            assertTrue(tc.isInputAsynchronouslyMaterialized(0));
            solutionSetUpdateChecked = true;
        }
    }
    assertTrue(solutionSetUpdateChecked);
}
Example #11
Source File: ConnectedComponentsITCase.java From Flink-CEPplus with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example #12
Source File: ScatterGatherIteration.java From flink with Apache License 2.0

/**
 * Method that builds the scatter function using a coGroup operator for a simple vertex (without
 * degrees).
 * It afterwards configures the function with a custom name and broadcast variables.
 *
 * @param iteration the iteration that supplies the current workset
 * @param messageTypeInfo the type information of the messages
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @param numberOfVertices the number of vertices, broadcast when that option is enabled
 * @return the scatter function
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunction(
        DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration,
        TypeInformation<Tuple2<K, Message>> messageTypeInfo,
        int whereArg, int equalToArg,
        DataSet<LongValue> numberOfVertices) {

    // build the scatter function (co group)
    CoGroupOperator<?, ?, Tuple2<K, Message>> messages;
    ScatterUdfWithEdgeValues<K, VV, VV, Message, EV> messenger =
            new ScatterUdfWithEVsSimpleVV<>(scatterFunction, messageTypeInfo);

    messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(whereArg)
            .equalTo(equalToArg).with(messenger);

    // configure coGroup message function with name and broadcast variables
    messages = messages.name("Messaging");

    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getScatterBcastVars()) {
            messages = messages.withBroadcastSet(e.f1, e.f0);
        }
        if (this.configuration.isOptNumVertices()) {
            messages = messages.withBroadcastSet(numberOfVertices, "number of vertices");
        }
    }

    return messages;
}
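The broadcast variables and the optional vertex count consumed here are supplied through the scatter-gather configuration. A brief sketch of the caller side (assuming Gelly's ScatterGatherConfiguration; the set name and the someDataSet variable are illustrative):

import org.apache.flink.graph.spargel.ScatterGatherConfiguration;

// hypothetical setup matching the getters used in buildScatterFunction
ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();
parameters.addBroadcastSetForScatterFunction("bc data", someDataSet);
parameters.setOptNumVertices(true);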
Example #13
Source File: ScatterGatherIteration.java From flink with Apache License 2.0

/**
 * Method that builds the scatter function using a coGroup operator for a vertex
 * containing degree information.
 * It afterwards configures the function with a custom name and broadcast variables.
 *
 * @param iteration the iteration that supplies the current workset
 * @param messageTypeInfo the type information of the messages
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @param numberOfVertices the number of vertices, broadcast when that option is enabled
 * @return the scatter function
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunctionVerticesWithDegrees(
        DeltaIteration<Vertex<K, Tuple3<VV, LongValue, LongValue>>, Vertex<K, Tuple3<VV, LongValue, LongValue>>> iteration,
        TypeInformation<Tuple2<K, Message>> messageTypeInfo,
        int whereArg, int equalToArg,
        DataSet<LongValue> numberOfVertices) {

    // build the scatter function (co group)
    CoGroupOperator<?, ?, Tuple2<K, Message>> messages;
    ScatterUdfWithEdgeValues<K, Tuple3<VV, LongValue, LongValue>, VV, Message, EV> messenger =
            new ScatterUdfWithEVsVVWithDegrees<>(scatterFunction, messageTypeInfo);

    messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(whereArg)
            .equalTo(equalToArg).with(messenger);

    // configure coGroup message function with name and broadcast variables
    messages = messages.name("Messaging");

    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getScatterBcastVars()) {
            messages = messages.withBroadcastSet(e.f1, e.f0);
        }
        if (this.configuration.isOptNumVertices()) {
            messages = messages.withBroadcastSet(numberOfVertices, "number of vertices");
        }
    }

    return messages;
}
Example #14
Source File: PartitionITCase.java From flink with Apache License 2.0

@Test(expected = InvalidProgramException.class)
public void testRangePartitionInIteration() throws Exception {

    // does not apply for collection execution
    if (super.mode == TestExecutionMode.COLLECTION) {
        throw new InvalidProgramException("Does not apply for collection execution");
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<Long> source = env.generateSequence(0, 10000);

    DataSet<Tuple2<Long, String>> tuples = source.map(new MapFunction<Long, Tuple2<Long, String>>() {
        @Override
        public Tuple2<Long, String> map(Long v) throws Exception {
            return new Tuple2<>(v, Long.toString(v));
        }
    });

    DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> it = tuples.iterateDelta(tuples, 10, 0);

    DataSet<Tuple2<Long, String>> body = it.getWorkset()
            .partitionByRange(1) // verify that range partition is not allowed in iteration
            .join(it.getSolutionSet())
            .where(0).equalTo(0).projectFirst(0).projectSecond(1);

    DataSet<Tuple2<Long, String>> result = it.closeWith(body, body);

    result.collect(); // should fail
}
Example #15
Source File: HighParallelismIterationsTestProgram.java From flink with Apache License 2.0

public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<>());

    env.execute();
}
Example #16
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testConvergenceCriterionWithParameterForIterateDelta() throws Exception {
    /*
     * Test convergence criterion with parameter for iterate delta
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    // register convergence criterion
    iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterionWithParam(3));

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateAndSubtractOneDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
            .where(0).equalTo(0).projectFirst(0, 1);

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

    assertEquals(expected, result);
}
Example #17
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
    /*
     * Test aggregator without parameter for iterateDelta
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
            .where(0).equalTo(0).flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

    assertEquals(expected, result);
}
Example #18
Source File: AggregatorConvergenceITCase.java From flink with Apache License 2.0

@Test
public void testDeltaConnectedComponentsWithParametrizableConvergence() throws Exception {

    // name of the aggregator that checks for convergence
    final String updatedElements = "updated.elements.aggr";

    // the iteration stops if less than this number of elements change value
    final long convergenceThreshold = 3;

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
    DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, 10, 0);

    // register the convergence criterion
    iteration.registerAggregationConvergenceCriterion(updatedElements,
            new LongSumAggregator(), new UpdatedElementsConvergenceCriterion(convergenceThreshold));

    DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.getWorkset().join(edges).where(0).equalTo(0)
            .with(new NeighborWithComponentIDJoin())
            .groupBy(0).min(1);

    DataSet<Tuple2<Long, Long>> updatedComponentId =
            verticesWithNewComponents.join(iteration.getSolutionSet()).where(0).equalTo(0)
                    .flatMap(new MinimumIdFilter(updatedElements));

    List<Tuple2<Long, Long>> result = iteration.closeWith(updatedComponentId, updatedComponentId).collect();
    Collections.sort(result, new TestBaseUtils.TupleComparator<Tuple2<Long, Long>>());

    assertEquals(expectedResult, result);
}
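The UpdatedElementsConvergenceCriterion registered above is not shown in this snippet. A minimal sketch, assuming it simply compares the aggregated number of updated elements against the threshold:

import org.apache.flink.api.common.aggregators.ConvergenceCriterion;
import org.apache.flink.types.LongValue;

// sketch: converge once fewer than `threshold` elements changed in the last iteration
public static class UpdatedElementsConvergenceCriterion implements ConvergenceCriterion<LongValue> {

    private final long threshold;

    public UpdatedElementsConvergenceCriterion(long threshold) {
        this.threshold = threshold;
    }

    @Override
    public boolean isConverged(int iteration, LongValue value) {
        return value.getValue() < threshold;
    }
}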
Example #19
Source File: CoGroupConnectedComponentsSecondITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {

    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Long> vertices = env.fromElements(ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n"))
            .map(new VertexParser());

    DataSet<Tuple2<Long, Long>> edges = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"))
            .flatMap(new EdgeParser());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration
            .getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    // emit result
    List<Tuple2<Long, Long>> resultTuples = new ArrayList<>();
    result.output(new LocalCollectionOutputFormat<>(resultTuples));

    env.execute();
}
Example #20
Source File: CoGroupConnectedComponentsITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple1<Long>> initialVertices = env.readCsvFile(verticesPath).fieldDelimiter(" ").types(Long.class).name("Vertices");

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class).name("Edges");

    DataSet<Tuple2<Long, Long>> verticesWithId = initialVertices.map(new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
        @Override
        public Tuple2<Long, Long> map(Tuple1<Long> value) throws Exception {
            return new Tuple2<>(value.f0, value.f0);
        }
    }).name("Assign Vertex Ids");

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithId.iterateDelta(verticesWithId, MAX_ITERATIONS, 0);

    JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> joinWithNeighbors = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> join(Tuple2<Long, Long> first, Tuple2<Long, Long> second) throws Exception {
                    return new Tuple2<>(second.f1, first.f1);
                }
            })
            .name("Join Candidate Id With Neighbor");

    CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> minAndUpdate = joinWithNeighbors
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate())
            .name("min Id and Update");

    iteration.closeWith(minAndUpdate, minAndUpdate).writeAsCsv(resultPath, "\n", " ").name("Result");

    env.execute("Workset Connected Components");
}
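MinIdAndUpdate is the coGroup counterpart of the join-plus-filter used in the earlier connected-components examples: it selects the minimum candidate id per vertex and emits an update only when that id beats the current solution-set entry. A sketch of the assumed logic:

import java.util.Iterator;

import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// sketch: emit (vertex, minCandidateId) only if it lowers the stored component id
public static final class MinIdAndUpdate
        implements CoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> {

    @Override
    public void coGroup(
            Iterable<Tuple2<Long, Long>> candidates,
            Iterable<Tuple2<Long, Long>> current,
            Collector<Tuple2<Long, Long>> out) {

        // the solution set side holds at most one entry per key
        Iterator<Tuple2<Long, Long>> it = current.iterator();
        if (!it.hasNext()) {
            return;
        }
        Tuple2<Long, Long> old = it.next();

        long min = Long.MAX_VALUE;
        for (Tuple2<Long, Long> candidate : candidates) {
            min = Math.min(min, candidate.f1);
        }

        if (min < old.f1) {
            out.collect(new Tuple2<>(old.f0, min));
        }
    }
}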
Example #21
Source File: IterationCompilerTest.java From flink with Apache License 2.0

@Test
public void testEmptyWorksetIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> map(Long value) {
                        return null;
                    }
                });

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);

        iter.closeWith(iter.getWorkset(), iter.getWorkset())
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}