Java Code Examples for org.apache.flink.api.java.DataSet#iterateDelta()
The following examples show how to use org.apache.flink.api.java.DataSet#iterateDelta().
The original project, source file, and license for each example are noted in the header above it.
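All of the examples below follow the same basic pattern: open a delta iteration on an initial solution set with iterateDelta(initialWorkset, maxIterations, keyPositions), derive a solution-set delta and a next workset from iteration.getWorkset() and iteration.getSolutionSet(), and finish with iteration.closeWith(delta, nextWorkset). Here is a minimal, self-contained sketch of that pattern; the input data, the filter used as the step function, and the class name IterateDeltaSketch are illustrative assumptions rather than code taken from any of the projects below.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;

public class IterateDeltaSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Hypothetical (key, value) input; field 0 serves as the solution set key.
        DataSet<Tuple2<Long, Long>> initial = env.fromElements(
                Tuple2.of(1L, 10L), Tuple2.of(2L, 20L), Tuple2.of(3L, 30L));

        // Open a delta iteration: the initial solution set doubles as the initial
        // workset, with at most 10 supersteps, keyed on tuple field 0.
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
                initial.iterateDelta(initial, 10, 0);

        // Step function: compute the solution set delta from the workset. A real
        // program would typically join the workset with iteration.getSolutionSet();
        // a plain filter keeps this sketch short.
        DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
                .filter(t -> t.f1 > 0L);

        // Close with (solution set delta, next workset); most examples below pass
        // the same data set for both.
        DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, delta);

        result.print();
    }
}

At each superstep, closeWith() merges the delta into the solution set by key and feeds the next workset back into the step function; the iteration terminates when the workset becomes empty or the maximum number of supersteps is reached.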
Example 1
Source File: AggregatorsITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testConvergenceCriterionWithParameterForIterateDelta() throws Exception {
    /*
     * Test convergence criterion with parameter for iterate delta
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet =
            CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    // register convergence criterion
    iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterionWithParam(3));

    DataSet<Tuple2<Integer, Integer>> updatedDs =
            iteration.getWorkset().map(new AggregateAndSubtractOneDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs
            .join(iteration.getSolutionSet()).where(0).equalTo(0).projectFirst(0, 1);

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);

    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

    assertEquals(expected, result);
}
Example 2
Source File: TempInIterationsTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testTempInIterationTest() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input = env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            input.iterateDelta(input, 1, 0);

    DataSet<Tuple2<Long, Long>> update = iteration.getWorkset()
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

    iteration.closeWith(update, update)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = (new Optimizer(new Configuration())).compile(plan);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jg = jgg.compileJobGraph(oPlan);

    boolean solutionSetUpdateChecked = false;
    for (JobVertex v : jg.getVertices()) {
        if (v.getName().equals("SolutionSet Delta")) {
            // check if input of solution set delta is temped
            TaskConfig tc = new TaskConfig(v.getConfiguration());
            assertTrue(tc.isInputAsynchronouslyMaterialized(0));
            solutionSetUpdateChecked = true;
        }
    }
    assertTrue(solutionSetUpdateChecked);
}
Example 3
Source File: TempInIterationsTest.java From flink with Apache License 2.0
@Test
public void testTempInIterationTest() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input = env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            input.iterateDelta(input, 1, 0);

    DataSet<Tuple2<Long, Long>> update = iteration.getWorkset()
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

    iteration.closeWith(update, update)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = (new Optimizer(new Configuration())).compile(plan);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jg = jgg.compileJobGraph(oPlan);

    boolean solutionSetUpdateChecked = false;
    for (JobVertex v : jg.getVertices()) {
        if (v.getName().equals("SolutionSet Delta")) {
            // check if input of solution set delta is temped
            TaskConfig tc = new TaskConfig(v.getConfiguration());
            assertTrue(tc.isInputAsynchronouslyMaterialized(0));
            solutionSetUpdateChecked = true;
        }
    }
    assertTrue(solutionSetUpdateChecked);
}
Example 4
Source File: AggregatorsITCase.java From flink with Apache License 2.0
@Test
public void testAggregatorWithParameterForIterateDelta() throws Exception {
    /*
     * Test aggregator with parameter for iterateDelta
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet =
            CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregatorWithParameter(4);
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs
            .join(iteration.getSolutionSet()).where(0).equalTo(0).flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);

    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

    // JUnit's assertEquals takes the expected value first
    assertEquals(expected, result);
}
Example 5
Source File: CoGroupConnectedComponentsSecondITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Long> vertices = env
            .fromElements(ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n"))
            .map(new VertexParser());

    DataSet<Tuple2<Long, Long>> edges = env
            .fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"))
            .flatMap(new EdgeParser());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    // emit result
    List<Tuple2<Long, Long>> resultTuples = new ArrayList<>();
    result.output(new LocalCollectionOutputFormat<>(resultTuples));

    env.execute();
}
Example 6
Source File: ConnectedComponentsWithSolutionSetFirstITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId =
            vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1);

    DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
            .join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 7
Source File: SuccessAfterNetworkBuffersFailureITCase.java From Flink-CEPplus with Apache License 2.0
private static void runConnectedComponents(ExecutionEnvironment env) throws Exception {
    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    env.execute();
}
Example 8
Source File: AggregatorConvergenceITCase.java From flink with Apache License 2.0
@Test
public void testDeltaConnectedComponentsWithParametrizableConvergence() throws Exception {
    // name of the aggregator that checks for convergence
    final String updatedElements = "updated.elements.aggr";

    // the iteration stops if less than this number of elements change value
    final long convergenceThreshold = 3;

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
    DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, 10, 0);

    // register the convergence criterion
    iteration.registerAggregationConvergenceCriterion(updatedElements,
            new LongSumAggregator(), new UpdatedElementsConvergenceCriterion(convergenceThreshold));

    DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new NeighborWithComponentIDJoin())
            .groupBy(0).min(1);

    DataSet<Tuple2<Long, Long>> updatedComponentId = verticesWithNewComponents
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .flatMap(new MinimumIdFilter(updatedElements));

    List<Tuple2<Long, Long>> result = iteration.closeWith(updatedComponentId, updatedComponentId).collect();
    Collections.sort(result, new TestBaseUtils.TupleComparator<Tuple2<Long, Long>>());

    assertEquals(expectedResult, result);
}
Example 9
Source File: AggregatorsITCase.java From flink with Apache License 2.0
@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
    /*
     * Test aggregator without parameter for iterateDelta
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet =
            CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs
            .join(iteration.getSolutionSet()).where(0).equalTo(0).flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);

    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

    assertEquals(expected, result);
}
Example 10
Source File: SuccessAfterNetworkBuffersFailureITCase.java From flink with Apache License 2.0
private static void runConnectedComponents(ExecutionEnvironment env) throws Exception {
    env.setParallelism(PARALLELISM);

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    env.execute();
}
Example 11
Source File: AggregatorsITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
    /*
     * Test aggregator without parameter for iterateDelta
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet =
            CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs
            .join(iteration.getSolutionSet()).where(0).equalTo(0).flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);

    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

    assertEquals(expected, result);
}
Example 12
Source File: AggregatorConvergenceITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testDeltaConnectedComponentsWithParametrizableConvergence() throws Exception {
    // name of the aggregator that checks for convergence
    final String updatedElements = "updated.elements.aggr";

    // the iteration stops if less than this number of elements change value
    final long convergenceThreshold = 3;

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
    DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, 10, 0);

    // register the convergence criterion
    iteration.registerAggregationConvergenceCriterion(updatedElements,
            new LongSumAggregator(), new UpdatedElementsConvergenceCriterion(convergenceThreshold));

    DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new NeighborWithComponentIDJoin())
            .groupBy(0).min(1);

    DataSet<Tuple2<Long, Long>> updatedComponentId = verticesWithNewComponents
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .flatMap(new MinimumIdFilter(updatedElements));

    List<Tuple2<Long, Long>> result = iteration.closeWith(updatedComponentId, updatedComponentId).collect();
    Collections.sort(result, new TestBaseUtils.TupleComparator<Tuple2<Long, Long>>());

    assertEquals(expectedResult, result);
}
Example 13
Source File: IterationsCompilerTest.java From flink with Apache License 2.0
@Test
public void testSolutionSetDeltaDependsOnBroadcastVariable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> source =
                env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());

        DataSet<Tuple2<Long, Long>> invariantInput =
                env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());

        // iteration from here
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = source.iterateDelta(source, 1000, 1);

        DataSet<Tuple2<Long, Long>> result = invariantInput
                .map(new IdentityMapper<Tuple2<Long, Long>>()).withBroadcastSet(iter.getWorkset(), "bc data")
                .join(iter.getSolutionSet()).where(0).equalTo(1).projectFirst(1).projectSecond(1);

        iter.closeWith(result.map(new IdentityMapper<Tuple2<Long, Long>>()), result)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        OptimizedPlan p = compileNoStats(env.createProgramPlan());

        // check that the JSON generator accepts this plan
        new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(p);

        // check that the JobGraphGenerator accepts the plan
        new JobGraphGenerator().compileJobGraph(p);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 14
Source File: IterationCompilerTest.java From flink with Apache License 2.0
@Test
public void testEmptyWorksetIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> map(Long value) {
                        return null;
                    }
                });

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);

        // close the iteration with the unchanged workset as both delta and next workset
        iter.closeWith(iter.getWorkset(), iter.getWorkset())
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        new JobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 15
Source File: ConnectedComponentsWithObjectMapITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId =
            vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 16
Source File: CollectionExecutionIterationTest.java From flink with Apache License 2.0
@Test
public void testDeltaIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Integer, Integer>> solInput = env.fromElements(
                new Tuple2<Integer, Integer>(1, 0),
                new Tuple2<Integer, Integer>(2, 0),
                new Tuple2<Integer, Integer>(3, 0),
                new Tuple2<Integer, Integer>(4, 0));

        @SuppressWarnings("unchecked")
        DataSet<Tuple1<Integer>> workInput = env.fromElements(
                new Tuple1<Integer>(1),
                new Tuple1<Integer>(2),
                new Tuple1<Integer>(3),
                new Tuple1<Integer>(4));

        // Perform a delta iteration where we add those values to the workset where
        // the second tuple field is smaller than the first tuple field.
        // At the end both tuple fields must be the same.

        DeltaIteration<Tuple2<Integer, Integer>, Tuple1<Integer>> iteration =
                solInput.iterateDelta(workInput, 10, 0);

        DataSet<Tuple2<Integer, Integer>> solDelta = iteration.getSolutionSet()
                .join(iteration.getWorkset()).where(0).equalTo(0)
                .with(new JoinFunction<Tuple2<Integer, Integer>, Tuple1<Integer>, Tuple2<Integer, Integer>>() {
                    @Override
                    public Tuple2<Integer, Integer> join(Tuple2<Integer, Integer> first,
                            Tuple1<Integer> second) throws Exception {
                        return new Tuple2<Integer, Integer>(first.f0, first.f1 + 1);
                    }
                });

        DataSet<Tuple1<Integer>> nextWorkset = solDelta.flatMap(
                new FlatMapFunction<Tuple2<Integer, Integer>, Tuple1<Integer>>() {
                    @Override
                    public void flatMap(Tuple2<Integer, Integer> in,
                            Collector<Tuple1<Integer>> out) throws Exception {
                        if (in.f1 < in.f0) {
                            out.collect(new Tuple1<Integer>(in.f0));
                        }
                    }
                });

        List<Tuple2<Integer, Integer>> collected = new ArrayList<Tuple2<Integer, Integer>>();

        iteration.closeWith(solDelta, nextWorkset)
                .output(new LocalCollectionOutputFormat<Tuple2<Integer, Integer>>(collected));

        env.execute();

        // verify that both tuple fields are now the same
        for (Tuple2<Integer, Integer> t : collected) {
            assertEquals(t.f0, t.f1);
        }
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 17
Source File: ConnectedComponentsWithDeferredUpdateITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId =
            vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new UpdateComponentIdMatchNonPreserving());

    DataSet<Tuple2<Long, Long>> delta;
    if (extraMapper) {
        delta = changes.map(
                // ID Mapper
                new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                    private static final long serialVersionUID = -3929364091829757322L;

                    @Override
                    public Tuple2<Long, Long> map(Tuple2<Long, Long> v) throws Exception {
                        return v;
                    }
                });
    }
    else {
        delta = changes;
    }

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 18
Source File: IterationCompilerTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testWorksetIterationWithUnionRoot() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> map(Long value) {
                        return null;
                    }
                });

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);

        iter.closeWith(
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                        .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())),
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                        .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())))
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) sink.getInput().getSource();

        // make sure that the root is part of the dynamic path,
        // i.e. the "NoOp"s that come after the union
        SingleInputPlanNode nextWorksetNoop = (SingleInputPlanNode) iterNode.getNextWorkSetPlanNode();
        SingleInputPlanNode solutionDeltaNoop = (SingleInputPlanNode) iterNode.getSolutionSetDeltaPlanNode();

        NAryUnionPlanNode nextWorksetUnion = (NAryUnionPlanNode) nextWorksetNoop.getInput().getSource();
        NAryUnionPlanNode solutionDeltaUnion = (NAryUnionPlanNode) solutionDeltaNoop.getInput().getSource();

        assertTrue(nextWorksetNoop.isOnDynamicPath());
        assertTrue(nextWorksetNoop.getCostWeight() >= 1);

        assertTrue(solutionDeltaNoop.isOnDynamicPath());
        assertTrue(solutionDeltaNoop.getCostWeight() >= 1);

        assertTrue(nextWorksetUnion.isOnDynamicPath());
        assertTrue(nextWorksetUnion.getCostWeight() >= 1);

        assertTrue(solutionDeltaUnion.isOnDynamicPath());
        assertTrue(solutionDeltaUnion.getCostWeight() >= 1);

        new JobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 19
Source File: AdaptivePageRank.java From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {
    long numVertices = 41652230;

    double threshold = 0.005 / numVertices;
    double dampeningFactor = 0.85;

    String adjacencyPath = args.length > 1 ? args[0] : "/data/demodata/pagerank/edges/edges.csv";
    String outpath = args.length > 2 ? args[1] : "/data/demodata/pagerank/adacency_comp";
    int numIterations = args.length > 3 ? Integer.valueOf(args[2]) : 100;

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // env.setDegreeOfParallelism(4);

    DataSet<Tuple2<Long, long[]>> adjacency = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder());
    DataSet<Tuple2<Long, long[]>> adjacency2 = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder());

    DataSet<Tuple2<Long, Double>> initialRanks = adjacency
            .flatMap(new InitialMessageBuilder(numVertices, dampeningFactor))
            .groupBy(0)
            .reduceGroup(new Agg());

    DataSet<Tuple2<Long, Double>> initialDeltas = initialRanks.map(new InitialDeltaBuilder(numVertices));

    // ---------- iterative part ---------

    DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> adaptiveIteration =
            initialRanks.iterateDelta(initialDeltas, numIterations, 0);

    DataSet<Tuple2<Long, Double>> deltas = adaptiveIteration.getWorkset()
            .join(adjacency2).where(0).equalTo(0).with(new DeltaDistributor(0.85))
            .groupBy(0)
            .reduceGroup(new AggAndFilter(threshold));

    DataSet<Tuple2<Long, Double>> rankUpdates = adaptiveIteration.getSolutionSet()
            .join(deltas).where(0).equalTo(0).with(new SolutionJoin());

    adaptiveIteration.closeWith(rankUpdates, deltas)
            .writeAsCsv(outpath + "_adapt", WriteMode.OVERWRITE);

    // System.out.println(env.getExecutionPlan());
    JobExecutionResult result = env.execute("Adaptive Page Rank");

    Map<String, Object> accumulators = result.getAllAccumulatorResults();
    List<String> keys = new ArrayList<String>(accumulators.keySet());
    Collections.sort(keys);
    for (String key : keys) {
        System.out.println(key + " : " + accumulators.get(key));
    }
}