Java Code Examples for org.apache.flink.api.java.DataSet#iterate()
The following examples show how to use org.apache.flink.api.java.DataSet#iterate().
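All of these programs follow the same bulk-iteration pattern: iterate(n) opens a loop over a DataSet and returns an IterativeDataSet, and closeWith(...) defines the data set that is fed back as the next partial solution, running for at most n supersteps (or until an optional termination criterion is met). The following self-contained sketch illustrates the pattern in isolation; the Increment step function and the sequence input are placeholders chosen for this sketch, not taken from any of the examples below.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.IterativeDataSet;

public class BulkIterationSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Long> initial = env.generateSequence(1, 10);

        // open a bulk iteration that runs for at most 5 supersteps
        IterativeDataSet<Long> iteration = initial.iterate(5);

        // the step function: whatever transformation should be applied per superstep
        DataSet<Long> step = iteration.map(new Increment());

        // close the loop: 'step' is fed back as the next partial solution
        DataSet<Long> result = iteration.closeWith(step);

        result.print();
    }

    // hypothetical step function used only for this sketch
    public static class Increment implements MapFunction<Long, Long> {
        @Override
        public Long map(Long value) {
            return value + 1;
        }
    }
}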
Example 1
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0 | 6 votes |
@Test
public void testBranchAfterIteration() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Long> sourceA = env.generateSequence(0, 1);

    IterativeDataSet<Long> loopHead = sourceA.iterate(10);
    DataSet<Long> loopTail = loopHead.map(new IdentityMapper<Long>()).name("Mapper");
    DataSet<Long> loopRes = loopHead.closeWith(loopTail);

    loopRes.output(new DiscardingOutputFormat<Long>());
    loopRes.map(new IdentityMapper<Long>())
            .output(new DiscardingOutputFormat<Long>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example 2
Source File: NestedIterationsTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test
public void testBulkIterationInClosure() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Long> data1 = env.generateSequence(1, 100);
        DataSet<Long> data2 = env.generateSequence(1, 100);

        IterativeDataSet<Long> firstIteration = data1.iterate(100);
        DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdentityMapper<Long>()));

        IterativeDataSet<Long> mainIteration = data2.map(new IdentityMapper<Long>()).iterate(100);
        DataSet<Long> joined = mainIteration.join(firstResult)
                .where(new IdentityKeyExtractor<Long>()).equalTo(new IdentityKeyExtractor<Long>())
                .with(new DummyFlatJoinFunction<Long>());

        DataSet<Long> mainResult = mainIteration.closeWith(joined);

        mainResult.output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();

        // optimizer should be able to translate this
        OptimizedPlan op = compileNoStats(p);

        // job graph generator should be able to translate this
        new JobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 3
Source File: IterationIncompleteStaticPathConsumptionITCase.java From flink with Apache License 2.0 | 5 votes |
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // the test data is constructed such that the merge join zig zag
    // has an early out, leaving elements on the static path input unconsumed
    DataSet<Path> edges = env.fromElements(
            new Path(2, 1), new Path(4, 1), new Path(6, 3), new Path(8, 3),
            new Path(10, 1), new Path(12, 1), new Path(14, 3), new Path(16, 3),
            new Path(18, 1), new Path(20, 1));

    IterativeDataSet<Path> currentPaths = edges.iterate(10);

    DataSet<Path> newPaths = currentPaths
            .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
                .with(new PathConnector())
            .union(currentPaths).distinct("from", "to");

    DataSet<Path> result = currentPaths.closeWith(newPaths);

    result.output(new DiscardingOutputFormat<Path>());

    env.execute();
}
Example 4
Source File: IterationsCompilerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
public static DataSet<Tuple2<Long, Long>> doSimpleBulkIteration(
        DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {

    // open a bulk iteration
    IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);

    DataSet<Tuple2<Long, Long>> changes = iteration
            .join(edges).where(0).equalTo(0)
            .flatMap(new FlatMapJoin());

    // close the bulk iteration
    return iteration.closeWith(changes);
}
Example 5
Source File: AggregatorsITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test
public void testAggregatorWithoutParameterForIterate() throws Exception {
    /*
     * Test aggregator without parameter for iterate
     */

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
    IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    // register convergence criterion
    iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterion());

    DataSet<Integer> updatedDs = iteration.map(new SubtractOneMap());
    List<Integer> result = iteration.closeWith(updatedDs).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

    assertEquals(expected, result);
}
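The NegativeElementsConvergenceCriterion referenced above is defined elsewhere in the test class and is not shown here. A minimal sketch of what such an aggregator-driven convergence criterion might look like, assuming the standard ConvergenceCriterion interface over LongValue; the threshold of 3 is illustrative and not necessarily the value used by the real test class:

import org.apache.flink.api.common.aggregators.ConvergenceCriterion;
import org.apache.flink.types.LongValue;

// Hypothetical sketch: stop iterating once the LongSumAggregator registered on
// the iteration has counted more than three negative elements in a superstep.
public class NegativeElementsConvergenceCriterion implements ConvergenceCriterion<LongValue> {

    @Override
    public boolean isConverged(int iteration, LongValue value) {
        // 'value' is the aggregated sum produced during the last superstep
        return value.getValue() > 3;
    }
}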
Example 6
Source File: IterationIncompleteDynamicPathConsumptionITCase.java From flink with Apache License 2.0 | 5 votes |
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // the test data is constructed such that the merge join zig zag
    // has an early out, leaving elements on the dynamic path input unconsumed
    DataSet<Path> edges = env.fromElements(
            new Path(1, 2), new Path(1, 4), new Path(3, 6), new Path(3, 8),
            new Path(1, 10), new Path(1, 12), new Path(3, 14), new Path(3, 16),
            new Path(1, 18), new Path(1, 20));

    IterativeDataSet<Path> currentPaths = edges.iterate(10);

    DataSet<Path> newPaths = currentPaths
            .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
                .with(new PathConnector())
            .union(currentPaths).distinct("from", "to");

    DataSet<Path> result = currentPaths.closeWith(newPaths);

    result.output(new DiscardingOutputFormat<Path>());

    env.execute();
}
Example 7
Source File: IterationIncompleteDynamicPathConsumptionITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // the test data is constructed such that the merge join zig zag
    // has an early out, leaving elements on the dynamic path input unconsumed
    DataSet<Path> edges = env.fromElements(
            new Path(1, 2), new Path(1, 4), new Path(3, 6), new Path(3, 8),
            new Path(1, 10), new Path(1, 12), new Path(3, 14), new Path(3, 16),
            new Path(1, 18), new Path(1, 20));

    IterativeDataSet<Path> currentPaths = edges.iterate(10);

    DataSet<Path> newPaths = currentPaths
            .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
                .with(new PathConnector())
            .union(currentPaths).distinct("from", "to");

    DataSet<Path> result = currentPaths.closeWith(newPaths);

    result.output(new DiscardingOutputFormat<Path>());

    env.execute();
}
Example 8
Source File: PipelineBreakerTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * <pre>
 *                                +----------- ITERATION ---------+
 *                                |                               |
 *                               +--+                         +----+
 *  (source 1) ----------------->|PS| ------------ +      +-->|next|---> (sink)
 *                               +--+       (BC)   |      |   +----+
 *                                |                 V      |
 *  (source 2) --> (map) --+------|-----------> (MAPPER) ---+      |
 *                          |     |                 ^              |
 *                          |     |                 |    (BC)      |
 *                          |     +-----------------|--------------+
 *                          |                       |
 *                          +--(map) --> (reduce) --+
 * </pre>
 */
@Test
public void testPipelineBreakerBroadcastedPartialSolution() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(ExecutionMode.PIPELINED);
        env.setParallelism(64);

        DataSet<Long> initialSource = env.generateSequence(1, 10);
        IterativeDataSet<Long> iteration = initialSource.iterate(100);

        DataSet<Long> sourceWithMapper = env.generateSequence(1, 10).map(new IdentityMapper<Long>());

        DataSet<Long> bcInput1 = sourceWithMapper
                .map(new IdentityMapper<Long>())
                .reduce(new SelectOneReducer<Long>());

        DataSet<Long> result = sourceWithMapper
                .map(new IdentityMapper<Long>())
                        .withBroadcastSet(iteration, "bc2")
                        .withBroadcastSet(bcInput1, "bc1");

        iteration.closeWith(result).output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        BulkIterationPlanNode iterationPlanNode = (BulkIterationPlanNode) sink.getInput().getSource();
        SingleInputPlanNode mapper = (SingleInputPlanNode) iterationPlanNode.getRootOfStepFunction();

        assertEquals(TempMode.CACHED, mapper.getInput().getTempMode());
        assertEquals(DataExchangeMode.BATCH, mapper.getInput().getDataExchangeMode());
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 9
Source File: BulkIterationWithAllReducerITCase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Integer> data = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);

    IterativeDataSet<Integer> iteration = data.iterate(10);

    DataSet<Integer> result = data.reduceGroup(new PickOneAllReduce()).withBroadcastSet(iteration, "bc");

    final List<Integer> resultList = new ArrayList<Integer>();
    iteration.closeWith(result).output(new LocalCollectionOutputFormat<Integer>(resultList));

    env.execute();

    Assert.assertEquals(8, resultList.get(0).intValue());
}
Example 10
Source File: IterationsCompilerTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testResetPartialSolution() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Long> width = env.generateSequence(1, 10);
        DataSet<Long> update = env.generateSequence(1, 10);
        DataSet<Long> lastGradient = env.generateSequence(1, 10);

        DataSet<Long> init = width.union(update).union(lastGradient);

        IterativeDataSet<Long> iteration = init.iterate(10);

        width = iteration.filter(new IdFilter<Long>());
        update = iteration.filter(new IdFilter<Long>());
        lastGradient = iteration.filter(new IdFilter<Long>());

        DataSet<Long> gradient = width.map(new IdentityMapper<Long>());
        DataSet<Long> term = gradient.join(lastGradient)
                .where(new IdentityKeyExtractor<Long>())
                .equalTo(new IdentityKeyExtractor<Long>())
                .with(new JoinFunction<Long, Long, Long>() {
                    public Long join(Long first, Long second) {
                        return null;
                    }
                });

        update = update.map(new RichMapFunction<Long, Long>() {
            public Long map(Long value) {
                return null;
            }
        }).withBroadcastSet(term, "some-name");

        DataSet<Long> result = iteration.closeWith(width.union(update).union(lastGradient));

        result.output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        new JobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 11
Source File: BroadcastVarInitializationITCase.java From flink with Apache License 2.0 | 4 votes |
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSet<Integer> data = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);

    IterativeDataSet<Integer> iteration = data.iterate(10);

    DataSet<Integer> result = data.reduceGroup(new PickOneAllReduce()).withBroadcastSet(iteration, "bc");

    final List<Integer> resultList = new ArrayList<Integer>();
    iteration.closeWith(result).output(new LocalCollectionOutputFormat<Integer>(resultList));

    env.execute();

    Assert.assertEquals(8, resultList.get(0).intValue());
}
Example 12
Source File: KMeansArbitraryDimension.java From flink-perf with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    if (!parseParameters(args)) {
        return;
    }

    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<Point> points = env
            .readTextFile(pointsPath)
            .map(new ConvertToPoint());

    DataSet<Centroid> centroids = env
            .readTextFile(centersPath)
            .map(new ConvertToCentroid());

    // set number of bulk iterations for KMeans algorithm
    IterativeDataSet<Centroid> loop = centroids.iterate(numIterations);

    DataSet<Centroid> newCentroids = points
            // compute closest centroid for each point
            .map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
            // count and sum point coordinates for each centroid
            .map(new CountAppender())
            .groupBy(0).reduce(new CentroidAccumulator())
            // compute new centroids from point counts and coordinate sums
            .map(new CentroidAverager());

    // feed new centroids back into next iteration
    DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

    DataSet<Tuple2<Integer, Point>> clusteredPoints = points
            // assign points to final clusters
            .map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

    // emit result
    //clusteredPoints.writeAsCsv(outputPath, "\n", " ", FileSystem.WriteMode.OVERWRITE);
    clusteredPoints.writeAsText(outputPath, FileSystem.WriteMode.OVERWRITE);

    // execute program
    env.setParallelism(dop);
    env.execute("KMeans Multi-Dimension");
}
Example 13
Source File: KMeans.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params); // make parameters available in the web interface

    // get input data:
    // read the points and centroids from the provided paths or fall back to default data
    DataSet<Point> points = getPointDataSet(params, env);
    DataSet<Centroid> centroids = getCentroidDataSet(params, env);

    // set number of bulk iterations for KMeans algorithm
    IterativeDataSet<Centroid> loop = centroids.iterate(params.getInt("iterations", 10));

    DataSet<Centroid> newCentroids = points
            // compute closest centroid for each point
            .map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
            // count and sum point coordinates for each centroid
            .map(new CountAppender())
            .groupBy(0).reduce(new CentroidAccumulator())
            // compute new centroids from point counts and coordinate sums
            .map(new CentroidAverager());

    // feed new centroids back into next iteration
    DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

    DataSet<Tuple2<Integer, Point>> clusteredPoints = points
            // assign points to final clusters
            .map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

    // emit result
    if (params.has("output")) {
        clusteredPoints.writeAsCsv(params.get("output"), "\n", " ");

        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("KMeans Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        clusteredPoints.print();
    }
}
Example 14
Source File: PageRank.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    ParameterTool params = ParameterTool.fromArgs(args);

    final int numPages = params.getInt("numPages", PageRankData.getNumberOfPages());
    final int maxIterations = params.getInt("iterations", 10);

    // set up execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make the parameters available to the web ui
    env.getConfig().setGlobalJobParameters(params);

    // get input data
    DataSet<Long> pagesInput = getPagesDataSet(env, params);
    DataSet<Tuple2<Long, Long>> linksInput = getLinksDataSet(env, params);

    // assign initial rank to pages
    DataSet<Tuple2<Long, Double>> pagesWithRanks = pagesInput
            .map(new RankAssigner((1.0d / numPages)));

    // build adjacency list from link input
    DataSet<Tuple2<Long, Long[]>> adjacencyListInput =
            linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList());

    // set iterative data set
    IterativeDataSet<Tuple2<Long, Double>> iteration = pagesWithRanks.iterate(maxIterations);

    DataSet<Tuple2<Long, Double>> newRanks = iteration
            // join pages with outgoing edges and distribute rank
            .join(adjacencyListInput).where(0).equalTo(0).flatMap(new JoinVertexWithEdgesMatch())
            // collect and sum ranks
            .groupBy(0).aggregate(SUM, 1)
            // apply dampening factor
            .map(new Dampener(DAMPENING_FACTOR, numPages));

    DataSet<Tuple2<Long, Double>> finalPageRanks = iteration.closeWith(
            newRanks,
            newRanks.join(iteration).where(0).equalTo(0)
            // termination condition
            .filter(new EpsilonFilter()));

    // emit result
    if (params.has("output")) {
        finalPageRanks.writeAsCsv(params.get("output"), "\n", " ");
        // execute program
        env.execute("Basic Page Rank Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        finalPageRanks.print();
    }
}
Example 15
Source File: LinearRegression.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final int iterations = params.getInt("iterations", 10);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // get input x data from elements
    DataSet<Data> data;
    if (params.has("input")) {
        // read data from CSV file
        data = env.readCsvFile(params.get("input"))
                .fieldDelimiter(" ")
                .includeFields(true, true)
                .pojoType(Data.class);
    } else {
        System.out.println("Executing LinearRegression example with default input data set.");
        System.out.println("Use --input to specify file input.");
        data = LinearRegressionData.getDefaultDataDataSet(env);
    }

    // get the parameters from elements
    DataSet<Params> parameters = LinearRegressionData.getDefaultParamsDataSet(env);

    // set number of bulk iterations for SGD linear Regression
    IterativeDataSet<Params> loop = parameters.iterate(iterations);

    DataSet<Params> newParameters = data
            // compute a single step using every sample
            .map(new SubUpdate()).withBroadcastSet(loop, "parameters")
            // sum up all the steps
            .reduce(new UpdateAccumulator())
            // average the steps and update all parameters
            .map(new Update());

    // feed new parameters back into next iteration
    DataSet<Params> result = loop.closeWith(newParameters);

    // emit result
    if (params.has("output")) {
        result.writeAsText(params.get("output"));
        // execute program
        env.execute("Linear Regression example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Example 16
Source File: CachedMatchStrategyCompilerTest.java From flink with Apache License 2.0 | 3 votes |
private Plan getTestPlanRightStatic(String strategy) {

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple3<Long, Long, Long>> bigInput = env.readCsvFile("file://bigFile")
            .types(Long.class, Long.class, Long.class).name("bigFile");

    DataSet<Tuple3<Long, Long, Long>> smallInput = env.readCsvFile("file://smallFile")
            .types(Long.class, Long.class, Long.class).name("smallFile");

    IterativeDataSet<Tuple3<Long, Long, Long>> iteration = bigInput.iterate(10);

    Configuration joinStrategy = new Configuration();
    joinStrategy.setString(Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH);

    if (!strategy.equals("")) {
        joinStrategy.setString(Optimizer.HINT_LOCAL_STRATEGY, strategy);
    }

    DataSet<Tuple3<Long, Long, Long>> inner = iteration.join(smallInput).where(0).equalTo(0)
            .with(new DummyJoiner()).name("DummyJoiner").withParameters(joinStrategy);

    DataSet<Tuple3<Long, Long, Long>> output = iteration.closeWith(inner);

    output.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    return env.createProgramPlan();
}
Example 17
Source File: IterationWithUnionITCase.java From flink with Apache License 2.0 | 3 votes |
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Integer, CoordVector>> initialInput =
            env.readFile(new PointInFormat(), this.dataPath).setParallelism(1);

    IterativeDataSet<Tuple2<Integer, CoordVector>> iteration = initialInput.iterate(2);

    DataSet<Tuple2<Integer, CoordVector>> result = iteration.union(iteration).map(new IdentityMapper());

    iteration.closeWith(result).writeAsFormattedText(this.resultPath, new PointFormatter());

    env.execute();
}
Example 18
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0 | 3 votes |
@Test
public void testBCVariableClosure() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> input = env.readTextFile(IN_FILE).name("source1");

    DataSet<String> reduced = input
            .map(new IdentityMapper<String>())
            .reduceGroup(new Top1GroupReducer<String>());

    DataSet<String> initialSolution = input.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc");

    IterativeDataSet<String> iteration = initialSolution.iterate(100);

    iteration.closeWith(iteration.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "red"))
            .output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example 19
Source File: BulkIterationTranslationTest.java From flink with Apache License 2.0 | 2 votes |
@Test
public void testCorrectTranslation() {
    final String jobName = "Test JobName";

    final int numIterations = 13;

    final int defaultParallelism = 133;
    final int iterationParallelism = 77;

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // ------------ construct the test program ------------------

    {
        env.setParallelism(defaultParallelism);

        @SuppressWarnings("unchecked")
        DataSet<Tuple3<Double, Long, String>> initialDataSet = env.fromElements(new Tuple3<>(3.44, 5L, "abc"));

        IterativeDataSet<Tuple3<Double, Long, String>> bulkIteration = initialDataSet.iterate(numIterations);
        bulkIteration.setParallelism(iterationParallelism);

        // test that multiple iteration consumers are supported
        DataSet<Tuple3<Double, Long, String>> identity = bulkIteration
                .map(new IdentityMapper<Tuple3<Double, Long, String>>());

        DataSet<Tuple3<Double, Long, String>> result = bulkIteration.closeWith(identity);

        result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
        result.writeAsText("/dev/null");
    }

    Plan p = env.createProgramPlan(jobName);

    // ------------- validate the plan ----------------

    BulkIterationBase<?> iteration = (BulkIterationBase<?>) p.getDataSinks().iterator().next().getInput();

    assertEquals(jobName, p.getJobName());
    assertEquals(defaultParallelism, p.getDefaultParallelism());
    assertEquals(iterationParallelism, iteration.getParallelism());
}
Example 20
Source File: PageRankStephan.java From flink-perf with Apache License 2.0 | 2 votes |
public static void main(String[] args) throws Exception {

    String adjacencyPath = args[0]; //"/data/demodata/pagerank/adjacency/adjacency.csv";
    String outpath = args[1]; //"/home/cicero/Desktop/out.txt";
    int numIterations = Integer.valueOf(args[2]);
    long numVertices = Integer.valueOf(args[3]);

    final double threshold = 0.005 / numVertices;

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, long[]>> adjacency = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder());
    DataSet<Tuple2<Long, long[]>> adjacency2 = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder());

    DataSet<Tuple2<Long, Double>> initialRanks = adjacency.map(new VertexInitializer(1.0 / numVertices));

    IterativeDataSet<Tuple2<Long, Double>> iteration = initialRanks.iterate(numIterations);

    DataSet<Tuple2<Long, Double>> newRanks = iteration
            .join(adjacency2, JoinHint.REPARTITION_HASH_SECOND).where(0).equalTo(0).with(new RankDistributor(0.85, numVertices))
            .groupBy(0)
            .reduceGroup(new Adder());

    DataSet<Integer> tc = iteration.join(newRanks).where(0).equalTo(0)
            .with(new FlatJoinFunction<Tuple2<Long, Double>, Tuple2<Long, Double>, Integer>() {
                @Override
                public void join(Tuple2<Long, Double> longDoubleTuple2, Tuple2<Long, Double> longDoubleTuple22, Collector<Integer> collector) throws Exception {
                    double delta = Math.abs(longDoubleTuple2.f1 - longDoubleTuple22.f1);
                    if (delta > threshold) {
                        collector.collect(1);
                    }
                }
            });

    iteration.closeWith(newRanks, tc).writeAsCsv(outpath + "_fastbulk", WriteMode.OVERWRITE);

    // System.out.println(env.getExecutionPlan());

    env.execute("Page Rank Optimized");
}