Java Code Examples for org.apache.flink.api.java.DataSet#iterateDelta()
The following examples show how to use org.apache.flink.api.java.DataSet#iterateDelta().
The original project, source file, and license for each example are noted in the header above it.
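All of the examples below follow the same basic pattern: open a delta iteration on an initial solution set with iterateDelta(initialWorkset, maxIterations, keyPositions), derive a solution-set delta and a next workset from iteration.getWorkset() and iteration.getSolutionSet(), and finish with iteration.closeWith(delta, nextWorkset). Here is a minimal, self-contained sketch of that pattern; the input data, the filter used as the step function, and the class name IterateDeltaSketch are illustrative assumptions rather than code taken from any of the projects below.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;

public class IterateDeltaSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Hypothetical (key, value) input; field 0 serves as the solution set key.
        DataSet<Tuple2<Long, Long>> initial = env.fromElements(
                Tuple2.of(1L, 10L), Tuple2.of(2L, 20L), Tuple2.of(3L, 30L));

        // Open a delta iteration: the initial solution set doubles as the initial
        // workset, with at most 10 supersteps, keyed on tuple field 0.
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
                initial.iterateDelta(initial, 10, 0);

        // Step function: compute the solution set delta from the workset. A real
        // program would typically join the workset with iteration.getSolutionSet();
        // a plain filter keeps this sketch short.
        DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
                .filter(t -> t.f1 > 0L);

        // Close with (solution set delta, next workset); most examples below pass
        // the same data set for both.
        DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, delta);

        result.print();
    }
}

At each superstep, closeWith() merges the delta into the solution set by key and feeds the next workset back into the step function; the iteration terminates when the workset becomes empty or the maximum number of supersteps is reached.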
Example 1
Source File: AggregatorsITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testConvergenceCriterionWithParameterForIterateDelta() throws Exception {
    /*
     * Test convergence criterion with parameter for iterate delta
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet =
            CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    // register convergence criterion
    iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterionWithParam(3));

    DataSet<Tuple2<Integer, Integer>> updatedDs =
            iteration.getWorkset().map(new AggregateAndSubtractOneDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs
            .join(iteration.getSolutionSet()).where(0).equalTo(0).projectFirst(0, 1);

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);

    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

    assertEquals(expected, result);
}
Example 2
Source File: TempInIterationsTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testTempInIterationTest() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input = env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            input.iterateDelta(input, 1, 0);

    DataSet<Tuple2<Long, Long>> update = iteration.getWorkset()
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

    iteration.closeWith(update, update)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = (new Optimizer(new Configuration())).compile(plan);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jg = jgg.compileJobGraph(oPlan);

    boolean solutionSetUpdateChecked = false;
    for (JobVertex v : jg.getVertices()) {
        if (v.getName().equals("SolutionSet Delta")) {
            // check if input of solution set delta is temped
            TaskConfig tc = new TaskConfig(v.getConfiguration());
            assertTrue(tc.isInputAsynchronouslyMaterialized(0));
            solutionSetUpdateChecked = true;
        }
    }
    assertTrue(solutionSetUpdateChecked);
}
Example 3
Source File: TempInIterationsTest.java From flink with Apache License 2.0
@Test
public void testTempInIterationTest() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input = env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            input.iterateDelta(input, 1, 0);

    DataSet<Tuple2<Long, Long>> update = iteration.getWorkset()
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

    iteration.closeWith(update, update)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = (new Optimizer(new Configuration())).compile(plan);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jg = jgg.compileJobGraph(oPlan);

    boolean solutionSetUpdateChecked = false;
    for (JobVertex v : jg.getVertices()) {
        if (v.getName().equals("SolutionSet Delta")) {
            // check if input of solution set delta is temped
            TaskConfig tc = new TaskConfig(v.getConfiguration());
            assertTrue(tc.isInputAsynchronouslyMaterialized(0));
            solutionSetUpdateChecked = true;
        }
    }
    assertTrue(solutionSetUpdateChecked);
}
Example 4
Source File: AggregatorsITCase.java From flink with Apache License 2.0
@Test
public void testAggregatorWithParameterForIterateDelta() throws Exception {
    /*
     * Test aggregator with parameter for iterateDelta
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet =
            CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregatorWithParameter(4);
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs
            .join(iteration.getSolutionSet()).where(0).equalTo(0).flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);

    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

    // JUnit's assertEquals takes the expected value first
    assertEquals(expected, result);
}
Example 5
Source File: CoGroupConnectedComponentsSecondITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Long> vertices = env
            .fromElements(ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n"))
            .map(new VertexParser());

    DataSet<Tuple2<Long, Long>> edges = env
            .fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"))
            .flatMap(new EdgeParser());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    // emit result
    List<Tuple2<Long, Long>> resultTuples = new ArrayList<>();
    result.output(new LocalCollectionOutputFormat<>(resultTuples));

    env.execute();
}
Example 6
Source File: ConnectedComponentsWithSolutionSetFirstITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId =
            vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1);

    DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
            .join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 7
Source File: SuccessAfterNetworkBuffersFailureITCase.java From Flink-CEPplus with Apache License 2.0
private static void runConnectedComponents(ExecutionEnvironment env) throws Exception {
    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    env.execute();
}
Example 8
Source File: AggregatorConvergenceITCase.java From flink with Apache License 2.0
@Test
public void testDeltaConnectedComponentsWithParametrizableConvergence() throws Exception {
    // name of the aggregator that checks for convergence
    final String updatedElements = "updated.elements.aggr";

    // the iteration stops if less than this number of elements change value
    final long convergenceThreshold = 3;

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
    DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, 10, 0);

    // register the convergence criterion
    iteration.registerAggregationConvergenceCriterion(updatedElements,
            new LongSumAggregator(), new UpdatedElementsConvergenceCriterion(convergenceThreshold));

    DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new NeighborWithComponentIDJoin())
            .groupBy(0).min(1);

    DataSet<Tuple2<Long, Long>> updatedComponentId = verticesWithNewComponents
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .flatMap(new MinimumIdFilter(updatedElements));

    List<Tuple2<Long, Long>> result = iteration.closeWith(updatedComponentId, updatedComponentId).collect();
    Collections.sort(result, new TestBaseUtils.TupleComparator<Tuple2<Long, Long>>());

    assertEquals(expectedResult, result);
}
Example 9
Source File: AggregatorsITCase.java From flink with Apache License 2.0
@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
    /*
     * Test aggregator without parameter for iterateDelta
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet =
            CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs
            .join(iteration.getSolutionSet()).where(0).equalTo(0).flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);

    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

    assertEquals(expected, result);
}
Example 10
Source File: SuccessAfterNetworkBuffersFailureITCase.java From flink with Apache License 2.0
private static void runConnectedComponents(ExecutionEnvironment env) throws Exception {
    env.setParallelism(PARALLELISM);

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    env.execute();
}
Example 11
Source File: AggregatorsITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
    /*
     * Test aggregator without parameter for iterateDelta
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet =
            CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs
            .join(iteration.getSolutionSet()).where(0).equalTo(0).flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);

    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

    assertEquals(expected, result);
}
Example 12
Source File: AggregatorConvergenceITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testDeltaConnectedComponentsWithParametrizableConvergence() throws Exception {
    // name of the aggregator that checks for convergence
    final String updatedElements = "updated.elements.aggr";

    // the iteration stops if less than this number of elements change value
    final long convergenceThreshold = 3;

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
    DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, 10, 0);

    // register the convergence criterion
    iteration.registerAggregationConvergenceCriterion(updatedElements,
            new LongSumAggregator(), new UpdatedElementsConvergenceCriterion(convergenceThreshold));

    DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new NeighborWithComponentIDJoin())
            .groupBy(0).min(1);

    DataSet<Tuple2<Long, Long>> updatedComponentId = verticesWithNewComponents
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .flatMap(new MinimumIdFilter(updatedElements));

    List<Tuple2<Long, Long>> result = iteration.closeWith(updatedComponentId, updatedComponentId).collect();
    Collections.sort(result, new TestBaseUtils.TupleComparator<Tuple2<Long, Long>>());

    assertEquals(expectedResult, result);
}
Example 13
Source File: IterationsCompilerTest.java From flink with Apache License 2.0
@Test
public void testSolutionSetDeltaDependsOnBroadcastVariable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> source =
                env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());

        DataSet<Tuple2<Long, Long>> invariantInput =
                env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());

        // iteration from here
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = source.iterateDelta(source, 1000, 1);

        DataSet<Tuple2<Long, Long>> result = invariantInput
                .map(new IdentityMapper<Tuple2<Long, Long>>()).withBroadcastSet(iter.getWorkset(), "bc data")
                .join(iter.getSolutionSet()).where(0).equalTo(1).projectFirst(1).projectSecond(1);

        iter.closeWith(result.map(new IdentityMapper<Tuple2<Long, Long>>()), result)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        OptimizedPlan p = compileNoStats(env.createProgramPlan());

        // check that the JSON generator accepts this plan
        new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(p);

        // check that the JobGraphGenerator accepts the plan
        new JobGraphGenerator().compileJobGraph(p);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 14
Source File: IterationCompilerTest.java From flink with Apache License 2.0
@Test
public void testEmptyWorksetIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> map(Long value) {
                        return null;
                    }
                });

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);

        // close the iteration with the unchanged workset as both delta and next workset
        iter.closeWith(iter.getWorkset(), iter.getWorkset())
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        new JobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 15
Source File: ConnectedComponentsWithObjectMapITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId =
            vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 16
Source File: CollectionExecutionIterationTest.java From flink with Apache License 2.0
@Test
public void testDeltaIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Integer, Integer>> solInput = env.fromElements(
                new Tuple2<Integer, Integer>(1, 0),
                new Tuple2<Integer, Integer>(2, 0),
                new Tuple2<Integer, Integer>(3, 0),
                new Tuple2<Integer, Integer>(4, 0));

        @SuppressWarnings("unchecked")
        DataSet<Tuple1<Integer>> workInput = env.fromElements(
                new Tuple1<Integer>(1),
                new Tuple1<Integer>(2),
                new Tuple1<Integer>(3),
                new Tuple1<Integer>(4));

        // Perform a delta iteration where we add those values to the workset where
        // the second tuple field is smaller than the first tuple field.
        // At the end both tuple fields must be the same.

        DeltaIteration<Tuple2<Integer, Integer>, Tuple1<Integer>> iteration =
                solInput.iterateDelta(workInput, 10, 0);

        DataSet<Tuple2<Integer, Integer>> solDelta = iteration.getSolutionSet()
                .join(iteration.getWorkset()).where(0).equalTo(0)
                .with(new JoinFunction<Tuple2<Integer, Integer>, Tuple1<Integer>, Tuple2<Integer, Integer>>() {
                    @Override
                    public Tuple2<Integer, Integer> join(Tuple2<Integer, Integer> first,
                            Tuple1<Integer> second) throws Exception {
                        return new Tuple2<Integer, Integer>(first.f0, first.f1 + 1);
                    }
                });

        DataSet<Tuple1<Integer>> nextWorkset = solDelta.flatMap(
                new FlatMapFunction<Tuple2<Integer, Integer>, Tuple1<Integer>>() {
                    @Override
                    public void flatMap(Tuple2<Integer, Integer> in,
                            Collector<Tuple1<Integer>> out) throws Exception {
                        if (in.f1 < in.f0) {
                            out.collect(new Tuple1<Integer>(in.f0));
                        }
                    }
                });

        List<Tuple2<Integer, Integer>> collected = new ArrayList<Tuple2<Integer, Integer>>();

        iteration.closeWith(solDelta, nextWorkset)
                .output(new LocalCollectionOutputFormat<Tuple2<Integer, Integer>>(collected));

        env.execute();

        // verify that both tuple fields are now the same
        for (Tuple2<Integer, Integer> t : collected) {
            assertEquals(t.f0, t.f1);
        }
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 17
Source File: ConnectedComponentsWithDeferredUpdateITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId =
            vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new UpdateComponentIdMatchNonPreserving());

    DataSet<Tuple2<Long, Long>> delta;
    if (extraMapper) {
        delta = changes.map(
                // ID Mapper
                new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                    private static final long serialVersionUID = -3929364091829757322L;

                    @Override
                    public Tuple2<Long, Long> map(Tuple2<Long, Long> v) throws Exception {
                        return v;
                    }
                });
    }
    else {
        delta = changes;
    }

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 18
Source File: IterationCompilerTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testWorksetIterationWithUnionRoot() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> map(Long value) {
                        return null;
                    }
                });

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);

        iter.closeWith(
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                        .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())),
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                        .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())))
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) sink.getInput().getSource();

        // make sure that the root is part of the dynamic path,
        // i.e. the "NoOp"s that come after the union
        SingleInputPlanNode nextWorksetNoop = (SingleInputPlanNode) iterNode.getNextWorkSetPlanNode();
        SingleInputPlanNode solutionDeltaNoop = (SingleInputPlanNode) iterNode.getSolutionSetDeltaPlanNode();

        NAryUnionPlanNode nextWorksetUnion = (NAryUnionPlanNode) nextWorksetNoop.getInput().getSource();
        NAryUnionPlanNode solutionDeltaUnion = (NAryUnionPlanNode) solutionDeltaNoop.getInput().getSource();

        assertTrue(nextWorksetNoop.isOnDynamicPath());
        assertTrue(nextWorksetNoop.getCostWeight() >= 1);

        assertTrue(solutionDeltaNoop.isOnDynamicPath());
        assertTrue(solutionDeltaNoop.getCostWeight() >= 1);

        assertTrue(nextWorksetUnion.isOnDynamicPath());
        assertTrue(nextWorksetUnion.getCostWeight() >= 1);

        assertTrue(solutionDeltaUnion.isOnDynamicPath());
        assertTrue(solutionDeltaUnion.getCostWeight() >= 1);

        new JobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 19
Source File: AdaptivePageRank.java From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {
    long numVertices = 41652230;

    double threshold = 0.005 / numVertices;
    double dampeningFactor = 0.85;

    String adjacencyPath = args.length > 1 ? args[0] : "/data/demodata/pagerank/edges/edges.csv";
    String outpath = args.length > 2 ? args[1] : "/data/demodata/pagerank/adacency_comp";
    int numIterations = args.length > 3 ? Integer.valueOf(args[2]) : 100;

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // env.setDegreeOfParallelism(4);

    DataSet<Tuple2<Long, long[]>> adjacency = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder());
    DataSet<Tuple2<Long, long[]>> adjacency2 = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder());

    DataSet<Tuple2<Long, Double>> initialRanks = adjacency
            .flatMap(new InitialMessageBuilder(numVertices, dampeningFactor))
            .groupBy(0)
            .reduceGroup(new Agg());

    DataSet<Tuple2<Long, Double>> initialDeltas = initialRanks.map(new InitialDeltaBuilder(numVertices));

    // ---------- iterative part ---------

    DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> adaptiveIteration =
            initialRanks.iterateDelta(initialDeltas, numIterations, 0);

    DataSet<Tuple2<Long, Double>> deltas = adaptiveIteration.getWorkset()
            .join(adjacency2).where(0).equalTo(0).with(new DeltaDistributor(0.85))
            .groupBy(0)
            .reduceGroup(new AggAndFilter(threshold));

    DataSet<Tuple2<Long, Double>> rankUpdates = adaptiveIteration.getSolutionSet()
            .join(deltas).where(0).equalTo(0).with(new SolutionJoin());

    adaptiveIteration.closeWith(rankUpdates, deltas)
            .writeAsCsv(outpath + "_adapt", WriteMode.OVERWRITE);

    // System.out.println(env.getExecutionPlan());
    JobExecutionResult result = env.execute("Adaptive Page Rank");

    Map<String, Object> accumulators = result.getAllAccumulatorResults();
    List<String> keys = new ArrayList<String>(accumulators.keySet());
    Collections.sort(keys);
    for (String key : keys) {
        System.out.println(key + " : " + accumulators.get(key));
    }
}