org.apache.flink.api.java.operators.DeltaIteration Java Examples
The following examples show how to use org.apache.flink.api.java.operators.DeltaIteration. Each example is taken from an open-source project; its source file and license are noted in the header above the code.
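Before the examples, here is a minimal, self-contained sketch of the DeltaIteration lifecycle: open the iteration with iterateDelta on an initial solution set, derive the solution-set delta and the next workset inside the step function via getWorkset() and getSolutionSet(), and terminate with closeWith. This skeleton is illustrative only and is not drawn from any of the projects below.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;

public class DeltaIterationSkeleton {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // initial solution set and workset: (key, value) pairs
        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 10)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> map(Long v) {
                        return new Tuple2<>(v, v);
                    }
                });

        // iterate at most 10 times, solution set keyed on field 0
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
                input.iterateDelta(input, 10, 0);

        // step function: join the workset with the solution set;
        // here the delta and the next workset are identical
        DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
                .join(iteration.getSolutionSet()).where(0).equalTo(0)
                .projectFirst(0, 1);

        DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, delta);
        result.print();
    }
}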
Example #1
Source File: VertexCentricIteration.java From flink with Apache License 2.0

/**
 * Helper method which sets up an iteration with the given vertex value.
 *
 * @param iteration the delta iteration to configure
 */
private void setUpIteration(DeltaIteration<?, ?> iteration) {

    // set up the iteration operator
    if (this.configuration != null) {

        iteration.name(this.configuration.getName("Vertex-centric iteration (" + computeFunction + ")"));
        iteration.parallelism(this.configuration.getParallelism());
        iteration.setSolutionSetUnManaged(this.configuration.isSolutionSetUnmanagedMemory());

        // register all aggregators
        for (Map.Entry<String, Aggregator<?>> entry : this.configuration.getAggregators().entrySet()) {
            iteration.registerAggregator(entry.getKey(), entry.getValue());
        }
    } else {
        // no configuration provided; set default name
        iteration.name("Vertex-centric iteration (" + computeFunction + ")");
    }
}
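The configuration object consulted by this helper is not shown above. A rough sketch of how a caller might populate it (assuming Gelly's VertexCentricConfiguration, whose setters mirror the getters used in setUpIteration; the name and values here are illustrative):

import org.apache.flink.api.common.aggregators.LongSumAggregator;
import org.apache.flink.graph.pregel.VertexCentricConfiguration;

// hypothetical configuration for a vertex-centric iteration
VertexCentricConfiguration parameters = new VertexCentricConfiguration();
parameters.setName("My vertex-centric iteration");
parameters.setParallelism(16);
parameters.setSolutionSetUnmanagedMemory(true);
parameters.registerAggregator("sumAggregator", new LongSumAggregator());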
Example #2
Source File: ScatterGatherIteration.java From Flink-CEPplus with Apache License 2.0

/**
 * Helper method which sets up an iteration with the given vertex value (either simple or with degrees).
 *
 * @param iteration the delta iteration to configure
 */
private void setUpIteration(DeltaIteration<?, ?> iteration) {

    // set up the iteration operator
    if (this.configuration != null) {

        iteration.name(this.configuration.getName("Scatter-gather iteration (" + gatherFunction + " | " + scatterFunction + ")"));
        iteration.parallelism(this.configuration.getParallelism());
        iteration.setSolutionSetUnManaged(this.configuration.isSolutionSetUnmanagedMemory());

        // register all aggregators
        for (Map.Entry<String, Aggregator<?>> entry : this.configuration.getAggregators().entrySet()) {
            iteration.registerAggregator(entry.getKey(), entry.getValue());
        }
    } else {
        // no configuration provided; set default name
        iteration.name("Scatter-gather iteration (" + gatherFunction + " | " + scatterFunction + ")");
    }
}
Example #3
Source File: DeltaIterationNotDependingOnSolutionSetITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(0, 9).map(new Duplicator<Long>());

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 5, 1);

        iteration.closeWith(iteration.getWorkset(), iteration.getWorkset().map(new TestMapper()))
                .output(new LocalCollectionOutputFormat<Tuple2<Long, Long>>(result));

        env.execute();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
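Duplicator and TestMapper are small helpers defined elsewhere in the test class. A plausible sketch of them follows; the exact bodies are assumptions, since the original file does not appear here:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// turns each element into a (value, value) pair
private static final class Duplicator<T> implements MapFunction<T, Tuple2<T, T>> {
    @Override
    public Tuple2<T, T> map(T value) {
        return new Tuple2<>(value, value);
    }
}

// identity step function: the workset passes through unchanged
private static final class TestMapper implements MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> {
    @Override
    public Tuple2<Long, Long> map(Tuple2<Long, Long> value) {
        return value;
    }
}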
Example #4
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

/**
 * <pre>
 *       (SRC A)       (SRC B)        (SRC C)
 *      /       \     /              /       \
 * (SINK 1)  (DELTA ITERATION)      |      (SINK 2)
 *            /   |   \            /
 *      (SINK 3)  |  (CROSS => NEXT WORKSET)
 *                |        |
 *          (JOIN => SOLUTION SET DELTA)
 * </pre>
 */
@Test
public void testClosureDeltaIteration() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple2<Long, Long>> sourceA = env.generateSequence(0, 1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceB = env.generateSequence(0, 1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceC = env.generateSequence(0, 1).map(new Duplicator<Long>());

    sourceA.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    sourceC.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> loop = sourceA.iterateDelta(sourceB, 10, 0);

    DataSet<Tuple2<Long, Long>> workset = loop.getWorkset().cross(sourceB)
            .with(new IdentityCrosser<Tuple2<Long, Long>>()).name("Next work set");

    DataSet<Tuple2<Long, Long>> delta = workset.join(loop.getSolutionSet()).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>()).name("Solution set delta");

    DataSet<Tuple2<Long, Long>> result = loop.closeWith(delta, workset);
    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #5
Source File: SuccessAfterNetworkBuffersFailureITCase.java From flink with Apache License 2.0

private static void runConnectedComponents(ExecutionEnvironment env) throws Exception {

    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    env.execute();
}
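The step functions referenced above come from Flink's ConnectedComponents example. In outline (a sketch of their logic, not the verbatim source), NeighborWithComponentIDJoin propagates a vertex's current component id to its neighbors, and ComponentIdFilter emits an update only when the candidate id improves on the one stored in the solution set:

import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// sketch: send the vertex's component id to the neighbor at the other end of the edge
public static final class NeighborWithComponentIDJoin
        implements JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> {

    @Override
    public Tuple2<Long, Long> join(Tuple2<Long, Long> vertexWithComponent, Tuple2<Long, Long> edge) {
        return new Tuple2<>(edge.f1, vertexWithComponent.f1);
    }
}

// sketch: keep a candidate only if it lowers the stored component id
public static final class ComponentIdFilter
        implements FlatJoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> {

    @Override
    public void join(Tuple2<Long, Long> candidate, Tuple2<Long, Long> old, Collector<Tuple2<Long, Long>> out) {
        if (candidate.f1 < old.f1) {
            out.collect(candidate);
        }
    }
}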
Example #6
Source File: WorksetIterationCornerCasesTest.java From flink with Apache License 2.0

@Test
public void testWorksetIterationNotDependingOnSolutionSet() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 100).map(new Duplicator<Long>());

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 100, 1);

        DataSet<Tuple2<Long, Long>> iterEnd = iteration.getWorkset().map(new TestMapper<Tuple2<Long, Long>>());
        iteration.closeWith(iterEnd, iterEnd)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        assertTrue(wipn.getSolutionSetPlanNode().getOutgoingChannels().isEmpty());

        JobGraphGenerator jgg = new JobGraphGenerator();
        jgg.compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #7
Source File: WorksetIterationsRecordApiCompilerTest.java From flink with Apache License 2.0

private Plan getTestPlan(boolean joinPreservesSolutionSet, boolean mapBeforeSolutionDelta) {

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple2<Long, Long>> solSetInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Solution Set");
    DataSet<Tuple2<Long, Long>> workSetInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Workset");
    DataSet<Tuple2<Long, Long>> invariantInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Invariant Input");

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIt =
            solSetInput.iterateDelta(workSetInput, 100, 0).name(ITERATION_NAME);

    DataSet<Tuple2<Long, Long>> join1 = deltaIt.getWorkset().join(invariantInput).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>())
            .withForwardedFieldsFirst("*").name(JOIN_WITH_INVARIANT_NAME);

    DataSet<Tuple2<Long, Long>> join2 = deltaIt.getSolutionSet().join(join1).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>())
            .name(JOIN_WITH_SOLUTION_SET);

    if (joinPreservesSolutionSet) {
        ((JoinOperator<?, ?, ?>) join2).withForwardedFieldsFirst("*");
    }

    DataSet<Tuple2<Long, Long>> nextWorkset = join2.groupBy(0)
            .reduceGroup(new IdentityGroupReducer<Tuple2<Long, Long>>())
            .withForwardedFields("*").name(NEXT_WORKSET_REDUCER_NAME);

    if (mapBeforeSolutionDelta) {
        DataSet<Tuple2<Long, Long>> mapper = join2.map(new IdentityMapper<Tuple2<Long, Long>>())
                .withForwardedFields("*").name(SOLUTION_DELTA_MAPPER_NAME);

        deltaIt.closeWith(mapper, nextWorkset)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    } else {
        deltaIt.closeWith(join2, nextWorkset)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    }

    return env.createProgramPlan();
}
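The withForwardedFields annotations in this plan are semantic hints: they tell the optimizer that an operator copies fields unchanged from input to output, which lets partitionings and sort orders survive across operators inside the iteration. The IdentityJoiner helper used above might look like the following sketch (the original lives in the test utilities and is assumed here):

import org.apache.flink.api.common.functions.JoinFunction;

// sketch: forwards the first input unchanged, which is what
// withForwardedFieldsFirst("*") asserts to the optimizer
public static class IdentityJoiner<T> implements JoinFunction<T, T, T> {
    @Override
    public T join(T first, T second) {
        return first;
    }
}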
Example #8
Source File: IterationsCompilerTest.java From flink with Apache License 2.0

@Test
public void testWorksetIterationPipelineBreakerPlacement() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        // the workset (input two of the delta iteration) is the same as what is consumed by the successive join
        DataSet<Tuple2<Long, Long>> initialWorkset = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());

        DataSet<Tuple2<Long, Long>> initialSolutionSet = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());

        // trivial iteration, since we are interested in the inputs to the iteration
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = initialSolutionSet.iterateDelta(initialWorkset, 100, 0);

        DataSet<Tuple2<Long, Long>> next = iteration.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> result = iteration.closeWith(next, next);

        initialWorkset
                .join(result, JoinHint.REPARTITION_HASH_FIRST)
                .where(0).equalTo(0)
                .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());

        Plan p = env.createProgramPlan();
        compileNoStats(p);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #9
Source File: IterationsCompilerTest.java From flink with Apache License 2.0

@Test
public void testSolutionSetDeltaDependsOnBroadcastVariable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> source = env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());

        DataSet<Tuple2<Long, Long>> invariantInput = env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());

        // iteration from here
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = source.iterateDelta(source, 1000, 1);

        DataSet<Tuple2<Long, Long>> result = invariantInput
                .map(new IdentityMapper<Tuple2<Long, Long>>()).withBroadcastSet(iter.getWorkset(), "bc data")
                .join(iter.getSolutionSet()).where(0).equalTo(1).projectFirst(1).projectSecond(1);

        iter.closeWith(result.map(new IdentityMapper<Tuple2<Long, Long>>()), result)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        OptimizedPlan p = compileNoStats(env.createProgramPlan());

        // check that the JSON generator accepts this plan
        new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(p);

        // check that the JobGraphGenerator accepts the plan
        new JobGraphGenerator().compileJobGraph(p);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #10
Source File: TempInIterationsTest.java From flink with Apache License 2.0

@Test
public void testTempInIterationTest() throws Exception {

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input = env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            input.iterateDelta(input, 1, 0);

    DataSet<Tuple2<Long, Long>> update = iteration.getWorkset()
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

    iteration.closeWith(update, update)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = (new Optimizer(new Configuration())).compile(plan);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jg = jgg.compileJobGraph(oPlan);

    boolean solutionSetUpdateChecked = false;
    for (JobVertex v : jg.getVertices()) {
        if (v.getName().equals("SolutionSet Delta")) {
            // check if input of solution set delta is temped
            TaskConfig tc = new TaskConfig(v.getConfiguration());
            assertTrue(tc.isInputAsynchronouslyMaterialized(0));
            solutionSetUpdateChecked = true;
        }
    }
    assertTrue(solutionSetUpdateChecked);
}
Example #11
Source File: ConnectedComponentsITCase.java From Flink-CEPplus with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example #12
Source File: ScatterGatherIteration.java From flink with Apache License 2.0

/**
 * Method that builds the scatter function using a coGroup operator for a simple vertex (without
 * degrees).
 * It afterwards configures the function with a custom name and broadcast variables.
 *
 * @param iteration the iteration that supplies the current workset
 * @param messageTypeInfo the type information of the messages
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @param numberOfVertices the number of vertices, broadcast when that option is enabled
 * @return the scatter function
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunction(
        DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration,
        TypeInformation<Tuple2<K, Message>> messageTypeInfo,
        int whereArg, int equalToArg,
        DataSet<LongValue> numberOfVertices) {

    // build the scatter function (co group)
    CoGroupOperator<?, ?, Tuple2<K, Message>> messages;
    ScatterUdfWithEdgeValues<K, VV, VV, Message, EV> messenger =
            new ScatterUdfWithEVsSimpleVV<>(scatterFunction, messageTypeInfo);

    messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(whereArg)
            .equalTo(equalToArg).with(messenger);

    // configure coGroup message function with name and broadcast variables
    messages = messages.name("Messaging");

    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getScatterBcastVars()) {
            messages = messages.withBroadcastSet(e.f1, e.f0);
        }
        if (this.configuration.isOptNumVertices()) {
            messages = messages.withBroadcastSet(numberOfVertices, "number of vertices");
        }
    }

    return messages;
}
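The broadcast variables and the optional vertex count consumed here are supplied through the scatter-gather configuration. A brief sketch of the caller side (assuming Gelly's ScatterGatherConfiguration; the set name and the someDataSet variable are illustrative):

import org.apache.flink.graph.spargel.ScatterGatherConfiguration;

// hypothetical setup matching the getters used in buildScatterFunction
ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();
parameters.addBroadcastSetForScatterFunction("bc data", someDataSet);
parameters.setOptNumVertices(true);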
Example #13
Source File: ScatterGatherIteration.java From flink with Apache License 2.0

/**
 * Method that builds the scatter function using a coGroup operator for a vertex
 * containing degree information.
 * It afterwards configures the function with a custom name and broadcast variables.
 *
 * @param iteration the iteration that supplies the current workset
 * @param messageTypeInfo the type information of the messages
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @param numberOfVertices the number of vertices, broadcast when that option is enabled
 * @return the scatter function
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunctionVerticesWithDegrees(
        DeltaIteration<Vertex<K, Tuple3<VV, LongValue, LongValue>>, Vertex<K, Tuple3<VV, LongValue, LongValue>>> iteration,
        TypeInformation<Tuple2<K, Message>> messageTypeInfo,
        int whereArg, int equalToArg,
        DataSet<LongValue> numberOfVertices) {

    // build the scatter function (co group)
    CoGroupOperator<?, ?, Tuple2<K, Message>> messages;
    ScatterUdfWithEdgeValues<K, Tuple3<VV, LongValue, LongValue>, VV, Message, EV> messenger =
            new ScatterUdfWithEVsVVWithDegrees<>(scatterFunction, messageTypeInfo);

    messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(whereArg)
            .equalTo(equalToArg).with(messenger);

    // configure coGroup message function with name and broadcast variables
    messages = messages.name("Messaging");

    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getScatterBcastVars()) {
            messages = messages.withBroadcastSet(e.f1, e.f0);
        }
        if (this.configuration.isOptNumVertices()) {
            messages = messages.withBroadcastSet(numberOfVertices, "number of vertices");
        }
    }

    return messages;
}
Example #14
Source File: PartitionITCase.java From flink with Apache License 2.0

@Test(expected = InvalidProgramException.class)
public void testRangePartitionInIteration() throws Exception {

    // does not apply for collection execution
    if (super.mode == TestExecutionMode.COLLECTION) {
        throw new InvalidProgramException("Does not apply for collection execution");
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<Long> source = env.generateSequence(0, 10000);

    DataSet<Tuple2<Long, String>> tuples = source.map(new MapFunction<Long, Tuple2<Long, String>>() {
        @Override
        public Tuple2<Long, String> map(Long v) throws Exception {
            return new Tuple2<>(v, Long.toString(v));
        }
    });

    DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> it = tuples.iterateDelta(tuples, 10, 0);

    DataSet<Tuple2<Long, String>> body = it.getWorkset()
            .partitionByRange(1) // verify that range partition is not allowed in iteration
            .join(it.getSolutionSet())
            .where(0).equalTo(0).projectFirst(0).projectSecond(1);

    DataSet<Tuple2<Long, String>> result = it.closeWith(body, body);

    result.collect(); // should fail
}
Example #15
Source File: HighParallelismIterationsTestProgram.java From flink with Apache License 2.0

public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<>());

    env.execute();
}
Example #16
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testConvergenceCriterionWithParameterForIterateDelta() throws Exception {
    /*
     * Test convergence criterion with parameter for iterate delta
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    // register convergence criterion
    iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterionWithParam(3));

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateAndSubtractOneDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
            .where(0).equalTo(0).projectFirst(0, 1);

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

    assertEquals(expected, result);
}
Example #17
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
    /*
     * Test aggregator without parameter for iterateDelta
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
            .where(0).equalTo(0).flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

    assertEquals(expected, result);
}
Example #18
Source File: AggregatorConvergenceITCase.java From flink with Apache License 2.0

@Test
public void testDeltaConnectedComponentsWithParametrizableConvergence() throws Exception {

    // name of the aggregator that checks for convergence
    final String updatedElements = "updated.elements.aggr";

    // the iteration stops if less than this number of elements change value
    final long convergenceThreshold = 3;

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
    DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, 10, 0);

    // register the convergence criterion
    iteration.registerAggregationConvergenceCriterion(updatedElements,
            new LongSumAggregator(), new UpdatedElementsConvergenceCriterion(convergenceThreshold));

    DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.getWorkset().join(edges).where(0).equalTo(0)
            .with(new NeighborWithComponentIDJoin())
            .groupBy(0).min(1);

    DataSet<Tuple2<Long, Long>> updatedComponentId =
            verticesWithNewComponents.join(iteration.getSolutionSet()).where(0).equalTo(0)
                    .flatMap(new MinimumIdFilter(updatedElements));

    List<Tuple2<Long, Long>> result = iteration.closeWith(updatedComponentId, updatedComponentId).collect();
    Collections.sort(result, new TestBaseUtils.TupleComparator<Tuple2<Long, Long>>());

    assertEquals(expectedResult, result);
}
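The UpdatedElementsConvergenceCriterion registered above is not shown in this snippet. A minimal sketch, assuming it simply compares the aggregated number of updated elements against the threshold:

import org.apache.flink.api.common.aggregators.ConvergenceCriterion;
import org.apache.flink.types.LongValue;

// sketch: converge once fewer than `threshold` elements changed in the last iteration
public static class UpdatedElementsConvergenceCriterion implements ConvergenceCriterion<LongValue> {

    private final long threshold;

    public UpdatedElementsConvergenceCriterion(long threshold) {
        this.threshold = threshold;
    }

    @Override
    public boolean isConverged(int iteration, LongValue value) {
        return value.getValue() < threshold;
    }
}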
Example #19
Source File: CoGroupConnectedComponentsSecondITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {

    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Long> vertices = env.fromElements(ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n"))
            .map(new VertexParser());

    DataSet<Tuple2<Long, Long>> edges = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"))
            .flatMap(new EdgeParser());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration
            .getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    // emit result
    List<Tuple2<Long, Long>> resultTuples = new ArrayList<>();
    result.output(new LocalCollectionOutputFormat<>(resultTuples));

    env.execute();
}
Example #20
Source File: CoGroupConnectedComponentsITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple1<Long>> initialVertices = env.readCsvFile(verticesPath).fieldDelimiter(" ").types(Long.class).name("Vertices");

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class).name("Edges");

    DataSet<Tuple2<Long, Long>> verticesWithId = initialVertices.map(new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
        @Override
        public Tuple2<Long, Long> map(Tuple1<Long> value) throws Exception {
            return new Tuple2<>(value.f0, value.f0);
        }
    }).name("Assign Vertex Ids");

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithId.iterateDelta(verticesWithId, MAX_ITERATIONS, 0);

    JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> joinWithNeighbors = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> join(Tuple2<Long, Long> first, Tuple2<Long, Long> second) throws Exception {
                    return new Tuple2<>(second.f1, first.f1);
                }
            })
            .name("Join Candidate Id With Neighbor");

    CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> minAndUpdate = joinWithNeighbors
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate())
            .name("min Id and Update");

    iteration.closeWith(minAndUpdate, minAndUpdate).writeAsCsv(resultPath, "\n", " ").name("Result");

    env.execute("Workset Connected Components");
}
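MinIdAndUpdate is the coGroup counterpart of the join-plus-filter used in the earlier connected-components examples: it selects the minimum candidate id per vertex and emits an update only when that id beats the current solution-set entry. A sketch of the assumed logic:

import java.util.Iterator;

import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// sketch: emit (vertex, minCandidateId) only if it lowers the stored component id
public static final class MinIdAndUpdate
        implements CoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> {

    @Override
    public void coGroup(
            Iterable<Tuple2<Long, Long>> candidates,
            Iterable<Tuple2<Long, Long>> current,
            Collector<Tuple2<Long, Long>> out) {

        // the solution set side holds at most one entry per key
        Iterator<Tuple2<Long, Long>> it = current.iterator();
        if (!it.hasNext()) {
            return;
        }
        Tuple2<Long, Long> old = it.next();

        long min = Long.MAX_VALUE;
        for (Tuple2<Long, Long> candidate : candidates) {
            min = Math.min(min, candidate.f1);
        }

        if (min < old.f1) {
            out.collect(new Tuple2<>(old.f0, min));
        }
    }
}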
Example #21
Source File: IterationCompilerTest.java From flink with Apache License 2.0

@Test
public void testEmptyWorksetIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> map(Long value) {
                        return null;
                    }
                });

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);

        iter.closeWith(iter.getWorkset(), iter.getWorkset())
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}