Java Code Examples for org.apache.flink.api.java.operators.DeltaIteration#closeWith()
The following examples show how to use org.apache.flink.api.java.operators.DeltaIteration#closeWith().
All examples are taken from open source projects; the source file, originating project, and license are noted above each example.
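Before the examples, here is a minimal, self-contained sketch of the pattern they all share: iterateDelta(initialWorkset, maxIterations, keyPositions) opens the iteration, the step logic derives a solution-set delta and a new workset from getWorkset() and getSolutionSet(), and closeWith(solutionSetDelta, newWorkset) closes the loop and returns the resulting solution set. The class name, input data, and step function below are illustrative placeholders, not taken from any of the projects that follow.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;

public class CloseWithSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // illustrative input: (key, value) pairs; any DataSet with a key field works
        DataSet<Tuple2<Long, Long>> initial = env.fromElements(
                Tuple2.of(1L, 1L), Tuple2.of(2L, 2L));

        // open a delta iteration: initial solution set and workset are identical,
        // at most 10 supersteps, field 0 is the solution set key
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
                initial.iterateDelta(initial, 10, 0);

        // placeholder step logic: any transformation of the workset, typically
        // joined against iteration.getSolutionSet() as in the examples below
        DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
                .map(t -> Tuple2.of(t.f0, t.f1 + 1))
                .returns(initial.getType());

        // closeWith(solutionSetDelta, newWorkset): the delta's entries replace
        // solution set entries with the same key; the new workset feeds the
        // next superstep
        DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, delta);

        result.print();
    }
}

The iteration terminates when the new workset becomes empty or the maximum number of iterations is reached; in most of the examples below the delta and the new workset are the same data set.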
Example 1
Source File: HighParallelismIterationsTestProgram.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<>());

    env.execute();
}
Example 2
Source File: ConnectedComponentsITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 3
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0
/**
 * <pre>
 *           (SRC A)     (SRC B)          (SRC C)
 *          /       \   /                /       \
 * (SINK 1)   (DELTA ITERATION)        |         (SINK 2)
 *             /   |   \              /
 *     (SINK 3)    |    (CROSS => NEXT WORKSET)
 *                 |             |
 *           (JOIN => SOLUTION SET DELTA)
 * </pre>
 */
@Test
public void testClosureDeltaIteration() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> sourceA = env.generateSequence(0, 1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceB = env.generateSequence(0, 1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceC = env.generateSequence(0, 1).map(new Duplicator<Long>());

    sourceA.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    sourceC.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> loop = sourceA.iterateDelta(sourceB, 10, 0);

    DataSet<Tuple2<Long, Long>> workset = loop.getWorkset().cross(sourceB)
            .with(new IdentityCrosser<Tuple2<Long, Long>>()).name("Next work set");
    DataSet<Tuple2<Long, Long>> delta = workset.join(loop.getSolutionSet()).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>()).name("Solution set delta");

    DataSet<Tuple2<Long, Long>> result = loop.closeWith(delta, workset);
    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example 4
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0
/**
 * <pre>
 *           (SRC A)     (SRC B)          (SRC C)
 *          /       \   /                /       \
 * (SINK 1)   (DELTA ITERATION)        |         (SINK 2)
 *             /   |   \              /
 *     (SINK 3)    |    (CROSS => NEXT WORKSET)
 *                 |             |
 *           (JOIN => SOLUTION SET DELTA)
 * </pre>
 */
@Test
public void testClosureDeltaIteration() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> sourceA = env.generateSequence(0, 1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceB = env.generateSequence(0, 1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceC = env.generateSequence(0, 1).map(new Duplicator<Long>());

    sourceA.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    sourceC.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> loop = sourceA.iterateDelta(sourceB, 10, 0);

    DataSet<Tuple2<Long, Long>> workset = loop.getWorkset().cross(sourceB)
            .with(new IdentityCrosser<Tuple2<Long, Long>>()).name("Next work set");
    DataSet<Tuple2<Long, Long>> delta = workset.join(loop.getSolutionSet()).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>()).name("Solution set delta");

    DataSet<Tuple2<Long, Long>> result = loop.closeWith(delta, workset);
    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example 5
Source File: ConnectedComponentsWithObjectMapITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 6
Source File: CoGroupConnectedComponentsSecondITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Long> vertices = env.fromElements(ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n"))
            .map(new VertexParser());

    DataSet<Tuple2<Long, Long>> edges = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"))
            .flatMap(new EdgeParser());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration
            .getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    // emit result
    List<Tuple2<Long, Long>> resultTuples = new ArrayList<>();
    result.output(new LocalCollectionOutputFormat<>(resultTuples));

    env.execute();
}
Example 7
Source File: ConnectedComponentsWithObjectMapITCase.java From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 8
Source File: ConnectedComponentsWithSolutionSetFirstITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1);

    DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
            .join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 9
Source File: AggregatorsITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
    /*
     * Test aggregator without parameter for iterateDelta
     */

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
            initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
            .where(0).equalTo(0).flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

    assertEquals(expected, result);
}
Example 10
Source File: SuccessAfterNetworkBuffersFailureITCase.java From flink with Apache License 2.0
private static void runConnectedComponents(ExecutionEnvironment env) throws Exception {

    env.setParallelism(PARALLELISM);

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    env.execute();
}
Example 11
Source File: AggregatorsITCase.java From flink with Apache License 2.0
@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
    /*
     * Test aggregator without parameter for iterateDelta
     */

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
            initialSolutionSet, MAX_ITERATIONS, 0);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

    DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
            .where(0).equalTo(0).flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
    List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

    assertEquals(expected, result);
}
Example 12
Source File: ConnectedComponents.java From flink with Apache License 2.0
public static void main(String... args) throws Exception {

    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final int maxIterations = params.getInt("iterations", 10);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // read vertex and edge data
    DataSet<Long> vertices = getVertexDataSet(env, params);
    DataSet<Tuple2<Long, Long>> edges = getEdgeDataSet(env, params).flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId =
            vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, maxIterations, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", " ");
        // execute program
        env.execute("Connected Components Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Example 13
Source File: VertexCentricIteration.java From flink with Apache License 2.0
/**
 * Creates the operator that represents this vertex-centric graph computation.
 *
 * <p>The Pregel iteration is mapped to a delta iteration as follows.
 * The solution set consists of the set of active vertices and the workset contains the set of messages
 * sent to vertices during the previous superstep. Initially, the workset contains a null message for each vertex.
 * At the beginning of a superstep, the solution set is joined with the workset to produce
 * a dataset containing tuples of vertex state and messages (vertex inbox).
 * The superstep compute UDF is realized with a coGroup between the vertices with inbox and the graph edges.
 * The output of the compute UDF contains both the new vertex values and the new messages produced.
 * These are directed to the solution set delta and new workset, respectively, with subsequent flatMaps.
 *
 * @return The operator that represents this vertex-centric graph computation.
 */
@Override
public DataSet<Vertex<K, VV>> createResult() {

    if (this.initialVertices == null) {
        throw new IllegalStateException("The input data set has not been set.");
    }

    // prepare the type information
    TypeInformation<K> keyType = ((TupleTypeInfo<?>) initialVertices.getType()).getTypeAt(0);
    TypeInformation<Tuple2<K, Message>> messageTypeInfo =
            new TupleTypeInfo<>(keyType, messageType);
    TypeInformation<Vertex<K, VV>> vertexType = initialVertices.getType();
    TypeInformation<Either<Vertex<K, VV>, Tuple2<K, Message>>> intermediateTypeInfo =
            new EitherTypeInfo<>(vertexType, messageTypeInfo);
    TypeInformation<Either<NullValue, Message>> nullableMsgTypeInfo =
            new EitherTypeInfo<>(TypeExtractor.getForClass(NullValue.class), messageType);
    TypeInformation<Tuple2<K, Either<NullValue, Message>>> workSetTypeInfo =
            new TupleTypeInfo<>(keyType, nullableMsgTypeInfo);

    DataSet<Tuple2<K, Either<NullValue, Message>>> initialWorkSet = initialVertices.map(
            new InitializeWorkSet<K, VV, Message>()).returns(workSetTypeInfo);

    final DeltaIteration<Vertex<K, VV>, Tuple2<K, Either<NullValue, Message>>> iteration =
            initialVertices.iterateDelta(initialWorkSet, this.maximumNumberOfIterations, 0);
    setUpIteration(iteration);

    // join with the current state to get vertex values
    DataSet<Tuple2<Vertex<K, VV>, Either<NullValue, Message>>> verticesWithMsgs =
            iteration.getSolutionSet().join(iteration.getWorkset())
            .where(0).equalTo(0)
            .with(new AppendVertexState<>())
            .returns(new TupleTypeInfo<>(vertexType, nullableMsgTypeInfo));

    VertexComputeUdf<K, VV, EV, Message> vertexUdf =
            new VertexComputeUdf<>(computeFunction, intermediateTypeInfo);

    CoGroupOperator<?, ?, Either<Vertex<K, VV>, Tuple2<K, Message>>> superstepComputation =
            verticesWithMsgs.coGroup(edgesWithValue)
            .where("f0.f0").equalTo(0)
            .with(vertexUdf);

    // compute the solution set delta
    DataSet<Vertex<K, VV>> solutionSetDelta = superstepComputation.flatMap(
            new ProjectNewVertexValue<>()).returns(vertexType);

    // compute the inbox of each vertex for the next superstep (new workset)
    DataSet<Tuple2<K, Either<NullValue, Message>>> allMessages = superstepComputation.flatMap(
            new ProjectMessages<>()).returns(workSetTypeInfo);

    DataSet<Tuple2<K, Either<NullValue, Message>>> newWorkSet = allMessages;

    // check if a combiner has been provided
    if (combineFunction != null) {

        MessageCombinerUdf<K, Message> combinerUdf =
                new MessageCombinerUdf<>(combineFunction, workSetTypeInfo);

        DataSet<Tuple2<K, Either<NullValue, Message>>> combinedMessages = allMessages
                .groupBy(0).reduceGroup(combinerUdf)
                .setCombinable(true);

        newWorkSet = combinedMessages;
    }

    // configure the compute function
    superstepComputation = superstepComputation.name("Compute Function");
    if (this.configuration != null) {
        for (Tuple2<String, DataSet<?>> e : this.configuration.getBcastVars()) {
            superstepComputation = superstepComputation.withBroadcastSet(e.f1, e.f0);
        }
    }

    return iteration.closeWith(solutionSetDelta, newWorkSet);
}
Example 14
Source File: IterationsCompilerTest.java From flink with Apache License 2.0
public static DataSet<Tuple2<Long, Long>> doDeltaIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> depIteration = vertices.iterateDelta(vertices, 100, 0);

    DataSet<Tuple1<Long>> candidates = depIteration.getWorkset().join(edges).where(0).equalTo(0)
            .projectSecond(1);

    DataSet<Tuple1<Long>> grouped = candidates.groupBy(0).reduceGroup(new Reduce101());

    DataSet<Tuple2<Long, Long>> candidatesDependencies = grouped.join(edges).where(0).equalTo(1).projectSecond(0, 1);

    DataSet<Tuple2<Long, Long>> verticesWithNewComponents = candidatesDependencies.join(depIteration.getSolutionSet()).where(0).equalTo(0)
            .with(new Join222())
            .groupBy(0).aggregate(Aggregations.MIN, 1);

    DataSet<Tuple2<Long, Long>> updatedComponentId = verticesWithNewComponents.join(depIteration.getSolutionSet()).where(0).equalTo(0)
            .flatMap(new FlatMapJoin());

    DataSet<Tuple2<Long, Long>> depResult = depIteration.closeWith(updatedComponentId, updatedComponentId);

    return depResult;
}
Example 15
Source File: ConnectedComponentsWithDeferredUpdateITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new UpdateComponentIdMatchNonPreserving());

    DataSet<Tuple2<Long, Long>> delta;
    if (extraMapper) {
        delta = changes.map(
                // ID Mapper
                new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                    private static final long serialVersionUID = -3929364091829757322L;

                    @Override
                    public Tuple2<Long, Long> map(Tuple2<Long, Long> v) throws Exception {
                        return v;
                    }
                });
    }
    else {
        delta = changes;
    }

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 16
Source File: MultipleJoinsWithSolutionSetCompilerTest.java From Flink-CEPplus with Apache License 2.0
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) {

    DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration = initialData.iterateDelta(initialData, numIterations, 0);

    DataSet<Tuple2<Long, Double>> delta = iteration.getSolutionSet()
            .join(iteration.getWorkset().flatMap(new Duplicator())).where(0).equalTo(0).with(new SummingJoin()).name(JOIN_1)
            .groupBy(0).aggregate(Aggregations.MIN, 1).map(new Expander())
            .join(iteration.getSolutionSet()).where(0).equalTo(0).with(new SummingJoinProject()).name(JOIN_2);

    DataSet<Tuple2<Long, Double>> changes = delta.groupBy(0).aggregate(Aggregations.SUM, 1);

    DataSet<Tuple2<Long, Double>> result = iteration.closeWith(delta, changes);

    return result;
}
Example 17
Source File: MultipleJoinsWithSolutionSetCompilerTest.java From flink with Apache License 2.0
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) {

    DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration = initialData.iterateDelta(initialData, numIterations, 0);

    DataSet<Tuple2<Long, Double>> delta = iteration.getSolutionSet()
            .join(iteration.getWorkset().flatMap(new Duplicator())).where(0).equalTo(0).with(new SummingJoin()).name(JOIN_1)
            .groupBy(0).aggregate(Aggregations.MIN, 1).map(new Expander())
            .join(iteration.getSolutionSet()).where(0).equalTo(0).with(new SummingJoinProject()).name(JOIN_2);

    DataSet<Tuple2<Long, Double>> changes = delta.groupBy(0).aggregate(Aggregations.SUM, 1);

    DataSet<Tuple2<Long, Double>> result = iteration.closeWith(delta, changes);

    return result;
}
Example 18
Source File: NestedIterationsTest.java From flink with Apache License 2.0
@Test
public void testDeltaIterationInClosure() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> data1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
        DataSet<Tuple2<Long, Long>> data2 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> firstIteration = data1.iterateDelta(data1, 100, 0);

        DataSet<Tuple2<Long, Long>> inFirst = firstIteration.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> firstResult = firstIteration.closeWith(inFirst, inFirst).map(new IdentityMapper<Tuple2<Long, Long>>());

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> mainIteration = data2.iterateDelta(data2, 100, 0);

        DataSet<Tuple2<Long, Long>> joined = mainIteration.getWorkset().join(firstResult).where(0).equalTo(0)
                .projectFirst(0).projectSecond(0);

        DataSet<Tuple2<Long, Long>> mainResult = mainIteration.closeWith(joined, joined);

        mainResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();

        // optimizer should be able to translate this
        OptimizedPlan op = compileNoStats(p);

        // job graph generator should be able to translate this
        new JobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 19
Source File: NestedIterationsTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testDeltaIterationInClosure() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> data1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
        DataSet<Tuple2<Long, Long>> data2 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> firstIteration = data1.iterateDelta(data1, 100, 0);

        DataSet<Tuple2<Long, Long>> inFirst = firstIteration.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> firstResult = firstIteration.closeWith(inFirst, inFirst).map(new IdentityMapper<Tuple2<Long, Long>>());

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> mainIteration = data2.iterateDelta(data2, 100, 0);

        DataSet<Tuple2<Long, Long>> joined = mainIteration.getWorkset().join(firstResult).where(0).equalTo(0)
                .projectFirst(0).projectSecond(0);

        DataSet<Tuple2<Long, Long>> mainResult = mainIteration.closeWith(joined, joined);

        mainResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();

        // optimizer should be able to translate this
        OptimizedPlan op = compileNoStats(p);

        // job graph generator should be able to translate this
        new JobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}