org.apache.flink.graph.utils.Tuple2ToVertexMap Java Examples
The following examples show how to use org.apache.flink.graph.utils.Tuple2ToVertexMap. They are drawn from open source projects; the source file, project, and license are noted above each example.
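Tuple2ToVertexMap is a MapFunction<Tuple2<K, VV>, Vertex<K, VV>>: it maps a tuple's first field to the vertex ID and its second field to the vertex value. Before the project examples, here is a minimal standalone sketch; the class name, value types, and sample data are illustrative and not taken from any of the projects below.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.utils.Tuple2ToVertexMap;

public class Tuple2ToVertexMapExample {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Each Tuple2 carries (vertex ID, vertex value).
        DataSet<Tuple2<Long, String>> pairs = env.fromElements(
            new Tuple2<>(1L, "alice"), new Tuple2<>(2L, "bob"));

        // Tuple2ToVertexMap turns f0 into the vertex ID and f1 into the vertex value.
        DataSet<Vertex<Long, String>> vertices = pairs.map(new Tuple2ToVertexMap<>());

        vertices.print();
    }
}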
Example #1
Source File: GraphCsvReader.java From Flink-CEPplus with Apache License 2.0
/**
 * Creates a Graph from CSV input without edge values.
 * The vertex values are specified through a vertices input file or a user-defined map function.
 * If no vertices input file is provided, the vertex IDs are automatically created from the edges
 * input file.
 *
 * @param vertexKey the type of the vertex IDs
 * @param vertexValue the type of the vertex values
 * @return a Graph with the given vertex ID and vertex value types
 */
@SuppressWarnings({ "serial", "unchecked" })
public <K, VV> Graph<K, VV, NullValue> vertexTypes(Class<K> vertexKey, Class<VV> vertexValue) {
    if (edgeReader == null) {
        throw new RuntimeException("The edge input file cannot be null!");
    }

    DataSet<Edge<K, NullValue>> edges = edgeReader
        .types(vertexKey, vertexKey)
        .name(GraphCsvReader.class.getName())
        .map(new Tuple2ToEdgeMap<>())
        .name("To Edge");

    // the vertex value can be provided by an input file or a user-defined mapper
    if (vertexReader != null) {
        DataSet<Vertex<K, VV>> vertices = vertexReader
            .types(vertexKey, vertexValue)
            .name(GraphCsvReader.class.getName())
            .map(new Tuple2ToVertexMap<>())
            .name("Type conversion");

        return Graph.fromDataSet(vertices, edges, executionContext);
    } else if (mapper != null) {
        return Graph.fromDataSet(edges, (MapFunction<K, VV>) mapper, executionContext);
    } else {
        throw new RuntimeException("Vertex values have to be specified through a vertices input file " +
            "or a user-defined map function.");
    }
}
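For orientation, vertexTypes is reached through the fluent Graph.fromCsvReader API; a hedged sketch follows, using the overload that supplies both a vertices file and an edges file (the paths are placeholders).

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// Vertices file: "id,value" per line; edges file: "source,target" per line (no edge values).
Graph<Long, String, NullValue> graph = Graph
    .fromCsvReader("/path/to/vertices.csv", "/path/to/edges.csv", env) // placeholder paths
    .vertexTypes(Long.class, String.class);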
Example #2
Source File: GraphCsvReader.java From flink with Apache License 2.0
/**
 * Creates a Graph from CSV input without edge values.
 * The vertex values are specified through a vertices input file or a user-defined map function.
 * If no vertices input file is provided, the vertex IDs are automatically created from the edges
 * input file.
 *
 * @param vertexKey the type of the vertex IDs
 * @param vertexValue the type of the vertex values
 * @return a Graph with the given vertex ID and vertex value types
 */
@SuppressWarnings({ "serial", "unchecked" })
public <K, VV> Graph<K, VV, NullValue> vertexTypes(Class<K> vertexKey, Class<VV> vertexValue) {
    if (edgeReader == null) {
        throw new RuntimeException("The edge input file cannot be null!");
    }

    DataSet<Edge<K, NullValue>> edges = edgeReader
        .types(vertexKey, vertexKey)
        .name(GraphCsvReader.class.getName())
        .map(new Tuple2ToEdgeMap<>())
        .name("To Edge");

    // the vertex value can be provided by an input file or a user-defined mapper
    if (vertexReader != null) {
        DataSet<Vertex<K, VV>> vertices = vertexReader
            .types(vertexKey, vertexValue)
            .name(GraphCsvReader.class.getName())
            .map(new Tuple2ToVertexMap<>())
            .name("Type conversion");

        return Graph.fromDataSet(vertices, edges, executionContext);
    } else if (mapper != null) {
        return Graph.fromDataSet(edges, (MapFunction<K, VV>) mapper, executionContext);
    } else {
        throw new RuntimeException("Vertex values have to be specified through a vertices input file " +
            "or a user-defined map function.");
    }
}
Example #3
Source File: SpargelCompilerTest.java From flink with Apache License 2.0
@SuppressWarnings("serial") @Test public void testSpargelCompilerWithBroadcastVariable() { final String broadcastVariableName = "broadcast variable"; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); // compose test program DataSet<Long> bcVar = env.fromElements(1L); DataSet<Vertex<Long, Long>> initialVertices = env.fromElements( new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)) .map(new Tuple2ToVertexMap<>()); DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)) .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() { public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) { return new Edge<>(edge.f0, edge.f1, NullValue.getInstance()); } }); Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env); ScatterGatherConfiguration parameters = new ScatterGatherConfiguration(); parameters.addBroadcastSetForScatterFunction(broadcastVariableName, bcVar); parameters.addBroadcastSetForGatherFunction(broadcastVariableName, bcVar); DataSet<Vertex<Long, Long>> result = graph.runScatterGatherIteration( new ConnectedComponents.CCMessenger<>(BasicTypeInfo.LONG_TYPE_INFO), new ConnectedComponents.CCUpdater<>(), 100) .getVertices(); result.output(new DiscardingOutputFormat<>()); Plan p = env.createProgramPlan("Spargel Connected Components"); OptimizedPlan op = compileNoStats(p); // check the sink SinkPlanNode sink = op.getDataSinks().iterator().next(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(DEFAULT_PARALLELISM, sink.getParallelism()); // check the iteration WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism()); // check the solution set join and the delta PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode(); assertTrue(ssDelta instanceof DualInputPlanNode); // this is only true if the update functions preserves the partitioning DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta; assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism()); assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy()); assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys()); // check the workset set join DualInputPlanNode edgeJoin = (DualInputPlanNode) ssJoin.getInput1().getSource(); assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism()); assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput2().getShipStrategy()); assertTrue(edgeJoin.getInput1().getTempMode().isCached()); assertEquals(new FieldList(0), edgeJoin.getInput1().getShipStrategyKeys()); // check that the initial partitioning is pushed out of the loop assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput2().getShipStrategy()); assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys()); assertEquals(new FieldList(0), iteration.getInput2().getShipStrategyKeys()); }
Example #4
Source File: SpargelCompilerTest.java From flink with Apache License 2.0
@SuppressWarnings("serial") @Test public void testSpargelCompiler() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); // compose test program DataSet<Vertex<Long, Long>> initialVertices = env.fromElements( new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)) .map(new Tuple2ToVertexMap<>()); DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)) .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() { public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) { return new Edge<>(edge.f0, edge.f1, NullValue.getInstance()); } }); Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env); DataSet<Vertex<Long, Long>> result = graph.runScatterGatherIteration( new ConnectedComponents.CCMessenger<>(BasicTypeInfo.LONG_TYPE_INFO), new ConnectedComponents.CCUpdater<>(), 100) .getVertices(); result.output(new DiscardingOutputFormat<>()); Plan p = env.createProgramPlan("Spargel Connected Components"); OptimizedPlan op = compileNoStats(p); // check the sink SinkPlanNode sink = op.getDataSinks().iterator().next(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(DEFAULT_PARALLELISM, sink.getParallelism()); // check the iteration WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism()); // check the solution set join and the delta PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode(); assertTrue(ssDelta instanceof DualInputPlanNode); // this is only true if the update functions preserves the partitioning DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta; assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism()); assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy()); assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys()); // check the workset set join DualInputPlanNode edgeJoin = (DualInputPlanNode) ssJoin.getInput1().getSource(); assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism()); assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput2().getShipStrategy()); assertTrue(edgeJoin.getInput1().getTempMode().isCached()); assertEquals(new FieldList(0), edgeJoin.getInput1().getShipStrategyKeys()); // check that the initial partitioning is pushed out of the loop assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput2().getShipStrategy()); assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys()); assertEquals(new FieldList(0), iteration.getInput2().getShipStrategyKeys()); // check that the initial workset sort is outside the loop assertEquals(LocalStrategy.SORT, iteration.getInput2().getLocalStrategy()); assertEquals(new FieldList(0), iteration.getInput2().getLocalStrategyKeys()); }
Example #5
Source File: PregelCompilerTest.java From flink with Apache License 2.0
@SuppressWarnings("serial") @Test public void testPregelWithCombiner() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); // compose test program { DataSet<Vertex<Long, Long>> initialVertices = env.fromElements( new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)) .map(new Tuple2ToVertexMap<>()); DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)) .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() { public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) { return new Edge<>(edge.f0, edge.f1, NullValue.getInstance()); } }); Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env); DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration( new CCCompute(), new CCCombiner(), 100).getVertices(); result.output(new DiscardingOutputFormat<>()); } Plan p = env.createProgramPlan("Pregel Connected Components"); OptimizedPlan op = compileNoStats(p); // check the sink SinkPlanNode sink = op.getDataSinks().iterator().next(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(DEFAULT_PARALLELISM, sink.getParallelism()); // check the iteration WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism()); // check the combiner SingleInputPlanNode combiner = (SingleInputPlanNode) iteration.getInput2().getSource(); assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy()); // check the solution set delta PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode(); assertTrue(ssDelta instanceof SingleInputPlanNode); SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism()); assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy()); // check the computation coGroup DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource()); assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism()); assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy()); assertTrue(computationCoGroup.getInput2().getTempMode().isCached()); assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys()); // check that the initial partitioning is pushed out of the loop assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy()); assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys()); }
Example #6
Source File: PregelCompilerTest.java From flink with Apache License 2.0
@SuppressWarnings("serial") @Test public void testPregelCompilerWithBroadcastVariable() { final String broadcastSetName = "broadcast"; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); // compose test program { DataSet<Long> bcVar = env.fromElements(1L); DataSet<Vertex<Long, Long>> initialVertices = env.fromElements( new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)) .map(new Tuple2ToVertexMap<>()); DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)) .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() { public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) { return new Edge<>(edge.f0, edge.f1, NullValue.getInstance()); } }); Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env); VertexCentricConfiguration parameters = new VertexCentricConfiguration(); parameters.addBroadcastSet(broadcastSetName, bcVar); DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration( new CCCompute(), null, 100, parameters) .getVertices(); result.output(new DiscardingOutputFormat<>()); } Plan p = env.createProgramPlan("Pregel Connected Components"); OptimizedPlan op = compileNoStats(p); // check the sink SinkPlanNode sink = op.getDataSinks().iterator().next(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(DEFAULT_PARALLELISM, sink.getParallelism()); // check the iteration WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism()); // check the solution set delta PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode(); assertTrue(ssDelta instanceof SingleInputPlanNode); SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism()); assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy()); // check the computation coGroup DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource()); assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism()); assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy()); assertTrue(computationCoGroup.getInput2().getTempMode().isCached()); assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys()); // check that the initial partitioning is pushed out of the loop assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy()); assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys()); }
Example #7
Source File: PregelCompilerTest.java From flink with Apache License 2.0
@SuppressWarnings("serial") @Test public void testPregelCompiler() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); // compose test program { DataSet<Vertex<Long, Long>> initialVertices = env.fromElements( new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)) .map(new Tuple2ToVertexMap<>()); DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)) .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() { public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) { return new Edge<>(edge.f0, edge.f1, NullValue.getInstance()); } }); Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env); DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration( new CCCompute(), null, 100).getVertices(); result.output(new DiscardingOutputFormat<>()); } Plan p = env.createProgramPlan("Pregel Connected Components"); OptimizedPlan op = compileNoStats(p); // check the sink SinkPlanNode sink = op.getDataSinks().iterator().next(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(DEFAULT_PARALLELISM, sink.getParallelism()); // check the iteration WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism()); // check the solution set delta PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode(); assertTrue(ssDelta instanceof SingleInputPlanNode); SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism()); assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy()); // check the computation coGroup DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource()); assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism()); assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy()); assertTrue(computationCoGroup.getInput2().getTempMode().isCached()); assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys()); // check that the initial partitioning is pushed out of the loop assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy()); assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys()); }
Example #8
Source File: SpargelCompilerTest.java From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("serial") @Test public void testSpargelCompilerWithBroadcastVariable() { final String broadcastVariableName = "broadcast variable"; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); // compose test program DataSet<Long> bcVar = env.fromElements(1L); DataSet<Vertex<Long, Long>> initialVertices = env.fromElements( new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)) .map(new Tuple2ToVertexMap<>()); DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)) .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() { public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) { return new Edge<>(edge.f0, edge.f1, NullValue.getInstance()); } }); Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env); ScatterGatherConfiguration parameters = new ScatterGatherConfiguration(); parameters.addBroadcastSetForScatterFunction(broadcastVariableName, bcVar); parameters.addBroadcastSetForGatherFunction(broadcastVariableName, bcVar); DataSet<Vertex<Long, Long>> result = graph.runScatterGatherIteration( new ConnectedComponents.CCMessenger<>(BasicTypeInfo.LONG_TYPE_INFO), new ConnectedComponents.CCUpdater<>(), 100) .getVertices(); result.output(new DiscardingOutputFormat<>()); Plan p = env.createProgramPlan("Spargel Connected Components"); OptimizedPlan op = compileNoStats(p); // check the sink SinkPlanNode sink = op.getDataSinks().iterator().next(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(DEFAULT_PARALLELISM, sink.getParallelism()); // check the iteration WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism()); // check the solution set join and the delta PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode(); assertTrue(ssDelta instanceof DualInputPlanNode); // this is only true if the update functions preserves the partitioning DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta; assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism()); assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy()); assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys()); // check the workset set join DualInputPlanNode edgeJoin = (DualInputPlanNode) ssJoin.getInput1().getSource(); assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism()); assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput2().getShipStrategy()); assertTrue(edgeJoin.getInput1().getTempMode().isCached()); assertEquals(new FieldList(0), edgeJoin.getInput1().getShipStrategyKeys()); // check that the initial partitioning is pushed out of the loop assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput2().getShipStrategy()); assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys()); assertEquals(new FieldList(0), iteration.getInput2().getShipStrategyKeys()); }
Example #9
Source File: SpargelCompilerTest.java From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("serial") @Test public void testSpargelCompiler() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); // compose test program DataSet<Vertex<Long, Long>> initialVertices = env.fromElements( new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)) .map(new Tuple2ToVertexMap<>()); DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)) .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() { public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) { return new Edge<>(edge.f0, edge.f1, NullValue.getInstance()); } }); Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env); DataSet<Vertex<Long, Long>> result = graph.runScatterGatherIteration( new ConnectedComponents.CCMessenger<>(BasicTypeInfo.LONG_TYPE_INFO), new ConnectedComponents.CCUpdater<>(), 100) .getVertices(); result.output(new DiscardingOutputFormat<>()); Plan p = env.createProgramPlan("Spargel Connected Components"); OptimizedPlan op = compileNoStats(p); // check the sink SinkPlanNode sink = op.getDataSinks().iterator().next(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(DEFAULT_PARALLELISM, sink.getParallelism()); // check the iteration WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism()); // check the solution set join and the delta PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode(); assertTrue(ssDelta instanceof DualInputPlanNode); // this is only true if the update functions preserves the partitioning DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta; assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism()); assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy()); assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys()); // check the workset set join DualInputPlanNode edgeJoin = (DualInputPlanNode) ssJoin.getInput1().getSource(); assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism()); assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput2().getShipStrategy()); assertTrue(edgeJoin.getInput1().getTempMode().isCached()); assertEquals(new FieldList(0), edgeJoin.getInput1().getShipStrategyKeys()); // check that the initial partitioning is pushed out of the loop assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput2().getShipStrategy()); assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys()); assertEquals(new FieldList(0), iteration.getInput2().getShipStrategyKeys()); // check that the initial workset sort is outside the loop assertEquals(LocalStrategy.SORT, iteration.getInput2().getLocalStrategy()); assertEquals(new FieldList(0), iteration.getInput2().getLocalStrategyKeys()); }
Example #10
Source File: PregelCompilerTest.java From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("serial") @Test public void testPregelWithCombiner() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); // compose test program { DataSet<Vertex<Long, Long>> initialVertices = env.fromElements( new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)) .map(new Tuple2ToVertexMap<>()); DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)) .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() { public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) { return new Edge<>(edge.f0, edge.f1, NullValue.getInstance()); } }); Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env); DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration( new CCCompute(), new CCCombiner(), 100).getVertices(); result.output(new DiscardingOutputFormat<>()); } Plan p = env.createProgramPlan("Pregel Connected Components"); OptimizedPlan op = compileNoStats(p); // check the sink SinkPlanNode sink = op.getDataSinks().iterator().next(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(DEFAULT_PARALLELISM, sink.getParallelism()); // check the iteration WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism()); // check the combiner SingleInputPlanNode combiner = (SingleInputPlanNode) iteration.getInput2().getSource(); assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy()); // check the solution set delta PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode(); assertTrue(ssDelta instanceof SingleInputPlanNode); SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism()); assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy()); // check the computation coGroup DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource()); assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism()); assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy()); assertTrue(computationCoGroup.getInput2().getTempMode().isCached()); assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys()); // check that the initial partitioning is pushed out of the loop assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy()); assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys()); }
Example #11
Source File: PregelCompilerTest.java From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("serial") @Test public void testPregelCompilerWithBroadcastVariable() { final String broadcastSetName = "broadcast"; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); // compose test program { DataSet<Long> bcVar = env.fromElements(1L); DataSet<Vertex<Long, Long>> initialVertices = env.fromElements( new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)) .map(new Tuple2ToVertexMap<>()); DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)) .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() { public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) { return new Edge<>(edge.f0, edge.f1, NullValue.getInstance()); } }); Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env); VertexCentricConfiguration parameters = new VertexCentricConfiguration(); parameters.addBroadcastSet(broadcastSetName, bcVar); DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration( new CCCompute(), null, 100, parameters) .getVertices(); result.output(new DiscardingOutputFormat<>()); } Plan p = env.createProgramPlan("Pregel Connected Components"); OptimizedPlan op = compileNoStats(p); // check the sink SinkPlanNode sink = op.getDataSinks().iterator().next(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(DEFAULT_PARALLELISM, sink.getParallelism()); // check the iteration WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism()); // check the solution set delta PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode(); assertTrue(ssDelta instanceof SingleInputPlanNode); SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism()); assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy()); // check the computation coGroup DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource()); assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism()); assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy()); assertTrue(computationCoGroup.getInput2().getTempMode().isCached()); assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys()); // check that the initial partitioning is pushed out of the loop assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy()); assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys()); }
Example #12
Source File: PregelCompilerTest.java From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("serial") @Test public void testPregelCompiler() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); // compose test program { DataSet<Vertex<Long, Long>> initialVertices = env.fromElements( new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)) .map(new Tuple2ToVertexMap<>()); DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)) .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() { public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) { return new Edge<>(edge.f0, edge.f1, NullValue.getInstance()); } }); Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env); DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration( new CCCompute(), null, 100).getVertices(); result.output(new DiscardingOutputFormat<>()); } Plan p = env.createProgramPlan("Pregel Connected Components"); OptimizedPlan op = compileNoStats(p); // check the sink SinkPlanNode sink = op.getDataSinks().iterator().next(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(DEFAULT_PARALLELISM, sink.getParallelism()); // check the iteration WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism()); // check the solution set delta PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode(); assertTrue(ssDelta instanceof SingleInputPlanNode); SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism()); assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy()); // check the computation coGroup DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource()); assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism()); assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy()); assertTrue(computationCoGroup.getInput2().getTempMode().isCached()); assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys()); // check that the initial partitioning is pushed out of the loop assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy()); assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys()); }
Example #13
Source File: Graph.java From flink with Apache License 2.0
/**
 * Creates a graph from a DataSet of Tuple2 objects for vertices and
 * Tuple3 objects for edges.
 *
 * <p>The first field of the Tuple2 vertex object will become the vertex ID
 * and the second field will become the vertex value.
 * The first field of the Tuple3 object for edges will become the source ID,
 * the second field will become the target ID, and the third field will become
 * the edge value.
 *
 * @param vertices a DataSet of Tuple2 representing the vertices.
 * @param edges a DataSet of Tuple3 representing the edges.
 * @param context the flink execution environment.
 * @return the newly created graph.
 */
public static <K, VV, EV> Graph<K, VV, EV> fromTupleDataSet(DataSet<Tuple2<K, VV>> vertices,
        DataSet<Tuple3<K, K, EV>> edges, ExecutionEnvironment context) {

    DataSet<Vertex<K, VV>> vertexDataSet = vertices
        .map(new Tuple2ToVertexMap<>())
        .name("Type conversion");

    DataSet<Edge<K, EV>> edgeDataSet = edges
        .map(new Tuple3ToEdgeMap<>())
        .name("Type conversion");

    return fromDataSet(vertexDataSet, edgeDataSet, context);
}
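As a brief usage sketch of fromTupleDataSet (the sample data is illustrative): the Tuple2 fields supply the vertex ID and value, the Tuple3 fields supply the source ID, target ID, and edge value, and the conversions run through Tuple2ToVertexMap and Tuple3ToEdgeMap as shown above.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// (vertex ID, vertex value) pairs
DataSet<Tuple2<Long, Long>> vertexTuples = env.fromElements(
    new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L));

// (source ID, target ID, edge value) triples
DataSet<Tuple3<Long, Long, Double>> edgeTuples = env.fromElements(
    new Tuple3<>(1L, 2L, 0.5));

Graph<Long, Long, Double> graph = Graph.fromTupleDataSet(vertexTuples, edgeTuples, env);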
Example #14
Source File: Graph.java From Flink-CEPplus with Apache License 2.0
/**
 * Creates a graph from a DataSet of Tuple2 objects for vertices and
 * Tuple3 objects for edges.
 *
 * <p>The first field of the Tuple2 vertex object will become the vertex ID
 * and the second field will become the vertex value.
 * The first field of the Tuple3 object for edges will become the source ID,
 * the second field will become the target ID, and the third field will become
 * the edge value.
 *
 * @param vertices a DataSet of Tuple2 representing the vertices.
 * @param edges a DataSet of Tuple3 representing the edges.
 * @param context the flink execution environment.
 * @return the newly created graph.
 */
public static <K, VV, EV> Graph<K, VV, EV> fromTupleDataSet(DataSet<Tuple2<K, VV>> vertices,
        DataSet<Tuple3<K, K, EV>> edges, ExecutionEnvironment context) {

    DataSet<Vertex<K, VV>> vertexDataSet = vertices
        .map(new Tuple2ToVertexMap<>())
        .name("Type conversion");

    DataSet<Edge<K, EV>> edgeDataSet = edges
        .map(new Tuple3ToEdgeMap<>())
        .name("Type conversion");

    return fromDataSet(vertexDataSet, edgeDataSet, context);
}