org.apache.flink.optimizer.plan.NAryUnionPlanNode Java Examples

Source File: BinaryUnionReplacer.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Appends the channels contributed by {@code in} to {@code inputs}.
 *
 * <p>When the source of the channel is an {@link NAryUnionPlanNode}, the inputs of that node
 * are appended in place of the channel, after checking that the channel itself uses the
 * FORWARD ship strategy and no local strategy. For any other source the channel is appended
 * unchanged.
 *
 * @param in the channel to inspect
 * @param inputs the list that receives the collected channels
 * @throws CompilerException if the channel leaving an n-ary union node has a ship strategy
 *         other than FORWARD or a local strategy other than {@code null} / NONE
 */
public void collect(Channel in, List<Channel> inputs) {
	if (!(in.getSource() instanceof NAryUnionPlanNode)) {
		// the source is not a collapsed union node, so the channel itself is the input
		inputs.add(in);
		return;
	}

	// sanity checks: no strategies may have been chosen between the binary union operators
	if (in.getShipStrategy() != ShipStrategyType.FORWARD) {
		throw new CompilerException("Bug: Plan generation for Unions picked a ship strategy between binary plan operators.");
	}
	if (in.getLocalStrategy() != null && in.getLocalStrategy() != LocalStrategy.NONE) {
		throw new CompilerException("Bug: Plan generation for Unions picked a local strategy between binary plan operators.");
	}

	final NAryUnionPlanNode collapsedUnion = (NAryUnionPlanNode) in.getSource();
	inputs.addAll(collapsedUnion.getListOfInputs());
}

Source File: BinaryUnionReplacer.java From flink with Apache License 2.0

6 votes

/**
 * Appends the channels contributed by {@code in} to {@code inputs}.
 *
 * <p>When the source of the channel is an {@link NAryUnionPlanNode}, the inputs of that node
 * are appended in place of the channel, after checking that the channel itself uses the
 * FORWARD ship strategy and no local strategy. For any other source the channel is appended
 * unchanged.
 *
 * @param in the channel to inspect
 * @param inputs the list that receives the collected channels
 * @throws CompilerException if the channel leaving an n-ary union node has a ship strategy
 *         other than FORWARD or a local strategy other than {@code null} / NONE
 */
public void collect(Channel in, List<Channel> inputs) {
	if (!(in.getSource() instanceof NAryUnionPlanNode)) {
		// the source is not a collapsed union node, so the channel itself is the input
		inputs.add(in);
		return;
	}

	// sanity checks: no strategies may have been chosen between the binary union operators
	if (in.getShipStrategy() != ShipStrategyType.FORWARD) {
		throw new CompilerException("Bug: Plan generation for Unions picked a ship strategy between binary plan operators.");
	}
	if (in.getLocalStrategy() != null && in.getLocalStrategy() != LocalStrategy.NONE) {
		throw new CompilerException("Bug: Plan generation for Unions picked a local strategy between binary plan operators.");
	}

	final NAryUnionPlanNode collapsedUnion = (NAryUnionPlanNode) in.getSource();
	inputs.addAll(collapsedUnion.getListOfInputs());
}

Source File: BinaryUnionReplacer.java From flink with Apache License 2.0

6 votes

/**
 * Appends the channels contributed by {@code in} to {@code inputs}.
 *
 * <p>When the source of the channel is an {@link NAryUnionPlanNode}, the inputs of that node
 * are appended in place of the channel, after checking that the channel itself uses the
 * FORWARD ship strategy and no local strategy. For any other source the channel is appended
 * unchanged.
 *
 * @param in the channel to inspect
 * @param inputs the list that receives the collected channels
 * @throws CompilerException if the channel leaving an n-ary union node has a ship strategy
 *         other than FORWARD or a local strategy other than {@code null} / NONE
 */
public void collect(Channel in, List<Channel> inputs) {
	if (!(in.getSource() instanceof NAryUnionPlanNode)) {
		// the source is not a collapsed union node, so the channel itself is the input
		inputs.add(in);
		return;
	}

	// sanity checks: no strategies may have been chosen between the binary union operators
	if (in.getShipStrategy() != ShipStrategyType.FORWARD) {
		throw new CompilerException("Bug: Plan generation for Unions picked a ship strategy between binary plan operators.");
	}
	if (in.getLocalStrategy() != null && in.getLocalStrategy() != LocalStrategy.NONE) {
		throw new CompilerException("Bug: Plan generation for Unions picked a local strategy between binary plan operators.");
	}

	final NAryUnionPlanNode collapsedUnion = (NAryUnionPlanNode) in.getSource();
	inputs.addAll(collapsedUnion.getListOfInputs());
}

Source File: IterationCompilerTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Compiles a delta iteration whose two results passed to {@code closeWith} are both unions of
 * mapped worksets, and checks that the union nodes, as well as the single-input nodes that
 * directly consume them, are on the dynamic path with a cost weight of at least 1. Finally
 * the optimized plan is handed to the {@link JobGraphGenerator}, which must not throw.
 */
@Test
public void testWorksetIterationWithUnionRoot() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(43);

		DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
				.map(new MapFunction<Long, Tuple2<Long, Long>>() {
					@Override
					public Tuple2<Long, Long> map(Long value){ return null; }
				});

		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
		iter.closeWith(
				iter.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>())
					.union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>())),
				iter.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>())
					.union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>())))
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		SinkPlanNode sink = op.getDataSinks().iterator().next();
		WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) sink.getInput().getSource();

		// make sure that the root is part of the dynamic path

		// the "NoOp"s that come after the union.
		SingleInputPlanNode nextWorksetNoop = (SingleInputPlanNode) iterNode.getNextWorkSetPlanNode();
		SingleInputPlanNode solutionDeltaNoop = (SingleInputPlanNode) iterNode.getSolutionSetDeltaPlanNode();

		NAryUnionPlanNode nextWorksetUnion = (NAryUnionPlanNode) nextWorksetNoop.getInput().getSource();
		NAryUnionPlanNode solutionDeltaUnion = (NAryUnionPlanNode) solutionDeltaNoop.getInput().getSource();

		assertTrue(nextWorksetNoop.isOnDynamicPath());
		assertTrue(nextWorksetNoop.getCostWeight() >= 1);

		assertTrue(solutionDeltaNoop.isOnDynamicPath());
		assertTrue(solutionDeltaNoop.getCostWeight() >= 1);

		assertTrue(nextWorksetUnion.isOnDynamicPath());
		assertTrue(nextWorksetUnion.getCostWeight() >= 1);

		assertTrue(solutionDeltaUnion.isOnDynamicPath());
		assertTrue(solutionDeltaUnion.getCostWeight() >= 1);

		// job graph generation must succeed for the optimized plan
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		// attach the exception as cause so the test report keeps the full stack trace,
		// instead of printing it to stderr and failing with only the (possibly null) message
		throw new AssertionError("Unexpected exception: " + e.getMessage(), e);
	}
}

Source File: UnionReplacementTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Verifies the ship strategies chosen around union operators when the producers and the
 * consumers of the union run with different parallelisms.
 *
 * <pre>
 * Src1 - Map(fullP) -\-/- Union - Map(fullP) - Out
 *                     X
 * Src2 - Map(halfP) -/-\- Union - Map(halfP) - Out
 * </pre>
 *
 * <p>Expected plan: each union has the parallelism of the map that consumes it and is
 * connected to that map with a FORWARD strategy. An input of a union is connected with
 * FORWARD when the parallelism is preserved and with PARTITION_RANDOM when it changes.
 */
@Test
public void testUnionInputOutputDifferentDOP() throws Exception {

	final int fullParallelism = DEFAULT_PARALLELISM;
	final int halfParallelism = DEFAULT_PARALLELISM / 2;

	ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// two named input mappers, one per parallelism
	DataSet<Tuple2<Long, Long>> fullInput = environment.fromElements(new Tuple2<>(0L, 0L))
		.map(new IdentityMapper<>()).setParallelism(fullParallelism).name("inDopFull");
	DataSet<Tuple2<Long, Long>> halfInput = environment.fromElements(new Tuple2<>(0L, 0L))
		.map(new IdentityMapper<>()).setParallelism(halfParallelism).name("inDopHalf");

	DataSet<Tuple2<Long, Long>> unioned = fullInput.union(halfInput);

	// the same union is consumed by two named output mappers, one per parallelism
	DataSet<Tuple2<Long, Long>> fullOutput = unioned
		.map(new IdentityMapper<>()).setParallelism(fullParallelism).name("outDopFull");
	DataSet<Tuple2<Long, Long>> halfOutput = unioned
		.map(new IdentityMapper<>()).setParallelism(halfParallelism).name("outDopHalf");

	fullOutput.output(new DiscardingOutputFormat<>());
	halfOutput.output(new DiscardingOutputFormat<>());

	// ---- verify the optimized plan ----

	OptimizedPlan plan = compileNoStats(environment.createProgramPlan());
	OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(plan);

	SingleInputPlanNode fullInMap = nodes.getNode("inDopFull");
	SingleInputPlanNode halfInMap = nodes.getNode("inDopHalf");
	SingleInputPlanNode fullOutMap = nodes.getNode("outDopFull");
	SingleInputPlanNode halfOutMap = nodes.getNode("outDopHalf");
	NAryUnionPlanNode fullUnion = (NAryUnionPlanNode) fullOutMap.getInput().getSource();
	NAryUnionPlanNode halfUnion = (NAryUnionPlanNode) halfOutMap.getInput().getSource();

	// input mappers: two outgoing channels each, configured parallelism retained
	assertEquals(2, fullInMap.getOutgoingChannels().size());
	assertEquals(2, halfInMap.getOutgoingChannels().size());
	assertEquals(fullParallelism, fullInMap.getParallelism());
	assertEquals(halfParallelism, halfInMap.getParallelism());

	// unions take the parallelism of their consumer
	assertEquals(fullParallelism, fullUnion.getParallelism());
	assertEquals(halfParallelism, halfUnion.getParallelism());

	// output mappers keep their configured parallelism
	assertEquals(fullParallelism, fullOutMap.getParallelism());
	assertEquals(halfParallelism, halfOutMap.getParallelism());

	// union -> output mapper is always FORWARD
	assertEquals(ShipStrategyType.FORWARD, halfOutMap.getInput().getShipStrategy());
	assertEquals(ShipStrategyType.FORWARD, fullOutMap.getInput().getShipStrategy());

	// input mapper -> union: find out which of the two outgoing channels targets the
	// full-parallelism union; the other one is taken as the half-parallelism channel
	final boolean fullInFirstToFullUnion =
		fullInMap.getOutgoingChannels().get(0).getTarget() == fullUnion;
	final Channel fullToFull = fullInMap.getOutgoingChannels().get(fullInFirstToFullUnion ? 0 : 1);
	final Channel fullToHalf = fullInMap.getOutgoingChannels().get(fullInFirstToFullUnion ? 1 : 0);

	final boolean halfInFirstToFullUnion =
		halfInMap.getOutgoingChannels().get(0).getTarget() == fullUnion;
	final Channel halfToFull = halfInMap.getOutgoingChannels().get(halfInFirstToFullUnion ? 0 : 1);
	final Channel halfToHalf = halfInMap.getOutgoingChannels().get(halfInFirstToFullUnion ? 1 : 0);

	assertEquals(ShipStrategyType.FORWARD, fullToFull.getShipStrategy());
	assertEquals(ShipStrategyType.FORWARD, halfToHalf.getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_RANDOM, fullToHalf.getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_RANDOM, halfToFull.getShipStrategy());
}

Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Checks that two CSV sources whose splits are declared as partitioned on field 0 with the
 * same partitioning identifier ("byDate") expose, in the optimized plan of their union,
 * custom partitioning on field 0, no grouping, no ordering, and equal custom partitioners.
 */
@Test
public void checkCoPartitionedSources1() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data1 =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data1.getSplitDataProperties()
			.splitsPartitionedBy("byDate", 0);

	DataSource<Tuple2<Long, String>> data2 =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data2.getSplitDataProperties()
			.splitsPartitionedBy("byDate", 0);

	data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized plan: the sink's predecessor is the union, its inputs are the sources
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	NAryUnionPlanNode unionNode = (NAryUnionPlanNode) sinkNode.getPredecessor();
	SourcePlanNode sourceNode1 = (SourcePlanNode) unionNode.getListOfInputs().get(0).getSource();
	SourcePlanNode sourceNode2 = (SourcePlanNode) unionNode.getListOfInputs().get(1).getSource();

	GlobalProperties gprops1 = sourceNode1.getGlobalProperties();
	LocalProperties lprops1 = sourceNode1.getLocalProperties();
	GlobalProperties gprops2 = sourceNode2.getGlobalProperties();
	LocalProperties lprops2 = sourceNode2.getLocalProperties();

	// dedicated assertions report expected/actual values when they fail
	Assert.assertEquals(new FieldSet(0), new FieldSet(gprops1.getPartitioningFields().toArray()));
	Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops1.getPartitioning());
	Assert.assertNull(lprops1.getGroupedFields());
	Assert.assertNull(lprops1.getOrdering());

	Assert.assertEquals(new FieldSet(0), new FieldSet(gprops2.getPartitioningFields().toArray()));
	Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops2.getPartitioning());
	Assert.assertNull(lprops2.getGroupedFields());
	Assert.assertNull(lprops2.getOrdering());

	// same partitioning identifier on both sources -> partitioners must be equal
	Assert.assertEquals(gprops1.getCustomPartitioner(), gprops2.getCustomPartitioner());
}

Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Checks that two CSV sources whose splits are declared as partitioned on field 0 but with
 * different partitioning identifiers ("byCountry" and "byDate") expose, in the optimized plan
 * of their union, custom partitioning on field 0, no grouping, no ordering, and custom
 * partitioners that are not equal to each other.
 */
@Test
public void checkCoPartitionedSources2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data1 =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data1.getSplitDataProperties()
			.splitsPartitionedBy("byCountry", 0);

	DataSource<Tuple2<Long, String>> data2 =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data2.getSplitDataProperties()
			.splitsPartitionedBy("byDate", 0);

	data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized plan: the sink's predecessor is the union, its inputs are the sources
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	NAryUnionPlanNode unionNode = (NAryUnionPlanNode) sinkNode.getPredecessor();
	SourcePlanNode sourceNode1 = (SourcePlanNode) unionNode.getListOfInputs().get(0).getSource();
	SourcePlanNode sourceNode2 = (SourcePlanNode) unionNode.getListOfInputs().get(1).getSource();

	GlobalProperties gprops1 = sourceNode1.getGlobalProperties();
	LocalProperties lprops1 = sourceNode1.getLocalProperties();
	GlobalProperties gprops2 = sourceNode2.getGlobalProperties();
	LocalProperties lprops2 = sourceNode2.getLocalProperties();

	// dedicated assertions report expected/actual values when they fail
	Assert.assertEquals(new FieldSet(0), new FieldSet(gprops1.getPartitioningFields().toArray()));
	Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops1.getPartitioning());
	Assert.assertNull(lprops1.getGroupedFields());
	Assert.assertNull(lprops1.getOrdering());

	Assert.assertEquals(new FieldSet(0), new FieldSet(gprops2.getPartitioningFields().toArray()));
	Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops2.getPartitioning());
	Assert.assertNull(lprops2.getGroupedFields());
	Assert.assertNull(lprops2.getOrdering());

	// different partitioning identifiers -> partitioners must differ
	Assert.assertNotEquals(gprops1.getCustomPartitioner(), gprops2.getCustomPartitioner());
}

Source File: IterationCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles a delta iteration whose two results passed to {@code closeWith} are both unions of
 * mapped worksets, and checks that the union nodes, as well as the single-input nodes that
 * directly consume them, are on the dynamic path with a cost weight of at least 1. Finally
 * the optimized plan is handed to the {@link JobGraphGenerator}, which must not throw.
 */
@Test
public void testWorksetIterationWithUnionRoot() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(43);

		DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
				.map(new MapFunction<Long, Tuple2<Long, Long>>() {
					@Override
					public Tuple2<Long, Long> map(Long value){ return null; }
				});

		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
		iter.closeWith(
				iter.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>())
					.union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>())),
				iter.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>())
					.union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>())))
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		SinkPlanNode sink = op.getDataSinks().iterator().next();
		WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) sink.getInput().getSource();

		// make sure that the root is part of the dynamic path

		// the "NoOp"s that come after the union.
		SingleInputPlanNode nextWorksetNoop = (SingleInputPlanNode) iterNode.getNextWorkSetPlanNode();
		SingleInputPlanNode solutionDeltaNoop = (SingleInputPlanNode) iterNode.getSolutionSetDeltaPlanNode();

		NAryUnionPlanNode nextWorksetUnion = (NAryUnionPlanNode) nextWorksetNoop.getInput().getSource();
		NAryUnionPlanNode solutionDeltaUnion = (NAryUnionPlanNode) solutionDeltaNoop.getInput().getSource();

		assertTrue(nextWorksetNoop.isOnDynamicPath());
		assertTrue(nextWorksetNoop.getCostWeight() >= 1);

		assertTrue(solutionDeltaNoop.isOnDynamicPath());
		assertTrue(solutionDeltaNoop.getCostWeight() >= 1);

		assertTrue(nextWorksetUnion.isOnDynamicPath());
		assertTrue(nextWorksetUnion.getCostWeight() >= 1);

		assertTrue(solutionDeltaUnion.isOnDynamicPath());
		assertTrue(solutionDeltaUnion.getCostWeight() >= 1);

		// job graph generation must succeed for the optimized plan
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		// attach the exception as cause so the test report keeps the full stack trace,
		// instead of printing it to stderr and failing with only the (possibly null) message
		throw new AssertionError("Unexpected exception: " + e.getMessage(), e);
	}
}

Source File: UnionReplacementTest.java From flink with Apache License 2.0

4 votes

/**
 * Verifies the ship strategies chosen around union operators when the producers and the
 * consumers of the union run with different parallelisms.
 *
 * <pre>
 * Src1 - Map(fullP) -\-/- Union - Map(fullP) - Out
 *                     X
 * Src2 - Map(halfP) -/-\- Union - Map(halfP) - Out
 * </pre>
 *
 * <p>Expected plan: each union has the parallelism of the map that consumes it and is
 * connected to that map with a FORWARD strategy. An input of a union is connected with
 * FORWARD when the parallelism is preserved and with PARTITION_RANDOM when it changes.
 */
@Test
public void testUnionInputOutputDifferentDOP() throws Exception {

	final int fullParallelism = DEFAULT_PARALLELISM;
	final int halfParallelism = DEFAULT_PARALLELISM / 2;

	ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// two named input mappers, one per parallelism
	DataSet<Tuple2<Long, Long>> fullInput = environment.fromElements(new Tuple2<>(0L, 0L))
		.map(new IdentityMapper<>()).setParallelism(fullParallelism).name("inDopFull");
	DataSet<Tuple2<Long, Long>> halfInput = environment.fromElements(new Tuple2<>(0L, 0L))
		.map(new IdentityMapper<>()).setParallelism(halfParallelism).name("inDopHalf");

	DataSet<Tuple2<Long, Long>> unioned = fullInput.union(halfInput);

	// the same union is consumed by two named output mappers, one per parallelism
	DataSet<Tuple2<Long, Long>> fullOutput = unioned
		.map(new IdentityMapper<>()).setParallelism(fullParallelism).name("outDopFull");
	DataSet<Tuple2<Long, Long>> halfOutput = unioned
		.map(new IdentityMapper<>()).setParallelism(halfParallelism).name("outDopHalf");

	fullOutput.output(new DiscardingOutputFormat<>());
	halfOutput.output(new DiscardingOutputFormat<>());

	// ---- verify the optimized plan ----

	OptimizedPlan plan = compileNoStats(environment.createProgramPlan());
	OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(plan);

	SingleInputPlanNode fullInMap = nodes.getNode("inDopFull");
	SingleInputPlanNode halfInMap = nodes.getNode("inDopHalf");
	SingleInputPlanNode fullOutMap = nodes.getNode("outDopFull");
	SingleInputPlanNode halfOutMap = nodes.getNode("outDopHalf");
	NAryUnionPlanNode fullUnion = (NAryUnionPlanNode) fullOutMap.getInput().getSource();
	NAryUnionPlanNode halfUnion = (NAryUnionPlanNode) halfOutMap.getInput().getSource();

	// input mappers: two outgoing channels each, configured parallelism retained
	assertEquals(2, fullInMap.getOutgoingChannels().size());
	assertEquals(2, halfInMap.getOutgoingChannels().size());
	assertEquals(fullParallelism, fullInMap.getParallelism());
	assertEquals(halfParallelism, halfInMap.getParallelism());

	// unions take the parallelism of their consumer
	assertEquals(fullParallelism, fullUnion.getParallelism());
	assertEquals(halfParallelism, halfUnion.getParallelism());

	// output mappers keep their configured parallelism
	assertEquals(fullParallelism, fullOutMap.getParallelism());
	assertEquals(halfParallelism, halfOutMap.getParallelism());

	// union -> output mapper is always FORWARD
	assertEquals(ShipStrategyType.FORWARD, halfOutMap.getInput().getShipStrategy());
	assertEquals(ShipStrategyType.FORWARD, fullOutMap.getInput().getShipStrategy());

	// input mapper -> union: find out which of the two outgoing channels targets the
	// full-parallelism union; the other one is taken as the half-parallelism channel
	final boolean fullInFirstToFullUnion =
		fullInMap.getOutgoingChannels().get(0).getTarget() == fullUnion;
	final Channel fullToFull = fullInMap.getOutgoingChannels().get(fullInFirstToFullUnion ? 0 : 1);
	final Channel fullToHalf = fullInMap.getOutgoingChannels().get(fullInFirstToFullUnion ? 1 : 0);

	final boolean halfInFirstToFullUnion =
		halfInMap.getOutgoingChannels().get(0).getTarget() == fullUnion;
	final Channel halfToFull = halfInMap.getOutgoingChannels().get(halfInFirstToFullUnion ? 0 : 1);
	final Channel halfToHalf = halfInMap.getOutgoingChannels().get(halfInFirstToFullUnion ? 1 : 0);

	assertEquals(ShipStrategyType.FORWARD, fullToFull.getShipStrategy());
	assertEquals(ShipStrategyType.FORWARD, halfToHalf.getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_RANDOM, fullToHalf.getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_RANDOM, halfToFull.getShipStrategy());
}

Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

4 votes

/**
 * Checks that two CSV sources whose splits are declared as partitioned on field 0 with the
 * same partitioning identifier ("byDate") expose, in the optimized plan of their union,
 * custom partitioning on field 0, no grouping, no ordering, and equal custom partitioners.
 */
@Test
public void checkCoPartitionedSources1() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data1 =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data1.getSplitDataProperties()
			.splitsPartitionedBy("byDate", 0);

	DataSource<Tuple2<Long, String>> data2 =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data2.getSplitDataProperties()
			.splitsPartitionedBy("byDate", 0);

	data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized plan: the sink's predecessor is the union, its inputs are the sources
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	NAryUnionPlanNode unionNode = (NAryUnionPlanNode) sinkNode.getPredecessor();
	SourcePlanNode sourceNode1 = (SourcePlanNode) unionNode.getListOfInputs().get(0).getSource();
	SourcePlanNode sourceNode2 = (SourcePlanNode) unionNode.getListOfInputs().get(1).getSource();

	GlobalProperties gprops1 = sourceNode1.getGlobalProperties();
	LocalProperties lprops1 = sourceNode1.getLocalProperties();
	GlobalProperties gprops2 = sourceNode2.getGlobalProperties();
	LocalProperties lprops2 = sourceNode2.getLocalProperties();

	// dedicated assertions report expected/actual values when they fail
	Assert.assertEquals(new FieldSet(0), new FieldSet(gprops1.getPartitioningFields().toArray()));
	Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops1.getPartitioning());
	Assert.assertNull(lprops1.getGroupedFields());
	Assert.assertNull(lprops1.getOrdering());

	Assert.assertEquals(new FieldSet(0), new FieldSet(gprops2.getPartitioningFields().toArray()));
	Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops2.getPartitioning());
	Assert.assertNull(lprops2.getGroupedFields());
	Assert.assertNull(lprops2.getOrdering());

	// same partitioning identifier on both sources -> partitioners must be equal
	Assert.assertEquals(gprops1.getCustomPartitioner(), gprops2.getCustomPartitioner());
}

Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

4 votes

/**
 * Checks that two CSV sources whose splits are declared as partitioned on field 0 but with
 * different partitioning identifiers ("byCountry" and "byDate") expose, in the optimized plan
 * of their union, custom partitioning on field 0, no grouping, no ordering, and custom
 * partitioners that are not equal to each other.
 */
@Test
public void checkCoPartitionedSources2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data1 =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data1.getSplitDataProperties()
			.splitsPartitionedBy("byCountry", 0);

	DataSource<Tuple2<Long, String>> data2 =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data2.getSplitDataProperties()
			.splitsPartitionedBy("byDate", 0);

	data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized plan: the sink's predecessor is the union, its inputs are the sources
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	NAryUnionPlanNode unionNode = (NAryUnionPlanNode) sinkNode.getPredecessor();
	SourcePlanNode sourceNode1 = (SourcePlanNode) unionNode.getListOfInputs().get(0).getSource();
	SourcePlanNode sourceNode2 = (SourcePlanNode) unionNode.getListOfInputs().get(1).getSource();

	GlobalProperties gprops1 = sourceNode1.getGlobalProperties();
	LocalProperties lprops1 = sourceNode1.getLocalProperties();
	GlobalProperties gprops2 = sourceNode2.getGlobalProperties();
	LocalProperties lprops2 = sourceNode2.getLocalProperties();

	// dedicated assertions report expected/actual values when they fail
	Assert.assertEquals(new FieldSet(0), new FieldSet(gprops1.getPartitioningFields().toArray()));
	Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops1.getPartitioning());
	Assert.assertNull(lprops1.getGroupedFields());
	Assert.assertNull(lprops1.getOrdering());

	Assert.assertEquals(new FieldSet(0), new FieldSet(gprops2.getPartitioningFields().toArray()));
	Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops2.getPartitioning());
	Assert.assertNull(lprops2.getGroupedFields());
	Assert.assertNull(lprops2.getOrdering());

	// different partitioning identifiers -> partitioners must differ
	Assert.assertNotEquals(gprops1.getCustomPartitioner(), gprops2.getCustomPartitioner());
}

Source File: IterationCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles a delta iteration whose two results passed to {@code closeWith} are both unions of
 * mapped worksets, and checks that the union nodes, as well as the single-input nodes that
 * directly consume them, are on the dynamic path with a cost weight of at least 1. Finally
 * the optimized plan is handed to the {@link JobGraphGenerator}, which must not throw.
 */
@Test
public void testWorksetIterationWithUnionRoot() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(43);

		DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
				.map(new MapFunction<Long, Tuple2<Long, Long>>() {
					@Override
					public Tuple2<Long, Long> map(Long value){ return null; }
				});

		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
		iter.closeWith(
				iter.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>())
					.union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>())),
				iter.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>())
					.union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>())))
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		SinkPlanNode sink = op.getDataSinks().iterator().next();
		WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) sink.getInput().getSource();

		// make sure that the root is part of the dynamic path

		// the "NoOp"s that come after the union.
		SingleInputPlanNode nextWorksetNoop = (SingleInputPlanNode) iterNode.getNextWorkSetPlanNode();
		SingleInputPlanNode solutionDeltaNoop = (SingleInputPlanNode) iterNode.getSolutionSetDeltaPlanNode();

		NAryUnionPlanNode nextWorksetUnion = (NAryUnionPlanNode) nextWorksetNoop.getInput().getSource();
		NAryUnionPlanNode solutionDeltaUnion = (NAryUnionPlanNode) solutionDeltaNoop.getInput().getSource();

		assertTrue(nextWorksetNoop.isOnDynamicPath());
		assertTrue(nextWorksetNoop.getCostWeight() >= 1);

		assertTrue(solutionDeltaNoop.isOnDynamicPath());
		assertTrue(solutionDeltaNoop.getCostWeight() >= 1);

		assertTrue(nextWorksetUnion.isOnDynamicPath());
		assertTrue(nextWorksetUnion.getCostWeight() >= 1);

		assertTrue(solutionDeltaUnion.isOnDynamicPath());
		assertTrue(solutionDeltaUnion.getCostWeight() >= 1);

		// job graph generation must succeed for the optimized plan
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		// attach the exception as cause so the test report keeps the full stack trace,
		// instead of printing it to stderr and failing with only the (possibly null) message
		throw new AssertionError("Unexpected exception: " + e.getMessage(), e);
	}
}

Source File: UnionReplacementTest.java From flink with Apache License 2.0

4 votes

/**
 * Verifies the ship strategies chosen around union operators when the producers and the
 * consumers of the union run with different parallelisms.
 *
 * <pre>
 * Src1 - Map(fullP) -\-/- Union - Map(fullP) - Out
 *                     X
 * Src2 - Map(halfP) -/-\- Union - Map(halfP) - Out
 * </pre>
 *
 * <p>Expected plan: each union has the parallelism of the map that consumes it and is
 * connected to that map with a FORWARD strategy. An input of a union is connected with
 * FORWARD when the parallelism is preserved and with PARTITION_RANDOM when it changes.
 */
@Test
public void testUnionInputOutputDifferentDOP() throws Exception {

	final int fullParallelism = DEFAULT_PARALLELISM;
	final int halfParallelism = DEFAULT_PARALLELISM / 2;

	ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// two named input mappers, one per parallelism
	DataSet<Tuple2<Long, Long>> fullInput = environment.fromElements(new Tuple2<>(0L, 0L))
		.map(new IdentityMapper<>()).setParallelism(fullParallelism).name("inDopFull");
	DataSet<Tuple2<Long, Long>> halfInput = environment.fromElements(new Tuple2<>(0L, 0L))
		.map(new IdentityMapper<>()).setParallelism(halfParallelism).name("inDopHalf");

	DataSet<Tuple2<Long, Long>> unioned = fullInput.union(halfInput);

	// the same union is consumed by two named output mappers, one per parallelism
	DataSet<Tuple2<Long, Long>> fullOutput = unioned
		.map(new IdentityMapper<>()).setParallelism(fullParallelism).name("outDopFull");
	DataSet<Tuple2<Long, Long>> halfOutput = unioned
		.map(new IdentityMapper<>()).setParallelism(halfParallelism).name("outDopHalf");

	fullOutput.output(new DiscardingOutputFormat<>());
	halfOutput.output(new DiscardingOutputFormat<>());

	// ---- verify the optimized plan ----

	OptimizedPlan plan = compileNoStats(environment.createProgramPlan());
	OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(plan);

	SingleInputPlanNode fullInMap = nodes.getNode("inDopFull");
	SingleInputPlanNode halfInMap = nodes.getNode("inDopHalf");
	SingleInputPlanNode fullOutMap = nodes.getNode("outDopFull");
	SingleInputPlanNode halfOutMap = nodes.getNode("outDopHalf");
	NAryUnionPlanNode fullUnion = (NAryUnionPlanNode) fullOutMap.getInput().getSource();
	NAryUnionPlanNode halfUnion = (NAryUnionPlanNode) halfOutMap.getInput().getSource();

	// input mappers: two outgoing channels each, configured parallelism retained
	assertEquals(2, fullInMap.getOutgoingChannels().size());
	assertEquals(2, halfInMap.getOutgoingChannels().size());
	assertEquals(fullParallelism, fullInMap.getParallelism());
	assertEquals(halfParallelism, halfInMap.getParallelism());

	// unions take the parallelism of their consumer
	assertEquals(fullParallelism, fullUnion.getParallelism());
	assertEquals(halfParallelism, halfUnion.getParallelism());

	// output mappers keep their configured parallelism
	assertEquals(fullParallelism, fullOutMap.getParallelism());
	assertEquals(halfParallelism, halfOutMap.getParallelism());

	// union -> output mapper is always FORWARD
	assertEquals(ShipStrategyType.FORWARD, halfOutMap.getInput().getShipStrategy());
	assertEquals(ShipStrategyType.FORWARD, fullOutMap.getInput().getShipStrategy());

	// input mapper -> union: find out which of the two outgoing channels targets the
	// full-parallelism union; the other one is taken as the half-parallelism channel
	final boolean fullInFirstToFullUnion =
		fullInMap.getOutgoingChannels().get(0).getTarget() == fullUnion;
	final Channel fullToFull = fullInMap.getOutgoingChannels().get(fullInFirstToFullUnion ? 0 : 1);
	final Channel fullToHalf = fullInMap.getOutgoingChannels().get(fullInFirstToFullUnion ? 1 : 0);

	final boolean halfInFirstToFullUnion =
		halfInMap.getOutgoingChannels().get(0).getTarget() == fullUnion;
	final Channel halfToFull = halfInMap.getOutgoingChannels().get(halfInFirstToFullUnion ? 0 : 1);
	final Channel halfToHalf = halfInMap.getOutgoingChannels().get(halfInFirstToFullUnion ? 1 : 0);

	assertEquals(ShipStrategyType.FORWARD, fullToFull.getShipStrategy());
	assertEquals(ShipStrategyType.FORWARD, halfToHalf.getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_RANDOM, fullToHalf.getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_RANDOM, halfToFull.getShipStrategy());
}

Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

4 votes

/**
 * Checks that two CSV sources whose splits are declared as partitioned on field 0 with the
 * same partitioning identifier ("byDate") expose, in the optimized plan of their union,
 * custom partitioning on field 0, no grouping, no ordering, and equal custom partitioners.
 */
@Test
public void checkCoPartitionedSources1() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data1 =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data1.getSplitDataProperties()
			.splitsPartitionedBy("byDate", 0);

	DataSource<Tuple2<Long, String>> data2 =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data2.getSplitDataProperties()
			.splitsPartitionedBy("byDate", 0);

	data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized plan: the sink's predecessor is the union, its inputs are the sources
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	NAryUnionPlanNode unionNode = (NAryUnionPlanNode) sinkNode.getPredecessor();
	SourcePlanNode sourceNode1 = (SourcePlanNode) unionNode.getListOfInputs().get(0).getSource();
	SourcePlanNode sourceNode2 = (SourcePlanNode) unionNode.getListOfInputs().get(1).getSource();

	GlobalProperties gprops1 = sourceNode1.getGlobalProperties();
	LocalProperties lprops1 = sourceNode1.getLocalProperties();
	GlobalProperties gprops2 = sourceNode2.getGlobalProperties();
	LocalProperties lprops2 = sourceNode2.getLocalProperties();

	// dedicated assertions report expected/actual values when they fail
	Assert.assertEquals(new FieldSet(0), new FieldSet(gprops1.getPartitioningFields().toArray()));
	Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops1.getPartitioning());
	Assert.assertNull(lprops1.getGroupedFields());
	Assert.assertNull(lprops1.getOrdering());

	Assert.assertEquals(new FieldSet(0), new FieldSet(gprops2.getPartitioningFields().toArray()));
	Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops2.getPartitioning());
	Assert.assertNull(lprops2.getGroupedFields());
	Assert.assertNull(lprops2.getOrdering());

	// same partitioning identifier on both sources -> partitioners must be equal
	Assert.assertEquals(gprops1.getCustomPartitioner(), gprops2.getCustomPartitioner());
}

Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

4 votes

/**
 * Checks that two CSV sources whose splits are declared as partitioned on field 0 but with
 * different partitioning identifiers ("byCountry" and "byDate") expose, in the optimized plan
 * of their union, custom partitioning on field 0, no grouping, no ordering, and custom
 * partitioners that are not equal to each other.
 */
@Test
public void checkCoPartitionedSources2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data1 =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data1.getSplitDataProperties()
			.splitsPartitionedBy("byCountry", 0);

	DataSource<Tuple2<Long, String>> data2 =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data2.getSplitDataProperties()
			.splitsPartitionedBy("byDate", 0);

	data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized plan: the sink's predecessor is the union, its inputs are the sources
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	NAryUnionPlanNode unionNode = (NAryUnionPlanNode) sinkNode.getPredecessor();
	SourcePlanNode sourceNode1 = (SourcePlanNode) unionNode.getListOfInputs().get(0).getSource();
	SourcePlanNode sourceNode2 = (SourcePlanNode) unionNode.getListOfInputs().get(1).getSource();

	GlobalProperties gprops1 = sourceNode1.getGlobalProperties();
	LocalProperties lprops1 = sourceNode1.getLocalProperties();
	GlobalProperties gprops2 = sourceNode2.getGlobalProperties();
	LocalProperties lprops2 = sourceNode2.getLocalProperties();

	// dedicated assertions report expected/actual values when they fail
	Assert.assertEquals(new FieldSet(0), new FieldSet(gprops1.getPartitioningFields().toArray()));
	Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops1.getPartitioning());
	Assert.assertNull(lprops1.getGroupedFields());
	Assert.assertNull(lprops1.getOrdering());

	Assert.assertEquals(new FieldSet(0), new FieldSet(gprops2.getPartitioningFields().toArray()));
	Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops2.getPartitioning());
	Assert.assertNull(lprops2.getGroupedFields());
	Assert.assertNull(lprops2.getOrdering());

	// different partitioning identifiers -> partitioners must differ
	Assert.assertNotEquals(gprops1.getCustomPartitioner(), gprops2.getCustomPartitioner());
}

org.apache.flink.optimizer.plan.NAryUnionPlanNode Java Examples