org.apache.flink.optimizer.plan.NAryUnionPlanNode Java Examples
The following examples show how to use
org.apache.flink.optimizer.plan.NAryUnionPlanNode.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BinaryUnionReplacer.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public void collect(Channel in, List<Channel> inputs) { if (in.getSource() instanceof NAryUnionPlanNode) { // sanity check if (in.getShipStrategy() != ShipStrategyType.FORWARD) { throw new CompilerException("Bug: Plan generation for Unions picked a ship strategy between binary plan operators."); } if (!(in.getLocalStrategy() == null || in.getLocalStrategy() == LocalStrategy.NONE)) { throw new CompilerException("Bug: Plan generation for Unions picked a local strategy between binary plan operators."); } inputs.addAll(((NAryUnionPlanNode) in.getSource()).getListOfInputs()); } else { // is not a collapsed union node, so we take the channel directly inputs.add(in); } }
Example #2
Source File: BinaryUnionReplacer.java From flink with Apache License 2.0 | 6 votes |
public void collect(Channel in, List<Channel> inputs) { if (in.getSource() instanceof NAryUnionPlanNode) { // sanity check if (in.getShipStrategy() != ShipStrategyType.FORWARD) { throw new CompilerException("Bug: Plan generation for Unions picked a ship strategy between binary plan operators."); } if (!(in.getLocalStrategy() == null || in.getLocalStrategy() == LocalStrategy.NONE)) { throw new CompilerException("Bug: Plan generation for Unions picked a local strategy between binary plan operators."); } inputs.addAll(((NAryUnionPlanNode) in.getSource()).getListOfInputs()); } else { // is not a collapsed union node, so we take the channel directly inputs.add(in); } }
Example #3
Source File: BinaryUnionReplacer.java From flink with Apache License 2.0 | 6 votes |
public void collect(Channel in, List<Channel> inputs) { if (in.getSource() instanceof NAryUnionPlanNode) { // sanity check if (in.getShipStrategy() != ShipStrategyType.FORWARD) { throw new CompilerException("Bug: Plan generation for Unions picked a ship strategy between binary plan operators."); } if (!(in.getLocalStrategy() == null || in.getLocalStrategy() == LocalStrategy.NONE)) { throw new CompilerException("Bug: Plan generation for Unions picked a local strategy between binary plan operators."); } inputs.addAll(((NAryUnionPlanNode) in.getSource()).getListOfInputs()); } else { // is not a collapsed union node, so we take the channel directly inputs.add(in); } }
Example #4
Source File: IterationCompilerTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Compiles a delta iteration whose two {@code closeWith} arguments are both unions and checks
 * the optimized plan.
 *
 * <p>The plan nodes returned by {@code getNextWorkSetPlanNode()} and
 * {@code getSolutionSetDeltaPlanNode()}, as well as the union nodes feeding them, must be on the
 * dynamic path with a cost weight of at least 1. Finally the plan is translated into a job graph.
 *
 * <p>Any exception raised while building or compiling the plan fails the test with the
 * exception attached as the cause.
 */
@Test
public void testWorksetIterationWithUnionRoot() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> map(Long value) {
                        return null;
                    }
                });

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
        iter.closeWith(
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                    .union(
                        iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())),
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                    .union(
                        iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())))
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) sink.getInput().getSource();

        // make sure that the root is part of the dynamic path

        // the "NoOps" that come after the union.
        SingleInputPlanNode nextWorksetNoop = (SingleInputPlanNode) iterNode.getNextWorkSetPlanNode();
        SingleInputPlanNode solutionDeltaNoop = (SingleInputPlanNode) iterNode.getSolutionSetDeltaPlanNode();

        // the unions that feed those nodes
        NAryUnionPlanNode nextWorksetUnion = (NAryUnionPlanNode) nextWorksetNoop.getInput().getSource();
        NAryUnionPlanNode solutionDeltaUnion = (NAryUnionPlanNode) solutionDeltaNoop.getInput().getSource();

        assertTrue(nextWorksetNoop.isOnDynamicPath());
        assertTrue(nextWorksetNoop.getCostWeight() >= 1);

        assertTrue(solutionDeltaNoop.isOnDynamicPath());
        assertTrue(solutionDeltaNoop.getCostWeight() >= 1);

        assertTrue(nextWorksetUnion.isOnDynamicPath());
        assertTrue(nextWorksetUnion.getCostWeight() >= 1);

        assertTrue(solutionDeltaUnion.isOnDynamicPath());
        assertTrue(solutionDeltaUnion.getCostWeight() >= 1);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        // Fail the test but keep the original exception as the cause, so its stack trace
        // appears in the test report instead of only on stderr.
        throw new AssertionError(e.getMessage(), e);
    }
}
Example #5
Source File: UnionReplacementTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Test the input and output shipping strategies for union operators with input and output * operators with different parallelisms. * * Src1 - Map(fullP) -\-/- Union - Map(fullP) - Out * X * Src2 - Map(halfP) -/-\- Union - Map(halfP) - Out * * The union operator must always have the same parallelism as its successor and connect to it * with a FORWARD strategy. * In this program, the input connections for union should be FORWARD for parallelism-preserving * connections and PARTITION_RANDOM for parallelism-changing connections. * */ @Test public void testUnionInputOutputDifferentDOP() throws Exception { int fullDop = DEFAULT_PARALLELISM; int halfDop = DEFAULT_PARALLELISM / 2; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSet<Tuple2<Long, Long>> in1 = env.fromElements(new Tuple2<>(0L, 0L)) .map(new IdentityMapper<>()).setParallelism(fullDop).name("inDopFull"); DataSet<Tuple2<Long, Long>> in2 = env.fromElements(new Tuple2<>(0L, 0L)) .map(new IdentityMapper<>()).setParallelism(halfDop).name("inDopHalf"); DataSet<Tuple2<Long, Long>> union = in1.union(in2); DataSet<Tuple2<Long, Long>> dopFullMap = union .map(new IdentityMapper<>()).setParallelism(fullDop).name("outDopFull"); DataSet<Tuple2<Long, Long>> dopHalfMap = union .map(new IdentityMapper<>()).setParallelism(halfDop).name("outDopHalf"); dopFullMap.output(new DiscardingOutputFormat<>()); dopHalfMap.output(new DiscardingOutputFormat<>()); // ----------------------------------------------------------------------------------------- // Verify optimized plan // ----------------------------------------------------------------------------------------- OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan()); OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan); SingleInputPlanNode inDopFull = resolver.getNode("inDopFull"); SingleInputPlanNode inDopHalf = resolver.getNode("inDopHalf"); SingleInputPlanNode 
outDopFull = resolver.getNode("outDopFull"); SingleInputPlanNode outDopHalf = resolver.getNode("outDopHalf"); NAryUnionPlanNode unionDopFull = (NAryUnionPlanNode) outDopFull.getInput().getSource(); NAryUnionPlanNode unionDopHalf = (NAryUnionPlanNode) outDopHalf.getInput().getSource(); // check in map nodes assertEquals(2, inDopFull.getOutgoingChannels().size()); assertEquals(2, inDopHalf.getOutgoingChannels().size()); assertEquals(fullDop, inDopFull.getParallelism()); assertEquals(halfDop, inDopHalf.getParallelism()); // check union nodes assertEquals(fullDop, unionDopFull.getParallelism()); assertEquals(halfDop, unionDopHalf.getParallelism()); // check out map nodes assertEquals(fullDop, outDopFull.getParallelism()); assertEquals(halfDop, outDopHalf.getParallelism()); // check Union -> outMap ship strategies assertEquals(ShipStrategyType.FORWARD, outDopHalf.getInput().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, outDopFull.getInput().getShipStrategy()); // check inMap -> Union ship strategies Channel fullFull; Channel fullHalf; Channel halfFull; Channel halfHalf; if (inDopFull.getOutgoingChannels().get(0).getTarget() == unionDopFull) { fullFull = inDopFull.getOutgoingChannels().get(0); fullHalf = inDopFull.getOutgoingChannels().get(1); } else { fullFull = inDopFull.getOutgoingChannels().get(1); fullHalf = inDopFull.getOutgoingChannels().get(0); } if (inDopHalf.getOutgoingChannels().get(0).getTarget() == unionDopFull) { halfFull = inDopHalf.getOutgoingChannels().get(0); halfHalf = inDopHalf.getOutgoingChannels().get(1); } else { halfFull = inDopHalf.getOutgoingChannels().get(1); halfHalf = inDopHalf.getOutgoingChannels().get(0); } assertEquals(ShipStrategyType.FORWARD, fullFull.getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, halfHalf.getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_RANDOM, fullHalf.getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_RANDOM, halfFull.getShipStrategy()); }
Example #6
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void checkCoPartitionedSources1() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSource<Tuple2<Long, String>> data1 = env.readCsvFile("/some/path").types(Long.class, String.class); data1.getSplitDataProperties() .splitsPartitionedBy("byDate", 0); DataSource<Tuple2<Long, String>> data2 = env.readCsvFile("/some/path").types(Long.class, String.class); data2.getSplitDataProperties() .splitsPartitionedBy("byDate", 0); data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>()); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SourcePlanNode sourceNode1 = (SourcePlanNode) ((NAryUnionPlanNode)sinkNode.getPredecessor()).getListOfInputs().get(0).getSource(); SourcePlanNode sourceNode2 = (SourcePlanNode) ((NAryUnionPlanNode)sinkNode.getPredecessor()).getListOfInputs().get(1).getSource(); GlobalProperties gprops1 = sourceNode1.getGlobalProperties(); LocalProperties lprops1 = sourceNode1.getLocalProperties(); GlobalProperties gprops2 = sourceNode2.getGlobalProperties(); LocalProperties lprops2 = sourceNode2.getLocalProperties(); Assert.assertTrue((new FieldSet(gprops1.getPartitioningFields().toArray())).equals(new FieldSet(0))); Assert.assertTrue(gprops1.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING); Assert.assertTrue(lprops1.getGroupedFields() == null); Assert.assertTrue(lprops1.getOrdering() == null); Assert.assertTrue((new FieldSet(gprops2.getPartitioningFields().toArray())).equals(new FieldSet(0))); Assert.assertTrue(gprops2.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING); Assert.assertTrue(lprops2.getGroupedFields() == null); Assert.assertTrue(lprops2.getOrdering() == null); Assert.assertTrue(gprops1.getCustomPartitioner().equals(gprops2.getCustomPartitioner())); }
Example #7
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void checkCoPartitionedSources2() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSource<Tuple2<Long, String>> data1 = env.readCsvFile("/some/path").types(Long.class, String.class); data1.getSplitDataProperties() .splitsPartitionedBy("byCountry", 0); DataSource<Tuple2<Long, String>> data2 = env.readCsvFile("/some/path").types(Long.class, String.class); data2.getSplitDataProperties() .splitsPartitionedBy("byDate", 0); data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>()); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SourcePlanNode sourceNode1 = (SourcePlanNode) ((NAryUnionPlanNode)sinkNode.getPredecessor()).getListOfInputs().get(0).getSource(); SourcePlanNode sourceNode2 = (SourcePlanNode) ((NAryUnionPlanNode)sinkNode.getPredecessor()).getListOfInputs().get(1).getSource(); GlobalProperties gprops1 = sourceNode1.getGlobalProperties(); LocalProperties lprops1 = sourceNode1.getLocalProperties(); GlobalProperties gprops2 = sourceNode2.getGlobalProperties(); LocalProperties lprops2 = sourceNode2.getLocalProperties(); Assert.assertTrue((new FieldSet(gprops1.getPartitioningFields().toArray())).equals(new FieldSet(0))); Assert.assertTrue(gprops1.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING); Assert.assertTrue(lprops1.getGroupedFields() == null); Assert.assertTrue(lprops1.getOrdering() == null); Assert.assertTrue((new FieldSet(gprops2.getPartitioningFields().toArray())).equals(new FieldSet(0))); Assert.assertTrue(gprops2.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING); Assert.assertTrue(lprops2.getGroupedFields() == null); Assert.assertTrue(lprops2.getOrdering() == null); Assert.assertTrue(!gprops1.getCustomPartitioner().equals(gprops2.getCustomPartitioner())); }
Example #8
Source File: IterationCompilerTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Compiles a delta iteration whose two {@code closeWith} arguments are both unions and checks
 * the optimized plan.
 *
 * <p>The plan nodes returned by {@code getNextWorkSetPlanNode()} and
 * {@code getSolutionSetDeltaPlanNode()}, as well as the union nodes feeding them, must be on the
 * dynamic path with a cost weight of at least 1. Finally the plan is translated into a job graph.
 *
 * <p>Any exception raised while building or compiling the plan fails the test with the
 * exception attached as the cause.
 */
@Test
public void testWorksetIterationWithUnionRoot() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> map(Long value) {
                        return null;
                    }
                });

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
        iter.closeWith(
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                    .union(
                        iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())),
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                    .union(
                        iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())))
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) sink.getInput().getSource();

        // make sure that the root is part of the dynamic path

        // the "NoOps" that come after the union.
        SingleInputPlanNode nextWorksetNoop = (SingleInputPlanNode) iterNode.getNextWorkSetPlanNode();
        SingleInputPlanNode solutionDeltaNoop = (SingleInputPlanNode) iterNode.getSolutionSetDeltaPlanNode();

        // the unions that feed those nodes
        NAryUnionPlanNode nextWorksetUnion = (NAryUnionPlanNode) nextWorksetNoop.getInput().getSource();
        NAryUnionPlanNode solutionDeltaUnion = (NAryUnionPlanNode) solutionDeltaNoop.getInput().getSource();

        assertTrue(nextWorksetNoop.isOnDynamicPath());
        assertTrue(nextWorksetNoop.getCostWeight() >= 1);

        assertTrue(solutionDeltaNoop.isOnDynamicPath());
        assertTrue(solutionDeltaNoop.getCostWeight() >= 1);

        assertTrue(nextWorksetUnion.isOnDynamicPath());
        assertTrue(nextWorksetUnion.getCostWeight() >= 1);

        assertTrue(solutionDeltaUnion.isOnDynamicPath());
        assertTrue(solutionDeltaUnion.getCostWeight() >= 1);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        // Fail the test but keep the original exception as the cause, so its stack trace
        // appears in the test report instead of only on stderr.
        throw new AssertionError(e.getMessage(), e);
    }
}
Example #9
Source File: UnionReplacementTest.java From flink with Apache License 2.0 | 4 votes |
/** * Test the input and output shipping strategies for union operators with input and output * operators with different parallelisms. * * Src1 - Map(fullP) -\-/- Union - Map(fullP) - Out * X * Src2 - Map(halfP) -/-\- Union - Map(halfP) - Out * * The union operator must always have the same parallelism as its successor and connect to it * with a FORWARD strategy. * In this program, the input connections for union should be FORWARD for parallelism-preserving * connections and PARTITION_RANDOM for parallelism-changing connections. * */ @Test public void testUnionInputOutputDifferentDOP() throws Exception { int fullDop = DEFAULT_PARALLELISM; int halfDop = DEFAULT_PARALLELISM / 2; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSet<Tuple2<Long, Long>> in1 = env.fromElements(new Tuple2<>(0L, 0L)) .map(new IdentityMapper<>()).setParallelism(fullDop).name("inDopFull"); DataSet<Tuple2<Long, Long>> in2 = env.fromElements(new Tuple2<>(0L, 0L)) .map(new IdentityMapper<>()).setParallelism(halfDop).name("inDopHalf"); DataSet<Tuple2<Long, Long>> union = in1.union(in2); DataSet<Tuple2<Long, Long>> dopFullMap = union .map(new IdentityMapper<>()).setParallelism(fullDop).name("outDopFull"); DataSet<Tuple2<Long, Long>> dopHalfMap = union .map(new IdentityMapper<>()).setParallelism(halfDop).name("outDopHalf"); dopFullMap.output(new DiscardingOutputFormat<>()); dopHalfMap.output(new DiscardingOutputFormat<>()); // ----------------------------------------------------------------------------------------- // Verify optimized plan // ----------------------------------------------------------------------------------------- OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan()); OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan); SingleInputPlanNode inDopFull = resolver.getNode("inDopFull"); SingleInputPlanNode inDopHalf = resolver.getNode("inDopHalf"); SingleInputPlanNode 
outDopFull = resolver.getNode("outDopFull"); SingleInputPlanNode outDopHalf = resolver.getNode("outDopHalf"); NAryUnionPlanNode unionDopFull = (NAryUnionPlanNode) outDopFull.getInput().getSource(); NAryUnionPlanNode unionDopHalf = (NAryUnionPlanNode) outDopHalf.getInput().getSource(); // check in map nodes assertEquals(2, inDopFull.getOutgoingChannels().size()); assertEquals(2, inDopHalf.getOutgoingChannels().size()); assertEquals(fullDop, inDopFull.getParallelism()); assertEquals(halfDop, inDopHalf.getParallelism()); // check union nodes assertEquals(fullDop, unionDopFull.getParallelism()); assertEquals(halfDop, unionDopHalf.getParallelism()); // check out map nodes assertEquals(fullDop, outDopFull.getParallelism()); assertEquals(halfDop, outDopHalf.getParallelism()); // check Union -> outMap ship strategies assertEquals(ShipStrategyType.FORWARD, outDopHalf.getInput().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, outDopFull.getInput().getShipStrategy()); // check inMap -> Union ship strategies Channel fullFull; Channel fullHalf; Channel halfFull; Channel halfHalf; if (inDopFull.getOutgoingChannels().get(0).getTarget() == unionDopFull) { fullFull = inDopFull.getOutgoingChannels().get(0); fullHalf = inDopFull.getOutgoingChannels().get(1); } else { fullFull = inDopFull.getOutgoingChannels().get(1); fullHalf = inDopFull.getOutgoingChannels().get(0); } if (inDopHalf.getOutgoingChannels().get(0).getTarget() == unionDopFull) { halfFull = inDopHalf.getOutgoingChannels().get(0); halfHalf = inDopHalf.getOutgoingChannels().get(1); } else { halfFull = inDopHalf.getOutgoingChannels().get(1); halfHalf = inDopHalf.getOutgoingChannels().get(0); } assertEquals(ShipStrategyType.FORWARD, fullFull.getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, halfHalf.getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_RANDOM, fullHalf.getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_RANDOM, halfFull.getShipStrategy()); }
Example #10
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void checkCoPartitionedSources1() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSource<Tuple2<Long, String>> data1 = env.readCsvFile("/some/path").types(Long.class, String.class); data1.getSplitDataProperties() .splitsPartitionedBy("byDate", 0); DataSource<Tuple2<Long, String>> data2 = env.readCsvFile("/some/path").types(Long.class, String.class); data2.getSplitDataProperties() .splitsPartitionedBy("byDate", 0); data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>()); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SourcePlanNode sourceNode1 = (SourcePlanNode) ((NAryUnionPlanNode)sinkNode.getPredecessor()).getListOfInputs().get(0).getSource(); SourcePlanNode sourceNode2 = (SourcePlanNode) ((NAryUnionPlanNode)sinkNode.getPredecessor()).getListOfInputs().get(1).getSource(); GlobalProperties gprops1 = sourceNode1.getGlobalProperties(); LocalProperties lprops1 = sourceNode1.getLocalProperties(); GlobalProperties gprops2 = sourceNode2.getGlobalProperties(); LocalProperties lprops2 = sourceNode2.getLocalProperties(); Assert.assertTrue((new FieldSet(gprops1.getPartitioningFields().toArray())).equals(new FieldSet(0))); Assert.assertTrue(gprops1.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING); Assert.assertTrue(lprops1.getGroupedFields() == null); Assert.assertTrue(lprops1.getOrdering() == null); Assert.assertTrue((new FieldSet(gprops2.getPartitioningFields().toArray())).equals(new FieldSet(0))); Assert.assertTrue(gprops2.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING); Assert.assertTrue(lprops2.getGroupedFields() == null); Assert.assertTrue(lprops2.getOrdering() == null); Assert.assertTrue(gprops1.getCustomPartitioner().equals(gprops2.getCustomPartitioner())); }
Example #11
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void checkCoPartitionedSources2() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSource<Tuple2<Long, String>> data1 = env.readCsvFile("/some/path").types(Long.class, String.class); data1.getSplitDataProperties() .splitsPartitionedBy("byCountry", 0); DataSource<Tuple2<Long, String>> data2 = env.readCsvFile("/some/path").types(Long.class, String.class); data2.getSplitDataProperties() .splitsPartitionedBy("byDate", 0); data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>()); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SourcePlanNode sourceNode1 = (SourcePlanNode) ((NAryUnionPlanNode)sinkNode.getPredecessor()).getListOfInputs().get(0).getSource(); SourcePlanNode sourceNode2 = (SourcePlanNode) ((NAryUnionPlanNode)sinkNode.getPredecessor()).getListOfInputs().get(1).getSource(); GlobalProperties gprops1 = sourceNode1.getGlobalProperties(); LocalProperties lprops1 = sourceNode1.getLocalProperties(); GlobalProperties gprops2 = sourceNode2.getGlobalProperties(); LocalProperties lprops2 = sourceNode2.getLocalProperties(); Assert.assertTrue((new FieldSet(gprops1.getPartitioningFields().toArray())).equals(new FieldSet(0))); Assert.assertTrue(gprops1.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING); Assert.assertTrue(lprops1.getGroupedFields() == null); Assert.assertTrue(lprops1.getOrdering() == null); Assert.assertTrue((new FieldSet(gprops2.getPartitioningFields().toArray())).equals(new FieldSet(0))); Assert.assertTrue(gprops2.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING); Assert.assertTrue(lprops2.getGroupedFields() == null); Assert.assertTrue(lprops2.getOrdering() == null); Assert.assertTrue(!gprops1.getCustomPartitioner().equals(gprops2.getCustomPartitioner())); }
Example #12
Source File: IterationCompilerTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Compiles a delta iteration whose two {@code closeWith} arguments are both unions and checks
 * the optimized plan.
 *
 * <p>The plan nodes returned by {@code getNextWorkSetPlanNode()} and
 * {@code getSolutionSetDeltaPlanNode()}, as well as the union nodes feeding them, must be on the
 * dynamic path with a cost weight of at least 1. Finally the plan is translated into a job graph.
 *
 * <p>Any exception raised while building or compiling the plan fails the test with the
 * exception attached as the cause.
 */
@Test
public void testWorksetIterationWithUnionRoot() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> map(Long value) {
                        return null;
                    }
                });

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
        iter.closeWith(
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                    .union(
                        iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())),
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                    .union(
                        iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())))
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) sink.getInput().getSource();

        // make sure that the root is part of the dynamic path

        // the "NoOps" that come after the union.
        SingleInputPlanNode nextWorksetNoop = (SingleInputPlanNode) iterNode.getNextWorkSetPlanNode();
        SingleInputPlanNode solutionDeltaNoop = (SingleInputPlanNode) iterNode.getSolutionSetDeltaPlanNode();

        // the unions that feed those nodes
        NAryUnionPlanNode nextWorksetUnion = (NAryUnionPlanNode) nextWorksetNoop.getInput().getSource();
        NAryUnionPlanNode solutionDeltaUnion = (NAryUnionPlanNode) solutionDeltaNoop.getInput().getSource();

        assertTrue(nextWorksetNoop.isOnDynamicPath());
        assertTrue(nextWorksetNoop.getCostWeight() >= 1);

        assertTrue(solutionDeltaNoop.isOnDynamicPath());
        assertTrue(solutionDeltaNoop.getCostWeight() >= 1);

        assertTrue(nextWorksetUnion.isOnDynamicPath());
        assertTrue(nextWorksetUnion.getCostWeight() >= 1);

        assertTrue(solutionDeltaUnion.isOnDynamicPath());
        assertTrue(solutionDeltaUnion.getCostWeight() >= 1);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        // Fail the test but keep the original exception as the cause, so its stack trace
        // appears in the test report instead of only on stderr.
        throw new AssertionError(e.getMessage(), e);
    }
}
Example #13
Source File: UnionReplacementTest.java From flink with Apache License 2.0 | 4 votes |
/** * Test the input and output shipping strategies for union operators with input and output * operators with different parallelisms. * * Src1 - Map(fullP) -\-/- Union - Map(fullP) - Out * X * Src2 - Map(halfP) -/-\- Union - Map(halfP) - Out * * The union operator must always have the same parallelism as its successor and connect to it * with a FORWARD strategy. * In this program, the input connections for union should be FORWARD for parallelism-preserving * connections and PARTITION_RANDOM for parallelism-changing connections. * */ @Test public void testUnionInputOutputDifferentDOP() throws Exception { int fullDop = DEFAULT_PARALLELISM; int halfDop = DEFAULT_PARALLELISM / 2; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSet<Tuple2<Long, Long>> in1 = env.fromElements(new Tuple2<>(0L, 0L)) .map(new IdentityMapper<>()).setParallelism(fullDop).name("inDopFull"); DataSet<Tuple2<Long, Long>> in2 = env.fromElements(new Tuple2<>(0L, 0L)) .map(new IdentityMapper<>()).setParallelism(halfDop).name("inDopHalf"); DataSet<Tuple2<Long, Long>> union = in1.union(in2); DataSet<Tuple2<Long, Long>> dopFullMap = union .map(new IdentityMapper<>()).setParallelism(fullDop).name("outDopFull"); DataSet<Tuple2<Long, Long>> dopHalfMap = union .map(new IdentityMapper<>()).setParallelism(halfDop).name("outDopHalf"); dopFullMap.output(new DiscardingOutputFormat<>()); dopHalfMap.output(new DiscardingOutputFormat<>()); // ----------------------------------------------------------------------------------------- // Verify optimized plan // ----------------------------------------------------------------------------------------- OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan()); OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan); SingleInputPlanNode inDopFull = resolver.getNode("inDopFull"); SingleInputPlanNode inDopHalf = resolver.getNode("inDopHalf"); SingleInputPlanNode 
outDopFull = resolver.getNode("outDopFull"); SingleInputPlanNode outDopHalf = resolver.getNode("outDopHalf"); NAryUnionPlanNode unionDopFull = (NAryUnionPlanNode) outDopFull.getInput().getSource(); NAryUnionPlanNode unionDopHalf = (NAryUnionPlanNode) outDopHalf.getInput().getSource(); // check in map nodes assertEquals(2, inDopFull.getOutgoingChannels().size()); assertEquals(2, inDopHalf.getOutgoingChannels().size()); assertEquals(fullDop, inDopFull.getParallelism()); assertEquals(halfDop, inDopHalf.getParallelism()); // check union nodes assertEquals(fullDop, unionDopFull.getParallelism()); assertEquals(halfDop, unionDopHalf.getParallelism()); // check out map nodes assertEquals(fullDop, outDopFull.getParallelism()); assertEquals(halfDop, outDopHalf.getParallelism()); // check Union -> outMap ship strategies assertEquals(ShipStrategyType.FORWARD, outDopHalf.getInput().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, outDopFull.getInput().getShipStrategy()); // check inMap -> Union ship strategies Channel fullFull; Channel fullHalf; Channel halfFull; Channel halfHalf; if (inDopFull.getOutgoingChannels().get(0).getTarget() == unionDopFull) { fullFull = inDopFull.getOutgoingChannels().get(0); fullHalf = inDopFull.getOutgoingChannels().get(1); } else { fullFull = inDopFull.getOutgoingChannels().get(1); fullHalf = inDopFull.getOutgoingChannels().get(0); } if (inDopHalf.getOutgoingChannels().get(0).getTarget() == unionDopFull) { halfFull = inDopHalf.getOutgoingChannels().get(0); halfHalf = inDopHalf.getOutgoingChannels().get(1); } else { halfFull = inDopHalf.getOutgoingChannels().get(1); halfHalf = inDopHalf.getOutgoingChannels().get(0); } assertEquals(ShipStrategyType.FORWARD, fullFull.getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, halfHalf.getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_RANDOM, fullHalf.getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_RANDOM, halfFull.getShipStrategy()); }
Example #14
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void checkCoPartitionedSources1() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSource<Tuple2<Long, String>> data1 = env.readCsvFile("/some/path").types(Long.class, String.class); data1.getSplitDataProperties() .splitsPartitionedBy("byDate", 0); DataSource<Tuple2<Long, String>> data2 = env.readCsvFile("/some/path").types(Long.class, String.class); data2.getSplitDataProperties() .splitsPartitionedBy("byDate", 0); data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>()); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SourcePlanNode sourceNode1 = (SourcePlanNode) ((NAryUnionPlanNode)sinkNode.getPredecessor()).getListOfInputs().get(0).getSource(); SourcePlanNode sourceNode2 = (SourcePlanNode) ((NAryUnionPlanNode)sinkNode.getPredecessor()).getListOfInputs().get(1).getSource(); GlobalProperties gprops1 = sourceNode1.getGlobalProperties(); LocalProperties lprops1 = sourceNode1.getLocalProperties(); GlobalProperties gprops2 = sourceNode2.getGlobalProperties(); LocalProperties lprops2 = sourceNode2.getLocalProperties(); Assert.assertTrue((new FieldSet(gprops1.getPartitioningFields().toArray())).equals(new FieldSet(0))); Assert.assertTrue(gprops1.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING); Assert.assertTrue(lprops1.getGroupedFields() == null); Assert.assertTrue(lprops1.getOrdering() == null); Assert.assertTrue((new FieldSet(gprops2.getPartitioningFields().toArray())).equals(new FieldSet(0))); Assert.assertTrue(gprops2.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING); Assert.assertTrue(lprops2.getGroupedFields() == null); Assert.assertTrue(lprops2.getOrdering() == null); Assert.assertTrue(gprops1.getCustomPartitioner().equals(gprops2.getCustomPartitioner())); }
Example #15
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void checkCoPartitionedSources2() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSource<Tuple2<Long, String>> data1 = env.readCsvFile("/some/path").types(Long.class, String.class); data1.getSplitDataProperties() .splitsPartitionedBy("byCountry", 0); DataSource<Tuple2<Long, String>> data2 = env.readCsvFile("/some/path").types(Long.class, String.class); data2.getSplitDataProperties() .splitsPartitionedBy("byDate", 0); data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>()); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SourcePlanNode sourceNode1 = (SourcePlanNode) ((NAryUnionPlanNode)sinkNode.getPredecessor()).getListOfInputs().get(0).getSource(); SourcePlanNode sourceNode2 = (SourcePlanNode) ((NAryUnionPlanNode)sinkNode.getPredecessor()).getListOfInputs().get(1).getSource(); GlobalProperties gprops1 = sourceNode1.getGlobalProperties(); LocalProperties lprops1 = sourceNode1.getLocalProperties(); GlobalProperties gprops2 = sourceNode2.getGlobalProperties(); LocalProperties lprops2 = sourceNode2.getLocalProperties(); Assert.assertTrue((new FieldSet(gprops1.getPartitioningFields().toArray())).equals(new FieldSet(0))); Assert.assertTrue(gprops1.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING); Assert.assertTrue(lprops1.getGroupedFields() == null); Assert.assertTrue(lprops1.getOrdering() == null); Assert.assertTrue((new FieldSet(gprops2.getPartitioningFields().toArray())).equals(new FieldSet(0))); Assert.assertTrue(gprops2.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING); Assert.assertTrue(lprops2.getGroupedFields() == null); Assert.assertTrue(lprops2.getOrdering() == null); Assert.assertTrue(!gprops1.getCustomPartitioner().equals(gprops2.getCustomPartitioner())); }