Java Code Examples for org.apache.flink.optimizer.plan.SinkPlanNode#getPredecessor()
The following examples show how to use org.apache.flink.optimizer.plan.SinkPlanNode#getPredecessor().
Each example notes the source file, project, and license it was taken from.
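All of the examples share the same traversal skeleton: build a Plan, compile it into an OptimizedPlan, take a SinkPlanNode from getDataSinks(), and step upstream with getPredecessor(), casting the result to the expected node type. Below is a minimal sketch of that pattern. It assumes the test extends Flink's CompilerTestBase (which supplies compileNoStats() and DEFAULT_PARALLELISM, as the examples' source files do); the class and method names here are illustrative only, not part of Flink.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.optimizer.plan.SinkPlanNode;
import org.apache.flink.optimizer.plan.SourcePlanNode;
import org.apache.flink.optimizer.util.CompilerTestBase;
import org.junit.Assert;
import org.junit.Test;

// Illustrative test class, not taken from the Flink code base.
public class GetPredecessorSketch extends CompilerTestBase {

    @Test
    public void walkFromSinkToSource() {
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        // A trivial source -> sink program, just to have something to compile.
        env.generateSequence(0, 10).output(new DiscardingOutputFormat<Long>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);

        // getPredecessor() steps one node upstream from the sink.
        SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
        PlanNode predecessor = sinkNode.getPredecessor();

        // The concrete subtype depends on the plan shape: SourcePlanNode for a
        // plain source, SingleInputPlanNode for map/reduce operators, and
        // DualInputPlanNode for joins, as the examples below demonstrate.
        Assert.assertTrue(predecessor instanceof SourcePlanNode);
    }
}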
Example 1
Source file: PropertyDataSourceTest.java, from project flink (Apache License 2.0)

@Test
public void checkSinglePartitionedOrderedSource3() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
            .splitsPartitionedBy(0)
            .splitsOrderedBy(new int[]{1}, new Order[]{Order.ASCENDING});

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example 2
Source file: PropertyDataSourceTest.java, from project flink (Apache License 2.0)

@Test
public void checkSinglePartitionedGroupedSource8() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
            .splitsPartitionedBy("f1")
            .splitsGroupedBy("f1.stringField");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1, 2, 3)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example 3
Source file: ReplicatingDataSourceTest.java, from project flink (Apache License 2.0)

/**
 * Tests a join program with a replicated data source behind a map partition operator.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
                    new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
            .mapPartition(new IdPMap())
            .join(source2).where("*").equalTo("*")
            .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the join should have a forward ship strategy on both inputs
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example 4
Source file: PropertyDataSourceTest.java, from project Flink-CEPplus (Apache License 2.0)

@Test
public void checkSinglePartitionedOrderedSource1() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
            .splitsPartitionedBy(1)
            .splitsOrderedBy(new int[]{1}, new Order[]{Order.ASCENDING});

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue((new FieldSet(lprops.getGroupedFields().toArray())).equals(new FieldSet(1)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example 5
Source file: ParallelismChangeTest.java, from project flink (Apache License 2.0)

/**
 * Simple job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 *
 * Increases parallelism between the 1st reduce and the 2nd map, such that more tasks are on one
 * instance. Expected to re-establish the partitioning between map and reduce via a local hash.
 */
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {

    final int p = DEFAULT_PARALLELISM * 2;

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);

    set1.map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map1")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p).name("Reduce1")
            .map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Map2")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
            .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: when Reduce1 distributes its data across the instances of Map2,
    // it needs to employ a local hash method, because Map2 has twice as many instances and
    // key/value pairs with the same key must be processed by the same downstream instance
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

    ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
    ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();

    Assert.assertTrue("Invalid ship strategy for an operator.",
            (ShipStrategyType.PARTITION_RANDOM == mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) ||
            (ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
Example 6
Source file: PropertyDataSourceTest.java, from project flink (Apache License 2.0)

@Test
public void checkSinglePartitionedGroupedSource3() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
            .splitsPartitionedBy(1)
            .splitsGroupedBy(0);

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example 7
Source file: ReplicatingDataSourceTest.java, from project flink (Apache License 2.0)

/**
 * Tests a join program with a replicated data source behind multiple map operators.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMultiMaps() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
                    new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
            .filter(new NoFilter())
            .mapPartition(new IdPMap())
            .flatMap(new IdFlatMap())
            .map(new IdMap())
            .join(source2).where("*").equalTo("*")
            .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the join should have a forward ship strategy on both inputs
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example 8
Source file: ReplicatingDataSourceTest.java, from project Flink-CEPplus (Apache License 2.0)

/**
 * Tests a join program with a replicated data source.
 */
@Test
public void checkJoinWithReplicatedSourceInput() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
                    new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
            .join(source2).where("*").equalTo("*")
            .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the join should have a forward ship strategy on both inputs
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example 9
Source file: ParallelismChangeTest.java, from project Flink-CEPplus (Apache License 2.0)

/**
 * Simple job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 *
 * Increases parallelism between the 2nd map and the 2nd reduce, so the hash partitioning from the
 * 1st reduce is not reusable. Expected to re-establish the partitioning between map and reduce (hash).
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism2() {

    final int p = DEFAULT_PARALLELISM;

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);

    set1.map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map1")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p).name("Reduce1")
            .map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map2")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
            .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: Map2 runs at the same parallelism as Reduce1 and can receive its
    // input forward, but Reduce2 doubles the parallelism, so the partitioning established by
    // Reduce1 is not reusable and a hash partitioning must be re-established in front of Reduce2
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

    ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
    ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, mapIn);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, reduceIn);
}
Example 10
Source file: PropertyDataSourceTest.java, from project flink (Apache License 2.0)

@Test
public void checkSinglePartitionedSource1() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
            .splitsPartitionedBy(0);

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example 11
Source file: PropertyDataSourceTest.java, from project Flink-CEPplus (Apache License 2.0)

@Test
public void checkSinglePartitionedGroupedSource5() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
            .splitsPartitionedBy("f2")
            .splitsGroupedBy("f2");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(4)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(4)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example 12
Source file: PropertyDataSourceTest.java, from project flink (Apache License 2.0)

@Test
public void checkSinglePartitionedOrderedSource6() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
            .splitsPartitionedBy("f1.intField")
            .splitsOrderedBy("f1", new Order[]{Order.DESCENDING});

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(1, 2, 3)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example 13
Source file: ParallelismChangeTest.java, from project flink (Apache License 2.0)

/**
 * Simple job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 *
 * Increases parallelism between the 1st reduce and the 2nd map, so the hash partitioning from the
 * 1st reduce is not reusable. Expected to re-establish the partitioning between reduce and map via
 * hash, because a random repartitioning would be a full network transit as well.
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism1() {

    final int p = DEFAULT_PARALLELISM;

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);

    set1.map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map1")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p).name("Reduce1")
            .map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Map2")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
            .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: Map2 doubles the parallelism, so the partitioning established by
    // Reduce1 cannot be reused; the optimizer re-establishes a hash partitioning on the way into
    // Map2, which then forwards the already-partitioned data to Reduce2
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

    ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
    ShipStrategyType redIn = red2Node.getInput().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, mapIn);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, redIn);
}
Example 14
Source file: ReplicatingDataSourceTest.java, from project Flink-CEPplus (Apache License 2.0)

/**
 * Tests a join program with a replicated data source behind a map operator.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
                    new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
            .map(new IdMap())
            .join(source2).where("*").equalTo("*")
            .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the join should have a forward ship strategy on both inputs
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example 15
Source file: PropertyDataSourceTest.java, from project flink (Apache License 2.0)

@Test
public void checkSinglePartitionedSource5() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
            .splitsPartitionedBy("f1.stringField");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(3)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example 16
Source file: PropertyDataSourceTest.java, from project flink (Apache License 2.0)

@Test
public void checkSinglePartitionedSource6() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
            .splitsPartitionedBy("f1.intField; f2");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2, 4)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example 17
Source file: PropertyDataSourceTest.java, from project Flink-CEPplus (Apache License 2.0)

@Test
public void checkSinglePartitionedSource4() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
            .splitsPartitionedBy("f1");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1, 2, 3)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example 18
Source file: PropertyDataSourceTest.java, from project flink (Apache License 2.0)

@Test
public void checkSinglePartitionedSource3() {

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
            .splitsPartitionedBy("*");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1, 2, 3, 4)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
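A note on the field indices asserted in the POJO-based examples: SplitDataProperties addresses fields by their position in the flattened record, so a nested POJO expands into consecutive indices. The mapping below is inferred purely from the FieldSet assertions in the examples above; the exact layout of SomePojo is an assumption, since its source is not shown here.

// Flattened field indices for Tuple3<Long, SomePojo, String>, as implied by
// the assertions above (SomePojo layout inferred, not taken from its source):
//   f0 (Long)     -> 0
//   f1 (SomePojo) -> 1, 2, 3   (with f1.intField -> 2 and f1.stringField -> 3)
//   f2 (String)   -> 4
// Hence splitsPartitionedBy("f1.intField; f2") yields the FieldSet {2, 4}.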