org.apache.flink.optimizer.plan.SingleInputPlanNode Java Examples
The following examples show how to use
org.apache.flink.optimizer.plan.SingleInputPlanNode.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParallelismChangeTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties). * * Increases parallelism between 2nd map and 2nd reduce, so the hash partitioning from 1st reduce is not reusable. * Expected to re-establish partitioning between map and reduce (hash). */ @Test public void checkPropertyHandlingWithIncreasingGlobalParallelism2() { final int p = DEFAULT_PARALLELISM; // construct the plan ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(p); DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p); set1.map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p).name("Map1") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p).name("Reduce1") .map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p).name("Map2") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p * 2).name("Reduce2") .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method, // because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same // mapper respectively reducer SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor(); SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor(); ShipStrategyType mapIn = map2Node.getInput().getShipStrategy(); ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, mapIn); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, reduceIn); }
Example #2
Source File: PipelineBreakerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testPipelineBreakerWithBroadcastVariable() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().setExecutionMode(ExecutionMode.PIPELINED); env.setParallelism(64); DataSet<Long> source = env.generateSequence(1, 10).map(new IdentityMapper<Long>()); DataSet<Long> result = source.map(new IdentityMapper<Long>()) .map(new IdentityMapper<Long>()) .withBroadcastSet(source, "bc"); result.output(new DiscardingOutputFormat<Long>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode mapperInput = (SingleInputPlanNode) mapper.getInput().getSource(); assertEquals(TempMode.NONE, mapper.getInput().getTempMode()); assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode()); assertEquals(DataExchangeMode.BATCH, mapperInput.getInput().getDataExchangeMode()); assertEquals(DataExchangeMode.BATCH, mapper.getBroadcastInputs().get(0).getDataExchangeMode()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #3
Source File: FeedbackPropertiesMatchTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testNoPartialSolutionFoundSingleInputOnly() { try { SourcePlanNode target = new SourcePlanNode(getSourceNode(), "Source"); SourcePlanNode otherTarget = new SourcePlanNode(getSourceNode(), "Source"); Channel toMap1 = new Channel(target); toMap1.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED); toMap1.setLocalStrategy(LocalStrategy.NONE); SingleInputPlanNode map1 = new SingleInputPlanNode(getMapNode(), "Mapper 1", toMap1, DriverStrategy.MAP); Channel toMap2 = new Channel(map1); toMap2.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED); toMap2.setLocalStrategy(LocalStrategy.NONE); SingleInputPlanNode map2 = new SingleInputPlanNode(getMapNode(), "Mapper 2", toMap2, DriverStrategy.MAP); { GlobalProperties gp = new GlobalProperties(); LocalProperties lp = new LocalProperties(); FeedbackPropertiesMeetRequirementsReport report = map2.checkPartialSolutionPropertiesMet(otherTarget, gp, lp); assertTrue(report == NO_PARTIAL_SOLUTION); } } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #4
Source File: RelationalQueryCompilerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private void checkStandardStrategies(SingleInputPlanNode map, DualInputPlanNode join, SingleInputPlanNode combiner, SingleInputPlanNode reducer, SinkPlanNode sink) { // check ship strategies that are always fix Assert.assertEquals(ShipStrategyType.FORWARD, map.getInput().getShipStrategy()); Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); // check the driver strategies that are always fix Assert.assertEquals(DriverStrategy.FLAT_MAP, map.getDriverStrategy()); Assert.assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reducer.getDriverStrategy()); Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy()); if (combiner != null) { Assert.assertEquals(DriverStrategy.SORTED_GROUP_COMBINE, combiner.getDriverStrategy()); Assert.assertEquals(LocalStrategy.NONE, combiner.getInput().getLocalStrategy()); } }
Example #5
Source File: ParallelismChangeTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void checkPropertyHandlingWithDecreasingParallelism() { final int p = DEFAULT_PARALLELISM; // construct the plan ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(p); env .generateSequence(0, 1).setParallelism(p * 2) .map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p * 2).name("Map1") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p * 2).name("Reduce1") .map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p).name("Map2") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p).name("Reduce2") .output(new DiscardingOutputFormat<Long>()).setParallelism(p).name("Sink"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method, // because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same // mapper respectively reducer SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor(); SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor(); Assert.assertTrue("The no sorting local strategy.", LocalStrategy.SORT == red2Node.getInput().getLocalStrategy() || LocalStrategy.SORT == map2Node.getInput().getLocalStrategy()); Assert.assertTrue("The no partitioning ship strategy.", ShipStrategyType.PARTITION_HASH == red2Node.getInput().getShipStrategy() || ShipStrategyType.PARTITION_HASH == map2Node.getInput().getShipStrategy()); }
Example #6
Source File: DistinctAndGroupingOptimizerTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testDistinctPreservesPartitioningOfDistinctFields() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L)) .map(new IdentityMapper<Tuple2<Long,Long>>()).setParallelism(4); data.distinct(0) .groupBy(0) .sum(1) .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); // reducer can be forward, reuses partitioning from distinct assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy()); // distinct reducer is partitioned assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #7
Source File: RelationalQueryCompilerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private boolean checkBroadcastShipStrategies(DualInputPlanNode join, SingleInputPlanNode reducer, SingleInputPlanNode combiner) { if (ShipStrategyType.BROADCAST == join.getInput1().getShipStrategy() && ShipStrategyType.FORWARD == join.getInput2().getShipStrategy() && ShipStrategyType.PARTITION_HASH == reducer.getInput().getShipStrategy()) { // check combiner Assert.assertNotNull("Plan should have a combiner", combiner); Assert.assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy()); return true; } else { return false; } }
Example #8
Source File: GroupingTupleTranslationTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testCustomPartitioningTupleReduce() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Integer, Integer>> data = env.fromElements(new Tuple2<Integer, Integer>(0, 0)) .rebalance().setParallelism(4); data.groupBy(0).withPartitioner(new TestPartitionerInt()) .reduce(new SelectOneReducer<Tuple2<Integer,Integer>>()) .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #9
Source File: ParallelismChangeTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void checkPropertyHandlingWithDecreasingParallelism() { final int p = DEFAULT_PARALLELISM; // construct the plan ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(p); env .generateSequence(0, 1).setParallelism(p * 2) .map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p * 2).name("Map1") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p * 2).name("Reduce1") .map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p).name("Map2") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p).name("Reduce2") .output(new DiscardingOutputFormat<Long>()).setParallelism(p).name("Sink"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method, // because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same // mapper respectively reducer SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor(); SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor(); Assert.assertTrue("The no sorting local strategy.", LocalStrategy.SORT == red2Node.getInput().getLocalStrategy() || LocalStrategy.SORT == map2Node.getInput().getLocalStrategy()); Assert.assertTrue("The no partitioning ship strategy.", ShipStrategyType.PARTITION_HASH == red2Node.getInput().getShipStrategy() || ShipStrategyType.PARTITION_HASH == map2Node.getInput().getShipStrategy()); }
Example #10
Source File: FeedbackPropertiesMatchTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testNoPartialSolutionFoundTwoInputOperator() { try { SourcePlanNode target = new SourcePlanNode(getSourceNode(), "Partial Solution"); SourcePlanNode source1 = new SourcePlanNode(getSourceNode(), "Source 1"); SourcePlanNode source2 = new SourcePlanNode(getSourceNode(), "Source 2"); Channel toMap1 = new Channel(source1); toMap1.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED); toMap1.setLocalStrategy(LocalStrategy.NONE); SingleInputPlanNode map1 = new SingleInputPlanNode(getMapNode(), "Mapper 1", toMap1, DriverStrategy.MAP); Channel toMap2 = new Channel(source2); toMap2.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED); toMap2.setLocalStrategy(LocalStrategy.NONE); SingleInputPlanNode map2 = new SingleInputPlanNode(getMapNode(), "Mapper 2", toMap2, DriverStrategy.MAP); Channel toJoin1 = new Channel(map1); Channel toJoin2 = new Channel(map2); toJoin1.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED); toJoin1.setLocalStrategy(LocalStrategy.NONE); toJoin2.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED); toJoin2.setLocalStrategy(LocalStrategy.NONE); DualInputPlanNode join = new DualInputPlanNode(getJoinNode(), "Join", toJoin1, toJoin2, DriverStrategy.HYBRIDHASH_BUILD_FIRST); FeedbackPropertiesMeetRequirementsReport report = join.checkPartialSolutionPropertiesMet(target, new GlobalProperties(), new LocalProperties()); assertEquals(NO_PARTIAL_SOLUTION, report); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #11
Source File: GroupingKeySelectorTranslationTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testCustomPartitioningKeySelectorGroupReduceSorted() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> data = env.fromElements(new Tuple3<Integer, Integer, Integer>(0, 0, 0)) .rebalance().setParallelism(4); data.groupBy(new TestKeySelector<Tuple3<Integer,Integer,Integer>>()) .withPartitioner(new TestPartitionerInt()) .sortGroup(new TestKeySelector<Tuple3<Integer, Integer, Integer>>(), Order.ASCENDING) .reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Integer,Integer,Integer>>()) .output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #12
Source File: DistinctAndGroupingOptimizerTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testDistinctPreservesPartitioningOfDistinctFields() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L)) .map(new IdentityMapper<Tuple2<Long,Long>>()).setParallelism(4); data.distinct(0) .groupBy(0) .sum(1) .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); // reducer can be forward, reuses partitioning from distinct assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy()); // distinct reducer is partitioned assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #13
Source File: BroadcastVariablePipelinebreakerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testBreakerForDependentVariable() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<String> source1 = env.fromElements("test"); source1.map(new IdentityMapper<String>()).map(new IdentityMapper<String>()).withBroadcastSet(source1, "some name") .output(new DiscardingOutputFormat<String>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode beforeMapper = (SingleInputPlanNode) mapper.getInput().getSource(); assertEquals(TempMode.NONE, mapper.getInput().getTempMode()); assertEquals(TempMode.NONE, beforeMapper.getInput().getTempMode()); assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode()); assertEquals(DataExchangeMode.PIPELINED, mapper.getInput().getDataExchangeMode()); assertEquals(DataExchangeMode.BATCH, beforeMapper.getInput().getDataExchangeMode()); assertEquals(DataExchangeMode.BATCH, mapper.getBroadcastInputs().get(0).getDataExchangeMode()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #14
Source File: PartitionOperatorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testRangePartitionOperatorPreservesFields() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Long, Long>> data = env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L))); data.partitionByRange(1) .groupBy(1) .reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long,Long>>()) .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode partitionNode = (SingleInputPlanNode)reducer.getInput().getSource(); SingleInputPlanNode partitionIDRemover = (SingleInputPlanNode) partitionNode.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, partitionNode.getInput().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionIDRemover.getInput().getShipStrategy()); SourcePlanNode sourcePlanNode = op.getDataSources().iterator().next(); List<Channel> sourceOutgoingChannels = sourcePlanNode.getOutgoingChannels(); assertEquals(2, sourceOutgoingChannels.size()); assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(0).getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(1).getShipStrategy()); assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(0).getDataExchangeMode()); assertEquals(DataExchangeMode.BATCH, sourceOutgoingChannels.get(1).getDataExchangeMode()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #15
Source File: PartitionPushdownTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testPartitioningReused() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L)); input .groupBy(0).sum(1) .groupBy(0, 1).sum(2) .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode agg2Reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode agg1Reducer = (SingleInputPlanNode) agg2Reducer.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, agg2Reducer.getInput().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, agg1Reducer.getInput().getShipStrategy()); assertEquals(new FieldList(0), agg1Reducer.getInput().getShipStrategyKeys()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #16
Source File: ParallelismChangeTest.java From flink with Apache License 2.0 | 5 votes |
/** * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties). * * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance. * Expected to re-establish partitioning between map and reduce via a local hash. */ @Test public void checkPropertyHandlingWithIncreasingLocalParallelism() { final int p = DEFAULT_PARALLELISM * 2; // construct the plan ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(p); DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p); set1.map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p).name("Map1") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p).name("Reduce1") .map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p * 2).name("Map2") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p * 2).name("Reduce2") .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method, // because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same // mapper respectively reducer SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor(); SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor(); ShipStrategyType mapIn = map2Node.getInput().getShipStrategy(); ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy(); Assert.assertTrue("Invalid ship strategy for an operator.", (ShipStrategyType.PARTITION_RANDOM == mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) || (ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn)); }
Example #17
Source File: GroupingTupleTranslationTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testCustomPartitioningTupleGroupReduce() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Integer, Integer>> data = env.fromElements(new Tuple2<Integer, Integer>(0, 0)) .rebalance().setParallelism(4); data.groupBy(0).withPartitioner(new TestPartitionerInt()) .reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Integer,Integer>>()) .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #18
Source File: BroadcastVariablePipelinebreakerTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testBreakerForDependentVariable() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<String> source1 = env.fromElements("test"); source1.map(new IdentityMapper<String>()).map(new IdentityMapper<String>()).withBroadcastSet(source1, "some name") .output(new DiscardingOutputFormat<String>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode beforeMapper = (SingleInputPlanNode) mapper.getInput().getSource(); assertEquals(TempMode.NONE, mapper.getInput().getTempMode()); assertEquals(TempMode.NONE, beforeMapper.getInput().getTempMode()); assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode()); assertEquals(DataExchangeMode.PIPELINED, mapper.getInput().getDataExchangeMode()); assertEquals(DataExchangeMode.BATCH, beforeMapper.getInput().getDataExchangeMode()); assertEquals(DataExchangeMode.BATCH, mapper.getBroadcastInputs().get(0).getDataExchangeMode()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #19
Source File: GroupingTupleTranslationTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testCustomPartitioningTupleGroupReduce() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Integer, Integer>> data = env.fromElements(new Tuple2<Integer, Integer>(0, 0)) .rebalance().setParallelism(4); data.groupBy(0).withPartitioner(new TestPartitionerInt()) .reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Integer,Integer>>()) .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #20
Source File: GroupingTupleTranslationTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testCustomPartitioningTupleReduce() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Integer, Integer>> data = env.fromElements(new Tuple2<Integer, Integer>(0, 0)) .rebalance().setParallelism(4); data.groupBy(0).withPartitioner(new TestPartitionerInt()) .reduce(new SelectOneReducer<Tuple2<Integer,Integer>>()) .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #21
Source File: RelationalQueryCompilerTest.java From flink with Apache License 2.0 | 5 votes |
private boolean checkRepartitionShipStrategies(DualInputPlanNode join, SingleInputPlanNode reducer, SingleInputPlanNode combiner) { if (ShipStrategyType.PARTITION_HASH == join.getInput1().getShipStrategy() && ShipStrategyType.PARTITION_HASH == join.getInput2().getShipStrategy() && ShipStrategyType.FORWARD == reducer.getInput().getShipStrategy()) { // check combiner Assert.assertNull("Plan should not have a combiner", combiner); return true; } else { return false; } }
Example #22
Source File: ParallelismChangeTest.java From flink with Apache License 2.0 | 5 votes |
/** * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties). * * Increases parallelism between 1st reduce and 2nd map, so the hash partitioning from 1st reduce is not reusable. * Expected to re-establish partitioning between reduce and map, via hash, because random is a full network * transit as well. */ @Test public void checkPropertyHandlingWithIncreasingGlobalParallelism1() { final int p = DEFAULT_PARALLELISM; // construct the plan ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(p); DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p); set1.map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p).name("Map1") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p).name("Reduce1") .map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p * 2).name("Map2") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p * 2).name("Reduce2") .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method, // because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same // mapper respectively reducer SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor(); SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor(); ShipStrategyType mapIn = map2Node.getInput().getShipStrategy(); ShipStrategyType redIn = red2Node.getInput().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, mapIn); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, redIn); }
Example #23
Source File: DistinctAndGroupingOptimizerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testDistinctDestroysPartitioningOfNonDistinctFields() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L)) .map(new IdentityMapper<Tuple2<Long,Long>>()).setParallelism(4); data.distinct(1) .groupBy(0) .sum(1) .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource(); SingleInputPlanNode distinctReducer = (SingleInputPlanNode) combiner.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); // reducer must repartition, because it works on a different field assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy()); // distinct reducer is partitioned assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #24
Source File: PipelineBreakerTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testPipelineBreakerBroadcastedAllReduce() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().setExecutionMode(ExecutionMode.PIPELINED); env.setParallelism(64); DataSet<Long> sourceWithMapper = env.generateSequence(1, 10).map(new IdentityMapper<Long>()); DataSet<Long> bcInput1 = sourceWithMapper .map(new IdentityMapper<Long>()) .reduce(new SelectOneReducer<Long>()); DataSet<Long> bcInput2 = env.generateSequence(1, 10); DataSet<Long> result = sourceWithMapper .map(new IdentityMapper<Long>()) .withBroadcastSet(bcInput1, "bc1") .withBroadcastSet(bcInput2, "bc2"); result.output(new DiscardingOutputFormat<Long>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource(); assertEquals(TempMode.NONE, mapper.getInput().getTempMode()); assertEquals(DataExchangeMode.BATCH, mapper.getInput().getDataExchangeMode()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #25
Source File: ParallelismChangeTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties). * * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance. * Expected to re-establish partitioning between map and reduce via a local hash. */ @Test public void checkPropertyHandlingWithIncreasingLocalParallelism() { final int p = DEFAULT_PARALLELISM * 2; // construct the plan ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(p); DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p); set1.map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p).name("Map1") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p).name("Reduce1") .map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p * 2).name("Map2") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p * 2).name("Reduce2") .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method, // because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same // mapper respectively reducer SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor(); SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor(); ShipStrategyType mapIn = map2Node.getInput().getShipStrategy(); ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy(); Assert.assertTrue("Invalid ship strategy for an operator.", (ShipStrategyType.PARTITION_RANDOM == mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) || (ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn)); }
Example #26
Source File: PartitionPushdownTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testPartitioningReused() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L)); input .groupBy(0).sum(1) .groupBy(0, 1).sum(2) .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode agg2Reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode agg1Reducer = (SingleInputPlanNode) agg2Reducer.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, agg2Reducer.getInput().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, agg1Reducer.getInput().getShipStrategy()); assertEquals(new FieldList(0), agg1Reducer.getInput().getShipStrategyKeys()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #27
Source File: WorksetIterationsRecordApiCompilerTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testRecordApiWithDirectSoltionSetUpdate() { Plan plan = getTestPlan(true, false); OptimizedPlan oPlan; try { oPlan = compileNoStats(plan); } catch(CompilerException ce) { ce.printStackTrace(); fail("The pact compiler is unable to compile this plan correctly."); return; // silence the compiler } OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan); DualInputPlanNode joinWithInvariantNode = resolver.getNode(JOIN_WITH_INVARIANT_NAME); DualInputPlanNode joinWithSolutionSetNode = resolver.getNode(JOIN_WITH_SOLUTION_SET); SingleInputPlanNode worksetReducer = resolver.getNode(NEXT_WORKSET_REDUCER_NAME); // iteration preserves partitioning in reducer, so the first partitioning is out of the loop, // the in-loop partitioning is before the final reducer // verify joinWithInvariant assertEquals(ShipStrategyType.FORWARD, joinWithInvariantNode.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, joinWithInvariantNode.getInput2().getShipStrategy()); assertEquals(list0, joinWithInvariantNode.getKeysForInput1()); assertEquals(list0, joinWithInvariantNode.getKeysForInput2()); // verify joinWithSolutionSet assertEquals(ShipStrategyType.FORWARD, joinWithSolutionSetNode.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, joinWithSolutionSetNode.getInput2().getShipStrategy()); // verify reducer assertEquals(ShipStrategyType.FORWARD, worksetReducer.getInput().getShipStrategy()); assertEquals(list0, worksetReducer.getKeys(0)); // verify solution delta assertEquals(1, joinWithSolutionSetNode.getOutgoingChannels().size()); assertEquals(ShipStrategyType.FORWARD, joinWithSolutionSetNode.getOutgoingChannels().get(0).getShipStrategy()); new JobGraphGenerator().compileJobGraph(oPlan); }
Example #28
Source File: AllGroupReduceProperties.java From flink with Apache License 2.0 | 4 votes |
@Override public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) { return new SingleInputPlanNode(node, "GroupReduce ("+node.getOperator().getName()+")", in, DriverStrategy.ALL_GROUP_REDUCE); }
Example #29
Source File: GroupReduceProperties.java From flink with Apache License 2.0 | 4 votes |
@Override public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) { return new SingleInputPlanNode(node, "GroupReduce ("+node.getOperator().getName()+")", in, DriverStrategy.SORTED_GROUP_REDUCE, this.keyList); }
Example #30
Source File: MapPartitionDescriptor.java From flink with Apache License 2.0 | 4 votes |
@Override public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) { return new SingleInputPlanNode(node, "MapPartition ("+node.getOperator().getName()+")", in, DriverStrategy.MAP_PARTITION); }