org.apache.flink.optimizer.testfunctions.IdentityMapper Java Examples
The following examples show how to use
org.apache.flink.optimizer.testfunctions.IdentityMapper.
Each example is drawn from an open-source project; the source file and license are noted above it.
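For context, IdentityMapper is a trivial test function that forwards every record unchanged; the optimizer tests below use it to build plans whose data flow is easy to reason about. A minimal sketch of such a class (reconstructed here for illustration, assuming it extends RichMapFunction like Flink's other test functions; see the real class in flink-optimizer's test sources) looks like this:

import org.apache.flink.api.common.functions.RichMapFunction;

// Identity map function: returns each input record unchanged.
// Sketch for illustration only; the actual class is
// org.apache.flink.optimizer.testfunctions.IdentityMapper.
public class IdentityMapper<T> extends RichMapFunction<T, T> {

    private static final long serialVersionUID = 1L;

    @Override
    public T map(T value) {
        return value;
    }
}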
Example #1
Source File: HardPlansCompilationTest.java From Flink-CEPplus with Apache License 2.0

/**
 * Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce ->
 *                  |--------------------------/                  /
 *                  |--------------------------------------------/
 *
 * First cross has SameKeyFirst output contract
 */
@Test
public void testTicket158() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0, 1);

    set1.map(new IdentityMapper<Long>()).name("Map1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
        .cross(set1).with(new IdentityCrosser<Long>()).withForwardedFieldsFirst("*").name("Cross1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2")
        .cross(set1).with(new IdentityCrosser<Long>()).name("Cross2")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3")
        .output(new DiscardingOutputFormat<Long>()).name("Sink");

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileNoStats(plan);

    JobGraphGenerator jobGen = new JobGraphGenerator();
    jobGen.compileJobGraph(oPlan);
}
Example #2
Source File: HardPlansCompilationTest.java From flink with Apache License 2.0

/**
 * Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce ->
 *                  |--------------------------/                  /
 *                  |--------------------------------------------/
 *
 * First cross has SameKeyFirst output contract
 */
@Test
public void testTicket158() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0, 1);

    set1.map(new IdentityMapper<Long>()).name("Map1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
        .cross(set1).with(new IdentityCrosser<Long>()).withForwardedFieldsFirst("*").name("Cross1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2")
        .cross(set1).with(new IdentityCrosser<Long>()).name("Cross2")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3")
        .output(new DiscardingOutputFormat<Long>()).name("Sink");

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileNoStats(plan);

    JobGraphGenerator jobGen = new JobGraphGenerator();
    jobGen.compileJobGraph(oPlan);
}
Example #3
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

@Test
public void testBranchesOnlyInBCVariables1() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(100);

        DataSet<Long> input = env.generateSequence(1, 10);
        DataSet<Long> bc_input = env.generateSequence(1, 10);

        input
            .map(new IdentityMapper<Long>()).withBroadcastSet(bc_input, "name1")
            .map(new IdentityMapper<Long>()).withBroadcastSet(bc_input, "name2")
            .output(new DiscardingOutputFormat<Long>());

        Plan plan = env.createProgramPlan();
        compileNoStats(plan);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #4
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testBranchAfterIteration() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> sourceA = env.generateSequence(0, 1);

    IterativeDataSet<Long> loopHead = sourceA.iterate(10);
    DataSet<Long> loopTail = loopHead.map(new IdentityMapper<Long>()).name("Mapper");
    DataSet<Long> loopRes = loopHead.closeWith(loopTail);

    loopRes.output(new DiscardingOutputFormat<Long>());
    loopRes.map(new IdentityMapper<Long>())
           .output(new DiscardingOutputFormat<Long>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #5
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

@Test
public void testBranchAfterIteration() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> sourceA = env.generateSequence(0, 1);

    IterativeDataSet<Long> loopHead = sourceA.iterate(10);
    DataSet<Long> loopTail = loopHead.map(new IdentityMapper<Long>()).name("Mapper");
    DataSet<Long> loopRes = loopHead.closeWith(loopTail);

    loopRes.output(new DiscardingOutputFormat<Long>());
    loopRes.map(new IdentityMapper<Long>())
           .output(new DiscardingOutputFormat<Long>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #6
Source File: PipelineBreakerTest.java From flink with Apache License 2.0

@Test
public void testPipelineBreakerBroadcastedAllReduce() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(ExecutionMode.PIPELINED);
        env.setParallelism(64);

        DataSet<Long> sourceWithMapper = env.generateSequence(1, 10).map(new IdentityMapper<Long>());

        DataSet<Long> bcInput1 = sourceWithMapper
            .map(new IdentityMapper<Long>())
            .reduce(new SelectOneReducer<Long>());
        DataSet<Long> bcInput2 = env.generateSequence(1, 10);

        DataSet<Long> result = sourceWithMapper
            .map(new IdentityMapper<Long>())
            .withBroadcastSet(bcInput1, "bc1")
            .withBroadcastSet(bcInput2, "bc2");

        result.output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();

        assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
        assertEquals(DataExchangeMode.BATCH, mapper.getInput().getDataExchangeMode());
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #7
Source File: JobGraphGeneratorTest.java From flink with Apache License 2.0

@Test
public void testGeneratingJobGraphWithUnconsumedResultPartition() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> input = env.fromElements(new Tuple2<>(1L, 2L))
        .setParallelism(1);

    DataSet<Tuple2<Long, Long>> ds = input.map(new IdentityMapper<>())
        .setParallelism(3);

    AbstractID intermediateDataSetID = new AbstractID();

    // this output branch will be excluded
    ds.output(BlockingShuffleOutputFormat.createOutputFormat(intermediateDataSetID))
        .setParallelism(1);

    // this is the normal output branch
    ds.output(new DiscardingOutputFormat<>())
        .setParallelism(1);

    JobGraph jobGraph = compileJob(env);

    Assert.assertEquals(3, jobGraph.getVerticesSortedTopologicallyFromSources().size());

    JobVertex mapVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
    Assert.assertThat(mapVertex, Matchers.instanceOf(JobVertex.class));

    // the mapper produces 2 result partitions, one of them of type ResultPartitionType.BLOCKING_PERSISTENT
    Assert.assertEquals(2, mapVertex.getProducedDataSets().size());
    Assert.assertTrue(mapVertex.getProducedDataSets().stream()
        .anyMatch(dataSet -> dataSet.getId().equals(new IntermediateDataSetID(intermediateDataSetID))
            && dataSet.getResultType() == ResultPartitionType.BLOCKING_PERSISTENT));
}
Example #8
Source File: DistinctAndGroupingOptimizerTest.java From flink with Apache License 2.0

@Test
public void testDistinctPreservesPartitioningOfDistinctFields() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
            .map(new IdentityMapper<Tuple2<Long, Long>>()).setParallelism(4);

        data.distinct(0)
            .groupBy(0)
            .sum(1)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

        // reducer can be forward, reuses partitioning from distinct
        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());

        // distinct reducer is partitioned
        assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #9
Source File: BroadcastVariablePipelinebreakerTest.java From flink with Apache License 2.0

@Test
public void testBreakerForDependentVariable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<String> source1 = env.fromElements("test");

        source1
            .map(new IdentityMapper<String>())
            .map(new IdentityMapper<String>()).withBroadcastSet(source1, "some name")
            .output(new DiscardingOutputFormat<String>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode beforeMapper = (SingleInputPlanNode) mapper.getInput().getSource();

        assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
        assertEquals(TempMode.NONE, beforeMapper.getInput().getTempMode());
        assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode());

        assertEquals(DataExchangeMode.PIPELINED, mapper.getInput().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, beforeMapper.getInput().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, mapper.getBroadcastInputs().get(0).getDataExchangeMode());
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #10
Source File: BroadcastVariablePipelinebreakerTest.java From flink with Apache License 2.0

@Test
public void testNoBreakerForIndependentVariable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<String> source1 = env.fromElements("test");
        DataSet<String> source2 = env.fromElements("test");

        source1.map(new IdentityMapper<String>()).withBroadcastSet(source2, "some name")
            .output(new DiscardingOutputFormat<String>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();

        assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
        assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode());

        assertEquals(DataExchangeMode.PIPELINED, mapper.getInput().getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, mapper.getBroadcastInputs().get(0).getDataExchangeMode());
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #11
Source File: ParallelismChangeTest.java From flink with Apache License 2.0

/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 *
 * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance.
 * Expected to re-establish partitioning between map and reduce via a local hash.
 */
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
    final int p = DEFAULT_PARALLELISM * 2;

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);

    set1.map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p).name("Reduce1")
        .map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Map2")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
        .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: when reducer 1 distributes its data across the instances of map2,
    // it needs to employ a local hash method, because map2 has twice as many instances and key/value
    // pairs with the same key need to be processed by the same mapper and reducer, respectively
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

    ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
    ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();

    Assert.assertTrue("Invalid ship strategy for an operator.",
        (ShipStrategyType.PARTITION_RANDOM == mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) ||
        (ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
Example #12
Source File: PipelineBreakerTest.java From flink with Apache License 2.0

@Test
public void testPipelineBreakerWithBroadcastVariable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(ExecutionMode.PIPELINED);
        env.setParallelism(64);

        DataSet<Long> source = env.generateSequence(1, 10).map(new IdentityMapper<Long>());

        DataSet<Long> result = source.map(new IdentityMapper<Long>())
            .map(new IdentityMapper<Long>())
            .withBroadcastSet(source, "bc");

        result.output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode mapperInput = (SingleInputPlanNode) mapper.getInput().getSource();

        assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
        assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode());

        assertEquals(DataExchangeMode.BATCH, mapperInput.getInput().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, mapper.getBroadcastInputs().get(0).getDataExchangeMode());
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #13
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

@Test
public void testBranchesOnlyInBCVariables2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(100);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 10).map(new Duplicator<Long>()).name("proper input");

        DataSet<Long> bc_input1 = env.generateSequence(1, 10).name("BC input 1");
        DataSet<Long> bc_input2 = env.generateSequence(1, 10).name("BC input 2");

        DataSet<Tuple2<Long, Long>> joinInput1 =
            input.map(new IdentityMapper<Tuple2<Long, Long>>())
                .withBroadcastSet(bc_input1.map(new IdentityMapper<Long>()), "bc1")
                .withBroadcastSet(bc_input2, "bc2");

        DataSet<Tuple2<Long, Long>> joinInput2 =
            input.map(new IdentityMapper<Tuple2<Long, Long>>())
                .withBroadcastSet(bc_input1, "bc1")
                .withBroadcastSet(bc_input2, "bc2");

        DataSet<Tuple2<Long, Long>> joinResult = joinInput1
            .join(joinInput2, JoinHint.REPARTITION_HASH_FIRST).where(0).equalTo(1)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        input
            .map(new IdentityMapper<Tuple2<Long, Long>>())
                .withBroadcastSet(bc_input1, "bc1")
            .union(joinResult)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan plan = env.createProgramPlan();
        compileNoStats(plan);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #14
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

/**
 * <pre>
 *             +---------Iteration-------+
 *             |                         |
 *    /--map--< >----\                   |
 *   /         |     \         /-------< >---sink
 * src-map     |     join------/         |
 *   \         |     /                   |
 *    \        +-----/-------------------+
 *     \            /
 *      \--reduce--/
 * </pre>
 */
@Test
public void testIterationWithStaticInput() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(100);

        DataSet<Long> source = env.generateSequence(1, 1000000);

        DataSet<Long> mapped = source.map(new IdentityMapper<Long>());

        DataSet<Long> reduced = source.groupBy(new IdentityKeyExtractor<Long>()).reduce(new SelectOneReducer<Long>());

        IterativeDataSet<Long> iteration = mapped.iterate(10);
        iteration.closeWith(
                iteration.join(reduced)
                    .where(new IdentityKeyExtractor<Long>())
                    .equalTo(new IdentityKeyExtractor<Long>())
                    .with(new DummyFlatJoinFunction<Long>()))
            .output(new DiscardingOutputFormat<Long>());

        compileNoStats(env.createProgramPlan());
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #15
Source File: ParallelismChangeTest.java From flink with Apache License 2.0

@Test
public void checkPropertyHandlingWithDecreasingParallelism() {
    final int p = DEFAULT_PARALLELISM;

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);

    env
        .generateSequence(0, 1).setParallelism(p * 2)
        .map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Map1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Reduce1")
        .map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map2")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p).name("Reduce2")
        .output(new DiscardingOutputFormat<Long>()).setParallelism(p).name("Sink");

    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the parallelism decreases between Reduce1 and Map2, so the hash
    // partitioning established for Reduce1 is no longer valid; the data must be re-partitioned
    // (hash) and re-sorted on the way to Reduce2
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

    Assert.assertTrue("No sorting local strategy.",
        LocalStrategy.SORT == red2Node.getInput().getLocalStrategy() ||
        LocalStrategy.SORT == map2Node.getInput().getLocalStrategy());

    Assert.assertTrue("No hash partitioning ship strategy.",
        ShipStrategyType.PARTITION_HASH == red2Node.getInput().getShipStrategy() ||
        ShipStrategyType.PARTITION_HASH == map2Node.getInput().getShipStrategy());
}
Example #16
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

@Test
public void testCostComputationWithMultipleDataSinks() {
    final int SINKS = 5;

    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> source = env.generateSequence(1, 10000);

        DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
        DataSet<Long> mappedC = source.map(new IdentityMapper<Long>());

        for (int sink = 0; sink < SINKS; sink++) {
            mappedA.output(new DiscardingOutputFormat<Long>());
            mappedC.output(new DiscardingOutputFormat<Long>());
        }

        Plan plan = env.createProgramPlan("Plans With Multiple Data Sinks");
        OptimizedPlan oPlan = compileNoStats(plan);

        new JobGraphGenerator().compileJobGraph(oPlan);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #17
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

@Test
public void testBranchingUnion() {
    try {
        // construct the plan
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        DataSet<Long> source1 = env.generateSequence(0, 1);
        DataSet<Long> source2 = env.generateSequence(0, 1);

        DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*")
            .with(new IdentityJoiner<Long>()).name("Join 1");

        DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");

        DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");

        DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2");

        DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");

        DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");

        DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3)
            .join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*")
            .with(new IdentityJoiner<Long>()).name("Join 2");

        join2.output(new DiscardingOutputFormat<Long>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);

        JobGraphGenerator jobGen = new JobGraphGenerator();

        // compile the plan to verify that no error is thrown
        jobGen.compileJobGraph(oPlan);
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #18
Source File: PipelineBreakerTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testPipelineBreakerWithBroadcastVariable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(ExecutionMode.PIPELINED);
        env.setParallelism(64);

        DataSet<Long> source = env.generateSequence(1, 10).map(new IdentityMapper<Long>());

        DataSet<Long> result = source.map(new IdentityMapper<Long>())
            .map(new IdentityMapper<Long>())
            .withBroadcastSet(source, "bc");

        result.output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode mapperInput = (SingleInputPlanNode) mapper.getInput().getSource();

        assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
        assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode());

        assertEquals(DataExchangeMode.BATCH, mapperInput.getInput().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, mapper.getBroadcastInputs().get(0).getDataExchangeMode());
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #19
Source File: ParallelismChangeTest.java From flink with Apache License 2.0

/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 *
 * Increases parallelism between 2nd map and 2nd reduce, so the hash partitioning from 1st reduce is not reusable.
 * Expected to re-establish partitioning between map and reduce (hash).
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism2() {
    final int p = DEFAULT_PARALLELISM;

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);

    set1.map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p).name("Reduce1")
        .map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map2")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
        .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the parallelism increases between Map2 and Reduce2, so the hash
    // partitioning from Reduce1 cannot be forwarded; Reduce2 must re-establish it with a hash partition
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

    ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
    ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, mapIn);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, reduceIn);
}
Example #20
Source File: NestedIterationsTest.java From flink with Apache License 2.0

@Test
public void testBulkIterationInClosure() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Long> data1 = env.generateSequence(1, 100);
        DataSet<Long> data2 = env.generateSequence(1, 100);

        IterativeDataSet<Long> firstIteration = data1.iterate(100);
        DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdentityMapper<Long>()));

        IterativeDataSet<Long> mainIteration = data2.map(new IdentityMapper<Long>()).iterate(100);
        DataSet<Long> joined = mainIteration.join(firstResult)
            .where(new IdentityKeyExtractor<Long>()).equalTo(new IdentityKeyExtractor<Long>())
            .with(new DummyFlatJoinFunction<Long>());

        DataSet<Long> mainResult = mainIteration.closeWith(joined);
        mainResult.output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();

        // optimizer should be able to translate this
        OptimizedPlan op = compileNoStats(p);

        // job graph generator should be able to translate this
        new JobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #21
Source File: WorksetIterationsRecordApiCompilerTest.java From flink with Apache License 2.0

private Plan getTestPlan(boolean joinPreservesSolutionSet, boolean mapBeforeSolutionDelta) {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple2<Long, Long>> solSetInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Solution Set");
    DataSet<Tuple2<Long, Long>> workSetInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Workset");
    DataSet<Tuple2<Long, Long>> invariantInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Invariant Input");

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIt =
        solSetInput.iterateDelta(workSetInput, 100, 0).name(ITERATION_NAME);

    DataSet<Tuple2<Long, Long>> join1 = deltaIt.getWorkset().join(invariantInput).where(0).equalTo(0)
        .with(new IdentityJoiner<Tuple2<Long, Long>>())
        .withForwardedFieldsFirst("*").name(JOIN_WITH_INVARIANT_NAME);

    DataSet<Tuple2<Long, Long>> join2 = deltaIt.getSolutionSet().join(join1).where(0).equalTo(0)
        .with(new IdentityJoiner<Tuple2<Long, Long>>())
        .name(JOIN_WITH_SOLUTION_SET);

    if (joinPreservesSolutionSet) {
        ((JoinOperator<?, ?, ?>) join2).withForwardedFieldsFirst("*");
    }

    DataSet<Tuple2<Long, Long>> nextWorkset = join2.groupBy(0).reduceGroup(new IdentityGroupReducer<Tuple2<Long, Long>>())
        .withForwardedFields("*").name(NEXT_WORKSET_REDUCER_NAME);

    if (mapBeforeSolutionDelta) {
        DataSet<Tuple2<Long, Long>> mapper = join2.map(new IdentityMapper<Tuple2<Long, Long>>())
            .withForwardedFields("*").name(SOLUTION_DELTA_MAPPER_NAME);

        deltaIt.closeWith(mapper, nextWorkset)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    }
    else {
        deltaIt.closeWith(join2, nextWorkset)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    }

    return env.createProgramPlan();
}
Example #22
Source File: IterationsCompilerTest.java From flink with Apache License 2.0

@Test
public void testWorksetIterationPipelineBreakerPlacement() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        // the workset (input two of the delta iteration) is the same as what is consumed by the successive join
        DataSet<Tuple2<Long, Long>> initialWorkset = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());

        DataSet<Tuple2<Long, Long>> initialSolutionSet = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());

        // trivial iteration, since we are interested in the inputs to the iteration
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            initialSolutionSet.iterateDelta(initialWorkset, 100, 0);

        DataSet<Tuple2<Long, Long>> next = iteration.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> result = iteration.closeWith(next, next);

        initialWorkset
            .join(result, JoinHint.REPARTITION_HASH_FIRST)
            .where(0).equalTo(0)
            .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());

        Plan p = env.createProgramPlan();
        compileNoStats(p);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #23
Source File: IterationsCompilerTest.java From flink with Apache License 2.0

@Test
public void testSolutionSetDeltaDependsOnBroadcastVariable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> source = env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());

        DataSet<Tuple2<Long, Long>> invariantInput = env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());

        // iteration from here
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = source.iterateDelta(source, 1000, 1);

        DataSet<Tuple2<Long, Long>> result = invariantInput
            .map(new IdentityMapper<Tuple2<Long, Long>>()).withBroadcastSet(iter.getWorkset(), "bc data")
            .join(iter.getSolutionSet()).where(0).equalTo(1).projectFirst(1).projectSecond(1);

        iter.closeWith(result.map(new IdentityMapper<Tuple2<Long, Long>>()), result)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        OptimizedPlan p = compileNoStats(env.createProgramPlan());

        // check that the JSON generator accepts this plan
        new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(p);

        // check that the JobGraphGenerator accepts the plan
        new JobGraphGenerator().compileJobGraph(p);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #24
Source File: DistinctAndGroupingOptimizerTest.java From flink with Apache License 2.0

@Test
public void testDistinctDestroysPartitioningOfNonDistinctFields() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
            .map(new IdentityMapper<Tuple2<Long, Long>>()).setParallelism(4);

        data.distinct(1)
            .groupBy(0)
            .sum(1)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode distinctReducer = (SingleInputPlanNode) combiner.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

        // reducer must repartition, because it works on a different field
        assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy());

        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());

        // distinct reducer is partitioned
        assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #25
Source File: BroadcastVariablePipelinebreakerTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testNoBreakerForIndependentVariable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<String> source1 = env.fromElements("test");
        DataSet<String> source2 = env.fromElements("test");

        source1.map(new IdentityMapper<String>()).withBroadcastSet(source2, "some name")
            .output(new DiscardingOutputFormat<String>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();

        assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
        assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode());

        assertEquals(DataExchangeMode.PIPELINED, mapper.getInput().getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, mapper.getBroadcastInputs().get(0).getDataExchangeMode());
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}