org.apache.flink.optimizer.plan.OptimizedPlan Java Examples
The following examples show how to use
org.apache.flink.optimizer.plan.OptimizedPlan.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Tests compiler fail for join program with replicated data source behind reduce. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputBehindReduce() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .reduce(new LastReduce()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
Example #2
Source File: IterationCompilerTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Builds a bulk iteration that is closed directly on itself, then checks that
 * the plan can be optimized and turned into a job graph without an exception.
 */
@Test
public void testIdentityIteration() {
    try {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(43);

        final IterativeDataSet<Long> loop = environment.generateSequence(-4, 1000).iterate(100);
        // The iteration result is the iteration head itself.
        loop.closeWith(loop).output(new DiscardingOutputFormat<Long>());

        final Plan program = environment.createProgramPlan();
        final OptimizedPlan optimized = compileNoStats(program);
        new JobGraphGenerator().compileJobGraph(optimized);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #3
Source File: TestEnvironment.java From flink with Apache License 2.0 | 6 votes |
/**
 * Compiles the program for the given job name into a job graph, attaches the
 * configured jar files and classpaths, runs it through
 * {@code jobExecutor.executeJobBlocking}, and remembers the result.
 *
 * @param jobName name passed to {@code compileProgram}
 * @return the result of the execution, also stored in {@code lastJobExecutionResult}
 * @throws Exception if compiling or executing the job fails
 */
@Override
public JobExecutionResult execute(String jobName) throws Exception {
    final OptimizedPlan optimizedPlan = compileProgram(jobName);

    final JobGraphGenerator generator = new JobGraphGenerator();
    final JobGraph graph = generator.compileJobGraph(optimizedPlan);

    for (Path jar : jarFiles) {
        graph.addJar(jar);
    }
    // Hand the job graph its own copy of the classpath list.
    graph.setClasspaths(new ArrayList<>(classPaths));

    final JobExecutionResult result = jobExecutor.executeJobBlocking(graph);
    this.lastJobExecutionResult = result;
    return result;
}
Example #4
Source File: ReduceAllTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void testReduce() { // construct the plan ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSet<Long> set1 = env.generateSequence(0,1); set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1") .output(new DiscardingOutputFormat<Long>()).name("Sink"); Plan plan = env.createProgramPlan(); try { OptimizedPlan oPlan = compileNoStats(plan); JobGraphGenerator jobGen = new JobGraphGenerator(); jobGen.compileJobGraph(oPlan); } catch(CompilerException ce) { ce.printStackTrace(); fail("The pact compiler is unable to compile this plan correctly"); } }
Example #5
Source File: TestUtils.java From flink with Apache License 2.0 | 6 votes |
/** * Verify operator parallelism. * * @param env the Flink execution environment. * @param expectedParallelism expected operator parallelism */ public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) { env.setParallelism(2 * expectedParallelism); Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration()); OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan()); List<PlanNode> queue = new ArrayList<>(); queue.addAll(optimizedPlan.getDataSinks()); while (queue.size() > 0) { PlanNode node = queue.remove(queue.size() - 1); // Data sources may have parallelism of 1, so simply check that the node // parallelism has not been increased by setting the default parallelism assertTrue("Wrong parallelism for " + node.toString(), node.getParallelism() <= expectedParallelism); for (Channel channel : node.getInputs()) { queue.add(channel.getSource()); } } }
Example #6
Source File: ReduceAllTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void testReduce() { // construct the plan ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSet<Long> set1 = env.generateSequence(0,1); set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1") .output(new DiscardingOutputFormat<Long>()).name("Sink"); Plan plan = env.createProgramPlan(); try { OptimizedPlan oPlan = compileNoStats(plan); JobGraphGenerator jobGen = new JobGraphGenerator(); jobGen.compileJobGraph(oPlan); } catch(CompilerException ce) { ce.printStackTrace(); fail("The pact compiler is unable to compile this plan correctly"); } }
Example #7
Source File: ClientTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test public void testGetExecutionPlan() throws ProgramInvocationException { PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp"); assertNotNull(prg.getPreviewPlan()); Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config); OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, 1); assertNotNull(op); PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator(); assertNotNull(dumper.getOptimizerPlanAsJSON(op)); // test HTML escaping PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator(); dumper2.setEncodeForHTML(true); String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op); assertEquals(-1, htmlEscaped.indexOf('\\')); }
Example #8
Source File: UnionReplacementTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Checks that a union consumed by two sinks can be optimized and translated
 * into a job graph without an exception.
 */
@Test
public void testUnionReplacement() {
    try {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        final DataSet<String> left = environment.fromElements("test1");
        final DataSet<String> right = environment.fromElements("test2");
        final DataSet<String> unioned = left.union(right);

        // The same union result feeds two separate sinks.
        unioned.output(new DiscardingOutputFormat<String>());
        unioned.output(new DiscardingOutputFormat<String>());

        final Plan program = environment.createProgramPlan();
        final OptimizedPlan optimized = compileNoStats(program);

        final JobGraphGenerator generator = new JobGraphGenerator();
        generator.compileJobGraph(optimized);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #9
Source File: CachedMatchStrategyCompilerTest.java From flink with Apache License 2.0 | 6 votes |
/** * This tests whether a HYBRIDHASH_BUILD_SECOND is correctly transformed to a HYBRIDHASH_BUILD_SECOND_CACHED * when inside of an iteration an on the static path */ @Test public void testRightSide() { try { Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND); OptimizedPlan oPlan = compileNoStats(plan); OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan); DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner"); // verify correct join strategy assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, innerJoin.getDriverStrategy()); assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode()); assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode()); new JobGraphGenerator().compileJobGraph(oPlan); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test errored: " + e.getMessage()); } }
Example #10
Source File: UnionReplacementTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Unions two single-element inputs, attaches two discarding sinks to the
 * union, and verifies that optimization and job graph generation succeed.
 */
@Test
public void testUnionReplacement() {
    try {
        final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();

        final DataSet<String> first = execEnv.fromElements("test1");
        final DataSet<String> second = execEnv.fromElements("test2");

        final DataSet<String> combined = first.union(second);
        combined.output(new DiscardingOutputFormat<String>());
        combined.output(new DiscardingOutputFormat<String>());

        final Plan programPlan = execEnv.createProgramPlan();
        final OptimizedPlan optimizedPlan = compileNoStats(programPlan);

        final JobGraphGenerator graphGenerator = new JobGraphGenerator();
        graphGenerator.compileJobGraph(optimizedPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #11
Source File: CachedMatchStrategyCompilerTest.java From flink with Apache License 2.0 | 6 votes |
/** * This test makes sure that only a HYBRIDHASH on the static path is transformed to the cached variant */ @Test public void testRightSideCountercheck() { try { Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST); OptimizedPlan oPlan = compileNoStats(plan); OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan); DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner"); // verify correct join strategy assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, innerJoin.getDriverStrategy()); assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode()); assertEquals(TempMode.CACHED, innerJoin.getInput2().getTempMode()); new JobGraphGenerator().compileJobGraph(oPlan); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test errored: " + e.getMessage()); } }
Example #12
Source File: CachedMatchStrategyCompilerTest.java From flink with Apache License 2.0 | 6 votes |
/** * This tests whether a HYBRIDHASH_BUILD_FIRST is correctly transformed to a HYBRIDHASH_BUILD_FIRST_CACHED * when inside of an iteration an on the static path */ @Test public void testLeftSide() { try { Plan plan = getTestPlanLeftStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST); OptimizedPlan oPlan = compileNoStats(plan); OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan); DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner"); // verify correct join strategy assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST_CACHED, innerJoin.getDriverStrategy()); assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode()); assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode()); new JobGraphGenerator().compileJobGraph(oPlan); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test errored: " + e.getMessage()); } }
Example #13
Source File: DisjointDataFlowsTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test public void testDisjointFlows() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // generate two different flows env.generateSequence(1, 10) .output(new DiscardingOutputFormat<Long>()); env.generateSequence(1, 10) .output(new DiscardingOutputFormat<Long>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); new JobGraphGenerator().compileJobGraph(op); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #14
Source File: PlanJSONDumpGenerator.java From flink with Apache License 2.0 | 5 votes |
/**
 * Dumps the given optimized plan as JSON by delegating to the list-based
 * overload with the plan's data sinks.
 *
 * @param plan the optimized plan whose data sinks are dumped
 * @param writer the writer passed on to the list-based overload
 */
public void dumpOptimizerPlanAsJSON(OptimizedPlan plan, PrintWriter writer) {
    final Collection<SinkPlanNode> sinks = plan.getDataSinks();

    // Reuse the collection when it already is a list; otherwise copy it into one.
    final List<SinkPlanNode> sinkList = (sinks instanceof List)
        ? (List<SinkPlanNode>) sinks
        : new ArrayList<SinkPlanNode>(sinks);

    dumpOptimizerPlanAsJSON(sinkList, writer);
}
Example #15
Source File: KMeansSingleStepTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Compiles the KMeans single-step plan without statistics and passes the
 * optimized plan to {@code checkPlan}.
 */
@Test
public void testCompileKMeansSingleStepWithOutStats() {
    final Plan kMeansPlan = getKMeansPlan();
    kMeansPlan.setExecutionConfig(new ExecutionConfig());

    final OptimizedPlan optimized = compileNoStats(kMeansPlan);
    checkPlan(optimized);
}
Example #16
Source File: DistinctAndGroupingOptimizerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testDistinctPreservesPartitioningOfDistinctFields() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L)) .map(new IdentityMapper<Tuple2<Long,Long>>()).setParallelism(4); data.distinct(0) .groupBy(0) .sum(1) .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); // reducer can be forward, reuses partitioning from distinct assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy()); // distinct reducer is partitioned assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #17
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testBranchingUnion() { try { // construct the plan ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSet<Long> source1 = env.generateSequence(0,1); DataSet<Long> source2 = env.generateSequence(0,1); DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*") .with(new IdentityJoiner<Long>()).name("Join 1"); DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1"); DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1"); DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2"); DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2"); DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3"); DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3) .join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*") .with(new IdentityJoiner<Long>()).name("Join 2"); join2.output(new DiscardingOutputFormat<Long>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileNoStats(plan); JobGraphGenerator jobGen = new JobGraphGenerator(); //Compile plan to verify that no error is thrown jobGen.compileJobGraph(oPlan); } catch (Exception e) { e.printStackTrace(); Assert.fail(e.getMessage()); } }
Example #18
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void checkSinglePartitionedSource2() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class); data.getSplitDataProperties() .splitsPartitionedBy(1, 0); data.output(new DiscardingOutputFormat<Tuple2<Long,String>>()); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor(); GlobalProperties gprops = sourceNode.getGlobalProperties(); LocalProperties lprops = sourceNode.getLocalProperties(); Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1))); Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING); Assert.assertTrue(lprops.getGroupedFields() == null); Assert.assertTrue(lprops.getOrdering() == null); }
Example #19
Source File: AccumulatorLiveITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Helper that optimizes the given plan and compiles the result into a JobGraph.
 *
 * @param plan the program plan to compile
 * @return the job graph generated from the optimized plan
 */
private static JobGraph getJobGraph(Plan plan) {
    final Optimizer optimizer = new Optimizer(new DataStatistics(), new Configuration());
    final JobGraphGenerator generator = new JobGraphGenerator();

    final OptimizedPlan optimized = optimizer.compile(plan);
    return generator.compileJobGraph(optimized);
}
Example #20
Source File: CoGroupCustomPartitioningTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks that a custom partitioner set on a key-selector based coGroup is
 * applied to both inputs of the resulting plan node.
 */
@Test
public void testCoGroupWithKeySelectors() {
    try {
        final Partitioner<Integer> partitioner = new TestPartitionerInt();

        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        final DataSet<Pojo2> left = environment.fromElements(new Pojo2());
        final DataSet<Pojo3> right = environment.fromElements(new Pojo3());

        left
            .coGroup(right)
            .where(new Pojo2KeySelector()).equalTo(new Pojo3KeySelector())
            .withPartitioner(partitioner)
            .with(new DummyCoGroupFunction<Pojo2, Pojo3>())
            .output(new DiscardingOutputFormat<Tuple2<Pojo2, Pojo3>>());

        final Plan program = environment.createProgramPlan();
        final OptimizedPlan optimized = compileNoStats(program);

        final SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        final DualInputPlanNode coGroupNode = (DualInputPlanNode) sinkNode.getInput().getSource();

        // Both inputs must be shipped with the custom partitioning ...
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, coGroupNode.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, coGroupNode.getInput2().getShipStrategy());
        // ... and must use the partitioner that was configured above.
        assertEquals(partitioner, coGroupNode.getInput1().getPartitioner());
        assertEquals(partitioner, coGroupNode.getInput2().getPartitioner());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #21
Source File: GroupingTupleTranslationTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Checks the ship strategies chosen for a tuple aggregation grouped with a
 * custom partitioner: FORWARD into the sink, PARTITION_CUSTOM into the
 * reducer, and FORWARD into the combiner.
 */
@Test
public void testCustomPartitioningTupleAgg() {
    try {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        final DataSet<Tuple2<Integer, Integer>> tuples = environment
            .fromElements(new Tuple2<Integer, Integer>(0, 0))
            .rebalance().setParallelism(4);

        tuples.groupBy(0).withPartitioner(new TestPartitionerInt())
            .sum(1)
            .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

        final Plan program = environment.createProgramPlan();
        final OptimizedPlan optimized = compileNoStats(program);

        // Walk backwards: sink <- reducer <- combiner.
        final SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        final SingleInputPlanNode reducerNode = (SingleInputPlanNode) sinkNode.getInput().getSource();
        final SingleInputPlanNode combinerNode = (SingleInputPlanNode) reducerNode.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducerNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combinerNode.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #22
Source File: PartitionOperatorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Checks that grouping on the field used by a preceding custom partitioning
 * reuses that partitioning: the reducer input is expected to be FORWARD and
 * the partition operator input PARTITION_CUSTOM.
 */
@Test
public void testPartitionCustomOperatorPreservesFields() {
    try {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        final DataSet<Tuple2<Long, Long>> tuples =
            environment.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));

        // Partitioner that uses the key's int value as the partition index.
        final Partitioner<Long> byKeyValue = new Partitioner<Long>() {
            public int partition(Long key, int numPartitions) {
                return key.intValue();
            }
        };

        // Partition on field 1, then group on the same field.
        tuples.partitionCustom(byKeyValue, 1)
            .groupBy(1)
            .reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long, Long>>())
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        final Plan program = environment.createProgramPlan();
        final OptimizedPlan optimized = compileNoStats(program);

        final SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        final SingleInputPlanNode reducerNode = (SingleInputPlanNode) sinkNode.getInput().getSource();
        final SingleInputPlanNode partitionNode = (SingleInputPlanNode) reducerNode.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, reducerNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionNode.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #23
Source File: GroupingKeySelectorTranslationTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks the ship strategies chosen for a key-selector based group reduce with
 * a custom partitioner: FORWARD into the sink, PARTITION_CUSTOM into the
 * reducer, and FORWARD into the combiner.
 */
@Test
public void testCustomPartitioningKeySelectorGroupReduce() {
    try {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        final DataSet<Tuple2<Integer, Integer>> tuples = environment
            .fromElements(new Tuple2<Integer, Integer>(0, 0))
            .rebalance().setParallelism(4);

        tuples.groupBy(new TestKeySelector<Tuple2<Integer,Integer>>())
            .withPartitioner(new TestPartitionerInt())
            .reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Integer,Integer>>())
            .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

        final Plan program = environment.createProgramPlan();
        final OptimizedPlan optimized = compileNoStats(program);

        // Walk backwards: sink <- reducer <- combiner.
        final SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        final SingleInputPlanNode reducerNode = (SingleInputPlanNode) sinkNode.getInput().getSource();
        final SingleInputPlanNode combinerNode = (SingleInputPlanNode) reducerNode.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducerNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combinerNode.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #24
Source File: AccumulatorLiveITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Helper to generate the JobGraph: runs the optimizer on the plan and feeds
 * the optimized plan to a {@link JobGraphGenerator}.
 *
 * @param plan the program plan to translate
 * @return the compiled job graph
 */
private static JobGraph getJobGraph(Plan plan) {
    final Optimizer planCompiler = new Optimizer(new DataStatistics(), new Configuration());
    final JobGraphGenerator graphGenerator = new JobGraphGenerator();

    final OptimizedPlan optimizedPlan = planCompiler.compile(plan);
    final JobGraph jobGraph = graphGenerator.compileJobGraph(optimizedPlan);
    return jobGraph;
}
Example #25
Source File: GroupingTupleTranslationTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Groups a rebalanced tuple data set with a custom partitioner, sums field 1,
 * and asserts the ship strategies along the sink, reducer and combiner inputs.
 */
@Test
public void testCustomPartitioningTupleAgg() {
    try {
        final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();

        final DataSet<Tuple2<Integer, Integer>> input = execEnv
            .fromElements(new Tuple2<Integer, Integer>(0, 0))
            .rebalance()
            .setParallelism(4);

        input
            .groupBy(0)
            .withPartitioner(new TestPartitionerInt())
            .sum(1)
            .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

        final Plan programPlan = execEnv.createProgramPlan();
        final OptimizedPlan optimizedPlan = compileNoStats(programPlan);

        final SinkPlanNode sinkPlanNode = optimizedPlan.getDataSinks().iterator().next();
        final SingleInputPlanNode reducePlanNode = (SingleInputPlanNode) sinkPlanNode.getInput().getSource();
        final SingleInputPlanNode combinePlanNode = (SingleInputPlanNode) reducePlanNode.getInput().getSource();

        // Only the reducer input carries the custom partitioning.
        assertEquals(ShipStrategyType.FORWARD, sinkPlanNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducePlanNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combinePlanNode.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #26
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind map partition. */ @Test public void checkJoinWithReplicatedSourceInputBehindMapPartition() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .mapPartition(new IdPMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example #27
Source File: DistinctAndGroupingOptimizerTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testDistinctDestroysPartitioningOfNonDistinctFields() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L)) .map(new IdentityMapper<Tuple2<Long,Long>>()).setParallelism(4); data.distinct(1) .groupBy(0) .sum(1) .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource(); SingleInputPlanNode distinctReducer = (SingleInputPlanNode) combiner.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); // reducer must repartition, because it works on a different field assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy()); assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy()); // distinct reducer is partitioned assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #28
Source File: WorksetIterationCornerCasesTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Compiles a delta iteration whose step function reads only the workset, and
 * checks that the solution set plan node has no outgoing channels and that a
 * job graph can still be generated.
 */
@Test
public void testWorksetIterationNotDependingOnSolutionSet() {
    try {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        final DataSet<Tuple2<Long, Long>> initial =
            environment.generateSequence(1, 100).map(new Duplicator<Long>());

        final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
            initial.iterateDelta(initial, 100, 1);

        // The step function consumes the workset only; the solution set is never read.
        final DataSet<Tuple2<Long, Long>> stepResult =
            deltaIteration.getWorkset().map(new TestMapper<Tuple2<Long,Long>>());

        deltaIteration.closeWith(stepResult, stepResult)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        final Plan program = environment.createProgramPlan();
        final OptimizedPlan optimized = compileNoStats(program);

        final WorksetIterationPlanNode iterationNode =
            (WorksetIterationPlanNode) optimized.getDataSinks().iterator().next().getInput().getSource();
        assertTrue(iterationNode.getSolutionSetPlanNode().getOutgoingChannels().isEmpty());

        final JobGraphGenerator generator = new JobGraphGenerator();
        generator.compileJobGraph(optimized);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #29
Source File: ParallelismChangeTest.java From flink with Apache License 2.0 | 5 votes |
/** * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties). * * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance. * Expected to re-establish partitioning between map and reduce via a local hash. */ @Test public void checkPropertyHandlingWithIncreasingLocalParallelism() { final int p = DEFAULT_PARALLELISM * 2; // construct the plan ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(p); DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p); set1.map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p).name("Map1") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p).name("Reduce1") .map(new IdentityMapper<Long>()) .withForwardedFields("*").setParallelism(p * 2).name("Map2") .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()) .withForwardedFields("*").setParallelism(p * 2).name("Reduce2") .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method, // because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same // mapper respectively reducer SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor(); SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor(); ShipStrategyType mapIn = map2Node.getInput().getShipStrategy(); ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy(); Assert.assertTrue("Invalid ship strategy for an operator.", (ShipStrategyType.PARTITION_RANDOM == mapIn && ShipStrategyType.PARTITION_HASH == 
reduceIn) || (ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn)); }
Example #30
Source File: BroadcastVariablePipelinebreakerTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks that a mapper whose broadcast set comes from a source independent of
 * its main input gets no pipeline breaker: both inputs are expected to have
 * temp mode NONE and a PIPELINED data exchange mode.
 */
@Test
public void testNoBreakerForIndependentVariable() {
    try {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        final DataSet<String> mainInput = environment.fromElements("test");
        final DataSet<String> broadcastInput = environment.fromElements("test");

        mainInput.map(new IdentityMapper<String>()).withBroadcastSet(broadcastInput, "some name")
            .output(new DiscardingOutputFormat<String>());

        final Plan program = environment.createProgramPlan();
        final OptimizedPlan optimized = compileNoStats(program);

        final SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        final SingleInputPlanNode mapNode = (SingleInputPlanNode) sinkNode.getInput().getSource();

        // No materialization on either the regular or the broadcast input.
        assertEquals(TempMode.NONE, mapNode.getInput().getTempMode());
        assertEquals(TempMode.NONE, mapNode.getBroadcastInputs().get(0).getTempMode());

        // Both inputs stay pipelined.
        assertEquals(DataExchangeMode.PIPELINED, mapNode.getInput().getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, mapNode.getBroadcastInputs().get(0).getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}