org.apache.flink.api.common.Plan#setExecutionConfig

Source File: LocalExecutorITCase.java From flink with Apache License 2.0

6 votes

/**
 * Executes the plan returned by {@code getRuntimeExceptionPlan()} in attached mode, expects that
 * fetching the job execution result fails, and finally asserts that the mini cluster is no
 * longer running.
 */
@Test(timeout = 60_000)
public void testMiniClusterShutdownOnErrors() throws Exception {
	final Plan failingPlan = getRuntimeExceptionPlan();
	failingPlan.setExecutionConfig(new ExecutionConfig());

	final Configuration attachedConfig = new Configuration();
	attachedConfig.setBoolean(DeploymentOptions.ATTACHED, true);

	final JobClient client = executor.execute(failingPlan, attachedConfig).get();

	// retrieving the result has to fail
	assertThrows(
		"Job execution failed.",
		Exception.class,
		() -> client.getJobExecutionResult(getClass().getClassLoader()).get());

	// after the failure the cluster must not be running any more
	assertThat(miniCluster.isRunning(), is(false));
}

Source File: RelationalQueryCompilerTest.java From flink with Apache License 2.0

5 votes

/**
 * Statistics that push towards a repartition merge join. If the join blows the data volume up significantly,
 * re-exploiting the sorted order is cheaper.
 *
 * <p>The plan prepared here (execution config plus the filter-factor hint on the join) is handed to the
 * {@code testQueryGeneric} overload that accepts a {@code Plan}, so the hinted plan is the one being checked.
 */
@Test
public void testQueryWithStatsForRepartitionMerge() throws Exception {
	Plan p = getTPCH3Plan();
	p.setExecutionConfig(defaultExecutionConfig);
	// set compiler hints
	OperatorResolver cr = getContractResolver(p);
	DualInputOperator<?, ?, ?, ?> match = cr.getNode(JOIN_NAME);
	match.getCompilerHints().setFilterFactor(100f);

	// both inputs use the same size value
	final long inputSize = 100L * 1024 * 1024 * 1024 * 1024;

	// Pass the prepared plan along. NOTE(review): the overload without a Plan argument appears to
	// create its own plan, which would drop the hints configured above - confirm against the class.
	testQueryGeneric(p, inputSize, inputSize, 0.01f, 100f, false, true, false, false, true);
}

Source File: KMeansSingleStepTest.java From flink with Apache License 2.0

5 votes

/**
 * Compiles the KMeans plan through {@code compileNoStats} and hands the optimized plan to
 * {@code checkPlan}.
 */
@Test
public void testCompileKMeansSingleStepWithOutStats() throws Exception {
	final Plan kMeansPlan = getKMeansPlan();
	kMeansPlan.setExecutionConfig(new ExecutionConfig());

	final OptimizedPlan optimized = compileNoStats(kMeansPlan);
	checkPlan(optimized);
}

Source File: KMeansSingleStepTest.java From flink with Apache License 2.0

5 votes

/**
 * Sets source statistics on the data point and center sources of the KMeans plan, compiles the
 * plan through {@code compileWithStats} and hands the optimized plan to {@code checkPlan}.
 */
@Test
public void testCompileKMeansSingleStepWithStats() throws Exception {
	final Plan kMeansPlan = getKMeansPlan();
	kMeansPlan.setExecutionConfig(new ExecutionConfig());

	// look up both sources and set their statistics
	final OperatorResolver resolver = getContractResolver(kMeansPlan);
	final GenericDataSourceBase<?, ?> points = resolver.getNode(DATAPOINTS);
	final GenericDataSourceBase<?, ?> centers = resolver.getNode(CENTERS);
	setSourceStatistics(points, 100L * 1024 * 1024 * 1024, 32f);
	setSourceStatistics(centers, 1024 * 1024, 32f);

	checkPlan(compileWithStats(kMeansPlan));
}

Source File: RelationalQueryCompilerTest.java From flink with Apache License 2.0

5 votes

/**
 * Verifies that a robust repartitioning plan with a hash join is created in the absence of statistics.
 *
 * <p>Any exception raised while compiling or inspecting the plan fails the test. The exception is
 * attached as the cause of the failure so that its stack trace is part of the test report.
 */
@Test
public void testQueryNoStatistics() {
	try {
		Plan p = getTPCH3Plan();
		p.setExecutionConfig(defaultExecutionConfig);
		// compile
		final OptimizedPlan plan = compileNoStats(p);

		final OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(plan);

		// get the nodes from the final plan
		final SinkPlanNode sink = or.getNode(SINK);
		final SingleInputPlanNode reducer = or.getNode(REDUCE_NAME);
		// the combiner is optional: it is only present if the reducer's predecessor is a single-input node
		final SingleInputPlanNode combiner = reducer.getPredecessor() instanceof SingleInputPlanNode ?
				(SingleInputPlanNode) reducer.getPredecessor() : null;
		final DualInputPlanNode join = or.getNode(JOIN_NAME);
		final SingleInputPlanNode filteringMapper = or.getNode(MAPPER_NAME);

		// verify the optimizer choices
		checkStandardStrategies(filteringMapper, join, combiner, reducer, sink);
		Assert.assertTrue(checkRepartitionShipStrategies(join, reducer, combiner));
		// a hash join is accepted with either build side
		Assert.assertTrue(checkHashJoinStrategies(join, reducer, true) || checkHashJoinStrategies(join, reducer, false));
	} catch (Exception e) {
		// fail with the original exception as cause instead of only printing it to stderr
		throw new AssertionError(e.getMessage(), e);
	}
}

Source File: RelationalQueryCompilerTest.java From flink with Apache License 2.0

5 votes

/**
 * Statistics that push towards a repartition merge join. If the join blows the data volume up significantly,
 * re-exploiting the sorted order is cheaper.
 *
 * <p>The plan prepared here (execution config plus the filter-factor hint on the join) is handed to the
 * {@code testQueryGeneric} overload that accepts a {@code Plan}, so the hinted plan is the one being checked.
 */
@Test
public void testQueryWithStatsForRepartitionMerge() {
	Plan p = getTPCH3Plan();
	p.setExecutionConfig(defaultExecutionConfig);
	// set compiler hints
	OperatorResolver cr = getContractResolver(p);
	DualInputOperator<?, ?, ?, ?> match = cr.getNode(JOIN_NAME);
	match.getCompilerHints().setFilterFactor(100f);

	// both inputs use the same size value
	final long inputSize = 100L * 1024 * 1024 * 1024 * 1024;

	// Pass the prepared plan along. NOTE(review): the overload without a Plan argument appears to
	// create its own plan, which would drop the hints configured above - confirm against the class.
	testQueryGeneric(p, inputSize, inputSize, 0.01f, 100f, false, true, false, false, true);
}

Source File: RelationalQueryCompilerTest.java From flink with Apache License 2.0

5 votes

/**
 * Verifies that a robust repartitioning plan with a hash join is created in the absence of statistics.
 *
 * <p>Any exception raised while compiling or inspecting the plan fails the test. The exception is
 * attached as the cause of the failure so that its stack trace is part of the test report.
 */
@Test
public void testQueryNoStatistics() {
	try {
		Plan p = getTPCH3Plan();
		p.setExecutionConfig(defaultExecutionConfig);
		// compile
		final OptimizedPlan plan = compileNoStats(p);

		final OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(plan);

		// get the nodes from the final plan
		final SinkPlanNode sink = or.getNode(SINK);
		final SingleInputPlanNode reducer = or.getNode(REDUCE_NAME);
		// the combiner is optional: it is only present if the reducer's predecessor is a single-input node
		final SingleInputPlanNode combiner = reducer.getPredecessor() instanceof SingleInputPlanNode ?
				(SingleInputPlanNode) reducer.getPredecessor() : null;
		final DualInputPlanNode join = or.getNode(JOIN_NAME);
		final SingleInputPlanNode filteringMapper = or.getNode(MAPPER_NAME);

		// verify the optimizer choices
		checkStandardStrategies(filteringMapper, join, combiner, reducer, sink);
		Assert.assertTrue(checkRepartitionShipStrategies(join, reducer, combiner));
		// a hash join is accepted with either build side
		Assert.assertTrue(checkHashJoinStrategies(join, reducer, true) || checkHashJoinStrategies(join, reducer, false));
	} catch (Exception e) {
		// fail with the original exception as cause instead of only printing it to stderr
		throw new AssertionError(e.getMessage(), e);
	}
}

Source File: LocalExecutorITCase.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Runs the word count plan on a {@code LocalExecutor}, using temporary files for input and output.
 *
 * <p>The executor is stopped in a {@code finally} block so that it is also stopped when creating
 * or executing the plan throws. Any exception fails the test and is attached as the cause of the
 * failure.
 */
@Test
public void testLocalExecutorWithWordCount() {
	try {
		// set up the files
		File inFile = File.createTempFile("wctext", ".in");
		File outFile = File.createTempFile("wctext", ".out");
		inFile.deleteOnExit();
		outFile.deleteOnExit();

		try (FileWriter fw = new FileWriter(inFile)) {
			fw.write(WordCountData.TEXT);
		}

		LocalExecutor executor = new LocalExecutor();
		executor.setDefaultOverwriteFiles(true);
		executor.setTaskManagerNumSlots(parallelism);
		executor.setPrintStatusDuringExecution(false);
		executor.start();
		try {
			Plan wcPlan = getWordCountPlan(inFile, outFile, parallelism);
			wcPlan.setExecutionConfig(new ExecutionConfig());
			executor.executePlan(wcPlan);
		} finally {
			// always stop the started executor, even if the job fails
			executor.stop();
		}
	} catch (Exception e) {
		// fail with the original exception as cause instead of only printing it to stderr
		throw new AssertionError(e.getMessage(), e);
	}
}

Source File: KMeansSingleStepTest.java From flink with Apache License 2.0

5 votes

/**
 * Sets source statistics on the data point and center sources of the KMeans plan, compiles the
 * plan through {@code compileWithStats} and hands the optimized plan to {@code checkPlan}.
 */
@Test
public void testCompileKMeansSingleStepWithStats() {
	final Plan kMeansPlan = getKMeansPlan();
	kMeansPlan.setExecutionConfig(new ExecutionConfig());

	// look up both sources and set their statistics
	final OperatorResolver resolver = getContractResolver(kMeansPlan);
	final GenericDataSourceBase<?, ?> points = resolver.getNode(DATAPOINTS);
	final GenericDataSourceBase<?, ?> centers = resolver.getNode(CENTERS);
	setSourceStatistics(points, 100L * 1024 * 1024 * 1024, 32f);
	setSourceStatistics(centers, 1024 * 1024, 32f);

	checkPlan(compileWithStats(kMeansPlan));
}

Source File: PlanGenerator.java From flink with Apache License 2.0

5 votes

/**
 * Translates the sinks into a {@code Plan} named after the job, applies the default parallelism
 * when it is positive, and attaches the execution config.
 *
 * @return the generated plan.
 */
private Plan createPlan() {
	final Plan generated = new OperatorTranslation().translateToPlan(sinks, jobName);

	// non-positive values leave the plan's parallelism untouched
	final boolean hasExplicitParallelism = defaultParallelism > 0;
	if (hasExplicitParallelism) {
		generated.setDefaultParallelism(defaultParallelism);
	}

	generated.setExecutionConfig(config);
	return generated;
}

Source File: RelationalQueryCompilerTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Obtains a plan from {@code getTPCH3Plan()}, applies {@code defaultExecutionConfig} to it and
 * delegates to the {@code testQueryGeneric} overload that accepts a plan, forwarding all other
 * arguments unchanged.
 */
private void testQueryGeneric(long orderSize, long lineItemSize,
		float ordersFilterFactor, float joinFilterFactor,
		boolean broadcastOkay, boolean partitionedOkay,
		boolean hashJoinFirstOkay, boolean hashJoinSecondOkay, boolean mergeJoinOkay) {
	final Plan tpch3Plan = getTPCH3Plan();
	tpch3Plan.setExecutionConfig(defaultExecutionConfig);

	testQueryGeneric(
			tpch3Plan,
			orderSize, lineItemSize,
			ordersFilterFactor, joinFilterFactor,
			broadcastOkay, partitionedOkay,
			hashJoinFirstOkay, hashJoinSecondOkay, mergeJoinOkay);
}

Source File: RelationalQueryCompilerTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Statistics that push towards a repartition merge join. If the join blows the data volume up significantly,
 * re-exploiting the sorted order is cheaper.
 *
 * <p>The plan prepared here (execution config plus the filter-factor hint on the join) is handed to the
 * {@code testQueryGeneric} overload that accepts a {@code Plan}, so the hinted plan is the one being checked.
 */
@Test
public void testQueryWithStatsForRepartitionMerge() {
	Plan p = getTPCH3Plan();
	p.setExecutionConfig(defaultExecutionConfig);
	// set compiler hints
	OperatorResolver cr = getContractResolver(p);
	DualInputOperator<?, ?, ?, ?> match = cr.getNode(JOIN_NAME);
	match.getCompilerHints().setFilterFactor(100f);

	// both inputs use the same size value
	final long inputSize = 100L * 1024 * 1024 * 1024 * 1024;

	// Pass the prepared plan along: the overload without a Plan argument creates its own plan,
	// which would drop the hints configured above.
	testQueryGeneric(p, inputSize, inputSize, 0.01f, 100f, false, true, false, false, true);
}

Source File: RelationalQueryCompilerTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Verifies that a robust repartitioning plan with a hash join is created in the absence of statistics.
 *
 * <p>Any exception raised while compiling or inspecting the plan fails the test. The exception is
 * attached as the cause of the failure so that its stack trace is part of the test report.
 */
@Test
public void testQueryNoStatistics() {
	try {
		Plan p = getTPCH3Plan();
		p.setExecutionConfig(defaultExecutionConfig);
		// compile
		final OptimizedPlan plan = compileNoStats(p);

		final OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(plan);

		// get the nodes from the final plan
		final SinkPlanNode sink = or.getNode(SINK);
		final SingleInputPlanNode reducer = or.getNode(REDUCE_NAME);
		// the combiner is optional: it is only present if the reducer's predecessor is a single-input node
		final SingleInputPlanNode combiner = reducer.getPredecessor() instanceof SingleInputPlanNode ?
				(SingleInputPlanNode) reducer.getPredecessor() : null;
		final DualInputPlanNode join = or.getNode(JOIN_NAME);
		final SingleInputPlanNode filteringMapper = or.getNode(MAPPER_NAME);

		// verify the optimizer choices
		checkStandardStrategies(filteringMapper, join, combiner, reducer, sink);
		Assert.assertTrue(checkRepartitionShipStrategies(join, reducer, combiner));
		// a hash join is accepted with either build side
		Assert.assertTrue(checkHashJoinStrategies(join, reducer, true) || checkHashJoinStrategies(join, reducer, false));
	} catch (Exception e) {
		// fail with the original exception as cause instead of only printing it to stderr
		throw new AssertionError(e.getMessage(), e);
	}
}

Source File: KMeansSingleStepTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Compiles the KMeans plan through {@code compileNoStats} and hands the optimized plan to
 * {@code checkPlan}.
 */
@Test
public void testCompileKMeansSingleStepWithOutStats() {
	final Plan kMeansPlan = getKMeansPlan();
	kMeansPlan.setExecutionConfig(new ExecutionConfig());

	final OptimizedPlan optimized = compileNoStats(kMeansPlan);
	checkPlan(optimized);
}

Source File: KMeansSingleStepTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Sets source statistics on the data point and center sources of the KMeans plan, compiles the
 * plan through {@code compileWithStats} and hands the optimized plan to {@code checkPlan}.
 */
@Test
public void testCompileKMeansSingleStepWithStats() {
	final Plan kMeansPlan = getKMeansPlan();
	kMeansPlan.setExecutionConfig(new ExecutionConfig());

	// look up both sources and set their statistics
	final OperatorResolver resolver = getContractResolver(kMeansPlan);
	final GenericDataSourceBase<?, ?> points = resolver.getNode(DATAPOINTS);
	final GenericDataSourceBase<?, ?> centers = resolver.getNode(CENTERS);
	setSourceStatistics(points, 100L * 1024 * 1024 * 1024, 32f);
	setSourceStatistics(centers, 1024 * 1024, 32f);

	checkPlan(compileWithStats(kMeansPlan));
}

Source File: ConnectedComponentsCoGroupTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles the connected components CoGroup plan without statistics and validates the driver,
 * ship and local strategies chosen for the sources, the sink, the workset iteration, the
 * neighbors join and the CoGroup. Finally a job graph is generated from the optimized plan.
 */
@Test
public void testWorksetConnectedComponents() throws Exception {
	Plan plan = getConnectedComponentsCoGroupPlan();
	plan.setExecutionConfig(new ExecutionConfig());
	OptimizedPlan optPlan = compileNoStats(plan);
	OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);

	// optional debugging aid: dump the optimized plan as JSON
	if (PRINT_PLAN) {
		PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
		String json = dumper.getOptimizerPlanAsJSON(optPlan);
		System.out.println(json);
	}

	SourcePlanNode vertexSource = or.getNode(VERTEX_SOURCE);
	SourcePlanNode edgesSource = or.getNode(EDGES_SOURCE);
	SinkPlanNode sink = or.getNode(SINK);
	WorksetIterationPlanNode iter = or.getNode(ITERATION_NAME);

	DualInputPlanNode neighborsJoin = or.getNode(JOIN_NEIGHBORS_MATCH);
	DualInputPlanNode cogroup = or.getNode(MIN_ID_AND_UPDATE);

	// --------------------------------------------------------------------
	// Plan validation:
	//
	// We expect the plan to go with a sort-merge join, because the CoGroup
	// sorts and the join in the successive iteration can re-exploit the sorting.
	// --------------------------------------------------------------------

	// test all drivers
	Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy());
	Assert.assertEquals(DriverStrategy.NONE, vertexSource.getDriverStrategy());
	Assert.assertEquals(DriverStrategy.NONE, edgesSource.getDriverStrategy());

	Assert.assertEquals(DriverStrategy.INNER_MERGE, neighborsJoin.getDriverStrategy());
	Assert.assertEquals(set0, neighborsJoin.getKeysForInput1());
	Assert.assertEquals(set0, neighborsJoin.getKeysForInput2());

	Assert.assertEquals(DriverStrategy.CO_GROUP, cogroup.getDriverStrategy());
	Assert.assertEquals(set0, cogroup.getKeysForInput1());
	Assert.assertEquals(set0, cogroup.getKeysForInput2());

	// test all the shipping strategies
	Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
	Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialSolutionSetInput().getShipStrategy());
	Assert.assertEquals(set0, iter.getInitialSolutionSetInput().getShipStrategyKeys());
	Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialWorksetInput().getShipStrategy());
	Assert.assertEquals(set0, iter.getInitialWorksetInput().getShipStrategyKeys());

	Assert.assertEquals(ShipStrategyType.FORWARD, neighborsJoin.getInput1().getShipStrategy()); // workset
	Assert.assertEquals(ShipStrategyType.PARTITION_HASH, neighborsJoin.getInput2().getShipStrategy()); // edges
	Assert.assertEquals(set0, neighborsJoin.getInput2().getShipStrategyKeys());
	Assert.assertTrue(neighborsJoin.getInput2().getTempMode().isCached());

	Assert.assertEquals(ShipStrategyType.PARTITION_HASH, cogroup.getInput1().getShipStrategy()); // min id
	Assert.assertEquals(ShipStrategyType.FORWARD, cogroup.getInput2().getShipStrategy()); // solution set

	// test all the local strategies
	Assert.assertEquals(LocalStrategy.NONE, sink.getInput().getLocalStrategy());
	Assert.assertEquals(LocalStrategy.NONE, iter.getInitialSolutionSetInput().getLocalStrategy());

	// the sort for the neighbor join in the first iteration is pushed out of the loop
	Assert.assertEquals(LocalStrategy.SORT, iter.getInitialWorksetInput().getLocalStrategy());
	Assert.assertEquals(LocalStrategy.NONE, neighborsJoin.getInput1().getLocalStrategy()); // workset
	Assert.assertEquals(LocalStrategy.SORT, neighborsJoin.getInput2().getLocalStrategy()); // edges

	Assert.assertEquals(LocalStrategy.SORT, cogroup.getInput1().getLocalStrategy());
	Assert.assertEquals(LocalStrategy.NONE, cogroup.getInput2().getLocalStrategy()); // solution set

	// check the caches; assertEquals reports the actual temp mode if the check fails
	Assert.assertEquals(TempMode.CACHED, neighborsJoin.getInput2().getTempMode());

	// the optimized plan must also be translatable into a job graph
	JobGraphGenerator jgg = new JobGraphGenerator();
	jgg.compileJobGraph(optPlan);
}

Source File: WordCountCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Builds a program consisting of a text source, a map, a grouped sum and a discarding sink,
 * compiles it with or without source statistics, and asserts the ship, local and driver
 * strategies of the resulting plan nodes, including the combiner preceding the reducer.
 *
 * @param estimates if {@code true}, statistics are set on the source and the plan is compiled
 *                  through {@code compileWithStats}; otherwise {@code compileNoStats} is used
 */
private void checkWordCount(boolean estimates) {

	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// source -> dummy map -> count -> discarding sink
	final DataSet<String> inputLines = environment.readTextFile(IN_FILE).name("Input Lines");
	inputLines
		.map(new MapFunction<String, Tuple2<String, Integer>>() {
			private static final long serialVersionUID = -3952739820618875030L;
			@Override
			public Tuple2<String, Integer> map(String line) throws Exception {
				return new Tuple2<>(line, 1);
			}
		}).name("Tokenize Lines")
		.groupBy(0).sum(1).name("Count Words")
		.output(new DiscardingOutputFormat<Tuple2<String, Integer>>()).name("Word Counts");

	// create the program plan and run it through the optimizer
	final Plan programPlan = environment.createProgramPlan();
	programPlan.setExecutionConfig(new ExecutionConfig());

	final OptimizedPlan optimized;
	if (!estimates) {
		optimized = compileNoStats(programPlan);
	} else {
		final GenericDataSourceBase<?, ?> linesSource = getContractResolver(programPlan).getNode("Input Lines");
		setSourceStatistics(linesSource, 1024 * 1024 * 1024 * 1024L, 24f);
		optimized = compileWithStats(programPlan);
	}

	// look up the optimizer plan nodes by operator name
	final OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(optimized);
	final SinkPlanNode sinkNode = nodes.getNode("Word Counts");
	final SingleInputPlanNode reduceNode = nodes.getNode("Count Words");
	final SingleInputPlanNode mapNode = nodes.getNode("Tokenize Lines");

	// shipping strategies
	Assert.assertEquals(ShipStrategyType.FORWARD, mapNode.getInput().getShipStrategy());
	Assert.assertEquals(ShipStrategyType.PARTITION_HASH, reduceNode.getInput().getShipStrategy());
	Assert.assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());

	// input channel of the reducer
	final Channel reduceInput = reduceNode.getInput();
	Assert.assertEquals(LocalStrategy.COMBININGSORT, reduceInput.getLocalStrategy());
	final FieldList firstField = new FieldList(0);
	Assert.assertEquals(firstField, reduceInput.getShipStrategyKeys());
	Assert.assertEquals(firstField, reduceInput.getLocalStrategyKeys());
	final boolean sameSortOrder =
			Arrays.equals(reduceInput.getLocalStrategySortOrder(), reduceNode.getSortOrders(0));
	Assert.assertTrue(sameSortOrder);

	// the reducer's predecessor is expected to be the combiner
	final SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getPredecessor();
	Assert.assertEquals(DriverStrategy.SORTED_GROUP_COMBINE, combineNode.getDriverStrategy());
	Assert.assertEquals(firstField, combineNode.getKeys(0));
	Assert.assertEquals(ShipStrategyType.FORWARD, combineNode.getInput().getShipStrategy());

}

Source File: ConnectedComponentsCoGroupTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles the connected components CoGroup plan without statistics and validates the driver,
 * ship and local strategies chosen for the sources, the sink, the workset iteration, the
 * neighbors join and the CoGroup. Finally a job graph is generated from the optimized plan.
 */
@Test
public void testWorksetConnectedComponents() {
	Plan plan = getConnectedComponentsCoGroupPlan();
	plan.setExecutionConfig(new ExecutionConfig());
	OptimizedPlan optPlan = compileNoStats(plan);
	OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);

	// optional debugging aid: dump the optimized plan as JSON
	if (PRINT_PLAN) {
		PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
		String json = dumper.getOptimizerPlanAsJSON(optPlan);
		System.out.println(json);
	}

	SourcePlanNode vertexSource = or.getNode(VERTEX_SOURCE);
	SourcePlanNode edgesSource = or.getNode(EDGES_SOURCE);
	SinkPlanNode sink = or.getNode(SINK);
	WorksetIterationPlanNode iter = or.getNode(ITERATION_NAME);

	DualInputPlanNode neighborsJoin = or.getNode(JOIN_NEIGHBORS_MATCH);
	DualInputPlanNode cogroup = or.getNode(MIN_ID_AND_UPDATE);

	// --------------------------------------------------------------------
	// Plan validation:
	//
	// We expect the plan to go with a sort-merge join, because the CoGroup
	// sorts and the join in the successive iteration can re-exploit the sorting.
	// --------------------------------------------------------------------

	// test all drivers
	Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy());
	Assert.assertEquals(DriverStrategy.NONE, vertexSource.getDriverStrategy());
	Assert.assertEquals(DriverStrategy.NONE, edgesSource.getDriverStrategy());

	Assert.assertEquals(DriverStrategy.INNER_MERGE, neighborsJoin.getDriverStrategy());
	Assert.assertEquals(set0, neighborsJoin.getKeysForInput1());
	Assert.assertEquals(set0, neighborsJoin.getKeysForInput2());

	Assert.assertEquals(DriverStrategy.CO_GROUP, cogroup.getDriverStrategy());
	Assert.assertEquals(set0, cogroup.getKeysForInput1());
	Assert.assertEquals(set0, cogroup.getKeysForInput2());

	// test all the shipping strategies
	Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
	Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialSolutionSetInput().getShipStrategy());
	Assert.assertEquals(set0, iter.getInitialSolutionSetInput().getShipStrategyKeys());
	Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialWorksetInput().getShipStrategy());
	Assert.assertEquals(set0, iter.getInitialWorksetInput().getShipStrategyKeys());

	Assert.assertEquals(ShipStrategyType.FORWARD, neighborsJoin.getInput1().getShipStrategy()); // workset
	Assert.assertEquals(ShipStrategyType.PARTITION_HASH, neighborsJoin.getInput2().getShipStrategy()); // edges
	Assert.assertEquals(set0, neighborsJoin.getInput2().getShipStrategyKeys());
	Assert.assertTrue(neighborsJoin.getInput2().getTempMode().isCached());

	Assert.assertEquals(ShipStrategyType.PARTITION_HASH, cogroup.getInput1().getShipStrategy()); // min id
	Assert.assertEquals(ShipStrategyType.FORWARD, cogroup.getInput2().getShipStrategy()); // solution set

	// test all the local strategies
	Assert.assertEquals(LocalStrategy.NONE, sink.getInput().getLocalStrategy());
	Assert.assertEquals(LocalStrategy.NONE, iter.getInitialSolutionSetInput().getLocalStrategy());

	// the sort for the neighbor join in the first iteration is pushed out of the loop
	Assert.assertEquals(LocalStrategy.SORT, iter.getInitialWorksetInput().getLocalStrategy());
	Assert.assertEquals(LocalStrategy.NONE, neighborsJoin.getInput1().getLocalStrategy()); // workset
	Assert.assertEquals(LocalStrategy.SORT, neighborsJoin.getInput2().getLocalStrategy()); // edges

	Assert.assertEquals(LocalStrategy.SORT, cogroup.getInput1().getLocalStrategy());
	Assert.assertEquals(LocalStrategy.NONE, cogroup.getInput2().getLocalStrategy()); // solution set

	// check the caches; assertEquals reports the actual temp mode if the check fails
	Assert.assertEquals(TempMode.CACHED, neighborsJoin.getInput2().getTempMode());

	// the optimized plan must also be translatable into a job graph
	JobGraphGenerator jgg = new JobGraphGenerator();
	jgg.compileJobGraph(optPlan);
}

Source File: WordCountCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Builds a program consisting of a text source, a map, a grouped sum and a discarding sink,
 * compiles it with or without source statistics, and asserts the ship, local and driver
 * strategies of the resulting plan nodes, including the combiner preceding the reducer.
 *
 * @param estimates if {@code true}, statistics are set on the source and the plan is compiled
 *                  through {@code compileWithStats}; otherwise {@code compileNoStats} is used
 */
private void checkWordCount(boolean estimates) {

	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// source -> dummy map -> count -> discarding sink
	final DataSet<String> inputLines = environment.readTextFile(IN_FILE).name("Input Lines");
	inputLines
		.map(new MapFunction<String, Tuple2<String, Integer>>() {
			private static final long serialVersionUID = -3952739820618875030L;
			@Override
			public Tuple2<String, Integer> map(String line) throws Exception {
				return new Tuple2<>(line, 1);
			}
		}).name("Tokenize Lines")
		.groupBy(0).sum(1).name("Count Words")
		.output(new DiscardingOutputFormat<Tuple2<String, Integer>>()).name("Word Counts");

	// create the program plan and run it through the optimizer
	final Plan programPlan = environment.createProgramPlan();
	programPlan.setExecutionConfig(new ExecutionConfig());

	final OptimizedPlan optimized;
	if (!estimates) {
		optimized = compileNoStats(programPlan);
	} else {
		final GenericDataSourceBase<?, ?> linesSource = getContractResolver(programPlan).getNode("Input Lines");
		setSourceStatistics(linesSource, 1024 * 1024 * 1024 * 1024L, 24f);
		optimized = compileWithStats(programPlan);
	}

	// look up the optimizer plan nodes by operator name
	final OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(optimized);
	final SinkPlanNode sinkNode = nodes.getNode("Word Counts");
	final SingleInputPlanNode reduceNode = nodes.getNode("Count Words");
	final SingleInputPlanNode mapNode = nodes.getNode("Tokenize Lines");

	// shipping strategies
	Assert.assertEquals(ShipStrategyType.FORWARD, mapNode.getInput().getShipStrategy());
	Assert.assertEquals(ShipStrategyType.PARTITION_HASH, reduceNode.getInput().getShipStrategy());
	Assert.assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());

	// input channel of the reducer
	final Channel reduceInput = reduceNode.getInput();
	Assert.assertEquals(LocalStrategy.COMBININGSORT, reduceInput.getLocalStrategy());
	final FieldList firstField = new FieldList(0);
	Assert.assertEquals(firstField, reduceInput.getShipStrategyKeys());
	Assert.assertEquals(firstField, reduceInput.getLocalStrategyKeys());
	final boolean sameSortOrder =
			Arrays.equals(reduceInput.getLocalStrategySortOrder(), reduceNode.getSortOrders(0));
	Assert.assertTrue(sameSortOrder);

	// the reducer's predecessor is expected to be the combiner
	final SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getPredecessor();
	Assert.assertEquals(DriverStrategy.SORTED_GROUP_COMBINE, combineNode.getDriverStrategy());
	Assert.assertEquals(firstField, combineNode.getKeys(0));
	Assert.assertEquals(ShipStrategyType.FORWARD, combineNode.getInput().getShipStrategy());

}

Source File: WordCountCompilerTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Builds a program consisting of a text source, a map, a grouped sum and a discarding sink,
 * compiles it with or without source statistics, and asserts the ship, local and driver
 * strategies of the resulting plan nodes, including the combiner preceding the reducer.
 *
 * @param estimates if {@code true}, statistics are set on the source and the plan is compiled
 *                  through {@code compileWithStats}; otherwise {@code compileNoStats} is used
 */
private void checkWordCount(boolean estimates) {

	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// source -> dummy map -> count -> discarding sink
	final DataSet<String> inputLines = environment.readTextFile(IN_FILE).name("Input Lines");
	inputLines
		.map(new MapFunction<String, Tuple2<String, Integer>>() {
			private static final long serialVersionUID = -3952739820618875030L;
			@Override
			public Tuple2<String, Integer> map(String line) throws Exception {
				return new Tuple2<>(line, 1);
			}
		}).name("Tokenize Lines")
		.groupBy(0).sum(1).name("Count Words")
		.output(new DiscardingOutputFormat<Tuple2<String, Integer>>()).name("Word Counts");

	// create the program plan and run it through the optimizer
	final Plan programPlan = environment.createProgramPlan();
	programPlan.setExecutionConfig(new ExecutionConfig());

	final OptimizedPlan optimized;
	if (!estimates) {
		optimized = compileNoStats(programPlan);
	} else {
		final GenericDataSourceBase<?, ?> linesSource = getContractResolver(programPlan).getNode("Input Lines");
		setSourceStatistics(linesSource, 1024 * 1024 * 1024 * 1024L, 24f);
		optimized = compileWithStats(programPlan);
	}

	// look up the optimizer plan nodes by operator name
	final OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(optimized);
	final SinkPlanNode sinkNode = nodes.getNode("Word Counts");
	final SingleInputPlanNode reduceNode = nodes.getNode("Count Words");
	final SingleInputPlanNode mapNode = nodes.getNode("Tokenize Lines");

	// shipping strategies
	Assert.assertEquals(ShipStrategyType.FORWARD, mapNode.getInput().getShipStrategy());
	Assert.assertEquals(ShipStrategyType.PARTITION_HASH, reduceNode.getInput().getShipStrategy());
	Assert.assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());

	// input channel of the reducer
	final Channel reduceInput = reduceNode.getInput();
	Assert.assertEquals(LocalStrategy.COMBININGSORT, reduceInput.getLocalStrategy());
	final FieldList firstField = new FieldList(0);
	Assert.assertEquals(firstField, reduceInput.getShipStrategyKeys());
	Assert.assertEquals(firstField, reduceInput.getLocalStrategyKeys());
	final boolean sameSortOrder =
			Arrays.equals(reduceInput.getLocalStrategySortOrder(), reduceNode.getSortOrders(0));
	Assert.assertTrue(sameSortOrder);

	// the reducer's predecessor is expected to be the combiner
	final SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getPredecessor();
	Assert.assertEquals(DriverStrategy.SORTED_GROUP_COMBINE, combineNode.getDriverStrategy());
	Assert.assertEquals(firstField, combineNode.getKeys(0));
	Assert.assertEquals(ShipStrategyType.FORWARD, combineNode.getInput().getShipStrategy());

}

Java Code Examples for org.apache.flink.api.common.Plan#setExecutionConfig()