org.apache.flink.optimizer.plantranslate.JobGraphGenerator Java Examples
The following examples show how to use org.apache.flink.optimizer.plantranslate.JobGraphGenerator, the batch optimizer's translator that turns an OptimizedPlan into the JobGraph submitted for execution. The examples are drawn from open source projects; the source file and project are noted with each one.
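All of the examples below follow the same batch compilation pipeline: a DataSet program is captured as a Plan, the optimizer compiles the Plan into an OptimizedPlan, and a JobGraphGenerator translates the OptimizedPlan into the JobGraph that is submitted to the cluster. The following is a minimal, self-contained sketch of that pipeline, not taken from any one example; the job itself is illustrative and any DataSet program would do.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plantranslate.JobGraphGenerator;
import org.apache.flink.runtime.jobgraph.JobGraph;

public class JobGraphGeneratorSketch {

    public static void main(String[] args) throws Exception {
        // 1. Build a (trivial) DataSet program and capture it as a Plan.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.generateSequence(1, 10).output(new DiscardingOutputFormat<Long>());
        Plan plan = env.createProgramPlan("sketch");

        // 2. Let the optimizer choose execution strategies, producing an OptimizedPlan.
        Optimizer optimizer = new Optimizer(new DataStatistics(), new Configuration());
        OptimizedPlan optimizedPlan = optimizer.compile(plan);

        // 3. Translate the optimized plan into the JobGraph that gets submitted.
        JobGraph jobGraph = new JobGraphGenerator().compileJobGraph(optimizedPlan);
        System.out.println("Vertices in job graph: " + jobGraph.getNumberOfVertices());
    }
}

The test examples below replace step 2 with compileNoStats(...), a helper from the test harness; see the note under Example #1.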
Example #1
Source File: ReduceAllTest.java From flink with Apache License 2.0
@Test
public void testReduce() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0,1);

    set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
        .output(new DiscardingOutputFormat<Long>()).name("Sink");

    Plan plan = env.createProgramPlan();

    try {
        OptimizedPlan oPlan = compileNoStats(plan);
        JobGraphGenerator jobGen = new JobGraphGenerator();
        jobGen.compileJobGraph(oPlan);
    } catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly");
    }
}
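Note that compileNoStats(plan) and DEFAULT_PARALLELISM are not part of the public API; they come from Flink's CompilerTestBase test harness, which runs the optimizer with no source statistics registered. Assuming that context, a stand-in outside the harness is roughly the following (using the same imports as the sketch above):

    // Hedged stand-in for the test harness's compileNoStats(plan):
    // compile with an empty DataStatistics, i.e. no size/cardinality hints.
    static OptimizedPlan compileWithoutStatistics(Plan plan) {
        Optimizer optimizer = new Optimizer(new DataStatistics(), new Configuration());
        return optimizer.compile(plan);
    }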
Example #2
Source File: ClusterClient.java From Flink-CEPplus with Apache License 2.0
public static JobGraph getJobGraph(Configuration flinkConfig, FlinkPlan optPlan, List<URL> jarFiles, List<URL> classpaths, SavepointRestoreSettings savepointSettings) {
    JobGraph job;
    if (optPlan instanceof StreamingPlan) {
        job = ((StreamingPlan) optPlan).getJobGraph();
        job.setSavepointRestoreSettings(savepointSettings);
    } else {
        JobGraphGenerator gen = new JobGraphGenerator(flinkConfig);
        job = gen.compileJobGraph((OptimizedPlan) optPlan);
    }

    for (URL jar : jarFiles) {
        try {
            job.addJar(new Path(jar.toURI()));
        } catch (URISyntaxException e) {
            throw new RuntimeException("URL is invalid. This should not happen.", e);
        }
    }

    job.setClasspaths(classpaths);

    return job;
}
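For orientation, a hypothetical call site for this helper; the variable names and jar path are illustrative, not from the source, and the enclosing method is assumed to declare throws Exception:

    // Hypothetical usage: package an OptimizedPlan with user jars for submission.
    // 'optimizedPlan' is an OptimizedPlan produced by the Optimizer (a FlinkPlan).
    List<URL> jarUrls = Collections.singletonList(new URL("file:///path/to/userjob.jar"));
    List<URL> classpathUrls = Collections.emptyList();
    JobGraph job = ClusterClient.getJobGraph(
            new Configuration(),
            optimizedPlan,
            jarUrls,
            classpathUrls,
            SavepointRestoreSettings.none());  // no savepoint to restore from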
Example #3
Source File: HardPlansCompilationTest.java From flink with Apache License 2.0
/**
 * Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce ->
 * |--------------------------/                  /
 * |--------------------------------------------/
 *
 * First cross has SameKeyFirst output contract.
 */
@Test
public void testTicket158() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0,1);

    set1.map(new IdentityMapper<Long>()).name("Map1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
        .cross(set1).with(new IdentityCrosser<Long>()).withForwardedFieldsFirst("*").name("Cross1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2")
        .cross(set1).with(new IdentityCrosser<Long>()).name("Cross2")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3")
        .output(new DiscardingOutputFormat<Long>()).name("Sink");

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileNoStats(plan);

    JobGraphGenerator jobGen = new JobGraphGenerator();
    jobGen.compileJobGraph(oPlan);
}
Example #4
Source File: UnionReplacementTest.java From flink with Apache License 2.0
@Test
public void testUnionReplacement() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<String> input1 = env.fromElements("test1");
        DataSet<String> input2 = env.fromElements("test2");

        DataSet<String> union = input1.union(input2);

        union.output(new DiscardingOutputFormat<String>());
        union.output(new DiscardingOutputFormat<String>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);
        JobGraphGenerator jobGen = new JobGraphGenerator();
        jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #5
Source File: DisjointDataFlowsTest.java From flink with Apache License 2.0
@Test
public void testDisjointFlows() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // generate two different flows
        env.generateSequence(1, 10)
            .output(new DiscardingOutputFormat<Long>());
        env.generateSequence(1, 10)
            .output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #6
Source File: JsonJobGraphGenerationTest.java From Flink-CEPplus with Apache License 2.0
@Override
public JobExecutionResult execute(String jobName) throws Exception {
    Plan plan = createProgramPlan(jobName);

    Optimizer pc = new Optimizer(new Configuration());
    OptimizedPlan op = pc.compile(plan);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(op);

    String jsonPlan = JsonPlanGenerator.generatePlan(jobGraph);

    // first check that the JSON is valid
    JsonParser parser = new JsonFactory().createJsonParser(jsonPlan);
    while (parser.nextToken() != null) {}

    validator.validateJson(jsonPlan);

    throw new AbortError();
}
Example #7
Source File: CachedMatchStrategyCompilerTest.java From flink with Apache License 2.0
/**
 * This test makes sure that only a HYBRIDHASH on the static path is transformed to the cached variant.
 */
@Test
public void testRightSideCountercheck() {
    try {
        Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST);

        OptimizedPlan oPlan = compileNoStats(plan);

        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
        DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");

        // verify correct join strategy
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, innerJoin.getDriverStrategy());
        assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
        assertEquals(TempMode.CACHED, innerJoin.getInput2().getTempMode());

        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test errored: " + e.getMessage());
    }
}
Example #8
Source File: TestEnvironment.java From Flink-CEPplus with Apache License 2.0
@Override
public JobExecutionResult execute(String jobName) throws Exception {
    OptimizedPlan op = compileProgram(jobName);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(op);

    for (Path jarFile : jarFiles) {
        jobGraph.addJar(jarFile);
    }

    jobGraph.setClasspaths(new ArrayList<>(classPaths));

    this.lastJobExecutionResult = jobExecutor.executeJobBlocking(jobGraph);
    return this.lastJobExecutionResult;
}
Example #9
Source File: IterationCompilerTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testIdentityIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);
        iteration.closeWith(iteration).output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #10
Source File: HardPlansCompilationTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce ->
 * |--------------------------/                  /
 * |--------------------------------------------/
 *
 * First cross has SameKeyFirst output contract.
 */
@Test
public void testTicket158() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0,1);

    set1.map(new IdentityMapper<Long>()).name("Map1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
        .cross(set1).with(new IdentityCrosser<Long>()).withForwardedFieldsFirst("*").name("Cross1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2")
        .cross(set1).with(new IdentityCrosser<Long>()).name("Cross2")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3")
        .output(new DiscardingOutputFormat<Long>()).name("Sink");

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileNoStats(plan);

    JobGraphGenerator jobGen = new JobGraphGenerator();
    jobGen.compileJobGraph(oPlan);
}
Example #11
Source File: UnionReplacementTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testUnionReplacement() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<String> input1 = env.fromElements("test1");
        DataSet<String> input2 = env.fromElements("test2");

        DataSet<String> union = input1.union(input2);

        union.output(new DiscardingOutputFormat<String>());
        union.output(new DiscardingOutputFormat<String>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);
        JobGraphGenerator jobGen = new JobGraphGenerator();
        jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #12
Source File: DisjointDataFlowsTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testDisjointFlows() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // generate two different flows
        env.generateSequence(1, 10)
            .output(new DiscardingOutputFormat<Long>());
        env.generateSequence(1, 10)
            .output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #13
Source File: CachedMatchStrategyCompilerTest.java From Flink-CEPplus with Apache License 2.0
/**
 * This tests whether a HYBRIDHASH_BUILD_SECOND is correctly transformed to a HYBRIDHASH_BUILD_SECOND_CACHED
 * when inside of an iteration and on the static path.
 */
@Test
public void testRightSide() {
    try {
        Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND);

        OptimizedPlan oPlan = compileNoStats(plan);

        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
        DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");

        // verify correct join strategy
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, innerJoin.getDriverStrategy());
        assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
        assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());

        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test errored: " + e.getMessage());
    }
}
Example #14
Source File: CachedMatchStrategyCompilerTest.java From Flink-CEPplus with Apache License 2.0
/**
 * This test makes sure that only a HYBRIDHASH on the static path is transformed to the cached variant.
 */
@Test
public void testRightSideCountercheck() {
    try {
        Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST);

        OptimizedPlan oPlan = compileNoStats(plan);

        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
        DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");

        // verify correct join strategy
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, innerJoin.getDriverStrategy());
        assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
        assertEquals(TempMode.CACHED, innerJoin.getInput2().getTempMode());

        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test errored: " + e.getMessage());
    }
}
Example #15
Source File: CachedMatchStrategyCompilerTest.java From Flink-CEPplus with Apache License 2.0
/**
 * This tests whether a HYBRIDHASH_BUILD_FIRST is correctly transformed to a HYBRIDHASH_BUILD_FIRST_CACHED
 * when inside of an iteration and on the static path.
 */
@Test
public void testLeftSide() {
    try {
        Plan plan = getTestPlanLeftStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST);

        OptimizedPlan oPlan = compileNoStats(plan);

        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
        DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");

        // verify correct join strategy
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST_CACHED, innerJoin.getDriverStrategy());
        assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
        assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());

        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test errored: " + e.getMessage());
    }
}
Example #16
Source File: CachedMatchStrategyCompilerTest.java From Flink-CEPplus with Apache License 2.0
/**
 * This test makes sure that only a HYBRIDHASH on the static path is transformed to the cached variant.
 */
@Test
public void testLeftSideCountercheck() {
    try {
        Plan plan = getTestPlanLeftStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND);

        OptimizedPlan oPlan = compileNoStats(plan);

        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
        DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");

        // verify correct join strategy
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, innerJoin.getDriverStrategy());
        assertEquals(TempMode.CACHED, innerJoin.getInput1().getTempMode());
        assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());

        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test errored: " + e.getMessage());
    }
}
Example #17
Source File: ReduceAllTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testReduce() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0,1);

    set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
        .output(new DiscardingOutputFormat<Long>()).name("Sink");

    Plan plan = env.createProgramPlan();

    try {
        OptimizedPlan oPlan = compileNoStats(plan);
        JobGraphGenerator jobGen = new JobGraphGenerator();
        jobGen.compileJobGraph(oPlan);
    } catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly");
    }
}
Example #18
Source File: TestEnvironment.java From flink with Apache License 2.0
@Override
public JobExecutionResult execute(String jobName) throws Exception {
    OptimizedPlan op = compileProgram(jobName);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(op);

    for (Path jarFile : jarFiles) {
        jobGraph.addJar(jarFile);
    }

    jobGraph.setClasspaths(new ArrayList<>(classPaths));

    this.lastJobExecutionResult = jobExecutor.executeJobBlocking(jobGraph);
    return this.lastJobExecutionResult;
}
Example #19
Source File: ClusterClient.java From flink with Apache License 2.0
public static JobGraph getJobGraph(Configuration flinkConfig, FlinkPlan optPlan, List<URL> jarFiles, List<URL> classpaths, SavepointRestoreSettings savepointSettings) {
    JobGraph job;
    if (optPlan instanceof StreamingPlan) {
        job = ((StreamingPlan) optPlan).getJobGraph();
        job.setSavepointRestoreSettings(savepointSettings);
    } else {
        JobGraphGenerator gen = new JobGraphGenerator(flinkConfig);
        job = gen.compileJobGraph((OptimizedPlan) optPlan);
    }

    for (URL jar : jarFiles) {
        try {
            job.addJar(new Path(jar.toURI()));
        } catch (URISyntaxException e) {
            throw new RuntimeException("URL is invalid. This should not happen.", e);
        }
    }

    job.setClasspaths(classpaths);

    return job;
}
Example #20
Source File: IterationsCompilerTest.java From flink with Apache License 2.0
@Test
public void testIterationNotPushingWorkOut() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());

        DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class);

        // Use input1 as partial solution. Partial solution is used in a single join operation --> it is cheaper
        // to do the hash partitioning between the partial solution node and the join node
        // instead of pushing the partitioning out
        doSimpleBulkIteration(input1, input2).output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof BulkIterationPlanNode);
        BulkIterationPlanNode bipn = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();

        // check that work has not been pushed out
        for (Channel c : bipn.getPartialSolutionPlanNode().getOutgoingChannels()) {
            assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        }

        assertEquals(ShipStrategyType.FORWARD, bipn.getInput().getShipStrategy());

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #21
Source File: WorksetIterationCornerCasesTest.java From flink with Apache License 2.0
@Test
public void testWorksetIterationNotDependingOnSolutionSet() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 100).map(new Duplicator<Long>());

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 100, 1);

        DataSet<Tuple2<Long, Long>> iterEnd = iteration.getWorkset().map(new TestMapper<Tuple2<Long,Long>>());
        iteration.closeWith(iterEnd, iterEnd)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        assertTrue(wipn.getSolutionSetPlanNode().getOutgoingChannels().isEmpty());

        JobGraphGenerator jgg = new JobGraphGenerator();
        jgg.compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #22
Source File: IterationCompilerTest.java From flink with Apache License 2.0
@Test
public void testEmptyWorksetIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
            .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> map(Long value) {
                    return null;
                }
            });

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
        iter.closeWith(iter.getWorkset(), iter.getWorkset())
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #23
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testBranchingUnion() {
    try {
        // construct the plan
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> source1 = env.generateSequence(0,1);
        DataSet<Long> source2 = env.generateSequence(0,1);

        DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*")
            .with(new IdentityJoiner<Long>()).name("Join 1");

        DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");
        DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");

        DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2");

        DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");
        DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");

        DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3)
            .join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*")
            .with(new IdentityJoiner<Long>()).name("Join 2");

        join2.output(new DiscardingOutputFormat<Long>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);

        JobGraphGenerator jobGen = new JobGraphGenerator();

        // compile plan to verify that no error is thrown
        jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #24
Source File: NestedIterationsTest.java From flink with Apache License 2.0
@Test
public void testBulkIterationInClosure() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Long> data1 = env.generateSequence(1, 100);
        DataSet<Long> data2 = env.generateSequence(1, 100);

        IterativeDataSet<Long> firstIteration = data1.iterate(100);
        DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdentityMapper<Long>()));

        IterativeDataSet<Long> mainIteration = data2.map(new IdentityMapper<Long>()).iterate(100);
        DataSet<Long> joined = mainIteration.join(firstResult)
            .where(new IdentityKeyExtractor<Long>()).equalTo(new IdentityKeyExtractor<Long>())
            .with(new DummyFlatJoinFunction<Long>());

        DataSet<Long> mainResult = mainIteration.closeWith(joined);
        mainResult.output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();

        // optimizer should be able to translate this
        OptimizedPlan op = compileNoStats(p);

        // job graph generator should be able to translate this
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #25
Source File: CachedMatchStrategyCompilerTest.java From Flink-CEPplus with Apache License 2.0
/**
 * This test simulates a join of a big left side with a small right side inside of an iteration, where the small side is on a static path.
 * Currently the best execution plan is a HYBRIDHASH_BUILD_SECOND_CACHED, where the small side is hashed and cached.
 * This test also makes sure that all relevant plans are correctly enumerated by the optimizer.
 */
@Test
public void testCorrectChoosing() {
    try {
        Plan plan = getTestPlanRightStatic("");

        SourceCollectorVisitor sourceCollector = new SourceCollectorVisitor();
        plan.accept(sourceCollector);

        for (GenericDataSourceBase<?, ?> s : sourceCollector.getSources()) {
            if (s.getName().equals("bigFile")) {
                this.setSourceStatistics(s, 10000000, 1000);
            } else if (s.getName().equals("smallFile")) {
                this.setSourceStatistics(s, 100, 100);
            }
        }

        OptimizedPlan oPlan = compileNoStats(plan);

        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
        DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");

        // verify correct join strategy
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, innerJoin.getDriverStrategy());
        assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
        assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());

        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test errored: " + e.getMessage());
    }
}
Example #26
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0
@Test
public void testCostComputationWithMultipleDataSinks() {
    final int SINKS = 5;

    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> source = env.generateSequence(1, 10000);

        DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
        DataSet<Long> mappedC = source.map(new IdentityMapper<Long>());

        for (int sink = 0; sink < SINKS; sink++) {
            mappedA.output(new DiscardingOutputFormat<Long>());
            mappedC.output(new DiscardingOutputFormat<Long>());
        }

        Plan plan = env.createProgramPlan("Plans With Multiple Data Sinks");
        OptimizedPlan oPlan = compileNoStats(plan);

        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #27
Source File: AccumulatorLiveITCase.java From flink with Apache License 2.0
/**
 * Helper to generate the JobGraph.
 */
private static JobGraph getJobGraph(Plan plan) {
    Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
    JobGraphGenerator jgg = new JobGraphGenerator();
    OptimizedPlan op = pc.compile(plan);
    return jgg.compileJobGraph(op);
}
Example #28
Source File: StreamingJobGraphGenerator.java From flink with Apache License 2.0
private JobGraph createJobGraph() {

    // make sure that all vertices start immediately
    jobGraph.setScheduleMode(streamGraph.getScheduleMode());

    // Generate deterministic hashes for the nodes in order to identify them across
    // submission iff they didn't change.
    Map<Integer, byte[]> hashes = defaultStreamGraphHasher.traverseStreamGraphAndGenerateHashes(streamGraph);

    // Generate legacy version hashes for backwards compatibility
    List<Map<Integer, byte[]>> legacyHashes = new ArrayList<>(legacyStreamGraphHashers.size());
    for (StreamGraphHasher hasher : legacyStreamGraphHashers) {
        legacyHashes.add(hasher.traverseStreamGraphAndGenerateHashes(streamGraph));
    }

    Map<Integer, List<Tuple2<byte[], byte[]>>> chainedOperatorHashes = new HashMap<>();

    setChaining(hashes, legacyHashes, chainedOperatorHashes);

    setPhysicalEdges();

    setSlotSharingAndCoLocation();

    configureCheckpointing();

    JobGraphGenerator.addUserArtifactEntries(streamGraph.getUserArtifacts(), jobGraph);

    // set the ExecutionConfig last when it has been finalized
    try {
        jobGraph.setExecutionConfig(streamGraph.getExecutionConfig());
    } catch (IOException e) {
        throw new IllegalConfigurationException("Could not serialize the ExecutionConfig. " +
            "This indicates that non-serializable types (like custom serializers) were registered");
    }

    return jobGraph;
}
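Note that in this streaming example the batch JobGraphGenerator is used only for its static addUserArtifactEntries helper, which registers the program's user artifacts (distributed-cache entries) on the JobGraph; the streaming translation itself is performed by StreamingJobGraphGenerator.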
Example #29
Source File: MultipleJoinsWithSolutionSetCompilerTest.java From flink with Apache License 2.0
@Test
public void testMultiSolutionSetJoinPlan() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Double>> inputData = env.fromElements(new Tuple2<Long, Double>(1L, 1.0));
        DataSet<Tuple2<Long, Double>> result = constructPlan(inputData, 10);

        // add two sinks, to test the case of branching after an iteration
        result.output(new DiscardingOutputFormat<Tuple2<Long, Double>>());
        result.output(new DiscardingOutputFormat<Tuple2<Long, Double>>());

        Plan p = env.createProgramPlan();

        OptimizedPlan optPlan = compileNoStats(p);

        OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);

        DualInputPlanNode join1 = or.getNode(JOIN_1);
        DualInputPlanNode join2 = or.getNode(JOIN_2);

        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, join1.getDriverStrategy());
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, join2.getDriverStrategy());

        assertEquals(ShipStrategyType.PARTITION_HASH, join1.getInput2().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, join2.getInput1().getShipStrategy());

        assertEquals(SolutionSetPlanNode.class, join1.getInput1().getSource().getClass());
        assertEquals(SolutionSetPlanNode.class, join2.getInput2().getSource().getClass());

        new JobGraphGenerator().compileJobGraph(optPlan);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test erroneous: " + e.getMessage());
    }
}
Example #30
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testCostComputationWithMultipleDataSinks() {
    final int SINKS = 5;

    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> source = env.generateSequence(1, 10000);

        DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
        DataSet<Long> mappedC = source.map(new IdentityMapper<Long>());

        for (int sink = 0; sink < SINKS; sink++) {
            mappedA.output(new DiscardingOutputFormat<Long>());
            mappedC.output(new DiscardingOutputFormat<Long>());
        }

        Plan plan = env.createProgramPlan("Plans With Multiple Data Sinks");
        OptimizedPlan oPlan = compileNoStats(plan);

        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}