org.apache.flink.optimizer.Optimizer Java Examples
The following examples show how to use org.apache.flink.optimizer.Optimizer. Each example is taken from an open-source project and is listed with its source file and license.
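All of the examples share one basic pattern: build a Plan from an ExecutionEnvironment, compile it with an Optimizer (constructed from a DataStatistics, a cost estimator, and a Configuration), and then inspect or translate the resulting OptimizedPlan. The sketch below distills that pattern into a minimal, self-contained program. The job itself (a map over three integers) and the class name OptimizerUsageSketch are hypothetical, but every Optimizer-related call appears in the examples that follow.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plandump.PlanJSONDumpGenerator;

// Minimal sketch (hypothetical job): compile a DataSet program with the Optimizer
// and print the optimized plan as JSON, without ever executing the job.
public class OptimizerUsageSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // A trivial pipeline; the discarding sink keeps the plan valid without writing anywhere.
        env.fromElements(1, 2, 3)
            .map(new MapFunction<Integer, Integer>() {
                @Override
                public Integer map(Integer value) {
                    return value * 2;
                }
            })
            .output(new DiscardingOutputFormat<Integer>());

        // Turn the user program into an (unoptimized) Plan instead of running it.
        Plan plan = env.createProgramPlan("optimizer-sketch");

        // Compile the plan, exactly as the examples below do.
        Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
        OptimizedPlan optimized = compiler.compile(plan);

        // Render the optimized plan as JSON for inspection.
        System.out.println(new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optimized));
    }
}

Variations on this pattern recur below: tests pass null for the DataStatistics, ClusterClient builds its Optimizer from the cluster Configuration, and the static Optimizer.createPreOptimizedPlan(plan) shortcut produces a plan preview without running the full optimization.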
Example #1
Source File: TestUtils.java From flink with Apache License 2.0

/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
    env.setParallelism(2 * expectedParallelism);

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // Data sources may have parallelism of 1, so simply check that the node
        // parallelism has not been increased by setting the default parallelism
        assertTrue("Wrong parallelism for " + node.toString(),
            node.getParallelism() <= expectedParallelism);

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
Example #2
Source File: GroupReduceITCase.java From flink with Apache License 2.0

@Test
public void testCorrectnessOfAllGroupReduceForTuplesWithCombine() throws Exception {
    /*
     * check correctness of all-groupreduce for tuples with combine
     */
    org.junit.Assume.assumeTrue(mode != TestExecutionMode.COLLECTION);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env)
        .map(new IdentityMapper<Tuple3<Integer, Long, String>>()).setParallelism(4);

    Configuration cfg = new Configuration();
    cfg.setString(Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION);

    DataSet<Tuple2<Integer, String>> reduceDs = ds.reduceGroup(new Tuple3AllGroupReduceWithCombine())
        .withParameters(cfg);

    List<Tuple2<Integer, String>> result = reduceDs.collect();

    String expected = "322,"
        + "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest\n";

    compareResultAsTuples(result, expected);
}
Example #3
Source File: FlinkRunnerTest.java From beam with Apache License 2.0

@Test
public void testEnsureStdoutStdErrIsRestored() throws Exception {
    PackagedProgram packagedProgram = new PackagedProgram(getClass());
    OptimizerPlanEnvironment env = new OptimizerPlanEnvironment(new Optimizer(new Configuration()));
    try {
        // Flink will throw an error because no job graph will be generated by the main method
        env.getOptimizedPlan(packagedProgram);
        Assert.fail("This should have failed to create the Flink Plan.");
    } catch (ProgramInvocationException e) {
        // Test that Flink wasn't able to intercept the stdout/stderr and we printed to the regular
        // output instead
        MatcherAssert.assertThat(
            e.getMessage(),
            allOf(
                StringContains.containsString("System.out: (none)"),
                StringContains.containsString("System.err: (none)")));
    }
}
Example #4
Source File: ClusterClient.java From Flink-CEPplus with Apache License 2.0

public static FlinkPlan getOptimizedPlan(Optimizer compiler, PackagedProgram prog, int parallelism)
        throws CompilerException, ProgramInvocationException {
    Thread.currentThread().setContextClassLoader(prog.getUserCodeClassLoader());
    if (prog.isUsingProgramEntryPoint()) {
        return getOptimizedPlan(compiler, prog.getPlanWithJars(), parallelism);
    } else if (prog.isUsingInteractiveMode()) {
        // temporary hack to support the optimizer plan preview
        OptimizerPlanEnvironment env = new OptimizerPlanEnvironment(compiler);
        if (parallelism > 0) {
            env.setParallelism(parallelism);
        }
        return env.getOptimizedPlan(prog);
    } else {
        throw new RuntimeException("Couldn't determine program mode.");
    }
}
Example #5
Source File: ClientTest.java From flink with Apache License 2.0

@Test
public void testGetExecutionPlan() throws ProgramInvocationException {
    PackagedProgram prg = PackagedProgram.newBuilder()
        .setEntryPointClassName(TestOptimizerPlan.class.getName())
        .setArguments("/dev/random", "/tmp")
        .build();

    Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);

    Plan plan = (Plan) PackagedProgramUtils.getPipelineFromProgram(prg, new Configuration(), 1, true);
    OptimizedPlan op = optimizer.compile(plan);
    assertNotNull(op);

    PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
    assertNotNull(dumper.getOptimizerPlanAsJSON(op));

    // test HTML escaping
    PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
    dumper2.setEncodeForHTML(true);
    String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

    assertEquals(-1, htmlEscaped.indexOf('\\'));
}
Example #6
Source File: ClusterClient.java From Flink-CEPplus with Apache License 2.0

/**
 * Creates an instance that submits the programs to the JobManager defined in the
 * configuration. This method will try to resolve the JobManager hostname and throw an exception
 * if that is not possible.
 *
 * @param flinkConfig The config used to obtain the job-manager's address, and used to configure the optimizer.
 * @param highAvailabilityServices HighAvailabilityServices to use for leader retrieval
 * @param sharedHaServices true if the HighAvailabilityServices are shared and must not be shut down
 */
public ClusterClient(
        Configuration flinkConfig,
        HighAvailabilityServices highAvailabilityServices,
        boolean sharedHaServices) {
    this.flinkConfig = Preconditions.checkNotNull(flinkConfig);
    this.compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);

    this.timeout = AkkaUtils.getClientTimeout(flinkConfig);
    this.lookupTimeout = AkkaUtils.getLookupTimeout(flinkConfig);

    this.actorSystemLoader = new LazyActorSystemLoader(
        highAvailabilityServices,
        Time.milliseconds(lookupTimeout.toMillis()),
        flinkConfig,
        log);

    this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityServices);
    this.sharedHaServices = sharedHaServices;
}
Example #7
Source File: PreviewPlanDumpTest.java From flink with Apache License 2.0

private static void verifyPlanDump(Class<?> entrypoint, String... args) throws Exception {
    final PackagedProgram program = PackagedProgram
        .newBuilder()
        .setEntryPointClassName(entrypoint.getName())
        .setArguments(args)
        .build();

    final Pipeline pipeline = PackagedProgramUtils.getPipelineFromProgram(program, new Configuration(), 1, true);

    assertTrue(pipeline instanceof Plan);
    final Plan plan = (Plan) pipeline;

    final List<DataSinkNode> sinks = Optimizer.createPreOptimizedPlan(plan);
    final PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
    final String json = dumper.getPactPlanAsJSON(sinks);

    try (JsonParser parser = new JsonFactory().createParser(json)) {
        while (parser.nextToken() != null) {
        }
    }
}
Example #8
Source File: ExecutionContext.java From flink with Apache License 2.0

private FlinkPlan createPlan(String name, Configuration flinkConfig) {
    if (streamExecEnv != null) {
        // special case for Blink planner to apply batch optimizations
        // note: it also modifies the ExecutionConfig!
        if (executor instanceof ExecutorBase) {
            return ((ExecutorBase) executor).generateStreamGraph(name);
        }
        return streamExecEnv.getStreamGraph(name);
    } else {
        final int parallelism = execEnv.getParallelism();
        final Plan unoptimizedPlan = execEnv.createProgramPlan();
        unoptimizedPlan.setJobName(name);
        final Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);
        return ClusterClient.getOptimizedPlan(compiler, unoptimizedPlan, parallelism);
    }
}
Example #9
Source File: JsonJobGraphGenerationTest.java From flink with Apache License 2.0

@Override
public JobExecutionResult execute(String jobName) throws Exception {
    Plan plan = createProgramPlan(jobName);

    Optimizer pc = new Optimizer(new Configuration());
    OptimizedPlan op = pc.compile(plan);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(op);

    String jsonPlan = JsonPlanGenerator.generatePlan(jobGraph);

    // first check that the JSON is valid
    JsonParser parser = new JsonFactory().createJsonParser(jsonPlan);
    while (parser.nextToken() != null) {}

    validator.validateJson(jsonPlan);

    throw new AbortError();
}
Example #10
Source File: TestUtils.java From Flink-CEPplus with Apache License 2.0

/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
    env.setParallelism(2 * expectedParallelism);

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // Data sources may have parallelism of 1, so simply check that the node
        // parallelism has not been increased by setting the default parallelism
        assertTrue("Wrong parallelism for " + node.toString(),
            node.getParallelism() <= expectedParallelism);

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
Example #11
Source File: ClientTest.java From flink with Apache License 2.0

@Test
public void testGetExecutionPlan() throws ProgramInvocationException {
    PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp");
    assertNotNull(prg.getPreviewPlan());

    Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
    OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, 1);
    assertNotNull(op);

    PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
    assertNotNull(dumper.getOptimizerPlanAsJSON(op));

    // test HTML escaping
    PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
    dumper2.setEncodeForHTML(true);
    String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

    assertEquals(-1, htmlEscaped.indexOf('\\'));
}
Example #12
Source File: ClusterClient.java From flink with Apache License 2.0

public static FlinkPlan getOptimizedPlan(Optimizer compiler, PackagedProgram prog, int parallelism)
        throws CompilerException, ProgramInvocationException {
    final ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    try {
        Thread.currentThread().setContextClassLoader(prog.getUserCodeClassLoader());
        if (prog.isUsingProgramEntryPoint()) {
            return getOptimizedPlan(compiler, prog.getPlanWithJars(), parallelism);
        } else if (prog.isUsingInteractiveMode()) {
            // temporary hack to support the optimizer plan preview
            OptimizerPlanEnvironment env = new OptimizerPlanEnvironment(compiler);
            if (parallelism > 0) {
                env.setParallelism(parallelism);
            }
            return env.getOptimizedPlan(prog);
        } else {
            throw new RuntimeException("Couldn't determine program mode.");
        }
    } finally {
        Thread.currentThread().setContextClassLoader(contextClassLoader);
    }
}
Example #13
Source File: JsonJobGraphGenerationTest.java From Flink-CEPplus with Apache License 2.0

@Override
public JobExecutionResult execute(String jobName) throws Exception {
    Plan plan = createProgramPlan(jobName);

    Optimizer pc = new Optimizer(new Configuration());
    OptimizedPlan op = pc.compile(plan);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(op);

    String jsonPlan = JsonPlanGenerator.generatePlan(jobGraph);

    // first check that the JSON is valid
    JsonParser parser = new JsonFactory().createJsonParser(jsonPlan);
    while (parser.nextToken() != null) {}

    validator.validateJson(jsonPlan);

    throw new AbortError();
}
Example #14
Source File: GroupReduceITCase.java From Flink-CEPplus with Apache License 2.0

@Test
public void testCorrectnessOfAllGroupReduceForTuplesWithCombine() throws Exception {
    /*
     * check correctness of all-groupreduce for tuples with combine
     */
    org.junit.Assume.assumeTrue(mode != TestExecutionMode.COLLECTION);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env)
        .map(new IdentityMapper<Tuple3<Integer, Long, String>>()).setParallelism(4);

    Configuration cfg = new Configuration();
    cfg.setString(Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION);

    DataSet<Tuple2<Integer, String>> reduceDs = ds.reduceGroup(new Tuple3AllGroupReduceWithCombine())
        .withParameters(cfg);

    List<Tuple2<Integer, String>> result = reduceDs.collect();

    String expected = "322,"
        + "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest\n";

    compareResultAsTuples(result, expected);
}
Example #15
Source File: TempInIterationsTest.java From flink with Apache License 2.0

@Test
public void testTempInIterationTest() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input = env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        input.iterateDelta(input, 1, 0);

    DataSet<Tuple2<Long, Long>> update = iteration.getWorkset()
        .join(iteration.getSolutionSet()).where(0).equalTo(0)
        .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

    iteration.closeWith(update, update)
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = (new Optimizer(new Configuration())).compile(plan);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jg = jgg.compileJobGraph(oPlan);

    boolean solutionSetUpdateChecked = false;
    for (JobVertex v : jg.getVertices()) {
        if (v.getName().equals("SolutionSet Delta")) {
            // check if input of solution set delta is temped
            TaskConfig tc = new TaskConfig(v.getConfiguration());
            assertTrue(tc.isInputAsynchronouslyMaterialized(0));
            solutionSetUpdateChecked = true;
        }
    }
    assertTrue(solutionSetUpdateChecked);
}
Example #16
Source File: ExecutionPlanCreationTest.java From flink with Apache License 2.0

@Test
public void testGetExecutionPlan() {
    try {
        PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp");
        assertNotNull(prg.getPreviewPlan());

        InetAddress mockAddress = InetAddress.getLocalHost();
        InetSocketAddress mockJmAddress = new InetSocketAddress(mockAddress, 12345);

        Configuration config = new Configuration();
        config.setString(JobManagerOptions.ADDRESS, mockJmAddress.getHostName());
        config.setInteger(JobManagerOptions.PORT, mockJmAddress.getPort());

        Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
        OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, -1);
        assertNotNull(op);

        PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
        assertNotNull(dumper.getOptimizerPlanAsJSON(op));

        // test HTML escaping
        PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
        dumper2.setEncodeForHTML(true);
        String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

        assertEquals(-1, htmlEscaped.indexOf('\\'));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #17
Source File: ExecutionContext.java From Flink-CEPplus with Apache License 2.0

private FlinkPlan createPlan(String name, Configuration flinkConfig) {
    if (streamExecEnv != null) {
        final StreamGraph graph = streamExecEnv.getStreamGraph();
        graph.setJobName(name);
        return graph;
    } else {
        final int parallelism = execEnv.getParallelism();
        final Plan unoptimizedPlan = execEnv.createProgramPlan();
        unoptimizedPlan.setJobName(name);
        final Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);
        return ClusterClient.getOptimizedPlan(compiler, unoptimizedPlan, parallelism);
    }
}
Example #18
Source File: JobGraphGeneratorTest.java From flink with Apache License 2.0

private static JobGraph compileJob(ExecutionEnvironment env) {
    Plan plan = env.createProgramPlan();
    Optimizer pc = new Optimizer(new Configuration());
    OptimizedPlan op = pc.compile(plan);
    JobGraphGenerator jgg = new JobGraphGenerator();
    return jgg.compileJobGraph(op);
}
Example #19
Source File: ClusterClient.java From flink with Apache License 2.0

/**
 * Creates an instance that submits the programs to the JobManager defined in the
 * configuration. This method will try to resolve the JobManager hostname and throw an exception
 * if that is not possible.
 *
 * @param flinkConfig The config used to obtain the job-manager's address, and used to configure the optimizer.
 * @param highAvailabilityServices HighAvailabilityServices to use for leader retrieval
 * @param sharedHaServices true if the HighAvailabilityServices are shared and must not be shut down
 */
public ClusterClient(
        Configuration flinkConfig,
        HighAvailabilityServices highAvailabilityServices,
        boolean sharedHaServices) {
    this.flinkConfig = Preconditions.checkNotNull(flinkConfig);
    this.compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);

    this.timeout = AkkaUtils.getClientTimeout(flinkConfig);

    this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityServices);
    this.sharedHaServices = sharedHaServices;
}
Example #20
Source File: PlanTranslator.java From flink with Apache License 2.0

@Override
public String translateToJSONExecutionPlan(Pipeline pipeline) {
    checkArgument(pipeline instanceof Plan, "Given pipeline is not a DataSet Plan.");

    Plan plan = (Plan) pipeline;

    Optimizer opt = new Optimizer(
        new DataStatistics(),
        new DefaultCostEstimator(),
        new Configuration());
    OptimizedPlan optPlan = opt.compile(plan);

    return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}
Example #21
Source File: ClusterClient.java From flink with Apache License 2.0

public static OptimizedPlan getOptimizedPlan(Optimizer compiler, Plan p, int parallelism) throws CompilerException {
    Logger log = LoggerFactory.getLogger(ClusterClient.class);

    if (parallelism > 0 && p.getDefaultParallelism() <= 0) {
        log.debug("Changing plan default parallelism from {} to {}", p.getDefaultParallelism(), parallelism);
        p.setDefaultParallelism(parallelism);
    }
    log.debug("Set parallelism {}, plan default parallelism {}", parallelism, p.getDefaultParallelism());

    return compiler.compile(p);
}
Example #22
Source File: PreviewPlanEnvironment.java From flink with Apache License 2.0

@Override
public JobExecutionResult execute(String jobName) throws Exception {
    this.plan = createProgramPlan(jobName);
    this.previewPlan = Optimizer.createPreOptimizedPlan(plan);

    // do not go on with anything now!
    throw new OptimizerPlanEnvironment.ProgramAbortException();
}
Example #23
Source File: LocalExecutor.java From flink with Apache License 2.0

/**
 * Creates a JSON representation of the given dataflow's execution plan.
 *
 * @param plan The dataflow plan.
 * @return The dataflow's execution plan, as a JSON string.
 * @throws Exception Thrown, if the optimization process that creates the execution plan failed.
 */
public static String optimizerPlanAsJSON(Plan plan) throws Exception {
    final int parallelism = plan.getDefaultParallelism() == ExecutionConfig.PARALLELISM_DEFAULT
        ? 1
        : plan.getDefaultParallelism();

    Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
    pc.setDefaultParallelism(parallelism);
    OptimizedPlan op = pc.compile(plan);

    return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(op);
}
Example #24
Source File: AccumulatorLiveITCase.java From flink with Apache License 2.0

/**
 * Helper to generate the JobGraph.
 */
private static JobGraph getJobGraph(Plan plan) {
    Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
    JobGraphGenerator jgg = new JobGraphGenerator();
    OptimizedPlan op = pc.compile(plan);
    return jgg.compileJobGraph(op);
}
Example #25
Source File: GroupReduceNode.java From flink with Apache License 2.0

private List<OperatorDescriptorSingle> initPossibleProperties(Partitioner<?> customPartitioner) {
    // see if an internal hint dictates the strategy to use
    final Configuration conf = getOperator().getParameters();
    final String localStrategy = conf.getString(Optimizer.HINT_LOCAL_STRATEGY, null);

    final boolean useCombiner;
    if (localStrategy != null) {
        if (Optimizer.HINT_LOCAL_STRATEGY_SORT.equals(localStrategy)) {
            useCombiner = false;
        } else if (Optimizer.HINT_LOCAL_STRATEGY_COMBINING_SORT.equals(localStrategy)) {
            if (!isCombineable()) {
                Optimizer.LOG.warn("Strategy hint for GroupReduce '" + getOperator().getName() +
                    "' requires combinable reduce, but user function is not marked combinable.");
            }
            useCombiner = true;
        } else {
            throw new CompilerException("Invalid local strategy hint for match contract: " + localStrategy);
        }
    } else {
        useCombiner = isCombineable();
    }

    // check if we can work with a grouping (simple reducer), or if we need ordering because of a group order
    Ordering groupOrder = null;
    if (getOperator() != null) {
        groupOrder = getOperator().getGroupOrder();
        if (groupOrder != null && groupOrder.getNumberOfFields() == 0) {
            groupOrder = null;
        }
    }

    OperatorDescriptorSingle props = useCombiner ?
        (this.keys == null ?
            new AllGroupWithPartialPreGroupProperties() :
            new GroupReduceWithCombineProperties(this.keys, groupOrder, customPartitioner)) :
        (this.keys == null ?
            new AllGroupReduceProperties() :
            new GroupReduceProperties(this.keys, groupOrder, customPartitioner));

    return Collections.singletonList(props);
}
Example #26
Source File: CompilerTestBase.java From Flink-CEPplus with Apache License 2.0

@Before
public void setup() {
    Configuration flinkConf = new Configuration();
    this.dataStats = new DataStatistics();

    this.withStatsCompiler = new Optimizer(this.dataStats, new DefaultCostEstimator(), flinkConf);
    this.withStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);

    this.noStatsCompiler = new Optimizer(null, new DefaultCostEstimator(), flinkConf);
    this.noStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);
}
Example #27
Source File: TempInIterationsTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testTempInIterationTest() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input = env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        input.iterateDelta(input, 1, 0);

    DataSet<Tuple2<Long, Long>> update = iteration.getWorkset()
        .join(iteration.getSolutionSet()).where(0).equalTo(0)
        .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

    iteration.closeWith(update, update)
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = (new Optimizer(new Configuration())).compile(plan);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jg = jgg.compileJobGraph(oPlan);

    boolean solutionSetUpdateChecked = false;
    for (JobVertex v : jg.getVertices()) {
        if (v.getName().equals("SolutionSet Delta")) {
            // check if input of solution set delta is temped
            TaskConfig tc = new TaskConfig(v.getConfiguration());
            assertTrue(tc.isInputAsynchronouslyMaterialized(0));
            solutionSetUpdateChecked = true;
        }
    }
    assertTrue(solutionSetUpdateChecked);
}
Example #28
Source File: PreviewPlanEnvironment.java From flink with Apache License 2.0

@Override
public String getExecutionPlan() throws Exception {
    Plan plan = createProgramPlan("unused");
    this.previewPlan = Optimizer.createPreOptimizedPlan(plan);

    // do not go on with anything now!
    throw new OptimizerPlanEnvironment.ProgramAbortException();
}