org.apache.flink.optimizer.costs.DefaultCostEstimator Java Examples
The following examples show how to use
org.apache.flink.optimizer.costs.DefaultCostEstimator.
Each example is listed with the source file and project it was taken from.
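Across the examples, DefaultCostEstimator is nearly always handed to the Optimizer constructor together with a DataStatistics instance (or null) and a Flink Configuration, and the resulting OptimizedPlan is then dumped as JSON or turned into a JobGraph. The sketch below condenses that recurring construction into one helper. It is a minimal illustration, not code from any of the projects listed: the class name PlanDumpSketch, the method name dumpPlanAsJson, and the env parameter are assumptions introduced here.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plandump.PlanJSONDumpGenerator;

public final class PlanDumpSketch {

    // Compiles the environment's batch program with the default cost model
    // and returns the optimized plan rendered as JSON.
    public static String dumpPlanAsJson(ExecutionEnvironment env) {
        // An empty DataStatistics simply means no base statistics are
        // registered; several of the test examples below pass null instead.
        Optimizer optimizer = new Optimizer(
            new DataStatistics(),
            new DefaultCostEstimator(),
            new Configuration());

        Plan plan = env.createProgramPlan();
        OptimizedPlan optimizedPlan = optimizer.compile(plan);

        return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optimizedPlan);
    }
}

This is the same Optimizer construction used verbatim by the PlanTranslator, RemoteExecutor, and ExecutionPlanJSONGenerator examples below.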
Example #1
Source File: ClientTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testGetExecutionPlan() throws ProgramInvocationException {
    PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp");
    assertNotNull(prg.getPreviewPlan());

    Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
    OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, 1);
    assertNotNull(op);

    PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
    assertNotNull(dumper.getOptimizerPlanAsJSON(op));

    // test HTML escaping
    PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
    dumper2.setEncodeForHTML(true);
    String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

    assertEquals(-1, htmlEscaped.indexOf('\\'));
}
Example #2
Source File: TestUtils.java From flink with Apache License 2.0

/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
    env.setParallelism(2 * expectedParallelism);

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // Data sources may have parallelism of 1, so simply check that the node
        // parallelism has not been increased by setting the default parallelism
        assertTrue("Wrong parallelism for " + node.toString(),
            node.getParallelism() <= expectedParallelism);

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
Example #3
Source File: ExecutionContext.java From flink with Apache License 2.0

private FlinkPlan createPlan(String name, Configuration flinkConfig) {
    if (streamExecEnv != null) {
        // special case for Blink planner to apply batch optimizations
        // note: it also modifies the ExecutionConfig!
        if (executor instanceof ExecutorBase) {
            return ((ExecutorBase) executor).generateStreamGraph(name);
        }
        return streamExecEnv.getStreamGraph(name);
    } else {
        final int parallelism = execEnv.getParallelism();
        final Plan unoptimizedPlan = execEnv.createProgramPlan();
        unoptimizedPlan.setJobName(name);
        final Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);
        return ClusterClient.getOptimizedPlan(compiler, unoptimizedPlan, parallelism);
    }
}
Example #4
Source File: ClientTest.java From flink with Apache License 2.0

@Test
public void testGetExecutionPlan() throws ProgramInvocationException {
    PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp");
    assertNotNull(prg.getPreviewPlan());

    Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
    OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, 1);
    assertNotNull(op);

    PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
    assertNotNull(dumper.getOptimizerPlanAsJSON(op));

    // test HTML escaping
    PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
    dumper2.setEncodeForHTML(true);
    String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

    assertEquals(-1, htmlEscaped.indexOf('\\'));
}
Example #5
Source File: ClientTest.java From flink with Apache License 2.0

@Test
public void testGetExecutionPlan() throws ProgramInvocationException {
    PackagedProgram prg = PackagedProgram.newBuilder()
        .setEntryPointClassName(TestOptimizerPlan.class.getName())
        .setArguments("/dev/random", "/tmp")
        .build();

    Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
    Plan plan = (Plan) PackagedProgramUtils.getPipelineFromProgram(prg, new Configuration(), 1, true);
    OptimizedPlan op = optimizer.compile(plan);
    assertNotNull(op);

    PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
    assertNotNull(dumper.getOptimizerPlanAsJSON(op));

    // test HTML escaping
    PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
    dumper2.setEncodeForHTML(true);
    String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

    assertEquals(-1, htmlEscaped.indexOf('\\'));
}
Example #6
Source File: ClusterClient.java From Flink-CEPplus with Apache License 2.0

public ClusterClient(
        Configuration flinkConfig,
        HighAvailabilityServices highAvailabilityServices,
        boolean sharedHaServices,
        ActorSystemLoader actorSystemLoader) {
    this.flinkConfig = Preconditions.checkNotNull(flinkConfig);
    this.compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);

    this.timeout = AkkaUtils.getClientTimeout(flinkConfig);
    this.lookupTimeout = AkkaUtils.getLookupTimeout(flinkConfig);

    this.actorSystemLoader = Preconditions.checkNotNull(actorSystemLoader);

    this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityServices);
    this.sharedHaServices = sharedHaServices;
}
Example #7
Source File: ClusterClient.java From Flink-CEPplus with Apache License 2.0

/**
 * Creates an instance that submits the programs to the JobManager defined in the
 * configuration. This method will try to resolve the JobManager hostname and throw an exception
 * if that is not possible.
 *
 * @param flinkConfig The config used to obtain the job-manager's address, and used to configure the optimizer.
 * @param highAvailabilityServices HighAvailabilityServices to use for leader retrieval
 * @param sharedHaServices true if the HighAvailabilityServices are shared and must not be shut down
 */
public ClusterClient(
        Configuration flinkConfig,
        HighAvailabilityServices highAvailabilityServices,
        boolean sharedHaServices) {
    this.flinkConfig = Preconditions.checkNotNull(flinkConfig);
    this.compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);

    this.timeout = AkkaUtils.getClientTimeout(flinkConfig);
    this.lookupTimeout = AkkaUtils.getLookupTimeout(flinkConfig);

    this.actorSystemLoader = new LazyActorSystemLoader(
        highAvailabilityServices,
        Time.milliseconds(lookupTimeout.toMillis()),
        flinkConfig,
        log);

    this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityServices);
    this.sharedHaServices = sharedHaServices;
}
Example #8
Source File: TestUtils.java From Flink-CEPplus with Apache License 2.0

/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
    env.setParallelism(2 * expectedParallelism);

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // Data sources may have parallelism of 1, so simply check that the node
        // parallelism has not been increased by setting the default parallelism
        assertTrue("Wrong parallelism for " + node.toString(),
            node.getParallelism() <= expectedParallelism);

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
Example #9
Source File: ExecutionPlanCreationTest.java From flink with Apache License 2.0

@Test
public void testGetExecutionPlan() {
    try {
        PackagedProgram prg = PackagedProgram.newBuilder()
            .setEntryPointClassName(TestOptimizerPlan.class.getName())
            .setArguments("/dev/random", "/tmp")
            .build();

        InetAddress mockAddress = InetAddress.getLocalHost();
        InetSocketAddress mockJmAddress = new InetSocketAddress(mockAddress, 12345);

        Configuration config = new Configuration();
        config.setString(JobManagerOptions.ADDRESS, mockJmAddress.getHostName());
        config.setInteger(JobManagerOptions.PORT, mockJmAddress.getPort());

        Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
        Plan plan = (Plan) PackagedProgramUtils.getPipelineFromProgram(prg, config, -1, true);
        OptimizedPlan op = optimizer.compile(plan);
        assertNotNull(op);

        PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
        assertNotNull(dumper.getOptimizerPlanAsJSON(op));

        // test HTML escaping
        PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
        dumper2.setEncodeForHTML(true);
        String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

        assertEquals(-1, htmlEscaped.indexOf('\\'));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #10
Source File: ClusterClient.java From flink with Apache License 2.0

/**
 * Creates an instance that submits the programs to the JobManager defined in the
 * configuration. This method will try to resolve the JobManager hostname and throw an exception
 * if that is not possible.
 *
 * @param flinkConfig The config used to obtain the job-manager's address, and used to configure the optimizer.
 * @param highAvailabilityServices HighAvailabilityServices to use for leader retrieval
 * @param sharedHaServices true if the HighAvailabilityServices are shared and must not be shut down
 */
public ClusterClient(
        Configuration flinkConfig,
        HighAvailabilityServices highAvailabilityServices,
        boolean sharedHaServices) {
    this.flinkConfig = Preconditions.checkNotNull(flinkConfig);
    this.compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);

    this.timeout = AkkaUtils.getClientTimeout(flinkConfig);

    this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityServices);
    this.sharedHaServices = sharedHaServices;
}
Example #11
Source File: PlanTranslator.java From flink with Apache License 2.0

@Override
public String translateToJSONExecutionPlan(Pipeline pipeline) {
    checkArgument(pipeline instanceof Plan, "Given pipeline is not a DataSet Plan.");

    Plan plan = (Plan) pipeline;

    Optimizer opt = new Optimizer(
        new DataStatistics(),
        new DefaultCostEstimator(),
        new Configuration());
    OptimizedPlan optPlan = opt.compile(plan);

    return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}
Example #12
Source File: CompilerTestBase.java From flink with Apache License 2.0

@Before
public void setup() {
    Configuration flinkConf = new Configuration();
    this.dataStats = new DataStatistics();
    this.withStatsCompiler = new Optimizer(this.dataStats, new DefaultCostEstimator(), flinkConf);
    this.withStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);

    this.noStatsCompiler = new Optimizer(null, new DefaultCostEstimator(), flinkConf);
    this.noStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);
}
Example #13
Source File: ExecutionPlanCreationTest.java From flink with Apache License 2.0

@Test
public void testGetExecutionPlan() {
    try {
        PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp");
        assertNotNull(prg.getPreviewPlan());

        InetAddress mockAddress = InetAddress.getLocalHost();
        InetSocketAddress mockJmAddress = new InetSocketAddress(mockAddress, 12345);

        Configuration config = new Configuration();
        config.setString(JobManagerOptions.ADDRESS, mockJmAddress.getHostName());
        config.setInteger(JobManagerOptions.PORT, mockJmAddress.getPort());

        Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
        OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, -1);
        assertNotNull(op);

        PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
        assertNotNull(dumper.getOptimizerPlanAsJSON(op));

        // test HTML escaping
        PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
        dumper2.setEncodeForHTML(true);
        String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

        assertEquals(-1, htmlEscaped.indexOf('\\'));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #14
Source File: ExecutionPlanJSONGenerator.java From flink with Apache License 2.0

@Override
public String getExecutionPlan(Plan plan) {
    Optimizer opt = new Optimizer(
        new DataStatistics(),
        new DefaultCostEstimator(),
        new Configuration());
    OptimizedPlan optPlan = opt.compile(plan);
    return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}
Example #15
Source File: ExecutionContext.java From Flink-CEPplus with Apache License 2.0

private FlinkPlan createPlan(String name, Configuration flinkConfig) {
    if (streamExecEnv != null) {
        final StreamGraph graph = streamExecEnv.getStreamGraph();
        graph.setJobName(name);
        return graph;
    } else {
        final int parallelism = execEnv.getParallelism();
        final Plan unoptimizedPlan = execEnv.createProgramPlan();
        unoptimizedPlan.setJobName(name);
        final Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);
        return ClusterClient.getOptimizedPlan(compiler, unoptimizedPlan, parallelism);
    }
}
Example #16
Source File: CompilerTestBase.java From Flink-CEPplus with Apache License 2.0

@Before
public void setup() {
    Configuration flinkConf = new Configuration();
    this.dataStats = new DataStatistics();
    this.withStatsCompiler = new Optimizer(this.dataStats, new DefaultCostEstimator(), flinkConf);
    this.withStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);

    this.noStatsCompiler = new Optimizer(null, new DefaultCostEstimator(), flinkConf);
    this.noStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);
}
Example #17
Source File: ExecutionPlanCreationTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testGetExecutionPlan() {
    try {
        PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp");
        assertNotNull(prg.getPreviewPlan());

        InetAddress mockAddress = InetAddress.getLocalHost();
        InetSocketAddress mockJmAddress = new InetSocketAddress(mockAddress, 12345);

        Configuration config = new Configuration();
        config.setString(JobManagerOptions.ADDRESS, mockJmAddress.getHostName());
        config.setInteger(JobManagerOptions.PORT, mockJmAddress.getPort());

        Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
        OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, -1);
        assertNotNull(op);

        PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
        assertNotNull(dumper.getOptimizerPlanAsJSON(op));

        // test HTML escaping
        PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
        dumper2.setEncodeForHTML(true);
        String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

        assertEquals(-1, htmlEscaped.indexOf('\\'));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #18
Source File: TestUtils.java From flink with Apache License 2.0

/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full parallelism operators
 */
static void verifyParallelism(String[] arguments, String... fullParallelismOperatorNames) throws Exception {
    // set a reduced parallelism for the algorithm runner
    final int parallelism = 8;
    arguments = ArrayUtils.addAll(arguments, "--__parallelism", Integer.toString(parallelism));

    // configure the runner but do not execute
    Runner runner = new Runner(arguments).run();

    // we cannot use the actual DataSink since DataSet#writeAsCsv also
    // executes the program; instead, we receive the DataSet and configure
    // with a DiscardingOutputFormat
    DataSet result = runner.getResult();
    if (result != null) {
        result.output(new DiscardingOutputFormat());
    }

    // set the default parallelism higher than the expected parallelism
    ExecutionEnvironment env = runner.getExecutionEnvironment();
    env.setParallelism(2 * parallelism);

    // add default regex exclusions for the added DiscardingOutputFormat
    // and also for any preceding GraphKeyTypeTransform
    List<Pattern> patterns = new ArrayList<>();
    patterns.add(Pattern.compile("DataSink \\(org\\.apache\\.flink\\.api\\.java\\.io\\.DiscardingOutputFormat@[0-9a-f]{1,8}\\)"));
    patterns.add(Pattern.compile("FlatMap \\(Translate results IDs\\)"));

    // add user regex patterns
    for (String largeOperatorName : fullParallelismOperatorNames) {
        patterns.add(Pattern.compile(largeOperatorName));
    }

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    // walk the job plan from sinks to sources
    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // skip operators matching an exclusion pattern; these are the
        // large-scale operators which run at full parallelism
        boolean matched = false;
        for (Pattern pattern : patterns) {
            matched |= pattern.matcher(node.getNodeName()).matches();
        }

        if (!matched) {
            // Data sources may have parallelism of 1, so simply check that the node
            // parallelism has not been increased by setting the default parallelism
            assertTrue("Wrong parallelism for " + node.toString(),
                node.getParallelism() <= parallelism);
        }

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
Example #19
Source File: TestUtils.java From Flink-CEPplus with Apache License 2.0

/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full parallelism operators
 */
static void verifyParallelism(String[] arguments, String... fullParallelismOperatorNames) throws Exception {
    // set a reduced parallelism for the algorithm runner
    final int parallelism = 8;
    arguments = ArrayUtils.addAll(arguments, "--__parallelism", Integer.toString(parallelism));

    // configure the runner but do not execute
    Runner runner = new Runner(arguments).run();

    // we cannot use the actual DataSink since DataSet#writeAsCsv also
    // executes the program; instead, we receive the DataSet and configure
    // with a DiscardingOutputFormat
    DataSet result = runner.getResult();
    if (result != null) {
        result.output(new DiscardingOutputFormat());
    }

    // set the default parallelism higher than the expected parallelism
    ExecutionEnvironment env = runner.getExecutionEnvironment();
    env.setParallelism(2 * parallelism);

    // add default regex exclusions for the added DiscardingOutputFormat
    // and also for any preceding GraphKeyTypeTransform
    List<Pattern> patterns = new ArrayList<>();
    patterns.add(Pattern.compile("DataSink \\(org\\.apache\\.flink\\.api\\.java\\.io\\.DiscardingOutputFormat@[0-9a-f]{1,8}\\)"));
    patterns.add(Pattern.compile("FlatMap \\(Translate results IDs\\)"));

    // add user regex patterns
    for (String largeOperatorName : fullParallelismOperatorNames) {
        patterns.add(Pattern.compile(largeOperatorName));
    }

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    // walk the job plan from sinks to sources
    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // skip operators matching an exclusion pattern; these are the
        // large-scale operators which run at full parallelism
        boolean matched = false;
        for (Pattern pattern : patterns) {
            matched |= pattern.matcher(node.getNodeName()).matches();
        }

        if (!matched) {
            // Data sources may have parallelism of 1, so simply check that the node
            // parallelism has not been increased by setting the default parallelism
            assertTrue("Wrong parallelism for " + node.toString(),
                node.getParallelism() <= parallelism);
        }

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
Example #20
Source File: RemoteExecutor.java From Flink-CEPplus with Apache License 2.0

@Override
public String getOptimizerPlanAsJSON(Plan plan) throws Exception {
    Optimizer opt = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optPlan = opt.compile(plan);
    return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}
Example #21
Source File: CliFrontendPackageProgramTest.java From flink with Apache License 2.0

/**
 * Ensure that we will never have the following error.
 *
 * <pre>
 * org.apache.flink.client.program.ProgramInvocationException: The main method caused an error.
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:398)
 *     at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:301)
 *     at org.apache.flink.client.program.Client.getOptimizedPlan(Client.java:140)
 *     at org.apache.flink.client.program.Client.getOptimizedPlanAsJson(Client.java:125)
 *     at org.apache.flink.client.cli.CliFrontend.info(CliFrontend.java:439)
 *     at org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:931)
 *     at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:951)
 * Caused by: java.io.IOException: java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:102)
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:54)
 *     at tlabs.CDR_In_Report.createHCatInputFormat(CDR_In_Report.java:322)
 *     at tlabs.CDR_Out_Report.main(CDR_Out_Report.java:380)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
 *     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 *     at java.lang.reflect.Method.invoke(Method.java:622)
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:383)
 * </pre>
 *
 * <p>The test works as follows:
 *
 * <ul>
 *   <li>use the CliFrontend to invoke a jar file that loads a class which is only available
 *       in the jar file itself (via a custom classloader)
 *   <li>change the user-code classloader of the PackagedProgram to a special classloader for this test
 *   <li>the classloader will accept the special class (and return a String.class)
 * </ul>
 */
@Test
public void testPlanWithExternalClass() throws Exception {
    final boolean[] callme = { false }; // create a final object reference, to be able to change its value later

    try {
        String[] arguments = {
            "--classpath", "file:///tmp/foo",
            "--classpath", "file:///tmp/bar",
            "-c", TEST_JAR_CLASSLOADERTEST_CLASS, getTestJarPath(),
            "true", "arg1", "arg2" };

        URL[] classpath = new URL[] { new URL("file:///tmp/foo"), new URL("file:///tmp/bar") };
        String[] reducedArguments = { "true", "arg1", "arg2" };

        CommandLine commandLine = CliFrontendParser.parse(CliFrontendParser.RUN_OPTIONS, arguments, true);
        ProgramOptions programOptions = ProgramOptions.create(commandLine);
        assertEquals(getTestJarPath(), programOptions.getJarFilePath());
        assertArrayEquals(classpath, programOptions.getClasspaths().toArray());
        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, programOptions.getEntryPointClassName());
        assertArrayEquals(reducedArguments, programOptions.getProgramArgs());

        PackagedProgram prog = spy(frontend.buildProgram(programOptions));

        ClassLoader testClassLoader = new ClassLoader(prog.getUserCodeClassLoader()) {
            @Override
            public Class<?> loadClass(String name) throws ClassNotFoundException {
                if ("org.apache.hadoop.hive.ql.io.RCFileInputFormat".equals(name)) {
                    callme[0] = true;
                    return String.class; // intentionally return the wrong class
                } else {
                    return super.loadClass(name);
                }
            }
        };
        when(prog.getUserCodeClassLoader()).thenReturn(testClassLoader);

        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, prog.getMainClassName());
        assertArrayEquals(reducedArguments, prog.getArguments());

        Configuration c = new Configuration();
        Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), c);

        // we expect this to fail with a "ClassNotFoundException"
        Pipeline pipeline = PackagedProgramUtils.getPipelineFromProgram(prog, c, 666, true);
        FlinkPipelineTranslationUtil.translateToJSONExecutionPlan(pipeline);
        fail("Should have failed with a ClassNotFoundException");
    } catch (ProgramInvocationException e) {
        if (!(e.getCause() instanceof ClassNotFoundException)) {
            e.printStackTrace();
            fail("Program didn't throw ClassNotFoundException");
        }
        assertTrue("Classloader was not called", callme[0]);
    }
}
Example #22
Source File: PackagedProgramUtils.java From Flink-CEPplus with Apache License 2.0

/**
 * Creates a {@link JobGraph} with a specified {@link JobID}
 * from the given {@link PackagedProgram}.
 *
 * @param packagedProgram to extract the JobGraph from
 * @param configuration to use for the optimizer and job graph generator
 * @param defaultParallelism for the JobGraph
 * @param jobID the pre-generated job id
 * @return JobGraph extracted from the PackagedProgram
 * @throws ProgramInvocationException if the JobGraph generation failed
 */
public static JobGraph createJobGraph(
        PackagedProgram packagedProgram,
        Configuration configuration,
        int defaultParallelism,
        @Nullable JobID jobID) throws ProgramInvocationException {
    Thread.currentThread().setContextClassLoader(packagedProgram.getUserCodeClassLoader());
    final Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), configuration);
    final FlinkPlan flinkPlan;

    if (packagedProgram.isUsingProgramEntryPoint()) {
        final JobWithJars jobWithJars = packagedProgram.getPlanWithJars();
        final Plan plan = jobWithJars.getPlan();

        if (plan.getDefaultParallelism() <= 0) {
            plan.setDefaultParallelism(defaultParallelism);
        }

        flinkPlan = optimizer.compile(jobWithJars.getPlan());
    } else if (packagedProgram.isUsingInteractiveMode()) {
        final OptimizerPlanEnvironment optimizerPlanEnvironment = new OptimizerPlanEnvironment(optimizer);
        optimizerPlanEnvironment.setParallelism(defaultParallelism);
        flinkPlan = optimizerPlanEnvironment.getOptimizedPlan(packagedProgram);
    } else {
        throw new ProgramInvocationException("PackagedProgram does not have a valid invocation mode.");
    }

    final JobGraph jobGraph;

    if (flinkPlan instanceof StreamingPlan) {
        jobGraph = ((StreamingPlan) flinkPlan).getJobGraph(jobID);
        jobGraph.setSavepointRestoreSettings(packagedProgram.getSavepointSettings());
    } else {
        final JobGraphGenerator jobGraphGenerator = new JobGraphGenerator(configuration);
        jobGraph = jobGraphGenerator.compileJobGraph((OptimizedPlan) flinkPlan, jobID);
    }

    for (URL url : packagedProgram.getAllLibraries()) {
        try {
            jobGraph.addJar(new Path(url.toURI()));
        } catch (URISyntaxException e) {
            throw new ProgramInvocationException("Invalid URL for jar file: " + url + '.', jobGraph.getJobID(), e);
        }
    }

    jobGraph.setClasspaths(packagedProgram.getClasspaths());

    return jobGraph;
}
Example #23
Source File: CliFrontend.java From Flink-CEPplus with Apache License 2.0

/**
 * Executes the info action.
 *
 * @param args Command line arguments for the info action.
 */
protected void info(String[] args) throws CliArgsException, FileNotFoundException, ProgramInvocationException {
    LOG.info("Running 'info' command.");

    final Options commandOptions = CliFrontendParser.getInfoCommandOptions();

    final CommandLine commandLine = CliFrontendParser.parse(commandOptions, args, true);

    InfoOptions infoOptions = new InfoOptions(commandLine);

    // evaluate help flag
    if (infoOptions.isPrintHelp()) {
        CliFrontendParser.printHelpForInfo();
        return;
    }

    if (infoOptions.getJarFilePath() == null) {
        throw new CliArgsException("The program JAR file was not specified.");
    }

    // -------- build the packaged program -------------

    LOG.info("Building program from JAR file");
    final PackagedProgram program = buildProgram(infoOptions);

    try {
        int parallelism = infoOptions.getParallelism();
        if (ExecutionConfig.PARALLELISM_DEFAULT == parallelism) {
            parallelism = defaultParallelism;
        }

        LOG.info("Creating program plan dump");

        Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), configuration);
        FlinkPlan flinkPlan = ClusterClient.getOptimizedPlan(compiler, program, parallelism);

        String jsonPlan = null;
        if (flinkPlan instanceof OptimizedPlan) {
            jsonPlan = new PlanJSONDumpGenerator().getOptimizerPlanAsJSON((OptimizedPlan) flinkPlan);
        } else if (flinkPlan instanceof StreamingPlan) {
            jsonPlan = ((StreamingPlan) flinkPlan).getStreamingPlanAsJSON();
        }

        if (jsonPlan != null) {
            System.out.println("----------------------- Execution Plan -----------------------");
            System.out.println(jsonPlan);
            System.out.println("--------------------------------------------------------------");
        } else {
            System.out.println("JSON plan could not be generated.");
        }

        String description = program.getDescription();
        if (description != null) {
            System.out.println();
            System.out.println(description);
        } else {
            System.out.println();
            System.out.println("No description provided.");
        }
    } finally {
        program.deleteExtractedLibraries();
    }
}
Example #24
Source File: CliFrontendPackageProgramTest.java From Flink-CEPplus with Apache License 2.0

/**
 * Ensure that we will never have the following error.
 *
 * <pre>
 * org.apache.flink.client.program.ProgramInvocationException: The main method caused an error.
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:398)
 *     at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:301)
 *     at org.apache.flink.client.program.Client.getOptimizedPlan(Client.java:140)
 *     at org.apache.flink.client.program.Client.getOptimizedPlanAsJson(Client.java:125)
 *     at org.apache.flink.client.cli.CliFrontend.info(CliFrontend.java:439)
 *     at org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:931)
 *     at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:951)
 * Caused by: java.io.IOException: java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:102)
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:54)
 *     at tlabs.CDR_In_Report.createHCatInputFormat(CDR_In_Report.java:322)
 *     at tlabs.CDR_Out_Report.main(CDR_Out_Report.java:380)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
 *     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 *     at java.lang.reflect.Method.invoke(Method.java:622)
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:383)
 * </pre>
 *
 * <p>The test works as follows:
 *
 * <ul>
 *   <li>use the CliFrontend to invoke a jar file that loads a class which is only available
 *       in the jar file itself (via a custom classloader)
 *   <li>change the user-code classloader of the PackagedProgram to a special classloader for this test
 *   <li>the classloader will accept the special class (and return a String.class)
 * </ul>
 */
@Test
public void testPlanWithExternalClass() throws Exception {
    final boolean[] callme = { false }; // create a final object reference, to be able to change its value later

    try {
        String[] arguments = {
            "--classpath", "file:///tmp/foo",
            "--classpath", "file:///tmp/bar",
            "-c", TEST_JAR_CLASSLOADERTEST_CLASS, getTestJarPath(),
            "true", "arg1", "arg2" };

        URL[] classpath = new URL[] { new URL("file:///tmp/foo"), new URL("file:///tmp/bar") };
        String[] reducedArguments = { "true", "arg1", "arg2" };

        RunOptions options = CliFrontendParser.parseRunCommand(arguments);
        assertEquals(getTestJarPath(), options.getJarFilePath());
        assertArrayEquals(classpath, options.getClasspaths().toArray());
        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, options.getEntryPointClassName());
        assertArrayEquals(reducedArguments, options.getProgramArgs());

        PackagedProgram prog = spy(frontend.buildProgram(options));

        ClassLoader testClassLoader = new ClassLoader(prog.getUserCodeClassLoader()) {
            @Override
            public Class<?> loadClass(String name) throws ClassNotFoundException {
                if ("org.apache.hadoop.hive.ql.io.RCFileInputFormat".equals(name)) {
                    callme[0] = true;
                    return String.class; // intentionally return the wrong class
                } else {
                    return super.loadClass(name);
                }
            }
        };
        when(prog.getUserCodeClassLoader()).thenReturn(testClassLoader);

        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, prog.getMainClassName());
        assertArrayEquals(reducedArguments, prog.getArguments());

        Configuration c = new Configuration();
        Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), c);

        // we expect this to fail with a "ClassNotFoundException"
        ClusterClient.getOptimizedPlanAsJson(compiler, prog, 666);
        fail("Should have failed with a ClassNotFoundException");
    } catch (ProgramInvocationException e) {
        if (!(e.getCause() instanceof ClassNotFoundException)) {
            e.printStackTrace();
            fail("Program didn't throw ClassNotFoundException");
        }
        assertTrue("Classloader was not called", callme[0]);
    }
}
Example #25
Source File: RemoteExecutor.java From flink with Apache License 2.0

@Override
public String getOptimizerPlanAsJSON(Plan plan) throws Exception {
    Optimizer opt = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optPlan = opt.compile(plan);
    return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}
Example #26
Source File: CliFrontendPackageProgramTest.java From flink with Apache License 2.0

/**
 * Ensure that we will never have the following error.
 *
 * <pre>
 * org.apache.flink.client.program.ProgramInvocationException: The main method caused an error.
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:398)
 *     at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:301)
 *     at org.apache.flink.client.program.Client.getOptimizedPlan(Client.java:140)
 *     at org.apache.flink.client.program.Client.getOptimizedPlanAsJson(Client.java:125)
 *     at org.apache.flink.client.cli.CliFrontend.info(CliFrontend.java:439)
 *     at org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:931)
 *     at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:951)
 * Caused by: java.io.IOException: java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:102)
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:54)
 *     at tlabs.CDR_In_Report.createHCatInputFormat(CDR_In_Report.java:322)
 *     at tlabs.CDR_Out_Report.main(CDR_Out_Report.java:380)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
 *     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 *     at java.lang.reflect.Method.invoke(Method.java:622)
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:383)
 * </pre>
 *
 * <p>The test works as follows:
 *
 * <ul>
 *   <li>use the CliFrontend to invoke a jar file that loads a class which is only available
 *       in the jar file itself (via a custom classloader)
 *   <li>change the user-code classloader of the PackagedProgram to a special classloader for this test
 *   <li>the classloader will accept the special class (and return a String.class)
 * </ul>
 */
@Test
public void testPlanWithExternalClass() throws Exception {
    final boolean[] callme = { false }; // create a final object reference, to be able to change its value later

    try {
        String[] arguments = {
            "--classpath", "file:///tmp/foo",
            "--classpath", "file:///tmp/bar",
            "-c", TEST_JAR_CLASSLOADERTEST_CLASS, getTestJarPath(),
            "true", "arg1", "arg2" };

        URL[] classpath = new URL[] { new URL("file:///tmp/foo"), new URL("file:///tmp/bar") };
        String[] reducedArguments = { "true", "arg1", "arg2" };

        RunOptions options = CliFrontendParser.parseRunCommand(arguments);
        assertEquals(getTestJarPath(), options.getJarFilePath());
        assertArrayEquals(classpath, options.getClasspaths().toArray());
        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, options.getEntryPointClassName());
        assertArrayEquals(reducedArguments, options.getProgramArgs());

        PackagedProgram prog = spy(frontend.buildProgram(options));

        ClassLoader testClassLoader = new ClassLoader(prog.getUserCodeClassLoader()) {
            @Override
            public Class<?> loadClass(String name) throws ClassNotFoundException {
                if ("org.apache.hadoop.hive.ql.io.RCFileInputFormat".equals(name)) {
                    callme[0] = true;
                    return String.class; // intentionally return the wrong class
                } else {
                    return super.loadClass(name);
                }
            }
        };
        when(prog.getUserCodeClassLoader()).thenReturn(testClassLoader);

        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, prog.getMainClassName());
        assertArrayEquals(reducedArguments, prog.getArguments());

        Configuration c = new Configuration();
        Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), c);

        // we expect this to fail with a "ClassNotFoundException"
        ClusterClient.getOptimizedPlanAsJson(compiler, prog, 666);
        fail("Should have failed with a ClassNotFoundException");
    } catch (ProgramInvocationException e) {
        if (!(e.getCause() instanceof ClassNotFoundException)) {
            e.printStackTrace();
            fail("Program didn't throw ClassNotFoundException");
        }
        assertTrue("Classloader was not called", callme[0]);
    }
}
Example #27
Source File: CliFrontend.java From flink with Apache License 2.0

/**
 * Executes the info action.
 *
 * @param args Command line arguments for the info action.
 */
protected void info(String[] args) throws CliArgsException, FileNotFoundException, ProgramInvocationException {
    LOG.info("Running 'info' command.");

    final Options commandOptions = CliFrontendParser.getInfoCommandOptions();

    final CommandLine commandLine = CliFrontendParser.parse(commandOptions, args, true);

    InfoOptions infoOptions = new InfoOptions(commandLine);

    // evaluate help flag
    if (infoOptions.isPrintHelp()) {
        CliFrontendParser.printHelpForInfo();
        return;
    }

    if (infoOptions.getJarFilePath() == null) {
        throw new CliArgsException("The program JAR file was not specified.");
    }

    // -------- build the packaged program -------------

    LOG.info("Building program from JAR file");
    final PackagedProgram program = buildProgram(infoOptions);

    try {
        int parallelism = infoOptions.getParallelism();
        if (ExecutionConfig.PARALLELISM_DEFAULT == parallelism) {
            parallelism = defaultParallelism;
        }

        LOG.info("Creating program plan dump");

        Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), configuration);
        FlinkPlan flinkPlan = ClusterClient.getOptimizedPlan(compiler, program, parallelism);

        String jsonPlan = null;
        if (flinkPlan instanceof OptimizedPlan) {
            jsonPlan = new PlanJSONDumpGenerator().getOptimizerPlanAsJSON((OptimizedPlan) flinkPlan);
        } else if (flinkPlan instanceof StreamingPlan) {
            jsonPlan = ((StreamingPlan) flinkPlan).getStreamingPlanAsJSON();
        }

        if (jsonPlan != null) {
            System.out.println("----------------------- Execution Plan -----------------------");
            System.out.println(jsonPlan);
            System.out.println("--------------------------------------------------------------");
        } else {
            System.out.println("JSON plan could not be generated.");
        }

        String description = program.getDescription();
        if (description != null) {
            System.out.println();
            System.out.println(description);
        } else {
            System.out.println();
            System.out.println("No description provided.");
        }
    } finally {
        program.deleteExtractedLibraries();
    }
}
Example #28
Source File: PackagedProgramUtils.java From flink with Apache License 2.0

/**
 * Creates a {@link JobGraph} with a specified {@link JobID}
 * from the given {@link PackagedProgram}.
 *
 * @param packagedProgram to extract the JobGraph from
 * @param configuration to use for the optimizer and job graph generator
 * @param defaultParallelism for the JobGraph
 * @param jobID the pre-generated job id
 * @return JobGraph extracted from the PackagedProgram
 * @throws ProgramInvocationException if the JobGraph generation failed
 */
public static JobGraph createJobGraph(
        PackagedProgram packagedProgram,
        Configuration configuration,
        int defaultParallelism,
        @Nullable JobID jobID) throws ProgramInvocationException {
    Thread.currentThread().setContextClassLoader(packagedProgram.getUserCodeClassLoader());
    final Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), configuration);
    final FlinkPlan flinkPlan;

    if (packagedProgram.isUsingProgramEntryPoint()) {
        final JobWithJars jobWithJars = packagedProgram.getPlanWithJars();
        final Plan plan = jobWithJars.getPlan();

        if (plan.getDefaultParallelism() <= 0) {
            plan.setDefaultParallelism(defaultParallelism);
        }

        flinkPlan = optimizer.compile(jobWithJars.getPlan());
    } else if (packagedProgram.isUsingInteractiveMode()) {
        final OptimizerPlanEnvironment optimizerPlanEnvironment = new OptimizerPlanEnvironment(optimizer);
        optimizerPlanEnvironment.setParallelism(defaultParallelism);
        flinkPlan = optimizerPlanEnvironment.getOptimizedPlan(packagedProgram);
    } else {
        throw new ProgramInvocationException("PackagedProgram does not have a valid invocation mode.");
    }

    final JobGraph jobGraph;

    if (flinkPlan instanceof StreamingPlan) {
        jobGraph = ((StreamingPlan) flinkPlan).getJobGraph(jobID);
        jobGraph.setSavepointRestoreSettings(packagedProgram.getSavepointSettings());
    } else {
        final JobGraphGenerator jobGraphGenerator = new JobGraphGenerator(configuration);
        jobGraph = jobGraphGenerator.compileJobGraph((OptimizedPlan) flinkPlan, jobID);
    }

    for (URL url : packagedProgram.getAllLibraries()) {
        try {
            jobGraph.addJar(new Path(url.toURI()));
        } catch (URISyntaxException e) {
            throw new ProgramInvocationException("Invalid URL for jar file: " + url + '.', jobGraph.getJobID(), e);
        }
    }

    jobGraph.setClasspaths(packagedProgram.getClasspaths());

    return jobGraph;
}