org.apache.flink.optimizer.costs.DefaultCostEstimator Java Examples
The following examples show how to use
org.apache.flink.optimizer.costs.DefaultCostEstimator.
Each example is listed with the source file and project it was taken from.
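Across the examples, DefaultCostEstimator is nearly always handed to the Optimizer constructor together with a DataStatistics instance (or null) and a Flink Configuration, and the resulting OptimizedPlan is then dumped as JSON or turned into a JobGraph. The sketch below condenses that recurring construction into one helper. It is a minimal illustration, not code from any of the projects listed: the class name PlanDumpSketch, the method name dumpPlanAsJson, and the env parameter are assumptions introduced here.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plandump.PlanJSONDumpGenerator;

public final class PlanDumpSketch {

    // Compiles the environment's batch program with the default cost model
    // and returns the optimized plan rendered as JSON.
    public static String dumpPlanAsJson(ExecutionEnvironment env) {
        // An empty DataStatistics simply means no base statistics are
        // registered; several of the test examples below pass null instead.
        Optimizer optimizer = new Optimizer(
            new DataStatistics(),
            new DefaultCostEstimator(),
            new Configuration());

        Plan plan = env.createProgramPlan();
        OptimizedPlan optimizedPlan = optimizer.compile(plan);

        return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optimizedPlan);
    }
}

This is the same Optimizer construction used verbatim by the PlanTranslator, RemoteExecutor, and ExecutionPlanJSONGenerator examples below.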
Example #1
Source File: ClientTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testGetExecutionPlan() throws ProgramInvocationException {
    PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp");
    assertNotNull(prg.getPreviewPlan());

    Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
    OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, 1);
    assertNotNull(op);

    PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
    assertNotNull(dumper.getOptimizerPlanAsJSON(op));

    // test HTML escaping
    PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
    dumper2.setEncodeForHTML(true);
    String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

    assertEquals(-1, htmlEscaped.indexOf('\\'));
}
Example #2
Source File: TestUtils.java From flink with Apache License 2.0

/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
    env.setParallelism(2 * expectedParallelism);

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // Data sources may have parallelism of 1, so simply check that the node
        // parallelism has not been increased by setting the default parallelism
        assertTrue("Wrong parallelism for " + node.toString(),
            node.getParallelism() <= expectedParallelism);

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
Example #3
Source File: ExecutionContext.java From flink with Apache License 2.0

private FlinkPlan createPlan(String name, Configuration flinkConfig) {
    if (streamExecEnv != null) {
        // special case for Blink planner to apply batch optimizations
        // note: it also modifies the ExecutionConfig!
        if (executor instanceof ExecutorBase) {
            return ((ExecutorBase) executor).generateStreamGraph(name);
        }
        return streamExecEnv.getStreamGraph(name);
    } else {
        final int parallelism = execEnv.getParallelism();
        final Plan unoptimizedPlan = execEnv.createProgramPlan();
        unoptimizedPlan.setJobName(name);
        final Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);
        return ClusterClient.getOptimizedPlan(compiler, unoptimizedPlan, parallelism);
    }
}
Example #4
Source File: ClientTest.java From flink with Apache License 2.0

@Test
public void testGetExecutionPlan() throws ProgramInvocationException {
    PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp");
    assertNotNull(prg.getPreviewPlan());

    Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
    OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, 1);
    assertNotNull(op);

    PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
    assertNotNull(dumper.getOptimizerPlanAsJSON(op));

    // test HTML escaping
    PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
    dumper2.setEncodeForHTML(true);
    String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

    assertEquals(-1, htmlEscaped.indexOf('\\'));
}
Example #5
Source File: ClientTest.java From flink with Apache License 2.0

@Test
public void testGetExecutionPlan() throws ProgramInvocationException {
    PackagedProgram prg = PackagedProgram.newBuilder()
        .setEntryPointClassName(TestOptimizerPlan.class.getName())
        .setArguments("/dev/random", "/tmp")
        .build();

    Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
    Plan plan = (Plan) PackagedProgramUtils.getPipelineFromProgram(prg, new Configuration(), 1, true);
    OptimizedPlan op = optimizer.compile(plan);
    assertNotNull(op);

    PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
    assertNotNull(dumper.getOptimizerPlanAsJSON(op));

    // test HTML escaping
    PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
    dumper2.setEncodeForHTML(true);
    String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

    assertEquals(-1, htmlEscaped.indexOf('\\'));
}
Example #6
Source File: ClusterClient.java From Flink-CEPplus with Apache License 2.0

public ClusterClient(
        Configuration flinkConfig,
        HighAvailabilityServices highAvailabilityServices,
        boolean sharedHaServices,
        ActorSystemLoader actorSystemLoader) {
    this.flinkConfig = Preconditions.checkNotNull(flinkConfig);
    this.compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);

    this.timeout = AkkaUtils.getClientTimeout(flinkConfig);
    this.lookupTimeout = AkkaUtils.getLookupTimeout(flinkConfig);

    this.actorSystemLoader = Preconditions.checkNotNull(actorSystemLoader);

    this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityServices);
    this.sharedHaServices = sharedHaServices;
}
Example #7
Source File: ClusterClient.java From Flink-CEPplus with Apache License 2.0

/**
 * Creates an instance that submits the programs to the JobManager defined in the
 * configuration. This method will try to resolve the JobManager hostname and throw an exception
 * if that is not possible.
 *
 * @param flinkConfig The config used to obtain the job-manager's address, and used to configure the optimizer.
 * @param highAvailabilityServices HighAvailabilityServices to use for leader retrieval
 * @param sharedHaServices true if the HighAvailabilityServices are shared and must not be shut down
 */
public ClusterClient(
        Configuration flinkConfig,
        HighAvailabilityServices highAvailabilityServices,
        boolean sharedHaServices) {
    this.flinkConfig = Preconditions.checkNotNull(flinkConfig);
    this.compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);

    this.timeout = AkkaUtils.getClientTimeout(flinkConfig);
    this.lookupTimeout = AkkaUtils.getLookupTimeout(flinkConfig);

    this.actorSystemLoader = new LazyActorSystemLoader(
        highAvailabilityServices,
        Time.milliseconds(lookupTimeout.toMillis()),
        flinkConfig,
        log);

    this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityServices);
    this.sharedHaServices = sharedHaServices;
}
Example #8
Source File: TestUtils.java From Flink-CEPplus with Apache License 2.0

/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
    env.setParallelism(2 * expectedParallelism);

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // Data sources may have parallelism of 1, so simply check that the node
        // parallelism has not been increased by setting the default parallelism
        assertTrue("Wrong parallelism for " + node.toString(),
            node.getParallelism() <= expectedParallelism);

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
Example #9
Source File: ExecutionPlanCreationTest.java From flink with Apache License 2.0

@Test
public void testGetExecutionPlan() {
    try {
        PackagedProgram prg = PackagedProgram.newBuilder()
            .setEntryPointClassName(TestOptimizerPlan.class.getName())
            .setArguments("/dev/random", "/tmp")
            .build();

        InetAddress mockAddress = InetAddress.getLocalHost();
        InetSocketAddress mockJmAddress = new InetSocketAddress(mockAddress, 12345);

        Configuration config = new Configuration();
        config.setString(JobManagerOptions.ADDRESS, mockJmAddress.getHostName());
        config.setInteger(JobManagerOptions.PORT, mockJmAddress.getPort());

        Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
        Plan plan = (Plan) PackagedProgramUtils.getPipelineFromProgram(prg, config, -1, true);
        OptimizedPlan op = optimizer.compile(plan);
        assertNotNull(op);

        PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
        assertNotNull(dumper.getOptimizerPlanAsJSON(op));

        // test HTML escaping
        PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
        dumper2.setEncodeForHTML(true);
        String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

        assertEquals(-1, htmlEscaped.indexOf('\\'));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #10
Source File: ClusterClient.java From flink with Apache License 2.0

/**
 * Creates an instance that submits the programs to the JobManager defined in the
 * configuration. This method will try to resolve the JobManager hostname and throw an exception
 * if that is not possible.
 *
 * @param flinkConfig The config used to obtain the job-manager's address, and used to configure the optimizer.
 * @param highAvailabilityServices HighAvailabilityServices to use for leader retrieval
 * @param sharedHaServices true if the HighAvailabilityServices are shared and must not be shut down
 */
public ClusterClient(
        Configuration flinkConfig,
        HighAvailabilityServices highAvailabilityServices,
        boolean sharedHaServices) {
    this.flinkConfig = Preconditions.checkNotNull(flinkConfig);
    this.compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);

    this.timeout = AkkaUtils.getClientTimeout(flinkConfig);

    this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityServices);
    this.sharedHaServices = sharedHaServices;
}
Example #11
Source File: PlanTranslator.java From flink with Apache License 2.0

@Override
public String translateToJSONExecutionPlan(Pipeline pipeline) {
    checkArgument(pipeline instanceof Plan, "Given pipeline is not a DataSet Plan.");

    Plan plan = (Plan) pipeline;

    Optimizer opt = new Optimizer(
        new DataStatistics(),
        new DefaultCostEstimator(),
        new Configuration());
    OptimizedPlan optPlan = opt.compile(plan);

    return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}
Example #12
Source File: CompilerTestBase.java From flink with Apache License 2.0

@Before
public void setup() {
    Configuration flinkConf = new Configuration();
    this.dataStats = new DataStatistics();
    this.withStatsCompiler = new Optimizer(this.dataStats, new DefaultCostEstimator(), flinkConf);
    this.withStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);

    this.noStatsCompiler = new Optimizer(null, new DefaultCostEstimator(), flinkConf);
    this.noStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);
}
Example #13
Source File: ExecutionPlanCreationTest.java From flink with Apache License 2.0

@Test
public void testGetExecutionPlan() {
    try {
        PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp");
        assertNotNull(prg.getPreviewPlan());

        InetAddress mockAddress = InetAddress.getLocalHost();
        InetSocketAddress mockJmAddress = new InetSocketAddress(mockAddress, 12345);

        Configuration config = new Configuration();
        config.setString(JobManagerOptions.ADDRESS, mockJmAddress.getHostName());
        config.setInteger(JobManagerOptions.PORT, mockJmAddress.getPort());

        Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
        OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, -1);
        assertNotNull(op);

        PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
        assertNotNull(dumper.getOptimizerPlanAsJSON(op));

        // test HTML escaping
        PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
        dumper2.setEncodeForHTML(true);
        String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

        assertEquals(-1, htmlEscaped.indexOf('\\'));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #14
Source File: ExecutionPlanJSONGenerator.java From flink with Apache License 2.0

@Override
public String getExecutionPlan(Plan plan) {
    Optimizer opt = new Optimizer(
        new DataStatistics(),
        new DefaultCostEstimator(),
        new Configuration());
    OptimizedPlan optPlan = opt.compile(plan);
    return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}
Example #15
Source File: ExecutionContext.java From Flink-CEPplus with Apache License 2.0

private FlinkPlan createPlan(String name, Configuration flinkConfig) {
    if (streamExecEnv != null) {
        final StreamGraph graph = streamExecEnv.getStreamGraph();
        graph.setJobName(name);
        return graph;
    } else {
        final int parallelism = execEnv.getParallelism();
        final Plan unoptimizedPlan = execEnv.createProgramPlan();
        unoptimizedPlan.setJobName(name);
        final Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);
        return ClusterClient.getOptimizedPlan(compiler, unoptimizedPlan, parallelism);
    }
}
Example #16
Source File: CompilerTestBase.java From Flink-CEPplus with Apache License 2.0

@Before
public void setup() {
    Configuration flinkConf = new Configuration();
    this.dataStats = new DataStatistics();
    this.withStatsCompiler = new Optimizer(this.dataStats, new DefaultCostEstimator(), flinkConf);
    this.withStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);

    this.noStatsCompiler = new Optimizer(null, new DefaultCostEstimator(), flinkConf);
    this.noStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);
}
Example #17
Source File: ExecutionPlanCreationTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testGetExecutionPlan() {
    try {
        PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp");
        assertNotNull(prg.getPreviewPlan());

        InetAddress mockAddress = InetAddress.getLocalHost();
        InetSocketAddress mockJmAddress = new InetSocketAddress(mockAddress, 12345);

        Configuration config = new Configuration();
        config.setString(JobManagerOptions.ADDRESS, mockJmAddress.getHostName());
        config.setInteger(JobManagerOptions.PORT, mockJmAddress.getPort());

        Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
        OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, -1);
        assertNotNull(op);

        PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
        assertNotNull(dumper.getOptimizerPlanAsJSON(op));

        // test HTML escaping
        PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
        dumper2.setEncodeForHTML(true);
        String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

        assertEquals(-1, htmlEscaped.indexOf('\\'));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #18
Source File: TestUtils.java From flink with Apache License 2.0

/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full parallelism operators
 */
static void verifyParallelism(String[] arguments, String... fullParallelismOperatorNames) throws Exception {
    // set a reduced parallelism for the algorithm runner
    final int parallelism = 8;
    arguments = ArrayUtils.addAll(arguments, "--__parallelism", Integer.toString(parallelism));

    // configure the runner but do not execute
    Runner runner = new Runner(arguments).run();

    // we cannot use the actual DataSink since DataSet#writeAsCsv also
    // executes the program; instead, we receive the DataSet and configure
    // with a DiscardingOutputFormat
    DataSet result = runner.getResult();
    if (result != null) {
        result.output(new DiscardingOutputFormat());
    }

    // set the default parallelism higher than the expected parallelism
    ExecutionEnvironment env = runner.getExecutionEnvironment();
    env.setParallelism(2 * parallelism);

    // add default regex exclusions for the added DiscardingOutputFormat
    // and also for any preceding GraphKeyTypeTransform
    List<Pattern> patterns = new ArrayList<>();
    patterns.add(Pattern.compile("DataSink \\(org\\.apache\\.flink\\.api\\.java\\.io\\.DiscardingOutputFormat@[0-9a-f]{1,8}\\)"));
    patterns.add(Pattern.compile("FlatMap \\(Translate results IDs\\)"));

    // add user regex patterns
    for (String largeOperatorName : fullParallelismOperatorNames) {
        patterns.add(Pattern.compile(largeOperatorName));
    }

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    // walk the job plan from sinks to sources
    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // skip operators matching an exclusion pattern; these are the
        // large-scale operators which run at full parallelism
        boolean matched = false;
        for (Pattern pattern : patterns) {
            matched |= pattern.matcher(node.getNodeName()).matches();
        }

        if (!matched) {
            // Data sources may have parallelism of 1, so simply check that the node
            // parallelism has not been increased by setting the default parallelism
            assertTrue("Wrong parallelism for " + node.toString(),
                node.getParallelism() <= parallelism);
        }

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
Example #19
Source File: TestUtils.java From Flink-CEPplus with Apache License 2.0

/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full parallelism operators
 */
static void verifyParallelism(String[] arguments, String... fullParallelismOperatorNames) throws Exception {
    // set a reduced parallelism for the algorithm runner
    final int parallelism = 8;
    arguments = ArrayUtils.addAll(arguments, "--__parallelism", Integer.toString(parallelism));

    // configure the runner but do not execute
    Runner runner = new Runner(arguments).run();

    // we cannot use the actual DataSink since DataSet#writeAsCsv also
    // executes the program; instead, we receive the DataSet and configure
    // with a DiscardingOutputFormat
    DataSet result = runner.getResult();
    if (result != null) {
        result.output(new DiscardingOutputFormat());
    }

    // set the default parallelism higher than the expected parallelism
    ExecutionEnvironment env = runner.getExecutionEnvironment();
    env.setParallelism(2 * parallelism);

    // add default regex exclusions for the added DiscardingOutputFormat
    // and also for any preceding GraphKeyTypeTransform
    List<Pattern> patterns = new ArrayList<>();
    patterns.add(Pattern.compile("DataSink \\(org\\.apache\\.flink\\.api\\.java\\.io\\.DiscardingOutputFormat@[0-9a-f]{1,8}\\)"));
    patterns.add(Pattern.compile("FlatMap \\(Translate results IDs\\)"));

    // add user regex patterns
    for (String largeOperatorName : fullParallelismOperatorNames) {
        patterns.add(Pattern.compile(largeOperatorName));
    }

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    // walk the job plan from sinks to sources
    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // skip operators matching an exclusion pattern; these are the
        // large-scale operators which run at full parallelism
        boolean matched = false;
        for (Pattern pattern : patterns) {
            matched |= pattern.matcher(node.getNodeName()).matches();
        }

        if (!matched) {
            // Data sources may have parallelism of 1, so simply check that the node
            // parallelism has not been increased by setting the default parallelism
            assertTrue("Wrong parallelism for " + node.toString(),
                node.getParallelism() <= parallelism);
        }

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
Example #20
Source File: RemoteExecutor.java From Flink-CEPplus with Apache License 2.0

@Override
public String getOptimizerPlanAsJSON(Plan plan) throws Exception {
    Optimizer opt = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optPlan = opt.compile(plan);
    return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}
Example #21
Source File: CliFrontendPackageProgramTest.java From flink with Apache License 2.0

/**
 * Ensure that we will never have the following error.
 *
 * <pre>
 * org.apache.flink.client.program.ProgramInvocationException: The main method caused an error.
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:398)
 *     at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:301)
 *     at org.apache.flink.client.program.Client.getOptimizedPlan(Client.java:140)
 *     at org.apache.flink.client.program.Client.getOptimizedPlanAsJson(Client.java:125)
 *     at org.apache.flink.client.cli.CliFrontend.info(CliFrontend.java:439)
 *     at org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:931)
 *     at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:951)
 * Caused by: java.io.IOException: java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:102)
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:54)
 *     at tlabs.CDR_In_Report.createHCatInputFormat(CDR_In_Report.java:322)
 *     at tlabs.CDR_Out_Report.main(CDR_Out_Report.java:380)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
 *     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 *     at java.lang.reflect.Method.invoke(Method.java:622)
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:383)
 * </pre>
 *
 * <p>The test works as follows:
 *
 * <ul>
 *   <li>use the CliFrontend to invoke a jar file that loads a class which is only available
 *       in the jar file itself (via a custom classloader)
 *   <li>change the user-code classloader of the PackagedProgram to a special classloader for this test
 *   <li>the classloader will accept the special class (and return a String.class)
 * </ul>
 */
@Test
public void testPlanWithExternalClass() throws Exception {
    final boolean[] callme = { false }; // create a final object reference, to be able to change its value later

    try {
        String[] arguments = {
            "--classpath", "file:///tmp/foo",
            "--classpath", "file:///tmp/bar",
            "-c", TEST_JAR_CLASSLOADERTEST_CLASS, getTestJarPath(),
            "true", "arg1", "arg2" };

        URL[] classpath = new URL[] { new URL("file:///tmp/foo"), new URL("file:///tmp/bar") };
        String[] reducedArguments = { "true", "arg1", "arg2" };

        CommandLine commandLine = CliFrontendParser.parse(CliFrontendParser.RUN_OPTIONS, arguments, true);
        ProgramOptions programOptions = ProgramOptions.create(commandLine);
        assertEquals(getTestJarPath(), programOptions.getJarFilePath());
        assertArrayEquals(classpath, programOptions.getClasspaths().toArray());
        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, programOptions.getEntryPointClassName());
        assertArrayEquals(reducedArguments, programOptions.getProgramArgs());

        PackagedProgram prog = spy(frontend.buildProgram(programOptions));

        ClassLoader testClassLoader = new ClassLoader(prog.getUserCodeClassLoader()) {
            @Override
            public Class<?> loadClass(String name) throws ClassNotFoundException {
                if ("org.apache.hadoop.hive.ql.io.RCFileInputFormat".equals(name)) {
                    callme[0] = true;
                    return String.class; // intentionally return the wrong class
                } else {
                    return super.loadClass(name);
                }
            }
        };
        when(prog.getUserCodeClassLoader()).thenReturn(testClassLoader);

        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, prog.getMainClassName());
        assertArrayEquals(reducedArguments, prog.getArguments());

        Configuration c = new Configuration();
        Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), c);

        // we expect this to fail with a "ClassNotFoundException"
        Pipeline pipeline = PackagedProgramUtils.getPipelineFromProgram(prog, c, 666, true);
        FlinkPipelineTranslationUtil.translateToJSONExecutionPlan(pipeline);
        fail("Should have failed with a ClassNotFoundException");
    } catch (ProgramInvocationException e) {
        if (!(e.getCause() instanceof ClassNotFoundException)) {
            e.printStackTrace();
            fail("Program didn't throw ClassNotFoundException");
        }
        assertTrue("Classloader was not called", callme[0]);
    }
}
Example #22
Source File: PackagedProgramUtils.java From Flink-CEPplus with Apache License 2.0

/**
 * Creates a {@link JobGraph} with a specified {@link JobID}
 * from the given {@link PackagedProgram}.
 *
 * @param packagedProgram to extract the JobGraph from
 * @param configuration to use for the optimizer and job graph generator
 * @param defaultParallelism for the JobGraph
 * @param jobID the pre-generated job id
 * @return JobGraph extracted from the PackagedProgram
 * @throws ProgramInvocationException if the JobGraph generation failed
 */
public static JobGraph createJobGraph(
        PackagedProgram packagedProgram,
        Configuration configuration,
        int defaultParallelism,
        @Nullable JobID jobID) throws ProgramInvocationException {
    Thread.currentThread().setContextClassLoader(packagedProgram.getUserCodeClassLoader());
    final Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), configuration);
    final FlinkPlan flinkPlan;

    if (packagedProgram.isUsingProgramEntryPoint()) {
        final JobWithJars jobWithJars = packagedProgram.getPlanWithJars();
        final Plan plan = jobWithJars.getPlan();

        if (plan.getDefaultParallelism() <= 0) {
            plan.setDefaultParallelism(defaultParallelism);
        }

        flinkPlan = optimizer.compile(jobWithJars.getPlan());
    } else if (packagedProgram.isUsingInteractiveMode()) {
        final OptimizerPlanEnvironment optimizerPlanEnvironment = new OptimizerPlanEnvironment(optimizer);
        optimizerPlanEnvironment.setParallelism(defaultParallelism);
        flinkPlan = optimizerPlanEnvironment.getOptimizedPlan(packagedProgram);
    } else {
        throw new ProgramInvocationException("PackagedProgram does not have a valid invocation mode.");
    }

    final JobGraph jobGraph;

    if (flinkPlan instanceof StreamingPlan) {
        jobGraph = ((StreamingPlan) flinkPlan).getJobGraph(jobID);
        jobGraph.setSavepointRestoreSettings(packagedProgram.getSavepointSettings());
    } else {
        final JobGraphGenerator jobGraphGenerator = new JobGraphGenerator(configuration);
        jobGraph = jobGraphGenerator.compileJobGraph((OptimizedPlan) flinkPlan, jobID);
    }

    for (URL url : packagedProgram.getAllLibraries()) {
        try {
            jobGraph.addJar(new Path(url.toURI()));
        } catch (URISyntaxException e) {
            throw new ProgramInvocationException("Invalid URL for jar file: " + url + '.', jobGraph.getJobID(), e);
        }
    }

    jobGraph.setClasspaths(packagedProgram.getClasspaths());

    return jobGraph;
}
Example #23
Source File: CliFrontend.java From Flink-CEPplus with Apache License 2.0

/**
 * Executes the info action.
 *
 * @param args Command line arguments for the info action.
 */
protected void info(String[] args) throws CliArgsException, FileNotFoundException, ProgramInvocationException {
    LOG.info("Running 'info' command.");

    final Options commandOptions = CliFrontendParser.getInfoCommandOptions();

    final CommandLine commandLine = CliFrontendParser.parse(commandOptions, args, true);

    InfoOptions infoOptions = new InfoOptions(commandLine);

    // evaluate help flag
    if (infoOptions.isPrintHelp()) {
        CliFrontendParser.printHelpForInfo();
        return;
    }

    if (infoOptions.getJarFilePath() == null) {
        throw new CliArgsException("The program JAR file was not specified.");
    }

    // -------- build the packaged program -------------

    LOG.info("Building program from JAR file");
    final PackagedProgram program = buildProgram(infoOptions);

    try {
        int parallelism = infoOptions.getParallelism();
        if (ExecutionConfig.PARALLELISM_DEFAULT == parallelism) {
            parallelism = defaultParallelism;
        }

        LOG.info("Creating program plan dump");

        Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), configuration);
        FlinkPlan flinkPlan = ClusterClient.getOptimizedPlan(compiler, program, parallelism);

        String jsonPlan = null;
        if (flinkPlan instanceof OptimizedPlan) {
            jsonPlan = new PlanJSONDumpGenerator().getOptimizerPlanAsJSON((OptimizedPlan) flinkPlan);
        } else if (flinkPlan instanceof StreamingPlan) {
            jsonPlan = ((StreamingPlan) flinkPlan).getStreamingPlanAsJSON();
        }

        if (jsonPlan != null) {
            System.out.println("----------------------- Execution Plan -----------------------");
            System.out.println(jsonPlan);
            System.out.println("--------------------------------------------------------------");
        } else {
            System.out.println("JSON plan could not be generated.");
        }

        String description = program.getDescription();
        if (description != null) {
            System.out.println();
            System.out.println(description);
        } else {
            System.out.println();
            System.out.println("No description provided.");
        }
    } finally {
        program.deleteExtractedLibraries();
    }
}
Example #24
Source File: CliFrontendPackageProgramTest.java From Flink-CEPplus with Apache License 2.0

/**
 * Ensure that we will never have the following error.
 *
 * <pre>
 * org.apache.flink.client.program.ProgramInvocationException: The main method caused an error.
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:398)
 *     at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:301)
 *     at org.apache.flink.client.program.Client.getOptimizedPlan(Client.java:140)
 *     at org.apache.flink.client.program.Client.getOptimizedPlanAsJson(Client.java:125)
 *     at org.apache.flink.client.cli.CliFrontend.info(CliFrontend.java:439)
 *     at org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:931)
 *     at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:951)
 * Caused by: java.io.IOException: java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:102)
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:54)
 *     at tlabs.CDR_In_Report.createHCatInputFormat(CDR_In_Report.java:322)
 *     at tlabs.CDR_Out_Report.main(CDR_Out_Report.java:380)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
 *     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 *     at java.lang.reflect.Method.invoke(Method.java:622)
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:383)
 * </pre>
 *
 * <p>The test works as follows:
 *
 * <ul>
 *   <li>use the CliFrontend to invoke a jar file that loads a class which is only available
 *       in the jar file itself (via a custom classloader)
 *   <li>change the user-code classloader of the PackagedProgram to a special classloader for this test
 *   <li>the classloader will accept the special class (and return a String.class)
 * </ul>
 */
@Test
public void testPlanWithExternalClass() throws Exception {
    final boolean[] callme = { false }; // create a final object reference, to be able to change its value later

    try {
        String[] arguments = {
            "--classpath", "file:///tmp/foo",
            "--classpath", "file:///tmp/bar",
            "-c", TEST_JAR_CLASSLOADERTEST_CLASS, getTestJarPath(),
            "true", "arg1", "arg2" };

        URL[] classpath = new URL[] { new URL("file:///tmp/foo"), new URL("file:///tmp/bar") };
        String[] reducedArguments = { "true", "arg1", "arg2" };

        RunOptions options = CliFrontendParser.parseRunCommand(arguments);
        assertEquals(getTestJarPath(), options.getJarFilePath());
        assertArrayEquals(classpath, options.getClasspaths().toArray());
        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, options.getEntryPointClassName());
        assertArrayEquals(reducedArguments, options.getProgramArgs());

        PackagedProgram prog = spy(frontend.buildProgram(options));

        ClassLoader testClassLoader = new ClassLoader(prog.getUserCodeClassLoader()) {
            @Override
            public Class<?> loadClass(String name) throws ClassNotFoundException {
                if ("org.apache.hadoop.hive.ql.io.RCFileInputFormat".equals(name)) {
                    callme[0] = true;
                    return String.class; // intentionally return the wrong class
                } else {
                    return super.loadClass(name);
                }
            }
        };
        when(prog.getUserCodeClassLoader()).thenReturn(testClassLoader);

        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, prog.getMainClassName());
        assertArrayEquals(reducedArguments, prog.getArguments());

        Configuration c = new Configuration();
        Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), c);

        // we expect this to fail with a "ClassNotFoundException"
        ClusterClient.getOptimizedPlanAsJson(compiler, prog, 666);
        fail("Should have failed with a ClassNotFoundException");
    } catch (ProgramInvocationException e) {
        if (!(e.getCause() instanceof ClassNotFoundException)) {
            e.printStackTrace();
            fail("Program didn't throw ClassNotFoundException");
        }
        assertTrue("Classloader was not called", callme[0]);
    }
}
Example #25
Source File: RemoteExecutor.java From flink with Apache License 2.0

@Override
public String getOptimizerPlanAsJSON(Plan plan) throws Exception {
    Optimizer opt = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optPlan = opt.compile(plan);
    return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}
Example #26
Source File: CliFrontendPackageProgramTest.java From flink with Apache License 2.0

/**
 * Ensure that we will never have the following error.
 *
 * <pre>
 * org.apache.flink.client.program.ProgramInvocationException: The main method caused an error.
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:398)
 *     at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:301)
 *     at org.apache.flink.client.program.Client.getOptimizedPlan(Client.java:140)
 *     at org.apache.flink.client.program.Client.getOptimizedPlanAsJson(Client.java:125)
 *     at org.apache.flink.client.cli.CliFrontend.info(CliFrontend.java:439)
 *     at org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:931)
 *     at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:951)
 * Caused by: java.io.IOException: java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:102)
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:54)
 *     at tlabs.CDR_In_Report.createHCatInputFormat(CDR_In_Report.java:322)
 *     at tlabs.CDR_Out_Report.main(CDR_Out_Report.java:380)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
 *     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 *     at java.lang.reflect.Method.invoke(Method.java:622)
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:383)
 * </pre>
 *
 * <p>The test works as follows:
 *
 * <ul>
 *   <li>use the CliFrontend to invoke a jar file that loads a class which is only available
 *       in the jar file itself (via a custom classloader)
 *   <li>change the user-code classloader of the PackagedProgram to a special classloader for this test
 *   <li>the classloader will accept the special class (and return a String.class)
 * </ul>
 */
@Test
public void testPlanWithExternalClass() throws Exception {
    final boolean[] callme = { false }; // create a final object reference, to be able to change its value later

    try {
        String[] arguments = {
            "--classpath", "file:///tmp/foo",
            "--classpath", "file:///tmp/bar",
            "-c", TEST_JAR_CLASSLOADERTEST_CLASS, getTestJarPath(),
            "true", "arg1", "arg2" };

        URL[] classpath = new URL[] { new URL("file:///tmp/foo"), new URL("file:///tmp/bar") };
        String[] reducedArguments = { "true", "arg1", "arg2" };

        RunOptions options = CliFrontendParser.parseRunCommand(arguments);
        assertEquals(getTestJarPath(), options.getJarFilePath());
        assertArrayEquals(classpath, options.getClasspaths().toArray());
        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, options.getEntryPointClassName());
        assertArrayEquals(reducedArguments, options.getProgramArgs());

        PackagedProgram prog = spy(frontend.buildProgram(options));

        ClassLoader testClassLoader = new ClassLoader(prog.getUserCodeClassLoader()) {
            @Override
            public Class<?> loadClass(String name) throws ClassNotFoundException {
                if ("org.apache.hadoop.hive.ql.io.RCFileInputFormat".equals(name)) {
                    callme[0] = true;
                    return String.class; // intentionally return the wrong class
                } else {
                    return super.loadClass(name);
                }
            }
        };
        when(prog.getUserCodeClassLoader()).thenReturn(testClassLoader);

        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, prog.getMainClassName());
        assertArrayEquals(reducedArguments, prog.getArguments());

        Configuration c = new Configuration();
        Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), c);

        // we expect this to fail with a "ClassNotFoundException"
        ClusterClient.getOptimizedPlanAsJson(compiler, prog, 666);
        fail("Should have failed with a ClassNotFoundException");
    } catch (ProgramInvocationException e) {
        if (!(e.getCause() instanceof ClassNotFoundException)) {
            e.printStackTrace();
            fail("Program didn't throw ClassNotFoundException");
        }
        assertTrue("Classloader was not called", callme[0]);
    }
}
Example #27
Source File: CliFrontend.java From flink with Apache License 2.0

/**
 * Executes the info action.
 *
 * @param args Command line arguments for the info action.
 */
protected void info(String[] args) throws CliArgsException, FileNotFoundException, ProgramInvocationException {
    LOG.info("Running 'info' command.");

    final Options commandOptions = CliFrontendParser.getInfoCommandOptions();

    final CommandLine commandLine = CliFrontendParser.parse(commandOptions, args, true);

    InfoOptions infoOptions = new InfoOptions(commandLine);

    // evaluate help flag
    if (infoOptions.isPrintHelp()) {
        CliFrontendParser.printHelpForInfo();
        return;
    }

    if (infoOptions.getJarFilePath() == null) {
        throw new CliArgsException("The program JAR file was not specified.");
    }

    // -------- build the packaged program -------------

    LOG.info("Building program from JAR file");
    final PackagedProgram program = buildProgram(infoOptions);

    try {
        int parallelism = infoOptions.getParallelism();
        if (ExecutionConfig.PARALLELISM_DEFAULT == parallelism) {
            parallelism = defaultParallelism;
        }

        LOG.info("Creating program plan dump");

        Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), configuration);
        FlinkPlan flinkPlan = ClusterClient.getOptimizedPlan(compiler, program, parallelism);

        String jsonPlan = null;
        if (flinkPlan instanceof OptimizedPlan) {
            jsonPlan = new PlanJSONDumpGenerator().getOptimizerPlanAsJSON((OptimizedPlan) flinkPlan);
        } else if (flinkPlan instanceof StreamingPlan) {
            jsonPlan = ((StreamingPlan) flinkPlan).getStreamingPlanAsJSON();
        }

        if (jsonPlan != null) {
            System.out.println("----------------------- Execution Plan -----------------------");
            System.out.println(jsonPlan);
            System.out.println("--------------------------------------------------------------");
        } else {
            System.out.println("JSON plan could not be generated.");
        }

        String description = program.getDescription();
        if (description != null) {
            System.out.println();
            System.out.println(description);
        } else {
            System.out.println();
            System.out.println("No description provided.");
        }
    } finally {
        program.deleteExtractedLibraries();
    }
}
Example #28
Source File: PackagedProgramUtils.java From flink with Apache License 2.0

/**
 * Creates a {@link JobGraph} with a specified {@link JobID}
 * from the given {@link PackagedProgram}.
 *
 * @param packagedProgram to extract the JobGraph from
 * @param configuration to use for the optimizer and job graph generator
 * @param defaultParallelism for the JobGraph
 * @param jobID the pre-generated job id
 * @return JobGraph extracted from the PackagedProgram
 * @throws ProgramInvocationException if the JobGraph generation failed
 */
public static JobGraph createJobGraph(
        PackagedProgram packagedProgram,
        Configuration configuration,
        int defaultParallelism,
        @Nullable JobID jobID) throws ProgramInvocationException {
    Thread.currentThread().setContextClassLoader(packagedProgram.getUserCodeClassLoader());
    final Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), configuration);
    final FlinkPlan flinkPlan;

    if (packagedProgram.isUsingProgramEntryPoint()) {
        final JobWithJars jobWithJars = packagedProgram.getPlanWithJars();
        final Plan plan = jobWithJars.getPlan();

        if (plan.getDefaultParallelism() <= 0) {
            plan.setDefaultParallelism(defaultParallelism);
        }

        flinkPlan = optimizer.compile(jobWithJars.getPlan());
    } else if (packagedProgram.isUsingInteractiveMode()) {
        final OptimizerPlanEnvironment optimizerPlanEnvironment = new OptimizerPlanEnvironment(optimizer);
        optimizerPlanEnvironment.setParallelism(defaultParallelism);
        flinkPlan = optimizerPlanEnvironment.getOptimizedPlan(packagedProgram);
    } else {
        throw new ProgramInvocationException("PackagedProgram does not have a valid invocation mode.");
    }

    final JobGraph jobGraph;

    if (flinkPlan instanceof StreamingPlan) {
        jobGraph = ((StreamingPlan) flinkPlan).getJobGraph(jobID);
        jobGraph.setSavepointRestoreSettings(packagedProgram.getSavepointSettings());
    } else {
        final JobGraphGenerator jobGraphGenerator = new JobGraphGenerator(configuration);
        jobGraph = jobGraphGenerator.compileJobGraph((OptimizedPlan) flinkPlan, jobID);
    }

    for (URL url : packagedProgram.getAllLibraries()) {
        try {
            jobGraph.addJar(new Path(url.toURI()));
        } catch (URISyntaxException e) {
            throw new ProgramInvocationException("Invalid URL for jar file: " + url + '.', jobGraph.getJobID(), e);
        }
    }

    jobGraph.setClasspaths(packagedProgram.getClasspaths());

    return jobGraph;
}