org.apache.beam.runners.spark.SparkRunner Java Examples
The following examples show how to use
org.apache.beam.runners.spark.SparkRunner.
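All of the examples follow the same basic pattern: register SparkRunner on the pipeline options, build a Pipeline from those options, and run it. As a minimal, self-contained sketch of that pattern (the app name, master URL, and trivial Create transform are illustrative assumptions, not taken from any example below):

import org.apache.beam.runners.spark.SparkPipelineOptions;
import org.apache.beam.runners.spark.SparkRunner;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class SparkRunnerSketch {
  public static void main(String[] args) {
    // Configure the Spark runner; "local[2]" runs an embedded Spark master with two threads.
    SparkPipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(SparkPipelineOptions.class);
    options.setRunner(SparkRunner.class);
    options.setSparkMaster("local[2]");
    options.setAppName("spark-runner-sketch");

    // Build a trivial pipeline and block until Spark finishes executing it.
    Pipeline p = Pipeline.create(options);
    p.apply(Create.of("a", "b", "c"));
    p.run().waitUntilFinish();
  }
}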
Example #1
Source File: BeamPipelineEngine.java, from the hop project (Apache License 2.0)

private PipelineResult executePipeline( org.apache.beam.sdk.Pipeline pipeline ) throws HopException {

  RunnerType runnerType = beamEngineRunConfiguration.getRunnerType();
  switch ( runnerType ) {
    case Direct:
      return DirectRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
    case Flink:
      return FlinkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
    case DataFlow:
      return DataflowRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
    case Spark:
      return SparkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
    default:
      throw new HopException( "Execution on runner '" + runnerType.name() + "' is not supported yet." );
  }
}
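Note that each branch hands back the PipelineResult without blocking. A caller that needs the outcome can wait on the result; a hedged sketch of such a caller (the error handling is an illustrative assumption):

// Illustrative caller of the executePipeline( ... ) method above.
PipelineResult result = executePipeline( pipeline );
// Block until the pipeline reaches a terminal state, then inspect that state.
PipelineResult.State state = result.waitUntilFinish();
if ( state != PipelineResult.State.DONE ) {
  throw new HopException( "Pipeline finished in state " + state );
}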
Example #2
Source File: HopPipelineMetaToBeamPipelineConverter.java, from the hop project (Apache License 2.0)

public static Class<? extends PipelineRunner<?>> getPipelineRunnerClass( RunnerType runnerType ) throws HopException {
  if ( runnerType == null ) {
    throw new HopException( "Please specify a valid runner type" );
  }
  switch ( runnerType ) {
    case Direct:
      return DirectRunner.class;
    case Flink:
      return FlinkRunner.class;
    case Spark:
      return SparkRunner.class;
    case DataFlow:
      return DataflowRunner.class;
    default:
      throw new HopException( "Unsupported runner type: " + runnerType.name() );
  }
}
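The returned class is not instantiated directly; it is registered on the pipeline options so that Beam constructs the runner itself. A minimal sketch of that wiring (standard Beam API, with assumed variable names):

// Resolve the runner class for the configured RunnerType and register it on the options.
PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
pipelineOptions.setRunner( getPipelineRunnerClass( runnerType ) );
Pipeline pipeline = Pipeline.create( pipelineOptions );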
Example #3
Source File: TranslationUtils.java, from the beam project (Apache License 2.0)

/**
 * Reject state and timers {@link DoFn}.
 *
 * @param doFn the {@link DoFn} to possibly reject.
 */
public static void rejectStateAndTimers(DoFn<?, ?> doFn) {
  DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());

  if (signature.stateDeclarations().size() > 0) {
    throw new UnsupportedOperationException(
        String.format(
            "Found %s annotations on %s, but %s cannot yet be used with state in the %s.",
            DoFn.StateId.class.getSimpleName(),
            doFn.getClass().getName(),
            DoFn.class.getSimpleName(),
            SparkRunner.class.getSimpleName()));
  }

  if (signature.timerDeclarations().size() > 0
      || signature.timerFamilyDeclarations().size() > 0) {
    throw new UnsupportedOperationException(
        String.format(
            "Found %s annotations on %s, but %s cannot yet be used with timers in the %s.",
            DoFn.TimerId.class.getSimpleName(),
            doFn.getClass().getName(),
            DoFn.class.getSimpleName(),
            SparkRunner.class.getSimpleName()));
  }
}
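For context, a stateful DoFn such as the following would trip the first check, since DoFnSignatures picks up its @StateId declaration. This fragment is an illustrative assumption, not code from the Beam source above:

// Passing an instance of this DoFn to rejectStateAndTimers(...) throws
// UnsupportedOperationException because of the @StateId declaration.
static class CountFn extends DoFn<KV<String, Integer>, Integer> {

  @StateId("count")
  private final StateSpec<ValueState<Integer>> countSpec = StateSpecs.value(VarIntCoder.of());

  @ProcessElement
  public void processElement(
      ProcessContext context, @StateId("count") ValueState<Integer> count) {
    Integer current = count.read();
    int next = (current == null ? 0 : current) + 1;
    count.write(next);
    context.output(next);
  }
}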
Example #4
Source File: TrackStreamingSourcesTest.java, from the beam project (Apache License 2.0)

@Test
public void testTrackSingle() {
  options.setRunner(SparkRunner.class);
  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  JavaStreamingContext jssc =
      new JavaStreamingContext(
          jsc, new org.apache.spark.streaming.Duration(options.getBatchIntervalMillis()));

  Pipeline p = Pipeline.create(options);

  CreateStream<Integer> emptyStream =
      CreateStream.of(VarIntCoder.of(), Duration.millis(options.getBatchIntervalMillis()))
          .emptyBatch();

  p.apply(emptyStream).apply(ParDo.of(new PassthroughFn<>()));

  p.traverseTopologically(new StreamingSourceTracker(jssc, p, ParDo.MultiOutput.class, 0));
  assertThat(StreamingSourceTracker.numAssertions, equalTo(1));
}
Example #5
Source File: TrackStreamingSourcesTest.java, from the beam project (Apache License 2.0)

@Test
public void testTrackFlattened() {
  options.setRunner(SparkRunner.class);
  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  JavaStreamingContext jssc =
      new JavaStreamingContext(
          jsc, new org.apache.spark.streaming.Duration(options.getBatchIntervalMillis()));

  Pipeline p = Pipeline.create(options);

  CreateStream<Integer> queueStream1 =
      CreateStream.of(VarIntCoder.of(), Duration.millis(options.getBatchIntervalMillis()))
          .emptyBatch();
  CreateStream<Integer> queueStream2 =
      CreateStream.of(VarIntCoder.of(), Duration.millis(options.getBatchIntervalMillis()))
          .emptyBatch();

  PCollection<Integer> pcol1 = p.apply(queueStream1);
  PCollection<Integer> pcol2 = p.apply(queueStream2);
  PCollection<Integer> flattened =
      PCollectionList.of(pcol1).and(pcol2).apply(Flatten.pCollections());
  flattened.apply(ParDo.of(new PassthroughFn<>()));

  p.traverseTopologically(new StreamingSourceTracker(jssc, p, ParDo.MultiOutput.class, 0, 1));
  assertThat(StreamingSourceTracker.numAssertions, equalTo(1));
}
Example #6
Source File: TransMetaPipelineConverter.java, from the kettle-beam project (Apache License 2.0)

public static Class<? extends PipelineRunner<?>> getPipelineRunnerClass( RunnerType runnerType ) throws KettleException {
  if ( runnerType == null ) {
    throw new KettleException( "Please specify a valid runner type" );
  }
  switch ( runnerType ) {
    case Direct:
      return DirectRunner.class;
    case Flink:
      return FlinkRunner.class;
    case Spark:
      return SparkRunner.class;
    case DataFlow:
      return DataflowRunner.class;
    default:
      throw new KettleException( "Unsupported runner type: " + runnerType.name() );
  }
}
Example #7
Source File: KettleBeamPipelineExecutor.java, from the kettle-beam project (Apache License 2.0)

private PipelineResult asyncExecutePipeline( Pipeline pipeline ) throws KettleException {

  RunnerType runnerType = RunnerType.getRunnerTypeByName( transMeta.environmentSubstitute( jobConfig.getRunnerTypeName() ) );
  if ( runnerType == null ) {
    throw new KettleException( "Runner type '" + jobConfig.getRunnerTypeName() + "' is not recognized" );
  }
  switch ( runnerType ) {
    case Direct:
      return DirectRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
    case Flink:
      return FlinkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
    case DataFlow:
      return DataflowRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
    case Spark:
      return SparkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
    default:
      throw new KettleException( "Execution on runner '" + runnerType.name() + "' is not supported yet, sorry." );
  }
}
Example #8
Source File: SparkRunnerStreamingContextFactory.java, from the beam project (Apache License 2.0)

@Override
public JavaStreamingContext call() throws Exception {
  LOG.info("Creating a new Spark Streaming Context");
  // Validate unbounded read properties.
  checkArgument(
      options.getMinReadTimeMillis() < options.getBatchIntervalMillis(),
      "Minimum read time has to be less than batch time.");
  checkArgument(
      options.getReadTimePercentage() > 0 && options.getReadTimePercentage() < 1,
      "Read time percentage is bound to (0, 1).");

  SparkPipelineTranslator translator =
      new StreamingTransformTranslator.Translator(new TransformTranslator.Translator());
  Duration batchDuration = new Duration(options.getBatchIntervalMillis());
  LOG.info("Setting Spark streaming batchDuration to {} msec", batchDuration.milliseconds());

  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  JavaStreamingContext jssc = new JavaStreamingContext(jsc, batchDuration);

  // We must first init accumulators since translators expect them to be instantiated.
  SparkRunner.initAccumulators(options, jsc);
  // No need to create a MetricsPusher instance here because it is created in SparkRunner.run().
  EvaluationContext ctxt = new EvaluationContext(jsc, pipeline, options, jssc);
  // Update cache candidates.
  SparkRunner.updateCacheCandidates(pipeline, translator, ctxt);
  pipeline.traverseTopologically(new SparkRunner.Evaluator(translator, ctxt));
  ctxt.computeOutputs();

  checkpoint(jssc, checkpointDir);

  return jssc;
}
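This factory implements the creation callback that Spark Streaming uses for checkpoint recovery: Spark either rebuilds the context from the checkpoint directory or invokes call() above to create a fresh one. A hedged sketch of that wiring (the constructor arguments and variable names are assumptions based on the fields used above):

// Recover the streaming context from the checkpoint if one exists; otherwise
// the factory's call() builds, translates, and checkpoints a fresh context.
SparkRunnerStreamingContextFactory factory =
    new SparkRunnerStreamingContextFactory(pipeline, options, checkpointDir);
JavaStreamingContext jssc =
    JavaStreamingContext.getOrCreate(checkpointDirPath, factory);
jssc.start();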
Example #9
Source File: TrackStreamingSourcesTest.java, from the beam project (Apache License 2.0)

private StreamingSourceTracker(
    JavaStreamingContext jssc,
    Pipeline pipeline,
    Class<? extends PTransform> transformClassToAssert,
    Integer... expected) {
  this.ctxt = new EvaluationContext(jssc.sparkContext(), pipeline, options, jssc);
  this.evaluator =
      new SparkRunner.Evaluator(
          new StreamingTransformTranslator.Translator(new TransformTranslator.Translator()), ctxt);
  this.transformClassToAssert = transformClassToAssert;
  this.expected = expected;
}
Example #10
Source File: SparkRunnerTestUtils.java, from the components project (Apache License 2.0)

public Pipeline createPipeline() {
  SparkContextOptions sparkOpts = options.as(SparkContextOptions.class);
  sparkOpts.setFilesToStage(emptyList());

  SparkConf conf = new SparkConf();
  conf.setAppName(appName);
  conf.setMaster("local[2]");
  conf.set("spark.driver.allowMultipleContexts", "true");
  JavaSparkContext jsc = new JavaSparkContext(new SparkContext(conf));
  sparkOpts.setProvidedSparkContext(jsc);
  sparkOpts.setUsesProvidedSparkContext(true);
  sparkOpts.setRunner(SparkRunner.class);

  return Pipeline.create(sparkOpts);
}
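Because the options carry a provided SparkContext, any Pipeline created this way executes on that embedded local[2] context instead of spawning its own. A minimal usage sketch (the instance name, constructor, and trivial transform are illustrative assumptions):

SparkRunnerTestUtils testUtils = new SparkRunnerTestUtils(appName);  // assumed constructor
Pipeline pipeline = testUtils.createPipeline();
pipeline.apply(Create.of("a", "b", "c"));
// Runs on the provided SparkContext configured in createPipeline().
pipeline.run().waitUntilFinish();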
Example #11
Source File: S3SparkRuntimeTestIT.java, from the components project (Apache License 2.0)

@Before
public void setupLazyAvroCoder() {
  options = PipelineOptionsFactory.as(SparkPipelineOptions.class);
  options.setRunner(SparkRunner.class);
  options.setSparkMaster("local");
  options.setStreaming(false);
  pWrite = Pipeline.create(options);
  pRead = Pipeline.create(options);
}
Example #12
Source File: SparkIntegrationTestResource.java, from the components project (Apache License 2.0)

/**
 * @return the options used to create this pipeline. These can be changed before the Pipeline is created.
 */
public SparkContextOptions getOptions() {
  if (options == null) {
    options = PipelineOptionsFactory.as(SparkContextOptions.class);
    options.setRunner(SparkRunner.class);
    // Useless for us and broken on Java > 8 with Beam <= 2.10.0.
    options.setFilesToStage(emptyList());
  }
  return options;
}