Java Code Examples for org.apache.beam.sdk.Pipeline#replaceAll()
The following examples show how to use
org.apache.beam.sdk.Pipeline#replaceAll().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ConfigGeneratorTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Generates a Samza config for a small pipeline whose execution environment is set to
 * {@code LOCAL} and asserts that the difference against {@code ConfigBuilder.localRunConfig()}
 * has no entries that exist only on the {@code localRunConfig()} side.
 */
@Test
public void testSamzaLocalExecutionEnvironmentConfig() {
  final SamzaPipelineOptions samzaOptions =
      PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  samzaOptions.setJobName("TestEnvConfig");
  samzaOptions.setRunner(SamzaRunner.class);
  samzaOptions.setSamzaExecutionEnvironment(SamzaExecutionEnvironment.LOCAL);

  final Pipeline p = Pipeline.create(samzaOptions);
  p.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally());
  p.replaceAll(SamzaTransformOverrides.getDefaultOverrides());

  final Map<PValue, String> viewIds = PViewToIdMapper.buildIdMap(p);
  final ConfigBuilder builder = new ConfigBuilder(samzaOptions);
  SamzaPipelineTranslator.createConfig(p, samzaOptions, viewIds, builder);
  final Config generated = builder.build();

  // The generated config is the left side; localRunConfig() is the right side.
  final boolean noRightOnlyEntries =
      Maps.difference(generated, ConfigBuilder.localRunConfig()).entriesOnlyOnRight().isEmpty();
  assertTrue(noRightOnlyEntries);
}
Example 2
Source File: Twister2Runner.java From beam with Apache License 2.0 | 5 votes |
@Override public PipelineResult run(Pipeline pipeline) { // create a worker and pass in the pipeline and then do the translation Twister2PipelineExecutionEnvironment env = new Twister2PipelineExecutionEnvironment(options); LOG.info("Translating pipeline to Twister2 program."); pipeline.replaceAll(getDefaultOverrides()); env.translate(pipeline); setupSystem(options); Config config = ResourceAllocator.loadConfig(new HashMap<>()); JobConfig jobConfig = new JobConfig(); jobConfig.put(SIDEINPUTS, extractNames(env.getSideInputs())); jobConfig.put(LEAVES, extractNames(env.getLeaves())); jobConfig.put(GRAPH, env.getTSetGraph()); int workers = options.getParallelism(); Twister2Job twister2Job = Twister2Job.newBuilder() .setJobName(options.getJobName()) .setWorkerClass(BeamBatchWorker.class) .addComputeResource(options.getWorkerCPUs(), options.getRamMegaBytes(), workers) .setConfig(jobConfig) .build(); Twister2JobState jobState = Twister2Submitter.submitJob(twister2Job, config); Twister2PipelineResult result = new Twister2PipelineResult(); // TODO: Need to fix the check for "RUNNING" once fix for this is done on Twister2 end. if (jobState.getJobstate() == DriverJobState.FAILED || jobState.getJobstate() == DriverJobState.RUNNING) { throw new RuntimeException("Pipeline execution failed", jobState.getCause()); } else { result.setState(PipelineResult.State.DONE); } return result; }
Example 3
Source File: Twister2Runner.java From beam with Apache License 2.0 | 5 votes |
public PipelineResult runTest(Pipeline pipeline) { // create a worker and pass in the pipeline and then do the translation Twister2PipelineExecutionEnvironment env = new Twister2PipelineExecutionEnvironment(options); LOG.info("Translating pipeline to Twister2 program."); pipeline.replaceAll(getDefaultOverrides()); env.translate(pipeline); setupSystemTest(options); Map configMap = new HashMap(); configMap.put(SIDEINPUTS, extractNames(env.getSideInputs())); configMap.put(LEAVES, extractNames(env.getLeaves())); configMap.put(GRAPH, env.getTSetGraph()); configMap.put("twister2.network.buffer.size", 32000); configMap.put("twister2.network.sendBuffer.count", 1); Config config = ResourceAllocator.loadConfig(configMap); JobConfig jobConfig = new JobConfig(); int workers = options.getParallelism(); Twister2Job twister2Job = Twister2Job.newBuilder() .setJobName(options.getJobName()) .setWorkerClass(BeamBatchWorker.class) .addComputeResource(options.getWorkerCPUs(), options.getRamMegaBytes(), workers) .setConfig(jobConfig) .build(); Twister2JobState jobState = LocalSubmitter.submitJob(twister2Job, config); Twister2PipelineResult result = new Twister2PipelineResult(); // TODO: Need to fix the check for "RUNNING" once fix for this is done on Twister2 end. if (jobState.getJobstate() == DriverJobState.FAILED || jobState.getJobstate() == DriverJobState.RUNNING) { throw new RuntimeException("Pipeline execution failed", jobState.getCause()); } else { result.setState(PipelineResult.State.DONE); } return result; }
Example 4
Source File: FlinkPipelineExecutionEnvironment.java From beam with Apache License 2.0 | 5 votes |
/** * Depending on if the job is a Streaming or a Batch one, this method creates the necessary * execution environment and pipeline translator, and translates the {@link * org.apache.beam.sdk.values.PCollection} program into a {@link * org.apache.flink.api.java.DataSet} or {@link * org.apache.flink.streaming.api.datastream.DataStream} one. */ public void translate(Pipeline pipeline) { this.flinkBatchEnv = null; this.flinkStreamEnv = null; final boolean hasUnboundedOutput = PipelineTranslationModeOptimizer.hasUnboundedOutput(pipeline); if (hasUnboundedOutput) { LOG.info("Found unbounded PCollection. Switching to streaming execution."); options.setStreaming(true); } // Staged files need to be set before initializing the execution environments prepareFilesToStageForRemoteClusterExecution(options); FlinkPipelineTranslator translator; if (options.isStreaming()) { this.flinkStreamEnv = FlinkExecutionEnvironments.createStreamExecutionEnvironment( options, options.getFilesToStage()); if (hasUnboundedOutput && !flinkStreamEnv.getCheckpointConfig().isCheckpointingEnabled()) { LOG.warn( "UnboundedSources present which rely on checkpointing, but checkpointing is disabled."); } translator = new FlinkStreamingPipelineTranslator(flinkStreamEnv, options); } else { this.flinkBatchEnv = FlinkExecutionEnvironments.createBatchExecutionEnvironment( options, options.getFilesToStage()); translator = new FlinkBatchPipelineTranslator(flinkBatchEnv, options); } // Transform replacements need to receive the finalized PipelineOptions // including execution mode (batch/streaming) and parallelism. pipeline.replaceAll(FlinkTransformOverrides.getDefaultOverrides(options)); translator.translate(pipeline); }
Example 5
Source File: ConfigGeneratorTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Generates a Samza config twice from the same {@code ConfigBuilder}. The first pass asserts
 * the {@code beamStore} factory and serdes and that no changelog is configured. After
 * {@code setStateDurable(true)}, the second pass asserts the expected changelog name.
 */
@Test
public void testBeamStoreConfig() {
  final SamzaPipelineOptions samzaOptions =
      PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  samzaOptions.setJobName("TestStoreConfig");
  samzaOptions.setRunner(SamzaRunner.class);

  final Pipeline p = Pipeline.create(samzaOptions);
  p.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally());
  p.replaceAll(SamzaTransformOverrides.getDefaultOverrides());

  final Map<PValue, String> viewIds = PViewToIdMapper.buildIdMap(p);
  final ConfigBuilder builder = new ConfigBuilder(samzaOptions);

  // First pass: state durability has not been enabled.
  SamzaPipelineTranslator.createConfig(p, samzaOptions, viewIds, builder);
  final Config nonDurable = builder.build();
  final String expectedFactory = RocksDbKeyValueStorageEngineFactory.class.getName();
  assertEquals(expectedFactory, nonDurable.get("stores.beamStore.factory"));
  assertEquals("byteArraySerde", nonDurable.get("stores.beamStore.key.serde"));
  assertEquals("byteSerde", nonDurable.get("stores.beamStore.msg.serde"));
  assertNull(nonDurable.get("stores.beamStore.changelog"));

  // Second pass: the same builder is reused after enabling durable state.
  samzaOptions.setStateDurable(true);
  SamzaPipelineTranslator.createConfig(p, samzaOptions, viewIds, builder);
  final Config durable = builder.build();
  assertEquals(
      "TestStoreConfig-1-beamStore-changelog", durable.get("stores.beamStore.changelog"));
}
Example 6
Source File: ConfigGeneratorTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Generates a Samza config for the {@code YARN} execution environment, with a
 * {@code yarn.package.path} override, and asserts that the app runner class and the job factory
 * class are the remote/YARN ones.
 *
 * <p>An {@link IllegalArgumentException} thrown while building or checking the config is
 * rethrown as an {@link AssertionError} that keeps the original exception as its cause.
 */
@Test
public void testSamzaYarnExecutionEnvironmentConfig() {
  final String yarnPackagePath = "yarn.package.path";
  SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  options.setJobName("TestEnvConfig");
  options.setRunner(SamzaRunner.class);
  options.setSamzaExecutionEnvironment(SamzaExecutionEnvironment.YARN);
  options.setConfigOverride(
      ImmutableMap.<String, String>builder()
          .put(
              yarnPackagePath,
              "file://${basedir}/target/${project.artifactId}-${pom.version}-dist.tar.gz")
          .build());

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally());
  pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides());

  final Map<PValue, String> idMap = PViewToIdMapper.buildIdMap(pipeline);
  final ConfigBuilder configBuilder = new ConfigBuilder(options);
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);

  try {
    Config config = configBuilder.build();
    // assertEquals takes (expected, actual); keeping that order makes failure messages
    // report the expected class name correctly.
    assertEquals(RemoteApplicationRunner.class.getName(), config.get(APP_RUNNER_CLASS));
    assertEquals(YarnJobFactory.class.getName(), config.get(JOB_FACTORY_CLASS));
  } catch (IllegalArgumentException e) {
    throw new AssertionError(
        String.format(
            "Failed to validate correct configs for %s samza execution environment",
            SamzaExecutionEnvironment.YARN),
        e);
  }
}
Example 7
Source File: ConfigGeneratorTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Generates a Samza config for the {@code STANDALONE} execution environment, with a
 * {@code ZkConfig.ZK_CONNECT} override, and asserts that the app runner class is the local
 * runner and the job coordinator factory is the ZooKeeper one.
 *
 * <p>An {@link IllegalArgumentException} thrown while building or checking the config is
 * rethrown as an {@link AssertionError} that keeps the original exception as its cause.
 */
@Test
public void testSamzaStandAloneExecutionEnvironmentConfig() {
  SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  options.setJobName("TestEnvConfig");
  options.setRunner(SamzaRunner.class);
  options.setSamzaExecutionEnvironment(SamzaExecutionEnvironment.STANDALONE);
  options.setConfigOverride(
      ImmutableMap.<String, String>builder().put(ZkConfig.ZK_CONNECT, "localhost:2181").build());

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally());
  pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides());

  final Map<PValue, String> idMap = PViewToIdMapper.buildIdMap(pipeline);
  final ConfigBuilder configBuilder = new ConfigBuilder(options);
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);

  try {
    Config config = configBuilder.build();
    // assertEquals takes (expected, actual); keeping that order makes failure messages
    // report the expected class name correctly.
    assertEquals(LocalApplicationRunner.class.getName(), config.get(APP_RUNNER_CLASS));
    assertEquals(
        ZkJobCoordinatorFactory.class.getName(),
        config.get(JobCoordinatorConfig.JOB_COORDINATOR_FACTORY));
  } catch (IllegalArgumentException e) {
    throw new AssertionError(
        String.format(
            "Failed to validate correct configs for %s samza execution environment",
            SamzaExecutionEnvironment.STANDALONE),
        e);
  }
}
Example 8
Source File: DataflowRunner.java From beam with Apache License 2.0 | 5 votes |
@VisibleForTesting protected void replaceTransforms(Pipeline pipeline) { boolean streaming = options.isStreaming() || containsUnboundedPCollection(pipeline); // Ensure all outputs of all reads are consumed before potentially replacing any // Read PTransforms UnconsumedReads.ensureAllReadsConsumed(pipeline); pipeline.replaceAll(getOverrides(streaming)); }
Example 9
Source File: JetRunner.java From beam with Apache License 2.0 | 4 votes |
/**
 * Prepares the pipeline in two steps: applies the default transform overrides, then calls
 * {@code UnconsumedReads.ensureAllReadsConsumed} on the result.
 *
 * @param pipeline the pipeline to modify in place
 */
private void normalize(Pipeline pipeline) {
  // Step 1: apply the default overrides.
  pipeline.replaceAll(getDefaultOverrides());

  // Step 2: runs after the replacement, so it sees the replaced transforms.
  UnconsumedReads.ensureAllReadsConsumed(pipeline);
}
Example 10
Source File: PipelineTranslator.java From beam with Apache License 2.0 | 4 votes |
/**
 * Applies the default Spark transform overrides for the mode (streaming or not) reported by
 * the given options.
 *
 * @param pipeline the pipeline whose transforms are replaced in place
 * @param options supplies the streaming flag used to select the overrides
 */
public static void replaceTransforms(Pipeline pipeline, StreamingOptions options) {
  final boolean streaming = options.isStreaming();
  pipeline.replaceAll(SparkTransformOverrides.getDefaultOverrides(streaming));
}
Example 11
Source File: SparkRunner.java From beam with Apache License 2.0 | 4 votes |
@Override public SparkPipelineResult run(final Pipeline pipeline) { LOG.info("Executing pipeline using the SparkRunner."); final SparkPipelineResult result; final Future<?> startPipeline; final SparkPipelineTranslator translator; final ExecutorService executorService = Executors.newSingleThreadExecutor(); MetricsEnvironment.setMetricsSupported(true); // visit the pipeline to determine the translation mode detectTranslationMode(pipeline); pipeline.replaceAll(SparkTransformOverrides.getDefaultOverrides(mOptions.isStreaming())); prepareFilesToStage(mOptions); if (mOptions.isStreaming()) { CheckpointDir checkpointDir = new CheckpointDir(mOptions.getCheckpointDir()); SparkRunnerStreamingContextFactory streamingContextFactory = new SparkRunnerStreamingContextFactory(pipeline, mOptions, checkpointDir); final JavaStreamingContext jssc = JavaStreamingContext.getOrCreate( checkpointDir.getSparkCheckpointDir().toString(), streamingContextFactory); // Checkpoint aggregator/metrics values jssc.addStreamingListener( new JavaStreamingListenerWrapper( new AggregatorsAccumulator.AccumulatorCheckpointingSparkListener())); jssc.addStreamingListener( new JavaStreamingListenerWrapper( new MetricsAccumulator.AccumulatorCheckpointingSparkListener())); // register user-defined listeners. for (JavaStreamingListener listener : mOptions.as(SparkContextOptions.class).getListeners()) { LOG.info("Registered listener {}." + listener.getClass().getSimpleName()); jssc.addStreamingListener(new JavaStreamingListenerWrapper(listener)); } // register Watermarks listener to broadcast the advanced WMs. jssc.addStreamingListener( new JavaStreamingListenerWrapper(new WatermarkAdvancingStreamingListener())); // The reason we call initAccumulators here even though it is called in // SparkRunnerStreamingContextFactory is because the factory is not called when resuming // from checkpoint (When not resuming from checkpoint initAccumulators will be called twice // but this is fine since it is idempotent). 
initAccumulators(mOptions, jssc.sparkContext()); startPipeline = executorService.submit( () -> { LOG.info("Starting streaming pipeline execution."); jssc.start(); }); executorService.shutdown(); result = new SparkPipelineResult.StreamingMode(startPipeline, jssc); } else { // create the evaluation context final JavaSparkContext jsc = SparkContextFactory.getSparkContext(mOptions); final EvaluationContext evaluationContext = new EvaluationContext(jsc, pipeline, mOptions); translator = new TransformTranslator.Translator(); // update the cache candidates updateCacheCandidates(pipeline, translator, evaluationContext); initAccumulators(mOptions, jsc); startPipeline = executorService.submit( () -> { pipeline.traverseTopologically(new Evaluator(translator, evaluationContext)); evaluationContext.computeOutputs(); LOG.info("Batch pipeline execution complete."); }); executorService.shutdown(); result = new SparkPipelineResult.BatchMode(startPipeline, jsc); } if (mOptions.getEnableSparkMetricSinks()) { registerMetricsSource(mOptions.getAppName()); } // it would have been better to create MetricsPusher from runner-core but we need // runner-specific // MetricsContainerStepMap MetricsPusher metricsPusher = new MetricsPusher( MetricsAccumulator.getInstance().value(), mOptions.as(MetricsOptions.class), result); metricsPusher.start(); return result; }
Example 12
Source File: SamzaRunner.java From beam with Apache License 2.0 | 4 votes |
@Override public SamzaPipelineResult run(Pipeline pipeline) { MetricsEnvironment.setMetricsSupported(true); if (LOG.isDebugEnabled()) { LOG.debug("Pre-processed Beam pipeline:\n{}", PipelineDotRenderer.toDotString(pipeline)); } pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides()); final String dotGraph = PipelineDotRenderer.toDotString(pipeline); LOG.info("Beam pipeline DOT graph:\n{}", dotGraph); final Map<PValue, String> idMap = PViewToIdMapper.buildIdMap(pipeline); final ConfigBuilder configBuilder = new ConfigBuilder(options); SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder); configBuilder.put(BEAM_DOT_GRAPH, dotGraph); final Config config = configBuilder.build(); options.setConfigOverride(config); if (listener != null) { listener.onInit(config, options); } final SamzaExecutionContext executionContext = new SamzaExecutionContext(options); final Map<String, MetricsReporterFactory> reporterFactories = getMetricsReporters(); final StreamApplication app = appDescriptor -> { appDescriptor.withApplicationContainerContextFactory(executionContext.new Factory()); appDescriptor.withMetricsReporterFactories(reporterFactories); SamzaPipelineTranslator.translate( pipeline, new TranslationContext(appDescriptor, idMap, options)); }; // perform a final round of validation for the pipeline options now that all configs are // generated SamzaPipelineOptionsValidator.validate(options); ApplicationRunner runner = runSamzaApp(app, config); return new SamzaPipelineResult(app, runner, executionContext, listener, config); }
Example 13
Source File: DirectRunner.java From beam with Apache License 2.0 | 4 votes |
@Override public DirectPipelineResult run(Pipeline pipeline) { try { options = MAPPER .readValue(MAPPER.writeValueAsBytes(options), PipelineOptions.class) .as(DirectOptions.class); } catch (IOException e) { throw new IllegalArgumentException( "PipelineOptions specified failed to serialize to JSON.", e); } pipeline.replaceAll(defaultTransformOverrides()); MetricsEnvironment.setMetricsSupported(true); try { DirectGraphVisitor graphVisitor = new DirectGraphVisitor(); pipeline.traverseTopologically(graphVisitor); @SuppressWarnings("rawtypes") KeyedPValueTrackingVisitor keyedPValueVisitor = KeyedPValueTrackingVisitor.create(); pipeline.traverseTopologically(keyedPValueVisitor); DisplayDataValidator.validatePipeline(pipeline); DisplayDataValidator.validateOptions(options); ExecutorService metricsPool = Executors.newCachedThreadPool( new ThreadFactoryBuilder() .setThreadFactory(MoreExecutors.platformThreadFactory()) .setDaemon(false) // otherwise you say you want to leak, please don't! .setNameFormat("direct-metrics-counter-committer") .build()); DirectGraph graph = graphVisitor.getGraph(); EvaluationContext context = EvaluationContext.create( clockSupplier.get(), Enforcement.bundleFactoryFor(enabledEnforcements, graph), graph, keyedPValueVisitor.getKeyedPValues(), metricsPool); TransformEvaluatorRegistry registry = TransformEvaluatorRegistry.javaSdkNativeRegistry(context, options); PipelineExecutor executor = ExecutorServiceParallelExecutor.create( options.getTargetParallelism(), registry, Enforcement.defaultModelEnforcements(enabledEnforcements), context, metricsPool); executor.start(graph, RootProviderRegistry.javaNativeRegistry(context, options)); DirectPipelineResult result = new DirectPipelineResult(executor, context); if (options.isBlockOnRun()) { try { result.waitUntilFinish(); } catch (UserCodeException userException) { throw new PipelineExecutionException(userException.getCause()); } catch (Throwable t) { if (t instanceof RuntimeException) { throw 
(RuntimeException) t; } throw new RuntimeException(t); } } return result; } finally { MetricsEnvironment.setMetricsSupported(false); } }
Example 14
Source File: DirectGraphs.java From beam with Apache License 2.0 | 4 votes |
/**
 * Applies the direct runner's default transform overrides to the pipeline, using a
 * {@code DirectRunner} built from freshly created {@code DirectOptions}.
 *
 * @param p the pipeline whose transforms are replaced in place
 */
public static void performDirectOverrides(Pipeline p) {
  final DirectOptions directOptions = PipelineOptionsFactory.create().as(DirectOptions.class);
  final DirectRunner runner = DirectRunner.fromOptions(directOptions);
  p.replaceAll(runner.defaultTransformOverrides());
}