Java Code Examples for org.apache.beam.sdk.Pipeline#traverseTopologically()
The following examples show how to use org.apache.beam.sdk.Pipeline#traverseTopologically().
Each example notes its original project and license.
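All of the examples below follow the same basic pattern: implement Pipeline.PipelineVisitor (usually by extending PipelineVisitor.Defaults) and pass it to traverseTopologically(), which walks the pipeline's transform hierarchy in topological order and invokes the visitor's callbacks for each transform and value. Before the project examples, here is a minimal, self-contained sketch of that pattern. It is not taken from any of the projects below; the class name TransformNamePrinter and the toy pipeline are made up for illustration.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.Pipeline.PipelineVisitor;
import org.apache.beam.sdk.runners.TransformHierarchy.Node;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.values.TypeDescriptors;

public class TransformNamePrinter {
  public static void main(String[] args) {
    // Build a small pipeline; it is never run, only inspected.
    Pipeline pipeline = Pipeline.create();
    pipeline
        .apply(Create.of("a", "b", "c"))
        .apply(MapElements.into(TypeDescriptors.strings()).via((String s) -> s.toUpperCase()));

    // Walk the transform hierarchy in topological order and print each
    // primitive transform's full name.
    pipeline.traverseTopologically(
        new PipelineVisitor.Defaults() {
          @Override
          public void visitPrimitiveTransform(Node node) {
            System.out.println("Primitive transform: " + node.getFullName());
          }
        });
  }
}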
Example 1
Source File: TrackStreamingSourcesTest.java From beam with Apache License 2.0
@Test
public void testTrackSingle() {
  options.setRunner(SparkRunner.class);
  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  JavaStreamingContext jssc =
      new JavaStreamingContext(
          jsc, new org.apache.spark.streaming.Duration(options.getBatchIntervalMillis()));

  Pipeline p = Pipeline.create(options);

  CreateStream<Integer> emptyStream =
      CreateStream.of(VarIntCoder.of(), Duration.millis(options.getBatchIntervalMillis()))
          .emptyBatch();

  p.apply(emptyStream).apply(ParDo.of(new PassthroughFn<>()));

  p.traverseTopologically(new StreamingSourceTracker(jssc, p, ParDo.MultiOutput.class, 0));
  assertThat(StreamingSourceTracker.numAssertions, equalTo(1));
}
Example 2
Source File: BeamEnumerableConverter.java From beam with Apache License 2.0
private static boolean containsUnboundedPCollection(Pipeline p) {
  class BoundednessVisitor extends PipelineVisitor.Defaults {
    IsBounded boundedness = IsBounded.BOUNDED;

    @Override
    public void visitValue(PValue value, Node producer) {
      if (value instanceof PCollection) {
        boundedness = boundedness.and(((PCollection) value).isBounded());
      }
    }
  }

  BoundednessVisitor visitor = new BoundednessVisitor();
  p.traverseTopologically(visitor);
  return visitor.boundedness == IsBounded.UNBOUNDED;
}
Example 3
Source File: DataflowRunner.java From beam with Apache License 2.0
private boolean containsUnboundedPCollection(Pipeline p) {
  class BoundednessVisitor extends PipelineVisitor.Defaults {
    IsBounded boundedness = IsBounded.BOUNDED;

    @Override
    public void visitValue(PValue value, Node producer) {
      if (value instanceof PCollection) {
        boundedness = boundedness.and(((PCollection) value).isBounded());
      }
    }
  }

  BoundednessVisitor visitor = new BoundednessVisitor();
  p.traverseTopologically(visitor);
  return visitor.boundedness == IsBounded.UNBOUNDED;
}
Example 4
Source File: DataflowRunnerTest.java From beam with Apache License 2.0
/** Tests that all reads are consumed by at least one {@link PTransform}. */
@Test
public void testUnconsumedReads() throws IOException {
  DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
  RuntimeTestOptions options = dataflowOptions.as(RuntimeTestOptions.class);

  Pipeline p = buildDataflowPipeline(dataflowOptions);
  p.apply(TextIO.read().from(options.getInput()));

  DataflowRunner.fromOptions(dataflowOptions).replaceTransforms(p);

  final AtomicBoolean unconsumedSeenAsInput = new AtomicBoolean();
  p.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          unconsumedSeenAsInput.set(true);
        }
      });

  assertThat(unconsumedSeenAsInput.get(), is(true));
}
Example 5
Source File: DataflowPipelineTranslatorTest.java From beam with Apache License 2.0
@Test
public void testDiskSizeGbConfig() throws IOException {
  final Integer diskSizeGb = 1234;

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setDiskSizeGb(diskSizeGb);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              pipelineProto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(diskSizeGb, job.getEnvironment().getWorkerPools().get(0).getDiskSizeGb());
}
Example 6
Source File: UnconsumedReads.java From beam with Apache License 2.0
public static void ensureAllReadsConsumed(Pipeline pipeline) {
  final Set<PCollection<?>> unconsumed = new HashSet<>();
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          unconsumed.removeAll(node.getInputs().values());
        }

        @Override
        public void visitValue(PValue value, Node producer) {
          String urn = PTransformTranslation.urnForTransformOrNull(producer.getTransform());
          if (PTransformTranslation.READ_TRANSFORM_URN.equals(urn)) {
            unconsumed.add((PCollection<?>) value);
          }
        }
      });
  int i = 0;
  for (PCollection<?> unconsumedPCollection : unconsumed) {
    consume(unconsumedPCollection, i);
    i++;
  }
}
Example 7
Source File: DataflowPipelineTranslatorTest.java From beam with Apache License 2.0
@Test
public void testSubnetworkConfigMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              pipelineProto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertNull(job.getEnvironment().getWorkerPools().get(0).getSubnetwork());
}
Example 8
Source File: TrackStreamingSourcesTest.java From beam with Apache License 2.0
@Test
public void testTrackFlattened() {
  options.setRunner(SparkRunner.class);
  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  JavaStreamingContext jssc =
      new JavaStreamingContext(
          jsc, new org.apache.spark.streaming.Duration(options.getBatchIntervalMillis()));

  Pipeline p = Pipeline.create(options);

  CreateStream<Integer> queueStream1 =
      CreateStream.of(VarIntCoder.of(), Duration.millis(options.getBatchIntervalMillis()))
          .emptyBatch();
  CreateStream<Integer> queueStream2 =
      CreateStream.of(VarIntCoder.of(), Duration.millis(options.getBatchIntervalMillis()))
          .emptyBatch();

  PCollection<Integer> pcol1 = p.apply(queueStream1);
  PCollection<Integer> pcol2 = p.apply(queueStream2);
  PCollection<Integer> flattened =
      PCollectionList.of(pcol1).and(pcol2).apply(Flatten.pCollections());
  flattened.apply(ParDo.of(new PassthroughFn<>()));

  p.traverseTopologically(new StreamingSourceTracker(jssc, p, ParDo.MultiOutput.class, 0, 1));
  assertThat(StreamingSourceTracker.numAssertions, equalTo(1));
}
Example 9
Source File: PipelineInit.java From component-runtime with Apache License 2.0
public static void lazyStart(final JobStateAware.State jobState,
    final Supplier<DIPipeline> pipelineSupplier) {
  final AtomicBoolean pipelineStarted = jobState.getPipelineStarted();
  if (!pipelineStarted.get() && pipelineStarted.compareAndSet(false, true)) {
    final Pipeline pipeline = pipelineSupplier.get();
    final TransformCounter counter = new TransformCounter();
    pipeline.traverseTopologically(counter);
    if (counter.transforms.get() > 0) {
      final PipelineResult result = pipeline.run();
      new Thread("talend-component-kit-di-pipeline-awaiter") {

        @Override
        public void run() {
          log.debug("Starting to watch beam pipeline");
          try {
            result.waitUntilFinish();
          } finally {
            final PipelineResult.State state = result.getState();
            log.debug("Exited pipeline with state {}", state.name());
            if (state.isTerminal()) {
              log.info("Beam pipeline ended");
            } else {
              log.debug("Beam pipeline ended by interruption");
            }
            jobState.getPipelineDone().complete(true);
          }
        }
      }.start();
    } else {
      jobState.getPipelineDone().complete(true);
      log.warn("A pipeline was created but not transform were found, is your job correctly configured?");
    }
  }
}
Example 10
Source File: NemoPipelineRunner.java From nemo with Apache License 2.0
/**
 * Method to run the Pipeline.
 *
 * @param pipeline the Pipeline to run.
 * @return The result of the pipeline.
 */
public NemoPipelineResult run(final Pipeline pipeline) {
  final DAGBuilder builder = new DAGBuilder<>();
  final NemoPipelineVisitor nemoPipelineVisitor =
      new NemoPipelineVisitor(builder, nemoPipelineOptions);
  pipeline.traverseTopologically(nemoPipelineVisitor);
  final DAG dag = builder.build();
  final NemoPipelineResult nemoPipelineResult = new NemoPipelineResult();
  JobLauncher.launchDAG(dag);
  return nemoPipelineResult;
}
Example 11
Source File: CacheTest.java From beam with Apache License 2.0
/**
 * Test checks how the cache candidates map is populated by the runner when evaluating the
 * pipeline.
 */
@Test
public void cacheCandidatesUpdaterTest() {
  SparkPipelineOptions options = createOptions();
  Pipeline pipeline = Pipeline.create(options);
  PCollection<String> pCollection = pipeline.apply(Create.of("foo", "bar"));

  // First use of pCollection.
  pCollection.apply(Count.globally());
  // Second use of pCollection.
  PCollectionView<List<String>> view = pCollection.apply(View.asList());

  // Internally View.asList() creates a PCollection that underlies the PCollectionView, that
  // PCollection should not be cached as the SparkRunner does not access that PCollection to
  // access the PCollectionView.
  pipeline
      .apply(Create.of("foo", "baz"))
      .apply(
          ParDo.of(
                  new DoFn<String, String>() {
                    @ProcessElement
                    public void processElement(ProcessContext processContext) {
                      if (processContext.sideInput(view).contains(processContext.element())) {
                        processContext.output(processContext.element());
                      }
                    }
                  })
              .withSideInputs(view));

  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  EvaluationContext ctxt = new EvaluationContext(jsc, pipeline, options);
  SparkRunner.CacheVisitor cacheVisitor =
      new SparkRunner.CacheVisitor(new TransformTranslator.Translator(), ctxt);
  pipeline.traverseTopologically(cacheVisitor);

  assertEquals(2L, (long) ctxt.getCacheCandidates().get(pCollection));
  assertEquals(1L, ctxt.getCacheCandidates().values().stream().filter(l -> l > 1).count());
}
Example 12
Source File: NemoRunner.java From incubator-nemo with Apache License 2.0
/**
 * Method to run the Pipeline.
 *
 * @param pipeline the Pipeline to run.
 * @return The result of the pipeline.
 */
public NemoPipelineResult run(final Pipeline pipeline) {
  final PipelineVisitor pipelineVisitor = new PipelineVisitor(pipeline, nemoPipelineOptions);
  pipeline.traverseTopologically(pipelineVisitor);
  final NemoPipelineResult nemoPipelineResult = new NemoPipelineResult();
  CompletableFuture.runAsync(() ->
          JobLauncher.launchDAG(
              pipelineVisitor.getConvertedPipeline(), nemoPipelineOptions.getJobName()))
      .thenRun(nemoPipelineResult::setJobDone);
  return nemoPipelineResult;
}
Example 13
Source File: DataflowPipelineTranslatorTest.java From beam with Apache License 2.0
@Test
public void testScalingAlgorithmMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              pipelineProto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  // Autoscaling settings are always set.
  assertNull(
      job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getAlgorithm());
  assertEquals(
      0,
      job.getEnvironment()
          .getWorkerPools()
          .get(0)
          .getAutoscalingSettings()
          .getMaxNumWorkers()
          .intValue());
}
Example 14
Source File: SparkRunner.java From beam with Apache License 2.0
/** Visit the pipeline to determine the translation mode (batch/streaming). */
private void detectTranslationMode(Pipeline pipeline) {
  TranslationModeDetector detector = new TranslationModeDetector();
  pipeline.traverseTopologically(detector);
  if (detector.getTranslationMode().equals(TranslationMode.STREAMING)) {
    // set streaming mode if it's a streaming pipeline
    this.mOptions.setStreaming(true);
  }
}
Example 15
Source File: DirectRunner.java From beam with Apache License 2.0
@Override
public DirectPipelineResult run(Pipeline pipeline) {
  try {
    options =
        MAPPER
            .readValue(MAPPER.writeValueAsBytes(options), PipelineOptions.class)
            .as(DirectOptions.class);
  } catch (IOException e) {
    throw new IllegalArgumentException(
        "PipelineOptions specified failed to serialize to JSON.", e);
  }

  pipeline.replaceAll(defaultTransformOverrides());
  MetricsEnvironment.setMetricsSupported(true);
  try {
    DirectGraphVisitor graphVisitor = new DirectGraphVisitor();
    pipeline.traverseTopologically(graphVisitor);

    @SuppressWarnings("rawtypes")
    KeyedPValueTrackingVisitor keyedPValueVisitor = KeyedPValueTrackingVisitor.create();
    pipeline.traverseTopologically(keyedPValueVisitor);

    DisplayDataValidator.validatePipeline(pipeline);
    DisplayDataValidator.validateOptions(options);

    ExecutorService metricsPool =
        Executors.newCachedThreadPool(
            new ThreadFactoryBuilder()
                .setThreadFactory(MoreExecutors.platformThreadFactory())
                .setDaemon(false) // otherwise you say you want to leak, please don't!
                .setNameFormat("direct-metrics-counter-committer")
                .build());

    DirectGraph graph = graphVisitor.getGraph();
    EvaluationContext context =
        EvaluationContext.create(
            clockSupplier.get(),
            Enforcement.bundleFactoryFor(enabledEnforcements, graph),
            graph,
            keyedPValueVisitor.getKeyedPValues(),
            metricsPool);

    TransformEvaluatorRegistry registry =
        TransformEvaluatorRegistry.javaSdkNativeRegistry(context, options);
    PipelineExecutor executor =
        ExecutorServiceParallelExecutor.create(
            options.getTargetParallelism(),
            registry,
            Enforcement.defaultModelEnforcements(enabledEnforcements),
            context,
            metricsPool);
    executor.start(graph, RootProviderRegistry.javaNativeRegistry(context, options));

    DirectPipelineResult result = new DirectPipelineResult(executor, context);
    if (options.isBlockOnRun()) {
      try {
        result.waitUntilFinish();
      } catch (UserCodeException userException) {
        throw new PipelineExecutionException(userException.getCause());
      } catch (Throwable t) {
        if (t instanceof RuntimeException) {
          throw (RuntimeException) t;
        }
        throw new RuntimeException(t);
      }
    }
    return result;
  } finally {
    MetricsEnvironment.setMetricsSupported(false);
  }
}
Example 16
Source File: SparkRunner.java From beam with Apache License 2.0
/** Evaluator that update/populate the cache candidates. */
public static void updateCacheCandidates(
    Pipeline pipeline, SparkPipelineTranslator translator, EvaluationContext evaluationContext) {
  CacheVisitor cacheVisitor = new CacheVisitor(translator, evaluationContext);
  pipeline.traverseTopologically(cacheVisitor);
}
Example 17
Source File: PViewToIdMapper.java From beam with Apache License 2.0
public static Map<PValue, String> buildIdMap(Pipeline pipeline) {
  final PViewToIdMapper mapper = new PViewToIdMapper();
  pipeline.traverseTopologically(mapper);
  return mapper.getIdMap();
}
Example 18
Source File: DisplayDataEvaluator.java From beam with Apache License 2.0
private static Set<DisplayData> displayDataForPipeline(Pipeline pipeline, PTransform<?, ?> root) {
  PrimitiveDisplayDataPTransformVisitor visitor = new PrimitiveDisplayDataPTransformVisitor(root);
  pipeline.traverseTopologically(visitor);
  return visitor.getPrimitivesDisplayData();
}
Example 19
Source File: FlinkPipelineTranslator.java From beam with Apache License 2.0
/**
 * Translates the pipeline by passing this class as a visitor.
 *
 * @param pipeline The pipeline to be translated
 */
public void translate(Pipeline pipeline) {
  pipeline.traverseTopologically(this);
}
Example 20
Source File: Twister2PipelineTranslator.java From twister2 with Apache License 2.0
/**
 * Translates the pipeline by passing this class as a visitor.
 *
 * @param pipeline The pipeline to be translated
 */
public void translate(Pipeline pipeline) {
  pipeline.traverseTopologically(this);
}