org.apache.beam.sdk.io.Read Java Examples
The following examples show how to use org.apache.beam.sdk.io.Read. Each snippet is taken from an open-source project; the source file, project, and license are noted above each example.
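Before the project examples, here is a minimal sketch of the transform's two basic forms: Read.from with a BoundedSource yields a bounded PCollection, while Read.from with an UnboundedSource yields an unbounded one that can be capped via withMaxNumRecords for testing. This sketch assumes only the Beam Java SDK and its built-in CountingSource; the pipeline is illustrative and not taken from any project below.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.CountingSource;
import org.apache.beam.sdk.io.Read;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class ReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Bounded read: the pipeline completes once the source is exhausted.
    PCollection<Long> bounded = p.apply(Read.from(CountingSource.upTo(10L)));

    // Unbounded read, capped for testing; without the cap it would stream indefinitely.
    PCollection<Long> capped =
        p.apply(Read.from(CountingSource.unbounded()).withMaxNumRecords(10L));

    p.run().waitUntilFinish();
  }
}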
Example #1
Source File: UnboundedReadFromBoundedSourceTest.java, from beam (Apache License 2.0)
@Test
@Category(NeedsRunner.class)
public void testBoundedToUnboundedSourceAdapter() throws Exception {
  long numElements = 100;
  BoundedSource<Long> boundedSource = CountingSource.upTo(numElements);
  UnboundedSource<Long, Checkpoint<Long>> unboundedSource =
      new BoundedToUnboundedSourceAdapter<>(boundedSource);
  PCollection<Long> output =
      p.apply(Read.from(unboundedSource).withMaxNumRecords(numElements));

  // Count == numElements
  PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(numElements);
  // Unique count == numElements
  PAssert.thatSingleton(output.apply(Distinct.create()).apply("UniqueCount", Count.globally()))
      .isEqualTo(numElements);
  // Min == 0
  PAssert.thatSingleton(output.apply("Min", Min.globally())).isEqualTo(0L);
  // Max == numElements-1
  PAssert.thatSingleton(output.apply("Max", Max.globally())).isEqualTo(numElements - 1);

  p.run();
}
Example #2
Source File: BeamSumDemo.java, from scotty-window-processor (Apache License 2.0)
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().create();
  Pipeline p = Pipeline.create(options);
  System.out.println("Running Pipeline\n " + p.getOptions());

  PCollection<KV<Integer, Integer>> data =
      p.begin().apply(Read.from(new DataGeneratorSource(0, new TimeStampGenerator())));

  KeyedScottyWindowOperator<Integer, Integer> scottyWindowDoFn =
      new KeyedScottyWindowOperator<Integer, Integer>(0, new Sum());
  scottyWindowDoFn.addWindow(new TumblingWindow(WindowMeasure.Time, 5000));
  //scottyWindowDoFn.addWindow(new SlidingWindow(WindowMeasure.Time, 2000, 1000));
  //scottyWindowDoFn.addWindow(new SessionWindow(WindowMeasure.Time, 2000));

  // Apply Scotty windowing
  PCollection<String> result = data.apply(ParDo.of(scottyWindowDoFn));

  // Print window results
  result.apply(ParDo.of(new printObject()));

  p.run().waitUntilFinish();
}
Example #3
Source File: BigQueryIOIT.java, from beam (Apache License 2.0)
private void testWrite(BigQueryIO.Write<byte[]> writeIO, String metricName) {
  Pipeline pipeline = Pipeline.create(options);

  BigQueryIO.Write.Method method = BigQueryIO.Write.Method.valueOf(options.getWriteMethod());
  pipeline
      .apply("Read from source", Read.from(new SyntheticBoundedSource(sourceOptions)))
      .apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, metricName)))
      .apply("Map records", ParDo.of(new MapKVToV()))
      .apply(
          "Write to BQ",
          writeIO
              .to(tableQualifier)
              .withCustomGcsTempLocation(ValueProvider.StaticValueProvider.of(tempRoot))
              .withMethod(method)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          Collections.singletonList(
                              new TableFieldSchema().setName("data").setType("BYTES")))));

  PipelineResult pipelineResult = pipeline.run();
  pipelineResult.waitUntilFinish();
  extractAndPublishTime(pipelineResult, metricName);
}
Example #4
Source File: DirectGraphVisitorTest.java, from beam (Apache License 2.0)
@Test
public void getRootTransformsContainsRootTransforms() {
  PCollection<String> created = p.apply(Create.of("foo", "bar"));
  PCollection<Long> counted = p.apply(Read.from(CountingSource.upTo(1234L)));
  PCollection<Long> unCounted = p.apply(GenerateSequence.from(0));
  p.traverseTopologically(visitor);
  DirectGraph graph = visitor.getGraph();
  assertThat(graph.getRootTransforms(), hasSize(3));
  assertThat(
      graph.getRootTransforms(),
      Matchers.containsInAnyOrder(
          new Object[] {
            graph.getProducer(created), graph.getProducer(counted), graph.getProducer(unCounted)
          }));
  for (AppliedPTransform<?, ?, ?> root : graph.getRootTransforms()) {
    // Root transforms will have no inputs
    assertThat(root.getInputs().entrySet(), emptyIterable());
    assertThat(
        Iterables.getOnlyElement(root.getOutputs().values()),
        Matchers.<POutput>isOneOf(created, counted, unCounted));
  }
}
Example #5
Source File: MyBeamJob.java, from hazelcast-jet-demos (Apache License 2.0)
public static Pipeline build(PipelineOptions pipelineOptions) {
  Pipeline pipeline = Pipeline.create(pipelineOptions);

  pipeline
      .apply("unbounded-source", Read.from(new MyUnboundedSource("beam-input")))
      .apply("reformat-and-timestamp", ParDo.of(new MyEnrichAndReformatFn()))
      .apply("window",
          Window.<String>into(FixedWindows.of(ONE_SECOND))
              .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()))
              .discardingFiredPanes()
              .withAllowedLateness(ONE_SECOND))
      .apply("sink",
          FileIO.<String>write()
              .via(TextIO.sink())
              .to(".")
              .withPrefix("beam-output")
              .withNumShards(1));

  return pipeline;
}
Example #6
Source File: BoundedReadEvaluatorFactoryTest.java, from beam (Apache License 2.0)
@Test
public void boundedSourceEvaluatorClosesReader() throws Exception {
  TestSource<Long> source = new TestSource<>(BigEndianLongCoder.of(), 1L, 2L, 3L);
  PCollection<Long> pcollection = p.apply(Read.from(source));
  AppliedPTransform<?, ?, ?> sourceTransform = DirectGraphs.getProducer(pcollection);

  UncommittedBundle<Long> output = bundleFactory.createBundle(pcollection);
  when(context.createBundle(pcollection)).thenReturn(output);

  TransformEvaluator<BoundedSourceShard<Long>> evaluator =
      factory.forApplication(
          sourceTransform, bundleFactory.createRootBundle().commit(Instant.now()));
  evaluator.processElement(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(source)));
  evaluator.finishBundle();
  CommittedBundle<Long> committed = output.commit(Instant.now());
  assertThat(committed.getElements(), containsInAnyOrder(gw(2L), gw(3L), gw(1L)));
  assertThat(TestSource.readerClosed, is(true));
}
Example #7
Source File: TCompBoundedSourceSinkAdapterTest.java, from components (Apache License 2.0)
@Test
public void testSource() {
  Pipeline pipeline = TestPipeline.create();

  FixedFlowProperties fixedFlowProperties = new FixedFlowProperties("fixedFlowProperties");
  fixedFlowProperties.init();
  fixedFlowProperties.data.setValue("a;b;c");
  fixedFlowProperties.rowDelimited.setValue(";");

  FixedFlowSource fixedFlowSource = new FixedFlowSource();
  fixedFlowSource.initialize(null, fixedFlowProperties);

  TCompBoundedSourceAdapter source = new TCompBoundedSourceAdapter(fixedFlowSource);

  PCollection<String> result =
      pipeline
          .apply(Read.from(source))
          .apply(ParDo.of(new DoFn<IndexedRecord, String>() {
            @DoFn.ProcessElement
            public void processElement(ProcessContext c) throws Exception {
              c.output(c.element().get(0).toString());
            }
          }));

  PAssert.that(result).containsInAnyOrder(Arrays.asList("a", "b", "c"));

  pipeline.run();
}
Example #8
Source File: BoundedReadEvaluatorFactoryTest.java, from beam (Apache License 2.0)
@Test
public void boundedSourceEvaluatorNoElementsClosesReader() throws Exception {
  TestSource<Long> source = new TestSource<>(BigEndianLongCoder.of());
  PCollection<Long> pcollection = p.apply(Read.from(source));
  AppliedPTransform<?, ?, ?> sourceTransform = DirectGraphs.getProducer(pcollection);

  UncommittedBundle<Long> output = bundleFactory.createBundle(pcollection);
  when(context.createBundle(pcollection)).thenReturn(output);

  TransformEvaluator<BoundedSourceShard<Long>> evaluator =
      factory.forApplication(
          sourceTransform, bundleFactory.createRootBundle().commit(Instant.now()));
  evaluator.processElement(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(source)));
  evaluator.finishBundle();
  CommittedBundle<Long> committed = output.commit(Instant.now());
  assertThat(committed.getElements(), emptyIterable());
  assertThat(TestSource.readerClosed, is(true));
}
Example #9
Source File: SyntheticDataPublisher.java, from beam (Apache License 2.0)
public static void main(String[] args) throws IOException {
  options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  SyntheticSourceOptions sourceOptions =
      SyntheticOptions.fromJsonString(options.getSourceOptions(), SyntheticSourceOptions.class);

  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<byte[], byte[]>> syntheticData =
      pipeline.apply("Read synthetic data", Read.from(new SyntheticBoundedSource(sourceOptions)));

  if (options.getKafkaBootstrapServerAddress() != null && options.getKafkaTopic() != null) {
    writeToKafka(syntheticData);
  }
  if (options.getPubSubTopic() != null) {
    writeToPubSub(syntheticData);
  }
  if (allKinesisOptionsConfigured()) {
    writeToKinesis(syntheticData);
  }

  pipeline.run().waitUntilFinish();
}
Example #10
Source File: WorkerCustomSourcesTest.java, from beam (Apache License 2.0)
static com.google.api.services.dataflow.model.Source translateIOToCloudSource(
    BoundedSource<?> io, DataflowPipelineOptions options) throws Exception {
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline p = Pipeline.create(options);
  p.begin().apply(Read.from(io));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  SdkComponents sdkComponents = SdkComponents.create();
  RunnerApi.Environment defaultEnvironmentForDataflow =
      Environments.createDockerEnvironment("dummy-image-url");
  sdkComponents.registerEnvironment(defaultEnvironmentForDataflow);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);

  Job workflow =
      translator
          .translate(p, pipelineProto, sdkComponents, runner, new ArrayList<DataflowPackage>())
          .getJob();
  Step step = workflow.getSteps().get(0);

  return stepToCloudSource(step);
}
Example #11
Source File: KafkaIOTest.java, from beam (Apache License 2.0)
/**
 * Creates a consumer with two topics, with 10 partitions each. numElements records are assigned
 * round-robin across all 20 partitions.
 */
private static KafkaIO.Read<Integer, Long> mkKafkaReadTransform(
    int numElements,
    int maxNumRecords,
    @Nullable SerializableFunction<KV<Integer, Long>, Instant> timestampFn) {

  List<String> topics = ImmutableList.of("topic_a", "topic_b");

  KafkaIO.Read<Integer, Long> reader =
      KafkaIO.<Integer, Long>read()
          .withBootstrapServers("myServer1:9092,myServer2:9092")
          .withTopics(topics)
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  topics, 10, numElements, OffsetResetStrategy.EARLIEST)) // 20 partitions
          .withKeyDeserializer(IntegerDeserializer.class)
          .withValueDeserializer(LongDeserializer.class)
          .withMaxNumRecords(maxNumRecords);

  if (timestampFn != null) {
    return reader.withTimestampFn(timestampFn);
  } else {
    return reader;
  }
}
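The helper above only constructs the KafkaIO.Read transform. A short usage sketch, mirroring how the other KafkaIOTest examples on this page consume such a reader (the pipeline p and the argument values are assumed for illustration):

// Hypothetical usage of mkKafkaReadTransform, following the pattern of the
// other KafkaIO tests here; `p` is assumed to be a TestPipeline.
PCollection<Long> input =
    p.apply(mkKafkaReadTransform(1000, 1000, null).withoutMetadata())
        .apply(Values.create());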
Example #12
Source File: FlinkStreamingTransformTranslatorsTest.java, from beam (Apache License 2.0)
@Test
public void readSourceTranslatorUnboundedWithMaxParallelism() {
  final int maxParallelism = 6;
  final int parallelism = 2;

  Read.Unbounded transform = Read.from(new TestUnboundedSource());

  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);
  env.setMaxParallelism(maxParallelism);

  OneInputTransformation<?, ?> sourceTransform =
      (OneInputTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.UNBOUNDED, env);

  UnboundedSourceWrapper source =
      (UnboundedSourceWrapper)
          ((SourceTransformation) sourceTransform.getInput()).getOperator().getUserFunction();

  assertEquals(maxParallelism, source.getSplitSources().size());
}
Example #13
Source File: KafkaIOTest.java, from beam (Apache License 2.0)
@Test
public void testUnboundedSourceWithExplicitPartitions() {
  int numElements = 1000;

  List<String> topics = ImmutableList.of("test");

  KafkaIO.Read<byte[], Long> reader =
      KafkaIO.<byte[], Long>read()
          .withBootstrapServers("none")
          .withTopicPartitions(ImmutableList.of(new TopicPartition("test", 5)))
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  topics, 10, numElements, OffsetResetStrategy.EARLIEST)) // 10 partitions
          .withKeyDeserializer(ByteArrayDeserializer.class)
          .withValueDeserializer(LongDeserializer.class)
          .withMaxNumRecords(numElements / 10);

  PCollection<Long> input = p.apply(reader.withoutMetadata()).apply(Values.create());

  // assert that every element is a multiple of 5.
  PAssert.that(input).satisfies(new AssertMultipleOf(5));

  PAssert.thatSingleton(input.apply(Count.globally())).isEqualTo(numElements / 10L);

  p.run();
}
Example #14
Source File: FlinkStreamingTransformTranslatorsTest.java, from beam (Apache License 2.0)
@Test
public void readSourceTranslatorBoundedWithoutMaxParallelism() {
  final int parallelism = 2;

  Read.Bounded transform = Read.from(new TestBoundedSource(parallelism));

  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);

  SourceTransformation<?> sourceTransform =
      (SourceTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.BOUNDED, env);

  UnboundedSourceWrapperNoValueWithRecordId source =
      (UnboundedSourceWrapperNoValueWithRecordId) sourceTransform.getOperator().getUserFunction();

  assertEquals(parallelism, source.getUnderlyingSource().getSplitSources().size());
}
Example #15
Source File: FlinkStreamingTransformTranslatorsTest.java, from beam (Apache License 2.0)
@Test
public void readSourceTranslatorBoundedWithMaxParallelism() {
  final int maxParallelism = 6;
  final int parallelism = 2;

  Read.Bounded transform = Read.from(new TestBoundedSource(maxParallelism));

  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);
  env.setMaxParallelism(maxParallelism);

  SourceTransformation<?> sourceTransform =
      (SourceTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.BOUNDED, env);

  UnboundedSourceWrapperNoValueWithRecordId source =
      (UnboundedSourceWrapperNoValueWithRecordId) sourceTransform.getOperator().getUserFunction();

  assertEquals(maxParallelism, source.getUnderlyingSource().getSplitSources().size());
}
Example #16
Source File: StreamingTransformTranslator.java, from beam (Apache License 2.0)
private static <T> TransformEvaluator<Read.Unbounded<T>> readUnbounded() {
  return new TransformEvaluator<Read.Unbounded<T>>() {
    @Override
    public void evaluate(Read.Unbounded<T> transform, EvaluationContext context) {
      final String stepName = context.getCurrentTransform().getFullName();
      context.putDataset(
          transform,
          SparkUnboundedSource.read(
              context.getStreamingContext(),
              context.getSerializableOptions(),
              transform.getSource(),
              stepName));
    }

    @Override
    public String toNativeString() {
      return "streamingContext.<readFrom(<source>)>()";
    }
  };
}
Example #17
Source File: TransformTranslator.java, from beam (Apache License 2.0)
private static <T> TransformEvaluator<Read.Bounded<T>> readBounded() {
  return new TransformEvaluator<Read.Bounded<T>>() {
    @Override
    public void evaluate(Read.Bounded<T> transform, EvaluationContext context) {
      String stepName = context.getCurrentTransform().getFullName();
      final JavaSparkContext jsc = context.getSparkContext();
      // create an RDD from a BoundedSource.
      JavaRDD<WindowedValue<T>> input =
          new SourceRDD.Bounded<>(
                  jsc.sc(), transform.getSource(), context.getSerializableOptions(), stepName)
              .toJavaRDD();
      context.putDataset(transform, new BoundedDataset<>(input));
    }

    @Override
    public String toNativeString() {
      return "sparkContext.<readFrom(<source>)>()";
    }
  };
}
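Examples #16 and #17 follow the same factory pattern: a zero-argument method returns a TransformEvaluator, and the runner selects the matching evaluator when it encounters a Read primitive during translation. A hypothetical sketch of such a lookup table follows; the EVALUATORS map is illustrative only and is not the actual Beam Spark runner wiring, where the bounded and unbounded registries live in separate translator classes:

// Hypothetical registry mapping Read primitives to their evaluator factories;
// this sketches the dispatch idea only, not Beam's actual Spark runner code.
private static final Map<Class<? extends PTransform>, TransformEvaluator<?>> EVALUATORS =
    new HashMap<>();

static {
  EVALUATORS.put(Read.Bounded.class, readBounded());
  EVALUATORS.put(Read.Unbounded.class, readUnbounded());
}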
Example #18
Source File: KafkaIOTest.java, from beam (Apache License 2.0)
@Test
public void testSourceWithExplicitPartitionsDisplayData() {
  KafkaIO.Read<byte[], byte[]> read =
      KafkaIO.readBytes()
          .withBootstrapServers("myServer1:9092,myServer2:9092")
          .withTopicPartitions(
              ImmutableList.of(new TopicPartition("test", 5), new TopicPartition("test", 6)))
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  Lists.newArrayList("test"), 10, 10, OffsetResetStrategy.EARLIEST)); // 10 partitions

  DisplayData displayData = DisplayData.from(read);

  assertThat(displayData, hasDisplayItem("topicPartitions", "test-5,test-6"));
  assertThat(displayData, hasDisplayItem("enable.auto.commit", false));
  assertThat(displayData, hasDisplayItem("bootstrap.servers", "myServer1:9092,myServer2:9092"));
  assertThat(displayData, hasDisplayItem("auto.offset.reset", "latest"));
  assertThat(displayData, hasDisplayItem("receive.buffer.bytes", 524288));
}
Example #19
Source File: QueryablePipelineTest.java, from beam (Apache License 2.0)
@Test
public void getEnvironmentWithEnvironment() {
  Pipeline p = Pipeline.create();
  PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
  longs.apply(WithKeys.of("a")).apply("groupByKey", GroupByKey.create());

  Components components = PipelineTranslation.toProto(p).getComponents();
  QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);

  PTransformNode environmentalRead =
      PipelineNode.pTransform("BoundedRead", components.getTransformsOrThrow("BoundedRead"));
  PTransformNode nonEnvironmentalTransform =
      PipelineNode.pTransform("groupByKey", components.getTransformsOrThrow("groupByKey"));

  assertThat(qp.getEnvironment(environmentalRead).isPresent(), is(true));
  assertThat(
      qp.getEnvironment(environmentalRead).get().getUrn(),
      equalTo(Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getUrn()));
  assertThat(
      qp.getEnvironment(environmentalRead).get().getPayload(),
      equalTo(Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getPayload()));
  assertThat(qp.getEnvironment(nonEnvironmentalTransform).isPresent(), is(false));
}
Example #20
Source File: QueryablePipelineTest.java, from beam (Apache License 2.0)
/**
 * Tests that {@link QueryablePipeline#getPerElementConsumers(PCollectionNode)} returns a
 * transform that consumes the node more than once.
 */
@Test
public void perElementConsumersWithConsumingMultipleTimes() {
  Pipeline p = Pipeline.create();
  PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
  PCollectionList.of(longs).and(longs).and(longs).apply("flatten", Flatten.pCollections());

  Components components = PipelineTranslation.toProto(p).getComponents();
  // This breaks if the way that IDs are assigned to PTransforms changes in PipelineTranslation
  String readOutput =
      getOnlyElement(components.getTransformsOrThrow("BoundedRead").getOutputsMap().values());
  QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);
  Set<PTransformNode> consumers =
      qp.getPerElementConsumers(
          PipelineNode.pCollection(readOutput, components.getPcollectionsOrThrow(readOutput)));

  assertThat(consumers.size(), equalTo(1));
  assertThat(
      getOnlyElement(consumers).getTransform().getSpec().getUrn(),
      equalTo(PTransformTranslation.FLATTEN_TRANSFORM_URN));
}
Example #21
Source File: KafkaIOTest.java, from beam (Apache License 2.0)
@Test
public void testUnboundedSourceWithSingleTopic() {
  // same as testUnboundedSource, but with single topic
  int numElements = 1000;
  String topic = "my_topic";

  KafkaIO.Read<Integer, Long> reader =
      KafkaIO.<Integer, Long>read()
          .withBootstrapServers("none")
          .withTopic("my_topic")
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  ImmutableList.of(topic), 10, numElements, OffsetResetStrategy.EARLIEST))
          .withMaxNumRecords(numElements)
          .withKeyDeserializer(IntegerDeserializer.class)
          .withValueDeserializer(LongDeserializer.class);

  PCollection<Long> input = p.apply(reader.withoutMetadata()).apply(Values.create());

  addCountingAsserts(input, numElements);
  p.run();
}
Example #22
Source File: QueryablePipelineTest.java, from beam (Apache License 2.0)
@Test
public void rootTransforms() {
  Pipeline p = Pipeline.create();
  p.apply("UnboundedRead", Read.from(CountingSource.unbounded()))
      .apply(Window.into(FixedWindows.of(Duration.millis(5L))))
      .apply(Count.perElement());
  p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));

  Components components = PipelineTranslation.toProto(p).getComponents();
  QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);

  assertThat(qp.getRootTransforms(), hasSize(2));
  for (PTransformNode rootTransform : qp.getRootTransforms()) {
    assertThat(
        "Root transforms should have no inputs",
        rootTransform.getTransform().getInputsCount(),
        equalTo(0));
    assertThat(
        "Only added source reads to the pipeline",
        rootTransform.getTransform().getSpec().getUrn(),
        equalTo(PTransformTranslation.READ_TRANSFORM_URN));
  }
}
Example #23
Source File: UnconsumedReadsTest.java, from beam (Apache License 2.0)
@Test
public void doesNotConsumeAlreadyConsumedRead() {
  Unbounded<Long> transform = Read.from(CountingSource.unbounded());
  final PCollection<Long> output = pipeline.apply(transform);
  final Flatten.PCollections<Long> consumer = Flatten.pCollections();
  PCollectionList.of(output).apply(consumer);
  UnconsumedReads.ensureAllReadsConsumed(pipeline);
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          // The output should only be consumed by a single consumer
          if (node.getInputs().values().contains(output)) {
            assertThat(node.getTransform(), Matchers.is(consumer));
          }
        }
      });
}
Example #24
Source File: FlinkStreamingTransformTranslatorsTest.java, from beam (Apache License 2.0)
@Test
public void readSourceTranslatorUnboundedWithoutMaxParallelism() {
  final int parallelism = 2;

  Read.Unbounded transform = Read.from(new TestUnboundedSource());

  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);

  OneInputTransformation<?, ?> sourceTransform =
      (OneInputTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.UNBOUNDED, env);

  UnboundedSourceWrapper source =
      (UnboundedSourceWrapper)
          ((SourceTransformation) sourceTransform.getInput()).getOperator().getUserFunction();

  assertEquals(parallelism, source.getSplitSources().size());
}
Example #25
Source File: UnconsumedReadsTest.java, from beam (Apache License 2.0)
@Test
public void matcherProducesUnconsumedValueUnboundedRead() {
  Unbounded<Long> transform = Read.from(CountingSource.unbounded());
  pipeline.apply(transform);
  UnconsumedReads.ensureAllReadsConsumed(pipeline);
  validateConsumed();
}
Example #26
Source File: UnconsumedReadsTest.java, from beam (Apache License 2.0)
@Test
public void matcherProducesUnconsumedValueBoundedRead() {
  Bounded<Long> transform = Read.from(CountingSource.upTo(20L));
  pipeline.apply(transform);
  UnconsumedReads.ensureAllReadsConsumed(pipeline);
  validateConsumed();
}
Example #27
Source File: FixedFlowInputRuntime.java, from components (Apache License 2.0)
@Override
public PCollection<IndexedRecord> expand(PBegin begin) {
  return begin.apply(Read.from(new FixedFlowInputBoundedSource() //
      .withSchema(properties.schemaFlow.schema.getValue()) //
      .withValues(properties.values.getValue()) //
      .withNbRows(properties.nbRows.getValue())));
}
Example #28
Source File: DirectRunnerTest.java, from beam (Apache License 2.0)
PTransform<PBegin, PCollection<T>> read() {
  return new PTransform<PBegin, PCollection<T>>() {
    @Override
    public PCollection<T> expand(PBegin input) {
      return input.apply("readFrom:" + name, Read.from(asSource()));
    }
  };
}
Example #29
Source File: QueryablePipelineTest.java, from beam (Apache License 2.0)
@Test
public void getProducer() {
  Pipeline p = Pipeline.create();
  PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
  PCollectionList.of(longs).and(longs).and(longs).apply("flatten", Flatten.pCollections());

  Components components = PipelineTranslation.toProto(p).getComponents();
  QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);

  String longsOutputName =
      getOnlyElement(
          PipelineNode.pTransform("BoundedRead", components.getTransformsOrThrow("BoundedRead"))
              .getTransform()
              .getOutputsMap()
              .values());
  PTransformNode longsProducer =
      PipelineNode.pTransform("BoundedRead", components.getTransformsOrThrow("BoundedRead"));
  PCollectionNode longsOutput =
      PipelineNode.pCollection(
          longsOutputName, components.getPcollectionsOrThrow(longsOutputName));

  String flattenOutputName =
      getOnlyElement(
          PipelineNode.pTransform("flatten", components.getTransformsOrThrow("flatten"))
              .getTransform()
              .getOutputsMap()
              .values());
  PTransformNode flattenProducer =
      PipelineNode.pTransform("flatten", components.getTransformsOrThrow("flatten"));
  PCollectionNode flattenOutput =
      PipelineNode.pCollection(
          flattenOutputName, components.getPcollectionsOrThrow(flattenOutputName));

  assertThat(qp.getProducer(longsOutput), equalTo(longsProducer));
  assertThat(qp.getProducer(flattenOutput), equalTo(flattenProducer));
}
Example #30
Source File: DirectRunnerTest.java, from beam (Apache License 2.0)
@Test
public void splitsInputs() {
  Pipeline p = getPipeline();
  PCollection<Long> longs = p.apply(Read.from(MustSplitSource.of(CountingSource.upTo(3))));

  PAssert.that(longs).containsInAnyOrder(0L, 1L, 2L);
  p.run();
}