org.apache.beam.sdk.io.Read Java Examples
The following examples show how to use org.apache.beam.sdk.io.Read. Each snippet is taken from an open-source project; the source file, project, and license are noted above each example.
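Before the project examples, here is a minimal sketch of the transform's two basic forms: Read.from with a BoundedSource yields a bounded PCollection, while Read.from with an UnboundedSource yields an unbounded one that can be capped via withMaxNumRecords for testing. This sketch assumes only the Beam Java SDK and its built-in CountingSource; the pipeline is illustrative and not taken from any project below.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.CountingSource;
import org.apache.beam.sdk.io.Read;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class ReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Bounded read: the pipeline completes once the source is exhausted.
    PCollection<Long> bounded = p.apply(Read.from(CountingSource.upTo(10L)));

    // Unbounded read, capped for testing; without the cap it would stream indefinitely.
    PCollection<Long> capped =
        p.apply(Read.from(CountingSource.unbounded()).withMaxNumRecords(10L));

    p.run().waitUntilFinish();
  }
}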
Example #1
Source File: UnboundedReadFromBoundedSourceTest.java, from beam (Apache License 2.0)
@Test
@Category(NeedsRunner.class)
public void testBoundedToUnboundedSourceAdapter() throws Exception {
  long numElements = 100;
  BoundedSource<Long> boundedSource = CountingSource.upTo(numElements);
  UnboundedSource<Long, Checkpoint<Long>> unboundedSource =
      new BoundedToUnboundedSourceAdapter<>(boundedSource);
  PCollection<Long> output =
      p.apply(Read.from(unboundedSource).withMaxNumRecords(numElements));

  // Count == numElements
  PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(numElements);
  // Unique count == numElements
  PAssert.thatSingleton(output.apply(Distinct.create()).apply("UniqueCount", Count.globally()))
      .isEqualTo(numElements);
  // Min == 0
  PAssert.thatSingleton(output.apply("Min", Min.globally())).isEqualTo(0L);
  // Max == numElements-1
  PAssert.thatSingleton(output.apply("Max", Max.globally())).isEqualTo(numElements - 1);

  p.run();
}
Example #2
Source File: BeamSumDemo.java, from scotty-window-processor (Apache License 2.0)
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().create();
  Pipeline p = Pipeline.create(options);
  System.out.println("Running Pipeline\n " + p.getOptions());

  PCollection<KV<Integer, Integer>> data =
      p.begin().apply(Read.from(new DataGeneratorSource(0, new TimeStampGenerator())));

  KeyedScottyWindowOperator<Integer, Integer> scottyWindowDoFn =
      new KeyedScottyWindowOperator<Integer, Integer>(0, new Sum());
  scottyWindowDoFn.addWindow(new TumblingWindow(WindowMeasure.Time, 5000));
  //scottyWindowDoFn.addWindow(new SlidingWindow(WindowMeasure.Time, 2000, 1000));
  //scottyWindowDoFn.addWindow(new SessionWindow(WindowMeasure.Time, 2000));

  // Apply Scotty windowing
  PCollection<String> result = data.apply(ParDo.of(scottyWindowDoFn));

  // Print window results
  result.apply(ParDo.of(new printObject()));

  p.run().waitUntilFinish();
}
Example #3
Source File: BigQueryIOIT.java, from beam (Apache License 2.0)
private void testWrite(BigQueryIO.Write<byte[]> writeIO, String metricName) {
  Pipeline pipeline = Pipeline.create(options);

  BigQueryIO.Write.Method method = BigQueryIO.Write.Method.valueOf(options.getWriteMethod());
  pipeline
      .apply("Read from source", Read.from(new SyntheticBoundedSource(sourceOptions)))
      .apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, metricName)))
      .apply("Map records", ParDo.of(new MapKVToV()))
      .apply(
          "Write to BQ",
          writeIO
              .to(tableQualifier)
              .withCustomGcsTempLocation(ValueProvider.StaticValueProvider.of(tempRoot))
              .withMethod(method)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          Collections.singletonList(
                              new TableFieldSchema().setName("data").setType("BYTES")))));

  PipelineResult pipelineResult = pipeline.run();
  pipelineResult.waitUntilFinish();
  extractAndPublishTime(pipelineResult, metricName);
}
Example #4
Source File: DirectGraphVisitorTest.java, from beam (Apache License 2.0)
@Test
public void getRootTransformsContainsRootTransforms() {
  PCollection<String> created = p.apply(Create.of("foo", "bar"));
  PCollection<Long> counted = p.apply(Read.from(CountingSource.upTo(1234L)));
  PCollection<Long> unCounted = p.apply(GenerateSequence.from(0));
  p.traverseTopologically(visitor);
  DirectGraph graph = visitor.getGraph();
  assertThat(graph.getRootTransforms(), hasSize(3));
  assertThat(
      graph.getRootTransforms(),
      Matchers.containsInAnyOrder(
          new Object[] {
            graph.getProducer(created), graph.getProducer(counted), graph.getProducer(unCounted)
          }));
  for (AppliedPTransform<?, ?, ?> root : graph.getRootTransforms()) {
    // Root transforms will have no inputs
    assertThat(root.getInputs().entrySet(), emptyIterable());
    assertThat(
        Iterables.getOnlyElement(root.getOutputs().values()),
        Matchers.<POutput>isOneOf(created, counted, unCounted));
  }
}
Example #5
Source File: MyBeamJob.java, from hazelcast-jet-demos (Apache License 2.0)
public static Pipeline build(PipelineOptions pipelineOptions) {
  Pipeline pipeline = Pipeline.create(pipelineOptions);

  pipeline
      .apply("unbounded-source", Read.from(new MyUnboundedSource("beam-input")))
      .apply("reformat-and-timestamp", ParDo.of(new MyEnrichAndReformatFn()))
      .apply("window",
          Window.<String>into(FixedWindows.of(ONE_SECOND))
              .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()))
              .discardingFiredPanes()
              .withAllowedLateness(ONE_SECOND))
      .apply("sink",
          FileIO.<String>write()
              .via(TextIO.sink())
              .to(".")
              .withPrefix("beam-output")
              .withNumShards(1));

  return pipeline;
}
Example #6
Source File: BoundedReadEvaluatorFactoryTest.java, from beam (Apache License 2.0)
@Test
public void boundedSourceEvaluatorClosesReader() throws Exception {
  TestSource<Long> source = new TestSource<>(BigEndianLongCoder.of(), 1L, 2L, 3L);
  PCollection<Long> pcollection = p.apply(Read.from(source));
  AppliedPTransform<?, ?, ?> sourceTransform = DirectGraphs.getProducer(pcollection);

  UncommittedBundle<Long> output = bundleFactory.createBundle(pcollection);
  when(context.createBundle(pcollection)).thenReturn(output);

  TransformEvaluator<BoundedSourceShard<Long>> evaluator =
      factory.forApplication(
          sourceTransform, bundleFactory.createRootBundle().commit(Instant.now()));
  evaluator.processElement(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(source)));
  evaluator.finishBundle();
  CommittedBundle<Long> committed = output.commit(Instant.now());
  assertThat(committed.getElements(), containsInAnyOrder(gw(2L), gw(3L), gw(1L)));
  assertThat(TestSource.readerClosed, is(true));
}
Example #7
Source File: TCompBoundedSourceSinkAdapterTest.java, from components (Apache License 2.0)
@Test
public void testSource() {
  Pipeline pipeline = TestPipeline.create();

  FixedFlowProperties fixedFlowProperties = new FixedFlowProperties("fixedFlowProperties");
  fixedFlowProperties.init();
  fixedFlowProperties.data.setValue("a;b;c");
  fixedFlowProperties.rowDelimited.setValue(";");

  FixedFlowSource fixedFlowSource = new FixedFlowSource();
  fixedFlowSource.initialize(null, fixedFlowProperties);

  TCompBoundedSourceAdapter source = new TCompBoundedSourceAdapter(fixedFlowSource);

  PCollection<String> result =
      pipeline
          .apply(Read.from(source))
          .apply(ParDo.of(new DoFn<IndexedRecord, String>() {
            @DoFn.ProcessElement
            public void processElement(ProcessContext c) throws Exception {
              c.output(c.element().get(0).toString());
            }
          }));

  PAssert.that(result).containsInAnyOrder(Arrays.asList("a", "b", "c"));

  pipeline.run();
}
Example #8
Source File: BoundedReadEvaluatorFactoryTest.java, from beam (Apache License 2.0)
@Test
public void boundedSourceEvaluatorNoElementsClosesReader() throws Exception {
  TestSource<Long> source = new TestSource<>(BigEndianLongCoder.of());
  PCollection<Long> pcollection = p.apply(Read.from(source));
  AppliedPTransform<?, ?, ?> sourceTransform = DirectGraphs.getProducer(pcollection);

  UncommittedBundle<Long> output = bundleFactory.createBundle(pcollection);
  when(context.createBundle(pcollection)).thenReturn(output);

  TransformEvaluator<BoundedSourceShard<Long>> evaluator =
      factory.forApplication(
          sourceTransform, bundleFactory.createRootBundle().commit(Instant.now()));
  evaluator.processElement(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(source)));
  evaluator.finishBundle();
  CommittedBundle<Long> committed = output.commit(Instant.now());
  assertThat(committed.getElements(), emptyIterable());
  assertThat(TestSource.readerClosed, is(true));
}
Example #9
Source File: SyntheticDataPublisher.java, from beam (Apache License 2.0)
public static void main(String[] args) throws IOException {
  options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  SyntheticSourceOptions sourceOptions =
      SyntheticOptions.fromJsonString(options.getSourceOptions(), SyntheticSourceOptions.class);

  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<byte[], byte[]>> syntheticData =
      pipeline.apply("Read synthetic data", Read.from(new SyntheticBoundedSource(sourceOptions)));

  if (options.getKafkaBootstrapServerAddress() != null && options.getKafkaTopic() != null) {
    writeToKafka(syntheticData);
  }
  if (options.getPubSubTopic() != null) {
    writeToPubSub(syntheticData);
  }
  if (allKinesisOptionsConfigured()) {
    writeToKinesis(syntheticData);
  }

  pipeline.run().waitUntilFinish();
}
Example #10
Source File: WorkerCustomSourcesTest.java, from beam (Apache License 2.0)
static com.google.api.services.dataflow.model.Source translateIOToCloudSource(
    BoundedSource<?> io, DataflowPipelineOptions options) throws Exception {
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline p = Pipeline.create(options);
  p.begin().apply(Read.from(io));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  SdkComponents sdkComponents = SdkComponents.create();
  RunnerApi.Environment defaultEnvironmentForDataflow =
      Environments.createDockerEnvironment("dummy-image-url");
  sdkComponents.registerEnvironment(defaultEnvironmentForDataflow);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);

  Job workflow =
      translator
          .translate(p, pipelineProto, sdkComponents, runner, new ArrayList<DataflowPackage>())
          .getJob();
  Step step = workflow.getSteps().get(0);

  return stepToCloudSource(step);
}
Example #11
Source File: KafkaIOTest.java, from beam (Apache License 2.0)
/**
 * Creates a consumer with two topics, with 10 partitions each. numElements records are assigned
 * round-robin across all 20 partitions.
 */
private static KafkaIO.Read<Integer, Long> mkKafkaReadTransform(
    int numElements,
    int maxNumRecords,
    @Nullable SerializableFunction<KV<Integer, Long>, Instant> timestampFn) {

  List<String> topics = ImmutableList.of("topic_a", "topic_b");

  KafkaIO.Read<Integer, Long> reader =
      KafkaIO.<Integer, Long>read()
          .withBootstrapServers("myServer1:9092,myServer2:9092")
          .withTopics(topics)
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  topics, 10, numElements, OffsetResetStrategy.EARLIEST)) // 20 partitions
          .withKeyDeserializer(IntegerDeserializer.class)
          .withValueDeserializer(LongDeserializer.class)
          .withMaxNumRecords(maxNumRecords);

  if (timestampFn != null) {
    return reader.withTimestampFn(timestampFn);
  } else {
    return reader;
  }
}
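The helper above only constructs the KafkaIO.Read transform. A short usage sketch, mirroring how the other KafkaIOTest examples on this page consume such a reader (the pipeline p and the argument values are assumed for illustration):

// Hypothetical usage of mkKafkaReadTransform, following the pattern of the
// other KafkaIO tests here; `p` is assumed to be a TestPipeline.
PCollection<Long> input =
    p.apply(mkKafkaReadTransform(1000, 1000, null).withoutMetadata())
        .apply(Values.create());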
Example #12
Source File: FlinkStreamingTransformTranslatorsTest.java, from beam (Apache License 2.0)
@Test
public void readSourceTranslatorUnboundedWithMaxParallelism() {
  final int maxParallelism = 6;
  final int parallelism = 2;

  Read.Unbounded transform = Read.from(new TestUnboundedSource());

  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);
  env.setMaxParallelism(maxParallelism);

  OneInputTransformation<?, ?> sourceTransform =
      (OneInputTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.UNBOUNDED, env);

  UnboundedSourceWrapper source =
      (UnboundedSourceWrapper)
          ((SourceTransformation) sourceTransform.getInput()).getOperator().getUserFunction();

  assertEquals(maxParallelism, source.getSplitSources().size());
}
Example #13
Source File: KafkaIOTest.java, from beam (Apache License 2.0)
@Test
public void testUnboundedSourceWithExplicitPartitions() {
  int numElements = 1000;

  List<String> topics = ImmutableList.of("test");

  KafkaIO.Read<byte[], Long> reader =
      KafkaIO.<byte[], Long>read()
          .withBootstrapServers("none")
          .withTopicPartitions(ImmutableList.of(new TopicPartition("test", 5)))
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  topics, 10, numElements, OffsetResetStrategy.EARLIEST)) // 10 partitions
          .withKeyDeserializer(ByteArrayDeserializer.class)
          .withValueDeserializer(LongDeserializer.class)
          .withMaxNumRecords(numElements / 10);

  PCollection<Long> input = p.apply(reader.withoutMetadata()).apply(Values.create());

  // assert that every element is a multiple of 5.
  PAssert.that(input).satisfies(new AssertMultipleOf(5));

  PAssert.thatSingleton(input.apply(Count.globally())).isEqualTo(numElements / 10L);

  p.run();
}
Example #14
Source File: FlinkStreamingTransformTranslatorsTest.java, from beam (Apache License 2.0)
@Test
public void readSourceTranslatorBoundedWithoutMaxParallelism() {
  final int parallelism = 2;

  Read.Bounded transform = Read.from(new TestBoundedSource(parallelism));

  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);

  SourceTransformation<?> sourceTransform =
      (SourceTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.BOUNDED, env);

  UnboundedSourceWrapperNoValueWithRecordId source =
      (UnboundedSourceWrapperNoValueWithRecordId) sourceTransform.getOperator().getUserFunction();

  assertEquals(parallelism, source.getUnderlyingSource().getSplitSources().size());
}
Example #15
Source File: FlinkStreamingTransformTranslatorsTest.java, from beam (Apache License 2.0)
@Test
public void readSourceTranslatorBoundedWithMaxParallelism() {
  final int maxParallelism = 6;
  final int parallelism = 2;

  Read.Bounded transform = Read.from(new TestBoundedSource(maxParallelism));

  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);
  env.setMaxParallelism(maxParallelism);

  SourceTransformation<?> sourceTransform =
      (SourceTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.BOUNDED, env);

  UnboundedSourceWrapperNoValueWithRecordId source =
      (UnboundedSourceWrapperNoValueWithRecordId) sourceTransform.getOperator().getUserFunction();

  assertEquals(maxParallelism, source.getUnderlyingSource().getSplitSources().size());
}
Example #16
Source File: StreamingTransformTranslator.java, from beam (Apache License 2.0)
private static <T> TransformEvaluator<Read.Unbounded<T>> readUnbounded() {
  return new TransformEvaluator<Read.Unbounded<T>>() {
    @Override
    public void evaluate(Read.Unbounded<T> transform, EvaluationContext context) {
      final String stepName = context.getCurrentTransform().getFullName();
      context.putDataset(
          transform,
          SparkUnboundedSource.read(
              context.getStreamingContext(),
              context.getSerializableOptions(),
              transform.getSource(),
              stepName));
    }

    @Override
    public String toNativeString() {
      return "streamingContext.<readFrom(<source>)>()";
    }
  };
}
Example #17
Source File: TransformTranslator.java, from beam (Apache License 2.0)
private static <T> TransformEvaluator<Read.Bounded<T>> readBounded() {
  return new TransformEvaluator<Read.Bounded<T>>() {
    @Override
    public void evaluate(Read.Bounded<T> transform, EvaluationContext context) {
      String stepName = context.getCurrentTransform().getFullName();
      final JavaSparkContext jsc = context.getSparkContext();
      // create an RDD from a BoundedSource.
      JavaRDD<WindowedValue<T>> input =
          new SourceRDD.Bounded<>(
                  jsc.sc(), transform.getSource(), context.getSerializableOptions(), stepName)
              .toJavaRDD();
      context.putDataset(transform, new BoundedDataset<>(input));
    }

    @Override
    public String toNativeString() {
      return "sparkContext.<readFrom(<source>)>()";
    }
  };
}
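Examples #16 and #17 follow the same factory pattern: a zero-argument method returns a TransformEvaluator, and the runner selects the matching evaluator when it encounters a Read primitive during translation. A hypothetical sketch of such a lookup table follows; the EVALUATORS map is illustrative only and is not the actual Beam Spark runner wiring, where the bounded and unbounded registries live in separate translator classes:

// Hypothetical registry mapping Read primitives to their evaluator factories;
// this sketches the dispatch idea only, not Beam's actual Spark runner code.
private static final Map<Class<? extends PTransform>, TransformEvaluator<?>> EVALUATORS =
    new HashMap<>();

static {
  EVALUATORS.put(Read.Bounded.class, readBounded());
  EVALUATORS.put(Read.Unbounded.class, readUnbounded());
}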
Example #18
Source File: KafkaIOTest.java, from beam (Apache License 2.0)
@Test
public void testSourceWithExplicitPartitionsDisplayData() {
  KafkaIO.Read<byte[], byte[]> read =
      KafkaIO.readBytes()
          .withBootstrapServers("myServer1:9092,myServer2:9092")
          .withTopicPartitions(
              ImmutableList.of(new TopicPartition("test", 5), new TopicPartition("test", 6)))
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  Lists.newArrayList("test"), 10, 10, OffsetResetStrategy.EARLIEST)); // 10 partitions

  DisplayData displayData = DisplayData.from(read);

  assertThat(displayData, hasDisplayItem("topicPartitions", "test-5,test-6"));
  assertThat(displayData, hasDisplayItem("enable.auto.commit", false));
  assertThat(displayData, hasDisplayItem("bootstrap.servers", "myServer1:9092,myServer2:9092"));
  assertThat(displayData, hasDisplayItem("auto.offset.reset", "latest"));
  assertThat(displayData, hasDisplayItem("receive.buffer.bytes", 524288));
}
Example #19
Source File: QueryablePipelineTest.java, from beam (Apache License 2.0)
@Test
public void getEnvironmentWithEnvironment() {
  Pipeline p = Pipeline.create();
  PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
  longs.apply(WithKeys.of("a")).apply("groupByKey", GroupByKey.create());

  Components components = PipelineTranslation.toProto(p).getComponents();
  QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);

  PTransformNode environmentalRead =
      PipelineNode.pTransform("BoundedRead", components.getTransformsOrThrow("BoundedRead"));
  PTransformNode nonEnvironmentalTransform =
      PipelineNode.pTransform("groupByKey", components.getTransformsOrThrow("groupByKey"));

  assertThat(qp.getEnvironment(environmentalRead).isPresent(), is(true));
  assertThat(
      qp.getEnvironment(environmentalRead).get().getUrn(),
      equalTo(Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getUrn()));
  assertThat(
      qp.getEnvironment(environmentalRead).get().getPayload(),
      equalTo(Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getPayload()));
  assertThat(qp.getEnvironment(nonEnvironmentalTransform).isPresent(), is(false));
}
Example #20
Source File: QueryablePipelineTest.java, from beam (Apache License 2.0)
/**
 * Tests that {@link QueryablePipeline#getPerElementConsumers(PCollectionNode)} returns a
 * transform that consumes the node more than once.
 */
@Test
public void perElementConsumersWithConsumingMultipleTimes() {
  Pipeline p = Pipeline.create();
  PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
  PCollectionList.of(longs).and(longs).and(longs).apply("flatten", Flatten.pCollections());

  Components components = PipelineTranslation.toProto(p).getComponents();
  // This breaks if the way that IDs are assigned to PTransforms changes in PipelineTranslation
  String readOutput =
      getOnlyElement(components.getTransformsOrThrow("BoundedRead").getOutputsMap().values());
  QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);
  Set<PTransformNode> consumers =
      qp.getPerElementConsumers(
          PipelineNode.pCollection(readOutput, components.getPcollectionsOrThrow(readOutput)));

  assertThat(consumers.size(), equalTo(1));
  assertThat(
      getOnlyElement(consumers).getTransform().getSpec().getUrn(),
      equalTo(PTransformTranslation.FLATTEN_TRANSFORM_URN));
}
Example #21
Source File: KafkaIOTest.java, from beam (Apache License 2.0)
@Test
public void testUnboundedSourceWithSingleTopic() {
  // same as testUnboundedSource, but with single topic
  int numElements = 1000;
  String topic = "my_topic";

  KafkaIO.Read<Integer, Long> reader =
      KafkaIO.<Integer, Long>read()
          .withBootstrapServers("none")
          .withTopic("my_topic")
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  ImmutableList.of(topic), 10, numElements, OffsetResetStrategy.EARLIEST))
          .withMaxNumRecords(numElements)
          .withKeyDeserializer(IntegerDeserializer.class)
          .withValueDeserializer(LongDeserializer.class);

  PCollection<Long> input = p.apply(reader.withoutMetadata()).apply(Values.create());

  addCountingAsserts(input, numElements);
  p.run();
}
Example #22
Source File: QueryablePipelineTest.java, from beam (Apache License 2.0)
@Test
public void rootTransforms() {
  Pipeline p = Pipeline.create();
  p.apply("UnboundedRead", Read.from(CountingSource.unbounded()))
      .apply(Window.into(FixedWindows.of(Duration.millis(5L))))
      .apply(Count.perElement());
  p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));

  Components components = PipelineTranslation.toProto(p).getComponents();
  QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);

  assertThat(qp.getRootTransforms(), hasSize(2));
  for (PTransformNode rootTransform : qp.getRootTransforms()) {
    assertThat(
        "Root transforms should have no inputs",
        rootTransform.getTransform().getInputsCount(),
        equalTo(0));
    assertThat(
        "Only added source reads to the pipeline",
        rootTransform.getTransform().getSpec().getUrn(),
        equalTo(PTransformTranslation.READ_TRANSFORM_URN));
  }
}
Example #23
Source File: UnconsumedReadsTest.java, from beam (Apache License 2.0)
@Test
public void doesNotConsumeAlreadyConsumedRead() {
  Unbounded<Long> transform = Read.from(CountingSource.unbounded());
  final PCollection<Long> output = pipeline.apply(transform);
  final Flatten.PCollections<Long> consumer = Flatten.pCollections();
  PCollectionList.of(output).apply(consumer);
  UnconsumedReads.ensureAllReadsConsumed(pipeline);
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          // The output should only be consumed by a single consumer
          if (node.getInputs().values().contains(output)) {
            assertThat(node.getTransform(), Matchers.is(consumer));
          }
        }
      });
}
Example #24
Source File: FlinkStreamingTransformTranslatorsTest.java, from beam (Apache License 2.0)
@Test
public void readSourceTranslatorUnboundedWithoutMaxParallelism() {
  final int parallelism = 2;

  Read.Unbounded transform = Read.from(new TestUnboundedSource());

  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);

  OneInputTransformation<?, ?> sourceTransform =
      (OneInputTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.UNBOUNDED, env);

  UnboundedSourceWrapper source =
      (UnboundedSourceWrapper)
          ((SourceTransformation) sourceTransform.getInput()).getOperator().getUserFunction();

  assertEquals(parallelism, source.getSplitSources().size());
}
Example #25
Source File: UnconsumedReadsTest.java, from beam (Apache License 2.0)
@Test
public void matcherProducesUnconsumedValueUnboundedRead() {
  Unbounded<Long> transform = Read.from(CountingSource.unbounded());
  pipeline.apply(transform);
  UnconsumedReads.ensureAllReadsConsumed(pipeline);
  validateConsumed();
}
Example #26
Source File: UnconsumedReadsTest.java, from beam (Apache License 2.0)
@Test
public void matcherProducesUnconsumedValueBoundedRead() {
  Bounded<Long> transform = Read.from(CountingSource.upTo(20L));
  pipeline.apply(transform);
  UnconsumedReads.ensureAllReadsConsumed(pipeline);
  validateConsumed();
}
Example #27
Source File: FixedFlowInputRuntime.java, from components (Apache License 2.0)
@Override
public PCollection<IndexedRecord> expand(PBegin begin) {
  return begin.apply(Read.from(new FixedFlowInputBoundedSource() //
      .withSchema(properties.schemaFlow.schema.getValue()) //
      .withValues(properties.values.getValue()) //
      .withNbRows(properties.nbRows.getValue())));
}
Example #28
Source File: DirectRunnerTest.java, from beam (Apache License 2.0)
PTransform<PBegin, PCollection<T>> read() {
  return new PTransform<PBegin, PCollection<T>>() {
    @Override
    public PCollection<T> expand(PBegin input) {
      return input.apply("readFrom:" + name, Read.from(asSource()));
    }
  };
}
Example #29
Source File: QueryablePipelineTest.java, from beam (Apache License 2.0)
@Test
public void getProducer() {
  Pipeline p = Pipeline.create();
  PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
  PCollectionList.of(longs).and(longs).and(longs).apply("flatten", Flatten.pCollections());

  Components components = PipelineTranslation.toProto(p).getComponents();
  QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);

  String longsOutputName =
      getOnlyElement(
          PipelineNode.pTransform("BoundedRead", components.getTransformsOrThrow("BoundedRead"))
              .getTransform()
              .getOutputsMap()
              .values());
  PTransformNode longsProducer =
      PipelineNode.pTransform("BoundedRead", components.getTransformsOrThrow("BoundedRead"));
  PCollectionNode longsOutput =
      PipelineNode.pCollection(
          longsOutputName, components.getPcollectionsOrThrow(longsOutputName));

  String flattenOutputName =
      getOnlyElement(
          PipelineNode.pTransform("flatten", components.getTransformsOrThrow("flatten"))
              .getTransform()
              .getOutputsMap()
              .values());
  PTransformNode flattenProducer =
      PipelineNode.pTransform("flatten", components.getTransformsOrThrow("flatten"));
  PCollectionNode flattenOutput =
      PipelineNode.pCollection(
          flattenOutputName, components.getPcollectionsOrThrow(flattenOutputName));

  assertThat(qp.getProducer(longsOutput), equalTo(longsProducer));
  assertThat(qp.getProducer(flattenOutput), equalTo(flattenProducer));
}
Example #30
Source File: DirectRunnerTest.java, from beam (Apache License 2.0)
@Test
public void splitsInputs() {
  Pipeline p = getPipeline();
  PCollection<Long> longs = p.apply(Read.from(MustSplitSource.of(CountingSource.upTo(3))));

  PAssert.that(longs).containsInAnyOrder(0L, 1L, 2L);
  p.run();
}