org.apache.beam.sdk.io.GenerateSequence Java Examples

The following examples show how to use org.apache.beam.sdk.io.GenerateSequence. Each example is drawn from an open-source project; the source file and license are listed above each snippet.
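Before the project examples, here is a minimal sketch of the two typical ways GenerateSequence is configured: bounded with from()/to(), and unbounded with withRate() plus withMaxReadTime(), both of which appear in the snippets below. The class name and the specific count, rate, and duration values are illustrative only, not taken from any of the projects.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.GenerateSequence;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

public class GenerateSequenceSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Bounded: emits the longs 0..99 exactly once, then finishes.
    PCollection<Long> bounded = p.apply("Bounded", GenerateSequence.from(0).to(100));

    // Unbounded: no upper bound; withRate paces output at roughly 5 elements per second,
    // and withMaxReadTime stops the source after one minute so the pipeline can finish.
    PCollection<Long> unbounded =
        p.apply(
            "Unbounded",
            GenerateSequence.from(0)
                .withRate(5, Duration.standardSeconds(1))
                .withMaxReadTime(Duration.standardMinutes(1)));

    p.run().waitUntilFinish();
  }
}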
Example #1
Source File: KinesisIOIT.java    From beam with Apache License 2.0
/** Write test dataset into Kinesis stream. */
private void runWrite() {
  pipelineWrite
      .apply("Generate Sequence", GenerateSequence.from(0).to((long) numberOfRows))
      .apply("Prepare TestRows", ParDo.of(new TestRow.DeterministicallyConstructTestRowFn()))
      .apply("Prepare Kinesis input records", ParDo.of(new ConvertToBytes()))
      .apply(
          "Write to Kinesis",
          KinesisIO.write()
              .withStreamName(options.getAwsKinesisStream())
              .withPartitioner(new RandomPartitioner())
              .withAWSClientsProvider(
                  options.getAwsAccessKey(),
                  options.getAwsSecretKey(),
                  Regions.fromName(options.getAwsKinesisRegion())));

  pipelineWrite.run().waitUntilFinish();
}
 
Example #2
Source File: DirectGraphVisitorTest.java    From beam with Apache License 2.0
@Test
public void getRootTransformsContainsRootTransforms() {
  PCollection<String> created = p.apply(Create.of("foo", "bar"));
  PCollection<Long> counted = p.apply(Read.from(CountingSource.upTo(1234L)));
  PCollection<Long> unCounted = p.apply(GenerateSequence.from(0));
  p.traverseTopologically(visitor);
  DirectGraph graph = visitor.getGraph();
  assertThat(graph.getRootTransforms(), hasSize(3));
  assertThat(
      graph.getRootTransforms(),
      Matchers.containsInAnyOrder(
          new Object[] {
            graph.getProducer(created), graph.getProducer(counted), graph.getProducer(unCounted)
          }));
  for (AppliedPTransform<?, ?, ?> root : graph.getRootTransforms()) {
    // Root transforms will have no inputs
    assertThat(root.getInputs().entrySet(), emptyIterable());
    assertThat(
        Iterables.getOnlyElement(root.getOutputs().values()),
        Matchers.<POutput>isOneOf(created, counted, unCounted));
  }
}
 
Example #3
Source File: ReadSourceTest.java    From beam with Apache License 2.0
private static void runProgram(String resultPath) throws Exception {

  Pipeline p = FlinkTestPipeline.createForBatch();

  PCollection<String> result =
      p.apply(GenerateSequence.from(0).to(10))
          .apply(
              ParDo.of(
                  new DoFn<Long, String>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) throws Exception {
                      c.output(c.element().toString());
                    }
                  }));

  result.apply(TextIO.write().to(new URI(resultPath).getPath() + "/part"));

  p.run();
}
 
Example #4
Source File: FlinkPipelineExecutionEnvironmentTest.java    From beam with Apache License 2.0
@Test
public void testTranslationModeNoOverrideWithoutUnboundedSources() {
  boolean[] testArgs = new boolean[] {true, false};
  for (boolean streaming : testArgs) {
    FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
    options.setRunner(FlinkRunner.class);
    options.setStreaming(streaming);

    FlinkPipelineExecutionEnvironment flinkEnv = new FlinkPipelineExecutionEnvironment(options);
    Pipeline pipeline = Pipeline.create(options);
    pipeline.apply(GenerateSequence.from(0).to(10));
    flinkEnv.translate(pipeline);

    assertThat(options.isStreaming(), Matchers.is(streaming));
  }
}
 
Example #5
Source File: MetricsPusherTest.java    From beam with Apache License 2.0
@Category({
  ValidatesRunner.class,
  UsesAttemptedMetrics.class,
  UsesCounterMetrics.class,
  UsesSystemMetrics.class
})
@Test
public void pushesSystemMetrics() throws InterruptedException {
  TestMetricsSink.clear();
  pipeline
      .apply(
          // Use maxReadTime to force unbounded mode.
          GenerateSequence.from(0).to(NUM_ELEMENTS).withMaxReadTime(Duration.standardDays(1)))
      .apply(ParDo.of(new CountingDoFn()));
  pipeline.run();
  // give metrics pusher time to push
  Thread.sleep(
      (pipeline.getOptions().as(MetricsOptions.class).getMetricsPushPeriod() + 1L) * 1000);
  assertThat(TestMetricsSink.getSystemCounters().isEmpty(), is(false));
}
 
Example #6
Source File: FlattenTest.java    From beam with Apache License 2.0
@Test
@Category({ValidatesRunner.class, FlattenWithHeterogeneousCoders.class})
public void testFlattenMultipleCoders() throws CannotProvideCoderException {
  PCollection<Long> bigEndianLongs =
      p.apply(
          "BigEndianLongs",
          Create.of(0L, 1L, 2L, 3L, null, 4L, 5L, null, 6L, 7L, 8L, null, 9L)
              .withCoder(NullableCoder.of(BigEndianLongCoder.of())));
  PCollection<Long> varLongs =
      p.apply("VarLengthLongs", GenerateSequence.from(0).to(5)).setCoder(VarLongCoder.of());

  PCollection<Long> flattened =
      PCollectionList.of(bigEndianLongs)
          .and(varLongs)
          .apply(Flatten.pCollections())
          .setCoder(NullableCoder.of(VarLongCoder.of()));
  PAssert.that(flattened)
      .containsInAnyOrder(
          0L, 0L, 1L, 1L, 2L, 3L, 2L, 4L, 5L, 3L, 6L, 7L, 4L, 8L, 9L, null, null, null);
  p.run();
}
 
Example #7
Source File: ReadData.java    From java-docs-samples with Apache License 2.0
public static void main(String[] args) {
  ReadDataOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(ReadDataOptions.class);
  Pipeline p = Pipeline.create(options);
  CloudBigtableTableConfiguration bigtableTableConfig =
      new CloudBigtableTableConfiguration.Builder()
          .withProjectId(options.getBigtableProjectId())
          .withInstanceId(options.getBigtableInstanceId())
          .withTableId(options.getBigtableTableId())
          .build();

  // Emits one sequence element per second; each element triggers a read of the table
  p.apply(GenerateSequence.from(0).withRate(1, new Duration(1000)))
      .apply(ParDo.of(new ReadFromTableFn(bigtableTableConfig, options)));
  p.run();
}
 
Example #8
Source File: QueryablePipelineTest.java    From beam with Apache License 2.0
@Test
public void retainOnlyPrimitivesComposites() {
  Pipeline p = Pipeline.create();
  p.apply(
      new org.apache.beam.sdk.transforms.PTransform<PBegin, PCollection<Long>>() {
        @Override
        public PCollection<Long> expand(PBegin input) {
          return input
              .apply(GenerateSequence.from(2L))
              .apply(Window.into(FixedWindows.of(Duration.standardMinutes(5L))))
              .apply(MapElements.into(TypeDescriptors.longs()).via(l -> l + 1));
        }
      });

  Components originalComponents = PipelineTranslation.toProto(p).getComponents();
  Collection<String> primitiveComponents =
      QueryablePipeline.getPrimitiveTransformIds(originalComponents);

  // Read, Window.Assign, ParDo. This will need to be updated if the expansions change.
  assertThat(primitiveComponents, hasSize(3));
  for (String transformId : primitiveComponents) {
    assertThat(originalComponents.getTransformsMap(), hasKey(transformId));
  }
}
 
Example #9
Source File: PCollectionTupleTest.java    From beam with Apache License 2.0
@Test
public void testEquals() {
  TestPipeline p = TestPipeline.create();
  TupleTag<Long> longTag = new TupleTag<>();
  PCollection<Long> longs = p.apply(GenerateSequence.from(0));
  TupleTag<String> strTag = new TupleTag<>();
  PCollection<String> strs = p.apply(Create.of("foo", "bar"));

  EqualsTester tester = new EqualsTester();
  // Empty tuples in the same pipeline are equal
  tester.addEqualityGroup(PCollectionTuple.empty(p), PCollectionTuple.empty(p));

  tester.addEqualityGroup(
      PCollectionTuple.of(longTag, longs).and(strTag, strs),
      PCollectionTuple.of(longTag, longs).and(strTag, strs));

  tester.addEqualityGroup(PCollectionTuple.of(longTag, longs));
  tester.addEqualityGroup(PCollectionTuple.of(strTag, strs));

  TestPipeline otherPipeline = TestPipeline.create();
  // Empty tuples in different pipelines are not equal
  tester.addEqualityGroup(PCollectionTuple.empty(otherPipeline));
  tester.testEquals();
}
 
Example #10
Source File: EmptyFlattenAsCreateFactoryTest.java    From beam with Apache License 2.0
@Test
public void getInputNonEmptyThrows() {
  PCollectionList<Long> nonEmpty =
      PCollectionList.of(pipeline.apply("unbounded", GenerateSequence.from(0)))
          .and(pipeline.apply("bounded", GenerateSequence.from(0).to(100)));
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(nonEmpty.expand().toString());
  thrown.expectMessage(EmptyFlattenAsCreateFactory.class.getSimpleName());
  factory.getReplacementTransform(
      AppliedPTransform.of(
          "nonEmptyInput",
          nonEmpty.expand(),
          Collections.emptyMap(),
          Flatten.pCollections(),
          pipeline));
}
 
Example #11
Source File: WindowIntoTranslationTest.java    From beam with Apache License 2.0
@Test
public void testToFromProto() throws InvalidProtocolBufferException {
  pipeline.apply(GenerateSequence.from(0)).apply(Window.<Long>into((WindowFn) windowFn));

  final AtomicReference<AppliedPTransform<?, ?, Assign<?>>> assign = new AtomicReference<>(null);
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          if (node.getTransform() instanceof Window.Assign) {
            checkState(assign.get() == null);
            assign.set(
                (AppliedPTransform<?, ?, Assign<?>>) node.toAppliedPTransform(getPipeline()));
          }
        }
      });
  checkState(assign.get() != null);

  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));
  WindowIntoPayload payload =
      WindowIntoTranslation.toProto(assign.get().getTransform(), components);

  assertEquals(windowFn, WindowingStrategyTranslation.windowFnFromProto(payload.getWindowFn()));
}
 
Example #12
Source File: TransformInputsTest.java    From beam with Apache License 2.0
@Test
public void nonAdditionalInputsWithOnlyAdditionalInputsThrows() {
  Map<TupleTag<?>, PValue> additionalInputs = new HashMap<>();
  additionalInputs.put(new TupleTag<String>() {}, pipeline.apply(Create.of("1, 2", "3")));
  additionalInputs.put(new TupleTag<Long>() {}, pipeline.apply(GenerateSequence.from(3L)));

  AppliedPTransform<PInput, POutput, TestTransform> transform =
      AppliedPTransform.of(
          "additional-only",
          additionalInputs,
          Collections.emptyMap(),
          new TestTransform(additionalInputs),
          pipeline);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("at least one");
  TransformInputs.nonAdditionalInputs(transform);
}
 
Example #13
Source File: TransformInputsTest.java    From beam with Apache License 2.0
@Test
public void nonAdditionalInputsWithAdditionalInputsSucceeds() {
  Map<TupleTag<?>, PValue> additionalInputs = new HashMap<>();
  additionalInputs.put(new TupleTag<String>() {}, pipeline.apply(Create.of("1, 2", "3")));
  additionalInputs.put(new TupleTag<Long>() {}, pipeline.apply(GenerateSequence.from(3L)));

  Map<TupleTag<?>, PValue> allInputs = new HashMap<>();
  PCollection<Integer> mainInts = pipeline.apply("MainInput", Create.of(12, 3));
  allInputs.put(new TupleTag<Integer>() {}, mainInts);
  PCollection<Void> voids = pipeline.apply("VoidInput", Create.empty(VoidCoder.of()));
  allInputs.put(new TupleTag<Void>() {}, voids);
  allInputs.putAll(additionalInputs);

  AppliedPTransform<PInput, POutput, TestTransform> transform =
      AppliedPTransform.of(
          "additional",
          allInputs,
          Collections.emptyMap(),
          new TestTransform(additionalInputs),
          pipeline);

  assertThat(
      TransformInputs.nonAdditionalInputs(transform),
      Matchers.containsInAnyOrder(mainInts, voids));
}
 
Example #14
Source File: PTransformTranslationTest.java    From beam with Apache License 2.0
private static AppliedPTransform<?, ?, ?> multiMultiParDo(Pipeline pipeline) {
  PCollectionView<String> view = pipeline.apply(Create.of("foo")).apply(View.asSingleton());
  PCollection<Long> input = pipeline.apply(GenerateSequence.from(0));
  ParDo.MultiOutput<Long, KV<Long, String>> parDo =
      ParDo.of(new TestDoFn())
          .withSideInputs(view)
          .withOutputTags(
              new TupleTag<KV<Long, String>>() {},
              TupleTagList.of(new TupleTag<KV<String, Long>>() {}));
  PCollectionTuple output = input.apply(parDo);

  Map<TupleTag<?>, PValue> inputs = new HashMap<>();
  inputs.putAll(parDo.getAdditionalInputs());
  inputs.putAll(input.expand());

  return AppliedPTransform
      .<PCollection<Long>, PCollectionTuple, ParDo.MultiOutput<Long, KV<Long, String>>>of(
          "MultiParDoInAndOut", inputs, output.expand(), parDo, pipeline);
}
 
Example #15
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testCreateNeverWithStreaming() throws Exception {
  p.enableAbandonedNodeEnforcement(false);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId("dataset");
  tableRef.setTableId("sometable");

  PCollection<TableRow> tableRows =
      p.apply(GenerateSequence.from(0))
          .apply(
              MapElements.via(
                  new SimpleFunction<Long, TableRow>() {
                    @Override
                    public TableRow apply(Long input) {
                      return null;
                    }
                  }))
          .setCoder(TableRowJsonCoder.of());
  tableRows.apply(
      BigQueryIO.writeTableRows()
          .to(tableRef)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
          .withoutValidation());
}
 
Example #16
Source File: V1WriteIT.java    From beam with Apache License 2.0
/**
 * An end-to-end test for {@link DatastoreV1.Write}.
 *
 * <p>Write some test entities to Cloud Datastore. Read and count all the entities. Verify that
 * the count matches the number of entities written.
 */
@Test
public void testE2EV1Write() throws Exception {
  Pipeline p = Pipeline.create(options);

  // Write to datastore
  p.apply(GenerateSequence.from(0).to(numEntities))
      .apply(ParDo.of(new CreateEntityFn(options.getKind(), options.getNamespace(), ancestor, 0)))
      .apply(DatastoreIO.v1().write().withProjectId(project));

  p.run();

  // Count number of entities written to datastore.
  long numEntitiesWritten = countEntities(options, project, ancestor);

  assertEquals(numEntities, numEntitiesWritten);
}
 
Example #17
Source File: SpannerWriteIT.java    From beam with Apache License 2.0
@Test
public void testWrite() throws Exception {
  int numRecords = 100;
  p.apply(GenerateSequence.from(0).to(numRecords))
      .apply(ParDo.of(new GenerateMutations(options.getTable())))
      .apply(
          SpannerIO.write()
              .withProjectId(project)
              .withInstanceId(options.getInstanceId())
              .withDatabaseId(databaseName));

  PipelineResult result = p.run();
  result.waitUntilFinish();
  assertThat(result.getState(), is(PipelineResult.State.DONE));
  assertThat(countNumberOfRecords(), equalTo((long) numRecords));
}
 
Example #18
Source File: KuduIOIT.java    From beam with Apache License 2.0
private void runWrite() throws Exception {
  writePipeline
      .apply("Generate sequence", GenerateSequence.from(0).to(options.getNumberOfRecords()))
      .apply(
          "Write records to Kudu",
          KuduIO.write()
              .withMasterAddresses(options.getKuduMasterAddresses())
              .withTable(options.getKuduTable())
              .withFormatFn(new GenerateUpsert()));
  writePipeline.run().waitUntilFinish();

  Assert.assertThat(
      "Wrong number of records in table",
      rowCount(kuduTable),
      equalTo(options.getNumberOfRecords()));
}
 
Example #19
Source File: SpannerWriteIT.java    From beam with Apache License 2.0
@Test
public void testReportFailures() throws Exception {
  int numRecords = 100;
  p.apply(GenerateSequence.from(0).to(2 * numRecords))
      .apply(ParDo.of(new GenerateMutations(options.getTable(), new DivBy2())))
      .apply(
          SpannerIO.write()
              .withProjectId(project)
              .withInstanceId(options.getInstanceId())
              .withDatabaseId(databaseName)
              .withFailureMode(SpannerIO.FailureMode.REPORT_FAILURES));

  PipelineResult result = p.run();
  result.waitUntilFinish();
  assertThat(result.getState(), is(PipelineResult.State.DONE));
  assertThat(countNumberOfRecords(), equalTo((long) numRecords));
}
 
Example #20
Source File: SpannerWriteIT.java    From beam with Apache License 2.0
@Test
public void testFailFast() throws Exception {
  thrown.expect(new StackTraceContainsString("SpannerException"));
  thrown.expect(new StackTraceContainsString("Value must not be NULL in table users"));
  int numRecords = 100;
  p.apply(GenerateSequence.from(0).to(2 * numRecords))
      .apply(ParDo.of(new GenerateMutations(options.getTable(), new DivBy2())))
      .apply(
          SpannerIO.write()
              .withProjectId(project)
              .withInstanceId(options.getInstanceId())
              .withDatabaseId(databaseName));

  PipelineResult result = p.run();
  result.waitUntilFinish();
}
 
Example #21
Source File: SdkComponentsTest.java    From beam with Apache License 2.0
@Test
public void registerPCollection() throws IOException {
  PCollection<Long> pCollection = pipeline.apply(GenerateSequence.from(0)).setName("foo");
  String id = components.registerPCollection(pCollection);
  assertThat(id, equalTo("foo"));
  components.toComponents().getPcollectionsOrThrow(id);
}
 
Example #22
Source File: MongoDBIOIT.java    From beam with Apache License 2.0
@Test
public void testWriteAndRead() {
  initialCollectionSize = getCollectionSizeInBytes(collection);

  writePipeline
      .apply("Generate sequence", GenerateSequence.from(0).to(options.getNumberOfRecords()))
      .apply("Produce documents", MapElements.via(new LongToDocumentFn()))
      .apply("Collect write time metric", ParDo.of(new TimeMonitor<>(NAMESPACE, "write_time")))
      .apply(
          "Write documents to MongoDB",
          MongoDbIO.write()
              .withUri(mongoUrl)
              .withDatabase(options.getMongoDBDatabaseName())
              .withCollection(collection));
  PipelineResult writeResult = writePipeline.run();
  writeResult.waitUntilFinish();

  finalCollectionSize = getCollectionSizeInBytes(collection);

  PCollection<String> consolidatedHashcode =
      readPipeline
          .apply(
              "Read all documents",
              MongoDbIO.read()
                  .withUri(mongoUrl)
                  .withDatabase(options.getMongoDBDatabaseName())
                  .withCollection(collection))
          .apply("Collect read time metrics", ParDo.of(new TimeMonitor<>(NAMESPACE, "read_time")))
          .apply("Map documents to Strings", MapElements.via(new DocumentToStringFn()))
          .apply("Calculate hashcode", Combine.globally(new HashingFn()));

  String expectedHash = getHashForRecordCount(options.getNumberOfRecords(), EXPECTED_HASHES);
  PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);

  PipelineResult readResult = readPipeline.run();
  readResult.waitUntilFinish();
  collectAndPublishMetrics(writeResult, readResult);
}
 
Example #23
Source File: SimpleFileIOOutputRuntimeUnboundedTest.java    From components with Apache License 2.0
/**
 * Basic unit test writing to Avro.
 */
@Test
public void testBasicAvroUnbounded() throws IOException, URISyntaxException {
    String fileSpec = mini
            .getLocalFs()
            .getUri()
            .resolve(new Path(mini.newFolder().toString(), "output.avro").toUri())
            .toString();

    // Configure the component.
    SimpleFileIOOutputProperties props = createOutputComponentProperties();
    props.getDatasetProperties().path.setValue(fileSpec);
    props.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);

    // Create the runtime.
    SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
    runtime.initialize(null, props);

    // Use the runtime in a direct pipeline to test.
    final Pipeline p = beam.createPipeline();
    PCollection<IndexedRecord> input = p //
            .apply(GenerateSequence.from(0).withRate(10, Duration.millis(1000))) //
            .apply(ParDo.of(new GenerateDoFn()));
    input.apply(runtime);

    // And run the test.
    PipelineResult pr = p.run();

    // Check the expected values.
    // TODO(rskraba): Implement a comparison for the file on disk.
    // mini.assertReadFile(mini.getLocalFs(), fileSpec, "1;one", "2;two");
}
 
Example #24
Source File: ReadSourcePortableTest.java    From beam with Apache License 2.0
@Test(timeout = 120_000)
public void testExecution() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api").create();
  options.setRunner(CrashingRunner.class);
  options.as(FlinkPipelineOptions.class).setFlinkMaster("[local]");
  options.as(FlinkPipelineOptions.class).setStreaming(isStreaming);
  options.as(FlinkPipelineOptions.class).setParallelism(2);
  options
      .as(PortablePipelineOptions.class)
      .setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
  Pipeline p = Pipeline.create(options);
  PCollection<Long> result = p.apply(GenerateSequence.from(0L).to(10L));
  PAssert.that(result)
      .containsInAnyOrder(ImmutableList.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L));

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);

  // execute the pipeline
  JobInvocation jobInvocation =
      FlinkJobInvoker.create(null)
          .createJobInvocation(
              "fakeId",
              "fakeRetrievalToken",
              flinkJobExecutor,
              pipelineProto,
              options.as(FlinkPipelineOptions.class),
              new FlinkPipelineRunner(
                  options.as(FlinkPipelineOptions.class), null, Collections.emptyList()));
  jobInvocation.start();
  while (jobInvocation.getState() != Enum.DONE) {
    Thread.sleep(100);
  }
}
 
Example #25
Source File: CassandraIOIT.java    From beam with Apache License 2.0
private void runWrite() {
  pipelineWrite
      .apply("GenSequence", GenerateSequence.from(0).to((long) options.getNumberOfRecords()))
      .apply("PrepareTestRows", ParDo.of(new TestRow.DeterministicallyConstructTestRowFn()))
      .apply("MapToEntity", ParDo.of(new CreateScientistFn()))
      .apply(
          "WriteToCassandra",
          CassandraIO.<Scientist>write()
              .withHosts(options.getCassandraHost())
              .withPort(options.getCassandraPort())
              .withKeyspace(KEYSPACE)
              .withEntity(Scientist.class));

  pipelineWrite.run().waitUntilFinish();
}
 
Example #26
Source File: KuduIOTest.java    From beam with Apache License 2.0
/**
 * Tests the write path using a {@link FakeWriter} and verifies that the expected log statements are
 * written. This test ensures that the {@link KuduIO} correctly respects parallelism by
 * deserializing writers and that each writer is opening and closing Kudu sessions.
 */
@Test
@Ignore
public void testWrite() throws Exception {
  when(mockWriteService.createWriter(any())).thenReturn(new FakeWriter());

  writePipeline
      .apply("Generate sequence", GenerateSequence.from(0).to(numberRecords))
      .apply(
          "Write records to Kudu",
          KuduIO.write()
              .withMasterAddresses("ignored")
              .withTable("ignored")
              .withFormatFn(new GenerateUpsert()) // ignored (mocking Operation is pointless)
              .withKuduService(mockWriteService));
  writePipeline.run().waitUntilFinish();

  for (int i = 1; i <= targetParallelism; i++) {
    expectedWriteLogs.verifyDebug(String.format(FakeWriter.LOG_OPEN_SESSION, i));
    expectedWriteLogs.verifyDebug(
        String.format(FakeWriter.LOG_WRITE, i)); // at least one per writer
    expectedWriteLogs.verifyDebug(String.format(FakeWriter.LOG_CLOSE_SESSION, i));
  }
  // verify all entries written
  for (int n = 0; n < numberRecords; n++) {
    expectedWriteLogs.verifyDebug(
        String.format(FakeWriter.LOG_WRITE_VALUE, n)); // every expected value appears in the log
  }
}
 
Example #27
Source File: FlinkPipelineExecutionEnvironmentTest.java    From beam with Apache License 2.0
@Test
public void shouldLogWarningWhenCheckpointingIsDisabled() {
  Pipeline pipeline = Pipeline.create();
  pipeline.getOptions().setRunner(TestFlinkRunner.class);

  pipeline
      // Add an UnboundedSource to check for the warning if checkpointing is disabled
      .apply(GenerateSequence.from(0))
      .apply(
          ParDo.of(
              new DoFn<Long, Void>() {
                @ProcessElement
                public void processElement(ProcessContext ctx) {
                  throw new RuntimeException("Failing here is ok.");
                }
              }));

  final PrintStream oldErr = System.err;
  ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
  PrintStream replacementStdErr = new PrintStream(byteArrayOutputStream);
  try {
    System.setErr(replacementStdErr);
    // Run pipeline and fail during execution
    pipeline.run();
    fail("Should have failed");
  } catch (Exception e) {
    // We want to fail here
  } finally {
    System.setErr(oldErr);
  }
  replacementStdErr.flush();
  assertThat(
      new String(byteArrayOutputStream.toByteArray(), Charsets.UTF_8),
      containsString(
          "UnboundedSources present which rely on checkpointing, but checkpointing is disabled."));
}
 
Example #28
Source File: PipelineTranslationTest.java    From beam with Apache License 2.0
@Parameters(name = "{index}")
public static Iterable<Pipeline> testPipelines() {
  Pipeline trivialPipeline = Pipeline.create();
  trivialPipeline.apply(Create.of(1, 2, 3));

  Pipeline sideInputPipeline = Pipeline.create();
  final PCollectionView<String> singletonView =
      sideInputPipeline.apply(Create.of("foo")).apply(View.asSingleton());
  sideInputPipeline
      .apply(Create.of("main input"))
      .apply(
          ParDo.of(
                  new DoFn<String, String>() {
                    @ProcessElement
                    public void process(ProcessContext c) {
                      // actually never executed and no effect on translation
                      c.sideInput(singletonView);
                    }
                  })
              .withSideInputs(singletonView));

  Pipeline complexPipeline = Pipeline.create();
  BigEndianLongCoder customCoder = BigEndianLongCoder.of();
  PCollection<Long> elems = complexPipeline.apply(GenerateSequence.from(0L).to(207L));
  PCollection<Long> counted = elems.apply(Count.globally()).setCoder(customCoder);
  PCollection<Long> windowed =
      counted.apply(
          Window.<Long>into(FixedWindows.of(Duration.standardMinutes(7)))
              .triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withLateFirings(AfterPane.elementCountAtLeast(19)))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.standardMinutes(3L)));
  final WindowingStrategy<?, ?> windowedStrategy = windowed.getWindowingStrategy();
  PCollection<KV<String, Long>> keyed = windowed.apply(WithKeys.of("foo"));
  PCollection<KV<String, Iterable<Long>>> grouped = keyed.apply(GroupByKey.create());

  return ImmutableList.of(trivialPipeline, sideInputPipeline, complexPipeline);
}
 
Example #29
Source File: FlinkPipelineExecutionEnvironmentTest.java    From beam with Apache License 2.0
@Test
public void testTranslationModeOverrideWithUnboundedSources() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setRunner(FlinkRunner.class);
  options.setStreaming(false);

  FlinkPipelineExecutionEnvironment flinkEnv = new FlinkPipelineExecutionEnvironment(options);
  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(GenerateSequence.from(0));
  flinkEnv.translate(pipeline);

  assertThat(options.isStreaming(), Matchers.is(true));
}
 
Example #30
Source File: FlinkSubmissionTest.java    From beam with Apache License 2.0
/** The Flink program which is executed by the CliFrontend. */
public static void main(String[] args) {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setRunner(FlinkRunner.class);
  options.setStreaming(streaming);
  options.setParallelism(1);
  Pipeline p = Pipeline.create(options);
  p.apply(GenerateSequence.from(0).to(1));
  p.run();
}