org.apache.beam.sdk.io.GenerateSequence Java Examples

The following examples show how to use org.apache.beam.sdk.io.GenerateSequence. Each example is drawn from an open-source project; the source file and license are listed above each snippet.
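Before the project examples, here is a minimal sketch of the two typical ways GenerateSequence is configured: bounded with from()/to(), and unbounded with withRate() plus withMaxReadTime(), both of which appear in the snippets below. The class name and the specific count, rate, and duration values are illustrative only, not taken from any of the projects.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.GenerateSequence;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

public class GenerateSequenceSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Bounded: emits the longs 0..99 exactly once, then finishes.
    PCollection<Long> bounded = p.apply("Bounded", GenerateSequence.from(0).to(100));

    // Unbounded: no upper bound; withRate paces output at roughly 5 elements per second,
    // and withMaxReadTime stops the source after one minute so the pipeline can finish.
    PCollection<Long> unbounded =
        p.apply(
            "Unbounded",
            GenerateSequence.from(0)
                .withRate(5, Duration.standardSeconds(1))
                .withMaxReadTime(Duration.standardMinutes(1)));

    p.run().waitUntilFinish();
  }
}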
Example #1
Source File: KinesisIOIT.java    From beam with Apache License 2.0
/** Write test dataset into Kinesis stream. */
private void runWrite() {
  pipelineWrite
      .apply("Generate Sequence", GenerateSequence.from(0).to((long) numberOfRows))
      .apply("Prepare TestRows", ParDo.of(new TestRow.DeterministicallyConstructTestRowFn()))
      .apply("Prepare Kinesis input records", ParDo.of(new ConvertToBytes()))
      .apply(
          "Write to Kinesis",
          KinesisIO.write()
              .withStreamName(options.getAwsKinesisStream())
              .withPartitioner(new RandomPartitioner())
              .withAWSClientsProvider(
                  options.getAwsAccessKey(),
                  options.getAwsSecretKey(),
                  Regions.fromName(options.getAwsKinesisRegion())));

  pipelineWrite.run().waitUntilFinish();
}
 
Example #2
Source File: DirectGraphVisitorTest.java    From beam with Apache License 2.0
@Test
public void getRootTransformsContainsRootTransforms() {
  PCollection<String> created = p.apply(Create.of("foo", "bar"));
  PCollection<Long> counted = p.apply(Read.from(CountingSource.upTo(1234L)));
  PCollection<Long> unCounted = p.apply(GenerateSequence.from(0));
  p.traverseTopologically(visitor);
  DirectGraph graph = visitor.getGraph();
  assertThat(graph.getRootTransforms(), hasSize(3));
  assertThat(
      graph.getRootTransforms(),
      Matchers.containsInAnyOrder(
          new Object[] {
            graph.getProducer(created), graph.getProducer(counted), graph.getProducer(unCounted)
          }));
  for (AppliedPTransform<?, ?, ?> root : graph.getRootTransforms()) {
    // Root transforms will have no inputs
    assertThat(root.getInputs().entrySet(), emptyIterable());
    assertThat(
        Iterables.getOnlyElement(root.getOutputs().values()),
        Matchers.<POutput>isOneOf(created, counted, unCounted));
  }
}
 
Example #3
Source File: ReadSourceTest.java    From beam with Apache License 2.0
private static void runProgram(String resultPath) throws Exception {

  Pipeline p = FlinkTestPipeline.createForBatch();

  PCollection<String> result =
      p.apply(GenerateSequence.from(0).to(10))
          .apply(
              ParDo.of(
                  new DoFn<Long, String>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) throws Exception {
                      c.output(c.element().toString());
                    }
                  }));

  result.apply(TextIO.write().to(new URI(resultPath).getPath() + "/part"));

  p.run();
}
 
Example #4
Source File: FlinkPipelineExecutionEnvironmentTest.java    From beam with Apache License 2.0
@Test
public void testTranslationModeNoOverrideWithoutUnboundedSources() {
  boolean[] testArgs = new boolean[] {true, false};
  for (boolean streaming : testArgs) {
    FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
    options.setRunner(FlinkRunner.class);
    options.setStreaming(streaming);

    FlinkPipelineExecutionEnvironment flinkEnv = new FlinkPipelineExecutionEnvironment(options);
    Pipeline pipeline = Pipeline.create(options);
    pipeline.apply(GenerateSequence.from(0).to(10));
    flinkEnv.translate(pipeline);

    assertThat(options.isStreaming(), Matchers.is(streaming));
  }
}
 
Example #5
Source File: MetricsPusherTest.java    From beam with Apache License 2.0
@Category({
  ValidatesRunner.class,
  UsesAttemptedMetrics.class,
  UsesCounterMetrics.class,
  UsesSystemMetrics.class
})
@Test
public void pushesSystemMetrics() throws InterruptedException {
  TestMetricsSink.clear();
  pipeline
      .apply(
          // Use maxReadTime to force unbounded mode.
          GenerateSequence.from(0).to(NUM_ELEMENTS).withMaxReadTime(Duration.standardDays(1)))
      .apply(ParDo.of(new CountingDoFn()));
  pipeline.run();
  // give metrics pusher time to push
  Thread.sleep(
      (pipeline.getOptions().as(MetricsOptions.class).getMetricsPushPeriod() + 1L) * 1000);
  assertThat(TestMetricsSink.getSystemCounters().isEmpty(), is(false));
}
 
Example #6
Source File: FlattenTest.java    From beam with Apache License 2.0
@Test
@Category({ValidatesRunner.class, FlattenWithHeterogeneousCoders.class})
public void testFlattenMultipleCoders() throws CannotProvideCoderException {
  PCollection<Long> bigEndianLongs =
      p.apply(
          "BigEndianLongs",
          Create.of(0L, 1L, 2L, 3L, null, 4L, 5L, null, 6L, 7L, 8L, null, 9L)
              .withCoder(NullableCoder.of(BigEndianLongCoder.of())));
  PCollection<Long> varLongs =
      p.apply("VarLengthLongs", GenerateSequence.from(0).to(5)).setCoder(VarLongCoder.of());

  PCollection<Long> flattened =
      PCollectionList.of(bigEndianLongs)
          .and(varLongs)
          .apply(Flatten.pCollections())
          .setCoder(NullableCoder.of(VarLongCoder.of()));
  PAssert.that(flattened)
      .containsInAnyOrder(
          0L, 0L, 1L, 1L, 2L, 3L, 2L, 4L, 5L, 3L, 6L, 7L, 4L, 8L, 9L, null, null, null);
  p.run();
}
 
Example #7
Source File: ReadData.java    From java-docs-samples with Apache License 2.0
public static void main(String[] args) {
  ReadDataOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(ReadDataOptions.class);
  Pipeline p = Pipeline.create(options);
  CloudBigtableTableConfiguration bigtableTableConfig =
      new CloudBigtableTableConfiguration.Builder()
          .withProjectId(options.getBigtableProjectId())
          .withInstanceId(options.getBigtableInstanceId())
          .withTableId(options.getBigtableTableId())
          .build();

  // Emits one sequence element per second; each element triggers a read of the table
  p.apply(GenerateSequence.from(0).withRate(1, new Duration(1000)))
      .apply(ParDo.of(new ReadFromTableFn(bigtableTableConfig, options)));
  p.run();
}
 
Example #8
Source File: QueryablePipelineTest.java    From beam with Apache License 2.0
@Test
public void retainOnlyPrimitivesComposites() {
  Pipeline p = Pipeline.create();
  p.apply(
      new org.apache.beam.sdk.transforms.PTransform<PBegin, PCollection<Long>>() {
        @Override
        public PCollection<Long> expand(PBegin input) {
          return input
              .apply(GenerateSequence.from(2L))
              .apply(Window.into(FixedWindows.of(Duration.standardMinutes(5L))))
              .apply(MapElements.into(TypeDescriptors.longs()).via(l -> l + 1));
        }
      });

  Components originalComponents = PipelineTranslation.toProto(p).getComponents();
  Collection<String> primitiveComponents =
      QueryablePipeline.getPrimitiveTransformIds(originalComponents);

  // Read, Window.Assign, ParDo. This will need to be updated if the expansions change.
  assertThat(primitiveComponents, hasSize(3));
  for (String transformId : primitiveComponents) {
    assertThat(originalComponents.getTransformsMap(), hasKey(transformId));
  }
}
 
Example #9
Source File: PCollectionTupleTest.java    From beam with Apache License 2.0
@Test
public void testEquals() {
  TestPipeline p = TestPipeline.create();
  TupleTag<Long> longTag = new TupleTag<>();
  PCollection<Long> longs = p.apply(GenerateSequence.from(0));
  TupleTag<String> strTag = new TupleTag<>();
  PCollection<String> strs = p.apply(Create.of("foo", "bar"));

  EqualsTester tester = new EqualsTester();
  // Empty tuples in the same pipeline are equal
  tester.addEqualityGroup(PCollectionTuple.empty(p), PCollectionTuple.empty(p));

  tester.addEqualityGroup(
      PCollectionTuple.of(longTag, longs).and(strTag, strs),
      PCollectionTuple.of(longTag, longs).and(strTag, strs));

  tester.addEqualityGroup(PCollectionTuple.of(longTag, longs));
  tester.addEqualityGroup(PCollectionTuple.of(strTag, strs));

  TestPipeline otherPipeline = TestPipeline.create();
  // Empty tuples in different pipelines are not equal
  tester.addEqualityGroup(PCollectionTuple.empty(otherPipeline));
  tester.testEquals();
}
 
Example #10
Source File: EmptyFlattenAsCreateFactoryTest.java    From beam with Apache License 2.0
@Test
public void getInputNonEmptyThrows() {
  PCollectionList<Long> nonEmpty =
      PCollectionList.of(pipeline.apply("unbounded", GenerateSequence.from(0)))
          .and(pipeline.apply("bounded", GenerateSequence.from(0).to(100)));
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(nonEmpty.expand().toString());
  thrown.expectMessage(EmptyFlattenAsCreateFactory.class.getSimpleName());
  factory.getReplacementTransform(
      AppliedPTransform.of(
          "nonEmptyInput",
          nonEmpty.expand(),
          Collections.emptyMap(),
          Flatten.pCollections(),
          pipeline));
}
 
Example #11
Source File: WindowIntoTranslationTest.java    From beam with Apache License 2.0
@Test
public void testToFromProto() throws InvalidProtocolBufferException {
  pipeline.apply(GenerateSequence.from(0)).apply(Window.<Long>into((WindowFn) windowFn));

  final AtomicReference<AppliedPTransform<?, ?, Assign<?>>> assign = new AtomicReference<>(null);
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          if (node.getTransform() instanceof Window.Assign) {
            checkState(assign.get() == null);
            assign.set(
                (AppliedPTransform<?, ?, Assign<?>>) node.toAppliedPTransform(getPipeline()));
          }
        }
      });
  checkState(assign.get() != null);

  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));
  WindowIntoPayload payload =
      WindowIntoTranslation.toProto(assign.get().getTransform(), components);

  assertEquals(windowFn, WindowingStrategyTranslation.windowFnFromProto(payload.getWindowFn()));
}
 
Example #12
Source File: TransformInputsTest.java    From beam with Apache License 2.0
@Test
public void nonAdditionalInputsWithOnlyAdditionalInputsThrows() {
  Map<TupleTag<?>, PValue> additionalInputs = new HashMap<>();
  additionalInputs.put(new TupleTag<String>() {}, pipeline.apply(Create.of("1, 2", "3")));
  additionalInputs.put(new TupleTag<Long>() {}, pipeline.apply(GenerateSequence.from(3L)));

  AppliedPTransform<PInput, POutput, TestTransform> transform =
      AppliedPTransform.of(
          "additional-only",
          additionalInputs,
          Collections.emptyMap(),
          new TestTransform(additionalInputs),
          pipeline);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("at least one");
  TransformInputs.nonAdditionalInputs(transform);
}
 
Example #13
Source File: TransformInputsTest.java    From beam with Apache License 2.0
@Test
public void nonAdditionalInputsWithAdditionalInputsSucceeds() {
  Map<TupleTag<?>, PValue> additionalInputs = new HashMap<>();
  additionalInputs.put(new TupleTag<String>() {}, pipeline.apply(Create.of("1, 2", "3")));
  additionalInputs.put(new TupleTag<Long>() {}, pipeline.apply(GenerateSequence.from(3L)));

  Map<TupleTag<?>, PValue> allInputs = new HashMap<>();
  PCollection<Integer> mainInts = pipeline.apply("MainInput", Create.of(12, 3));
  allInputs.put(new TupleTag<Integer>() {}, mainInts);
  PCollection<Void> voids = pipeline.apply("VoidInput", Create.empty(VoidCoder.of()));
  allInputs.put(new TupleTag<Void>() {}, voids);
  allInputs.putAll(additionalInputs);

  AppliedPTransform<PInput, POutput, TestTransform> transform =
      AppliedPTransform.of(
          "additional",
          allInputs,
          Collections.emptyMap(),
          new TestTransform(additionalInputs),
          pipeline);

  assertThat(
      TransformInputs.nonAdditionalInputs(transform),
      Matchers.containsInAnyOrder(mainInts, voids));
}
 
Example #14
Source File: PTransformTranslationTest.java    From beam with Apache License 2.0
private static AppliedPTransform<?, ?, ?> multiMultiParDo(Pipeline pipeline) {
  PCollectionView<String> view = pipeline.apply(Create.of("foo")).apply(View.asSingleton());
  PCollection<Long> input = pipeline.apply(GenerateSequence.from(0));
  ParDo.MultiOutput<Long, KV<Long, String>> parDo =
      ParDo.of(new TestDoFn())
          .withSideInputs(view)
          .withOutputTags(
              new TupleTag<KV<Long, String>>() {},
              TupleTagList.of(new TupleTag<KV<String, Long>>() {}));
  PCollectionTuple output = input.apply(parDo);

  Map<TupleTag<?>, PValue> inputs = new HashMap<>();
  inputs.putAll(parDo.getAdditionalInputs());
  inputs.putAll(input.expand());

  return AppliedPTransform
      .<PCollection<Long>, PCollectionTuple, ParDo.MultiOutput<Long, KV<Long, String>>>of(
          "MultiParDoInAndOut", inputs, output.expand(), parDo, pipeline);
}
 
Example #15
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testCreateNeverWithStreaming() throws Exception {
  p.enableAbandonedNodeEnforcement(false);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId("dataset");
  tableRef.setTableId("sometable");

  PCollection<TableRow> tableRows =
      p.apply(GenerateSequence.from(0))
          .apply(
              MapElements.via(
                  new SimpleFunction<Long, TableRow>() {
                    @Override
                    public TableRow apply(Long input) {
                      return null;
                    }
                  }))
          .setCoder(TableRowJsonCoder.of());
  tableRows.apply(
      BigQueryIO.writeTableRows()
          .to(tableRef)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
          .withoutValidation());
}
 
Example #16
Source File: V1WriteIT.java    From beam with Apache License 2.0
/**
 * An end-to-end test for {@link DatastoreV1.Write}.
 *
 * <p>Write some test entities to Cloud Datastore. Read and count all the entities. Verify that
 * the count matches the number of entities written.
 */
@Test
public void testE2EV1Write() throws Exception {
  Pipeline p = Pipeline.create(options);

  // Write to datastore
  p.apply(GenerateSequence.from(0).to(numEntities))
      .apply(ParDo.of(new CreateEntityFn(options.getKind(), options.getNamespace(), ancestor, 0)))
      .apply(DatastoreIO.v1().write().withProjectId(project));

  p.run();

  // Count number of entities written to datastore.
  long numEntitiesWritten = countEntities(options, project, ancestor);

  assertEquals(numEntities, numEntitiesWritten);
}
 
Example #17
Source File: SpannerWriteIT.java    From beam with Apache License 2.0
@Test
public void testWrite() throws Exception {
  int numRecords = 100;
  p.apply(GenerateSequence.from(0).to(numRecords))
      .apply(ParDo.of(new GenerateMutations(options.getTable())))
      .apply(
          SpannerIO.write()
              .withProjectId(project)
              .withInstanceId(options.getInstanceId())
              .withDatabaseId(databaseName));

  PipelineResult result = p.run();
  result.waitUntilFinish();
  assertThat(result.getState(), is(PipelineResult.State.DONE));
  assertThat(countNumberOfRecords(), equalTo((long) numRecords));
}
 
Example #18
Source File: KuduIOIT.java    From beam with Apache License 2.0
private void runWrite() throws Exception {
  writePipeline
      .apply("Generate sequence", GenerateSequence.from(0).to(options.getNumberOfRecords()))
      .apply(
          "Write records to Kudu",
          KuduIO.write()
              .withMasterAddresses(options.getKuduMasterAddresses())
              .withTable(options.getKuduTable())
              .withFormatFn(new GenerateUpsert()));
  writePipeline.run().waitUntilFinish();

  Assert.assertThat(
      "Wrong number of records in table",
      rowCount(kuduTable),
      equalTo(options.getNumberOfRecords()));
}
 
Example #19
Source File: SpannerWriteIT.java    From beam with Apache License 2.0
@Test
public void testReportFailures() throws Exception {
  int numRecords = 100;
  p.apply(GenerateSequence.from(0).to(2 * numRecords))
      .apply(ParDo.of(new GenerateMutations(options.getTable(), new DivBy2())))
      .apply(
          SpannerIO.write()
              .withProjectId(project)
              .withInstanceId(options.getInstanceId())
              .withDatabaseId(databaseName)
              .withFailureMode(SpannerIO.FailureMode.REPORT_FAILURES));

  PipelineResult result = p.run();
  result.waitUntilFinish();
  assertThat(result.getState(), is(PipelineResult.State.DONE));
  assertThat(countNumberOfRecords(), equalTo((long) numRecords));
}
 
Example #20
Source File: SpannerWriteIT.java    From beam with Apache License 2.0
@Test
public void testFailFast() throws Exception {
  thrown.expect(new StackTraceContainsString("SpannerException"));
  thrown.expect(new StackTraceContainsString("Value must not be NULL in table users"));
  int numRecords = 100;
  p.apply(GenerateSequence.from(0).to(2 * numRecords))
      .apply(ParDo.of(new GenerateMutations(options.getTable(), new DivBy2())))
      .apply(
          SpannerIO.write()
              .withProjectId(project)
              .withInstanceId(options.getInstanceId())
              .withDatabaseId(databaseName));

  PipelineResult result = p.run();
  result.waitUntilFinish();
}
 
Example #21
Source File: SdkComponentsTest.java    From beam with Apache License 2.0
@Test
public void registerPCollection() throws IOException {
  PCollection<Long> pCollection = pipeline.apply(GenerateSequence.from(0)).setName("foo");
  String id = components.registerPCollection(pCollection);
  assertThat(id, equalTo("foo"));
  components.toComponents().getPcollectionsOrThrow(id);
}
 
Example #22
Source File: MongoDBIOIT.java    From beam with Apache License 2.0
@Test
public void testWriteAndRead() {
  initialCollectionSize = getCollectionSizeInBytes(collection);

  writePipeline
      .apply("Generate sequence", GenerateSequence.from(0).to(options.getNumberOfRecords()))
      .apply("Produce documents", MapElements.via(new LongToDocumentFn()))
      .apply("Collect write time metric", ParDo.of(new TimeMonitor<>(NAMESPACE, "write_time")))
      .apply(
          "Write documents to MongoDB",
          MongoDbIO.write()
              .withUri(mongoUrl)
              .withDatabase(options.getMongoDBDatabaseName())
              .withCollection(collection));
  PipelineResult writeResult = writePipeline.run();
  writeResult.waitUntilFinish();

  finalCollectionSize = getCollectionSizeInBytes(collection);

  PCollection<String> consolidatedHashcode =
      readPipeline
          .apply(
              "Read all documents",
              MongoDbIO.read()
                  .withUri(mongoUrl)
                  .withDatabase(options.getMongoDBDatabaseName())
                  .withCollection(collection))
          .apply("Collect read time metrics", ParDo.of(new TimeMonitor<>(NAMESPACE, "read_time")))
          .apply("Map documents to Strings", MapElements.via(new DocumentToStringFn()))
          .apply("Calculate hashcode", Combine.globally(new HashingFn()));

  String expectedHash = getHashForRecordCount(options.getNumberOfRecords(), EXPECTED_HASHES);
  PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);

  PipelineResult readResult = readPipeline.run();
  readResult.waitUntilFinish();
  collectAndPublishMetrics(writeResult, readResult);
}
 
Example #23
Source File: SimpleFileIOOutputRuntimeUnboundedTest.java    From components with Apache License 2.0
/**
 * Basic unit test writing to Avro.
 */
@Test
public void testBasicAvroUnbounded() throws IOException, URISyntaxException {
    String fileSpec = mini
            .getLocalFs()
            .getUri()
            .resolve(new Path(mini.newFolder().toString(), "output.avro").toUri())
            .toString();

    // Configure the component.
    SimpleFileIOOutputProperties props = createOutputComponentProperties();
    props.getDatasetProperties().path.setValue(fileSpec);
    props.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);

    // Create the runtime.
    SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
    runtime.initialize(null, props);

    // Use the runtime in a direct pipeline to test.
    final Pipeline p = beam.createPipeline();
    PCollection<IndexedRecord> input = p //
            .apply(GenerateSequence.from(0).withRate(10, Duration.millis(1000))) //
            .apply(ParDo.of(new GenerateDoFn()));
    input.apply(runtime);

    // And run the test.
    PipelineResult pr = p.run();

    // Check the expected values.
    // TODO(rskraba): Implement a comparison for the file on disk.
    // mini.assertReadFile(mini.getLocalFs(), fileSpec, "1;one", "2;two");
}
 
Example #24
Source File: ReadSourcePortableTest.java    From beam with Apache License 2.0
@Test(timeout = 120_000)
public void testExecution() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api").create();
  options.setRunner(CrashingRunner.class);
  options.as(FlinkPipelineOptions.class).setFlinkMaster("[local]");
  options.as(FlinkPipelineOptions.class).setStreaming(isStreaming);
  options.as(FlinkPipelineOptions.class).setParallelism(2);
  options
      .as(PortablePipelineOptions.class)
      .setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
  Pipeline p = Pipeline.create(options);
  PCollection<Long> result = p.apply(GenerateSequence.from(0L).to(10L));
  PAssert.that(result)
      .containsInAnyOrder(ImmutableList.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L));

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);

  // execute the pipeline
  JobInvocation jobInvocation =
      FlinkJobInvoker.create(null)
          .createJobInvocation(
              "fakeId",
              "fakeRetrievalToken",
              flinkJobExecutor,
              pipelineProto,
              options.as(FlinkPipelineOptions.class),
              new FlinkPipelineRunner(
                  options.as(FlinkPipelineOptions.class), null, Collections.emptyList()));
  jobInvocation.start();
  while (jobInvocation.getState() != Enum.DONE) {
    Thread.sleep(100);
  }
}
 
Example #25
Source File: CassandraIOIT.java    From beam with Apache License 2.0
private void runWrite() {
  pipelineWrite
      .apply("GenSequence", GenerateSequence.from(0).to((long) options.getNumberOfRecords()))
      .apply("PrepareTestRows", ParDo.of(new TestRow.DeterministicallyConstructTestRowFn()))
      .apply("MapToEntity", ParDo.of(new CreateScientistFn()))
      .apply(
          "WriteToCassandra",
          CassandraIO.<Scientist>write()
              .withHosts(options.getCassandraHost())
              .withPort(options.getCassandraPort())
              .withKeyspace(KEYSPACE)
              .withEntity(Scientist.class));

  pipelineWrite.run().waitUntilFinish();
}
 
Example #26
Source File: KuduIOTest.java    From beam with Apache License 2.0
/**
 * Tests the write path using a {@link FakeWriter} and verifies that the expected log statements are
 * written. This test ensures that the {@link KuduIO} correctly respects parallelism by
 * deserializing writers and that each writer is opening and closing Kudu sessions.
 */
@Test
@Ignore
public void testWrite() throws Exception {
  when(mockWriteService.createWriter(any())).thenReturn(new FakeWriter());

  writePipeline
      .apply("Generate sequence", GenerateSequence.from(0).to(numberRecords))
      .apply(
          "Write records to Kudu",
          KuduIO.write()
              .withMasterAddresses("ignored")
              .withTable("ignored")
              .withFormatFn(new GenerateUpsert()) // ignored (mocking Operation is pointless)
              .withKuduService(mockWriteService));
  writePipeline.run().waitUntilFinish();

  for (int i = 1; i <= targetParallelism; i++) {
    expectedWriteLogs.verifyDebug(String.format(FakeWriter.LOG_OPEN_SESSION, i));
    expectedWriteLogs.verifyDebug(
        String.format(FakeWriter.LOG_WRITE, i)); // at least one per writer
    expectedWriteLogs.verifyDebug(String.format(FakeWriter.LOG_CLOSE_SESSION, i));
  }
  // verify all entries written
  for (int n = 0; n < numberRecords; n++) {
    expectedWriteLogs.verifyDebug(
        String.format(FakeWriter.LOG_WRITE_VALUE, n)); // every expected value appears in the log
  }
}
 
Example #27
Source File: FlinkPipelineExecutionEnvironmentTest.java    From beam with Apache License 2.0
@Test
public void shouldLogWarningWhenCheckpointingIsDisabled() {
  Pipeline pipeline = Pipeline.create();
  pipeline.getOptions().setRunner(TestFlinkRunner.class);

  pipeline
      // Add an UnboundedSource to check for the warning if checkpointing is disabled
      .apply(GenerateSequence.from(0))
      .apply(
          ParDo.of(
              new DoFn<Long, Void>() {
                @ProcessElement
                public void processElement(ProcessContext ctx) {
                  throw new RuntimeException("Failing here is ok.");
                }
              }));

  final PrintStream oldErr = System.err;
  ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
  PrintStream replacementStdErr = new PrintStream(byteArrayOutputStream);
  try {
    System.setErr(replacementStdErr);
    // Run pipeline and fail during execution
    pipeline.run();
    fail("Should have failed");
  } catch (Exception e) {
    // We want to fail here
  } finally {
    System.setErr(oldErr);
  }
  replacementStdErr.flush();
  assertThat(
      new String(byteArrayOutputStream.toByteArray(), Charsets.UTF_8),
      containsString(
          "UnboundedSources present which rely on checkpointing, but checkpointing is disabled."));
}
 
Example #28
Source File: PipelineTranslationTest.java    From beam with Apache License 2.0
@Parameters(name = "{index}")
public static Iterable<Pipeline> testPipelines() {
  Pipeline trivialPipeline = Pipeline.create();
  trivialPipeline.apply(Create.of(1, 2, 3));

  Pipeline sideInputPipeline = Pipeline.create();
  final PCollectionView<String> singletonView =
      sideInputPipeline.apply(Create.of("foo")).apply(View.asSingleton());
  sideInputPipeline
      .apply(Create.of("main input"))
      .apply(
          ParDo.of(
                  new DoFn<String, String>() {
                    @ProcessElement
                    public void process(ProcessContext c) {
                      // actually never executed and no effect on translation
                      c.sideInput(singletonView);
                    }
                  })
              .withSideInputs(singletonView));

  Pipeline complexPipeline = Pipeline.create();
  BigEndianLongCoder customCoder = BigEndianLongCoder.of();
  PCollection<Long> elems = complexPipeline.apply(GenerateSequence.from(0L).to(207L));
  PCollection<Long> counted = elems.apply(Count.globally()).setCoder(customCoder);
  PCollection<Long> windowed =
      counted.apply(
          Window.<Long>into(FixedWindows.of(Duration.standardMinutes(7)))
              .triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withLateFirings(AfterPane.elementCountAtLeast(19)))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.standardMinutes(3L)));
  final WindowingStrategy<?, ?> windowedStrategy = windowed.getWindowingStrategy();
  PCollection<KV<String, Long>> keyed = windowed.apply(WithKeys.of("foo"));
  PCollection<KV<String, Iterable<Long>>> grouped = keyed.apply(GroupByKey.create());

  return ImmutableList.of(trivialPipeline, sideInputPipeline, complexPipeline);
}
 
Example #29
Source File: FlinkPipelineExecutionEnvironmentTest.java    From beam with Apache License 2.0
@Test
public void testTranslationModeOverrideWithUnboundedSources() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setRunner(FlinkRunner.class);
  options.setStreaming(false);

  FlinkPipelineExecutionEnvironment flinkEnv = new FlinkPipelineExecutionEnvironment(options);
  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(GenerateSequence.from(0));
  flinkEnv.translate(pipeline);

  assertThat(options.isStreaming(), Matchers.is(true));
}
 
Example #30
Source File: FlinkSubmissionTest.java    From beam with Apache License 2.0
/** The Flink program which is executed by the CliFrontend. */
public static void main(String[] args) {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setRunner(FlinkRunner.class);
  options.setStreaming(streaming);
  options.setParallelism(1);
  Pipeline p = Pipeline.create(options);
  p.apply(GenerateSequence.from(0).to(1));
  p.run();
}