org.apache.beam.sdk.PipelineResult Java Examples
The following examples show how to use org.apache.beam.sdk.PipelineResult.
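Before the project snippets, here is a minimal sketch of the typical PipelineResult lifecycle: run the pipeline, wait for it to terminate, inspect its final state, and query metrics. The class name PipelineResultBasics is made up for illustration, and metric accessors such as getCommitted() are not supported on every runner.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.metrics.MetricQueryResults;
import org.apache.beam.sdk.metrics.MetricsFilter;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class PipelineResultBasics {

  public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
    Pipeline pipeline = Pipeline.create(options);
    pipeline.apply("CreateValues", Create.of("a", "b", "c"));

    // run() may return immediately on asynchronous runners; the PipelineResult
    // is the handle used to track and control the submitted job.
    PipelineResult result = pipeline.run();

    // Block until the job terminates (DONE, FAILED, or CANCELLED).
    PipelineResult.State finalState = result.waitUntilFinish();
    System.out.println("Pipeline finished in state: " + finalState);

    // Metrics accumulated during execution can be queried from the result.
    MetricQueryResults metrics =
        result.metrics().queryMetrics(MetricsFilter.builder().build());
    metrics.getCounters().forEach(counter ->
        System.out.println(counter.getName() + " = " + counter.getCommitted()));
  }
}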
Example #1
Source File: MongoDBIOIT.java From beam with Apache License 2.0

private void collectAndPublishMetrics(PipelineResult writeResult, PipelineResult readResult) {
  String uuid = UUID.randomUUID().toString();
  String timestamp = Timestamp.now().toString();

  Set<Function<MetricsReader, NamedTestResult>> readSuppliers = getReadSuppliers(uuid, timestamp);
  Set<Function<MetricsReader, NamedTestResult>> writeSuppliers = getWriteSuppliers(uuid, timestamp);

  IOITMetrics readMetrics =
      new IOITMetrics(readSuppliers, readResult, NAMESPACE, uuid, timestamp);
  IOITMetrics writeMetrics =
      new IOITMetrics(writeSuppliers, writeResult, NAMESPACE, uuid, timestamp);

  readMetrics.publish(bigQueryDataset, bigQueryTable);
  readMetrics.publishToInflux(settings);
  writeMetrics.publish(bigQueryDataset, bigQueryTable);
  writeMetrics.publishToInflux(settings);
}
Example #2
Source File: SpannerWriteIT.java From beam with Apache License 2.0

@Test
public void testReportFailures() throws Exception {
  int numRecords = 100;
  p.apply(GenerateSequence.from(0).to(2 * numRecords))
      .apply(ParDo.of(new GenerateMutations(options.getTable(), new DivBy2())))
      .apply(
          SpannerIO.write()
              .withProjectId(project)
              .withInstanceId(options.getInstanceId())
              .withDatabaseId(databaseName)
              .withFailureMode(SpannerIO.FailureMode.REPORT_FAILURES));

  PipelineResult result = p.run();
  result.waitUntilFinish();
  assertThat(result.getState(), is(PipelineResult.State.DONE));
  assertThat(countNumberOfRecords(), equalTo((long) numRecords));
}
Example #3
Source File: ParquetToBigtable.java From DataflowTemplates with Apache License 2.0

public static PipelineResult run(Options options) {
  Pipeline pipeline = Pipeline.create(options);

  BigtableIO.Write write =
      BigtableIO.write()
          .withProjectId(options.getBigtableProjectId())
          .withInstanceId(options.getBigtableInstanceId())
          .withTableId(options.getBigtableTableId());

  /**
   * Steps:
   * 1) Read records from Parquet File.
   * 2) Convert a GenericRecord to a KV<ByteString, Iterable<Mutation>>.
   * 3) Write KV to Bigtable's table.
   */
  pipeline
      .apply(
          "Read from Parquet",
          ParquetIO.read(BigtableRow.getClassSchema()).from(options.getInputFilePattern()))
      .apply(
          "Transform to Bigtable",
          ParDo.of(
              ParquetToBigtableFn.createWithSplitLargeRows(
                  options.getSplitLargeRows(), MAX_MUTATIONS_PER_ROW)))
      .apply("Write to Bigtable", write);

  return pipeline.run();
}
Example #4
Source File: DirectRunnerTest.java From beam with Apache License 2.0

@Test
public void testWaitUntilFinishTimeout() throws Exception {
  DirectOptions options = PipelineOptionsFactory.as(DirectOptions.class);
  options.setBlockOnRun(false);
  options.setRunner(DirectRunner.class);
  Pipeline p = Pipeline.create(options);
  p.apply(Create.of(1L))
      .apply(
          ParDo.of(
              new DoFn<Long, Long>() {
                @ProcessElement
                public void hang(ProcessContext context) throws InterruptedException {
                  // Hangs "forever"
                  Thread.sleep(Long.MAX_VALUE);
                }
              }));
  PipelineResult result = p.run();
  // The pipeline should never complete;
  assertThat(result.getState(), is(State.RUNNING));
  // Must time out, otherwise this test will never complete
  result.waitUntilFinish(Duration.millis(1L));
  assertEquals(null, result.getState());
}
Example #5
Source File: PubsubIntegrationTest.java From gcp-ingestion with Mozilla Public License 2.0

@Test(timeout = 30000)
public void canReadPubsubInput() throws Exception {
  List<String> inputLines = Lines.resources("testdata/basic-messages-nonempty.ndjson");
  publishLines(inputLines);

  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  SinkOptions.Parsed sinkOptions = pipeline.getOptions().as(SinkOptions.Parsed.class);
  sinkOptions.setInput(pipeline.newProvider(subscriptionName.toString()));

  PCollection<String> output = pipeline.apply(InputType.pubsub.read(sinkOptions))
      .apply("encodeJson", OutputFileFormat.json.encode());

  PAssert.that(output).containsInAnyOrder(inputLines);

  // This runs in the background and returns immediately due to setBlockOnRun above.
  PipelineResult result = pipeline.run();

  // The wait here is determined empirically; it's not entirely clear why it takes this long.
  System.err.println("Waiting 15 seconds to make sure we've processed all messages...");
  result.waitUntilFinish(Duration.millis(15000));
  System.err.println("Done waiting; now cancelling the pipeline so the test can finish.");
  result.cancel();
}
Example #6
Source File: IndexedRecordToJsonTest.java From component-runtime with Apache License 2.0

@Test
public void test() {
    PAssert
        .that(pipeline
            .apply(Create
                .of(newIndexedRecord("first"), newIndexedRecord("second"))
                .withCoder(AvroCoder.of(IndexedRecord.class, getSchema())))
            .apply(new IndexedRecordToJson()))
        .satisfies(values -> {
            assertEquals(asList("first", "second"), StreamSupport
                .stream(values.spliterator(), false)
                .map(k -> k.getString("name"))
                .sorted()
                .collect(toList()));
            return null;
        });
    assertEquals(PipelineResult.State.DONE, pipeline.run().waitUntilFinish());
}
Example #7
Source File: PubsubIntegrationTest.java From gcp-ingestion with Mozilla Public License 2.0

@Test(timeout = 30000)
public void canSendPubsubOutput() throws Exception {
  final List<String> inputLines = Lines.resources("testdata/pubsub-integration/input.ndjson");
  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  SinkOptions.Parsed sinkOptions = pipeline.getOptions().as(SinkOptions.Parsed.class);
  sinkOptions.setOutput(pipeline.newProvider(topicName.toString()));
  // We would normally use pipeline.newProvider instead of StaticValueProvider in tests,
  // but something about this configuration causes the pipeline to stall when CompressPayload
  // accesses a method on the underlying enum value when defined via pipeline.newProvider.
  sinkOptions.setOutputPubsubCompression(StaticValueProvider.of(Compression.UNCOMPRESSED));

  pipeline.apply(Create.of(inputLines)).apply(InputFileFormat.json.decode())
      .apply(OutputType.pubsub.write(sinkOptions));

  final PipelineResult result = pipeline.run();

  System.err.println("Waiting for subscriber to receive messages published in the pipeline...");
  List<String> expectedLines = Lines.resources("testdata/pubsub-integration/truncated.ndjson");
  List<String> received = receiveLines(expectedLines.size());
  assertThat(received, matchesInAnyOrder(expectedLines));
  result.cancel();
}
Example #8
Source File: TextToPubsub.java From DataflowTemplates with Apache License 2.0

/**
 * Executes the pipeline with the provided execution parameters.
 *
 * @param options The execution parameters.
 */
public static PipelineResult run(Options options) {
  // Create the pipeline.
  Pipeline pipeline = Pipeline.create(options);

  /*
   * Steps:
   *  1) Read from the text source.
   *  2) Write each text record to Pub/Sub
   */
  pipeline
      .apply("Read Text Data", TextIO.read().from(options.getInputFilePattern()))
      .apply("Write to PubSub", PubsubIO.writeStrings().to(options.getOutputTopic()));

  return pipeline.run();
}
Example #9
Source File: TalendIOTest.java From component-runtime with Apache License 2.0

@Test
public void output() {
    Output.DATA.clear();
    pipeline
        .apply(Create.of(new Sample("a"), new Sample("b")).withCoder(JsonbCoder.of(Sample.class, PLUGIN)))
        .apply(UUID.randomUUID().toString(), toRecord())
        .setCoder(SchemaRegistryCoder.of())
        .apply(new ViewsMappingTransform(emptyMap(), PLUGIN))
        .apply(TalendIO.write(new BaseTestProcessor() {

            @Override
            public void onNext(final InputFactory input, final OutputFactory factory) {
                final Object read = input.read(Branches.DEFAULT_BRANCH);
                Output.DATA.add(Record.class.cast(read).getString("data"));
            }
        }));
    assertEquals(PipelineResult.State.DONE, pipeline.run().getState());
    assertThat(Output.DATA, containsInAnyOrder("a", "b"));
}
Example #10
Source File: PubsubIntegrationTest.java From gcp-ingestion with Mozilla Public License 2.0

@Test(timeout = 30000)
public void canSendGzippedPayloads() throws Exception {
  final List<String> inputLines = Lines.resources("testdata/pubsub-integration/input.ndjson");
  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  SinkOptions sinkOptions = pipeline.getOptions().as(SinkOptions.class);
  sinkOptions.setOutputType(OutputType.pubsub);
  sinkOptions.setOutput(pipeline.newProvider(topicName.toString()));
  SinkOptions.Parsed options = SinkOptions.parseSinkOptions(sinkOptions);

  pipeline.apply(Create.of(inputLines)).apply(InputFileFormat.json.decode())
      .apply(options.getOutputType().write(options));

  final PipelineResult result = pipeline.run();

  System.err.println("Waiting for subscriber to receive messages published in the pipeline...");
  List<String> expectedLines = Lines.resources("testdata/pubsub-integration/gzipped.ndjson");
  List<String> received = receiveLines(expectedLines.size());
  assertThat(received, matchesInAnyOrder(expectedLines));
  result.cancel();
}
Example #11
Source File: WordCountTest.java From DataflowTemplates with Apache License 2.0

@Test
@Category(NeedsRunner.class)
public void testWordCountSimple() {
  PCollection<KV<String, Long>> pc =
      pipeline.apply(Create.of(INPUT_STRS)).apply(new CountWords());
  PAssert.that(pc).containsInAnyOrder(KV.of("hello", 2L), KV.of(("world"), 1L));
  PipelineResult result = pipeline.run();
  result.waitUntilFinish();

  Map<String, Long> expectedCounters = new HashMap<>();
  expectedCounters.put("emptyLines", 2L);
  for (MetricResult c :
      result.metrics().queryMetrics(MetricsFilter.builder().build()).getCounters()) {
    String name = c.getName().getName();
    if (expectedCounters.containsKey(name)) {
      assertEquals(expectedCounters.get(name), c.getCommitted());
      expectedCounters.remove(name);
    }
  }
  assertTrue(expectedCounters.isEmpty());
}
Example #12
Source File: ExportTimestampTest.java From DataflowTemplates with Apache License 2.0

private void exportAndImportDbAtTime(String sourceDb, String destDb,
                                     String jobIdName, String ts,
                                     TestPipeline exportPipeline,
                                     TestPipeline importPipeline) {
  ValueProvider.StaticValueProvider<String> destination = ValueProvider.StaticValueProvider
      .of(tmpDir);
  ValueProvider.StaticValueProvider<String> jobId = ValueProvider.StaticValueProvider
      .of(jobIdName);
  ValueProvider.StaticValueProvider<String> source = ValueProvider.StaticValueProvider
      .of(tmpDir + "/" + jobIdName);
  ValueProvider.StaticValueProvider<String> timestamp = ValueProvider.StaticValueProvider.of(ts);
  SpannerConfig sourceConfig = spannerServer.getSpannerConfig(sourceDb);
  exportPipeline.apply("Export", new ExportTransform(sourceConfig, destination, jobId, timestamp));
  PipelineResult exportResult = exportPipeline.run();
  exportResult.waitUntilFinish();

  SpannerConfig copyConfig = spannerServer.getSpannerConfig(destDb);
  importPipeline.apply("Import", new ImportTransform(
      copyConfig, source,
      ValueProvider.StaticValueProvider.of(true),
      ValueProvider.StaticValueProvider.of(true),
      ValueProvider.StaticValueProvider.of(true)));
  PipelineResult importResult = importPipeline.run();
  importResult.waitUntilFinish();
}
Example #13
Source File: BeamPipelineEngine.java From hop with Apache License 2.0

private PipelineResult executePipeline( org.apache.beam.sdk.Pipeline pipeline ) throws HopException {

  RunnerType runnerType = beamEngineRunConfiguration.getRunnerType();
  switch ( runnerType ) {
    case Direct:
      return DirectRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
    case Flink:
      return FlinkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
    case DataFlow:
      return DataflowRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
    case Spark:
      return SparkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
    default:
      throw new HopException( "Execution on runner '" + runnerType.name() + "' is not supported yet." );
  }
}
Example #14
Source File: BigQueryDatasetRuntime.java From components with Apache License 2.0

public void getSampleDeprecated(int limit, Consumer<IndexedRecord> consumer) {
    // Create a pipeline using the input component to get records.
    DirectOptions options = BeamLocalRunnerOption.getOptions();
    final Pipeline p = Pipeline.create(options);

    // Create an input runtime based on the properties.
    BigQueryInputRuntime inputRuntime = new BigQueryInputRuntime();
    BigQueryInputProperties inputProperties = new BigQueryInputProperties(null);
    inputProperties.init();
    inputProperties.setDatasetProperties(properties);
    inputRuntime.initialize(new BeamJobRuntimeContainer(options), inputProperties);

    try (DirectConsumerCollector<IndexedRecord> collector = DirectConsumerCollector.of(consumer)) {
        // Collect a sample of the input records.
        p //
            .apply(inputRuntime) //
            .apply(Sample.<IndexedRecord> any(limit)) //
            .apply(collector);
        PipelineResult pr = p.run();
        pr.waitUntilFinish();
    }
}
Example #15
Source File: TalendIOTest.java From component-runtime with Apache License 2.0

@Test
public void processorMulti() {
    final PCollection<SampleLength> out = pipeline
        .apply(Create.of(new Sample("a"), new Sample("bb")).withCoder(JsonbCoder.of(Sample.class, PLUGIN)))
        .apply(UUID.randomUUID().toString(), toRecord())
        .setCoder(SchemaRegistryCoder.of())
        .apply(new ViewsMappingTransform(emptyMap(), PLUGIN))
        .apply(TalendFn.asFn(new BaseTestProcessor() {

            @Override
            public void onNext(final InputFactory input, final OutputFactory factory) {
                final Object read = input.read(Branches.DEFAULT_BRANCH);
                factory
                    .create(Branches.DEFAULT_BRANCH)
                    .emit(new Sample(Record.class.cast(read).getString("data")));
            }
        }))
        .apply(toSampleLength());
    PAssert.that(out.apply(UUID.randomUUID().toString(), toInt())).containsInAnyOrder(1, 2);
    assertEquals(PipelineResult.State.DONE, pipeline.run().getState());
}
Example #16
Source File: MongoDBIOIT.java From beam with Apache License 2.0

@Test
public void testWriteAndRead() {
  initialCollectionSize = getCollectionSizeInBytes(collection);

  writePipeline
      .apply("Generate sequence", GenerateSequence.from(0).to(options.getNumberOfRecords()))
      .apply("Produce documents", MapElements.via(new LongToDocumentFn()))
      .apply("Collect write time metric", ParDo.of(new TimeMonitor<>(NAMESPACE, "write_time")))
      .apply(
          "Write documents to MongoDB",
          MongoDbIO.write()
              .withUri(mongoUrl)
              .withDatabase(options.getMongoDBDatabaseName())
              .withCollection(collection));
  PipelineResult writeResult = writePipeline.run();
  writeResult.waitUntilFinish();

  finalCollectionSize = getCollectionSizeInBytes(collection);

  PCollection<String> consolidatedHashcode =
      readPipeline
          .apply(
              "Read all documents",
              MongoDbIO.read()
                  .withUri(mongoUrl)
                  .withDatabase(options.getMongoDBDatabaseName())
                  .withCollection(collection))
          .apply("Collect read time metrics", ParDo.of(new TimeMonitor<>(NAMESPACE, "read_time")))
          .apply("Map documents to Strings", MapElements.via(new DocumentToStringFn()))
          .apply("Calculate hashcode", Combine.globally(new HashingFn()));

  String expectedHash = getHashForRecordCount(options.getNumberOfRecords(), EXPECTED_HASHES);
  PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);

  PipelineResult readResult = readPipeline.run();
  readResult.waitUntilFinish();
  collectAndPublishMetrics(writeResult, readResult);
}
Example #17
Source File: SimpleFileIOOutputRuntimeUnboundedTest.java From components with Apache License 2.0

/**
 * Basic unit test writing to Avro.
 */
@Test
public void testBasicAvroUnboundedWithWindow() throws IOException, URISyntaxException {
    String fileSpec = mini
            .getLocalFs()
            .getUri()
            .resolve(new Path(mini.newFolder().toString(), "output.avro").toUri())
            .toString();

    // Configure the component.
    SimpleFileIOOutputProperties props = createOutputComponentProperties();
    props.getDatasetProperties().path.setValue(fileSpec);
    props.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);

    // Create the runtime.
    SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
    runtime.initialize(null, props);

    // Use the runtime in a direct pipeline to test.
    final Pipeline p = beam.createPipeline();
    PCollection<IndexedRecord> input = p //
            .apply(GenerateSequence.from(0).withRate(10, Duration.millis(1000))) //
            .apply(ParDo.of(new GenerateDoFn()))
            .apply(Window.<IndexedRecord> into(FixedWindows.of(Duration.millis(30000))));
    input.apply(runtime);

    // And run the test.
    PipelineResult pr = p.run();

    // Check the expected values.
    // TODO(rskraba): Implement a comparison for the file on disk.
    // mini.assertReadFile(mini.getLocalFs(), fileSpec, "1;one", "2;two");
}
Example #18
Source File: JobInvocationTest.java From beam with Apache License 2.0

@Test(timeout = 10_000)
public void testStateAfterCompletion() throws Exception {
  jobInvocation.start();
  assertThat(jobInvocation.getState(), is(JobApi.JobState.Enum.RUNNING));

  TestPipelineResult pipelineResult = new TestPipelineResult(PipelineResult.State.DONE);
  runner.setResult(pipelineResult);

  awaitJobState(jobInvocation, JobApi.JobState.Enum.DONE);
}
Example #19
Source File: MetricsTest.java From beam with Apache License 2.0

private static MetricQueryResults queryTestMetrics(PipelineResult result) {
  return result
      .metrics()
      .queryMetrics(
          MetricsFilter.builder()
              .addNameFilter(MetricNameFilter.inNamespace(MetricsTest.class))
              .build());
}
Example #20
Source File: TestPipeline.java From beam with Apache License 2.0

/** Like {@link #run} but with the given potentially modified options. */
@Override
public PipelineResult run(PipelineOptions options) {
  checkState(
      enforcement.isPresent(),
      "Is your TestPipeline declaration missing a @Rule annotation? Usage: "
          + "@Rule public final transient TestPipeline pipeline = TestPipeline.create();");

  final PipelineResult pipelineResult;
  try {
    enforcement.get().beforePipelineExecution();
    PipelineOptions updatedOptions =
        MAPPER.convertValue(MAPPER.valueToTree(options), PipelineOptions.class);
    updatedOptions
        .as(TestValueProviderOptions.class)
        .setProviderRuntimeValues(StaticValueProvider.of(providerRuntimeValues));
    pipelineResult = super.run(updatedOptions);
    verifyPAssertsSucceeded(this, pipelineResult);
  } catch (RuntimeException exc) {
    Throwable cause = exc.getCause();
    if (cause instanceof AssertionError) {
      throw (AssertionError) cause;
    } else {
      throw exc;
    }
  }

  // If we reach this point, the pipeline has been run and no exceptions have been thrown during
  // its execution.
  enforcement.get().afterPipelineExecution();
  return pipelineResult;
}
Example #21
Source File: SparkStructuredStreamingPipelineResult.java From beam with Apache License 2.0

private State awaitTermination(Duration duration)
    throws TimeoutException, ExecutionException, InterruptedException {
  pipelineExecution.get(duration.getMillis(), TimeUnit.MILLISECONDS);
  // Throws an exception if the job is not finished successfully in the given time.
  // TODO: all streaming functionality
  return PipelineResult.State.DONE;
}
Example #22
Source File: ExportPipeline.java From DataflowTemplates with Apache License 2.0

/**
 * Runs a pipeline to export a Cloud Spanner database to Avro files.
 *
 * @param args arguments to the pipeline
 */
public static void main(String[] args) {
  ExportPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(ExportPipelineOptions.class);

  Pipeline p = Pipeline.create(options);

  SpannerConfig spannerConfig =
      SpannerConfig.create()
          .withProjectId(options.getSpannerProjectId())
          .withHost(options.getSpannerHost())
          .withInstanceId(options.getInstanceId())
          .withDatabaseId(options.getDatabaseId());

  p.begin()
      .apply(
          "Run Export",
          new ExportTransform(spannerConfig, options.getOutputDir(), options.getTestJobId(),
              options.getSnapshotTime()));

  PipelineResult result = p.run();

  if (options.getWaitUntilFinish()
      &&
      /* Only if template location is null, there is a dataflow job to wait for. Else it's
       * template generation which doesn't start a dataflow job. */
      options.as(DataflowPipelineOptions.class).getTemplateLocation() == null) {
    result.waitUntilFinish();
  }
}
Example #23
Source File: SparkPipelineStateTest.java From beam with Apache License 2.0

private void testFailedPipeline(final SparkPipelineOptions options) throws Exception {

  SparkPipelineResult result = null;

  try {
    final Pipeline pipeline = Pipeline.create(options);
    pipeline
        .apply(getValues(options))
        .setCoder(StringUtf8Coder.of())
        .apply(
            MapElements.via(
                new SimpleFunction<String, String>() {

                  @Override
                  public String apply(final String input) {
                    throw new MyCustomException(FAILED_THE_BATCH_INTENTIONALLY);
                  }
                }));

    result = (SparkPipelineResult) pipeline.run();
    result.waitUntilFinish();
  } catch (final Exception e) {
    assertThat(e, instanceOf(Pipeline.PipelineExecutionException.class));
    assertThat(e.getCause(), instanceOf(MyCustomException.class));
    assertThat(e.getCause().getMessage(), is(FAILED_THE_BATCH_INTENTIONALLY));
    assertThat(result.getState(), is(PipelineResult.State.FAILED));
    result.cancel();
    return;
  }

  fail("An injected failure did not affect the pipeline as expected.");
}
Example #24
Source File: BigQueryMergeValidatorTemplate.java From DataflowTemplates with Apache License 2.0

/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
public static PipelineResult run(Options options) {
  // Create the pipeline
  Pipeline pipeline = Pipeline.create(options);

  String replicaTable = options.getReplicaTable();
  String stagingTable = options.getStagingTable();

  pipeline
      .apply(Create.of(1))
      .apply(
          ParDo.of(
              new DoFn<Integer, MergeInfo>() {
                @ProcessElement
                public void process(ProcessContext c) {
                  MergeInfo mergeInfo =
                      MergeInfo.create(
                          "_metadata_timestamp",
                          "_metadata_deleted",
                          replicaTable,
                          stagingTable,
                          ALL_FIELDS,
                          ALL_PK_FIELDS);
                  c.output(mergeInfo);
                }
              }))
      .apply(new BigQueryMerger(
          Duration.standardMinutes(1), null, MergeConfiguration.bigQueryConfiguration()));

  return pipeline.run();
}
Example #25
Source File: BigQueryToElasticsearch.java From DataflowTemplates with Apache License 2.0

/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
private static PipelineResult run(BigQueryToElasticsearchReadOptions options) {

  // Create the pipeline.
  Pipeline pipeline = Pipeline.create(options);

  /*
   * Steps: 1) Read records from BigQuery via BigQueryIO.
   *        2) Create json string from Table Row.
   *        3) Write records to Elasticsearch.
   *
   * Step #1: Read from BigQuery. If a query is provided then it is used to get the TableRows.
   */
  pipeline
      .apply(
          "ReadFromBigQuery",
          ReadBigQuery.newBuilder()
              .setOptions(options.as(BigQueryToElasticsearchReadOptions.class))
              .build())

      /*
       * Step #2: Convert table rows to JSON documents.
       */
      .apply("TableRowsToJsonDocument", ParDo.of(new TableRowToJsonFn()))

      /*
       * Step #3: Write converted records to Elasticsearch
       */
      .apply(
          "WriteToElasticsearch",
          WriteToElasticsearch.newBuilder()
              .setOptions(options.as(WriteToElasticsearchOptions.class))
              .build());

  return pipeline.run();
}
Example #26
Source File: ParquetIOIT.java From beam with Apache License 2.0

private void collectAndPublishMetrics(PipelineResult result) {
  String uuid = UUID.randomUUID().toString();
  String timestamp = Timestamp.now().toString();

  Set<Function<MetricsReader, NamedTestResult>> metricSuppliers =
      fillMetricSuppliers(uuid, timestamp);
  final IOITMetrics metrics =
      new IOITMetrics(metricSuppliers, result, PARQUET_NAMESPACE, uuid, timestamp);
  metrics.publish(bigQueryDataset, bigQueryTable);
  metrics.publishToInflux(settings);
}
Example #27
Source File: CopyDbTest.java From DataflowTemplates with Apache License 2.0

private void runTest() {
  String tmpDirPath = tmpDir.getRoot().getAbsolutePath();
  ValueProvider.StaticValueProvider<String> destination = ValueProvider.StaticValueProvider
      .of(tmpDirPath);
  ValueProvider.StaticValueProvider<String> jobId = ValueProvider.StaticValueProvider
      .of("jobid");
  ValueProvider.StaticValueProvider<String> source = ValueProvider.StaticValueProvider
      .of(tmpDirPath + "/jobid");

  SpannerConfig sourceConfig = spannerServer.getSpannerConfig(sourceDb);
  exportPipeline.apply("Export", new ExportTransform(sourceConfig, destination, jobId));
  PipelineResult exportResult = exportPipeline.run();
  exportResult.waitUntilFinish();

  SpannerConfig destConfig = spannerServer.getSpannerConfig(destinationDb);
  importPipeline.apply(
      "Import",
      new ImportTransform(
          destConfig,
          source,
          ValueProvider.StaticValueProvider.of(true),
          ValueProvider.StaticValueProvider.of(true),
          ValueProvider.StaticValueProvider.of(true)));
  PipelineResult importResult = importPipeline.run();
  importResult.waitUntilFinish();

  PCollection<Long> mismatchCount =
      comparePipeline.apply("Compare", new CompareDatabases(sourceConfig, destConfig));
  PAssert.that(mismatchCount).satisfies((x) -> {
    assertEquals(Lists.newArrayList(x), Lists.newArrayList(0L));
    return null;
  });
  PipelineResult compareResult = comparePipeline.run();
  compareResult.waitUntilFinish();

  Ddl sourceDdl = readDdl(sourceDb);
  Ddl destinationDdl = readDdl(destinationDb);

  assertThat(sourceDdl.prettyPrint(), equalToIgnoringWhiteSpace(destinationDdl.prettyPrint()));
}
Example #28
Source File: MetricsReaderTest.java From beam with Apache License 2.0

@Test
public void testTimeIsMinusOneIfTimeMetricIsTooFarFromNow() {
  List<Integer> sampleInputData = Arrays.asList(1, 5, 5, 5, 5);

  createTestPipeline(sampleInputData, new MonitorWithTimeDistribution());
  PipelineResult result = testPipeline.run();

  MetricsReader reader = new MetricsReader(result, NAMESPACE, 900000000001L);

  assertEquals(-1, reader.getStartTimeMetric("timeDist"));
  assertEquals(-1, reader.getEndTimeMetric("timeDist"));
}
Example #29
Source File: TrafficRoutes.java From beam with Apache License 2.0

public static void runTrafficRoutes(TrafficRoutesOptions options) throws IOException {
  // Using ExampleUtils to set up required resources.
  ExampleUtils exampleUtils = new ExampleUtils(options);
  exampleUtils.setup();

  Pipeline pipeline = Pipeline.create(options);
  TableReference tableRef = new TableReference();
  tableRef.setProjectId(options.getProject());
  tableRef.setDatasetId(options.getBigQueryDataset());
  tableRef.setTableId(options.getBigQueryTable());

  pipeline
      .apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
      // row... => <station route, station speed> ...
      .apply(ParDo.of(new ExtractStationSpeedFn()))
      // map the incoming data stream into sliding windows.
      .apply(
          Window.into(
              SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                  .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
      .apply(new TrackSpeed())
      .apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatStatsFn.getSchema()));

  // Run the pipeline.
  PipelineResult result = pipeline.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exists.
  exampleUtils.waitToFinish(result);
}
Example #30
Source File: TestJetRunner.java From beam with Apache License 2.0

@Override
public PipelineResult run(Pipeline pipeline) {
  Collection<JetInstance> instances = initMemberInstances(factory);
  try {
    PipelineResult result = delegate.run(pipeline);
    if (result instanceof FailedRunningPipelineResults) {
      throw ((FailedRunningPipelineResults) result).getCause();
    }
    result.waitUntilFinish();
    return result;
  } finally {
    killMemberInstances(instances, factory);
  }
}