org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method Java Examples
The following examples show how to use org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method. They are drawn from the Apache Beam and DataflowTemplates projects; the source file and license are noted above each example.
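BigQueryIO.Write.Method is the enum accepted by withMethod(...) and controls how rows reach BigQuery: FILE_LOADS stages files and issues load jobs, STREAMING_INSERTS uses the streaming-insert API, and DEFAULT lets the SDK choose based on the input. A minimal sketch of making that choice, with a placeholder table spec and schema that are not taken from any example below:

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.common.collect.ImmutableList;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class WriteMethodSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Placeholder one-column schema.
    TableSchema schema =
        new TableSchema()
            .setFields(
                ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING")));

    pipeline
        .apply(Create.of(new TableRow().set("name", "a")))
        .apply(
            BigQueryIO.writeTableRows()
                .to("my-project:my_dataset.my_table") // hypothetical table spec
                .withSchema(schema)
                // FILE_LOADS batches rows into load jobs; STREAMING_INSERTS trades
                // that latency for per-row streaming cost.
                .withMethod(BigQueryIO.Write.Method.FILE_LOADS));

    pipeline.run().waitUntilFinish();
  }
}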
Example #1
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteToTableDecorator() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");

  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(new TableFieldSchema().setName("number").setType("INTEGER")));
  p.apply(Create.of(row1, row2))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id$20171127")
              .withTestServices(fakeBqServices)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(schema)
              .withoutValidation());
  p.run();
}
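Two notes on this example. The $20171127 suffix on the table spec is a BigQuery partition decorator, so the streaming insert targets that specific day partition. And p, fakeBqServices, and fakeDatasetService are fixtures of BigQueryIOWriteTest rather than part of the snippet; a rough sketch of the setup such tests assume, built on Beam's fake BigQuery test services (the exact wiring is illustrative, not copied from the test class):

// Illustrative test-harness setup assumed by the BigQueryIOWriteTest examples.
@Rule public final transient TestPipeline p = TestPipeline.create();

// Fakes from org.apache.beam.sdk.io.gcp.testing stand in for real BigQuery services.
private final FakeDatasetService fakeDatasetService = new FakeDatasetService();
private final FakeJobService fakeJobService = new FakeJobService();
private final FakeBigQueryServices fakeBqServices =
    new FakeBigQueryServices()
        .withDatasetService(fakeDatasetService)
        .withJobService(fakeJobService);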
Example #2
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testSchemaWriteStreams() throws Exception {
  p.apply(
          Create.of(
              new SchemaPojo("a", 1),
              new SchemaPojo("b", 2),
              new SchemaPojo("c", 3),
              new SchemaPojo("d", 4)))
      .apply(
          BigQueryIO.<SchemaPojo>write()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(Method.STREAMING_INSERTS)
              .useBeamSchema()
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(
          new TableRow().set("name", "a").set("number", "1"),
          new TableRow().set("name", "b").set("number", "2"),
          new TableRow().set("name", "c").set("number", "3"),
          new TableRow().set("name", "d").set("number", "4")));
}
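useBeamSchema() infers the destination TableSchema from the element type, so SchemaPojo (not shown on this page) must carry a Beam schema. A plausible reconstruction under Beam's JavaFieldSchema inference; treat the field names and constructor as guesses based on how the tests use the class:

// Hypothetical shape of the SchemaPojo the tests above rely on; the real class
// lives in BigQueryIOWriteTest. @DefaultSchema lets Beam infer a schema from
// the public fields, which useBeamSchema() then maps to a TableSchema.
@DefaultSchema(JavaFieldSchema.class)
static class SchemaPojo {
  public final String name;
  public final int number;

  @SchemaCreate
  SchemaPojo(String name, int number) {
    this.name = name;
    this.number = number;
  }
}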
Example #3
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testSchemaWriteLoads() throws Exception {
  p.apply(
          Create.of(
              new SchemaPojo("a", 1),
              new SchemaPojo("b", 2),
              new SchemaPojo("c", 3),
              new SchemaPojo("d", 4)))
      .apply(
          BigQueryIO.<SchemaPojo>write()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(Method.FILE_LOADS)
              .useBeamSchema()
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(
          new TableRow().set("name", "a").set("number", "1"),
          new TableRow().set("name", "b").set("number", "2"),
          new TableRow().set("name", "c").set("number", "3"),
          new TableRow().set("name", "d").set("number", "4")));
}
Example #4
Source File: DataCatalogBigQueryIT.java From beam with Apache License 2.0
@Test
public void testRead() throws Exception {
  TableReference bqTable = bigQuery.tableReference();

  // Streaming inserts do not work with DIRECT_READ mode; there is a several-hour lag.
  PCollection<Row> data =
      writePipeline.apply(Create.of(row(1, "name1"), row(2, "name2"), row(3, "name3")));
  data.apply(
      BigQueryIO.<Row>write()
          .withSchema(BigQueryUtils.toTableSchema(ID_NAME_SCHEMA))
          .withFormatFunction(BigQueryUtils.toTableRow())
          .withMethod(Method.FILE_LOADS)
          .to(bqTable));
  writePipeline.run().waitUntilFinish(Duration.standardMinutes(2));

  String tableId =
      String.format(
          "bigquery.`table`.`%s`.`%s`.`%s`",
          bqTable.getProjectId(), bqTable.getDatasetId(), bqTable.getTableId());

  readPipeline
      .getOptions()
      .as(BeamSqlPipelineOptions.class)
      .setPlannerName(queryPlanner.getCanonicalName());

  try (DataCatalogTableProvider tableProvider =
      DataCatalogTableProvider.create(
          readPipeline.getOptions().as(DataCatalogPipelineOptions.class))) {
    PCollection<Row> result =
        readPipeline.apply(
            "query",
            SqlTransform.query("SELECT id, name FROM " + tableId)
                .withDefaultTableProvider("datacatalog", tableProvider));

    PAssert.that(result).containsInAnyOrder(row(1, "name1"), row(2, "name2"), row(3, "name3"));
    readPipeline.run().waitUntilFinish(Duration.standardMinutes(2));
  }
}
Example #5
Source File: BigQueryKmsKeyIT.java From beam with Apache License 2.0
/**
 * Tests query job and table creation with KMS key settings.
 *
 * <p>Verifies table creation with KMS key.
 */
private void testQueryAndWrite(Method method) throws Exception {
  String outputTableId = "testQueryAndWrite_" + method.name();
  String outputTableSpec = project + ":" + BIG_QUERY_DATASET_ID + "." + outputTableId;

  options.setTempLocation(options.getTempRoot() + "/bq_it_temp");
  Pipeline p = Pipeline.create(options);
  // Reading triggers BQ query and extract jobs. Writing triggers either a load job or performs a
  // streaming insert (depending on method).
  p.apply(
          BigQueryIO.readTableRows()
              .fromQuery("SELECT * FROM (SELECT \"foo\" as fruit)")
              .withKmsKey(kmsKey))
      .apply(
          BigQueryIO.writeTableRows()
              .to(outputTableSpec)
              .withSchema(OUTPUT_SCHEMA)
              .withMethod(method)
              .withKmsKey(kmsKey));
  p.run().waitUntilFinish();

  Table table = BQ_CLIENT.getTableResource(project, BIG_QUERY_DATASET_ID, outputTableId);
  assertNotNull(String.format("table not found: %s", outputTableId), table);
  assertNotNull(
      "output table has no EncryptionConfiguration", table.getEncryptionConfiguration());
  assertEquals(table.getEncryptionConfiguration().getKmsKeyName(), kmsKey);
}
Example #6
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
void schemaUpdateOptionsTest(
    BigQueryIO.Write.Method insertMethod, Set<SchemaUpdateOption> schemaUpdateOptions)
    throws Exception {
  TableRow row = new TableRow().set("date", "2019-01-01").set("number", "1");

  // Two separate fields; chaining setName/setType twice on one TableFieldSchema
  // would produce a single field.
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("date").setType("DATE"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));

  Write<TableRow> writeTransform =
      BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id")
          .withTestServices(fakeBqServices)
          .withMethod(insertMethod)
          .withSchema(schema)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
          .withSchemaUpdateOptions(schemaUpdateOptions);

  p.apply(Create.<TableRow>of(row)).apply(writeTransform);
  p.run();

  List<String> expectedOptions =
      schemaUpdateOptions.stream().map(Enum::name).collect(Collectors.toList());

  for (Job job : fakeJobService.getAllJobs()) {
    JobConfigurationLoad configuration = job.getConfiguration().getLoad();
    assertEquals(expectedOptions, configuration.getSchemaUpdateOptions());
  }
}
Example #7
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteValidateFailsWithAvroFormatAndStreamingInserts() {
  p.enableAbandonedNodeEnforcement(false);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Writing avro formatted data is only supported for FILE_LOADS");
  p.apply(Create.empty(INPUT_RECORD_CODER))
      .apply(
          BigQueryIO.<InputRecord>write()
              .to("dataset.table")
              .withSchema(new TableSchema())
              .withAvroFormatFunction(r -> new GenericData.Record(r.getSchema()))
              .withMethod(Method.STREAMING_INSERTS)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));
}
Example #8
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteWithoutInsertId() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", 1);
  TableRow row2 = new TableRow().set("name", "b").set("number", 2);
  TableRow row3 = new TableRow().set("name", "c").set("number", 3);
  p.apply(Create.of(row1, row2, row3).withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .ignoreInsertIds()
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row2, row3));
  // Verify no insert id is added.
  assertThat(
      fakeDatasetService.getAllIds("project-id", "dataset-id", "table-id"),
      containsInAnyOrder());
}
Example #9
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testFailuresNoRetryPolicy() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TableRow row3 = new TableRow().set("name", "c").set("number", "3");

  TableDataInsertAllResponse.InsertErrors ephemeralError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));

  fakeDatasetService.failOnInsert(
      ImmutableMap.of(
          row1, ImmutableList.of(ephemeralError, ephemeralError),
          row2, ImmutableList.of(ephemeralError, ephemeralError)));

  p.apply(Create.of(row1, row2, row3))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row2, row3));
}
Example #10
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testTriggeredFileLoads() throws Exception {
  List<TableRow> elements = Lists.newArrayList();
  for (int i = 0; i < 30; ++i) {
    elements.add(new TableRow().set("number", i));
  }
  TestStream<TableRow> testStream =
      TestStream.create(TableRowJsonCoder.of())
          .addElements(
              elements.get(0), Iterables.toArray(elements.subList(1, 10), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(10), Iterables.toArray(elements.subList(11, 20), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(20), Iterables.toArray(elements.subList(21, 30), TableRow.class))
          .advanceWatermarkToInfinity();

  p.apply(testStream)
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withTriggeringFrequency(Duration.standardSeconds(30))
              .withNumFileShards(2)
              .withMethod(BigQueryIO.Write.Method.FILE_LOADS)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(Iterables.toArray(elements, TableRow.class)));
}
Example #11
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testClusteringTableFunction() throws Exception {
  TableRow row1 = new TableRow().set("date", "2018-01-01").set("number", "1");
  TableRow row2 = new TableRow().set("date", "2018-01-02").set("number", "2");

  TimePartitioning timePartitioning = new TimePartitioning().setType("DAY").setField("date");
  Clustering clustering = new Clustering().setFields(ImmutableList.of("date"));
  // Two separate fields; chaining setName/setType twice on one TableFieldSchema
  // would produce a single field.
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("date").setType("DATE"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));
  p.apply(Create.of(row1, row2))
      .apply(
          BigQueryIO.writeTableRows()
              .to(
                  (ValueInSingleWindow<TableRow> vsw) -> {
                    String tableSpec =
                        "project-id:dataset-id.table-" + vsw.getValue().get("number");
                    return new TableDestination(
                        tableSpec,
                        null,
                        new TimePartitioning().setType("DAY").setField("date"),
                        new Clustering().setFields(ImmutableList.of("date")));
                  })
              .withTestServices(fakeBqServices)
              .withMethod(BigQueryIO.Write.Method.FILE_LOADS)
              .withSchema(schema)
              .withClustering()
              .withoutValidation());
  p.run();

  Table table =
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-1"));
  assertEquals(schema, table.getSchema());
  assertEquals(timePartitioning, table.getTimePartitioning());
  assertEquals(clustering, table.getClustering());
}
Example #12
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteFileSchemaUpdateOptionAll() throws Exception {
  Set<SchemaUpdateOption> options = EnumSet.allOf(SchemaUpdateOption.class);
  schemaUpdateOptionsTest(BigQueryIO.Write.Method.FILE_LOADS, options);
}
Example #13
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteFileSchemaUpdateOptionAllowFieldRelaxation() throws Exception {
  Set<SchemaUpdateOption> options = EnumSet.of(SchemaUpdateOption.ALLOW_FIELD_RELAXATION);
  schemaUpdateOptionsTest(BigQueryIO.Write.Method.FILE_LOADS, options);
}
Example #14
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteFileSchemaUpdateOptionAllowFieldAddition() throws Exception {
  Set<SchemaUpdateOption> options = EnumSet.of(SchemaUpdateOption.ALLOW_FIELD_ADDITION);
  schemaUpdateOptionsTest(BigQueryIO.Write.Method.FILE_LOADS, options);
}
Example #15
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testExtendedErrorRetrieval() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TableRow row3 = new TableRow().set("name", "c").set("number", "3");
  String tableSpec = "project-id:dataset-id.table-id";

  TableDataInsertAllResponse.InsertErrors ephemeralError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
  TableDataInsertAllResponse.InsertErrors persistentError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(Lists.newArrayList(new ErrorProto().setReason("invalidQuery")));

  fakeDatasetService.failOnInsert(
      ImmutableMap.of(
          row1, ImmutableList.of(ephemeralError, ephemeralError),
          row2, ImmutableList.of(ephemeralError, ephemeralError, persistentError)));

  PCollection<BigQueryInsertError> failedRows =
      p.apply(Create.of(row1, row2, row3))
          .apply(
              BigQueryIO.writeTableRows()
                  .to(tableSpec)
                  .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                  .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                  .withSchema(
                      new TableSchema()
                          .setFields(
                              ImmutableList.of(
                                  new TableFieldSchema().setName("name").setType("STRING"),
                                  new TableFieldSchema().setName("number").setType("INTEGER"))))
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .withTestServices(fakeBqServices)
                  .withoutValidation()
                  .withExtendedErrorInfo())
          .getFailedInsertsWithErr();

  // row2 finally fails with a non-retryable error, so we expect to see it in the collection of
  // failed rows.
  PAssert.that(failedRows)
      .containsInAnyOrder(
          new BigQueryInsertError(
              row2, persistentError, BigQueryHelpers.parseTableSpec(tableSpec)));

  p.run();

  // Only row1 and row3 were successfully inserted.
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row3));
}
Example #16
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testRetryPolicy() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TableRow row3 = new TableRow().set("name", "c").set("number", "3");

  TableDataInsertAllResponse.InsertErrors ephemeralError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
  TableDataInsertAllResponse.InsertErrors persistentError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("invalidQuery")));

  fakeDatasetService.failOnInsert(
      ImmutableMap.of(
          row1, ImmutableList.of(ephemeralError, ephemeralError),
          row2, ImmutableList.of(ephemeralError, ephemeralError, persistentError)));

  PCollection<TableRow> failedRows =
      p.apply(Create.of(row1, row2, row3))
          .apply(
              BigQueryIO.writeTableRows()
                  .to("project-id:dataset-id.table-id")
                  .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                  .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                  .withSchema(
                      new TableSchema()
                          .setFields(
                              ImmutableList.of(
                                  new TableFieldSchema().setName("name").setType("STRING"),
                                  new TableFieldSchema().setName("number").setType("INTEGER"))))
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .withTestServices(fakeBqServices)
                  .withoutValidation())
          .getFailedInserts();

  // row2 finally fails with a non-retryable error, so we expect to see it in the collection of
  // failed rows.
  PAssert.that(failedRows).containsInAnyOrder(row2);
  p.run();

  // Only row1 and row3 were successfully inserted.
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row3));
}
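The policy passed to withFailedInsertRetryPolicy decides which insert errors are retried before a row is emitted to getFailedInserts(). Besides retryTransientErrors(), used above, InsertRetryPolicy provides factories for the two extremes; a short sketch of the built-in options:

// Built-in policies on org.apache.beam.sdk.io.gcp.bigquery.InsertRetryPolicy.
InsertRetryPolicy transientOnly = InsertRetryPolicy.retryTransientErrors(); // retry timeouts and similar
InsertRetryPolicy never = InsertRetryPolicy.neverRetry();  // surface every error immediately
InsertRetryPolicy always = InsertRetryPolicy.alwaysRetry(); // keep retrying every error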
Example #17
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testTriggeredFileLoadsWithTempTables() throws Exception {
  List<TableRow> elements = Lists.newArrayList();
  for (int i = 0; i < 30; ++i) {
    elements.add(new TableRow().set("number", i));
  }
  TestStream<TableRow> testStream =
      TestStream.create(TableRowJsonCoder.of())
          .addElements(
              elements.get(0), Iterables.toArray(elements.subList(1, 10), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(10), Iterables.toArray(elements.subList(11, 20), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(20), Iterables.toArray(elements.subList(21, 30), TableRow.class))
          .advanceWatermarkToInfinity();

  p.apply(testStream)
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withTriggeringFrequency(Duration.standardSeconds(30))
              .withNumFileShards(2)
              .withMaxBytesPerPartition(1)
              .withMaxFilesPerPartition(1)
              .withMethod(BigQueryIO.Write.Method.FILE_LOADS)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(Iterables.toArray(elements, TableRow.class)));
}
Example #18
Source File: PubsubAvroToBigQuery.java From DataflowTemplates with Apache License 2.0
/**
 * Runs the pipeline with the supplied options.
 *
 * @param options execution parameters to the pipeline
 * @return result of the pipeline execution as a {@link PipelineResult}
 */
private static PipelineResult run(PubsubAvroToBigQueryOptions options) {
  // Create the pipeline.
  Pipeline pipeline = Pipeline.create(options);

  Schema schema = SchemaUtils.getAvroSchema(options.getSchemaPath());

  WriteResult writeResults =
      pipeline
          .apply(
              "Read Avro records",
              PubsubIO.readAvroGenericRecords(schema)
                  .fromSubscription(options.getInputSubscription()))
          .apply(
              "Write to BigQuery",
              BigQueryIO.<GenericRecord>write()
                  .to(options.getOutputTableSpec())
                  .useBeamSchema()
                  .withMethod(Method.STREAMING_INSERTS)
                  .withWriteDisposition(WriteDisposition.valueOf(options.getWriteDisposition()))
                  .withCreateDisposition(
                      CreateDisposition.valueOf(options.getCreateDisposition()))
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .withExtendedErrorInfo());

  writeResults
      .getFailedInsertsWithErr()
      .apply(
          "Create error payload",
          ErrorConverters.BigQueryInsertErrorToPubsubMessage.<GenericRecord>newBuilder()
              .setPayloadCoder(AvroCoder.of(schema))
              .setTranslateFunction(BigQueryConverters.TableRowToGenericRecordFn.of(schema))
              .build())
      .apply("Write failed records", PubsubIO.writeMessages().to(options.getOutputTopic()));

  // Execute the pipeline and return the result.
  return pipeline.run();
}
Example #19
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testClusteringThrowsWithoutPartitioning() throws Exception {
  p.enableAbandonedNodeEnforcement(false);
  testTimePartitioningClustering(Method.STREAMING_INSERTS, false, true);
}
Example #20
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testClusteringBatchLoads() throws Exception {
  testClustering(BigQueryIO.Write.Method.FILE_LOADS);
}
Example #21
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testClusteringStreamingInserts() throws Exception {
  testClustering(BigQueryIO.Write.Method.STREAMING_INSERTS);
}
Example #22
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testTimePartitioningBatchLoads() throws Exception {
  testTimePartitioning(BigQueryIO.Write.Method.FILE_LOADS);
}
Example #23
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testTimePartitioningStreamingInserts() throws Exception {
  testTimePartitioning(BigQueryIO.Write.Method.STREAMING_INSERTS);
}
Example #24
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
void testClustering(BigQueryIO.Write.Method insertMethod) throws Exception {
  testTimePartitioningClustering(insertMethod, true, true);
}
Example #25
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
void testTimePartitioning(BigQueryIO.Write.Method insertMethod) throws Exception {
  testTimePartitioningClustering(insertMethod, true, false);
}
Example #26
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
void testTimePartitioningClustering(
    BigQueryIO.Write.Method insertMethod, boolean enablePartitioning, boolean enableClustering)
    throws Exception {
  TableRow row1 = new TableRow().set("date", "2018-01-01").set("number", "1");
  TableRow row2 = new TableRow().set("date", "2018-01-02").set("number", "2");

  TimePartitioning timePartitioning = new TimePartitioning().setType("DAY").setField("date");
  Clustering clustering = new Clustering().setFields(ImmutableList.of("date"));
  // Two separate fields; chaining setName/setType twice on one TableFieldSchema
  // would produce a single field.
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("date").setType("DATE"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));

  Write<TableRow> writeTransform =
      BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id")
          .withTestServices(fakeBqServices)
          .withMethod(insertMethod)
          .withSchema(schema)
          .withoutValidation();

  if (enablePartitioning) {
    writeTransform = writeTransform.withTimePartitioning(timePartitioning);
  }
  if (enableClustering) {
    writeTransform = writeTransform.withClustering(clustering);
  }

  p.apply(Create.of(row1, row2)).apply(writeTransform);
  p.run();

  Table table =
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-id"));
  assertEquals(schema, table.getSchema());
  if (enablePartitioning) {
    assertEquals(timePartitioning, table.getTimePartitioning());
  }
  if (enableClustering) {
    assertEquals(clustering, table.getClustering());
  }
}
Example #27
Source File: BigQueryKmsKeyIT.java From beam with Apache License 2.0
@Test
public void testWithStreamingInserts() throws Exception {
  testQueryAndWrite(Method.STREAMING_INSERTS);
}
Example #28
Source File: BigQueryKmsKeyIT.java From beam with Apache License 2.0
@Test
public void testWithFileLoads() throws Exception {
  testQueryAndWrite(Method.FILE_LOADS);
}
Example #29
Source File: BigQueryChangeApplier.java From DataflowTemplates with Apache License 2.0
@Override
public PDone expand(PCollection<Row> input) {
  Pipeline p = input.getPipeline();
  Schema inputCollectionSchema = input.getSchema();

  PCollection<KV<String, KV<Schema, Schema>>> tableSchemaCollection =
      buildTableSchemaCollection(input);
  PCollectionView<Map<String, KV<Schema, Schema>>> schemaMapView =
      tableSchemaCollection.apply(View.asMap());

  PCollection<TableRow> updatesToWrite = formatIntoTableRows(input);

  updatesToWrite.apply(
      BigQueryIO.writeTableRows()
          .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(WriteDisposition.WRITE_APPEND)
          .withMethod(Method.STREAMING_INSERTS)
          .to(
              new ChangelogTableDynamicDestinations(
                  changeLogDataset, gcpProjectId, schemaMapView)));

  String jobPrefix =
      String.format(
          "beam_cdc_%s_%s_",
          gcpProjectId.replace(':', '_').replace('.', '_'), replicaDataset);

  // If the input collection does not have a primary key field, then we do not need to issue
  // periodic merge requests.
  if (inputCollectionSchema.hasField(DataflowCdcRowFormat.PRIMARY_KEY)) {
    p.apply(
            "MergeHeartbeat",
            GenerateSequence.from(0)
                .withRate(1, Duration.standardSeconds(updateFrequencySeconds)))
        .apply(
            "KeyByTable",
            ParDo.of(new KeySchemasByTableFn(schemaMapView)).withSideInputs(schemaMapView))
        .apply(
            "BuildMergeStatements",
            ParDo.of(
                new MergeStatementBuildingFn(changeLogDataset, replicaDataset, gcpProjectId)))
        .setCoder(
            SerializableCoder.of(
                TypeDescriptors.kvs(
                    TypeDescriptors.strings(), TypeDescriptor.of(BigQueryAction.class))))
        .apply("IssueMergeStatements", ParDo.of(new BigQueryStatementIssuingFn(jobPrefix)));
  }
  return PDone.in(p);
}