org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method Java Examples
The following examples show how to use org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method. They are drawn from the Apache Beam and DataflowTemplates projects; the source file and license are noted above each example.
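BigQueryIO.Write.Method is the enum accepted by withMethod(...) and controls how rows reach BigQuery: FILE_LOADS stages files and issues load jobs, STREAMING_INSERTS uses the streaming-insert API, and DEFAULT lets the SDK choose based on the input. A minimal sketch of making that choice, with a placeholder table spec and schema that are not taken from any example below:

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.common.collect.ImmutableList;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class WriteMethodSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Placeholder one-column schema.
    TableSchema schema =
        new TableSchema()
            .setFields(
                ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING")));

    pipeline
        .apply(Create.of(new TableRow().set("name", "a")))
        .apply(
            BigQueryIO.writeTableRows()
                .to("my-project:my_dataset.my_table") // hypothetical table spec
                .withSchema(schema)
                // FILE_LOADS batches rows into load jobs; STREAMING_INSERTS trades
                // that latency for per-row streaming cost.
                .withMethod(BigQueryIO.Write.Method.FILE_LOADS));

    pipeline.run().waitUntilFinish();
  }
}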
Example #1
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteToTableDecorator() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");

  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(new TableFieldSchema().setName("number").setType("INTEGER")));
  p.apply(Create.of(row1, row2))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id$20171127")
              .withTestServices(fakeBqServices)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(schema)
              .withoutValidation());
  p.run();
}
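Two notes on this example. The $20171127 suffix on the table spec is a BigQuery partition decorator, so the streaming insert targets that specific day partition. And p, fakeBqServices, and fakeDatasetService are fixtures of BigQueryIOWriteTest rather than part of the snippet; a rough sketch of the setup such tests assume, built on Beam's fake BigQuery test services (the exact wiring is illustrative, not copied from the test class):

// Illustrative test-harness setup assumed by the BigQueryIOWriteTest examples.
@Rule public final transient TestPipeline p = TestPipeline.create();

// Fakes from org.apache.beam.sdk.io.gcp.testing stand in for real BigQuery services.
private final FakeDatasetService fakeDatasetService = new FakeDatasetService();
private final FakeJobService fakeJobService = new FakeJobService();
private final FakeBigQueryServices fakeBqServices =
    new FakeBigQueryServices()
        .withDatasetService(fakeDatasetService)
        .withJobService(fakeJobService);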
Example #2
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testSchemaWriteStreams() throws Exception {
  p.apply(
          Create.of(
              new SchemaPojo("a", 1),
              new SchemaPojo("b", 2),
              new SchemaPojo("c", 3),
              new SchemaPojo("d", 4)))
      .apply(
          BigQueryIO.<SchemaPojo>write()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(Method.STREAMING_INSERTS)
              .useBeamSchema()
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(
          new TableRow().set("name", "a").set("number", "1"),
          new TableRow().set("name", "b").set("number", "2"),
          new TableRow().set("name", "c").set("number", "3"),
          new TableRow().set("name", "d").set("number", "4")));
}
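useBeamSchema() infers the destination TableSchema from the element type, so SchemaPojo (not shown on this page) must carry a Beam schema. A plausible reconstruction under Beam's JavaFieldSchema inference; treat the field names and constructor as guesses based on how the tests use the class:

// Hypothetical shape of the SchemaPojo the tests above rely on; the real class
// lives in BigQueryIOWriteTest. @DefaultSchema lets Beam infer a schema from
// the public fields, which useBeamSchema() then maps to a TableSchema.
@DefaultSchema(JavaFieldSchema.class)
static class SchemaPojo {
  public final String name;
  public final int number;

  @SchemaCreate
  SchemaPojo(String name, int number) {
    this.name = name;
    this.number = number;
  }
}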
Example #3
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testSchemaWriteLoads() throws Exception {
  p.apply(
          Create.of(
              new SchemaPojo("a", 1),
              new SchemaPojo("b", 2),
              new SchemaPojo("c", 3),
              new SchemaPojo("d", 4)))
      .apply(
          BigQueryIO.<SchemaPojo>write()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(Method.FILE_LOADS)
              .useBeamSchema()
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(
          new TableRow().set("name", "a").set("number", "1"),
          new TableRow().set("name", "b").set("number", "2"),
          new TableRow().set("name", "c").set("number", "3"),
          new TableRow().set("name", "d").set("number", "4")));
}
Example #4
Source File: DataCatalogBigQueryIT.java From beam with Apache License 2.0
@Test
public void testRead() throws Exception {
  TableReference bqTable = bigQuery.tableReference();

  // Streaming inserts do not work with DIRECT_READ mode; there is a several-hour lag.
  PCollection<Row> data =
      writePipeline.apply(Create.of(row(1, "name1"), row(2, "name2"), row(3, "name3")));
  data.apply(
      BigQueryIO.<Row>write()
          .withSchema(BigQueryUtils.toTableSchema(ID_NAME_SCHEMA))
          .withFormatFunction(BigQueryUtils.toTableRow())
          .withMethod(Method.FILE_LOADS)
          .to(bqTable));
  writePipeline.run().waitUntilFinish(Duration.standardMinutes(2));

  String tableId =
      String.format(
          "bigquery.`table`.`%s`.`%s`.`%s`",
          bqTable.getProjectId(), bqTable.getDatasetId(), bqTable.getTableId());

  readPipeline
      .getOptions()
      .as(BeamSqlPipelineOptions.class)
      .setPlannerName(queryPlanner.getCanonicalName());

  try (DataCatalogTableProvider tableProvider =
      DataCatalogTableProvider.create(
          readPipeline.getOptions().as(DataCatalogPipelineOptions.class))) {
    PCollection<Row> result =
        readPipeline.apply(
            "query",
            SqlTransform.query("SELECT id, name FROM " + tableId)
                .withDefaultTableProvider("datacatalog", tableProvider));

    PAssert.that(result).containsInAnyOrder(row(1, "name1"), row(2, "name2"), row(3, "name3"));
    readPipeline.run().waitUntilFinish(Duration.standardMinutes(2));
  }
}
Example #5
Source File: BigQueryKmsKeyIT.java From beam with Apache License 2.0
/**
 * Tests query job and table creation with KMS key settings.
 *
 * <p>Verifies table creation with KMS key.
 */
private void testQueryAndWrite(Method method) throws Exception {
  String outputTableId = "testQueryAndWrite_" + method.name();
  String outputTableSpec = project + ":" + BIG_QUERY_DATASET_ID + "." + outputTableId;

  options.setTempLocation(options.getTempRoot() + "/bq_it_temp");
  Pipeline p = Pipeline.create(options);
  // Reading triggers BQ query and extract jobs. Writing triggers either a load job or performs a
  // streaming insert (depending on method).
  p.apply(
          BigQueryIO.readTableRows()
              .fromQuery("SELECT * FROM (SELECT \"foo\" as fruit)")
              .withKmsKey(kmsKey))
      .apply(
          BigQueryIO.writeTableRows()
              .to(outputTableSpec)
              .withSchema(OUTPUT_SCHEMA)
              .withMethod(method)
              .withKmsKey(kmsKey));
  p.run().waitUntilFinish();

  Table table = BQ_CLIENT.getTableResource(project, BIG_QUERY_DATASET_ID, outputTableId);
  assertNotNull(String.format("table not found: %s", outputTableId), table);
  assertNotNull(
      "output table has no EncryptionConfiguration", table.getEncryptionConfiguration());
  assertEquals(table.getEncryptionConfiguration().getKmsKeyName(), kmsKey);
}
Example #6
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
void schemaUpdateOptionsTest(
    BigQueryIO.Write.Method insertMethod, Set<SchemaUpdateOption> schemaUpdateOptions)
    throws Exception {
  TableRow row = new TableRow().set("date", "2019-01-01").set("number", "1");

  // Two separate fields; chaining setName/setType twice on one TableFieldSchema
  // would produce a single field.
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("date").setType("DATE"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));

  Write<TableRow> writeTransform =
      BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id")
          .withTestServices(fakeBqServices)
          .withMethod(insertMethod)
          .withSchema(schema)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
          .withSchemaUpdateOptions(schemaUpdateOptions);

  p.apply(Create.<TableRow>of(row)).apply(writeTransform);
  p.run();

  List<String> expectedOptions =
      schemaUpdateOptions.stream().map(Enum::name).collect(Collectors.toList());

  for (Job job : fakeJobService.getAllJobs()) {
    JobConfigurationLoad configuration = job.getConfiguration().getLoad();
    assertEquals(expectedOptions, configuration.getSchemaUpdateOptions());
  }
}
Example #7
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteValidateFailsWithAvroFormatAndStreamingInserts() {
  p.enableAbandonedNodeEnforcement(false);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Writing avro formatted data is only supported for FILE_LOADS");
  p.apply(Create.empty(INPUT_RECORD_CODER))
      .apply(
          BigQueryIO.<InputRecord>write()
              .to("dataset.table")
              .withSchema(new TableSchema())
              .withAvroFormatFunction(r -> new GenericData.Record(r.getSchema()))
              .withMethod(Method.STREAMING_INSERTS)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));
}
Example #8
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteWithoutInsertId() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", 1);
  TableRow row2 = new TableRow().set("name", "b").set("number", 2);
  TableRow row3 = new TableRow().set("name", "c").set("number", 3);
  p.apply(Create.of(row1, row2, row3).withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .ignoreInsertIds()
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row2, row3));
  // Verify no insert id is added.
  assertThat(
      fakeDatasetService.getAllIds("project-id", "dataset-id", "table-id"),
      containsInAnyOrder());
}
Example #9
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testFailuresNoRetryPolicy() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TableRow row3 = new TableRow().set("name", "c").set("number", "3");

  TableDataInsertAllResponse.InsertErrors ephemeralError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));

  fakeDatasetService.failOnInsert(
      ImmutableMap.of(
          row1, ImmutableList.of(ephemeralError, ephemeralError),
          row2, ImmutableList.of(ephemeralError, ephemeralError)));

  p.apply(Create.of(row1, row2, row3))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row2, row3));
}
Example #10
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testTriggeredFileLoads() throws Exception {
  List<TableRow> elements = Lists.newArrayList();
  for (int i = 0; i < 30; ++i) {
    elements.add(new TableRow().set("number", i));
  }
  TestStream<TableRow> testStream =
      TestStream.create(TableRowJsonCoder.of())
          .addElements(
              elements.get(0), Iterables.toArray(elements.subList(1, 10), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(10), Iterables.toArray(elements.subList(11, 20), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(20), Iterables.toArray(elements.subList(21, 30), TableRow.class))
          .advanceWatermarkToInfinity();

  p.apply(testStream)
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withTriggeringFrequency(Duration.standardSeconds(30))
              .withNumFileShards(2)
              .withMethod(BigQueryIO.Write.Method.FILE_LOADS)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(Iterables.toArray(elements, TableRow.class)));
}
Example #11
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testClusteringTableFunction() throws Exception {
  TableRow row1 = new TableRow().set("date", "2018-01-01").set("number", "1");
  TableRow row2 = new TableRow().set("date", "2018-01-02").set("number", "2");

  TimePartitioning timePartitioning = new TimePartitioning().setType("DAY").setField("date");
  Clustering clustering = new Clustering().setFields(ImmutableList.of("date"));
  // Two separate fields; chaining setName/setType twice on one TableFieldSchema
  // would produce a single field.
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("date").setType("DATE"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));
  p.apply(Create.of(row1, row2))
      .apply(
          BigQueryIO.writeTableRows()
              .to(
                  (ValueInSingleWindow<TableRow> vsw) -> {
                    String tableSpec =
                        "project-id:dataset-id.table-" + vsw.getValue().get("number");
                    return new TableDestination(
                        tableSpec,
                        null,
                        new TimePartitioning().setType("DAY").setField("date"),
                        new Clustering().setFields(ImmutableList.of("date")));
                  })
              .withTestServices(fakeBqServices)
              .withMethod(BigQueryIO.Write.Method.FILE_LOADS)
              .withSchema(schema)
              .withClustering()
              .withoutValidation());
  p.run();

  Table table =
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-1"));
  assertEquals(schema, table.getSchema());
  assertEquals(timePartitioning, table.getTimePartitioning());
  assertEquals(clustering, table.getClustering());
}
Example #12
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteFileSchemaUpdateOptionAll() throws Exception {
  Set<SchemaUpdateOption> options = EnumSet.allOf(SchemaUpdateOption.class);
  schemaUpdateOptionsTest(BigQueryIO.Write.Method.FILE_LOADS, options);
}
Example #13
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteFileSchemaUpdateOptionAllowFieldRelaxation() throws Exception {
  Set<SchemaUpdateOption> options = EnumSet.of(SchemaUpdateOption.ALLOW_FIELD_RELAXATION);
  schemaUpdateOptionsTest(BigQueryIO.Write.Method.FILE_LOADS, options);
}
Example #14
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteFileSchemaUpdateOptionAllowFieldAddition() throws Exception {
  Set<SchemaUpdateOption> options = EnumSet.of(SchemaUpdateOption.ALLOW_FIELD_ADDITION);
  schemaUpdateOptionsTest(BigQueryIO.Write.Method.FILE_LOADS, options);
}
Example #15
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testExtendedErrorRetrieval() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TableRow row3 = new TableRow().set("name", "c").set("number", "3");
  String tableSpec = "project-id:dataset-id.table-id";

  TableDataInsertAllResponse.InsertErrors ephemeralError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
  TableDataInsertAllResponse.InsertErrors persistentError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(Lists.newArrayList(new ErrorProto().setReason("invalidQuery")));

  fakeDatasetService.failOnInsert(
      ImmutableMap.of(
          row1, ImmutableList.of(ephemeralError, ephemeralError),
          row2, ImmutableList.of(ephemeralError, ephemeralError, persistentError)));

  PCollection<BigQueryInsertError> failedRows =
      p.apply(Create.of(row1, row2, row3))
          .apply(
              BigQueryIO.writeTableRows()
                  .to(tableSpec)
                  .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                  .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                  .withSchema(
                      new TableSchema()
                          .setFields(
                              ImmutableList.of(
                                  new TableFieldSchema().setName("name").setType("STRING"),
                                  new TableFieldSchema().setName("number").setType("INTEGER"))))
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .withTestServices(fakeBqServices)
                  .withoutValidation()
                  .withExtendedErrorInfo())
          .getFailedInsertsWithErr();

  // row2 finally fails with a non-retryable error, so we expect to see it in the collection of
  // failed rows.
  PAssert.that(failedRows)
      .containsInAnyOrder(
          new BigQueryInsertError(
              row2, persistentError, BigQueryHelpers.parseTableSpec(tableSpec)));

  p.run();

  // Only row1 and row3 were successfully inserted.
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row3));
}
Example #16
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testRetryPolicy() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TableRow row3 = new TableRow().set("name", "c").set("number", "3");

  TableDataInsertAllResponse.InsertErrors ephemeralError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
  TableDataInsertAllResponse.InsertErrors persistentError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("invalidQuery")));

  fakeDatasetService.failOnInsert(
      ImmutableMap.of(
          row1, ImmutableList.of(ephemeralError, ephemeralError),
          row2, ImmutableList.of(ephemeralError, ephemeralError, persistentError)));

  PCollection<TableRow> failedRows =
      p.apply(Create.of(row1, row2, row3))
          .apply(
              BigQueryIO.writeTableRows()
                  .to("project-id:dataset-id.table-id")
                  .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                  .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                  .withSchema(
                      new TableSchema()
                          .setFields(
                              ImmutableList.of(
                                  new TableFieldSchema().setName("name").setType("STRING"),
                                  new TableFieldSchema().setName("number").setType("INTEGER"))))
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .withTestServices(fakeBqServices)
                  .withoutValidation())
          .getFailedInserts();

  // row2 finally fails with a non-retryable error, so we expect to see it in the collection of
  // failed rows.
  PAssert.that(failedRows).containsInAnyOrder(row2);
  p.run();

  // Only row1 and row3 were successfully inserted.
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row3));
}
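The policy passed to withFailedInsertRetryPolicy decides which insert errors are retried before a row is emitted to getFailedInserts(). Besides retryTransientErrors(), used above, InsertRetryPolicy provides factories for the two extremes; a short sketch of the built-in options:

// Built-in policies on org.apache.beam.sdk.io.gcp.bigquery.InsertRetryPolicy.
InsertRetryPolicy transientOnly = InsertRetryPolicy.retryTransientErrors(); // retry timeouts and similar
InsertRetryPolicy never = InsertRetryPolicy.neverRetry();  // surface every error immediately
InsertRetryPolicy always = InsertRetryPolicy.alwaysRetry(); // keep retrying every error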
Example #17
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testTriggeredFileLoadsWithTempTables() throws Exception {
  List<TableRow> elements = Lists.newArrayList();
  for (int i = 0; i < 30; ++i) {
    elements.add(new TableRow().set("number", i));
  }
  TestStream<TableRow> testStream =
      TestStream.create(TableRowJsonCoder.of())
          .addElements(
              elements.get(0), Iterables.toArray(elements.subList(1, 10), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(10), Iterables.toArray(elements.subList(11, 20), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(20), Iterables.toArray(elements.subList(21, 30), TableRow.class))
          .advanceWatermarkToInfinity();

  p.apply(testStream)
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withTriggeringFrequency(Duration.standardSeconds(30))
              .withNumFileShards(2)
              .withMaxBytesPerPartition(1)
              .withMaxFilesPerPartition(1)
              .withMethod(BigQueryIO.Write.Method.FILE_LOADS)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(Iterables.toArray(elements, TableRow.class)));
}
Example #18
Source File: PubsubAvroToBigQuery.java From DataflowTemplates with Apache License 2.0
/**
 * Runs the pipeline with the supplied options.
 *
 * @param options execution parameters to the pipeline
 * @return result of the pipeline execution as a {@link PipelineResult}
 */
private static PipelineResult run(PubsubAvroToBigQueryOptions options) {
  // Create the pipeline.
  Pipeline pipeline = Pipeline.create(options);

  Schema schema = SchemaUtils.getAvroSchema(options.getSchemaPath());

  WriteResult writeResults =
      pipeline
          .apply(
              "Read Avro records",
              PubsubIO.readAvroGenericRecords(schema)
                  .fromSubscription(options.getInputSubscription()))
          .apply(
              "Write to BigQuery",
              BigQueryIO.<GenericRecord>write()
                  .to(options.getOutputTableSpec())
                  .useBeamSchema()
                  .withMethod(Method.STREAMING_INSERTS)
                  .withWriteDisposition(WriteDisposition.valueOf(options.getWriteDisposition()))
                  .withCreateDisposition(
                      CreateDisposition.valueOf(options.getCreateDisposition()))
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .withExtendedErrorInfo());

  writeResults
      .getFailedInsertsWithErr()
      .apply(
          "Create error payload",
          ErrorConverters.BigQueryInsertErrorToPubsubMessage.<GenericRecord>newBuilder()
              .setPayloadCoder(AvroCoder.of(schema))
              .setTranslateFunction(BigQueryConverters.TableRowToGenericRecordFn.of(schema))
              .build())
      .apply("Write failed records", PubsubIO.writeMessages().to(options.getOutputTopic()));

  // Execute the pipeline and return the result.
  return pipeline.run();
}
Example #19
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testClusteringThrowsWithoutPartitioning() throws Exception {
  p.enableAbandonedNodeEnforcement(false);
  testTimePartitioningClustering(Method.STREAMING_INSERTS, false, true);
}
Example #20
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testClusteringBatchLoads() throws Exception {
  testClustering(BigQueryIO.Write.Method.FILE_LOADS);
}
Example #21
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testClusteringStreamingInserts() throws Exception {
  testClustering(BigQueryIO.Write.Method.STREAMING_INSERTS);
}
Example #22
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testTimePartitioningBatchLoads() throws Exception {
  testTimePartitioning(BigQueryIO.Write.Method.FILE_LOADS);
}
Example #23
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testTimePartitioningStreamingInserts() throws Exception {
  testTimePartitioning(BigQueryIO.Write.Method.STREAMING_INSERTS);
}
Example #24
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
void testClustering(BigQueryIO.Write.Method insertMethod) throws Exception {
  testTimePartitioningClustering(insertMethod, true, true);
}
Example #25
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
void testTimePartitioning(BigQueryIO.Write.Method insertMethod) throws Exception {
  testTimePartitioningClustering(insertMethod, true, false);
}
Example #26
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
void testTimePartitioningClustering(
    BigQueryIO.Write.Method insertMethod, boolean enablePartitioning, boolean enableClustering)
    throws Exception {
  TableRow row1 = new TableRow().set("date", "2018-01-01").set("number", "1");
  TableRow row2 = new TableRow().set("date", "2018-01-02").set("number", "2");

  TimePartitioning timePartitioning = new TimePartitioning().setType("DAY").setField("date");
  Clustering clustering = new Clustering().setFields(ImmutableList.of("date"));
  // Two separate fields; chaining setName/setType twice on one TableFieldSchema
  // would produce a single field.
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("date").setType("DATE"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));

  Write<TableRow> writeTransform =
      BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id")
          .withTestServices(fakeBqServices)
          .withMethod(insertMethod)
          .withSchema(schema)
          .withoutValidation();

  if (enablePartitioning) {
    writeTransform = writeTransform.withTimePartitioning(timePartitioning);
  }
  if (enableClustering) {
    writeTransform = writeTransform.withClustering(clustering);
  }

  p.apply(Create.of(row1, row2)).apply(writeTransform);
  p.run();

  Table table =
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-id"));
  assertEquals(schema, table.getSchema());
  if (enablePartitioning) {
    assertEquals(timePartitioning, table.getTimePartitioning());
  }
  if (enableClustering) {
    assertEquals(clustering, table.getClustering());
  }
}
Example #27
Source File: BigQueryKmsKeyIT.java From beam with Apache License 2.0
@Test
public void testWithStreamingInserts() throws Exception {
  testQueryAndWrite(Method.STREAMING_INSERTS);
}
Example #28
Source File: BigQueryKmsKeyIT.java From beam with Apache License 2.0
@Test
public void testWithFileLoads() throws Exception {
  testQueryAndWrite(Method.FILE_LOADS);
}
Example #29
Source File: BigQueryChangeApplier.java From DataflowTemplates with Apache License 2.0
@Override
public PDone expand(PCollection<Row> input) {
  Pipeline p = input.getPipeline();
  Schema inputCollectionSchema = input.getSchema();

  PCollection<KV<String, KV<Schema, Schema>>> tableSchemaCollection =
      buildTableSchemaCollection(input);
  PCollectionView<Map<String, KV<Schema, Schema>>> schemaMapView =
      tableSchemaCollection.apply(View.asMap());

  PCollection<TableRow> updatesToWrite = formatIntoTableRows(input);

  updatesToWrite.apply(
      BigQueryIO.writeTableRows()
          .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(WriteDisposition.WRITE_APPEND)
          .withMethod(Method.STREAMING_INSERTS)
          .to(
              new ChangelogTableDynamicDestinations(
                  changeLogDataset, gcpProjectId, schemaMapView)));

  String jobPrefix =
      String.format(
          "beam_cdc_%s_%s_",
          gcpProjectId.replace(':', '_').replace('.', '_'), replicaDataset);

  // If the input collection does not have a primary key field, then we do not need to issue
  // periodic merge requests.
  if (inputCollectionSchema.hasField(DataflowCdcRowFormat.PRIMARY_KEY)) {
    p.apply(
            "MergeHeartbeat",
            GenerateSequence.from(0)
                .withRate(1, Duration.standardSeconds(updateFrequencySeconds)))
        .apply(
            "KeyByTable",
            ParDo.of(new KeySchemasByTableFn(schemaMapView)).withSideInputs(schemaMapView))
        .apply(
            "BuildMergeStatements",
            ParDo.of(
                new MergeStatementBuildingFn(changeLogDataset, replicaDataset, gcpProjectId)))
        .setCoder(
            SerializableCoder.of(
                TypeDescriptors.kvs(
                    TypeDescriptors.strings(), TypeDescriptor.of(BigQueryAction.class))))
        .apply("IssueMergeStatements", ParDo.of(new BigQueryStatementIssuingFn(jobPrefix)));
  }
  return PDone.in(p);
}