org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord Java Examples
The following examples show how to use
org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord.
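For orientation before the examples: a SchemaAndRecord pairs the Avro GenericRecord for one row with the BigQuery TableSchema it was read against, and it is the input to the parse function you pass to BigQueryIO.read(...). The minimal sketch below shows that basic pattern; the table reference and the "name" column are hypothetical stand-ins, while the project-specific examples that follow do the same thing with their own types.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class SchemaAndRecordSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Each SchemaAndRecord carries the Avro GenericRecord for one row plus the table's schema.
    PCollection<String> names =
        pipeline.apply(
            "ReadNames",
            BigQueryIO.read(
                    (SchemaAndRecord schemaAndRecord) ->
                        // "name" is a hypothetical STRING column in the source table.
                        String.valueOf(schemaAndRecord.getRecord().get("name")))
                .from("my-project:my_dataset.my_table") // hypothetical table reference
                .withCoder(StringUtf8Coder.of()));

    pipeline.run().waitUntilFinish();
  }
}

Because the typed read produces arbitrary Java objects, a coder for the output type (here StringUtf8Coder) must be supplied via withCoder.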
Example #1
Source File: BeamUtils.java From nomulus with Apache License 2.0 | 6 votes |
/**
 * Checks that no expected fields in the record are missing.
 *
 * <p>Note that this simply makes sure the field is not null; it may still generate a parse error
 * when interpreting the string representation of an object.
 *
 * @throws IllegalStateException if the record returns null for any field in {@code fieldNames}
 */
public static void checkFieldsNotNull(
    ImmutableList<String> fieldNames, SchemaAndRecord schemaAndRecord) {
  GenericRecord record = schemaAndRecord.getRecord();
  ImmutableList<String> nullFields =
      fieldNames
          .stream()
          .filter(fieldName -> record.get(fieldName) == null)
          .collect(ImmutableList.toImmutableList());
  String missingFieldList = Joiner.on(", ").join(nullFields);
  if (!nullFields.isEmpty()) {
    throw new IllegalStateException(
        String.format(
            "Read unexpected null value for field(s) %s for record %s",
            missingFieldList, record));
  }
}
Example #2
Source File: BigQueryConvertersTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a valid key when a
 * field is of type Record.
 */
@Test
public void testAvroToEntityRecordField() throws Exception {
  // Create test data
  TableFieldSchema column = generateNestedTableFieldSchema();
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(column);
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Record record = generateNestedAvroRecord();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  // Assess results
  String expectedCauseMessage = String.format("Column [address] of type [RECORD] not supported.");
  assertTrue(!outputEntity.hasKey());
  assertEquals(
      expectedCauseMessage, outputEntity.getPropertiesMap().get("cause").getStringValue());
  assertEquals(record.toString(), outputEntity.getPropertiesMap().get("row").getStringValue());
}
Example #3
Source File: BigQueryConvertersTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with a default namespace
 * when the namespace is not specified.
 */
@Test
public void testAvroToEntityDefaultNamespace() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "int", idFieldDesc))
                      .append(",")
                      .append(generateShortStringField())
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, 1);
  builder.set(shortStringField, shortStringFieldValue);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  AvroToEntity noNamespaceConverter =
      AvroToEntity.newBuilder()
          .setEntityKind(entityKind)
          .setUniqueNameColumn(uniqueNameColumn)
          .build();
  Entity outputEntity = noNamespaceConverter.apply(inputBqData);
  // Assess results
  assertTrue(outputEntity.hasKey());
  assertEquals("", outputEntity.getKey().getPartitionId().getNamespaceId());
}
Example #4
Source File: BeamUtilsTest.java From nomulus with Apache License 2.0 | 5 votes |
@Before
public void initializeRecord() {
  // Create a record with a given JSON schema.
  GenericRecord record = new GenericData.Record(new Schema.Parser().parse(GENERIC_SCHEMA));
  record.put("aString", "hello world");
  record.put("aFloat", 2.54);
  schemaAndRecord = new SchemaAndRecord(record, null);
}
Example #5
Source File: BillingEventTest.java From nomulus with Apache License 2.0 | 5 votes |
@Test
public void test_nonNullPoNumber() {
  GenericRecord record = createRecord();
  record.put("poNumber", "905610");
  BillingEvent event = BillingEvent.parseFromRecord(new SchemaAndRecord(record, null));
  assertThat(event.poNumber()).isEqualTo("905610");
  InvoiceGroupingKey invoiceKey = event.getInvoiceGroupingKey();
  assertThat(invoiceKey.poNumber()).isEqualTo("905610");
}
Example #6
Source File: Subdomain.java From nomulus with Apache License 2.0 | 5 votes |
/**
 * Constructs a {@link Subdomain} from an Apache Avro {@code SchemaAndRecord}.
 *
 * @see <a
 *     href=http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/generic/GenericData.Record.html>
 *     Apache AVRO GenericRecord</a>
 */
static Subdomain parseFromRecord(SchemaAndRecord schemaAndRecord) {
  checkFieldsNotNull(FIELD_NAMES, schemaAndRecord);
  GenericRecord record = schemaAndRecord.getRecord();
  return create(
      extractField(record, "fullyQualifiedDomainName"),
      extractField(record, "registrarClientId"),
      extractField(record, "registrarEmailAddress"));
}
Example #7
Source File: BillingEvent.java From nomulus with Apache License 2.0 | 5 votes |
/**
 * Constructs a {@code BillingEvent} from a {@code SchemaAndRecord}.
 *
 * @see <a
 *     href=http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/generic/GenericData.Record.html>
 *     Apache AVRO GenericRecord</a>
 */
static BillingEvent parseFromRecord(SchemaAndRecord schemaAndRecord) {
  checkFieldsNotNull(FIELD_NAMES, schemaAndRecord);
  GenericRecord record = schemaAndRecord.getRecord();
  String flags = extractField(record, "flags");
  double amount = getDiscountedAmount(Double.parseDouble(extractField(record, "amount")), flags);
  return create(
      // We need to chain parsers off extractField because GenericRecord only returns
      // Objects, which contain a string representation of their underlying types.
      Long.parseLong(extractField(record, "id")),
      // Bigquery provides UNIX timestamps with microsecond precision.
      Instant.ofEpochMilli(Long.parseLong(extractField(record, "billingTime")) / 1000)
          .atZone(ZoneId.of("UTC")),
      Instant.ofEpochMilli(Long.parseLong(extractField(record, "eventTime")) / 1000)
          .atZone(ZoneId.of("UTC")),
      extractField(record, "registrarId"),
      extractField(record, "billingId"),
      extractField(record, "poNumber"),
      extractField(record, "tld"),
      extractField(record, "action"),
      extractField(record, "domain"),
      extractField(record, "repositoryId"),
      Integer.parseInt(extractField(record, "years")),
      extractField(record, "currency"),
      amount,
      flags);
}
Example #8
Source File: BigQueryHllSketchCompatibilityIT.java From beam with Apache License 2.0 | 5 votes |
private void readSketchFromBigQuery(String tableId, Long expectedCount) {
  String tableSpec = String.format("%s.%s", DATASET_ID, tableId);
  String query =
      String.format(
          "SELECT HLL_COUNT.INIT(%s) AS %s FROM %s",
          DATA_FIELD_NAME, QUERY_RESULT_FIELD_NAME, tableSpec);
  SerializableFunction<SchemaAndRecord, byte[]> parseQueryResultToByteArray =
      input ->
          // BigQuery BYTES type corresponds to Java java.nio.ByteBuffer type
          HllCount.getSketchFromByteBuffer(
              (ByteBuffer) input.getRecord().get(QUERY_RESULT_FIELD_NAME));
  TestPipelineOptions options =
      TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
  Pipeline p = Pipeline.create(options);
  PCollection<Long> result =
      p.apply(
              BigQueryIO.read(parseQueryResultToByteArray)
                  .fromQuery(query)
                  .usingStandardSql()
                  .withMethod(Method.DIRECT_READ)
                  .withCoder(ByteArrayCoder.of()))
          .apply(HllCount.MergePartial.globally()) // no-op, only for testing MergePartial
          .apply(HllCount.Extract.globally());
  PAssert.thatSingleton(result).isEqualTo(expectedCount);
  p.run().waitUntilFinish();
}
Example #9
Source File: BigQueryConvertersTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a valid key when a
 * Timestamp field is invalid.
 */
@Test
public void testAvroToEntityInvalidTimestampField() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  fields.add(new TableFieldSchema().setName(invalidTimestampField).setType("TIMESTAMP"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "string", idFieldDesc))
                      .append(",")
                      .append(
                          String.format(
                              avroFieldTemplate,
                              invalidTimestampField,
                              "long",
                              invalidTimestampFieldDesc))
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, idFieldValueStr);
  builder.set(invalidTimestampField, invalidTimestampFieldValueNanos);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  // Assess results
  assertTrue(!outputEntity.hasKey());
  assertTrue(
      outputEntity
          .getPropertiesMap()
          .get("cause")
          .getStringValue()
          .startsWith("Timestamp is not valid"));
  assertEquals(record.toString(), outputEntity.getPropertiesMap().get("row").getStringValue());
}
Example #10
Source File: BigQueryConvertersTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with a valid key when the
 * unique name column is string.
 */
@Test
public void testAvroToEntityStringIdColumn() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "string", idFieldDesc))
                      .append(",")
                      .append(generateShortStringField())
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, idFieldValueStr);
  builder.set(shortStringField, shortStringFieldValue);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  assertTrue(outputEntity.hasKey());
  assertEquals(idFieldValueStr, outputEntity.getKey().getPath(0).getName());
  validateMetadata(outputEntity);
}
Example #11
Source File: BigQueryConvertersTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with a valid key when the
 * unique name column is integer.
 */
@Test
public void testAvroToEntityIntegerIdColumn() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("INTEGER"));
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "int", idFieldDesc))
                      .append(",")
                      .append(generateShortStringField())
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, idFieldValueInt);
  builder.set(shortStringField, shortStringFieldValue);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  assertTrue(outputEntity.hasKey());
  assertEquals(idFieldValueStr, outputEntity.getKey().getPath(0).getName());
  validateMetadata(outputEntity);
}
Example #12
Source File: BigQueryConvertersTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a key when the
 * unique name column exceeds the maximum size allowed of 1500 bytes.
 */
@Test
public void testAvroToEntityTooLongIdColumn() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "string", idFieldDesc))
                      .append(",")
                      .append(generateShortStringField())
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, longStringFieldValue);
  builder.set(shortStringField, shortStringFieldValue);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  assertTrue(!outputEntity.hasKey());
}
Example #13
Source File: BigQueryConvertersTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a key when the
 * unique name column is null.
 */
@Test
public void testAvroToEntityNullIdColumn() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "null", idFieldDesc))
                      .append(",")
                      .append(generateShortStringField())
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, null);
  builder.set(shortStringField, shortStringFieldValue);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  assertTrue(!outputEntity.hasKey());
}
Example #14
Source File: BigQueryConvertersTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a key when the
 * unique name column is missing.
 */
@Test
public void testAvroToEntityNoIdColumn() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Record record =
      generateSingleFieldAvroRecord(
          shortStringField, "string", shortStringFieldDesc, shortStringFieldValue);
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  assertTrue(!outputEntity.hasKey());
}
Example #15
Source File: BigQueryToTFRecordTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
@Test
public void testBigQueryToTFRecordWithExeception() throws Exception {
  expectedEx.expect(RuntimeException.class);
  expectedEx.expectMessage("Unsupported type: BOLEAN");

  Long i1 = new Long(0);
  double f1 = 0.0d;
  String s1 = "";
  byte[] b1 = new byte[8];
  record.put("int1", i1);
  record.put("float1", f1);
  record.put("string1", s1);
  record.put("bytes1", b1);
  record.put("bool1", true);

  List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
  fields.add(new TableFieldSchema().setName("int1").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("float1").setType("FLOAT"));
  fields.add(new TableFieldSchema().setName("string1").setType("STRING"));
  fields.add(new TableFieldSchema().setName("bytes1").setType("BYTES"));
  fields.add(new TableFieldSchema().setName("bool1").setType("BOLEAN"));
  final TableSchema tableSchema = new TableSchema();
  tableSchema.setFields(fields);
  final SchemaAndRecord schemaAndRecord = new SchemaAndRecord(record, tableSchema);

  byte[] gotBytes = record2Example(schemaAndRecord);
}
Example #16
Source File: BigQueryToTFRecord.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * The {@link BigQueryToTFRecord#record2Example(SchemaAndRecord)} method takes in a
 * SchemaAndRecord Object returned from a BigQueryIO.read() step and builds a TensorFlow Example
 * from the record.
 */
@VisibleForTesting
protected static byte[] record2Example(SchemaAndRecord schemaAndRecord) {
  Example.Builder example = Example.newBuilder();
  Features.Builder features = example.getFeaturesBuilder();
  GenericRecord record = schemaAndRecord.getRecord();
  for (TableFieldSchema field : schemaAndRecord.getTableSchema().getFields()) {
    Feature feature = buildFeature(record.get(field.getName()), field.getType());
    features.putFeature(field.getName(), feature);
  }
  return example.build().toByteArray();
}
Example #17
Source File: Read.java From gcp-ingestion with Mozilla Public License 2.0 | 4 votes |
@Override
public PCollection<PubsubMessage> expand(PBegin input) {
  BigQueryIO.TypedRead<PubsubMessage> read = BigQueryIO
      .read((SchemaAndRecord schemaAndRecord) -> {
        TableSchema tableSchema = schemaAndRecord.getTableSchema();
        GenericRecord record = schemaAndRecord.getRecord();

        // We have to take care not to read additional bytes; see
        // https://github.com/mozilla/gcp-ingestion/issues/1266
        ByteBuffer byteBuffer = (ByteBuffer) record.get(FieldName.PAYLOAD);
        byte[] payload = new byte[byteBuffer.limit()];
        byteBuffer.get(payload);

        // We populate attributes for all simple string and timestamp fields, which is complete
        // for raw and error tables.
        // Decoded payload tables also have a top-level nested "metadata" struct; we can mostly
        // just drop this since the same metadata object is encoded in the payload, but we do
        // parse out the document namespace, type, and version since those are necessary in the
        // case of a Sink job that doesn't look at the payload but still may need those
        // attributes in order to route to the correct destination.
        Map<String, String> attributes = new HashMap<>();
        tableSchema.getFields().stream() //
            .filter(f -> !"REPEATED".equals(f.getMode())) //
            .forEach(f -> {
              Object value = record.get(f.getName());
              if (value != null) {
                switch (f.getType()) {
                  case "TIMESTAMP":
                    attributes.put(f.getName(), Time.epochMicrosToTimestamp((Long) value));
                    break;
                  case "STRING":
                  case "INTEGER":
                  case "INT64":
                    attributes.put(f.getName(), value.toString());
                    break;
                  case "RECORD":
                  case "STRUCT":
                    // The only struct we support is the top-level nested "metadata" and we
                    // extract only the attributes needed for destination routing.
                    GenericRecord metadata = (GenericRecord) value;
                    Arrays
                        .asList(Attribute.DOCUMENT_NAMESPACE, Attribute.DOCUMENT_TYPE,
                            Attribute.DOCUMENT_VERSION)
                        .forEach(v -> attributes.put(v, metadata.get(v).toString()));
                    break;
                  // Ignore any other types (only the payload BYTES field should hit this).
                  default:
                    break;
                }
              }
            });
        return new PubsubMessage(payload, attributes);
      }) //
      .withCoder(PubsubMessageWithAttributesCoder.of()) //
      .withTemplateCompatibility() //
      .withoutValidation() //
      .withMethod(method.method);
  switch (source) {
    case TABLE:
      read = read.from(tableSpec);
      break;
    default:
    case QUERY:
      read = read.fromQuery(tableSpec).usingStandardSql();
  }
  if (source == Source.TABLE && method == BigQueryReadMethod.storageapi) {
    if (rowRestriction != null) {
      read = read.withRowRestriction(rowRestriction);
    }
    if (selectedFields != null) {
      read = read.withSelectedFields(selectedFields);
    }
  }
  return input.apply(read);
}
Example #18
Source File: BigQueryToTFRecordTest.java From DataflowTemplates with Apache License 2.0 | 4 votes |
/** Test {@link BigQueryToTFRecord} correctly outputs TFRecord. */
@Test
public void record2ExampleTest() throws InvalidProtocolBufferException {
  Long i1 = new Long(0);
  double f1 = 0.0d;
  String s1 = "";
  byte[] b1 = new byte[8];
  record.put("int1", i1);
  record.put("float1", f1);
  record.put("string1", s1);
  record.put("bytes1", b1);
  record.put("bool1", true);

  List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
  fields.add(new TableFieldSchema().setName("int1").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("float1").setType("FLOAT"));
  fields.add(new TableFieldSchema().setName("string1").setType("STRING"));
  fields.add(new TableFieldSchema().setName("bytes1").setType("BYTES"));
  fields.add(new TableFieldSchema().setName("bool1").setType("BOOLEAN"));
  final TableSchema tableSchema = new TableSchema();
  tableSchema.setFields(fields);
  final SchemaAndRecord schemaAndRecord = new SchemaAndRecord(record, tableSchema);

  Example.Builder example = Example.newBuilder();
  Features.Builder features = example.getFeaturesBuilder();
  Feature.Builder int1 = Feature.newBuilder();
  Feature.Builder float1 = Feature.newBuilder();
  Feature.Builder string1 = Feature.newBuilder();
  Feature.Builder bytes1 = Feature.newBuilder();
  Feature.Builder bool1 = Feature.newBuilder();
  int1.getInt64ListBuilder().addValue(i1);
  float1.getFloatListBuilder().addValue((float) f1);
  string1.getBytesListBuilder().addValue(ByteString.copyFromUtf8(s1));
  bytes1.getBytesListBuilder().addValue(ByteString.copyFrom(b1));
  bool1.getInt64ListBuilder().addValue(1);
  features.putFeature("int1", int1.build());
  features.putFeature("float1", float1.build());
  features.putFeature("string1", string1.build());
  features.putFeature("bytes1", bytes1.build());
  features.putFeature("bool1", bool1.build());

  byte[] gotBytes = record2Example(schemaAndRecord);
  Example gotExample = Example.parseFrom(gotBytes);
  Map<String, Feature> gotFeatures = gotExample.getFeatures().getFeatureMap();
  Feature[] got = new Feature[5];
  got[0] = gotFeatures.get("int1");
  got[1] = gotFeatures.get("float1");
  got[2] = gotFeatures.get("string1");
  got[3] = gotFeatures.get("bytes1");
  got[4] = gotFeatures.get("bool1");

  final Example wantExample = example.build();
  Map<String, Feature> wantFeatures = wantExample.getFeatures().getFeatureMap();
  Feature[] want = new Feature[5];
  want[0] = wantFeatures.get("int1");
  want[1] = wantFeatures.get("float1");
  want[2] = wantFeatures.get("string1");
  want[3] = wantFeatures.get("bytes1");
  want[4] = wantFeatures.get("bool1");
  for (int i = 0; i < 5; i++) {
    Assert.assertThat(got[i], equalTo(want[i]));
  }
}
Example #19
Source File: BillingEventTest.java From nomulus with Apache License 2.0 | 4 votes |
@Before
public void initializeRecord() {
  // Create a record with a given JSON schema.
  schemaAndRecord = new SchemaAndRecord(createRecord(), null);
}
Example #20
Source File: BigQueryToParquet.java From DataflowTemplates with Apache License 2.0 | 4 votes |
/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
private static PipelineResult run(BigQueryToParquetOptions options) {
  // Create the pipeline.
  Pipeline pipeline = Pipeline.create(options);

  TableReadOptions.Builder builder = TableReadOptions.newBuilder();

  /* Add fields to filter export on, if any. */
  if (options.getFields() != null) {
    builder.addAllSelectedFields(Arrays.asList(options.getFields().split(",\\s*")));
  }

  TableReadOptions tableReadOptions = builder.build();
  BigQueryStorageClient client = BigQueryStorageClientFactory.create();
  ReadSession session =
      ReadSessionFactory.create(client, options.getTableRef(), tableReadOptions);

  // Extract schema from ReadSession
  Schema schema = getTableSchema(session);
  client.close();

  /*
   * Steps: 1) Read records from BigQuery via BigQueryIO.
   *        2) Write records to Google Cloud Storage in Parquet format.
   */
  pipeline
      /*
       * Step 1: Read records via BigQueryIO using supplied schema as a PCollection of
       *         {@link GenericRecord}.
       */
      .apply(
          "ReadFromBigQuery",
          BigQueryIO.read(SchemaAndRecord::getRecord)
              .from(options.getTableRef())
              .withTemplateCompatibility()
              .withMethod(Method.DIRECT_READ)
              .withCoder(AvroCoder.of(schema))
              .withReadOptions(tableReadOptions))
      /*
       * Step 2: Write records to Google Cloud Storage as one or more Parquet files
       *         via {@link ParquetIO}.
       */
      .apply(
          "WriteToParquet",
          FileIO.<GenericRecord>write()
              .via(ParquetIO.sink(schema))
              .to(options.getBucket())
              .withNumShards(options.getNumShards())
              .withSuffix(FILE_SUFFIX));

  // Execute the pipeline and return the result.
  return pipeline.run();
}