org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord Java Examples

The following examples show how to use org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord. Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
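Most of the examples below follow the same pattern: a SerializableFunction<SchemaAndRecord, T> is passed to BigQueryIO.read(...), and for each row it receives the Avro GenericRecord together with the table's TableSchema and maps them to a user type. A minimal sketch of that pattern, using a hypothetical table and column name that do not come from any of the examples:

import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord;
import org.apache.beam.sdk.values.PCollection;

static PCollection<String> readNameColumn(Pipeline p) {
  return p.apply(
      "ReadFromBigQuery",
      BigQueryIO.read((SchemaAndRecord schemaAndRecord) -> {
            // Each element pairs one table row, as an Avro GenericRecord, with the TableSchema.
            GenericRecord record = schemaAndRecord.getRecord();
            // GenericRecord.get(...) returns Object (Avro strings are Utf8), so convert explicitly.
            return String.valueOf(record.get("name"));
          })
          .from("my-project:my_dataset.my_table") // placeholder table reference
          .withCoder(StringUtf8Coder.of()));
}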
Example #1
Source File: BeamUtils.java    From nomulus with Apache License 2.0
/**
 * Checks that no expected fields in the record are missing.
 *
 * <p>Note that this simply makes sure the field is not null; it may still generate a parse error
 * when interpreting the string representation of an object.
 *
 * @throws IllegalStateException if the record returns null for any field in {@code fieldNames}
 */
public static void checkFieldsNotNull(
    ImmutableList<String> fieldNames, SchemaAndRecord schemaAndRecord) {
  GenericRecord record = schemaAndRecord.getRecord();
  ImmutableList<String> nullFields =
      fieldNames
          .stream()
          .filter(fieldName -> record.get(fieldName) == null)
          .collect(ImmutableList.toImmutableList());
  String missingFieldList = Joiner.on(", ").join(nullFields);
  if (!nullFields.isEmpty()) {
    throw new IllegalStateException(
        String.format(
            "Read unexpected null value for field(s) %s for record %s",
            missingFieldList, record));
  }
}
 
Example #2
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a valid key when a
 * field is of type Record.
 */
@Test
public void testAvroToEntityRecordField() throws Exception {
  // Create test data
  TableFieldSchema column = generateNestedTableFieldSchema();
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(column);
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Record record = generateNestedAvroRecord();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  // Assess results
  String expectedCauseMessage = String.format("Column [address] of type [RECORD] not supported.");
  assertTrue(!outputEntity.hasKey());
  assertEquals(
      expectedCauseMessage, outputEntity.getPropertiesMap().get("cause").getStringValue());
  assertEquals(record.toString(), outputEntity.getPropertiesMap().get("row").getStringValue());
}
 
Example #3
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with a default namespace
 * when the namespace is not specified.
 */
@Test
public void testAvroToEntityDefaultNamespace() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "int", idFieldDesc))
                      .append(",")
                      .append(generateShortStringField())
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, 1);
  builder.set(shortStringField, shortStringFieldValue);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  AvroToEntity noNamespaceConverter =
      AvroToEntity.newBuilder()
          .setEntityKind(entityKind)
          .setUniqueNameColumn(uniqueNameColumn)
          .build();
  Entity outputEntity = noNamespaceConverter.apply(inputBqData);
  // Assess results
  assertTrue(outputEntity.hasKey());
  assertEquals("", outputEntity.getKey().getPartitionId().getNamespaceId());
}
 
Example #4
Source File: BeamUtilsTest.java    From nomulus with Apache License 2.0
@Before
public void initializeRecord() {
  // Create a record with a given JSON schema.
  GenericRecord record = new GenericData.Record(new Schema.Parser().parse(GENERIC_SCHEMA));
  record.put("aString", "hello world");
  record.put("aFloat", 2.54);
  schemaAndRecord = new SchemaAndRecord(record, null);
}
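The GENERIC_SCHEMA constant parsed above is not included in this snippet. Given the two fields the test populates, a plausible reconstruction is an Avro record schema along these lines (the actual constant in nomulus may differ):

// Hypothetical reconstruction of GENERIC_SCHEMA; the real nomulus constant may differ.
private static final String GENERIC_SCHEMA =
    "{"
        + "\"type\": \"record\","
        + "\"name\": \"TestRecord\","
        + "\"fields\": ["
        + "  {\"name\": \"aString\", \"type\": \"string\"},"
        + "  {\"name\": \"aFloat\", \"type\": \"double\"}"
        + "]"
        + "}";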
 
Example #5
Source File: BillingEventTest.java    From nomulus with Apache License 2.0
@Test
public void test_nonNullPoNumber() {
  GenericRecord record = createRecord();
  record.put("poNumber", "905610");
  BillingEvent event = BillingEvent.parseFromRecord(new SchemaAndRecord(record, null));
  assertThat(event.poNumber()).isEqualTo("905610");
  InvoiceGroupingKey invoiceKey = event.getInvoiceGroupingKey();
  assertThat(invoiceKey.poNumber()).isEqualTo("905610");
}
 
Example #6
Source File: Subdomain.java    From nomulus with Apache License 2.0
/**
 * Constructs a {@link Subdomain} from an Apache Avro {@code SchemaAndRecord}.
 *
 * @see <a
 *     href=http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/generic/GenericData.Record.html>
 *     Apache AVRO GenericRecord</a>
 */
static Subdomain parseFromRecord(SchemaAndRecord schemaAndRecord) {
  checkFieldsNotNull(FIELD_NAMES, schemaAndRecord);
  GenericRecord record = schemaAndRecord.getRecord();
  return create(
      extractField(record, "fullyQualifiedDomainName"),
      extractField(record, "registrarClientId"),
      extractField(record, "registrarEmailAddress"));
}
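The extractField helper called here (and in Example #7 below) is not part of these snippets. Since GenericRecord.get returns Objects whose string form is parsed further downstream, a minimal sketch of such a helper could look like the following; the actual nomulus implementation may handle nulls or Avro Utf8 values differently:

// Hedged sketch of an extractField helper, not the actual nomulus implementation.
private static String extractField(GenericRecord record, String fieldName) {
  // GenericRecord.get returns Object (often org.apache.avro.util.Utf8 for strings),
  // so convert to a plain String before any further parsing.
  return String.valueOf(record.get(fieldName));
}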
 
Example #7
Source File: BillingEvent.java    From nomulus with Apache License 2.0
/**
 * Constructs a {@code BillingEvent} from a {@code SchemaAndRecord}.
 *
 * @see <a
 *     href=http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/generic/GenericData.Record.html>
 *     Apache AVRO GenericRecord</a>
 */
static BillingEvent parseFromRecord(SchemaAndRecord schemaAndRecord) {
  checkFieldsNotNull(FIELD_NAMES, schemaAndRecord);
  GenericRecord record = schemaAndRecord.getRecord();
  String flags = extractField(record, "flags");
  double amount = getDiscountedAmount(Double.parseDouble(extractField(record, "amount")), flags);
  return create(
      // We need to chain parsers off extractField because GenericRecord only returns
      // Objects, which contain a string representation of their underlying types.
      Long.parseLong(extractField(record, "id")),
      // Bigquery provides UNIX timestamps with microsecond precision.
      Instant.ofEpochMilli(Long.parseLong(extractField(record, "billingTime")) / 1000)
          .atZone(ZoneId.of("UTC")),
      Instant.ofEpochMilli(Long.parseLong(extractField(record, "eventTime")) / 1000)
          .atZone(ZoneId.of("UTC")),
      extractField(record, "registrarId"),
      extractField(record, "billingId"),
      extractField(record, "poNumber"),
      extractField(record, "tld"),
      extractField(record, "action"),
      extractField(record, "domain"),
      extractField(record, "repositoryId"),
      Integer.parseInt(extractField(record, "years")),
      extractField(record, "currency"),
      amount,
      flags);
}
 
Example #8
Source File: BigQueryHllSketchCompatibilityIT.java    From beam with Apache License 2.0
private void readSketchFromBigQuery(String tableId, Long expectedCount) {
  String tableSpec = String.format("%s.%s", DATASET_ID, tableId);
  String query =
      String.format(
          "SELECT HLL_COUNT.INIT(%s) AS %s FROM %s",
          DATA_FIELD_NAME, QUERY_RESULT_FIELD_NAME, tableSpec);

  SerializableFunction<SchemaAndRecord, byte[]> parseQueryResultToByteArray =
      input ->
          // BigQuery BYTES type corresponds to Java java.nio.ByteBuffer type
          HllCount.getSketchFromByteBuffer(
              (ByteBuffer) input.getRecord().get(QUERY_RESULT_FIELD_NAME));

  TestPipelineOptions options =
      TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);

  Pipeline p = Pipeline.create(options);
  PCollection<Long> result =
      p.apply(
              BigQueryIO.read(parseQueryResultToByteArray)
                  .fromQuery(query)
                  .usingStandardSql()
                  .withMethod(Method.DIRECT_READ)
                  .withCoder(ByteArrayCoder.of()))
          .apply(HllCount.MergePartial.globally()) // no-op, only for testing MergePartial
          .apply(HllCount.Extract.globally());
  PAssert.thatSingleton(result).isEqualTo(expectedCount);
  p.run().waitUntilFinish();
}
 
Example #9
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a valid key when a
 * Timestamp field is invalid.
 */
@Test
public void testAvroToEntityInvalidTimestampField() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  fields.add(new TableFieldSchema().setName(invalidTimestampField).setType("TIMESTAMP"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "string", idFieldDesc))
                      .append(",")
                      .append(
                          String.format(
                              avroFieldTemplate,
                              invalidTimestampField,
                              "long",
                              invalidTimestampFieldDesc))
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, idFieldValueStr);
  builder.set(invalidTimestampField, invalidTimestampFieldValueNanos);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  // Assess results
  assertTrue(!outputEntity.hasKey());
  assertTrue(
      outputEntity
          .getPropertiesMap()
          .get("cause")
          .getStringValue()
          .startsWith("Timestamp is not valid"));
  assertEquals(record.toString(), outputEntity.getPropertiesMap().get("row").getStringValue());
}
 
Example #10
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with a valid key when the
 * unique name column is string.
 */
@Test
public void testAvroToEntityStringIdColumn() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "string", idFieldDesc))
                      .append(",")
                      .append(generateShortStringField())
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, idFieldValueStr);
  builder.set(shortStringField, shortStringFieldValue);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  assertTrue(outputEntity.hasKey());
  assertEquals(idFieldValueStr, outputEntity.getKey().getPath(0).getName());
  validateMetadata(outputEntity);
}
 
Example #11
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with a valid key when the
 * unique name column is integer.
 */
@Test
public void testAvroToEntityIntegerIdColumn() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("INTEGER"));
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "int", idFieldDesc))
                      .append(",")
                      .append(generateShortStringField())
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, idFieldValueInt);
  builder.set(shortStringField, shortStringFieldValue);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  assertTrue(outputEntity.hasKey());
  assertEquals(idFieldValueStr, outputEntity.getKey().getPath(0).getName());
  validateMetadata(outputEntity);
}
 
Example #12
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a key when the
 * unique name column exceeds the maximum size allowed of 1500 bytes.
 */
@Test
public void testAvroToEntityTooLongIdColumn() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "string", idFieldDesc))
                      .append(",")
                      .append(generateShortStringField())
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, longStringFieldValue);
  builder.set(shortStringField, shortStringFieldValue);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  assertTrue(!outputEntity.hasKey());
}
 
Example #13
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a key when the
 * unique name column is null.
 */
@Test
public void testAvroToEntityNullIdColumn() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "null", idFieldDesc))
                      .append(",")
                      .append(generateShortStringField())
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, null);
  builder.set(shortStringField, shortStringFieldValue);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  assertTrue(!outputEntity.hasKey());
}
 
Example #14
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a key when the
 * unique name column is missing.
 */
@Test
public void testAvroToEntityNoIdColumn() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Record record =
      generateSingleFieldAvroRecord(
          shortStringField, "string", shortStringFieldDesc, shortStringFieldValue);
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  assertTrue(!outputEntity.hasKey());
}
 
Example #15
Source File: BigQueryToTFRecordTest.java    From DataflowTemplates with Apache License 2.0
@Test
public void testBigQueryToTFRecordWithExeception() throws Exception {
  expectedEx.expect(RuntimeException.class);
  expectedEx.expectMessage("Unsupported type: BOLEAN");

  Long i1 = new Long(0);
  double f1 = 0.0d;
  String s1 = "";
  byte[] b1 = new byte[8];

  record.put("int1", i1);
  record.put("float1", f1);
  record.put("string1", s1);
  record.put("bytes1", b1);
  record.put("bool1", true);

  List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
  fields.add(new TableFieldSchema().setName("int1").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("float1").setType("FLOAT"));
  fields.add(new TableFieldSchema().setName("string1").setType("STRING"));
  fields.add(new TableFieldSchema().setName("bytes1").setType("BYTES"));
  fields.add(new TableFieldSchema().setName("bool1").setType("BOLEAN"));
  final TableSchema tableSchema = new TableSchema();
  tableSchema.setFields(fields);
  final SchemaAndRecord schemaAndRecord = new SchemaAndRecord(record, tableSchema);

  byte[] gotBytes = record2Example(schemaAndRecord);
}
 
Example #16
Source File: BigQueryToTFRecord.java    From DataflowTemplates with Apache License 2.0
/**
 * The {@link BigQueryToTFRecord#record2Example(SchemaAndRecord)} method takes a
 * SchemaAndRecord object returned from a BigQueryIO.read() step and builds a TensorFlow Example
 * from the record.
 */
@VisibleForTesting
protected static byte[] record2Example(SchemaAndRecord schemaAndRecord) {
  Example.Builder example = Example.newBuilder();
  Features.Builder features = example.getFeaturesBuilder();
  GenericRecord record = schemaAndRecord.getRecord();
  for (TableFieldSchema field : schemaAndRecord.getTableSchema().getFields()) {
    Feature feature = buildFeature(record.get(field.getName()), field.getType());
    features.putFeature(field.getName(), feature);
  }
  return example.build().toByteArray();
}
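The buildFeature helper is not shown in this snippet. Judging from the expectations in Examples #15 and #18 (INTEGER maps to an int64 list, FLOAT to a float list, STRING and BYTES to byte lists, BOOLEAN to 0/1, and an unsupported type raises a RuntimeException), a rough sketch might look like the following; it is an approximation, not the actual DataflowTemplates code:

// Approximation of buildFeature inferred from the tests above; the real implementation
// may handle values such as ByteBuffer or Avro Utf8 differently.
// Assumes org.tensorflow.example.Feature and com.google.protobuf.ByteString.
private static Feature buildFeature(Object value, String bigQueryType) {
  Feature.Builder feature = Feature.newBuilder();
  switch (bigQueryType) {
    case "INTEGER":
      feature.getInt64ListBuilder().addValue((Long) value);
      break;
    case "FLOAT":
      feature.getFloatListBuilder().addValue(((Number) value).floatValue());
      break;
    case "STRING":
      feature.getBytesListBuilder().addValue(ByteString.copyFromUtf8(value.toString()));
      break;
    case "BYTES":
      feature.getBytesListBuilder().addValue(ByteString.copyFrom((byte[]) value));
      break;
    case "BOOLEAN":
      feature.getInt64ListBuilder().addValue(((Boolean) value) ? 1 : 0);
      break;
    default:
      throw new RuntimeException("Unsupported type: " + bigQueryType);
  }
  return feature.build();
}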
 
Example #17
Source File: Read.java    From gcp-ingestion with Mozilla Public License 2.0
@Override
public PCollection<PubsubMessage> expand(PBegin input) {
  BigQueryIO.TypedRead<PubsubMessage> read = BigQueryIO
      .read((SchemaAndRecord schemaAndRecord) -> {
        TableSchema tableSchema = schemaAndRecord.getTableSchema();
        GenericRecord record = schemaAndRecord.getRecord();

        // We have to take care not to read additional bytes; see
        // https://github.com/mozilla/gcp-ingestion/issues/1266
        ByteBuffer byteBuffer = (ByteBuffer) record.get(FieldName.PAYLOAD);
        byte[] payload = new byte[byteBuffer.limit()];
        byteBuffer.get(payload);

        // We populate attributes for all simple string and timestamp fields, which is complete
        // for raw and error tables.
        // Decoded payload tables also have a top-level nested "metadata" struct; we can mostly
        // just drop this since the same metadata object is encoded in the payload, but we do
        // parse out the document namespace, type, and version since those are necessary in the
        // case of a Sink job that doesn't look at the payload but still may need those
        // attributes in order to route to the correct destination.
        Map<String, String> attributes = new HashMap<>();
        tableSchema.getFields().stream() //
            .filter(f -> !"REPEATED".equals(f.getMode())) //
            .forEach(f -> {
              Object value = record.get(f.getName());
              if (value != null) {
                switch (f.getType()) {
                  case "TIMESTAMP":
                    attributes.put(f.getName(), Time.epochMicrosToTimestamp((Long) value));
                    break;
                  case "STRING":
                  case "INTEGER":
                  case "INT64":
                    attributes.put(f.getName(), value.toString());
                    break;
                  case "RECORD":
                  case "STRUCT":
                    // The only struct we support is the top-level nested "metadata" and we
                    // extract only the attributes needed for destination routing.
                    GenericRecord metadata = (GenericRecord) value;
                    Arrays
                        .asList(Attribute.DOCUMENT_NAMESPACE, Attribute.DOCUMENT_TYPE,
                            Attribute.DOCUMENT_VERSION)
                        .forEach(v -> attributes.put(v, metadata.get(v).toString()));
                    break;
                  // Ignore any other types (only the payload BYTES field should hit this).
                  default:
                    break;
                }
              }
            });
        return new PubsubMessage(payload, attributes);
      }) //
      .withCoder(PubsubMessageWithAttributesCoder.of()) //
      .withTemplateCompatibility() //
      .withoutValidation() //
      .withMethod(method.method);
  switch (source) {
    case TABLE:
      read = read.from(tableSpec);
      break;
    default:
    case QUERY:
      read = read.fromQuery(tableSpec).usingStandardSql();
  }
  if (source == Source.TABLE && method == BigQueryReadMethod.storageapi) {
    if (rowRestriction != null) {
      read = read.withRowRestriction(rowRestriction);
    }
    if (selectedFields != null) {
      read = read.withSelectedFields(selectedFields);
    }
  }
  return input.apply(read);
}
 
Example #18
Source File: BigQueryToTFRecordTest.java    From DataflowTemplates with Apache License 2.0
/** Test {@link BigQueryToTFRecord} correctly outputs TFRecord. */
@Test
public void record2ExampleTest() throws InvalidProtocolBufferException {

  Long i1 = new Long(0);
  double f1 = 0.0d;
  String s1 = "";
  byte[] b1 = new byte[8];

  record.put("int1", i1);
  record.put("float1", f1);
  record.put("string1", s1);
  record.put("bytes1", b1);
  record.put("bool1", true);

  List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
  fields.add(new TableFieldSchema().setName("int1").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("float1").setType("FLOAT"));
  fields.add(new TableFieldSchema().setName("string1").setType("STRING"));
  fields.add(new TableFieldSchema().setName("bytes1").setType("BYTES"));
  fields.add(new TableFieldSchema().setName("bool1").setType("BOOLEAN"));
  final TableSchema tableSchema = new TableSchema();
  tableSchema.setFields(fields);
  final SchemaAndRecord schemaAndRecord = new SchemaAndRecord(record, tableSchema);

  Example.Builder example = Example.newBuilder();
  Features.Builder features = example.getFeaturesBuilder();
  Feature.Builder int1 = Feature.newBuilder();
  Feature.Builder float1 = Feature.newBuilder();
  Feature.Builder string1 = Feature.newBuilder();
  Feature.Builder bytes1 = Feature.newBuilder();
  Feature.Builder bool1 = Feature.newBuilder();

  int1.getInt64ListBuilder().addValue(i1);
  float1.getFloatListBuilder().addValue((float) f1);
  string1.getBytesListBuilder().addValue(ByteString.copyFromUtf8(s1));
  bytes1.getBytesListBuilder().addValue(ByteString.copyFrom(b1));
  bool1.getInt64ListBuilder().addValue(1);

  features.putFeature("int1", int1.build());
  features.putFeature("float1", float1.build());
  features.putFeature("string1", string1.build());
  features.putFeature("bytes1", bytes1.build());
  features.putFeature("bool1", bool1.build());

  byte[] gotBytes = record2Example(schemaAndRecord);
  Example gotExample = Example.parseFrom(gotBytes);

  Map<String, Feature> gotFeatures = gotExample.getFeatures().getFeatureMap();
  Feature[] got = new Feature[5];
  got[0] = gotFeatures.get("int1");
  got[1] = gotFeatures.get("float1");
  got[2] = gotFeatures.get("string1");
  got[3] = gotFeatures.get("bytes1");
  got[4] = gotFeatures.get("bool1");

  final Example wantExample = example.build();
  Map<String, Feature> wantFeatures = wantExample.getFeatures().getFeatureMap();
  Feature[] want = new Feature[5];
  want[0] = wantFeatures.get("int1");
  want[1] = wantFeatures.get("float1");
  want[2] = wantFeatures.get("string1");
  want[3] = wantFeatures.get("bytes1");
  want[4] = wantFeatures.get("bool1");

  for (int i = 0; i < 5; i++) {
    Assert.assertThat(got[i], equalTo(want[i]));
  }
}
 
Example #19
Source File: BillingEventTest.java    From nomulus with Apache License 2.0
@Before
public void initializeRecord() {
  // Create a record with a given JSON schema.
  schemaAndRecord = new SchemaAndRecord(createRecord(), null);
}
 
Example #20
Source File: BigQueryToParquet.java    From DataflowTemplates with Apache License 2.0
/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
private static PipelineResult run(BigQueryToParquetOptions options) {

  // Create the pipeline.
  Pipeline pipeline = Pipeline.create(options);

  TableReadOptions.Builder builder = TableReadOptions.newBuilder();

  /* Add fields to filter export on, if any. */
  if (options.getFields() != null) {
    builder.addAllSelectedFields(Arrays.asList(options.getFields().split(",\\s*")));
  }

  TableReadOptions tableReadOptions = builder.build();
  BigQueryStorageClient client = BigQueryStorageClientFactory.create();
  ReadSession session =
      ReadSessionFactory.create(client, options.getTableRef(), tableReadOptions);

  // Extract schema from ReadSession
  Schema schema = getTableSchema(session);
  client.close();

  /*
   * Steps: 1) Read records from BigQuery via BigQueryIO.
   *        2) Write records to Google Cloud Storage in Parquet format.
   */
  pipeline
      /*
       * Step 1: Read records via BigQueryIO using supplied schema as a PCollection of
       *         {@link GenericRecord}.
       */
      .apply(
          "ReadFromBigQuery",
          BigQueryIO.read(SchemaAndRecord::getRecord)
              .from(options.getTableRef())
              .withTemplateCompatibility()
              .withMethod(Method.DIRECT_READ)
              .withCoder(AvroCoder.of(schema))
              .withReadOptions(tableReadOptions))
      /*
       * Step 2: Write records to Google Cloud Storage as one or more Parquet files
       *         via {@link ParquetIO}.
       */
      .apply(
          "WriteToParquet",
          FileIO.<GenericRecord>write()
              .via(ParquetIO.sink(schema))
              .to(options.getBucket())
              .withNumShards(options.getNumShards())
              .withSuffix(FILE_SUFFIX));

  // Execute the pipeline and return the result.
  return pipeline.run();
}