org.apache.beam.sdk.schemas.Schema#Builder

Source File: BigQueryUtils.java From beam with Apache License 2.0

7 votes

/**
 * Builds a Beam {@link Schema} from BigQuery field definitions, producing one Beam field per
 * entry in list order.
 *
 * <p>A REPEATED field becomes an array of its element type. A field is nullable when its mode is
 * unset or NULLABLE. A non-empty description is carried over to the Beam field.
 */
private static Schema fromTableFieldSchema(List<TableFieldSchema> tableFieldSchemas) {
  Schema.Builder result = Schema.builder();
  for (TableFieldSchema source : tableFieldSchemas) {
    FieldType type = fromTableFieldSchemaType(source.getType(), source.getFields());

    // A missing mode is represented as null from here on.
    Mode mode = Optional.ofNullable(source.getMode()).map(Mode::valueOf).orElse(null);
    if (mode == Mode.REPEATED) {
      type = FieldType.array(type);
    }

    // Unset and NULLABLE modes both yield a nullable Beam field.
    boolean isNullable = mode == null || mode == Mode.NULLABLE;
    Field beamField = Field.of(source.getName(), type).withNullable(isNullable);

    String description = source.getDescription();
    if (description != null && !description.isEmpty()) {
      beamField = beamField.withDescription(description);
    }
    result.addField(beamField);
  }
  return result.build();
}

Source File: Select.java From beam with Apache License 2.0

5 votes

/**
 * Returns a copy of {@code schema} in which every field is renamed to a freshly generated random
 * UUID string and its type is passed through the type-level {@code uniquifyNames}.
 */
private static Schema uniquifyNames(Schema schema) {
  Schema.Builder renamed = new Schema.Builder();
  schema
      .getFields()
      .forEach(
          original ->
              renamed.addField(UUID.randomUUID().toString(), uniquifyNames(original.getType())));
  return renamed.build();
}

Source File: SchemaAggregateFn.java From beam with Apache License 2.0

5 votes

/** Assembles a schema from the {@code outputField} of each aggregation, in list order. */
private Schema getOutputSchema(List<FieldAggregation> fieldAggregations) {
  Schema.Builder builder = Schema.builder();
  fieldAggregations.forEach(agg -> builder.addField(agg.outputField));
  return builder.build();
}

Source File: CoGroup.java From beam with Apache License 2.0

5 votes

/**
 * Builds the unexpanded join output schema: a row field named {@code keyFieldName} holding the
 * key schema, followed by one iterable-of-rows field per component schema.
 */
static Schema getUnexandedOutputSchema(String keyFieldName, JoinInformation joinInformation) {
  Schema.Builder output = Schema.builder().addRowField(keyFieldName, joinInformation.keySchema);
  joinInformation.componentSchemas.forEach(
      (tag, componentSchema) -> output.addIterableField(tag, FieldType.row(componentSchema)));
  return output.build();
}

Source File: CoGroup.java From beam with Apache License 2.0

5 votes

/**
 * Builds the expanded join output schema, which has one ROW-typed field per component schema.
 * A field is made nullable when {@code joinArgs} reports optional participation for its name.
 */
static Schema getExpandedOutputSchema(JoinInformation joinInformation, JoinArguments joinArgs) {
  Schema.Builder joined = Schema.builder();
  joinInformation.componentSchemas.forEach(
      (tag, componentSchema) -> {
        FieldType rowType = FieldType.row(componentSchema);
        // Inputs with optional participation may be absent from a joined row.
        boolean optional = joinArgs.getOptionalParticipation(tag);
        joined.addField(tag, optional ? rowType.withNullable(true) : rowType);
      });
  return joined.build();
}

Source File: AvroUtils.java From beam with Apache License 2.0

5 votes

/**
 * Translates an AVRO schema into a Beam row schema, converting each AVRO field with
 * {@code toBeamField} and keeping the AVRO doc string, when present, as the Beam description.
 *
 * @param schema schema of type RECORD
 */
public static Schema toBeamSchema(org.apache.avro.Schema schema) {
  Schema.Builder beamSchema = Schema.builder();
  for (org.apache.avro.Schema.Field avroField : schema.getFields()) {
    Field converted = toBeamField(avroField);
    String doc = avroField.doc();
    beamSchema.addField(doc == null ? converted : converted.withDescription(doc));
  }
  return beamSchema.build();
}

Source File: SelectHelpers.java From beam with Apache License 2.0

5 votes

/** Concatenates the fields of all given schemas, in iteration order, into a single schema. */
private static Schema union(Iterable<Schema> schemas) {
  Schema.Builder merged = Schema.builder();
  schemas.forEach(part -> merged.addFields(part.getFields()));
  return merged.build();
}

Source File: StaticSchemaInference.java From beam with Apache License 2.0

5 votes

/**
 * Derives a schema from a Java class.
 *
 * <p>The supplied function decides which members of the class become fields; callers may, for
 * example, inspect public member variables, public getters, or annotations on the class. Each
 * reported member becomes one schema field, nullable when the member is reported as nullable.
 */
public static Schema schemaFromClass(
    Class<?> clazz, FieldValueTypeSupplier fieldValueTypeSupplier) {
  Schema.Builder schema = Schema.builder();
  for (FieldValueTypeInformation member : fieldValueTypeSupplier.get(clazz)) {
    Schema.FieldType memberType = fieldFromType(member.getType(), fieldValueTypeSupplier);
    if (!member.isNullable()) {
      schema.addField(member.getName(), memberType);
      continue;
    }
    schema.addNullableField(member.getName(), memberType);
  }
  return schema.build();
}

Source File: SchemaUtil.java From beam with Apache License 2.0

5 votes

/**
 * Derives a Beam {@link Schema} from {@link ResultSetMetaData}, adding one field per column in
 * column order.
 */
static Schema toBeamSchema(ResultSetMetaData md) throws SQLException {
  Schema.Builder beamSchema = Schema.builder();

  // JDBC column indices are 1-based.
  int column = 1;
  while (column <= md.getColumnCount()) {
    BeamFieldConverter converter = jdbcTypeToBeamFieldConverter(valueOf(md.getColumnType(column)));
    beamSchema.addField(converter.create(column, md));
    column++;
  }

  return beamSchema.build();
}

Source File: JdbcIOTest.java From beam with Apache License 2.0

5 votes

/**
 * Writes rows with a (column_boolean, column_string) schema into a table that declares
 * (column_boolean, column_int NOT NULL) and expects the write to fail with a
 * {@link RuntimeException}. The table is dropped afterwards regardless of the outcome.
 */
@Test
public void testWriteWithoutPsWithNonNullableTableField() throws Exception {
  final int rowsToAdd = 10;

  Schema.Builder schemaBuilder = Schema.builder();
  schemaBuilder.addField(Schema.Field.of("column_boolean", Schema.FieldType.BOOLEAN));
  schemaBuilder.addField(Schema.Field.of("column_string", Schema.FieldType.STRING));
  Schema schema = schemaBuilder.build();

  String tableName = DatabaseTestHelper.getTestTableName("UT_WRITE");
  StringBuilder stmt = new StringBuilder("CREATE TABLE ");
  stmt.append(tableName);
  stmt.append(" (");
  stmt.append("column_boolean       BOOLEAN,");
  stmt.append("column_int           INTEGER NOT NULL");
  stmt.append(" )");
  DatabaseTestHelper.createTableWithStatement(dataSource, stmt.toString());
  try {
    // Register the expectation before the code under test runs, not in the finally block:
    // it then reads in the order it applies and does not depend on cleanup succeeding.
    // Table creation above is deliberately outside its scope.
    thrown.expect(RuntimeException.class);

    ArrayList<Row> data = getRowsToWrite(rowsToAdd, schema);
    pipeline
        .apply(Create.of(data))
        .setRowSchema(schema)
        .apply(
            JdbcIO.<Row>write()
                .withDataSourceConfiguration(
                    JdbcIO.DataSourceConfiguration.create(
                        "org.apache.derby.jdbc.ClientDriver",
                        "jdbc:derby://localhost:" + port + "/target/beam"))
                .withBatchSize(10L)
                .withTable(tableName));
    pipeline.run();
  } finally {
    DatabaseTestHelper.deleteTable(dataSource, tableName);
  }
}

Source File: AddFields.java From beam with Apache License 2.0

4 votes

/**
 * Computes the output row type and the default values that result from adding
 * {@code fieldsToAdd} to {@code inputSchema}.
 *
 * <p>The output schema is assembled in three steps: existing fields are copied in order (an
 * existing field that gains new subfields has its type replaced by the recursively computed
 * type); new fields for this level are appended; finally, new nested fields whose root name is
 * not present in {@code inputSchema} are created recursively and appended.
 *
 * @param inputSchema the schema being extended
 * @param fieldsToAdd the fields to add, both at this level and nested below it
 * @return the output row type, the default values of the fields added at this level, and a list
 *     with one slot per output field that holds the nested information for fields resolved
 *     recursively and {@code null} for all others
 */
private static AddFieldsInformation getAddFieldsInformation(
    Schema inputSchema, Collection<NewField> fieldsToAdd) {
  // Split the requested additions into those whose descriptor accesses fields at this level and
  // those whose descriptor accesses nested fields.
  List<NewField> newTopLevelFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  List<NewField> newNestedFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getNestedFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  // Group all nested fields together by the field at the current level. For example, if adding
  // a.b, a.c, a.d
  // this map will contain a -> {a.b, a.c, a.d}.
  Multimap<String, NewField> newNestedFieldsMap =
      Multimaps.index(newNestedFields, NewField::getName);

  // Keyed by the position of the field in the output schema.
  Map<Integer, AddFieldsInformation> resolvedNestedNewValues = Maps.newHashMap();
  Schema.Builder builder = Schema.builder();
  for (int i = 0; i < inputSchema.getFieldCount(); ++i) {
    Schema.Field field = inputSchema.getField(i);
    Collection<NewField> nestedFields = newNestedFieldsMap.get(field.getName());

    // If this field is a nested field and new subfields are added further down the tree, add
    // those subfields before
    // adding to the current schema. Otherwise we just add this field as is to the new schema.
    if (!nestedFields.isEmpty()) {
      nestedFields = nestedFields.stream().map(NewField::descend).collect(Collectors.toList());

      // NOTE(review): the argument here is the field's type rather than a Schema, so this
      // presumably resolves to an overload declared elsewhere in the file -- confirm.
      AddFieldsInformation nestedInformation =
          getAddFieldsInformation(field.getType(), nestedFields);
      field = field.withType(nestedInformation.getOutputFieldType());
      resolvedNestedNewValues.put(i, nestedInformation);
    }
    builder.addField(field);
  }

  // Add any new fields at this level.
  List<Object> newValuesThisLevel = new ArrayList<>(newTopLevelFields.size());
  for (NewField newField : newTopLevelFields) {
    builder.addField(newField.getName(), newField.getFieldType());
    newValuesThisLevel.add(newField.getDefaultValue());
  }

  // If there are any nested field additions left that are not already processed, that means
  // that the root of the
  // nested field doesn't exist in the schema. In this case we'll walk down the new nested
  // fields and recursively create each nested level as necessary.
  for (Map.Entry<String, Collection<NewField>> newNested :
      newNestedFieldsMap.asMap().entrySet()) {
    String fieldName = newNested.getKey();

    // If the user specifies the same nested field twice in different ways (e.g. a[].x, a{}.x),
    // more than one distinct field descriptor remains for this name. getOnlyElement requires
    // exactly one element, so such input is expected to be rejected here.
    FieldAccessDescriptor.FieldDescriptor fieldDescriptor =
        Iterables.getOnlyElement(
            newNested.getValue().stream()
                .map(NewField::getFieldDescriptor)
                .distinct()
                .collect(Collectors.toList()));
    // Start from a nullable empty row and wrap it in one nullable array per qualifier.
    FieldType fieldType = Schema.FieldType.row(Schema.of()).withNullable(true);
    for (Qualifier qualifier : fieldDescriptor.getQualifiers()) {
      // The problem with adding recursive map fields is that we don't know what the map key
      // type should be.
      // In a field descriptor of the form mapField{}.subField, the subField is assumed to be in
      // the map value.
      // Since in this code path the mapField field does not already exist this means we need to
      // create the new
      // map field, and we have no way of knowing what type the key should be.
      // Alternatives would be to always create a default key type (e.g. FieldType.STRING) or
      // extend our selector
      // syntax to allow specifying key types.
      checkArgument(
          !qualifier.getKind().equals(Qualifier.Kind.MAP), "Map qualifiers not supported here");
      fieldType = FieldType.array(fieldType).withNullable(true);
    }
    if (!inputSchema.hasField(fieldName)) {
      // This is a brand-new nested field with no matching field in the input schema. We will
      // recursively create a nested schema to match it.
      Collection<NewField> nestedNewFields =
          newNested.getValue().stream().map(NewField::descend).collect(Collectors.toList());
      AddFieldsInformation addFieldsInformation =
          getAddFieldsInformation(fieldType, nestedNewFields);
      builder.addField(fieldName, addFieldsInformation.getOutputFieldType());
      // NOTE(review): getLastFieldId() is presumably the index of the field just added --
      // confirm against Schema.Builder.
      resolvedNestedNewValues.put(builder.getLastFieldId(), addFieldsInformation);
    }
  }
  Schema schema = builder.build();

  // One slot per output field; only fields that were resolved recursively get a non-null entry.
  List<AddFieldsInformation> nestedNewValueList =
      new ArrayList<>(Collections.nCopies(schema.getFieldCount(), null));
  for (Map.Entry<Integer, AddFieldsInformation> entry : resolvedNestedNewValues.entrySet()) {
    nestedNewValueList.set(entry.getKey(), entry.getValue());
  }
  return AddFieldsInformation.of(
      Schema.FieldType.row(schema), newValuesThisLevel, nestedNewValueList);
}

Source File: JdbcIOTest.java From beam with Apache License 2.0

4 votes

@Test
public void testWriteWithoutPreparedStatement() throws Exception {
  final int rowsToAdd = 10;

  Schema.Builder schemaBuilder = Schema.builder();
  schemaBuilder.addField(Schema.Field.of("column_boolean", Schema.FieldType.BOOLEAN));
  schemaBuilder.addField(Schema.Field.of("column_string", Schema.FieldType.STRING));
  schemaBuilder.addField(Schema.Field.of("column_int", Schema.FieldType.INT32));
  schemaBuilder.addField(Schema.Field.of("column_long", Schema.FieldType.INT64));
  schemaBuilder.addField(Schema.Field.of("column_float", Schema.FieldType.FLOAT));
  schemaBuilder.addField(Schema.Field.of("column_double", Schema.FieldType.DOUBLE));
  schemaBuilder.addField(Schema.Field.of("column_bigdecimal", Schema.FieldType.DECIMAL));
  schemaBuilder.addField(Schema.Field.of("column_date", LogicalTypes.JDBC_DATE_TYPE));
  schemaBuilder.addField(Schema.Field.of("column_time", LogicalTypes.JDBC_TIME_TYPE));
  schemaBuilder.addField(
      Schema.Field.of("column_timestamptz", LogicalTypes.JDBC_TIMESTAMP_WITH_TIMEZONE_TYPE));
  schemaBuilder.addField(Schema.Field.of("column_timestamp", Schema.FieldType.DATETIME));
  schemaBuilder.addField(Schema.Field.of("column_short", Schema.FieldType.INT16));
  Schema schema = schemaBuilder.build();

  String tableName = DatabaseTestHelper.getTestTableName("UT_WRITE_PS");
  StringBuilder stmt = new StringBuilder("CREATE TABLE ");
  stmt.append(tableName);
  stmt.append(" (");
  stmt.append("column_boolean       BOOLEAN,"); // boolean
  stmt.append("column_string        VARCHAR(254),"); // String
  stmt.append("column_int           INTEGER,"); // int
  stmt.append("column_long          BIGINT,"); // long
  stmt.append("column_float         REAL,"); // float
  stmt.append("column_double        DOUBLE PRECISION,"); // double
  stmt.append("column_bigdecimal    DECIMAL(13,0),"); // BigDecimal
  stmt.append("column_date          DATE,"); // Date
  stmt.append("column_time          TIME,"); // Time
  stmt.append("column_timestamptz   TIMESTAMP,"); // Timestamp
  stmt.append("column_timestamp     TIMESTAMP,"); // Timestamp
  stmt.append("column_short         SMALLINT"); // short
  stmt.append(" )");
  DatabaseTestHelper.createTableWithStatement(dataSource, stmt.toString());
  try {
    ArrayList<Row> data = getRowsToWrite(rowsToAdd, schema);
    pipeline
        .apply(Create.of(data))
        .setRowSchema(schema)
        .apply(
            JdbcIO.<Row>write()
                .withDataSourceConfiguration(
                    JdbcIO.DataSourceConfiguration.create(
                        "org.apache.derby.jdbc.ClientDriver",
                        "jdbc:derby://localhost:" + port + "/target/beam"))
                .withBatchSize(10L)
                .withTable(tableName));
    pipeline.run();
    assertRowCount(tableName, rowsToAdd);
  } finally {
    DatabaseTestHelper.deleteTable(dataSource, tableName);
  }
}

Java Code Examples for org.apache.beam.sdk.schemas.Schema#Builder