Java Code Examples for org.apache.beam.sdk.schemas.Schema#Builder
The following examples show how to use
org.apache.beam.sdk.schemas.Schema#Builder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BigQueryUtils.java From beam with Apache License 2.0 | 7 votes |
private static Schema fromTableFieldSchema(List<TableFieldSchema> tableFieldSchemas) { Schema.Builder schemaBuilder = Schema.builder(); for (TableFieldSchema tableFieldSchema : tableFieldSchemas) { FieldType fieldType = fromTableFieldSchemaType(tableFieldSchema.getType(), tableFieldSchema.getFields()); Optional<Mode> fieldMode = Optional.ofNullable(tableFieldSchema.getMode()).map(Mode::valueOf); if (fieldMode.filter(m -> m == Mode.REPEATED).isPresent()) { fieldType = FieldType.array(fieldType); } // if the mode is not defined or if it is set to NULLABLE, then the field is nullable boolean nullable = !fieldMode.isPresent() || fieldMode.filter(m -> m == Mode.NULLABLE).isPresent(); Field field = Field.of(tableFieldSchema.getName(), fieldType).withNullable(nullable); if (tableFieldSchema.getDescription() != null && !"".equals(tableFieldSchema.getDescription())) { field = field.withDescription(tableFieldSchema.getDescription()); } schemaBuilder.addField(field); } return schemaBuilder.build(); }
Example 2
Source File: Select.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns a copy of {@code schema} in which every field is renamed to a freshly generated random
 * UUID string and every field type is passed through the type-level {@code uniquifyNames}
 * overload.
 *
 * @param schema the schema whose field names should be replaced
 * @return a new schema with the same number of fields, in the same order
 */
private static Schema uniquifyNames(Schema schema) {
  Schema.Builder renamed = new Schema.Builder();
  schema
      .getFields()
      .forEach(
          original ->
              renamed.addField(
                  UUID.randomUUID().toString(), uniquifyNames(original.getType())));
  return renamed.build();
}
Example 3
Source File: SchemaAggregateFn.java From beam with Apache License 2.0 | 5 votes |
/**
 * Assembles the output schema from the {@code outputField} of each aggregation, preserving the
 * order of {@code fieldAggregations}.
 *
 * @param fieldAggregations the aggregations whose output fields make up the schema
 * @return the schema containing one field per aggregation
 */
private Schema getOutputSchema(List<FieldAggregation> fieldAggregations) {
  Schema.Builder result = Schema.builder();
  fieldAggregations.forEach(fieldAggregation -> result.addField(fieldAggregation.outputField));
  return result.build();
}
Example 4
Source File: CoGroup.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds the unexpanded join output schema: a row field named {@code keyFieldName} holding the
 * key schema, followed by one iterable-of-rows field per component schema.
 *
 * @param keyFieldName name of the leading key field
 * @param joinInformation supplies the key schema and the per-input component schemas
 * @return the assembled schema
 */
static Schema getUnexandedOutputSchema(String keyFieldName, JoinInformation joinInformation) {
  Schema.Builder output = Schema.builder().addRowField(keyFieldName, joinInformation.keySchema);
  for (Map.Entry<String, Schema> component : joinInformation.componentSchemas.entrySet()) {
    String componentName = component.getKey();
    Schema componentSchema = component.getValue();
    output.addIterableField(componentName, FieldType.row(componentSchema));
  }
  return output.build();
}
Example 5
Source File: CoGroup.java From beam with Apache License 2.0 | 5 votes |
static Schema getExpandedOutputSchema(JoinInformation joinInformation, JoinArguments joinArgs) { // Construct the output schema. It contains one field for each input PCollection, of type // ROW. If a field has optional participation, then that field will be nullable in the // schema. Schema.Builder joinedSchemaBuilder = Schema.builder(); for (Map.Entry<String, Schema> entry : joinInformation.componentSchemas.entrySet()) { FieldType fieldType = FieldType.row(entry.getValue()); if (joinArgs.getOptionalParticipation(entry.getKey())) { fieldType = fieldType.withNullable(true); } joinedSchemaBuilder.addField(entry.getKey(), fieldType); } return joinedSchemaBuilder.build(); }
Example 6
Source File: AvroUtils.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds a Beam row schema from an AVRO schema.
 *
 * <p>Each AVRO field is converted with {@code toBeamField}; when the AVRO field carries a doc
 * string, that string becomes the description of the Beam field.
 *
 * @param schema schema of type RECORD
 */
public static Schema toBeamSchema(org.apache.avro.Schema schema) {
  Schema.Builder beamSchema = Schema.builder();
  for (org.apache.avro.Schema.Field avroField : schema.getFields()) {
    Field converted = toBeamField(avroField);
    String doc = avroField.doc();
    beamSchema.addField(doc == null ? converted : converted.withDescription(doc));
  }
  return beamSchema.build();
}
Example 7
Source File: SelectHelpers.java From beam with Apache License 2.0 | 5 votes |
/**
 * Concatenates the fields of all given schemas, in iteration order, into a single schema.
 *
 * @param schemas the schemas whose fields should be combined
 * @return a schema containing every field of every input schema
 */
private static Schema union(Iterable<Schema> schemas) {
  Schema.Builder combined = Schema.builder();
  schemas.forEach(part -> combined.addFields(part.getFields()));
  return combined.build();
}
Example 8
Source File: StaticSchemaInference.java From beam with Apache License 2.0 | 5 votes |
/**
 * Infers a schema from a Java class.
 *
 * <p>The caller supplies the strategy for listing the class's fields (for example public member
 * variables, public getters, or annotations) through {@code fieldValueTypeSupplier}. Each listed
 * entry becomes one schema field, added as nullable when the entry reports itself nullable.
 *
 * @param clazz the class to inspect
 * @param fieldValueTypeSupplier extracts the field type information from {@code clazz}
 * @return the inferred schema
 */
public static Schema schemaFromClass(
    Class<?> clazz, FieldValueTypeSupplier fieldValueTypeSupplier) {
  Schema.Builder inferred = Schema.builder();
  for (FieldValueTypeInformation member : fieldValueTypeSupplier.get(clazz)) {
    Schema.FieldType memberType = fieldFromType(member.getType(), fieldValueTypeSupplier);
    if (!member.isNullable()) {
      inferred.addField(member.getName(), memberType);
    } else {
      inferred.addNullableField(member.getName(), memberType);
    }
  }
  return inferred.build();
}
Example 9
Source File: SchemaUtil.java From beam with Apache License 2.0 | 5 votes |
/**
 * Infers the Beam {@link Schema} from {@link ResultSetMetaData}.
 *
 * <p>Columns are visited from index 1 up to the reported column count; each column's JDBC type
 * selects a converter which creates the Beam field for that column.
 *
 * @param md the result-set metadata to read column information from
 * @return a schema with one field per column, in column order
 * @throws SQLException if reading the metadata fails
 */
static Schema toBeamSchema(ResultSetMetaData md) throws SQLException {
  Schema.Builder beamSchema = Schema.builder();
  int column = 1;
  while (column <= md.getColumnCount()) {
    BeamFieldConverter converter =
        jdbcTypeToBeamFieldConverter(valueOf(md.getColumnType(column)));
    beamSchema.addField(converter.create(column, md));
    column++;
  }
  return beamSchema.build();
}
Example 10
Source File: JdbcIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Writes rows whose schema has {@code column_boolean} and {@code column_string} into a table
 * declared with {@code column_boolean} and a {@code column_int INTEGER NOT NULL} column, and
 * expects the attempt to fail with a {@link RuntimeException}.
 *
 * <p>The test table is always dropped afterwards.
 */
@Test
public void testWriteWithoutPsWithNonNullableTableField() throws Exception {
  final int rowsToAdd = 10;

  Schema.Builder schemaBuilder = Schema.builder();
  schemaBuilder.addField(Schema.Field.of("column_boolean", Schema.FieldType.BOOLEAN));
  schemaBuilder.addField(Schema.Field.of("column_string", Schema.FieldType.STRING));
  Schema schema = schemaBuilder.build();

  String tableName = DatabaseTestHelper.getTestTableName("UT_WRITE");
  StringBuilder stmt = new StringBuilder("CREATE TABLE ");
  stmt.append(tableName);
  stmt.append(" (");
  stmt.append("column_boolean BOOLEAN,");
  stmt.append("column_int INTEGER NOT NULL");
  stmt.append(" )");
  DatabaseTestHelper.createTableWithStatement(dataSource, stmt.toString());

  // Register the expectation before running the code that is supposed to fail. Doing it inside
  // the finally block would skip the registration whenever the cleanup itself throws, and would
  // hide the intent of the test at the very end of the method.
  thrown.expect(RuntimeException.class);
  try {
    ArrayList<Row> data = getRowsToWrite(rowsToAdd, schema);
    pipeline
        .apply(Create.of(data))
        .setRowSchema(schema)
        .apply(
            JdbcIO.<Row>write()
                .withDataSourceConfiguration(
                    JdbcIO.DataSourceConfiguration.create(
                        "org.apache.derby.jdbc.ClientDriver",
                        "jdbc:derby://localhost:" + port + "/target/beam"))
                .withBatchSize(10L)
                .withTable(tableName));
    pipeline.run();
  } finally {
    // Cleanup only: drop the table regardless of how the pipeline ended.
    DatabaseTestHelper.deleteTable(dataSource, tableName);
  }
}
Example 11
Source File: AddFields.java From beam with Apache License 2.0 | 4 votes |
/**
 * Computes the output row type, and the bookkeeping that goes with it, for adding
 * {@code fieldsToAdd} to {@code inputSchema}.
 *
 * <p>The output schema is assembled in three passes over one {@link Schema.Builder}:
 *
 * <ol>
 *   <li>every existing field of {@code inputSchema} is copied, recursing into a field first when
 *       new subfields are being added below it;
 *   <li>the new fields selected for this level are appended;
 *   <li>for nested additions whose root field does not exist in {@code inputSchema}, the root
 *       field is created by recursion and appended.
 * </ol>
 *
 * @param inputSchema the schema being extended
 * @param fieldsToAdd the new fields to add at this level or below it
 * @return an {@code AddFieldsInformation} built from the row type of the output schema, the
 *     default values of the fields appended at this level (in the order they were appended),
 *     and a list with one slot per output field that holds the nested information for fields
 *     that received nested additions and {@code null} for all other fields
 */
private static AddFieldsInformation getAddFieldsInformation(
    Schema inputSchema, Collection<NewField> fieldsToAdd) {
  // Entries whose descriptor reports directly accessed fields; these are appended at this level.
  List<NewField> newTopLevelFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  // Entries whose descriptor reports nested accesses; these are resolved by recursion.
  List<NewField> newNestedFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getNestedFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  // Group all nested fields together by the field at the current level. For example, if adding
  // a.b, a.c, a.d this map will contain a -> {a.b, a.c, a.d}.
  Multimap<String, NewField> newNestedFieldsMap =
      Multimaps.index(newNestedFields, NewField::getName);

  // Output-field position -> information about the additions made beneath that field.
  Map<Integer, AddFieldsInformation> resolvedNestedNewValues = Maps.newHashMap();
  Schema.Builder builder = Schema.builder();
  for (int i = 0; i < inputSchema.getFieldCount(); ++i) {
    Schema.Field field = inputSchema.getField(i);
    Collection<NewField> nestedFields = newNestedFieldsMap.get(field.getName());
    // If this field is a nested field and new subfields are added further down the tree, add
    // those subfields before adding to the current schema. Otherwise we just add this field as
    // is to the new schema.
    if (!nestedFields.isEmpty()) {
      nestedFields = nestedFields.stream().map(NewField::descend).collect(Collectors.toList());

      AddFieldsInformation nestedInformation =
          getAddFieldsInformation(field.getType(), nestedFields);
      field = field.withType(nestedInformation.getOutputFieldType());
      // Existing fields keep their position, so the input index is also the output index.
      resolvedNestedNewValues.put(i, nestedInformation);
    }
    builder.addField(field);
  }

  // Add any new fields at this level, remembering each one's default value in the same order.
  List<Object> newValuesThisLevel = new ArrayList<>(newTopLevelFields.size());
  for (NewField newField : newTopLevelFields) {
    builder.addField(newField.getName(), newField.getFieldType());
    newValuesThisLevel.add(newField.getDefaultValue());
  }

  // If there are any nested field additions left that are not already processed, that means
  // that the root of the nested field doesn't exist in the schema. In this case we'll walk down
  // the new nested fields and recursively create each nested level as necessary.
  //
  // Note that the descriptor lookup and the qualifier check below run for every nested root,
  // including roots that already exist in inputSchema; only the final hasField() test limits
  // the actual field creation to missing roots.
  for (Map.Entry<String, Collection<NewField>> newNested :
      newNestedFieldsMap.asMap().entrySet()) {
    String fieldName = newNested.getKey();

    // All additions under one root are reduced to their distinct field descriptors and exactly
    // one is expected.
    // NOTE(review): the original comment here ("If the user specifies the same nested field
    // twice in different ways (e.g. a[].x, a{}.x)") ends mid-sentence; presumably
    // Iterables.getOnlyElement is relied on to reject that case - confirm.
    FieldAccessDescriptor.FieldDescriptor fieldDescriptor =
        Iterables.getOnlyElement(
            newNested.getValue().stream()
                .map(NewField::getFieldDescriptor)
                .distinct()
                .collect(Collectors.toList()));
    // Start from a nullable empty row and wrap it in one nullable array per qualifier.
    FieldType fieldType = Schema.FieldType.row(Schema.of()).withNullable(true);
    for (Qualifier qualifier : fieldDescriptor.getQualifiers()) {
      // The problem with adding recursive map fields is that we don't know what the map key
      // type should be. In a field descriptor of the form mapField{}.subField, the subField is
      // assumed to be in the map value. Since in this code path the mapField field does not
      // already exist this means we need to create the new map field, and we have no way of
      // knowing what type the key should be. Alternatives would be to always create a default
      // key type (e.g. FieldType.STRING) or extend our selector syntax to allow specifying key
      // types.
      checkArgument(
          !qualifier.getKind().equals(Qualifier.Kind.MAP), "Map qualifiers not supported here");
      fieldType = FieldType.array(fieldType).withNullable(true);
    }
    if (!inputSchema.hasField(fieldName)) {
      // This is a brand-new nested field with no matching field in the input schema. We will
      // recursively create a nested schema to match it.
      Collection<NewField> nestedNewFields =
          newNested.getValue().stream().map(NewField::descend).collect(Collectors.toList());
      AddFieldsInformation addFieldsInformation =
          getAddFieldsInformation(fieldType, nestedNewFields);
      builder.addField(fieldName, addFieldsInformation.getOutputFieldType());
      // Keyed by the id the builder reports for the field just added; it is used below as a
      // position in the per-field list, so it is presumably that field's index - confirm.
      resolvedNestedNewValues.put(builder.getLastFieldId(), addFieldsInformation);
    }
  }
  Schema schema = builder.build();

  // One slot per output field: null unless nested additions were recorded for that position.
  List<AddFieldsInformation> nestedNewValueList =
      new ArrayList<>(Collections.nCopies(schema.getFieldCount(), null));
  for (Map.Entry<Integer, AddFieldsInformation> entry : resolvedNestedNewValues.entrySet()) {
    nestedNewValueList.set(entry.getKey(), entry.getValue());
  }
  return AddFieldsInformation.of(
      Schema.FieldType.row(schema), newValuesThisLevel, nestedNewValueList);
}
Example 12
Source File: JdbcIOTest.java From beam with Apache License 2.0 | 4 votes |
/**
 * Writes {@code rowsToAdd} rows covering twelve column types into a freshly created table using
 * only a table name (no explicit prepared statement), then checks the table's row count.
 *
 * <p>The test table is always dropped afterwards.
 */
@Test
public void testWriteWithoutPreparedStatement() throws Exception {
  final int rowsToAdd = 10;

  // Row schema: one field per column of the table created below, in the same order.
  Schema schema =
      Schema.builder()
          .addField(Schema.Field.of("column_boolean", Schema.FieldType.BOOLEAN))
          .addField(Schema.Field.of("column_string", Schema.FieldType.STRING))
          .addField(Schema.Field.of("column_int", Schema.FieldType.INT32))
          .addField(Schema.Field.of("column_long", Schema.FieldType.INT64))
          .addField(Schema.Field.of("column_float", Schema.FieldType.FLOAT))
          .addField(Schema.Field.of("column_double", Schema.FieldType.DOUBLE))
          .addField(Schema.Field.of("column_bigdecimal", Schema.FieldType.DECIMAL))
          .addField(Schema.Field.of("column_date", LogicalTypes.JDBC_DATE_TYPE))
          .addField(Schema.Field.of("column_time", LogicalTypes.JDBC_TIME_TYPE))
          .addField(
              Schema.Field.of(
                  "column_timestamptz", LogicalTypes.JDBC_TIMESTAMP_WITH_TIMEZONE_TYPE))
          .addField(Schema.Field.of("column_timestamp", Schema.FieldType.DATETIME))
          .addField(Schema.Field.of("column_short", Schema.FieldType.INT16))
          .build();

  String tableName = DatabaseTestHelper.getTestTableName("UT_WRITE_PS");
  String createTableSql =
      new StringBuilder("CREATE TABLE ")
          .append(tableName)
          .append(" (")
          .append("column_boolean BOOLEAN,") // boolean
          .append("column_string VARCHAR(254),") // String
          .append("column_int INTEGER,") // int
          .append("column_long BIGINT,") // long
          .append("column_float REAL,") // float
          .append("column_double DOUBLE PRECISION,") // double
          .append("column_bigdecimal DECIMAL(13,0),") // BigDecimal
          .append("column_date DATE,") // Date
          .append("column_time TIME,") // Time
          .append("column_timestamptz TIMESTAMP,") // Timestamp
          .append("column_timestamp TIMESTAMP,") // Timestamp
          .append("column_short SMALLINT") // short
          .append(" )")
          .toString();
  DatabaseTestHelper.createTableWithStatement(dataSource, createTableSql);

  try {
    ArrayList<Row> rows = getRowsToWrite(rowsToAdd, schema);
    String jdbcUrl = "jdbc:derby://localhost:" + port + "/target/beam";
    pipeline
        .apply(Create.of(rows))
        .setRowSchema(schema)
        .apply(
            JdbcIO.<Row>write()
                .withDataSourceConfiguration(
                    JdbcIO.DataSourceConfiguration.create(
                        "org.apache.derby.jdbc.ClientDriver", jdbcUrl))
                .withBatchSize(10L)
                .withTable(tableName));
    pipeline.run();
    assertRowCount(tableName, rowsToAdd);
  } finally {
    DatabaseTestHelper.deleteTable(dataSource, tableName);
  }
}