Java Code Examples for org.apache.beam.sdk.schemas.Schema#of()
The following examples show how to use org.apache.beam.sdk.schemas.Schema#of().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BeamSqlDslProjectTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that a trivial projection hands its input field through intact: the single column is
 * only renamed by the alias, and its value is neither dropped nor re-ordered.
 *
 * @see <a href="https://issues.apache.org/jira/browse/BEAM-6810">BEAM-6810</a>
 */
@Test
public void testTrivialProjection() {
  Schema sourceSchema = Schema.of(Schema.Field.of("c_int64", Schema.FieldType.INT64));
  Schema projectedSchema = Schema.of(Schema.Field.of("abc", Schema.FieldType.INT64));

  Row sourceRow = Row.withSchema(sourceSchema).addValue(42L).build();
  Row projectedRow = Row.withSchema(projectedSchema).addValue(42L).build();

  PCollection<Row> projected =
      pipeline
          .apply(Create.of(sourceRow).withRowSchema(sourceSchema))
          .apply(SqlTransform.query("SELECT c_int64 as abc FROM PCOLLECTION"));

  // Both the resulting schema and the resulting data must reflect the alias only.
  Assert.assertEquals(projectedSchema, projected.getSchema());
  PAssert.that(projected).containsInAnyOrder(projectedRow);

  pipeline.run();
}
Example 2
Source File: ClickHouseIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Writes the values 1, null and 3 into an {@code Int64 DEFAULT -1} column and checks that the
 * column sums to 3 when read back.
 */
@Test
public void testInt64WithDefault() throws Exception {
  Schema schema = Schema.of(Schema.Field.nullable("f0", FieldType.INT64));

  Row one = Row.withSchema(schema).addValue(1L).build();
  Row absent = Row.withSchema(schema).addValue(null).build();
  Row three = Row.withSchema(schema).addValue(3L).build();

  executeSql("CREATE TABLE test_int64_with_default (f0 Int64 DEFAULT -1) ENGINE=Log");

  pipeline
      .apply(Create.of(one, absent, three).withRowSchema(schema))
      .apply(write("test_int64_with_default"));
  pipeline.run().waitUntilFinish();

  // 1 + 3 = 4, so a total of 3 means the null row contributed -1 (the declared default).
  long total = executeQueryAsLong("SELECT SUM(f0) FROM test_int64_with_default");
  assertEquals(3L, total);
}
Example 3
Source File: JdbcIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Reads rows through {@code JdbcIO.readRows()} configured with an explicit data source
 * configuration and a statement preparator, then checks the inferred schema and the single
 * matching row.
 */
@Test
public void testReadRowsWithDataSourceConfiguration() {
  String sql = String.format("select name,id from %s where name = ?", readTableName);

  PCollection<Row> rows =
      pipeline.apply(
          JdbcIO.readRows()
              .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(dataSource))
              .withQuery(sql)
              .withStatementPreparator(
                  statement -> statement.setString(1, TestRow.getNameForSeed(1))));

  // Both columns are expected to come back as nullable fields with upper-case names.
  Schema.Field nameField =
      Schema.Field.of("NAME", LogicalTypes.variableLengthString(JDBCType.VARCHAR, 500))
          .withNullable(true);
  Schema.Field idField = Schema.Field.of("ID", Schema.FieldType.INT32).withNullable(true);
  Schema expectedSchema = Schema.of(nameField, idField);

  assertEquals(expectedSchema, rows.getSchema());

  PCollection<Row> selected = rows.apply(Select.fieldNames("NAME", "ID"));
  Row expectedRow = Row.withSchema(expectedSchema).addValues("Testval1", 1).build();
  PAssert.that(selected).containsInAnyOrder(ImmutableList.of(expectedRow));

  pipeline.run();
}
Example 4
Source File: ClickHouseIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Writes the values 1, null and 3 into a {@code Nullable(Int64)} column and checks the sum, the
 * total row count and the count of non-null values read back.
 */
@Test
public void testNullableInt64() throws Exception {
  Schema schema = Schema.of(Schema.Field.nullable("f0", FieldType.INT64));

  Row one = Row.withSchema(schema).addValue(1L).build();
  Row absent = Row.withSchema(schema).addValue(null).build();
  Row three = Row.withSchema(schema).addValue(3L).build();

  executeSql("CREATE TABLE test_nullable_int64 (f0 Nullable(Int64)) ENGINE=Log");

  pipeline
      .apply(Create.of(one, absent, three).withRowSchema(schema))
      .apply(write("test_nullable_int64"));
  pipeline.run().waitUntilFinish();

  long total = executeQueryAsLong("SELECT SUM(f0) FROM test_nullable_int64");
  long allRows = executeQueryAsLong("SELECT COUNT(*) FROM test_nullable_int64");
  long nonNullRows = executeQueryAsLong("SELECT COUNT(f0) FROM test_nullable_int64");

  // The null row is counted by COUNT(*) but adds nothing to SUM(f0) or COUNT(f0).
  assertEquals(4L, total);
  assertEquals(3L, allRows);
  assertEquals(2L, nonNullRows);
}
Example 5
Source File: CastTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Applies {@code Cast.widening} to a row of (INT16, INT32) and expects the same values typed as
 * (INT32, INT64).
 */
@Test
@Category(NeedsRunner.class)
public void testTypeWiden() {
  Schema narrowSchema =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.INT16),
          Schema.Field.of("f1", Schema.FieldType.INT32));
  Schema wideSchema =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.INT32),
          Schema.Field.of("f1", Schema.FieldType.INT64));

  Row narrowRow = Row.withSchema(narrowSchema).addValues((short) 1, 2).build();
  Row wideRow = Row.withSchema(wideSchema).addValues(1, 2L).build();

  PCollection<Row> widened =
      pipeline
          .apply(Create.of(narrowRow).withRowSchema(narrowSchema))
          .apply(Cast.widening(wideSchema));

  PAssert.that(widened).containsInAnyOrder(wideRow);

  pipeline.run();
}
Example 6
Source File: RowCoderTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds two rows holding equal-content but distinct nested byte arrays and, provided the coder
 * reports itself as consistent with equals, verifies that property for the pair.
 */
@Test
public void testConsistentWithEqualsArrayOfArrayOfBytes() throws Exception {
  FieldType nestedBytes = FieldType.array(FieldType.array(FieldType.BYTES));
  Schema schema = Schema.of(Schema.Field.of("f1", nestedBytes));
  RowCoder coder = RowCoder.of(schema);

  // Two separately allocated arrays with identical contents.
  List<byte[]> firstInner = Collections.singletonList(new byte[] {1, 2, 3, 4});
  List<List<byte[]>> firstOuter = Collections.singletonList(firstInner);
  Row firstRow = Row.withSchema(schema).addValue(firstOuter).build();

  List<byte[]> secondInner = Collections.singletonList(new byte[] {1, 2, 3, 4});
  List<List<byte[]>> secondOuter = Collections.singletonList(secondInner);
  Row secondRow = Row.withSchema(schema).addValue(secondOuter).build();

  // The check only applies when the coder claims consistency with equals.
  Assume.assumeTrue(coder.consistentWithEquals());

  CoderProperties.coderConsistentWithEquals(coder, firstRow, secondRow);
}
Example 7
Source File: ClickHouseIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Writes one row holding a nested array of longs into an {@code Array(Array(Int64))} column and
 * checks the sum over all nested elements read back.
 */
@Test
public void testArrayOfArrayOfInt64() throws Exception {
  FieldType nestedLongs = FieldType.array(FieldType.array(FieldType.INT64));
  Schema schema = Schema.of(Schema.Field.of("f0", nestedLongs));

  Row nestedRow =
      Row.withSchema(schema)
          .addValue(
              Arrays.asList(
                  Arrays.asList(1L, 2L), Arrays.asList(2L, 3L), Arrays.asList(3L, 4L)))
          .build();

  executeSql("CREATE TABLE test_array_of_array_of_int64 (f0 Array(Array(Int64))) ENGINE=Log");

  pipeline
      .apply(Create.of(nestedRow).withRowSchema(schema))
      .apply(write("test_array_of_array_of_int64"));
  pipeline.run().waitUntilFinish();

  // (1 + 2) + (2 + 3) + (3 + 4) = 15
  long total =
      executeQueryAsLong(
          "SELECT SUM(arraySum(arrayMap(x -> arraySum(x), f0))) "
              + "FROM test_array_of_array_of_int64");
  assertEquals(15L, total);
}
Example 8
Source File: CastTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Expects {@code Cast.widening} compatibility verification to reject a nested row whose inner
 * field {@code f1} would have to go from INT64 to INT32, reporting the nested path in the
 * message.
 */
@Test
public void testCastInnerRowFail() {
  Schema nestedSource =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.INT16),
          Schema.Field.of("f1", Schema.FieldType.INT64));
  Schema source =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.row(nestedSource)),
          Schema.Field.of("f1", Schema.FieldType.INT32));

  Schema nestedTarget =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.INT32),
          Schema.Field.of("f1", Schema.FieldType.INT32));
  Schema target =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.row(nestedTarget)),
          Schema.Field.of("f1", Schema.FieldType.INT64));

  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage(containsString("f0.f1: Can't cast 'INT64' to 'INT32'"));

  Cast.widening(target).verifyCompatibility(source);
}
Example 9
Source File: JdbcIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Reads {@code RowWithSchema} beans through {@code JdbcIO.read()} after registering the bean
 * class with the pipeline's schema registry, then checks the resulting schema and the single
 * matching row.
 */
@Test
public void testReadWithSchema() {
  SerializableFunction<Void, DataSource> providerFn = ignored -> dataSource;
  JdbcIO.RowMapper<RowWithSchema> beanMapper =
      resultSet -> new RowWithSchema(resultSet.getString("NAME"), resultSet.getInt("ID"));

  // Registered before the read is applied, as in the original statement order.
  pipeline.getSchemaRegistry().registerJavaBean(RowWithSchema.class);

  String sql = String.format("select name,id from %s where name = ?", readTableName);
  PCollection<RowWithSchema> rows =
      pipeline.apply(
          JdbcIO.<RowWithSchema>read()
              .withDataSourceProviderFn(providerFn)
              .withQuery(sql)
              .withRowMapper(beanMapper)
              .withCoder(SerializableCoder.of(RowWithSchema.class))
              .withStatementPreparator(
                  statement -> statement.setString(1, TestRow.getNameForSeed(1))));

  Schema expectedSchema =
      Schema.of(
          Schema.Field.of("name", Schema.FieldType.STRING),
          Schema.Field.of("id", Schema.FieldType.INT32));
  assertEquals(expectedSchema, rows.getSchema());

  PCollection<Row> selected = rows.apply(Select.fieldNames("name", "id"));
  Row expectedRow = Row.withSchema(expectedSchema).addValues("Testval1", 1).build();
  PAssert.that(selected).containsInAnyOrder(ImmutableList.of(expectedRow));

  pipeline.run();
}
Example 10
Source File: CastTest.java From beam with Apache License 2.0 | 5 votes |
@Test @Category(NeedsRunner.class) public void testIgnoreNullable() { // the opposite of testWeakenNullable Schema inputSchema = Schema.of( Schema.Field.of("f0", Schema.FieldType.INT32), Schema.Field.nullable("f1", Schema.FieldType.INT64)); Schema outputSchema = Schema.of( Schema.Field.of("f0", Schema.FieldType.INT16), Schema.Field.nullable("f1", Schema.FieldType.INT32)); Row input = Row.withSchema(inputSchema).addValues(1, 2L).build(); Row expected = Row.withSchema(outputSchema).addValues((short) 1, 2).build(); PCollection<Row> output = pipeline .apply(Create.of(input).withRowSchema(inputSchema)) .apply(Cast.narrowing(outputSchema)); PAssert.that(output).containsInAnyOrder(expected); pipeline.run(); }
Example 11
Source File: CastTest.java From beam with Apache License 2.0 | 5 votes |
@Test @Category(NeedsRunner.class) public void testTypeNarrow() { // the same as testTypeWiden, but to casting to the opposite direction Schema inputSchema = Schema.of( Schema.Field.of("f0", Schema.FieldType.INT32), Schema.Field.of("f1", Schema.FieldType.INT64)); Schema outputSchema = Schema.of( Schema.Field.of("f0", Schema.FieldType.INT16), Schema.Field.of("f1", Schema.FieldType.INT32)); Row input = Row.withSchema(inputSchema).addValues(1, 2L).build(); Row expected = Row.withSchema(outputSchema).addValues((short) 1, 2).build(); PCollection<Row> output = pipeline .apply(Create.of(input).withRowSchema(inputSchema)) .apply(Cast.narrowing(outputSchema)); PAssert.that(output).containsInAnyOrder(expected); pipeline.run(); }
Example 12
Source File: AddFieldsTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Starts from a row with an empty schema, adds four fields under the dotted prefix
 * {@code nested.} via {@code AddFields}, and checks the resulting schema and row: a nullable
 * {@code nested} row field whose {@code field1} carries the supplied value and whose remaining
 * fields are null.
 */
@Test
@Category(NeedsRunner.class)
public void recursivelyAddNestedFields() {
  Schema emptySchema = Schema.of();
  Row emptyRow = Row.withSchema(emptySchema).build();

  PCollection<Row> added =
      pipeline
          .apply(Create.of(emptyRow).withRowSchema(emptySchema))
          .apply(
              AddFields.<Row>create()
                  .field("nested.field1", Schema.FieldType.STRING, "value")
                  .field("nested.field2", Schema.FieldType.INT32)
                  .field("nested.field3", Schema.FieldType.array(Schema.FieldType.STRING))
                  .field("nested.field4", Schema.FieldType.iterable(Schema.FieldType.STRING)));

  // Expected shape of the generated inner row.
  Schema innerSchema =
      Schema.builder()
          .addStringField("field1")
          .addNullableField("field2", Schema.FieldType.INT32)
          .addNullableField("field3", Schema.FieldType.array(Schema.FieldType.STRING))
          .addNullableField("field4", Schema.FieldType.iterable(Schema.FieldType.STRING))
          .build();
  // Expected shape of the outer row wrapping it.
  Schema outerSchema =
      Schema.builder().addNullableField("nested", Schema.FieldType.row(innerSchema)).build();

  assertEquals(outerSchema, added.getSchema());

  Row innerRow = Row.withSchema(innerSchema).addValues("value", null, null, null).build();
  Row outerRow = Row.withSchema(outerSchema).addValue(innerRow).build();
  PAssert.that(added).containsInAnyOrder(outerRow);

  pipeline.run();
}
Example 13
Source File: AtomicInsertTest.java From beam with Apache License 2.0 | 5 votes |
@Override public PCollection<Row> expand(PBegin input) { Schema schema = Schema.of(Schema.Field.of("f0", Schema.FieldType.INT64)); Iterable<Row> bundle = IntStream.range(0, size) .mapToObj(x -> Row.withSchema(schema).addValue((long) x).build()) .collect(Collectors.toList()); // make sure we get one big bundle return input .getPipeline() .apply(Create.<Iterable<Row>>of(bundle).withCoder(IterableCoder.of(RowCoder.of(schema)))) .apply(Flatten.iterables()) .setRowSchema(schema); }
Example 14
Source File: JdbcIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Reads rows through {@code JdbcIO.readRows()} using a data source provider function and a query
 * with the name inlined (no statement preparator), then checks the inferred schema and the
 * single matching row.
 */
@Test
public void testReadRowsWithoutStatementPreparator() {
  SerializableFunction<Void, DataSource> providerFn = ignored -> dataSource;
  String name = TestRow.getNameForSeed(1);
  String sql = String.format("select name,id from %s where name = '%s'", readTableName, name);

  PCollection<Row> rows =
      pipeline.apply(JdbcIO.readRows().withDataSourceProviderFn(providerFn).withQuery(sql));

  // Both columns are expected to come back as nullable fields with upper-case names.
  Schema.Field nameField =
      Schema.Field.of("NAME", LogicalTypes.variableLengthString(JDBCType.VARCHAR, 500))
          .withNullable(true);
  Schema.Field idField = Schema.Field.of("ID", Schema.FieldType.INT32).withNullable(true);
  Schema expectedSchema = Schema.of(nameField, idField);

  assertEquals(expectedSchema, rows.getSchema());

  PCollection<Row> selected = rows.apply(Select.fieldNames("NAME", "ID"));
  Row expectedRow = Row.withSchema(expectedSchema).addValues(name, 1).build();
  PAssert.that(selected).containsInAnyOrder(ImmutableList.of(expectedRow));

  pipeline.run();
}
Example 15
Source File: RowTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Two rows built from {@code ByteBuffer}s wrapping distinct arrays with identical contents are
 * expected to be equal.
 */
@Test
public void testByteBufferEquality() {
  Schema schema = Schema.of(Schema.Field.of("bytes", Schema.FieldType.BYTES));

  byte[] firstBytes = new byte[] {1, 2, 3, 4};
  byte[] secondBytes = new byte[] {1, 2, 3, 4};

  Row first = Row.withSchema(schema).addValue(ByteBuffer.wrap(firstBytes)).build();
  Row second = Row.withSchema(schema).addValue(ByteBuffer.wrap(secondBytes)).build();

  assertEquals(first, second);
}
Example 16
Source File: RowTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Two rows built from distinct byte arrays with identical contents are expected to be equal.
 */
@Test
public void testByteArrayEquality() {
  Schema schema = Schema.of(Schema.Field.of("bytes", Schema.FieldType.BYTES));

  byte[] firstBytes = new byte[] {1, 2, 3, 4};
  byte[] secondBytes = new byte[] {1, 2, 3, 4};

  Row first = Row.withSchema(schema).addValue(firstBytes).build();
  Row second = Row.withSchema(schema).addValue(secondBytes).build();

  assertEquals(first, second);
}
Example 17
Source File: SqlBoundedSideInputJoin.java From beam with Apache License 2.0 | 5 votes |
/**
 * Filters the bid events and converts them to rows, converts the side input of key/value pairs
 * to rows with the fields {@code id} and {@code extra}, then runs the configured SQL query over
 * both tagged collections and converts the result back to {@code Bid}s.
 *
 * @param events the incoming event stream
 * @return the query result converted to bids
 * @throws IllegalStateException if no side input has been configured
 */
@Override
public PCollection<Bid> expand(PCollection<Event> events) {
  PCollection<Row> bidRows =
      events
          .apply(Filter.by(NexmarkQueryUtil.IS_BID))
          .apply(getName() + ".SelectEvent", new SelectEvent(Event.Type.BID));

  checkState(getSideInput() != null, "Configuration error: side input is null");

  // Anonymous subclasses, exactly as in the original tag declarations.
  TupleTag<Row> sideTag = new TupleTag<Row>("side") {};
  TupleTag<Row> bidTag = new TupleTag<Row>("bid") {};

  Schema sideSchema =
      Schema.of(
          Schema.Field.of("id", Schema.FieldType.INT64),
          Schema.Field.of("extra", Schema.FieldType.STRING));

  // Attach a schema to the KV side input so it can be converted to rows.
  PCollection<Row> sideRows =
      getSideInput()
          .setSchema(
              sideSchema,
              TypeDescriptors.kvs(TypeDescriptors.longs(), TypeDescriptors.strings()),
              pair -> Row.withSchema(sideSchema).addValues(pair.getKey(), pair.getValue()).build(),
              sideRow -> KV.of(sideRow.getInt64("id"), sideRow.getString("extra")))
          .apply("SideToRows", Convert.toRows());

  String sql = String.format(query, configuration.sideInputRowCount);

  return PCollectionTuple.of(bidTag, bidRows)
      .and(sideTag, sideRows)
      .apply(SqlTransform.query(sql).withQueryPlannerClass(plannerClass))
      .apply("ResultToBid", Convert.fromRows(Bid.class));
}
Example 18
Source File: BeamSqlDslProjectTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Runs a query selecting the hexadecimal literal {@code x'baadcafe'} and expects a single row
 * holding the corresponding four bytes.
 */
@Test
public void testBytesLiteral() {
  Schema bytesSchema = Schema.of(Schema.Field.of("c_bytes", Schema.FieldType.BYTES));

  // 0xBA, 0xAD, 0xCA, 0xFE written as signed Java bytes.
  Row expectedRow =
      Row.withSchema(bytesSchema).addValue(new byte[] {-70, -83, -54, -2}).build();

  PCollection<Row> queried =
      PCollectionTuple.empty(pipeline).apply(SqlTransform.query("SELECT x'baadcafe' as c_bytes"));

  PAssert.that(queried).containsInAnyOrder(expectedRow);

  pipeline.run();
}
Example 19
Source File: CastValidatorTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Validates a widening cast between two single-field schemas and asserts that it reports no
 * errors exactly when {@code input} does not come after {@code output} in
 * {@code NUMERIC_ORDER}.
 *
 * @param input type of the source field
 * @param output type of the target field
 */
private void testWideningOrder(TypeName input, TypeName output) {
  Schema sourceSchema = Schema.of(Schema.Field.of("f0", FieldType.of(input)));
  Schema targetSchema = Schema.of(Schema.Field.of("f0", FieldType.of(output)));

  List<Cast.CompatibilityError> problems = Cast.Widening.of().apply(sourceSchema, targetSchema);

  if (NUMERIC_ORDER.indexOf(input) > NUMERIC_ORDER.indexOf(output)) {
    // Going to an earlier type in the order must be rejected.
    assertThat(input + " is after " + output, problems, not(empty()));
    return;
  }

  // Same type, or a later one in the order: the cast must be accepted.
  assertThat(input + " is before " + output, problems, empty());
}
Example 20
Source File: ClickHouseIOTest.java From beam with Apache License 2.0 | 4 votes |
@Test public void testArrayOfPrimitiveTypes() throws Exception { Schema schema = Schema.of( Schema.Field.of("f0", FieldType.array(FieldType.DATETIME)), Schema.Field.of("f1", FieldType.array(FieldType.DATETIME)), Schema.Field.of("f2", FieldType.array(FieldType.FLOAT)), Schema.Field.of("f3", FieldType.array(FieldType.DOUBLE)), Schema.Field.of("f4", FieldType.array(FieldType.BYTE)), Schema.Field.of("f5", FieldType.array(FieldType.INT16)), Schema.Field.of("f6", FieldType.array(FieldType.INT32)), Schema.Field.of("f7", FieldType.array(FieldType.INT64)), Schema.Field.of("f8", FieldType.array(FieldType.STRING)), Schema.Field.of("f9", FieldType.array(FieldType.INT16)), Schema.Field.of("f10", FieldType.array(FieldType.INT32)), Schema.Field.of("f11", FieldType.array(FieldType.INT64)), Schema.Field.of("f12", FieldType.array(FieldType.INT64)), Schema.Field.of("f13", FieldType.array(FieldType.STRING)), Schema.Field.of("f14", FieldType.array(FieldType.STRING))); Row row1 = Row.withSchema(schema) .addArray( new DateTime(2030, 10, 1, 0, 0, 0, DateTimeZone.UTC), new DateTime(2031, 10, 1, 0, 0, 0, DateTimeZone.UTC)) .addArray( new DateTime(2030, 10, 9, 8, 7, 6, DateTimeZone.UTC), new DateTime(2031, 10, 9, 8, 7, 6, DateTimeZone.UTC)) .addArray(2.2f, 3.3f) .addArray(3.3, 4.4) .addArray((byte) 4, (byte) 5) .addArray((short) 5, (short) 6) .addArray(6, 7) .addArray(7L, 8L) .addArray("eight", "nine") .addArray((short) 9, (short) 10) .addArray(10, 11) .addArray(11L, 12L) .addArray(12L, 13L) .addArray("abc", "cde") .addArray("cde", "abc") .build(); executeSql( "CREATE TABLE test_array_of_primitive_types (" + "f0 Array(Date)," + "f1 Array(DateTime)," + "f2 Array(Float32)," + "f3 Array(Float64)," + "f4 Array(Int8)," + "f5 Array(Int16)," + "f6 Array(Int32)," + "f7 Array(Int64)," + "f8 Array(String)," + "f9 Array(UInt8)," + "f10 Array(UInt16)," + "f11 Array(UInt32)," + "f12 Array(UInt64)," + "f13 Array(Enum8('abc' = 1, 'cde' = 2))," + "f14 Array(Enum16('abc' = -1, 'cde' = -2))" + ") 
ENGINE=Log"); pipeline .apply(Create.of(row1).withRowSchema(schema)) .apply(write("test_array_of_primitive_types")); pipeline.run().waitUntilFinish(); try (ResultSet rs = executeQuery("SELECT * FROM test_array_of_primitive_types")) { rs.next(); assertEquals("['2030-10-01','2031-10-01']", rs.getString("f0")); assertEquals("['2030-10-09 08:07:06','2031-10-09 08:07:06']", rs.getString("f1")); assertEquals("[2.2,3.3]", rs.getString("f2")); assertEquals("[3.3,4.4]", rs.getString("f3")); assertEquals("[4,5]", rs.getString("f4")); assertEquals("[5,6]", rs.getString("f5")); assertEquals("[6,7]", rs.getString("f6")); assertEquals("[7,8]", rs.getString("f7")); assertEquals("['eight','nine']", rs.getString("f8")); assertEquals("[9,10]", rs.getString("f9")); assertEquals("[10,11]", rs.getString("f10")); assertEquals("[11,12]", rs.getString("f11")); assertEquals("[12,13]", rs.getString("f12")); assertEquals("['abc','cde']", rs.getString("f13")); assertEquals("['cde','abc']", rs.getString("f14")); } }