Java Code Examples for org.apache.avro.generic.GenericRecord#put()
The following examples show how to use
org.apache.avro.generic.GenericRecord#put().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: KeyValueUtilsTest.java From components with Apache License 2.0 | 6 votes |
/** * From the input: {"name": "testdata", "data": {"a": "a", "b": "b", "c": "c"}} * * Extract elements: "name", "data.a", "data.b" and "data.c" * * The result should be: * * key: {"name": "testdata", "data": {"a": "a", "b": "b", "c": "c"}} * * value: null */ @Test public void test_Hierarchical_EverythingIsAKey() throws Exception { GenericRecord inputRecord = new GenericRecordBuilder(inputHierarchicalSchema) // .set("name", "testdata") // .build(); inputRecord.put("data", new GenericRecordBuilder(inputSimpleSchema) // .set("a", "a") // .set("b", "b") // .set("c", "c") // .build()); List<String> keyList = Arrays.asList("name", "data"); String transformedIndexedRecord = ("{'key': {'name': 'testdata', 'data': {'a': 'a', 'b': 'b', 'c': 'c'}}, " + "'value': {}}").replaceAll("\\'", "\""); IndexedRecord outputRecord = KeyValueUtils.transformToKV(inputRecord, SchemaGeneratorUtils.extractKeyValues(inputRecord.getSchema(), keyList)); assertEquals(transformedIndexedRecord, outputRecord.toString()); Schema kvSchema = SchemaGeneratorUtils.mergeKeyValues(outputRecord.getSchema()); String mergedRecord = ("{'name': 'testdata', 'data': {'a': 'a', 'b': 'b', 'c': 'c'}}").replaceAll("\\'", "\""); assertEquals(mergedRecord, KeyValueUtils.transformFromKV(outputRecord, kvSchema).toString()); }
Example 2
Source File: StreamlineEventSerializer.java From streamline with Apache License 2.0 | 6 votes |
private static Object getAvroValue(Object input, Schema schema) { if (input instanceof byte[] && Schema.Type.FIXED.equals(schema.getType())) { return new GenericData.Fixed(schema, (byte[]) input); } else if (input instanceof Map && !((Map) input).isEmpty()) { GenericRecord result; result = new GenericData.Record(schema); for (Map.Entry<String, Object> entry: ((Map<String, Object>) input).entrySet()) { result.put(entry.getKey(), getAvroValue(entry.getValue(), schema.getField(entry.getKey()).schema())); } return result; } else if (input instanceof Collection && !((Collection) input).isEmpty()) { // for array even though we(Schema in streamline registry) support different types of elements in an array, avro expects an array // schema to have elements of same type. Hence, for now we will restrict array to have elements of same type. Other option is convert // a streamline Schema Array field to Record in avro. However, with that the issue is that avro Field constructor does not allow a // null name. We could potentiall hack it by plugging in a dummy name like arrayfield, but seems hacky so not taking that path List<Object> values = new ArrayList<>(((Collection) input).size()); for (Object value: (Collection) input) { values.add(getAvroValue(value, schema.getElementType())); } return new GenericData.Array<Object>(schema, values); } else { return input; } }
Example 3
Source File: TestReadParquetAfterSchemaEvolution.java From kite with Apache License 2.0 | 6 votes |
/**
 * Prepares the shared fixture: creates a Parquet dataset "ns"/"test" in a fresh temporary
 * directory using {@code OLD_VALUE_SCHEMA}, writes {@code totalRecords} records whose "value"
 * field counts up from 0, then updates the dataset descriptor to the schema of
 * {@code Value.class} and loads the dataset into {@code readerDataset}.
 */
@BeforeClass
public static void setup() throws IOException {
    fs = LocalFileSystem.getInstance();
    testDirectory = new Path(Files.createTempDir().getAbsolutePath());
    FileSystemDatasetRepository repository =
            new FileSystemDatasetRepository(fs.getConf(), testDirectory);

    DatasetDescriptor oldDescriptor = new DatasetDescriptor.Builder()
            .schema(DatasetTestUtilities.OLD_VALUE_SCHEMA)
            .format(Formats.PARQUET)
            .build();
    Dataset<GenericRecord> writerDataset =
            repository.create("ns", "test", oldDescriptor, GenericRecord.class);

    DatasetWriter<GenericRecord> datasetWriter = writerDataset.newWriter();
    // A single record instance is reused; only its "value" field changes between writes.
    GenericRecord reusable = new GenericData.Record(DatasetTestUtilities.OLD_VALUE_SCHEMA);
    for (long value = 0; value < totalRecords; value++) {
        reusable.put("value", Long.valueOf(value));
        datasetWriter.write(reusable);
    }
    datasetWriter.close();

    DatasetDescriptor evolvedDescriptor =
            new DatasetDescriptor.Builder(writerDataset.getDescriptor())
                    .schema(Value.class)
                    .build();
    repository.update("ns", "test", evolvedDescriptor);

    readerDataset = repository.load("ns", "test", GenericRecord.class);
}
Example 4
Source File: AvroStreamsSnapshotDeserializerTest.java From streamline with Apache License 2.0 | 6 votes |
/**
 * Builds a record for {@code schema} filled with values derived from the current time.
 *
 * <p>The nested "address" record is created from the schema of the "address" field and stored
 * in the root record alongside "xid", "name", "version", "timestamp" and "suit".
 *
 * @param schema root record schema; it is read for an "address" field
 * @return the populated root record
 */
private GenericRecord generateGenericRecord(Schema schema) {
    Schema addressSchema = schema.getField("address").schema();
    GenericRecord address = new GenericData.Record(addressSchema);

    // One timestamp is shared by every generated value.
    long stamp = System.currentTimeMillis();

    address.put("streetaddress", "streetaddress:" + stamp);
    address.put("city", "city-" + stamp);
    address.put("state", "state-" + stamp);
    address.put("zip", "zip" + stamp);

    GenericRecord root = new GenericData.Record(schema);
    root.put("xid", stamp);
    root.put("name", "name-" + stamp);
    root.put("version", 1);
    root.put("timestamp", stamp);
    root.put("suit", "SPADES");
    root.put("address", address);
    return root;
}
Example 5
Source File: CopyableGenericRecordTest.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code CopyableGenericRecord.copy()} yields a record equal to the source, and
 * that changing the copy's "name" afterwards makes the two records unequal.
 */
@Test
public void testCopy() throws CopyNotSupportedException {
    Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
    GenericRecord original = new GenericData.Record(schema);
    original.put("name", "foo");
    original.put("favorite_number", 68);
    original.put("favorite_colors", Arrays.asList("blue", "black", "red"));

    GenericRecord duplicate = new CopyableGenericRecord(original).copy();
    Assert.assertEquals(original, duplicate);

    // Modifying the copy must not be visible through the original.
    duplicate.put("name", "bar");
    Assert.assertNotEquals(original, duplicate);
}
Example 6
Source File: TestHoodieAvroUtils.java From hudi with Apache License 2.0 | 5 votes |
@Test public void testJsonNodeNullWithDefaultValues() { List<Schema.Field> fields = new ArrayList<>(); Schema initialSchema = Schema.createRecord("test_record", "test record", "org.test.namespace", false); Schema.Field field1 = new Schema.Field("key", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); Schema.Field field2 = new Schema.Field("key1", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); Schema.Field field3 = new Schema.Field("key2", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); fields.add(field1); fields.add(field2); fields.add(field3); initialSchema.setFields(fields); GenericRecord rec = new GenericData.Record(initialSchema); rec.put("key", "val"); rec.put("key1", "val1"); rec.put("key2", "val2"); List<Schema.Field> evolvedFields = new ArrayList<>(); Schema evolvedSchema = Schema.createRecord("evolved_record", "evolved record", "org.evolved.namespace", false); Schema.Field evolvedField1 = new Schema.Field("key", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); Schema.Field evolvedField2 = new Schema.Field("key1", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); Schema.Field evolvedField3 = new Schema.Field("key2", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); Schema.Field evolvedField4 = new Schema.Field("evolved_field", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", NullNode.getInstance()); Schema.Field evolvedField5 = new Schema.Field("evolved_field1", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); evolvedFields.add(evolvedField1); evolvedFields.add(evolvedField2); evolvedFields.add(evolvedField3); evolvedFields.add(evolvedField4); evolvedFields.add(evolvedField5); evolvedSchema.setFields(evolvedFields); GenericRecord rec1 = HoodieAvroUtils.rewriteRecord(rec, evolvedSchema); //evolvedField4.defaultVal() returns a JsonProperties.Null instance. 
assertNull(rec1.get("evolved_field")); //evolvedField5.defaultVal() returns null. assertNull(rec1.get("evolved_field1")); }
Example 7
Source File: AvroUtilsTest.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Exercises {@code AvroUtils.overrideNameAndNamespace} twice on a one-field record: first with
 * an empty namespace map (name changes, namespace stays), then with a map from the input
 * namespace to the output namespace (both change). The "integer1" value must survive both.
 */
@Test
public void overrideNameAndNamespaceTest() throws IOException {
    String inputName = "input_name";
    String inputNamespace = "input_namespace";
    String outputName = "output_name";
    String outputNamespace = "output_namespace";

    Schema sourceSchema = SchemaBuilder.record(inputName)
            .namespace(inputNamespace)
            .fields()
            .name("integer1").type().intBuilder().endInt().noDefault()
            .endRecord();
    GenericRecord source = new GenericData.Record(sourceSchema);
    source.put("integer1", 10);

    // Pass 1: no namespace mapping supplied.
    GenericRecord renamed = AvroUtils.overrideNameAndNamespace(
            source, outputName, Optional.of(Collections.EMPTY_MAP));
    Assert.assertEquals(renamed.getSchema().getName(), outputName);
    Assert.assertEquals(renamed.getSchema().getNamespace(), inputNamespace);
    Assert.assertEquals(renamed.get("integer1"), 10);

    // Pass 2: the input namespace is mapped onto the output namespace.
    Map<String, String> namespaceMapping = new HashMap<>();
    namespaceMapping.put(inputNamespace, outputNamespace);
    renamed = AvroUtils.overrideNameAndNamespace(
            source, outputName, Optional.of(namespaceMapping));
    Assert.assertEquals(renamed.getSchema().getName(), outputName);
    Assert.assertEquals(renamed.getSchema().getNamespace(), outputNamespace);
    Assert.assertEquals(renamed.get("integer1"), 10);
}
Example 8
Source File: AvroToBytesConverterTest.java From incubator-gobblin with Apache License 2.0 | 5 votes |
@Test public void testSerialization() throws DataConversionException, IOException, SchemaConversionException { Schema inputSchema = new Schema.Parser() .parse(getClass().getClassLoader().getResourceAsStream("converter/bytes_to_avro/test_record_schema.avsc")); AvroToBytesConverter converter = new AvroToBytesConverter(); WorkUnitState state = new WorkUnitState(); converter.init(state); String outputSchema = converter.convertSchema(inputSchema, state); // Write a record twice to make sure nothing goes wrong with caching for (int i = 0; i < 2; i++) { GenericRecord testRecord = new GenericData.Record(inputSchema); testRecord.put("testStr", "testing12" + ((i == 0) ? "3": "4")); testRecord.put("testInt", -2); Iterator<byte[]> records = converter.convertRecord(outputSchema, testRecord, state).iterator(); byte[] record = records.next(); Assert.assertFalse(records.hasNext()); byte[] expectedRecord = IOUtils.toByteArray(getClass().getClassLoader().getResourceAsStream("converter/bytes_to_avro/test_record_binary.avro")); // the serialized record was serialized with testing123 as the string; if we write testing124 out // contents should be the same except for the 10th byte which will be '4' instead of '3' if (i == 1) { expectedRecord[10] = 52; } Assert.assertEquals(outputSchema, inputSchema.toString()); Assert.assertEquals(record, expectedRecord); } }
Example 9
Source File: AvroSpoolDirSourceTestUtil.java From datacollector with Apache License 2.0 | 5 votes |
/**
 * Writes a small Avro data file named "file-0.avro" into a fresh test directory.
 *
 * <p>Four records are built from {@code AVRO_SCHEMA}: a "boss" record whose own "boss" field is
 * null, and three employees ("a", "b", "c") that all reference it. Only the three employees are
 * appended, in the order a, b, c.
 *
 * <p>The writer is managed by try-with-resources so the file handle is released even when
 * {@code create} or {@code append} throws.
 *
 * @return the data file that was written
 * @throws Exception if the test directory or the file cannot be created or written
 */
public static File createAvroDataFile() throws Exception {
    File f = new File(createTestDir(), "file-0.avro");
    Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);

    GenericRecord boss = new GenericData.Record(schema);
    boss.put("name", "boss");
    boss.put("age", 60);
    boss.put("emails", ImmutableList.of("boss@company.com", "boss2@company.com"));
    boss.put("boss", null);

    GenericRecord e3 = new GenericData.Record(schema);
    e3.put("name", "c");
    e3.put("age", 50);
    e3.put("emails", ImmutableList.of("c@company.com", "c2@company.com"));
    e3.put("boss", boss);

    GenericRecord e2 = new GenericData.Record(schema);
    e2.put("name", "b");
    e2.put("age", 40);
    e2.put("emails", ImmutableList.of("b@company.com", "b2@company.com"));
    e2.put("boss", boss);

    GenericRecord e1 = new GenericData.Record(schema);
    e1.put("name", "a");
    e1.put("age", 30);
    e1.put("emails", ImmutableList.of("a@company.com", "a2@company.com"));
    e1.put("boss", boss);

    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, f);
        dataFileWriter.append(e1);
        dataFileWriter.append(e2);
        dataFileWriter.append(e3);
        dataFileWriter.flush();
    }
    return f;
}
Example 10
Source File: OracleGenericMessage.java From DBus with Apache License 2.0 | 5 votes |
/**
 * Creates an Avro record for {@code genericSchema} from this message.
 *
 * <p>The namespace, the schema hash and the payload (wrapped in a {@link ByteBuffer}) are stored
 * under the keys {@code NAMESAPCE}, {@code SCHEMA_HASH} and {@code PAYLOAD}.
 *
 * @param genericSchema schema of the record to create
 * @return the populated record
 */
public GenericRecord generateRecord(Schema genericSchema) {
    final ByteBuffer wrappedPayload = ByteBuffer.wrap(this.payload);

    final GenericRecord message = new GenericData.Record(genericSchema);
    message.put(NAMESAPCE, this.nameSpace);
    message.put(SCHEMA_HASH, this.schemaHash);
    message.put(PAYLOAD, wrappedPayload);
    return message;
}
Example 11
Source File: AvroSchemaRegistryClientUtil.java From registry with Apache License 2.0 | 5 votes |
/**
 * Builds a generic record for the schema loaded from "/device-compat.avsc".
 *
 * <p>"xid" and "timestamp" hold the current time in milliseconds, "name" and "make" are derived
 * from that same time, and "version" is a random int.
 *
 * @return the populated record
 * @throws IOException declared by this method; presumably raised while reading the schema
 */
public static Object createGenericRecordForCompatDevice() throws IOException {
    final Schema deviceSchema = new Schema.Parser().parse(getSchema("/device-compat.avsc"));
    final GenericRecord device = new GenericData.Record(deviceSchema);

    final long timestamp = System.currentTimeMillis();
    final int randomVersion = new Random().nextInt();

    device.put("xid", timestamp);
    device.put("name", "foo-" + timestamp);
    device.put("version", randomVersion);
    device.put("timestamp", timestamp);
    device.put("make", "make-" + timestamp);
    return device;
}
Example 12
Source File: TestAvroExport.java From aliyun-maxcompute-data-collectors with Apache License 2.0 | 5 votes |
/**
 * Stores the export value of every generator that has an Avro schema into {@code record}.
 *
 * <p>Generators whose {@code getColumnAvroSchema()} is null are skipped and do not consume a
 * column index, so the keys passed to {@code forIdx} are consecutive starting at 0.
 *
 * @param record    record to populate
 * @param rowNum    row number handed to each generator
 * @param extraCols generators for the additional columns
 */
private void addExtraColumns(GenericRecord record, int rowNum, ColumnGenerator[] extraCols) {
    int nextColumn = 0;
    for (int g = 0; g < extraCols.length; g++) {
        ColumnGenerator generator = extraCols[g];
        if (generator.getColumnAvroSchema() == null) {
            continue;
        }
        record.put(forIdx(nextColumn), generator.getExportValue(rowNum));
        nextColumn++;
    }
}
Example 13
Source File: TestHoodieAvroUtils.java From hudi with Apache License 2.0 | 5 votes |
/**
 * Rewrites a record built from {@code EXAMPLE_SCHEMA} into {@code SCHEMA_WITH_METADATA_FIELD}
 * and asserts that "_hoodie_commit_time", "nullable_field" and "nullable_field_wo_default"
 * all read back as {@code null}.
 */
@Test
public void testMetadataField() {
    Schema sourceSchema = new Schema.Parser().parse(EXAMPLE_SCHEMA);
    GenericRecord source = new GenericData.Record(sourceSchema);
    source.put("_row_key", "key1");
    source.put("non_pii_col", "val1");
    source.put("pii_col", "val2");
    source.put("timestamp", 3.5);

    Schema targetSchema = new Schema.Parser().parse(SCHEMA_WITH_METADATA_FIELD);
    GenericRecord rewritten = HoodieAvroUtils.rewriteRecord(source, targetSchema);

    assertNull(rewritten.get("_hoodie_commit_time"));
    assertNull(rewritten.get("nullable_field"));
    assertNull(rewritten.get("nullable_field_wo_default"));
}
Example 14
Source File: AvroOutputFormatTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Configures and opens {@code outputFormat}, writes 100 identical user records built from
 * {@code schema}, and closes the format.
 *
 * @param outputFormat format under test
 * @param schema       schema used for every written record
 * @throws IOException if opening, writing or closing fails
 */
private void output(final AvroOutputFormat<GenericRecord> outputFormat, Schema schema) throws IOException {
    final int recordCount = 100;

    outputFormat.configure(new Configuration());
    outputFormat.open(1, 1);

    int written = 0;
    while (written < recordCount) {
        GenericRecord user = new GenericData.Record(schema);
        user.put("user_name", "testUser");
        user.put("favorite_number", 1);
        user.put("favorite_color", "blue");
        outputFormat.writeRecord(user);
        written++;
    }

    outputFormat.close();
}
Example 15
Source File: FetchParquetTest.java From nifi with Apache License 2.0 | 5 votes |
private void writeParquetUsersWithNullableArray(final File parquetFile, int numUsers) throws IOException { if (parquetFile.exists()) { Assert.assertTrue(parquetFile.delete()); } final AvroParquetWriter.Builder<GenericRecord> writerBuilder = createAvroParquetWriter(parquetFile, schemaWithNullableArray); // use the schemaWithArray here just to get the schema for the array part of the favorite_colors fields, the overall // schemaWithNullableArray has a union of the array schema and null final Schema favoriteColorsSchema = schemaWithArray.getField("favorite_colors").schema(); try (final ParquetWriter<GenericRecord> writer = writerBuilder.build()) { for (int i=0; i < numUsers; i++) { final GenericRecord user = new GenericData.Record(schema); user.put("name", "Bob" + i); user.put("favorite_number", i); final GenericData.Array<String> colors = new GenericData.Array<>(1, favoriteColorsSchema); colors.add("blue" + i); user.put("favorite_color", colors); writer.write(user); } } }
Example 16
Source File: FastGenericDeserializerGeneratorTest.java From avro-util with BSD 2-Clause "Simplified" License | 5 votes |
@Test(groups = {"deserializationTest"}, dataProvider = "Implementation") public void shouldReadPermutatedEnum(Implementation implementation) { // given Schema enumSchema = createEnumSchema("testEnum", new String[]{"A", "B", "C", "D", "E"}); Schema recordSchema = createRecord( createField("testEnum", enumSchema), createUnionField("testEnumUnion", enumSchema), createArrayFieldSchema("testEnumArray", enumSchema), createArrayFieldSchema("testEnumUnionArray", createUnionSchema(enumSchema))); GenericRecord originalRecord = new GenericData.Record(recordSchema); originalRecord.put("testEnum", AvroCompatibilityHelper.newEnumSymbol(enumSchema, "A"));//new GenericData.EnumSymbol("A")); originalRecord.put("testEnumUnion", AvroCompatibilityHelper.newEnumSymbol(enumSchema, "B"));//new GenericData.EnumSymbol("B")); originalRecord.put("testEnumArray", Arrays.asList(AvroCompatibilityHelper.newEnumSymbol(enumSchema, "C")));//new GenericData.EnumSymbol("C"))); originalRecord.put("testEnumUnionArray", Arrays.asList(AvroCompatibilityHelper.newEnumSymbol(enumSchema, "D")));//new GenericData.EnumSymbol("D"))); Schema enumSchema1 = createEnumSchema("testEnum", new String[]{"B", "A", "D", "E", "C"}); Schema recordSchema1 = createRecord( createField("testEnum", enumSchema1), createUnionField("testEnumUnion", enumSchema1), createArrayFieldSchema("testEnumArray", enumSchema1), createArrayFieldSchema("testEnumUnionArray", createUnionSchema(enumSchema1))); // when GenericRecord record = implementation.decode(recordSchema, recordSchema1, genericDataAsDecoder(originalRecord)); // then Assert.assertEquals("A", record.get("testEnum").toString()); Assert.assertEquals("B", record.get("testEnumUnion").toString()); Assert.assertEquals("C", ((List<GenericData.EnumSymbol>) record.get("testEnumArray")).get(0).toString()); Assert.assertEquals("D", ((List<GenericData.EnumSymbol>) record.get("testEnumUnionArray")).get(0).toString()); }
Example 17
Source File: TestMergeContent.java From localization_nifi with Apache License 2.0 | 4 votes |
/**
 * Enqueues three serialized user records that share one schema, runs {@code MergeContent} in
 * Avro merge mode with min/max entries of 3, and expects one merged flow file (MIME type
 * "application/avro-binary") containing all three users, no failures and three originals.
 */
@Test
public void testSimpleAvroConcat() throws IOException, InterruptedException {
    final TestRunner runner = TestRunners.newTestRunner(new MergeContent());
    runner.setProperty(MergeContent.MAX_ENTRIES, "3");
    runner.setProperty(MergeContent.MIN_ENTRIES, "3");
    runner.setProperty(MergeContent.MERGE_FORMAT, MergeContent.MERGE_FORMAT_AVRO);

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/TestMergeContent/user.avsc"));

    final GenericRecord alyssa = new GenericData.Record(schema);
    alyssa.put("name", "Alyssa");
    alyssa.put("favorite_number", 256);

    final GenericRecord ben = new GenericData.Record(schema);
    ben.put("name", "Ben");
    ben.put("favorite_number", 7);
    ben.put("favorite_color", "red");

    final GenericRecord john = new GenericData.Record(schema);
    john.put("name", "John");
    john.put("favorite_number", 5);
    john.put("favorite_color", "blue");

    // Serialize every record first, then enqueue the payloads in the same order.
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final GenericRecord[] inputs = {alyssa, ben, john};
    final byte[][] payloads = new byte[inputs.length][];
    for (int k = 0; k < inputs.length; k++) {
        payloads[k] = serializeAvroRecord(schema, inputs[k], datumWriter).toByteArray();
    }
    for (final byte[] payload : payloads) {
        runner.enqueue(payload);
    }

    runner.run();
    runner.assertQueueEmpty();
    runner.assertTransferCount(MergeContent.REL_MERGED, 1);
    runner.assertTransferCount(MergeContent.REL_FAILURE, 0);
    runner.assertTransferCount(MergeContent.REL_ORIGINAL, 3);

    final MockFlowFile merged = runner.getFlowFilesForRelationship(MergeContent.REL_MERGED).get(0);
    merged.assertAttributeEquals(CoreAttributes.MIME_TYPE.key(), "application/avro-binary");

    // create a reader for the merged content
    byte[] mergedBytes = runner.getContentAsByteArray(merged);
    final Map<String, GenericRecord> usersByName = getGenericRecordMap(mergedBytes, schema, "name");

    Assert.assertEquals(3, usersByName.size());
    Assert.assertTrue(usersByName.containsKey("Alyssa"));
    Assert.assertTrue(usersByName.containsKey("Ben"));
    Assert.assertTrue(usersByName.containsKey("John"));
}
Example 18
Source File: TestMergeContent.java From localization_nifi with Apache License 2.0 | 4 votes |
/**
 * Enqueues two user records and one place record (a different schema) and runs
 * {@code MergeContent} in Avro merge mode with min/max entries of 3. The test expects one
 * merged flow file holding the two users, one failure flow file holding the place, and three
 * originals.
 */
@Test
public void testAvroConcatWithDifferentSchemas() throws IOException, InterruptedException {
    final TestRunner runner = TestRunners.newTestRunner(new MergeContent());
    runner.setProperty(MergeContent.MAX_ENTRIES, "3");
    runner.setProperty(MergeContent.MIN_ENTRIES, "3");
    runner.setProperty(MergeContent.MERGE_FORMAT, MergeContent.MERGE_FORMAT_AVRO);

    final Schema userSchema = new Schema.Parser().parse(new File("src/test/resources/TestMergeContent/user.avsc"));
    final Schema placeSchema = new Schema.Parser().parse(new File("src/test/resources/TestMergeContent/place.avsc"));

    final GenericRecord alyssa = new GenericData.Record(userSchema);
    alyssa.put("name", "Alyssa");
    alyssa.put("favorite_number", 256);

    final GenericRecord place = new GenericData.Record(placeSchema);
    place.put("name", "Some Place");

    final GenericRecord john = new GenericData.Record(userSchema);
    john.put("name", "John");
    john.put("favorite_number", 5);
    john.put("favorite_color", "blue");

    // The datum writer is created from the user schema and shared by all three serializations.
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(userSchema);
    final byte[] alyssaBytes = serializeAvroRecord(userSchema, alyssa, datumWriter).toByteArray();
    final byte[] placeBytes = serializeAvroRecord(placeSchema, place, datumWriter).toByteArray();
    final byte[] johnBytes = serializeAvroRecord(userSchema, john, datumWriter).toByteArray();

    runner.enqueue(alyssaBytes);
    runner.enqueue(placeBytes);
    runner.enqueue(johnBytes);

    runner.run();
    runner.assertQueueEmpty();
    runner.assertTransferCount(MergeContent.REL_MERGED, 1);
    runner.assertTransferCount(MergeContent.REL_FAILURE, 1);
    runner.assertTransferCount(MergeContent.REL_ORIGINAL, 3);

    // Merged bundle: only the two user records.
    final MockFlowFile merged = runner.getFlowFilesForRelationship(MergeContent.REL_MERGED).get(0);
    merged.assertAttributeEquals(CoreAttributes.MIME_TYPE.key(), "application/avro-binary");
    final byte[] mergedBytes = runner.getContentAsByteArray(merged);
    final Map<String, GenericRecord> usersByName = getGenericRecordMap(mergedBytes, userSchema, "name");
    Assert.assertEquals(2, usersByName.size());
    Assert.assertTrue(usersByName.containsKey("Alyssa"));
    Assert.assertTrue(usersByName.containsKey("John"));

    // Failure flow file: the record with the other schema.
    final MockFlowFile rejected = runner.getFlowFilesForRelationship(MergeContent.REL_FAILURE).get(0);
    final byte[] rejectedBytes = runner.getContentAsByteArray(rejected);
    final Map<String, GenericRecord> placesByName = getGenericRecordMap(rejectedBytes, placeSchema, "name");
    Assert.assertEquals(1, placesByName.size());
    Assert.assertTrue(placesByName.containsKey("Some Place"));
}
Example 19
Source File: FastGenericDeserializerGeneratorTest.java From avro-util with BSD 2-Clause "Simplified" License | 4 votes |
@Test(groups = {"deserializationTest"}, dataProvider = "Implementation") public void shouldReadPrimitives(Implementation implementation) { // given Schema recordSchema = createRecord( createField("testInt", Schema.create(Schema.Type.INT)), createPrimitiveUnionFieldSchema("testIntUnion", Schema.Type.INT), createField("testString", Schema.create(Schema.Type.STRING)), createPrimitiveUnionFieldSchema("testStringUnion", Schema.Type.STRING), createField("testLong", Schema.create(Schema.Type.LONG)), createPrimitiveUnionFieldSchema("testLongUnion", Schema.Type.LONG), createField("testDouble", Schema.create(Schema.Type.DOUBLE)), createPrimitiveUnionFieldSchema("testDoubleUnion", Schema.Type.DOUBLE), createField("testFloat", Schema.create(Schema.Type.FLOAT)), createPrimitiveUnionFieldSchema("testFloatUnion", Schema.Type.FLOAT), createField("testBoolean", Schema.create(Schema.Type.BOOLEAN)), createPrimitiveUnionFieldSchema("testBooleanUnion", Schema.Type.BOOLEAN), createField("testBytes", Schema.create(Schema.Type.BYTES)), createPrimitiveUnionFieldSchema("testBytesUnion", Schema.Type.BYTES)); GenericRecord record = new GenericData.Record(recordSchema); record.put("testInt", 1); record.put("testIntUnion", 1); record.put("testString", "aaa"); record.put("testStringUnion", "aaa"); record.put("testLong", 1l); record.put("testLongUnion", 1l); record.put("testDouble", 1.0); record.put("testDoubleUnion", 1.0); record.put("testFloat", 1.0f); record.put("testFloatUnion", 1.0f); record.put("testBoolean", true); record.put("testBooleanUnion", true); record.put("testBytes", ByteBuffer.wrap(new byte[]{0x01, 0x02})); record.put("testBytesUnion", ByteBuffer.wrap(new byte[]{0x01, 0x02})); // when GenericRecord decodedRecord = implementation.decode(recordSchema, recordSchema, genericDataAsDecoder(record)); // then Assert.assertEquals(1, decodedRecord.get("testInt")); Assert.assertEquals(1, decodedRecord.get("testIntUnion")); Assert.assertEquals(new Utf8("aaa"), 
decodedRecord.get("testString")); Assert.assertEquals(new Utf8("aaa"), decodedRecord.get("testStringUnion")); Assert.assertEquals(1l, decodedRecord.get("testLong")); Assert.assertEquals(1l, decodedRecord.get("testLongUnion")); Assert.assertEquals(1.0, decodedRecord.get("testDouble")); Assert.assertEquals(1.0, decodedRecord.get("testDoubleUnion")); Assert.assertEquals(1.0f, decodedRecord.get("testFloat")); Assert.assertEquals(1.0f, decodedRecord.get("testFloatUnion")); Assert.assertEquals(true, decodedRecord.get("testBoolean")); Assert.assertEquals(true, decodedRecord.get("testBooleanUnion")); Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), decodedRecord.get("testBytes")); Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), decodedRecord.get("testBytesUnion")); }
Example 20
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0 | 4 votes |
/**
 * Writes two {@code InputRecord}s through {@code BigQueryIO} using an Avro format function
 * together with a custom datum-writer factory whose writer appends "_custom" to every string
 * it writes. The rows read back from the fake dataset service must show the suffixed strings.
 */
@Test
public void testWriteAvroWithCustomWriter() throws Exception {
    // Copies the four values of an input element into a record of the request's schema.
    SerializableFunction<AvroWriteRequest<InputRecord>, GenericRecord> formatFunction =
            request -> {
                GenericRecord avroRecord = new GenericData.Record(request.getSchema());
                InputRecord element = request.getElement();
                avroRecord.put("strVal", element.strVal());
                avroRecord.put("longVal", element.longVal());
                avroRecord.put("doubleVal", element.doubleVal());
                avroRecord.put("instantVal", element.instantVal().getMillis() * 1000);
                return avroRecord;
            };

    // The schema handed to the factory is not used; each writer only overrides writeString.
    SerializableFunction<org.apache.avro.Schema, DatumWriter<GenericRecord>> customWriterFactory =
            ignoredSchema ->
                    new GenericDatumWriter<GenericRecord>() {
                        @Override
                        protected void writeString(org.apache.avro.Schema schema, Object datum, Encoder out)
                                throws IOException {
                            super.writeString(schema, datum.toString() + "_custom", out);
                        }
                    };

    TableSchema tableSchema = new TableSchema()
            .setFields(
                    ImmutableList.of(
                            new TableFieldSchema().setName("strVal").setType("STRING"),
                            new TableFieldSchema().setName("longVal").setType("INTEGER"),
                            new TableFieldSchema().setName("doubleVal").setType("FLOAT"),
                            new TableFieldSchema().setName("instantVal").setType("TIMESTAMP")));

    p.apply(
            Create.of(
                    InputRecord.create("test", 1, 1.0, Instant.parse("2019-01-01T00:00:00Z")),
                    InputRecord.create("test2", 2, 2.0, Instant.parse("2019-02-01T00:00:00Z")))
                    .withCoder(INPUT_RECORD_CODER))
            .apply(
                    BigQueryIO.<InputRecord>write()
                            .to("dataset-id.table-id")
                            .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                            .withSchema(tableSchema)
                            .withTestServices(fakeBqServices)
                            .withAvroWriter(formatFunction, customWriterFactory)
                            .withoutValidation());
    p.run();

    assertThat(
            fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
            containsInAnyOrder(
                    new TableRow()
                            .set("strVal", "test_custom")
                            .set("longVal", "1")
                            .set("doubleVal", 1.0D)
                            .set("instantVal", "2019-01-01 00:00:00 UTC"),
                    new TableRow()
                            .set("strVal", "test2_custom")
                            .set("longVal", "2")
                            .set("doubleVal", 2.0D)
                            .set("instantVal", "2019-02-01 00:00:00 UTC")));
}