org.apache.avro.generic.GenericRecordBuilder Java Examples
The following examples show how to use
org.apache.avro.generic.GenericRecordBuilder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FastGenericDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 6 votes |
@Test public void shouldReadJavaStringKeyedMapOfRecords() { // given Schema recordSchema = createRecord("record", createPrimitiveUnionFieldSchema("field", Schema.Type.STRING)); Schema mapRecordSchema = Schema.createMap(recordSchema); GenericData.setStringType(mapRecordSchema, GenericData.StringType.String); GenericRecordBuilder subRecordBuilder = new GenericRecordBuilder(recordSchema); subRecordBuilder.set("field", "abc"); Map<String, GenericData.Record> recordsMap = new HashMap<>(); recordsMap.put("1", subRecordBuilder.build()); recordsMap.put("2", subRecordBuilder.build()); // when Map<String, GenericRecord> mapWithStringKeys = deserializeGenericFast(mapRecordSchema, mapRecordSchema, serializeGeneric(recordsMap, mapRecordSchema)); // then Assert.assertEquals(2, mapWithStringKeys.size()); Assert.assertEquals("abc", mapWithStringKeys.get("1").get("field").toString()); Assert.assertEquals("abc", mapWithStringKeys.get("2").get("field").toString()); }
Example #2
Source File: KeyValueUtilsTest.java From components with Apache License 2.0 | 6 votes |
/** * From the input: {"a": "a", "b": "b", "c": "c"} * * Extract elements: "a", "b" and "c" * * The result should be: * * key: {"a": "a", "b": "b", "c": "c"} * * value: null */ @Test public void test_EverythingIsAKey() throws Exception { GenericRecord inputRecord = new GenericRecordBuilder(inputSimpleSchema) // .set("a", "a") // .set("b", "b") // .set("c", "c") // .build(); List<String> keyList = Arrays.asList("a", "b", "c"); String transformedIndexedRecord = ("{'key': {'a': 'a', 'b': 'b', 'c': 'c'}, " + "'value': {}}").replaceAll("\\'", "\""); IndexedRecord outputRecord = KeyValueUtils.transformToKV(inputRecord, SchemaGeneratorUtils.extractKeyValues(inputRecord.getSchema(), keyList)); assertEquals(transformedIndexedRecord, outputRecord.toString()); Schema kvSchema = SchemaGeneratorUtils.mergeKeyValues(outputRecord.getSchema()); String mergedRecord = ("{'a': 'a', 'b': 'b', 'c': 'c'}").replaceAll("\\'", "\""); assertEquals(mergedRecord, KeyValueUtils.transformFromKV(outputRecord, kvSchema).toString()); }
Example #3
Source File: AvroRecordWriterTest.java From data-highway with Apache License 2.0 | 6 votes |
/**
 * Writes a single record through a {@code RecordWriter} created by {@code Factory} and
 * checks that reading the produced bytes back as an Avro data file yields exactly that
 * record and nothing else.
 */
@Test
public void typical() throws Exception {
    Schema schema = SchemaBuilder
        .builder()
        .record("record")
        .fields()
        .requiredLong("id")
        .requiredString("name")
        .endRecord();
    Record value = new GenericRecordBuilder(schema).set("id", 1L).set("name", "hello").build();

    ByteArrayOutputStream output = new ByteArrayOutputStream();
    Factory factory = new Factory(CodecFactory.nullCodec());
    RecordWriter writer = factory.create(schema, output);
    writer.write(value);
    writer.close();

    SeekableInput input = new SeekableByteArrayInput(output.toByteArray());
    DatumReader<Record> datumReader = new GenericDatumReader<>(schema);
    // try-with-resources so the reader is closed even when an assertion fails.
    try (DataFileReader<Record> dataFileReader = new DataFileReader<>(input, datumReader)) {
        assertThat(dataFileReader.next(), is(value));
        assertThat(dataFileReader.hasNext(), is(false));
    }
}
Example #4
Source File: KeyValueUtilsTest.java From components with Apache License 2.0 | 6 votes |
/** * From the input: {"name": "testdata", "data": {"a": "a", "b": "b", "c": "c"}} * * No extracted element. * * The result should be: * * key: empty * * value: {"name": "testdata", "data": {"a": "a", "b": "b", "c": "c"}} */ @Test public void test_Hierarchical_EverythingIsAValue() throws Exception { GenericRecord inputRecord = new GenericRecordBuilder(inputHierarchicalSchema) // .set("name", "testdata") // .build(); inputRecord.put("data", new GenericRecordBuilder(inputSimpleSchema) // .set("a", "a") // .set("b", "b") // .set("c", "c") // .build()); String transformedIndexedRecord = ("{'key': {}, " + "'value': {'name': 'testdata', 'data': {'a': 'a', 'b': 'b', 'c': 'c'}}}").replaceAll("\\'", "\""); IndexedRecord outputRecord = KeyValueUtils.transformToKV(inputRecord, SchemaGeneratorUtils.extractKeyValues(inputRecord.getSchema(), new ArrayList<String>())); assertEquals(transformedIndexedRecord, outputRecord.toString()); Schema kvSchema = SchemaGeneratorUtils.mergeKeyValues(outputRecord.getSchema()); String mergedRecord = ("{'name': 'testdata', 'data': {'a': 'a', 'b': 'b', 'c': 'c'}}").replaceAll("\\'", "\""); assertEquals(mergedRecord, KeyValueUtils.transformFromKV(outputRecord, kvSchema).toString()); }
Example #5
Source File: KeyValueUtils.java From components with Apache License 2.0 | 6 votes |
/**
 * Builds a hierarchical record for one field of {@code schema} in which every leaf is null.
 *
 * <p>The method recurses through nested record fields: a nested record field receives its
 * own empty record, any other field receives {@code null}.
 *
 * @param schema the parent schema that declares {@code fieldName}
 * @param fieldName the name of the field for which an empty value is generated
 * @return an empty record when both {@code schema} and the unwrapped schema of the field
 *         are of type RECORD, otherwise {@code null}
 */
public static IndexedRecord generateEmptyRecord(Schema schema, String fieldName) {
    if (!schema.getType().equals(Type.RECORD)) {
        return null;
    }

    Schema unwrappedSchema = getUnwrappedSchema(schema.getField(fieldName));
    if (!unwrappedSchema.getType().equals(Type.RECORD)) {
        return null;
    }

    GenericRecordBuilder emptyBuilder = new GenericRecordBuilder(unwrappedSchema);
    for (Field child : unwrappedSchema.getFields()) {
        String childName = child.name();
        // Non-record children resolve to null, record children to a nested empty record.
        emptyBuilder.set(childName, generateEmptyRecord(unwrappedSchema, childName));
    }
    return emptyBuilder.build();
}
Example #6
Source File: KeyValueUtilsTest.java From components with Apache License 2.0 | 6 votes |
/** * From the input: {"name": "testdata", "data": {"a": "a", "b": "b", "c": "c"}} * * Extract elements: "name", "data.a", "data.b" and "data.c" * * The result should be: * * key: {"name": "testdata", "data": {"a": "a", "b": "b", "c": "c"}} * * value: null */ @Test public void test_Hierarchical_EverythingIsAKey() throws Exception { GenericRecord inputRecord = new GenericRecordBuilder(inputHierarchicalSchema) // .set("name", "testdata") // .build(); inputRecord.put("data", new GenericRecordBuilder(inputSimpleSchema) // .set("a", "a") // .set("b", "b") // .set("c", "c") // .build()); List<String> keyList = Arrays.asList("name", "data"); String transformedIndexedRecord = ("{'key': {'name': 'testdata', 'data': {'a': 'a', 'b': 'b', 'c': 'c'}}, " + "'value': {}}").replaceAll("\\'", "\""); IndexedRecord outputRecord = KeyValueUtils.transformToKV(inputRecord, SchemaGeneratorUtils.extractKeyValues(inputRecord.getSchema(), keyList)); assertEquals(transformedIndexedRecord, outputRecord.toString()); Schema kvSchema = SchemaGeneratorUtils.mergeKeyValues(outputRecord.getSchema()); String mergedRecord = ("{'name': 'testdata', 'data': {'a': 'a', 'b': 'b', 'c': 'c'}}").replaceAll("\\'", "\""); assertEquals(mergedRecord, KeyValueUtils.transformFromKV(outputRecord, kvSchema).toString()); }
Example #7
Source File: ConfluentRegistryCompatibleResourceTest.java From registry with Apache License 2.0 | 6 votes |
/**
 * Serializes a generic record with the Confluent {@code KafkaAvroSerializer} and reads it
 * back with {@code KafkaAvroDeserializer}, both configured with the schema registry URL
 * taken from {@code rootTarget}. The deserialized record is logged.
 */
@Test
public void testConfluentSerDes() throws Exception {
    final org.apache.avro.Schema schema =
            new org.apache.avro.Schema.Parser().parse(GENERIC_TEST_RECORD_SCHEMA);

    final GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
    recordBuilder.set("field1", "some value");
    recordBuilder.set("field2", "some other value");
    final GenericRecord record = recordBuilder.build();

    final Map<String, Object> config = new HashMap<>();
    config.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, rootTarget.getUri().toString());

    final KafkaAvroSerializer serializer = new KafkaAvroSerializer();
    serializer.configure(config, false);
    final byte[] payload = serializer.serialize("topic", record);

    final KafkaAvroDeserializer deserializer = new KafkaAvroDeserializer();
    deserializer.configure(config, false);
    final GenericRecord result = (GenericRecord) deserializer.deserialize("topic", payload);

    LOG.info(result.toString());
}
Example #8
Source File: KafkaAvroSerdesTest.java From registry with Apache License 2.0 | 6 votes |
/**
 * Serializes a generic record and deserializes it with the specific-reader flag disabled,
 * first through the plain {@code (topic, data)} overloads and then through the overloads
 * that also take record headers. Both round trips must produce an equal generic record.
 */
@Test
public void testGenericSerializedGenericDeserialized() {
    final String topic = "topic";

    final Map<String, Object> config = new HashMap<>();
    config.put(AvroSnapshotDeserializer.SPECIFIC_AVRO_READER, false);

    final KafkaAvroDeserializer deserializer = new KafkaAvroDeserializer(schemaRegistryClient);
    deserializer.configure(config, false);
    final KafkaAvroSerializer serializer = new KafkaAvroSerializer(schemaRegistryClient);
    serializer.configure(config, false);

    final GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
    recordBuilder.set("field1", "some value");
    recordBuilder.set("field2", "some other value");
    final GenericRecord record = recordBuilder.build();

    // Round trip without headers.
    final byte[] plainBytes = serializer.serialize(topic, record);
    final Object plainResult = deserializer.deserialize(topic, plainBytes);
    checkGenericSerializedGenericDeserializedEquals(record, plainResult);

    // Round trip with headers.
    final Headers headers = new RecordHeaders();
    final byte[] headerBytes = serializer.serialize(topic, headers, record);
    final Object headerResult = deserializer.deserialize(topic, headers, headerBytes);
    checkGenericSerializedGenericDeserializedEquals(record, headerResult);
}
Example #9
Source File: FastDatumWriterTest.java From avro-fastserde with Apache License 2.0 | 6 votes |
@Test @SuppressWarnings("unchecked") public void shouldCreateGenericDatumReader() throws IOException { Schema recordSchema = createRecord("TestSchema", createPrimitiveUnionFieldSchema("test", Schema.Type.STRING)); FastGenericDatumWriter<GenericRecord> fastGenericDatumReader = new FastGenericDatumWriter<>( recordSchema, cache); GenericRecordBuilder recordBuilder = new GenericRecordBuilder(recordSchema); recordBuilder.set("test", "test"); // when fastGenericDatumReader.write(recordBuilder.build(), EncoderFactory.get().directBinaryEncoder(new ByteArrayOutputStream(), null)); // then FastSerializer<GenericRecord> fastGenericSerializer = (FastSerializer<GenericRecord>) cache .getFastGenericSerializer(recordSchema); fastGenericSerializer = (FastSerializer<GenericRecord>) cache.getFastGenericSerializer(recordSchema); Assert.assertNotNull(fastGenericSerializer); Assert.assertNotEquals(2, fastGenericSerializer.getClass().getDeclaredMethods().length); }
Example #10
Source File: FastGenericSerializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 6 votes |
@Test @SuppressWarnings("unchecked") public void shouldWriteEnum() { // given Schema enumSchema = createEnumSchema("testEnum", new String[]{"A", "B"}); Schema recordSchema = createRecord("testRecord", createField("testEnum", enumSchema), createUnionField("testEnumUnion", enumSchema), createArrayFieldSchema("testEnumArray", enumSchema), createArrayFieldSchema("testEnumUnionArray", createUnionSchema(enumSchema))); GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema); builder.set("testEnum", new GenericData.EnumSymbol(enumSchema, "A")); builder.set("testEnumUnion", new GenericData.EnumSymbol(enumSchema, "A")); builder.set("testEnumArray", Arrays.asList(new GenericData.EnumSymbol(enumSchema, "A"))); builder.set("testEnumUnionArray", Arrays.asList(new GenericData.EnumSymbol(enumSchema, "A"))); // when GenericRecord record = deserializeGeneric(recordSchema, serializeGenericFast(builder.build())); // then Assert.assertEquals("A", record.get("testEnum").toString()); Assert.assertEquals("A", record.get("testEnumUnion").toString()); Assert.assertEquals("A", ((List<GenericData.EnumSymbol>) record.get("testEnumArray")).get(0).toString()); Assert.assertEquals("A", ((List<GenericData.EnumSymbol>) record.get("testEnumUnionArray")).get(0).toString()); }
Example #11
Source File: FastGenericDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 6 votes |
@Test @SuppressWarnings("unchecked") public void shouldReadEnum() { // given Schema enumSchema = createEnumSchema("testEnum", new String[]{"A", "B"}); Schema recordSchema = createRecord("testRecord", createField("testEnum", enumSchema), createUnionField("testEnumUnion", enumSchema), createArrayFieldSchema("testEnumArray", enumSchema), createArrayFieldSchema("testEnumUnionArray", createUnionSchema(enumSchema))); GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema); builder.set("testEnum", new GenericData.EnumSymbol(enumSchema, "A")); builder.set("testEnumUnion", new GenericData.EnumSymbol(enumSchema, "A")); builder.set("testEnumArray", Arrays.asList(new GenericData.EnumSymbol(enumSchema, "A"))); builder.set("testEnumUnionArray", Arrays.asList(new GenericData.EnumSymbol(enumSchema, "A"))); // when GenericRecord record = deserializeGenericFast(recordSchema, recordSchema, serializeGeneric(builder.build())); // then Assert.assertEquals("A", record.get("testEnum").toString()); Assert.assertEquals("A", record.get("testEnumUnion").toString()); Assert.assertEquals("A", ((List<GenericData.EnumSymbol>) record.get("testEnumArray")).get(0).toString()); Assert.assertEquals("A", ((List<GenericData.EnumSymbol>) record.get("testEnumUnionArray")).get(0).toString()); }
Example #12
Source File: AvroUtilsTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds the fixture record for the schema returned by {@code getAvroSchema()}, populating
 * one field per supported type: primitives, string, bytes, an encoded decimal, a
 * millisecond timestamp, a nested row, an array of rows and a map of rows.
 */
private static GenericRecord getGenericRecord() {
    // The decimal is encoded to bytes using a bytes schema annotated with the decimal type.
    final org.apache.avro.Schema decimalSchema =
        LogicalTypes.decimal(Integer.MAX_VALUE)
            .addToSchema(org.apache.avro.Schema.create(Type.BYTES));
    final LogicalType decimalType = decimalSchema.getLogicalType();
    final ByteBuffer encodedDecimal =
        new Conversions.DecimalConversion().toBytes(BIG_DECIMAL, null, decimalType);

    final GenericRecordBuilder builder = new GenericRecordBuilder(getAvroSchema());
    builder.set("bool", true);
    builder.set("int", 43);
    builder.set("long", 44L);
    builder.set("float", 44.1f);
    builder.set("double", 44.2);
    builder.set("string", new Utf8("string"));
    builder.set("bytes", ByteBuffer.wrap(BYTE_ARRAY));
    builder.set("decimal", encodedDecimal);
    builder.set("timestampMillis", DATE_TIME.getMillis());
    builder.set("row", getSubGenericRecord("row"));
    builder.set(
        "array",
        ImmutableList.of(getSubGenericRecord("array"), getSubGenericRecord("array")));
    builder.set(
        "map",
        ImmutableMap.of(
            new Utf8("k1"), getSubGenericRecord("map"),
            new Utf8("k2"), getSubGenericRecord("map")));
    return builder.build();
}
Example #13
Source File: FastGenericDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 5 votes |
@Test public void shouldReadSubRecordField() { // given Schema subRecordSchema = createRecord("subRecord", createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING)); Schema recordSchema = createRecord("test", createUnionField("record", subRecordSchema), createField("record1", subRecordSchema), createPrimitiveUnionFieldSchema("field", Schema.Type.STRING)); GenericRecordBuilder subRecordBuilder = new GenericRecordBuilder(subRecordSchema); subRecordBuilder.set("subField", "abc"); GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema); builder.set("record", subRecordBuilder.build()); builder.set("record1", subRecordBuilder.build()); builder.set("field", "abc"); // when GenericRecord record = deserializeGenericFast(recordSchema, recordSchema, serializeGeneric(builder.build())); // then Assert.assertEquals("abc", ((GenericRecord) record.get("record")).get("subField").toString()); Assert.assertEquals(subRecordSchema.hashCode(), ((GenericRecord) record.get("record")).getSchema().hashCode()); Assert.assertEquals("abc", ((GenericRecord) record.get("record1")).get("subField").toString()); Assert.assertEquals(subRecordSchema.hashCode(), ((GenericRecord) record.get("record1")).getSchema().hashCode()); Assert.assertEquals("abc", record.get("field").toString()); }
Example #14
Source File: FastGenericDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 5 votes |
@Test(expected = FastDeserializerGeneratorException.class) public void shouldNotReadStrippedEnum() { // given Schema enumSchema = createEnumSchema("testEnum", new String[]{"A", "B", "C"}); Schema recordSchema = createRecord("testRecord", createField("testEnum", enumSchema)); GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema); builder.set("testEnum", new GenericData.EnumSymbol(enumSchema, "C")); Schema enumSchema1 = createEnumSchema("testEnum", new String[]{"A", "B"}); Schema recordSchema1 = createRecord("testRecord", createField("testEnum", enumSchema1)); // when GenericRecord record = deserializeGenericFast(recordSchema, recordSchema1, serializeGeneric(builder.build())); }
Example #15
Source File: AvroUtils.java From beam with Apache License 2.0 | 5 votes |
/** * Convert from a Beam Row to an AVRO GenericRecord. If a Schema is not provided, one is inferred * from the Beam schema on the row. */ public static GenericRecord toGenericRecord( Row row, @Nullable org.apache.avro.Schema avroSchema) { Schema beamSchema = row.getSchema(); // Use the provided AVRO schema if present, otherwise infer an AVRO schema from the row // schema. if (avroSchema != null && avroSchema.getFields().size() != beamSchema.getFieldCount()) { throw new IllegalArgumentException( "AVRO schema doesn't match row schema. Row schema " + beamSchema + ". AVRO schema + " + avroSchema); } if (avroSchema == null) { avroSchema = toAvroSchema(beamSchema); } GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema); for (int i = 0; i < beamSchema.getFieldCount(); ++i) { Schema.Field field = beamSchema.getField(i); builder.set( field.getName(), genericFromBeamField( field.getType(), avroSchema.getField(field.getName()).schema(), row.getValue(i))); } return builder.build(); }
Example #16
Source File: TypeConverterUtils.java From components with Apache License 2.0 | 5 votes |
/**
 * Copies every field value of {@code inputRecord} into {@code outputRecordBuilder}.
 *
 * <p>Values are read from the input by field position and written to the builder by
 * field name.
 *
 * @param inputRecord the record whose schema fields and values are copied
 * @param outputRecordBuilder the builder that receives the values
 */
public static void copyFieldsValues(IndexedRecord inputRecord, GenericRecordBuilder outputRecordBuilder) {
    inputRecord.getSchema().getFields().forEach(
            source -> outputRecordBuilder.set(source.name(), inputRecord.get(source.pos())));
}
Example #17
Source File: KeyValueUtils.java From components with Apache License 2.0 | 5 votes |
/** * Generate a new Index Record which is the filtered result of the input record. * * The user can freely remove column, add empty column or change the place of column in the same hierarchical level. * * @return the new record */ public static IndexedRecord extractIndexedRecord(IndexedRecord inputRecord, Schema outputSchema) { GenericRecordBuilder outputRecord = new GenericRecordBuilder(outputSchema); Schema inputSchema = getUnwrappedSchema(inputRecord); for (Field field : outputSchema.getFields()) { if (inputSchema.getField(field.name()) != null) { // The column was existing on the input record, we forward it to the output record. Object inputValue = inputRecord.get(inputSchema.getField(field.name()).pos()); // The current column can be a Record (an hierarchical sub-object) or directly a value. // If we are on a record, we need to recursively do the process // if we are on a object, we save it to the output. if (inputValue instanceof Record) { // The sub-schema at this level is a union of "empty" and a record, // so we need to get the true sub-schema Schema inputChildSchema = getUnwrappedSchema(inputSchema.getField(field.name())); Schema outputChildSchema = getUnwrappedSchema(outputSchema.getField(field.name())); if (inputChildSchema.getType().equals(Type.RECORD) && outputChildSchema.getType().equals(Type.RECORD)) { Object childRecord = extractIndexedRecord((IndexedRecord) inputValue, outputChildSchema); outputRecord.set(field.name(), childRecord); } } else { outputRecord.set(field.name(), inputValue); } } else { // element not found => set to the value and its hierarchy to null outputRecord.set(field.name(), KeyValueUtils.generateEmptyRecord(outputSchema, field.name())); } } return outputRecord.build(); }
Example #18
Source File: AvroGenericUtils.java From simplesource with Apache License 2.0 | 5 votes |
/**
 * Wraps a value and its sequence number into a single generic record.
 *
 * <p>The wrapper schema is looked up in {@code schemaCache} by the value's schema and is
 * generated with {@code valueWithSequenceSchema} on a cache miss.
 *
 * @param valueWithSequence the value together with its sequence
 * @return a record with the {@code VALUE} and {@code SEQUENCE} fields set
 */
public static GenericRecord toGenericRecord(
        final ValueWithSequence<GenericRecord> valueWithSequence
) {
    final GenericRecord value = valueWithSequence.value();
    final Schema wrapperSchema =
            schemaCache.computeIfAbsent(value.getSchema(), k -> valueWithSequenceSchema(value));

    final GenericRecordBuilder wrapper = new GenericRecordBuilder(wrapperSchema);
    wrapper.set(VALUE, value);
    wrapper.set(SEQUENCE, valueWithSequence.sequence().getSeq());
    return wrapper.build();
}
Example #19
Source File: AvroGenericUtils.java From simplesource with Apache License 2.0 | 5 votes |
/**
 * Converts an aggregate update into a generic record.
 *
 * <p>The record schema is looked up in {@code schemaCache} by {@code aggregateSchema} and
 * is generated with {@code AggregateUpdateAvroHelper::generateSchema} on a cache miss.
 *
 * @param aggregateUpdate the aggregate and its sequence
 * @param aggregateSchema the schema of the aggregate, used as the cache key
 * @return a record with the {@code AGGREGATION} and {@code SEQUENCE} fields set
 */
static GenericRecord toGenericRecord(
        final AggregateUpdate<GenericRecord> aggregateUpdate,
        final Schema aggregateSchema
) {
    final Schema updateSchema =
            schemaCache.computeIfAbsent(aggregateSchema, AggregateUpdateAvroHelper::generateSchema);

    final GenericRecordBuilder update = new GenericRecordBuilder(updateSchema);
    update.set(AGGREGATION, aggregateUpdate.aggregate());
    update.set(SEQUENCE, aggregateUpdate.sequence().getSeq());
    return update.build();
}
Example #20
Source File: AvroGenericUtils.java From simplesource with Apache License 2.0 | 5 votes |
/**
 * Converts a command response into a generic record.
 *
 * <p>The {@code RESULT} field is a union: the schema at index 0 is used for a failure
 * (first reason plus additional reasons) and the schema at index 1 for a success (write
 * sequence). Which branch is built is decided by folding the response's sequence result.
 *
 * @param commandResponse the command response to convert
 * @return a record with aggregate key, read sequence, command id and result set
 */
static <K> GenericRecord toCommandResponse(
        final CommandResponse<GenericRecord> commandResponse) {
    final GenericRecord key = commandResponse.aggregateKey();
    final Schema schema = commandResponseSchema(key);

    final Schema resultSchema = schema.getField(RESULT).schema();
    final Schema failureSchema = resultSchema.getTypes().get(0);
    final Schema reasonSchema = failureSchema.getField(REASON).schema();
    final Schema successSchema = resultSchema.getTypes().get(1);

    final GenericRecordBuilder response = new GenericRecordBuilder(schema);
    response.set(AGGREGATE_KEY, commandResponse.aggregateKey());
    response.set(READ_SEQUENCE, commandResponse.readSequence().getSeq());
    response.set(COMMAND_ID, commandResponse.commandId().id().toString());
    response.set(RESULT, commandResponse.sequenceResult().fold(
            // Failure branch: head reason plus the remaining reasons as a list.
            reasons -> new GenericRecordBuilder(failureSchema)
                    .set(REASON, fromReason(reasonSchema, reasons.head()))
                    .set(ADDITIONAL_REASONS, reasons.tail()
                            .stream()
                            .map(reason -> fromReason(reasonSchema, reason))
                            .collect(Collectors.toList()))
                    .build(),
            // Success branch: only the write sequence.
            sequence -> new GenericRecordBuilder(successSchema)
                    .set(WRITE_SEQUENCE, sequence.getSeq())
                    .build()
    ));
    return response.build();
}
Example #21
Source File: AvroGenericUtils.java From simplesource with Apache License 2.0 | 5 votes |
/**
 * Converts a command id into a generic record built on {@code schema}, storing the id's
 * string form in the {@code COMMAND_ID} field.
 *
 * @param commandResponseKey the command id to convert
 * @return a record with the {@code COMMAND_ID} field set
 */
static GenericRecord toGenericRecord(
        final CommandId commandResponseKey
) {
    final String commandId = commandResponseKey.id().toString();
    return new GenericRecordBuilder(schema)
            .set(COMMAND_ID, commandId)
            .build();
}
Example #22
Source File: ICMPParquetPacketWriterImpl.java From entrada with GNU General Public License v3.0 | 5 votes |
@Override public Partition write(Row row, String server) { rowCounter++; Calendar cal = Calendar.getInstance(); cal.setTimeInMillis(row.getTs().getTime()); // convert to avro GenericRecordBuilder builder = recordBuilder(ICMP_AVRO_SCHEMA); // map all the columns in the row to the avro record fields row.getColumns().stream().forEach(c -> { if (hasField(c.getName())) { builder.set(c.getName(), c.getValue()); } }); // create the actual record and write to parquet file GenericRecord record = builder.build(); Partition partition = Partition .builder() .year(cal.get(Calendar.YEAR)) .month(cal.get(Calendar.MONTH) + 1) .day(cal.get(Calendar.DAY_OF_MONTH)) .dns(false) .server(server) .build(); writer.write(record, schema(ICMP_AVRO_SCHEMA), partition); return partition; }
Example #23
Source File: KeyValueUtils.java From components with Apache License 2.0 | 5 votes |
/**
 * Transforms an indexed record so that it matches the associated key-value schema.
 *
 * <p>The key part and the value part are each extracted from the same input record using
 * the schema of the corresponding field of {@code kvSchema}.
 *
 * @param record an indexed record
 * @param kvSchema its associated key-value schema
 * @return the key-value record
 */
public static IndexedRecord transformToKV(IndexedRecord record, Schema kvSchema) {
    final Schema keySchema = kvSchema.getField(RECORD_KEY_PREFIX).schema();
    final IndexedRecord keyPart = extractIndexedRecord(record, keySchema);

    final Schema valueSchema = kvSchema.getField(RECORD_VALUE_PREFIX).schema();
    final IndexedRecord valuePart = extractIndexedRecord(record, valueSchema);

    return new GenericRecordBuilder(kvSchema)
            .set(RECORD_KEY_PREFIX, keyPart)
            .set(RECORD_VALUE_PREFIX, valuePart)
            .build();
}
Example #24
Source File: RegistryAvroDeserializationSchemaTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Writes a record using a schema that only declares {@code name} and reads it back as a
 * {@code SimpleRecord}: the written field must be restored and the optional field that was
 * never written must be null.
 */
@Test
public void testSpecificRecordReadMoreFieldsThanWereWritten() throws IOException {
	// The schema literal used to end with "}]"; the stray ']' made the JSON malformed.
	// NOTE(review): lenient Avro parsers ignore trailing content, stricter ones may
	// reject it - confirm against the Avro version in use.
	Schema smallerUserSchema = new Schema.Parser().parse(
		"{\"namespace\": \"org.apache.flink.formats.avro.generated\",\n" +
			" \"type\": \"record\",\n" +
			" \"name\": \"SimpleRecord\",\n" +
			" \"fields\": [\n" +
			" {\"name\": \"name\", \"type\": \"string\"}" +
			" ]\n" +
			"}");
	RegistryAvroDeserializationSchema<SimpleRecord> deserializer = new RegistryAvroDeserializationSchema<>(
		SimpleRecord.class,
		null,
		// Schema coder provider whose coder always answers with the smaller writer schema.
		() -> in -> smallerUserSchema
	);

	GenericData.Record smallUser = new GenericRecordBuilder(smallerUserSchema)
		.set("name", "someName")
		.build();

	SimpleRecord simpleRecord = deserializer.deserialize(writeRecord(
		smallUser,
		smallerUserSchema));

	assertEquals("someName", simpleRecord.getName().toString());
	assertNull(simpleRecord.getOptionalField());
}
Example #25
Source File: ParquetRecordReaderTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Writes a single nested record containing a map to a temporary Parquet file and checks
 * that {@code ParquetRecordReader} returns one row whose second field is that map.
 */
@Test
public void testMapGroup() throws IOException {
	Preconditions.checkState(unWrapSchema(NESTED_SCHEMA.getField("spamMap").schema())
		.getType().equals(Schema.Type.MAP));
	ImmutableMap.Builder<String, String> map = ImmutableMap.builder();
	map.put("testKey", "testValue");

	GenericRecord record = new GenericRecordBuilder(NESTED_SCHEMA)
		.set("foo", 32L)
		.set("spamMap", map.build())
		.build();

	Path path = createTempParquetFile(tempRoot.getRoot(), NESTED_SCHEMA, Collections.singletonList(record));
	MessageType readSchema = (new AvroSchemaConverter()).convert(NESTED_SCHEMA);
	ParquetRecordReader<Row> rowReader = new ParquetRecordReader<>(new RowReadSupport(), readSchema);

	InputFile inputFile =
		HadoopInputFile.fromPath(new org.apache.hadoop.fs.Path(path.toUri()), testConfig);
	ParquetReadOptions options = ParquetReadOptions.builder().build();
	ParquetFileReader fileReader = new ParquetFileReader(inputFile, options);

	rowReader.initialize(fileReader, testConfig);
	assertFalse(rowReader.reachEnd());

	Row row = rowReader.nextRecord();
	assertEquals(7, row.getArity());

	assertEquals(32L, row.getField(0));
	Map<?, ?> result = (Map<?, ?>) row.getField(1);
	// Expected value first, actual second, so a failure message reads correctly.
	assertEquals("testValue", result.get("testKey").toString());
	assertTrue(rowReader.reachEnd());
}
Example #26
Source File: ImportFromAvroTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
@Test public void booleans() throws Exception { SchemaBuilder.RecordBuilder<Schema> record = SchemaBuilder.record("booleans"); SchemaBuilder.FieldAssembler<Schema> fieldAssembler = record.fields(); fieldAssembler // Primary key. .requiredLong("id") // Boolean columns. .optionalBoolean("optional_boolean") .requiredBoolean("required_boolean") .optionalString("optional_string_boolean") .requiredString("required_string_boolean"); Schema schema = fieldAssembler.endRecord(); String spannerSchema = "CREATE TABLE `AvroTable` (" + "`id` INT64 NOT NULL," + "`optional_boolean` BOOL," + "`required_boolean` BOOL NOT NULL," + "`optional_string_boolean` BOOL," + "`required_string_boolean` BOOL NOT NULL," + ") PRIMARY KEY (`id`)"; runTest(schema, spannerSchema, Arrays.asList(new GenericRecordBuilder(schema) .set("id", 1L) .set("required_boolean", true) .set("optional_boolean", false) .set("required_string_boolean", "FALSE") .set("optional_string_boolean", "TRUE") .build(), new GenericRecordBuilder(schema) .set("id", 2L) .set("required_boolean", false) .set("optional_boolean", true) .set("required_string_boolean", "true") .set("optional_string_boolean", "f") .build())); }
Example #27
Source File: SchemaGeneratorUtilsTest.java From components with Apache License 2.0 | 5 votes |
/** * From the input: {"a": "a", "b": "b", "c": "c"} * * no extracted element. * * The result should be: * * key: empty * * value: {"a": "a", "b": "b", "c": "c"} */ @Test public void test_EverythingIsAValue() throws Exception { GenericRecord inputRecord = new GenericRecordBuilder(inputSimpleSchema) // .set("a", "a") // .set("b", "b") // .set("c", "c") // .build(); List<String> keyList = new ArrayList<String>(); String keyOutput = ("{'type':'record','name':'inputRow','fields':[]}").replaceAll("\\'", "\""); assertEquals(keyOutput, SchemaGeneratorUtils.extractKeys(inputRecord.getSchema(), keyList).toString()); String valueOutput = ("{'type':'record','name':'value_inputRow','fields':[" + "{'name':'a','type':['null','string'],'default':null}," + "{'name':'b','type':['null','string'],'default':null}," + "{'name':'c','type':['null','string'],'default':null}]}").replaceAll("\\'", "\""); assertEquals(valueOutput, SchemaGeneratorUtils.extractValues(inputRecord.getSchema(), keyList).toString()); Schema kvSchema = SchemaGeneratorUtils.extractKeyValues(inputRecord.getSchema(), keyList); assertEquals(generateKVOutput(keyOutput, valueOutput), kvSchema.toString()); String mergedSchema = ("{'type':'record','name':'inputRow','fields':[{'name':'a','type':['null','string'],'default':null}," + "{'name':'b','type':['null','string'],'default':null},{'name':'c','type':['null','string'],'default':null}]}") .replaceAll("\\'", "\""); assertEquals(mergedSchema, SchemaGeneratorUtils.mergeKeyValues(kvSchema).toString()); }
Example #28
Source File: AvroRecordConverterTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
@Test public void booleanArray() { String colName = "arrayofboolean"; Schema schema = SchemaBuilder.record("record") .fields() .requiredLong("id") .name(colName) .type() .optional() .array() .items() .booleanType() .endRecord(); // Null field GenericRecord avroRecord = new GenericRecordBuilder(schema).set("id", 0).build(); Optional<List<Boolean>> result = AvroRecordConverter.readBoolArray(avroRecord, BOOLEAN, colName); assertFalse(result.isPresent()); // Convert from boolean to Boolean. avroRecord = new GenericRecordBuilder(schema).set("id", 0).set(colName, booleanArray).build(); result = AvroRecordConverter.readBoolArray(avroRecord, BOOLEAN, colName); assertArrayEquals(booleanArray.toArray(), result.get().toArray()); // Convert from String to boolean. List<Utf8> stringBooleanArray = Arrays.asList(new Utf8("true"), new Utf8("false"), null); avroRecord = new GenericRecordBuilder(schema).set("id", 0).set(colName, stringBooleanArray).build(); result = AvroRecordConverter.readBoolArray(avroRecord, STRING, colName); assertArrayEquals(booleanArray.toArray(), result.get().toArray()); }
Example #29
Source File: FieldSelectorUtil.java From components with Apache License 2.0 | 5 votes |
/**
 * Generate an indexed record from a given {@code Schema} and its associated data as a map.
 *
 * <p>Each map entry is written to the record under the entry's key as the field name.
 *
 * @param fields the fields that should be inside the generated indexed record
 * @param schema the schema of the indexed record
 * @return an indexed record
 */
public static IndexedRecord generateIndexedRecord(Map<String, Object> fields, Schema schema) {
    final GenericRecordBuilder builder = new GenericRecordBuilder(schema);
    fields.forEach((name, value) -> builder.set(name, value));
    return builder.build();
}
Example #30
Source File: SchemaGeneratorUtilsTest.java From components with Apache License 2.0 | 5 votes |
/** * From the input: {"a": "a", "b": "b", "c": "c"} * * Extract elements: "c", "a" and "d" * * The result should be: * * key: {"c": "c", "a": "a", "d": null} * * value: {""b", "b""} */ @Test public void test_SimpleLevel() throws Exception { GenericRecord inputRecord = new GenericRecordBuilder(inputSimpleSchema) // .set("a", "a") // .set("b", "b") // .set("c", "c") // .build(); List<String> keyList = Arrays.asList("c", "a", "d"); String keyOutput = ("{'type':'record','name':'inputRow','fields':[" + "{'name':'c','type':['null','string'],'default':null}," + "{'name':'a','type':['null','string'],'default':null}," + "{'name':'d','type':['null','string'],'doc':'','default':''}]}").replaceAll("\\'", "\""); assertEquals(keyOutput, SchemaGeneratorUtils.extractKeys(inputRecord.getSchema(), keyList).toString()); String valueOutput = ("{'type':'record','name':'value_inputRow','fields':[" + "{'name':'b','type':['null','string'],'default':null}]}").replaceAll("\\'", "\""); assertEquals(valueOutput, SchemaGeneratorUtils.extractValues(inputRecord.getSchema(), keyList).toString()); Schema kvSchema = SchemaGeneratorUtils.extractKeyValues(inputRecord.getSchema(), keyList); assertEquals(generateKVOutput(keyOutput, valueOutput), kvSchema.toString()); String mergedSchema = ("{'type':'record','name':'inputRow','fields':[{'name':'c','type':['null','string'],'default':null}," + "{'name':'a','type':['null','string'],'default':null},{'name':'d','type':['null','string'],'doc':'','default':''}," + "{'name':'b','type':['null','string'],'default':null}]}").replaceAll("\\'", "\""); assertEquals(mergedSchema, SchemaGeneratorUtils.mergeKeyValues(kvSchema).toString()); }