Java Code Examples for org.apache.avro.generic.GenericRecordBuilder#set()
The following examples show how to use
org.apache.avro.generic.GenericRecordBuilder#set().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FastGenericDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 6 votes |
@Test @SuppressWarnings("unchecked") public void shouldReadEnum() { // given Schema enumSchema = createEnumSchema("testEnum", new String[]{"A", "B"}); Schema recordSchema = createRecord("testRecord", createField("testEnum", enumSchema), createUnionField("testEnumUnion", enumSchema), createArrayFieldSchema("testEnumArray", enumSchema), createArrayFieldSchema("testEnumUnionArray", createUnionSchema(enumSchema))); GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema); builder.set("testEnum", new GenericData.EnumSymbol(enumSchema, "A")); builder.set("testEnumUnion", new GenericData.EnumSymbol(enumSchema, "A")); builder.set("testEnumArray", Arrays.asList(new GenericData.EnumSymbol(enumSchema, "A"))); builder.set("testEnumUnionArray", Arrays.asList(new GenericData.EnumSymbol(enumSchema, "A"))); // when GenericRecord record = deserializeGenericFast(recordSchema, recordSchema, serializeGeneric(builder.build())); // then Assert.assertEquals("A", record.get("testEnum").toString()); Assert.assertEquals("A", record.get("testEnumUnion").toString()); Assert.assertEquals("A", ((List<GenericData.EnumSymbol>) record.get("testEnumArray")).get(0).toString()); Assert.assertEquals("A", ((List<GenericData.EnumSymbol>) record.get("testEnumUnionArray")).get(0).toString()); }
Example 2
Source File: FastGenericSerializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 6 votes |
@Test @SuppressWarnings("unchecked") public void shouldWriteEnum() { // given Schema enumSchema = createEnumSchema("testEnum", new String[]{"A", "B"}); Schema recordSchema = createRecord("testRecord", createField("testEnum", enumSchema), createUnionField("testEnumUnion", enumSchema), createArrayFieldSchema("testEnumArray", enumSchema), createArrayFieldSchema("testEnumUnionArray", createUnionSchema(enumSchema))); GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema); builder.set("testEnum", new GenericData.EnumSymbol(enumSchema, "A")); builder.set("testEnumUnion", new GenericData.EnumSymbol(enumSchema, "A")); builder.set("testEnumArray", Arrays.asList(new GenericData.EnumSymbol(enumSchema, "A"))); builder.set("testEnumUnionArray", Arrays.asList(new GenericData.EnumSymbol(enumSchema, "A"))); // when GenericRecord record = deserializeGeneric(recordSchema, serializeGenericFast(builder.build())); // then Assert.assertEquals("A", record.get("testEnum").toString()); Assert.assertEquals("A", record.get("testEnumUnion").toString()); Assert.assertEquals("A", ((List<GenericData.EnumSymbol>) record.get("testEnumArray")).get(0).toString()); Assert.assertEquals("A", ((List<GenericData.EnumSymbol>) record.get("testEnumUnionArray")).get(0).toString()); }
Example 3
Source File: FastDatumWriterTest.java From avro-fastserde with Apache License 2.0 | 6 votes |
@Test @SuppressWarnings("unchecked") public void shouldCreateGenericDatumReader() throws IOException { Schema recordSchema = createRecord("TestSchema", createPrimitiveUnionFieldSchema("test", Schema.Type.STRING)); FastGenericDatumWriter<GenericRecord> fastGenericDatumReader = new FastGenericDatumWriter<>( recordSchema, cache); GenericRecordBuilder recordBuilder = new GenericRecordBuilder(recordSchema); recordBuilder.set("test", "test"); // when fastGenericDatumReader.write(recordBuilder.build(), EncoderFactory.get().directBinaryEncoder(new ByteArrayOutputStream(), null)); // then FastSerializer<GenericRecord> fastGenericSerializer = (FastSerializer<GenericRecord>) cache .getFastGenericSerializer(recordSchema); fastGenericSerializer = (FastSerializer<GenericRecord>) cache.getFastGenericSerializer(recordSchema); Assert.assertNotNull(fastGenericSerializer); Assert.assertNotEquals(2, fastGenericSerializer.getClass().getDeclaredMethods().length); }
Example 4
Source File: FastGenericDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 5 votes |
@Test public void shouldReadMapOfRecords() { // given Schema recordSchema = createRecord("record", createPrimitiveUnionFieldSchema("field", Schema.Type.STRING)); Schema mapRecordSchema = Schema.createMap(recordSchema); GenericRecordBuilder subRecordBuilder = new GenericRecordBuilder(recordSchema); subRecordBuilder.set("field", "abc"); Map<String, GenericData.Record> recordsMap = new HashMap<>(); recordsMap.put("1", subRecordBuilder.build()); recordsMap.put("2", subRecordBuilder.build()); // when Map<Utf8, GenericRecord> map = deserializeGenericFast(mapRecordSchema, mapRecordSchema, serializeGeneric(recordsMap, mapRecordSchema)); // then Assert.assertEquals(2, map.size()); Assert.assertEquals("abc", map.get(new Utf8("1")).get("field").toString()); Assert.assertEquals("abc", map.get(new Utf8("2")).get("field").toString()); // given mapRecordSchema = Schema.createMap(createUnionSchema(recordSchema)); // when map = deserializeGenericFast(mapRecordSchema, mapRecordSchema, serializeGeneric(recordsMap, mapRecordSchema)); // then Assert.assertEquals(2, map.size()); Assert.assertEquals("abc", map.get(new Utf8("1")).get("field").toString()); Assert.assertEquals("abc", map.get(new Utf8("2")).get("field").toString()); }
Example 5
Source File: FastGenericDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 5 votes |
@Test public void shouldReadSubRecordField() { // given Schema subRecordSchema = createRecord("subRecord", createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING)); Schema recordSchema = createRecord("test", createUnionField("record", subRecordSchema), createField("record1", subRecordSchema), createPrimitiveUnionFieldSchema("field", Schema.Type.STRING)); GenericRecordBuilder subRecordBuilder = new GenericRecordBuilder(subRecordSchema); subRecordBuilder.set("subField", "abc"); GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema); builder.set("record", subRecordBuilder.build()); builder.set("record1", subRecordBuilder.build()); builder.set("field", "abc"); // when GenericRecord record = deserializeGenericFast(recordSchema, recordSchema, serializeGeneric(builder.build())); // then Assert.assertEquals("abc", ((GenericRecord) record.get("record")).get("subField").toString()); Assert.assertEquals(subRecordSchema.hashCode(), ((GenericRecord) record.get("record")).getSchema().hashCode()); Assert.assertEquals("abc", ((GenericRecord) record.get("record1")).get("subField").toString()); Assert.assertEquals(subRecordSchema.hashCode(), ((GenericRecord) record.get("record1")).getSchema().hashCode()); Assert.assertEquals("abc", record.get("field").toString()); }
Example 6
Source File: TestMetricsRowGroupFilter.java From iceberg with Apache License 2.0 | 5 votes |
@BeforeClass public static void createInputFile() throws IOException { if (PARQUET_FILE.exists()) { Assert.assertTrue(PARQUET_FILE.delete()); } OutputFile outFile = Files.localOutput(PARQUET_FILE); try (FileAppender<Record> appender = Parquet.write(outFile) .schema(FILE_SCHEMA) .build()) { GenericRecordBuilder builder = new GenericRecordBuilder(convert(FILE_SCHEMA, "table")); // create 50 records for (int i = 0; i < 50; i += 1) { builder.set("_id", 30 + i); // min=30, max=79, num-nulls=0 builder.set("_no_stats", TOO_LONG_FOR_STATS); // value longer than 4k will produce no stats builder.set("_required", "req"); // required, always non-null builder.set("_all_nulls", null); // never non-null builder.set("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values builder.set("_no_nulls", ""); // optional, but always non-null appender.add(builder.build()); } } InputFile inFile = Files.localInput(PARQUET_FILE); try (ParquetFileReader reader = ParquetFileReader.open(ParquetIO.file(inFile))) { Assert.assertEquals("Should create only one row group", 1, reader.getRowGroups().size()); ROW_GROUP_METADATA = reader.getRowGroups().get(0); PARQUET_SCHEMA = reader.getFileMetaData().getSchema(); } PARQUET_FILE.deleteOnExit(); }
Example 7
Source File: FieldSelectorUtil.java From components with Apache License 2.0 | 5 votes |
/**
 * Builds an indexed record for {@code schema}, populated from a name-to-value map.
 *
 * <p>Each map entry is passed to {@code GenericRecordBuilder#set(String, Object)} using the
 * entry key as the field name.
 *
 * @param fields the field names and values to put into the generated indexed record
 * @param schema the schema of the indexed record
 * @return the record produced by the builder
 */
public static IndexedRecord generateIndexedRecord(Map<String, Object> fields, Schema schema) {
    GenericRecordBuilder builder = new GenericRecordBuilder(schema);
    for (Entry<String, Object> entry : fields.entrySet()) {
        String fieldName = entry.getKey();
        Object fieldValue = entry.getValue();
        builder.set(fieldName, fieldValue);
    }
    return builder.build();
}
Example 8
Source File: BigQueryConvertersTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/** * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with a default namespace * when the namespace is not specified. */ @Test public void testAvroToEntityDefaultNamespace() throws Exception { // Create test data List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName(idField).setType("STRING")); fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING")); TableSchema bqSchema = new TableSchema().setFields(fields); Schema avroSchema = new Schema.Parser() .parse( String.format( avroSchemaTemplate, new StringBuilder() .append(String.format(avroFieldTemplate, idField, "int", idFieldDesc)) .append(",") .append(generateShortStringField()) .toString())); GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema); builder.set(idField, 1); builder.set(shortStringField, shortStringFieldValue); Record record = builder.build(); SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema); // Run the test AvroToEntity noNamespaceConverter = AvroToEntity.newBuilder() .setEntityKind(entityKind) .setUniqueNameColumn(uniqueNameColumn) .build(); Entity outputEntity = noNamespaceConverter.apply(inputBqData); // Assess results assertTrue(outputEntity.hasKey()); assertEquals("", outputEntity.getKey().getPartitionId().getNamespaceId()); }
Example 9
Source File: Generator.java From avro-random-generator with Do What The F*ck You Want To Public License | 5 votes |
/**
 * Builds a record for {@code schema} by generating a value for every field.
 *
 * @param schema the record schema whose fields are populated
 * @return the record produced by the builder, with each field set to the result of
 *     {@code generateObject} for that field's schema
 */
private GenericRecord generateRecord(Schema schema) {
  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
  for (Schema.Field currentField : schema.getFields()) {
    Object generatedValue = generateObject(currentField.schema());
    recordBuilder.set(currentField, generatedValue);
  }
  return recordBuilder.build();
}
Example 10
Source File: BigQueryConvertersTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/** * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with a valid key when the * unique name column is integer. */ @Test public void testAvroToEntityIntegerIdColumn() throws Exception { // Create test data List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName(idField).setType("INTEGER")); fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING")); TableSchema bqSchema = new TableSchema().setFields(fields); Schema avroSchema = new Schema.Parser() .parse( String.format( avroSchemaTemplate, new StringBuilder() .append(String.format(avroFieldTemplate, idField, "int", idFieldDesc)) .append(",") .append(generateShortStringField()) .toString())); GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema); builder.set(idField, idFieldValueInt); builder.set(shortStringField, shortStringFieldValue); Record record = builder.build(); SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema); // Run the test Entity outputEntity = converter.apply(inputBqData); assertTrue(outputEntity.hasKey()); assertEquals(idFieldValueStr, outputEntity.getKey().getPath(0).getName()); validateMetadata(outputEntity); }
Example 11
Source File: AvroData.java From apicurio-registry with Apache License 2.0 | 5 votes |
/**
 * Wraps a value in an {@code ANYTHING_SCHEMA} record when no schema is supplied.
 *
 * @param schema the schema accompanying the value; when non-null the value is returned as is
 * @param value the value to wrap; may be null, in which case no field is set on the wrapper
 * @param typeField the name of the wrapper field that receives the value
 * @return {@code value} itself if {@code schema} is non-null, otherwise the built wrapper record
 */
private static Object maybeWrapSchemaless(Schema schema, Object value, String typeField) {
    boolean schemaless = (schema == null);
    if (schemaless) {
        GenericRecordBuilder wrapper = new GenericRecordBuilder(ANYTHING_SCHEMA);
        if (value != null) {
            wrapper.set(typeField, value);
        }
        return wrapper.build();
    }
    return value;
}
Example 12
Source File: Generator.java From avro-random-generator with Do What The F*ck You Want To Public License | 5 votes |
/**
 * Converts an option value into the representation matching the type of {@code schema}.
 *
 * <p>Conversions performed (schema type / incoming Java type):
 * BYTES / String to ByteBuffer, FLOAT / Double to Float, LONG / Integer to Long,
 * ARRAY / Collection to GenericData.Array, ENUM / String to GenericData.EnumSymbol,
 * FIXED / String to GenericData.Fixed, RECORD / Map to a built record holding only the keys
 * that name a schema field. Any other combination is returned unchanged.
 *
 * @param schema the schema the option is meant for
 * @param option the raw option value
 * @return the converted value, or {@code option} itself when no conversion applies
 */
@SuppressWarnings("unchecked")
private Object wrapOption(Schema schema, Object option) {
  Schema.Type type = schema.getType();

  // NOTE(review): the String-to-bytes conversions below use the platform default charset.
  if (type == Schema.Type.BYTES && option instanceof String) {
    byte[] raw = ((String) option).getBytes(Charset.defaultCharset());
    return ByteBuffer.wrap(raw);
  }
  if (type == Schema.Type.FLOAT && option instanceof Double) {
    return ((Double) option).floatValue();
  }
  if (type == Schema.Type.LONG && option instanceof Integer) {
    return ((Integer) option).longValue();
  }
  if (type == Schema.Type.ARRAY && option instanceof Collection) {
    return new GenericData.Array(schema, (Collection) option);
  }
  if (type == Schema.Type.ENUM && option instanceof String) {
    return new GenericData.EnumSymbol(schema, (String) option);
  }
  if (type == Schema.Type.FIXED && option instanceof String) {
    byte[] raw = ((String) option).getBytes(Charset.defaultCharset());
    return new GenericData.Fixed(schema, raw);
  }
  if (type == Schema.Type.RECORD && option instanceof Map) {
    Map entries = (Map) option;
    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
    for (Schema.Field field : schema.getFields()) {
      String fieldName = field.name();
      // Fields absent from the map are left unset on the builder.
      if (entries.containsKey(fieldName)) {
        recordBuilder.set(field, entries.get(fieldName));
      }
    }
    return recordBuilder.build();
  }
  return option;
}
Example 13
Source File: BigQueryConvertersTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Generates an Avro record with a single field.
 *
 * <p>The field definition is rendered from {@code avroFieldTemplate} and embedded into
 * {@code AVRO_SCHEMA_TEMPLATE} to obtain the schema that is parsed.
 *
 * @param name the field name
 * @param type the field type passed to the field template
 * @param description the field description passed to the field template
 * @param value the value stored in the field
 * @return the built record
 */
private Record generateSingleFieldAvroRecord(
    String name, String type, String description, Object value) {
  String fieldJson = String.format(avroFieldTemplate, name, type, description);
  String schemaJson = String.format(AVRO_SCHEMA_TEMPLATE, fieldJson);
  Schema avroSchema = new Schema.Parser().parse(schemaJson);

  return new GenericRecordBuilder(avroSchema).set(name, value).build();
}
Example 14
Source File: Generator.java From ksql-fork-with-deep-learning-function with Apache License 2.0 | 5 votes |
/**
 * Builds a record for {@code schema} by generating a value for every field.
 *
 * @param schema the record schema whose fields are populated
 * @return the record produced by the builder, with each field set to the result of
 *     {@code generateObject} for that field's schema and name
 */
private GenericRecord generateRecord(Schema schema) {
  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
  for (Schema.Field currentField : schema.getFields()) {
    Object generatedValue = generateObject(currentField.schema(), currentField.name());
    recordBuilder.set(currentField, generatedValue);
  }
  return recordBuilder.build();
}
Example 15
Source File: AvroUtils.java From beam with Apache License 2.0 | 5 votes |
/** * Convert from a Beam Row to an AVRO GenericRecord. If a Schema is not provided, one is inferred * from the Beam schema on the row. */ public static GenericRecord toGenericRecord( Row row, @Nullable org.apache.avro.Schema avroSchema) { Schema beamSchema = row.getSchema(); // Use the provided AVRO schema if present, otherwise infer an AVRO schema from the row // schema. if (avroSchema != null && avroSchema.getFields().size() != beamSchema.getFieldCount()) { throw new IllegalArgumentException( "AVRO schema doesn't match row schema. Row schema " + beamSchema + ". AVRO schema + " + avroSchema); } if (avroSchema == null) { avroSchema = toAvroSchema(beamSchema); } GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema); for (int i = 0; i < beamSchema.getFieldCount(); ++i) { Schema.Field field = beamSchema.getField(i); builder.set( field.getName(), genericFromBeamField( field.getType(), avroSchema.getField(field.getName()).schema(), row.getValue(i))); } return builder.build(); }
Example 16
Source File: FastGenericDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 4 votes |
@Test public void shouldSkipRemovedRecord() { // given Schema subRecord1Schema = createRecord("subRecord", createPrimitiveFieldSchema("test1", Schema.Type.STRING), createPrimitiveFieldSchema("test2", Schema.Type.STRING)); Schema subRecord2Schema = createRecord("subRecord2", createPrimitiveFieldSchema("test1", Schema.Type.STRING), createPrimitiveFieldSchema("test2", Schema.Type.STRING)); Schema record1Schema = createRecord("test", createField("subRecord1", subRecord1Schema), createField("subRecord2", subRecord2Schema), createUnionField("subRecord3", subRecord2Schema), createField("subRecord4", subRecord1Schema)); Schema record2Schema = createRecord("test", createField("subRecord1", subRecord1Schema), createField("subRecord4", subRecord1Schema)); GenericRecordBuilder subRecordBuilder = new GenericRecordBuilder(subRecord1Schema); subRecordBuilder.set("test1", "abc"); subRecordBuilder.set("test2", "def"); GenericRecordBuilder subRecordBuilder2 = new GenericRecordBuilder(subRecord2Schema); subRecordBuilder2.set("test1", "ghi"); subRecordBuilder2.set("test2", "jkl"); GenericRecordBuilder builder = new GenericRecordBuilder(record1Schema); builder.set("subRecord1", subRecordBuilder.build()); builder.set("subRecord2", subRecordBuilder2.build()); builder.set("subRecord3", subRecordBuilder2.build()); builder.set("subRecord4", subRecordBuilder.build()); // when GenericRecord record = deserializeGenericFast(record1Schema, record2Schema, serializeGeneric(builder.build())); // then Assert.assertEquals("abc", ((GenericRecord) record.get("subRecord1")).get("test1").toString()); Assert.assertEquals("def", ((GenericRecord) record.get("subRecord1")).get("test2").toString()); Assert.assertEquals("abc", ((GenericRecord) record.get("subRecord4")).get("test1").toString()); Assert.assertEquals("def", ((GenericRecord) record.get("subRecord4")).get("test2").toString()); }
Example 17
Source File: TestDictionaryRowGroupFilter.java From iceberg with Apache License 2.0 | 4 votes |
@BeforeClass public static void createInputFile() throws IOException { if (PARQUET_FILE.exists()) { Assert.assertTrue(PARQUET_FILE.delete()); } // build struct field schema org.apache.avro.Schema structSchema = AvroSchemaUtil.convert(_structFieldType); OutputFile outFile = Files.localOutput(PARQUET_FILE); try (FileAppender<Record> appender = Parquet.write(outFile) .schema(FILE_SCHEMA) .build()) { GenericRecordBuilder builder = new GenericRecordBuilder(convert(FILE_SCHEMA, "table")); // create 20 copies of each record to ensure dictionary-encoding for (int copy = 0; copy < 20; copy += 1) { // create 50 records for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) { builder.set("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0 builder.set("_no_stats", TOO_LONG_FOR_STATS); // value longer than 4k will produce no stats builder.set("_required", "req"); // required, always non-null builder.set("_all_nulls", null); // never non-null builder.set("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values builder.set("_no_nulls", ""); // optional, but always non-null builder.set("_non_dict", UUID.randomUUID().toString()); // not dictionary-encoded Record structNotNull = new Record(structSchema); structNotNull.put("_int_field", INT_MIN_VALUE + i); builder.set("_struct_not_null", structNotNull); // struct with int appender.add(builder.build()); } } } InputFile inFile = Files.localInput(PARQUET_FILE); ParquetFileReader reader = ParquetFileReader.open(ParquetIO.file(inFile)); Assert.assertEquals("Should create only one row group", 1, reader.getRowGroups().size()); rowGroupMetadata = reader.getRowGroups().get(0); parquetSchema = reader.getFileMetaData().getSchema(); dictionaryStore = reader.getNextDictionaryReader(); PARQUET_FILE.deleteOnExit(); }
Example 18
Source File: FastGenericDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 4 votes |
@Test public void shouldReadPrimitives() { // given Schema javaLangStringSchema = Schema.create(Schema.Type.STRING); GenericData.setStringType(javaLangStringSchema, GenericData.StringType.String); Schema recordSchema = createRecord("testRecord", createField("testInt", Schema.create(Schema.Type.INT)), createPrimitiveUnionFieldSchema("testIntUnion", Schema.Type.INT), createField("testString", Schema.create(Schema.Type.STRING)), createPrimitiveUnionFieldSchema("testStringUnion", Schema.Type.STRING), createField("testJavaString", javaLangStringSchema), createUnionField("testJavaStringUnion", javaLangStringSchema), createField("testLong", Schema.create(Schema.Type.LONG)), createPrimitiveUnionFieldSchema("testLongUnion", Schema.Type.LONG), createField("testDouble", Schema.create(Schema.Type.DOUBLE)), createPrimitiveUnionFieldSchema("testDoubleUnion", Schema.Type.DOUBLE), createField("testFloat", Schema.create(Schema.Type.FLOAT)), createPrimitiveUnionFieldSchema("testFloatUnion", Schema.Type.FLOAT), createField("testBoolean", Schema.create(Schema.Type.BOOLEAN)), createPrimitiveUnionFieldSchema("testBooleanUnion", Schema.Type.BOOLEAN), createField("testBytes", Schema.create(Schema.Type.BYTES)), createPrimitiveUnionFieldSchema("testBytesUnion", Schema.Type.BYTES)); GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema); builder.set("testInt", 1); builder.set("testIntUnion", 1); builder.set("testString", "aaa"); builder.set("testStringUnion", "aaa"); builder.set("testJavaString", "aaa"); builder.set("testJavaStringUnion", "aaa"); builder.set("testLong", 1L); builder.set("testLongUnion", 1L); builder.set("testDouble", 1.0); builder.set("testDoubleUnion", 1.0); builder.set("testFloat", 1.0f); builder.set("testFloatUnion", 1.0f); builder.set("testBoolean", true); builder.set("testBooleanUnion", true); builder.set("testBytes", ByteBuffer.wrap(new byte[]{0x01, 0x02})); builder.set("testBytesUnion", ByteBuffer.wrap(new byte[]{0x01, 0x02})); // when GenericRecord 
record = deserializeGenericFast(recordSchema, recordSchema, serializeGeneric(builder.build())); // then Assert.assertEquals(1, record.get("testInt")); Assert.assertEquals(1, record.get("testIntUnion")); Assert.assertEquals("aaa", record.get("testString").toString()); Assert.assertEquals("aaa", record.get("testStringUnion").toString()); Assert.assertEquals("aaa", record.get("testJavaString")); Assert.assertEquals("aaa", record.get("testJavaStringUnion")); Assert.assertEquals(1L, record.get("testLong")); Assert.assertEquals(1L, record.get("testLongUnion")); Assert.assertEquals(1.0, record.get("testDouble")); Assert.assertEquals(1.0, record.get("testDoubleUnion")); Assert.assertEquals(1.0f, record.get("testFloat")); Assert.assertEquals(1.0f, record.get("testFloatUnion")); Assert.assertEquals(true, record.get("testBoolean")); Assert.assertEquals(true, record.get("testBooleanUnion")); Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), record.get("testBytes")); Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), record.get("testBytesUnion")); }
Example 19
Source File: TestMetricsRowGroupFilter.java From iceberg with Apache License 2.0 | 4 votes |
private void createParquetInputFile() throws IOException { if (parquetFile.exists()) { Assert.assertTrue(parquetFile.delete()); } // build struct field schema org.apache.avro.Schema structSchema = AvroSchemaUtil.convert(_structFieldType); OutputFile outFile = Files.localOutput(parquetFile); try (FileAppender<Record> appender = Parquet.write(outFile) .schema(FILE_SCHEMA) .build()) { GenericRecordBuilder builder = new GenericRecordBuilder(convert(FILE_SCHEMA, "table")); // create 50 records for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) { builder.set("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0 builder.set("_no_stats_parquet", TOO_LONG_FOR_STATS_PARQUET); // value longer than 4k will produce no stats // in Parquet builder.set("_required", "req"); // required, always non-null builder.set("_all_nulls", null); // never non-null builder.set("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values builder.set("_no_nulls", ""); // optional, but always non-null builder.set("_str", i + "str" + i); Record structNotNull = new Record(structSchema); structNotNull.put("_int_field", INT_MIN_VALUE + i); builder.set("_struct_not_null", structNotNull); // struct with int appender.add(builder.build()); } } InputFile inFile = Files.localInput(parquetFile); try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { Assert.assertEquals("Should create only one row group", 1, reader.getRowGroups().size()); rowGroupMetadata = reader.getRowGroups().get(0); parquetSchema = reader.getFileMetaData().getSchema(); } parquetFile.deleteOnExit(); }
Example 20
Source File: FastGenericSerializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 4 votes |
@Test @SuppressWarnings("unchecked") public void shouldWriteSubRecordComplexCollectionsField() { // given Schema subRecordSchema = createRecord("subRecord", createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING)); Schema recordSchema = createRecord( "test", createArrayFieldSchema("recordsArrayMap", Schema.createMap(createUnionSchema(subRecordSchema))), createMapFieldSchema("recordsMapArray", Schema.createArray(createUnionSchema(subRecordSchema))), createUnionField("recordsArrayMapUnion", Schema.createArray(Schema.createMap(createUnionSchema(subRecordSchema)))), createUnionField("recordsMapArrayUnion", Schema.createMap(Schema.createArray(createUnionSchema(subRecordSchema))))); GenericRecordBuilder subRecordBuilder = new GenericRecordBuilder(subRecordSchema); subRecordBuilder.set("subField", "abc"); GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema); List<Map<String, GenericRecord>> recordsArrayMap = new ArrayList<>(); Map<String, GenericRecord> recordMap = new HashMap<>(); recordMap.put("1", subRecordBuilder.build()); recordsArrayMap.add(recordMap); builder.set("recordsArrayMap", recordsArrayMap); builder.set("recordsArrayMapUnion", recordsArrayMap); Map<String, List<GenericRecord>> recordsMapArray = new HashMap<>(); List<GenericRecord> recordList = new ArrayList<>(); recordList.add(subRecordBuilder.build()); recordsMapArray.put("1", recordList); builder.set("recordsMapArray", recordsMapArray); builder.set("recordsMapArrayUnion", recordsMapArray); // when GenericRecord record = deserializeGeneric(recordSchema, serializeGenericFast(builder.build())); // then Assert.assertEquals("abc", ((List<Map<Utf8, GenericRecord>>) record.get("recordsArrayMap")).get(0).get(new Utf8("1")) .get("subField").toString()); Assert.assertEquals("abc", ((Map<Utf8, List<GenericRecord>>) record.get("recordsMapArray")).get(new Utf8("1")).get(0) .get("subField").toString()); Assert.assertEquals("abc", ((List<Map<Utf8, GenericRecord>>) 
record.get("recordsArrayMapUnion")).get(0).get(new Utf8("1")) .get("subField").toString()); Assert.assertEquals("abc", ((Map<Utf8, List<GenericRecord>>) record.get("recordsMapArrayUnion")).get(new Utf8("1")).get(0) .get("subField").toString()); }