Java Code Examples for org.apache.avro.Schema#getElementType()
The following examples show how to use org.apache.avro.Schema#getElementType(). Each example is taken from an open source project; the source file, project, and license are noted above it.
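Before the project examples, here is a minimal, self-contained sketch of the method's basic contract (the schema string and class name are illustrative, not taken from any project below): getElementType() applies only to a schema whose type is ARRAY, and it returns the schema of the array's elements; calling it on a non-array schema raises an AvroRuntimeException.

import org.apache.avro.Schema;

public class GetElementTypeDemo {
    public static void main(String[] args) {
        // Parse an array-of-string schema (illustrative schema string).
        Schema arraySchema = new Schema.Parser().parse(
            "{\"type\": \"array\", \"items\": \"string\"}");

        // Guard on the schema type first; getElementType() throws
        // AvroRuntimeException for non-ARRAY schemas.
        if (arraySchema.getType() == Schema.Type.ARRAY) {
            Schema elementType = arraySchema.getElementType();
            System.out.println(elementType.getType()); // prints STRING
        }
    }
}

Most of the examples below follow this same pattern: verify (or assume) that the schema is an array, call getElementType(), and then recurse into or inspect the element schema.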
Example 1
Source File: ConverterTest.java From xml-avro with Apache License 2.0
@Test
public void arrayOfUnboundedChoiceElements() {
    String xsd =
        "<xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'>" +
        " <xs:element name='root'>" +
        " <xs:complexType>" +
        " <xs:choice maxOccurs='unbounded'>" +
        " <xs:element name='s' type='xs:string'/>" +
        " <xs:element name='i' type='xs:int'/>" +
        " </xs:choice>" +
        " </xs:complexType>" +
        " </xs:element>" +
        "</xs:schema>";

    Schema schema = Converter.createSchema(xsd);
    assertEquals(Schema.Type.ARRAY, schema.getType());

    final Schema elementType = schema.getElementType();
    assertEquals(Schema.Type.RECORD, elementType.getType());
}
Example 2
Source File: DatumBuilder.java From xml-avro with Apache License 2.0
private Object createArray(Schema schema, Element el) {
    NodeList childNodes = el.getChildNodes();
    Schema elementType = schema.getElementType();
    int numElements = childNodes.getLength();
    GenericData.Array array = new GenericData.Array(numElements, schema);
    for (int i = 0; i < numElements; i++) {
        Element child = (Element) childNodes.item(i);
        //noinspection unchecked
        array.add(createNodeDatum(elementType, child, true));
    }
    return array;
}
Example 3
Source File: PruneColumns.java From iceberg with Apache License 2.0
private Schema complexMapWithIds(Schema map, Integer keyId, Integer valueId) {
    Schema keyValue = map.getElementType();
    if (!AvroSchemaUtil.hasFieldId(keyValue.getField("key")) ||
        !AvroSchemaUtil.hasFieldId(keyValue.getField("value"))) {
        return AvroSchemaUtil.createMap(
            keyId, keyValue.getField("key").schema(),
            valueId, keyValue.getField("value").schema());
    }
    return map;
}
Example 4
Source File: ConverterTest.java From xml-avro with Apache License 2.0
@Test
public void arrayOfChoiceElements() {
    String xsd =
        "<xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'>" +
        " <xs:element name='root'>" +
        " <xs:complexType>" +
        " <xs:choice maxOccurs='3'>" +
        " <xs:element name='s' type='xs:string'/>" +
        " <xs:element name='i' type='xs:int'/>" +
        " </xs:choice>" +
        " </xs:complexType>" +
        " </xs:element>" +
        "</xs:schema>";

    Schema schema = Converter.createSchema(xsd);
    assertEquals(Schema.Type.ARRAY, schema.getType());

    final Schema elementType = schema.getElementType();
    assertEquals(Schema.Type.RECORD, elementType.getType());
    assertEquals(2, elementType.getFields().size());

    String xml = "<root><s>s</s><i>1</i><i>2</i></root>";
    GenericData.Array record = Converter.createDatum(schema, xml);

    Object firstRecord = record.get(0);
    assertTrue(firstRecord instanceof GenericData.Record);
    assertEquals("s", ((GenericData.Record) firstRecord).get("s"));

    Object secondRecord = record.get(1);
    assertTrue(secondRecord instanceof GenericData.Record);
    assertEquals(1, ((GenericData.Record) secondRecord).get("i"));

    Object thirdRecord = record.get(2);
    assertTrue(thirdRecord instanceof GenericData.Record);
    assertEquals(2, ((GenericData.Record) thirdRecord).get("i"));
}
Example 5
Source File: AvroResolver.java From pxf with Apache License 2.0
/**
 * When an Avro field is actually an array, we resolve the type of the array
 * element, and for each element in the Avro array, we recursively invoke
 * the population of {@code List<OneField>} record.
 *
 * @param record list of fields to be populated
 * @param fieldValue field value
 * @param arraySchema array schema
 * @return number of populated fields
 */
int setArrayField(List<OneField> record, Object fieldValue, Schema arraySchema) {
    Schema typeSchema = arraySchema.getElementType();
    GenericData.Array<?> array = (GenericData.Array<?>) fieldValue;
    int length = array.size();
    for (Object o : array) {
        populateRecord(record, o, typeSchema);
    }
    return length;
}
Example 6
Source File: AvroUtils.java From envelope with Apache License 2.0
/**
 * Convert Avro Types into their associated DataType.
 *
 * @param schemaType Avro Schema.Type
 * @return DataType representation
 */
public static DataType dataTypeFor(Schema schemaType) {
    LOG.trace("Converting Schema[{}] to DataType", schemaType);

    // Unwrap "optional" unions to the base type
    boolean isOptional = isNullable(schemaType);

    if (isOptional) {
        // if only 2 items in the union, then "unwrap," otherwise, it's a full union and should be rendered as such
        if (schemaType.getTypes().size() == 2) {
            LOG.trace("Unwrapping simple 'optional' union for {}", schemaType);
            for (Schema s : schemaType.getTypes()) {
                if (s.getType().equals(NULL)) {
                    continue;
                }
                // Unwrap
                schemaType = s;
                break;
            }
        }
    }

    // Convert supported LogicalTypes
    if (null != schemaType.getLogicalType()) {
        LogicalType logicalType = schemaType.getLogicalType();
        switch (logicalType.getName()) {
            case "date":
                return DataTypes.DateType;
            case "timestamp-millis":
                return DataTypes.TimestampType;
            case "decimal":
                LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType;
                return DataTypes.createDecimalType(decimal.getPrecision(), decimal.getScale());
            default:
                // Pass-thru
                LOG.warn("Unsupported LogicalType[{}], continuing with underlying base type", logicalType.getName());
        }
    }

    switch (schemaType.getType()) {
        case RECORD:
            // StructType
            List<StructField> structFieldList = Lists.newArrayListWithCapacity(schemaType.getFields().size());
            for (Field f : schemaType.getFields()) {
                structFieldList.add(DataTypes.createStructField(f.name(), dataTypeFor(f.schema()), isNullable(f.schema())));
            }
            return DataTypes.createStructType(structFieldList);
        case ARRAY:
            Schema elementType = schemaType.getElementType();
            return DataTypes.createArrayType(dataTypeFor(elementType), isNullable(elementType));
        case MAP:
            Schema valueType = schemaType.getValueType();
            return DataTypes.createMapType(DataTypes.StringType, dataTypeFor(valueType), isNullable(valueType));
        case UNION:
            // StructType of members
            List<StructField> unionFieldList = Lists.newArrayListWithCapacity(schemaType.getTypes().size());
            int m = 0;
            for (Schema u : schemaType.getTypes()) {
                unionFieldList.add(DataTypes.createStructField("member" + m++, dataTypeFor(u), isNullable(u)));
            }
            return DataTypes.createStructType(unionFieldList);
        case FIXED:
        case BYTES:
            return DataTypes.BinaryType;
        case ENUM:
        case STRING:
            return DataTypes.StringType;
        case INT:
            return DataTypes.IntegerType;
        case LONG:
            return DataTypes.LongType;
        case FLOAT:
            return DataTypes.FloatType;
        case DOUBLE:
            return DataTypes.DoubleType;
        case BOOLEAN:
            return DataTypes.BooleanType;
        case NULL:
            return DataTypes.NullType;
        default:
            throw new RuntimeException(String.format("Unrecognized or unsupported Avro Type conversion: %s", schemaType));
    }
}
Example 7
Source File: AvroRowSerializationSchema.java From flink with Apache License 2.0
private Object convertFlinkType(Schema schema, Object object) {
    if (object == null) {
        return null;
    }
    switch (schema.getType()) {
        case RECORD:
            if (object instanceof Row) {
                return convertRowToAvroRecord(schema, (Row) object);
            }
            throw new IllegalStateException("Row expected but was: " + object.getClass());
        case ENUM:
            return new GenericData.EnumSymbol(schema, object.toString());
        case ARRAY:
            final Schema elementSchema = schema.getElementType();
            final Object[] array = (Object[]) object;
            final GenericData.Array<Object> convertedArray = new GenericData.Array<>(array.length, schema);
            for (Object element : array) {
                convertedArray.add(convertFlinkType(elementSchema, element));
            }
            return convertedArray;
        case MAP:
            final Map<?, ?> map = (Map<?, ?>) object;
            final Map<Utf8, Object> convertedMap = new HashMap<>();
            for (Map.Entry<?, ?> entry : map.entrySet()) {
                convertedMap.put(
                    new Utf8(entry.getKey().toString()),
                    convertFlinkType(schema.getValueType(), entry.getValue()));
            }
            return convertedMap;
        case UNION:
            final List<Schema> types = schema.getTypes();
            final int size = types.size();
            final Schema actualSchema;
            if (size == 2 && types.get(0).getType() == Schema.Type.NULL) {
                actualSchema = types.get(1);
            } else if (size == 2 && types.get(1).getType() == Schema.Type.NULL) {
                actualSchema = types.get(0);
            } else if (size == 1) {
                actualSchema = types.get(0);
            } else {
                // generic type
                return object;
            }
            return convertFlinkType(actualSchema, object);
        case FIXED:
            // check for logical type
            if (object instanceof BigDecimal) {
                return new GenericData.Fixed(
                    schema,
                    convertFromDecimal(schema, (BigDecimal) object));
            }
            return new GenericData.Fixed(schema, (byte[]) object);
        case STRING:
            return new Utf8(object.toString());
        case BYTES:
            // check for logical type
            if (object instanceof BigDecimal) {
                return ByteBuffer.wrap(convertFromDecimal(schema, (BigDecimal) object));
            }
            return ByteBuffer.wrap((byte[]) object);
        case INT:
            // check for logical types
            if (object instanceof Date) {
                return convertFromDate(schema, (Date) object);
            } else if (object instanceof LocalDate) {
                return convertFromDate(schema, Date.valueOf((LocalDate) object));
            } else if (object instanceof Time) {
                return convertFromTime(schema, (Time) object);
            } else if (object instanceof LocalTime) {
                return convertFromTime(schema, Time.valueOf((LocalTime) object));
            }
            return object;
        case LONG:
            // check for logical type
            if (object instanceof Timestamp) {
                return convertFromTimestamp(schema, (Timestamp) object);
            } else if (object instanceof LocalDateTime) {
                return convertFromTimestamp(schema, Timestamp.valueOf((LocalDateTime) object));
            }
            return object;
        case FLOAT:
        case DOUBLE:
        case BOOLEAN:
            return object;
    }
    throw new RuntimeException("Unsupported Avro type:" + schema);
}
Example 8
Source File: AvroWriteSupportInt96Avro18.java From datacollector with Apache License 2.0
public void writeJavaArray(GroupType schema, Schema avroSchema, Class<?> arrayClass, Object value) {
    Class<?> elementClass = arrayClass.getComponentType();

    if (!elementClass.isPrimitive()) {
        writeObjectArray(schema, avroSchema, (Object[]) value);
        return;
    }

    switch (avroSchema.getElementType().getType()) {
        case BOOLEAN:
            Preconditions.checkArgument(elementClass == boolean.class,
                "Cannot write as boolean array: " + arrayClass.getName());
            writeBooleanArray((boolean[]) value);
            break;
        case INT:
            if (elementClass == byte.class) {
                writeByteArray((byte[]) value);
            } else if (elementClass == char.class) {
                writeCharArray((char[]) value);
            } else if (elementClass == short.class) {
                writeShortArray((short[]) value);
            } else if (elementClass == int.class) {
                writeIntArray((int[]) value);
            } else {
                throw new IllegalArgumentException(
                    "Cannot write as an int array: " + arrayClass.getName());
            }
            break;
        case LONG:
            Preconditions.checkArgument(elementClass == long.class,
                "Cannot write as long array: " + arrayClass.getName());
            writeLongArray((long[]) value);
            break;
        case FLOAT:
            Preconditions.checkArgument(elementClass == float.class,
                "Cannot write as float array: " + arrayClass.getName());
            writeFloatArray((float[]) value);
            break;
        case DOUBLE:
            Preconditions.checkArgument(elementClass == double.class,
                "Cannot write as double array: " + arrayClass.getName());
            writeDoubleArray((double[]) value);
            break;
        default:
            throw new IllegalArgumentException("Cannot write " +
                avroSchema.getElementType() + " array: " + arrayClass.getName());
    }
}
Example 9
Source File: AvroSchemaManager.java From spork with Apache License 2.0
/**
 * Initialize given a schema
 */
protected void init(String namespace, Schema schema, boolean ignoreNameMap) {
    /* put to map[type name]=>schema */
    if (isNamedSchema(schema)) {
        String typeName = schema.getName();
        if (typeName2Schema.containsKey(typeName))
            AvroStorageLog.warn("Duplicate schemas defined for type:" + typeName
                + ". will ignore the second one:" + schema);
        else {
            AvroStorageLog.details("add " + schema.getName() + "=" + schema + " to type2Schema");
            typeName2Schema.put(schema.getName(), schema);
        }
    }

    /* put field schema to map[field name]=>schema */
    if (schema.getType().equals(Type.RECORD)) {
        List<Field> fields = schema.getFields();
        for (Field field : fields) {
            Schema fieldSchema = field.schema();
            String name = (namespace == null) ? field.name() : namespace + "." + field.name();

            if (!ignoreNameMap) {
                if (name2Schema.containsKey(name))
                    AvroStorageLog.warn("Duplicate schemas defined for alias:" + name
                        + ". Will ignore the second one:" + fieldSchema);
                else {
                    AvroStorageLog.details("add " + name + "=" + fieldSchema + " to name2Schema");
                    name2Schema.put(name, fieldSchema);
                }
            }

            init(name, fieldSchema, ignoreNameMap);
        }
    } else if (schema.getType().equals(Type.UNION)) {
        if (AvroStorageUtils.isAcceptableUnion(schema)) {
            Schema realSchema = AvroStorageUtils.getAcceptedType(schema);
            init(namespace, realSchema, ignoreNameMap);
        } else {
            List<Schema> list = schema.getTypes();
            for (Schema s : list) {
                init(namespace, s, true);
            }
        }
    } else if (schema.getType().equals(Type.ARRAY)) {
        Schema elemSchema = schema.getElementType();
        init(namespace, elemSchema, true);
    } else if (schema.getType().equals(Type.MAP)) {
        Schema valueSchema = schema.getValueType();
        init(namespace, valueSchema, true);
    }
}
Example 10
Source File: AvroWriteSupportInt96Avro17.java From datacollector with Apache License 2.0
public void writeJavaArray(GroupType schema, Schema avroSchema, Class<?> arrayClass, Object value) {
    Class<?> elementClass = arrayClass.getComponentType();

    if (!elementClass.isPrimitive()) {
        writeObjectArray(schema, avroSchema, (Object[]) value);
        return;
    }

    switch (avroSchema.getElementType().getType()) {
        case BOOLEAN:
            Preconditions.checkArgument(elementClass == boolean.class,
                "Cannot write as boolean array: " + arrayClass.getName());
            writeBooleanArray((boolean[]) value);
            break;
        case INT:
            if (elementClass == byte.class) {
                writeByteArray((byte[]) value);
            } else if (elementClass == char.class) {
                writeCharArray((char[]) value);
            } else if (elementClass == short.class) {
                writeShortArray((short[]) value);
            } else if (elementClass == int.class) {
                writeIntArray((int[]) value);
            } else {
                throw new IllegalArgumentException(
                    "Cannot write as an int array: " + arrayClass.getName());
            }
            break;
        case LONG:
            Preconditions.checkArgument(elementClass == long.class,
                "Cannot write as long array: " + arrayClass.getName());
            writeLongArray((long[]) value);
            break;
        case FLOAT:
            Preconditions.checkArgument(elementClass == float.class,
                "Cannot write as float array: " + arrayClass.getName());
            writeFloatArray((float[]) value);
            break;
        case DOUBLE:
            Preconditions.checkArgument(elementClass == double.class,
                "Cannot write as double array: " + arrayClass.getName());
            writeDoubleArray((double[]) value);
            break;
        default:
            throw new IllegalArgumentException("Cannot write " +
                avroSchema.getElementType() + " array: " + arrayClass.getName());
    }
}
Example 11
Source File: Array_of_record_GenericDeserializer_1629046702287533603_1629046702287533603.java From avro-util with BSD 2-Clause "Simplified" License
public Array_of_record_GenericDeserializer_1629046702287533603_1629046702287533603(Schema readerSchema) {
    this.readerSchema = readerSchema;
    this.arrayArrayElemSchema0 = readerSchema.getElementType();
    this.field0 = arrayArrayElemSchema0.getField("field").schema();
}
Example 12
Source File: AvroSchema2Pig.java From spork with Apache License 2.0
/**
 * Convert a schema with field name to a pig schema
 */
private static ResourceFieldSchema inconvert(Schema in, String fieldName, Set<Schema> visitedRecords)
        throws IOException {

    AvroStorageLog.details("InConvert avro schema with field name " + fieldName);

    Schema.Type avroType = in.getType();
    ResourceFieldSchema fieldSchema = new ResourceFieldSchema();
    fieldSchema.setName(fieldName);

    if (avroType.equals(Schema.Type.RECORD)) {
        AvroStorageLog.details("convert to a pig tuple");
        if (visitedRecords.contains(in)) {
            fieldSchema.setType(DataType.BYTEARRAY);
        } else {
            visitedRecords.add(in);
            fieldSchema.setType(DataType.TUPLE);
            ResourceSchema tupleSchema = new ResourceSchema();
            List<Schema.Field> fields = in.getFields();
            ResourceFieldSchema[] childFields = new ResourceFieldSchema[fields.size()];
            int index = 0;
            for (Schema.Field field : fields) {
                childFields[index++] = inconvert(field.schema(), field.name(), visitedRecords);
            }
            tupleSchema.setFields(childFields);
            fieldSchema.setSchema(tupleSchema);
            visitedRecords.remove(in);
        }
    } else if (avroType.equals(Schema.Type.ARRAY)) {
        AvroStorageLog.details("convert array to a pig bag");
        fieldSchema.setType(DataType.BAG);
        Schema elemSchema = in.getElementType();
        ResourceFieldSchema subFieldSchema = inconvert(elemSchema, ARRAY_FIELD, visitedRecords);
        add2BagSchema(fieldSchema, subFieldSchema);
    } else if (avroType.equals(Schema.Type.MAP)) {
        AvroStorageLog.details("convert map to a pig map");
        fieldSchema.setType(DataType.MAP);
    } else if (avroType.equals(Schema.Type.UNION)) {
        if (AvroStorageUtils.isAcceptableUnion(in)) {
            Schema acceptSchema = AvroStorageUtils.getAcceptedType(in);
            ResourceFieldSchema realFieldSchema = inconvert(acceptSchema, null, visitedRecords);
            fieldSchema.setType(realFieldSchema.getType());
            fieldSchema.setSchema(realFieldSchema.getSchema());
        } else
            throw new IOException("Do not support generic union:" + in);
    } else if (avroType.equals(Schema.Type.FIXED)) {
        fieldSchema.setType(DataType.BYTEARRAY);
    } else if (avroType.equals(Schema.Type.BOOLEAN)) {
        fieldSchema.setType(DataType.BOOLEAN);
    } else if (avroType.equals(Schema.Type.BYTES)) {
        fieldSchema.setType(DataType.BYTEARRAY);
    } else if (avroType.equals(Schema.Type.DOUBLE)) {
        fieldSchema.setType(DataType.DOUBLE);
    } else if (avroType.equals(Schema.Type.ENUM)) {
        fieldSchema.setType(DataType.CHARARRAY);
    } else if (avroType.equals(Schema.Type.FLOAT)) {
        fieldSchema.setType(DataType.FLOAT);
    } else if (avroType.equals(Schema.Type.INT)) {
        fieldSchema.setType(DataType.INTEGER);
    } else if (avroType.equals(Schema.Type.LONG)) {
        fieldSchema.setType(DataType.LONG);
    } else if (avroType.equals(Schema.Type.STRING)) {
        fieldSchema.setType(DataType.CHARARRAY);
    } else if (avroType.equals(Schema.Type.NULL)) {
        // value of NULL is always NULL
        fieldSchema.setType(DataType.INTEGER);
    } else {
        throw new IOException("Unsupported avro type:" + avroType);
    }

    return fieldSchema;
}
Example 13
Source File: Array_of_UNION_GenericDeserializer_585074122056792963_585074122056792963.java From avro-util with BSD 2-Clause "Simplified" License
public Array_of_UNION_GenericDeserializer_585074122056792963_585074122056792963(Schema readerSchema) {
    this.readerSchema = readerSchema;
    this.arrayArrayElemSchema0 = readerSchema.getElementType();
    this.arrayElemOptionSchema0 = arrayArrayElemSchema0.getTypes().get(1);
    this.field0 = arrayElemOptionSchema0.getField("field").schema();
}
Example 14
Source File: AvroRowSerializationSchema.java From Flink-CEPplus with Apache License 2.0
private Object convertFlinkType(Schema schema, Object object) {
    if (object == null) {
        return null;
    }
    switch (schema.getType()) {
        case RECORD:
            if (object instanceof Row) {
                return convertRowToAvroRecord(schema, (Row) object);
            }
            throw new IllegalStateException("Row expected but was: " + object.getClass());
        case ENUM:
            return new GenericData.EnumSymbol(schema, object.toString());
        case ARRAY:
            final Schema elementSchema = schema.getElementType();
            final Object[] array = (Object[]) object;
            final GenericData.Array<Object> convertedArray = new GenericData.Array<>(array.length, schema);
            for (Object element : array) {
                convertedArray.add(convertFlinkType(elementSchema, element));
            }
            return convertedArray;
        case MAP:
            final Map<?, ?> map = (Map<?, ?>) object;
            final Map<Utf8, Object> convertedMap = new HashMap<>();
            for (Map.Entry<?, ?> entry : map.entrySet()) {
                convertedMap.put(
                    new Utf8(entry.getKey().toString()),
                    convertFlinkType(schema.getValueType(), entry.getValue()));
            }
            return convertedMap;
        case UNION:
            final List<Schema> types = schema.getTypes();
            final int size = types.size();
            final Schema actualSchema;
            if (size == 2 && types.get(0).getType() == Schema.Type.NULL) {
                actualSchema = types.get(1);
            } else if (size == 2 && types.get(1).getType() == Schema.Type.NULL) {
                actualSchema = types.get(0);
            } else if (size == 1) {
                actualSchema = types.get(0);
            } else {
                // generic type
                return object;
            }
            return convertFlinkType(actualSchema, object);
        case FIXED:
            // check for logical type
            if (object instanceof BigDecimal) {
                return new GenericData.Fixed(
                    schema,
                    convertFromDecimal(schema, (BigDecimal) object));
            }
            return new GenericData.Fixed(schema, (byte[]) object);
        case STRING:
            return new Utf8(object.toString());
        case BYTES:
            // check for logical type
            if (object instanceof BigDecimal) {
                return ByteBuffer.wrap(convertFromDecimal(schema, (BigDecimal) object));
            }
            return ByteBuffer.wrap((byte[]) object);
        case INT:
            // check for logical types
            if (object instanceof Date) {
                return convertFromDate(schema, (Date) object);
            } else if (object instanceof Time) {
                return convertFromTime(schema, (Time) object);
            }
            return object;
        case LONG:
            // check for logical type
            if (object instanceof Timestamp) {
                return convertFromTimestamp(schema, (Timestamp) object);
            }
            return object;
        case FLOAT:
        case DOUBLE:
        case BOOLEAN:
            return object;
    }
    throw new RuntimeException("Unsupported Avro type:" + schema);
}
Example 15
Source File: AvroNestedFieldGetter.java From pentaho-hadoop-shims with Apache License 2.0
/**
 * Builds a list of field objects holding paths corresponding to the leaf primitives in an Avro schema.
 *
 * @param s the schema to process
 * @return a List of field objects
 * @throws KettleException if a problem occurs
 */
public static List<? extends IAvroInputField> getLeafFields( Schema s ) throws KettleException {
    if ( s == null ) {
        return null;
    }
    List<AvroInputField> fields = new ArrayList<>();
    String root = "";

    if ( s.getType() == Schema.Type.ARRAY || s.getType() == Schema.Type.MAP ) {
        while ( s.getType() == Schema.Type.ARRAY || s.getType() == Schema.Type.MAP ) {
            if ( s.getType() == Schema.Type.ARRAY ) {
                root += "[0]";
                s = s.getElementType();
            } else {
                root += KEY;
                s = s.getValueType();
            }
        }
    }

    if ( s.getType() == Schema.Type.RECORD ) {
        processRecord( root, s, fields );
    } else if ( s.getType() == Schema.Type.UNION ) {
        processUnion( root, s, fields );
    } else {
        // our top-level array/map structure bottoms out with primitive types
        // we'll create one zero-indexed path through to a primitive - the
        // user can copy and paste the path if they want to extract other
        // indexes out to separate Kettle fields
        AvroInputField newField = createAvroField( root, s );
        if ( newField != null ) {
            fields.add( newField );
        }
    }

    for ( int i = 0; i < fields.size() - 1; i++ ) {
        AvroInputField field = fields.get( i );
        boolean duplicateName;
        int suffix = 0;
        String fieldName;
        do {
            fieldName = field.getPentahoFieldName();
            if ( suffix > 0 ) {
                fieldName = fieldName + "-" + Integer.toString( suffix );
            }
            duplicateName = false;
            for ( int j = i + 1; ( j < fields.size() ) && !duplicateName; j++ ) {
                duplicateName = fieldName.equals( fields.get( j ).getPentahoFieldName() );
            }
            suffix++;
        } while ( duplicateName );
        field.setPentahoFieldName( fieldName );
    }

    return fields;
}
Example 16
Source File: AvroArray.java From transport with BSD 2-Clause "Simplified" License
public AvroArray(Schema arraySchema, int size) {
    _elementSchema = arraySchema.getElementType();
    _genericArray = new GenericData.Array(size, arraySchema);
}
Example 17
Source File: AvroArray.java From transport with BSD 2-Clause "Simplified" License
public AvroArray(GenericArray<Object> genericArray, Schema arraySchema) {
    _genericArray = genericArray;
    _elementSchema = arraySchema.getElementType();
}
Example 18
Source File: ParquetRecordReaderTest.java From flink with Apache License 2.0
@Test
public void testNestedArrayGroup() throws IOException {
    Schema nestedArraySchema = unWrapSchema(NESTED_SCHEMA.getField("nestedArray").schema());
    Preconditions.checkState(nestedArraySchema.getType().equals(Schema.Type.ARRAY));

    Schema arrayItemSchema = nestedArraySchema.getElementType();
    GenericRecord item = new GenericRecordBuilder(arrayItemSchema)
        .set("type", "nested")
        .set("value", 1L).build();

    ImmutableList.Builder<GenericRecord> list = ImmutableList.builder();
    list.add(item);

    GenericRecord record = new GenericRecordBuilder(NESTED_SCHEMA)
        .set("nestedArray", list.build())
        .set("foo", 34L).build();

    Path path = createTempParquetFile(tempRoot.getRoot(), NESTED_SCHEMA, Collections.singletonList(record));
    MessageType readSchema = (new AvroSchemaConverter()).convert(NESTED_SCHEMA);
    ParquetRecordReader<Row> rowReader = new ParquetRecordReader<>(new RowReadSupport(), readSchema);

    InputFile inputFile = HadoopInputFile.fromPath(
        new org.apache.hadoop.fs.Path(path.toUri()), testConfig);
    ParquetReadOptions options = ParquetReadOptions.builder().build();
    ParquetFileReader fileReader = new ParquetFileReader(inputFile, options);

    rowReader.initialize(fileReader, testConfig);
    assertFalse(rowReader.reachEnd());

    Row row = rowReader.nextRecord();
    assertEquals(7, row.getArity());

    assertEquals(34L, row.getField(0));
    Object[] result = (Object[]) row.getField(6);

    assertEquals(1, result.length);
    Row nestedRow = (Row) result[0];
    assertEquals("nested", nestedRow.getField(0));
    assertEquals(1L, nestedRow.getField(1));
}