Java Code Examples for org.apache.parquet.schema.Types#PrimitiveBuilder
The following examples show how to use
org.apache.parquet.schema.Types#PrimitiveBuilder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestPigSchemaConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
@Test public void testListsOfPrimitive() throws Exception { for (Type.Repetition repetition : Type.Repetition.values()) { for (Type.Repetition valueRepetition : Type.Repetition.values()) { for (PrimitiveType.PrimitiveTypeName primitiveTypeName : PrimitiveType.PrimitiveTypeName.values()) { if (primitiveTypeName != PrimitiveType.PrimitiveTypeName.INT96) { // INT96 is NYI Types.PrimitiveBuilder<PrimitiveType> value = Types.primitive(primitiveTypeName, valueRepetition); if (primitiveTypeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) value.length(1); GroupType type = Types.buildGroup(repetition).addField(value.named("b")).as(OriginalType.LIST).named("a"); pigSchemaConverter.convertField(type); // no exceptions, please } } } } }
Example 2
Source File: MetadataReader.java From presto with Apache License 2.0 | 5 votes |
/**
 * Consumes the next {@code typeCount} sibling elements from {@code schemaIterator} and registers
 * each one on {@code builder}.
 *
 * <p>An element without a physical type is treated as a group: a nested group builder is opened
 * and this method recurses to consume its {@code num_children} elements. Any other element becomes
 * a primitive, with type length, precision and scale copied over when they are set. Converted type
 * and field id are applied when set, and the element is finally named with its lower-cased
 * (English locale) name.
 *
 * @param builder group builder that receives the children
 * @param schemaIterator iterator over the schema elements; advanced by this call
 * @param typeCount number of direct children to read for {@code builder}
 */
private static void readTypeSchema(Types.GroupBuilder<?> builder, Iterator<SchemaElement> schemaIterator, int typeCount) {
  for (int remaining = typeCount; remaining > 0; remaining--) {
    SchemaElement element = schemaIterator.next();
    Types.Builder<?, ?> childBuilder;

    if (element.type != null) {
      Types.PrimitiveBuilder<?> leaf = builder.primitive(
          getTypeName(element.type),
          Repetition.valueOf(element.repetition_type.name()));
      if (element.isSetType_length()) {
        leaf.length(element.type_length);
      }
      if (element.isSetPrecision()) {
        leaf.precision(element.precision);
      }
      if (element.isSetScale()) {
        leaf.scale(element.scale);
      }
      childBuilder = leaf;
    } else {
      childBuilder = builder.group(Repetition.valueOf(element.repetition_type.name()));
      // The group's children follow it in the iterator; consume them before naming the group.
      readTypeSchema((Types.GroupBuilder<?>) childBuilder, schemaIterator, element.num_children);
    }

    if (element.isSetConverted_type()) {
      childBuilder.as(getOriginalType(element.converted_type));
    }
    if (element.isSetField_id()) {
      childBuilder.id(element.field_id);
    }

    String normalizedName = element.name.toLowerCase(Locale.ENGLISH);
    childBuilder.named(normalizedName);
  }
}
Example 3
Source File: AvroSchemaConverter190Int96Avro18.java From datacollector with Apache License 2.0 | 4 votes |
private Type convertFieldUsingLogicalType(String fieldName, Schema schema, Type.Repetition repetition) { LOG.debug("Converting field: {} using LogicalType", fieldName); Types.PrimitiveBuilder<PrimitiveType> builder; Schema.Type type = schema.getType(); LogicalType logicalType = schema.getLogicalType(); if (type.equals(Schema.Type.BOOLEAN)) { builder = Types.primitive(BOOLEAN, repetition); } else if (type.equals(Schema.Type.INT)) { builder = Types.primitive(INT32, repetition); } else if (type.equals(Schema.Type.LONG)) { // Special case handling timestamp until int96 fully supported or logical types correctly supported if (logicalType instanceof LogicalTypes.TimestampMillis || logicalType instanceof LogicalTypes.TimestampMicros) { LOG.debug("Logical type is a timestamp millis or micros"); builder = Types.primitive(INT96, repetition); } else { builder = Types.primitive(INT64, repetition); } } else if (type.equals(Schema.Type.FLOAT)) { builder = Types.primitive(FLOAT, repetition); } else if (type.equals(Schema.Type.DOUBLE)) { builder = Types.primitive(DOUBLE, repetition); } else if (type.equals(Schema.Type.BYTES)) { builder = Types.primitive(BINARY, repetition); } else if (type.equals(Schema.Type.STRING)) { builder = Types.primitive(BINARY, repetition).as(UTF8); } else if (type.equals(Schema.Type.RECORD)) { return new GroupType(repetition, fieldName, convertFields(schema.getFields())); } else if (type.equals(Schema.Type.ENUM)) { builder = Types.primitive(BINARY, repetition).as(ENUM); } else if (type.equals(Schema.Type.ARRAY)) { if (writeOldListStructure) { return ConversionPatterns.listType(repetition, fieldName, convertField("array", schema.getElementType(), REPEATED)); } else { return ConversionPatterns.listOfElements(repetition, fieldName, convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType())); } } else if (type.equals(Schema.Type.MAP)) { Type valType = convertField("value", schema.getValueType()); // avro map key type is always string return 
ConversionPatterns.stringKeyMapType(repetition, fieldName, valType); } else if (type.equals(Schema.Type.FIXED)) { builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition) .length(schema.getFixedSize()); } else if (type.equals(Schema.Type.UNION)) { return convertUnion(fieldName, schema, repetition); } else { throw new UnsupportedOperationException("Cannot convert Avro type " + type); } // schema translation can only be done for known logical types because this // creates an equivalence if (logicalType != null && !(logicalType instanceof LogicalTypes.TimestampMillis || logicalType instanceof LogicalTypes.TimestampMicros)) { if (logicalType instanceof LogicalTypes.Decimal) { builder = builder.as(DECIMAL) .precision(((LogicalTypes.Decimal) logicalType).getPrecision()) .scale(((LogicalTypes.Decimal) logicalType).getScale()); } else { OriginalType annotation = convertLogicalType(logicalType); if (annotation != null) { builder.as(annotation); } } } return builder.named(fieldName); }
Example 4
Source File: AvroSchemaConverter190Int96Avro17.java From datacollector with Apache License 2.0 | 4 votes |
private Type convertFieldWithoutUsingLogicalType(String fieldName, Schema schema, Type.Repetition repetition) { LOG.debug("Converting field: {} without using LogicalType", fieldName); Types.PrimitiveBuilder<PrimitiveType> builder; Schema.Type type = schema.getType(); String logicalType = schema.getProp(AvroTypeUtil.LOGICAL_TYPE); if (type.equals(Schema.Type.BOOLEAN)) { builder = Types.primitive(BOOLEAN, repetition); } else if (type.equals(Schema.Type.INT)) { builder = Types.primitive(INT32, repetition); } else if (type.equals(Schema.Type.LONG)) { // Special case handling timestamp until int96 fully supported or logical types correctly supported if (AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType) || AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalType)) { LOG.debug("Logical type is a timestamp millis or micros"); builder = Types.primitive(INT96, repetition); } else { builder = Types.primitive(INT64, repetition); } } else if (type.equals(Schema.Type.FLOAT)) { builder = Types.primitive(FLOAT, repetition); } else if (type.equals(Schema.Type.DOUBLE)) { builder = Types.primitive(DOUBLE, repetition); } else if (type.equals(Schema.Type.BYTES)) { builder = Types.primitive(BINARY, repetition); } else if (type.equals(Schema.Type.STRING)) { builder = Types.primitive(BINARY, repetition).as(UTF8); } else if (type.equals(Schema.Type.RECORD)) { return new GroupType(repetition, fieldName, convertFields(schema.getFields())); } else if (type.equals(Schema.Type.ENUM)) { builder = Types.primitive(BINARY, repetition).as(ENUM); } else if (type.equals(Schema.Type.ARRAY)) { if (writeOldListStructure) { return ConversionPatterns.listType(repetition, fieldName, convertField("array", schema.getElementType(), REPEATED)); } else { return ConversionPatterns.listOfElements(repetition, fieldName, convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType())); } } else if (type.equals(Schema.Type.MAP)) { Type valType = convertField("value", 
schema.getValueType()); // avro map key type is always string return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType); } else if (type.equals(Schema.Type.FIXED)) { builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition) .length(schema.getFixedSize()); } else if (type.equals(Schema.Type.UNION)) { return convertUnion(fieldName, schema, repetition); } else { throw new UnsupportedOperationException("Cannot convert Avro type " + type); } // schema translation can only be done for known logical types because this // creates an equivalence if (logicalType != null && !(AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType) || AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalType))) { if (AvroTypeUtil.LOGICAL_TYPE_DECIMAL.equals(logicalType)) { builder = (Types.PrimitiveBuilder<PrimitiveType>) builder.as(DECIMAL) .precision(schema.getJsonProp(AvroTypeUtil.LOGICAL_TYPE_ATTR_PRECISION).getIntValue()) .scale(schema.getJsonProp(AvroTypeUtil.LOGICAL_TYPE_ATTR_SCALE).getIntValue()); } else { OriginalType annotation = convertLogicalTypeStr(logicalType); if (annotation != null) { builder.as(annotation); } } } return builder.named(fieldName); }
Example 5
Source File: AvroSchemaConverterLogicalTypesPre19.java From datacollector with Apache License 2.0 | 4 votes |
@SuppressWarnings("deprecation") private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) { Types.PrimitiveBuilder<PrimitiveType> builder; Schema.Type type = schema.getType(); if (type.equals(Schema.Type.BOOLEAN)) { builder = Types.primitive(BOOLEAN, repetition); } else if (type.equals(Schema.Type.INT)) { builder = Types.primitive(INT32, repetition); } else if (type.equals(Schema.Type.LONG)) { builder = Types.primitive(INT64, repetition); } else if (type.equals(Schema.Type.FLOAT)) { builder = Types.primitive(FLOAT, repetition); } else if (type.equals(Schema.Type.DOUBLE)) { builder = Types.primitive(DOUBLE, repetition); } else if (type.equals(Schema.Type.BYTES)) { builder = Types.primitive(BINARY, repetition); } else if (type.equals(Schema.Type.STRING)) { builder = Types.primitive(BINARY, repetition).as(UTF8); } else if (type.equals(Schema.Type.RECORD)) { return new GroupType(repetition, fieldName, convertFields(schema.getFields())); } else if (type.equals(Schema.Type.ENUM)) { builder = Types.primitive(BINARY, repetition).as(ENUM); } else if (type.equals(Schema.Type.ARRAY)) { if (writeOldListStructure) { return ConversionPatterns.listType(repetition, fieldName, convertField("array", schema.getElementType(), REPEATED)); } else { return ConversionPatterns.listOfElements(repetition, fieldName, convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType())); } } else if (type.equals(Schema.Type.MAP)) { Type valType = convertField("value", schema.getValueType()); // avro map key type is always string return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType); } else if (type.equals(Schema.Type.FIXED)) { builder = (Types.PrimitiveBuilder<PrimitiveType>) Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition).length(schema.getFixedSize()); } else if (type.equals(Schema.Type.UNION)) { return convertUnion(fieldName, schema, repetition); } else { throw new UnsupportedOperationException("Cannot convert Avro type " + type); } 
// schema translation can only be done for known logical types because this // creates an equivalence String logicalType = schema.getProp(LOGICAL_TYPE); if (logicalType != null) { if (LOGICAL_TYPE_DECIMAL.equals(logicalType)) { builder = (Types.PrimitiveBuilder<PrimitiveType>) builder.as(DECIMAL) .precision(schema.getJsonProp(LOGICAL_PROP_PRECISION).getIntValue()) .scale(schema.getJsonProp(LOGICAL_PROP_SCALE).getIntValue()); } else { OriginalType annotation = convertLogicalType(logicalType); if (annotation != null) { builder.as(annotation); } } } return builder.named(fieldName); }
Example 6
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0 | 4 votes |
private void buildChildren(Types.GroupBuilder builder, Iterator<SchemaElement> schema, int childrenCount, List<ColumnOrder> columnOrders, int columnCount) { for (int i = 0; i < childrenCount; i++) { SchemaElement schemaElement = schema.next(); // Create Parquet Type. Types.Builder childBuilder; if (schemaElement.type != null) { Types.PrimitiveBuilder primitiveBuilder = builder.primitive( getPrimitive(schemaElement.type), fromParquetRepetition(schemaElement.repetition_type)); if (schemaElement.isSetType_length()) { primitiveBuilder.length(schemaElement.type_length); } if (schemaElement.isSetPrecision()) { primitiveBuilder.precision(schemaElement.precision); } if (schemaElement.isSetScale()) { primitiveBuilder.scale(schemaElement.scale); } if (columnOrders != null) { org.apache.parquet.schema.ColumnOrder columnOrder = fromParquetColumnOrder(columnOrders.get(columnCount)); // As per parquet format 2.4.0 no UNDEFINED order is supported. So, set undefined column order for the types // where ordering is not supported. if (columnOrder.getColumnOrderName() == ColumnOrderName.TYPE_DEFINED_ORDER && (schemaElement.type == Type.INT96 || schemaElement.converted_type == ConvertedType.INTERVAL)) { columnOrder = org.apache.parquet.schema.ColumnOrder.undefined(); } primitiveBuilder.columnOrder(columnOrder); } childBuilder = primitiveBuilder; } else { childBuilder = builder.group(fromParquetRepetition(schemaElement.repetition_type)); buildChildren((Types.GroupBuilder) childBuilder, schema, schemaElement.num_children, columnOrders, columnCount); } if (schemaElement.isSetLogicalType()) { childBuilder.as(getLogicalTypeAnnotation(schemaElement.logicalType)); } if (schemaElement.isSetConverted_type()) { OriginalType originalType = getLogicalTypeAnnotation(schemaElement.converted_type, schemaElement).toOriginalType(); OriginalType newOriginalType = (schemaElement.isSetLogicalType() && getLogicalTypeAnnotation(schemaElement.logicalType) != null) ? 
getLogicalTypeAnnotation(schemaElement.logicalType).toOriginalType() : null; if (!originalType.equals(newOriginalType)) { if (newOriginalType != null) { LOG.warn("Converted type and logical type metadata mismatch (convertedType: {}, logical type: {}). Using value in converted type.", schemaElement.converted_type, schemaElement.logicalType); } childBuilder.as(originalType); } } if (schemaElement.isSetField_id()) { childBuilder.id(schemaElement.field_id); } childBuilder.named(schemaElement.name); ++columnCount; } }
Example 7
Source File: AvroSchemaConverter.java From parquet-mr with Apache License 2.0 | 4 votes |
@SuppressWarnings("deprecation") private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) { Types.PrimitiveBuilder<PrimitiveType> builder; Schema.Type type = schema.getType(); LogicalType logicalType = schema.getLogicalType(); if (type.equals(Schema.Type.BOOLEAN)) { builder = Types.primitive(BOOLEAN, repetition); } else if (type.equals(Schema.Type.INT)) { builder = Types.primitive(INT32, repetition); } else if (type.equals(Schema.Type.LONG)) { builder = Types.primitive(INT64, repetition); } else if (type.equals(Schema.Type.FLOAT)) { builder = Types.primitive(FLOAT, repetition); } else if (type.equals(Schema.Type.DOUBLE)) { builder = Types.primitive(DOUBLE, repetition); } else if (type.equals(Schema.Type.BYTES)) { builder = Types.primitive(BINARY, repetition); } else if (type.equals(Schema.Type.STRING)) { if (logicalType != null && logicalType.getName().equals(LogicalTypes.uuid().getName()) && writeParquetUUID) { builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition) .length(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES); } else { builder = Types.primitive(BINARY, repetition).as(stringType()); } } else if (type.equals(Schema.Type.RECORD)) { return new GroupType(repetition, fieldName, convertFields(schema.getFields())); } else if (type.equals(Schema.Type.ENUM)) { builder = Types.primitive(BINARY, repetition).as(enumType()); } else if (type.equals(Schema.Type.ARRAY)) { if (writeOldListStructure) { return ConversionPatterns.listType(repetition, fieldName, convertField("array", schema.getElementType(), REPEATED)); } else { return ConversionPatterns.listOfElements(repetition, fieldName, convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType())); } } else if (type.equals(Schema.Type.MAP)) { Type valType = convertField("value", schema.getValueType()); // avro map key type is always string return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType); } else if (type.equals(Schema.Type.FIXED)) { builder 
= Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition) .length(schema.getFixedSize()); } else if (type.equals(Schema.Type.UNION)) { return convertUnion(fieldName, schema, repetition); } else { throw new UnsupportedOperationException("Cannot convert Avro type " + type); } // schema translation can only be done for known logical types because this // creates an equivalence if (logicalType != null) { if (logicalType instanceof LogicalTypes.Decimal) { LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType; builder = builder.as(decimalType(decimal.getScale(), decimal.getPrecision())); } else { LogicalTypeAnnotation annotation = convertLogicalType(logicalType); if (annotation != null) { builder.as(annotation); } } } return builder.named(fieldName); }