org.apache.parquet.schema.ConversionPatterns Java Examples
The following examples show how to use
org.apache.parquet.schema.ConversionPatterns.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
/** * * @param alias * @param fieldSchema * @return an optional group containing one repeated group field (key, value) * @throws FrontendException */ private GroupType convertMap(String alias, FieldSchema fieldSchema) { Schema innerSchema = fieldSchema.schema; if (innerSchema == null || innerSchema.size() != 1) { throw new SchemaConversionException("Invalid map Schema, schema should contain exactly one field: " + fieldSchema); } FieldSchema innerField = null; try { innerField = innerSchema.getField(0); } catch (FrontendException fe) { throw new SchemaConversionException("Invalid map schema, cannot infer innerschema: ", fe); } Type convertedValue = convertWithName(innerField, "value"); return ConversionPatterns.stringKeyMapType(Repetition.OPTIONAL, alias, name(innerField.alias, "map"), convertedValue); }
Example #2
Source File: SingleLevelArraySchemaConverter.java From presto with Apache License 2.0 | 5 votes |
private static GroupType convertMapType(String name, MapTypeInfo typeInfo, Repetition repetition) { Type keyType = convertType(ParquetHiveSerDe.MAP_KEY.toString(), typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED); Type valueType = convertType(ParquetHiveSerDe.MAP_VALUE.toString(), typeInfo.getMapValueTypeInfo()); return ConversionPatterns.mapType(repetition, name, keyType, valueType); }
Example #3
Source File: HiveSchemaConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
private static GroupType convertMapType(final String name, final MapTypeInfo typeInfo) { final Type keyType = convertType(ParquetHiveSerDe.MAP_KEY.toString(), typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED); final Type valueType = convertType(ParquetHiveSerDe.MAP_VALUE.toString(), typeInfo.getMapValueTypeInfo()); return ConversionPatterns.mapType(Repetition.OPTIONAL, name, keyType, valueType); }
Example #4
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/** * * @param name * @param fieldSchema * @return an optional group containing one repeated group field * @throws FrontendException */ private GroupType convertBag(String name, FieldSchema fieldSchema) throws FrontendException { FieldSchema innerField = fieldSchema.schema.getField(0); return ConversionPatterns.listType( Repetition.OPTIONAL, name, convertTuple(name(innerField.alias, "bag"), innerField, Repetition.REPEATED)); }
Example #5
Source File: AvroSchemaConverter190Int96Avro18.java From datacollector with Apache License 2.0 | 4 votes |
private Type convertFieldUsingLogicalType(String fieldName, Schema schema, Type.Repetition repetition) { LOG.debug("Converting field: {} using LogicalType", fieldName); Types.PrimitiveBuilder<PrimitiveType> builder; Schema.Type type = schema.getType(); LogicalType logicalType = schema.getLogicalType(); if (type.equals(Schema.Type.BOOLEAN)) { builder = Types.primitive(BOOLEAN, repetition); } else if (type.equals(Schema.Type.INT)) { builder = Types.primitive(INT32, repetition); } else if (type.equals(Schema.Type.LONG)) { // Special case handling timestamp until int96 fully supported or logical types correctly supported if (logicalType instanceof LogicalTypes.TimestampMillis || logicalType instanceof LogicalTypes.TimestampMicros) { LOG.debug("Logical type is a timestamp millis or micros"); builder = Types.primitive(INT96, repetition); } else { builder = Types.primitive(INT64, repetition); } } else if (type.equals(Schema.Type.FLOAT)) { builder = Types.primitive(FLOAT, repetition); } else if (type.equals(Schema.Type.DOUBLE)) { builder = Types.primitive(DOUBLE, repetition); } else if (type.equals(Schema.Type.BYTES)) { builder = Types.primitive(BINARY, repetition); } else if (type.equals(Schema.Type.STRING)) { builder = Types.primitive(BINARY, repetition).as(UTF8); } else if (type.equals(Schema.Type.RECORD)) { return new GroupType(repetition, fieldName, convertFields(schema.getFields())); } else if (type.equals(Schema.Type.ENUM)) { builder = Types.primitive(BINARY, repetition).as(ENUM); } else if (type.equals(Schema.Type.ARRAY)) { if (writeOldListStructure) { return ConversionPatterns.listType(repetition, fieldName, convertField("array", schema.getElementType(), REPEATED)); } else { return ConversionPatterns.listOfElements(repetition, fieldName, convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType())); } } else if (type.equals(Schema.Type.MAP)) { Type valType = convertField("value", schema.getValueType()); // avro map key type is always string return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType); } else if (type.equals(Schema.Type.FIXED)) { builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition) .length(schema.getFixedSize()); } else if (type.equals(Schema.Type.UNION)) { return convertUnion(fieldName, schema, repetition); } else { throw new UnsupportedOperationException("Cannot convert Avro type " + type); } // schema translation can only be done for known logical types because this // creates an equivalence if (logicalType != null && !(logicalType instanceof LogicalTypes.TimestampMillis || logicalType instanceof LogicalTypes.TimestampMicros)) { if (logicalType instanceof LogicalTypes.Decimal) { builder = builder.as(DECIMAL) .precision(((LogicalTypes.Decimal) logicalType).getPrecision()) .scale(((LogicalTypes.Decimal) logicalType).getScale()); } else { OriginalType annotation = convertLogicalType(logicalType); if (annotation != null) { builder.as(annotation); } } } return builder.named(fieldName); }
Example #6
Source File: AvroSchemaConverter190Int96Avro17.java From datacollector with Apache License 2.0 | 4 votes |
private Type convertFieldWithoutUsingLogicalType(String fieldName, Schema schema, Type.Repetition repetition) { LOG.debug("Converting field: {} without using LogicalType", fieldName); Types.PrimitiveBuilder<PrimitiveType> builder; Schema.Type type = schema.getType(); String logicalType = schema.getProp(AvroTypeUtil.LOGICAL_TYPE); if (type.equals(Schema.Type.BOOLEAN)) { builder = Types.primitive(BOOLEAN, repetition); } else if (type.equals(Schema.Type.INT)) { builder = Types.primitive(INT32, repetition); } else if (type.equals(Schema.Type.LONG)) { // Special case handling timestamp until int96 fully supported or logical types correctly supported if (AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType) || AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalType)) { LOG.debug("Logical type is a timestamp millis or micros"); builder = Types.primitive(INT96, repetition); } else { builder = Types.primitive(INT64, repetition); } } else if (type.equals(Schema.Type.FLOAT)) { builder = Types.primitive(FLOAT, repetition); } else if (type.equals(Schema.Type.DOUBLE)) { builder = Types.primitive(DOUBLE, repetition); } else if (type.equals(Schema.Type.BYTES)) { builder = Types.primitive(BINARY, repetition); } else if (type.equals(Schema.Type.STRING)) { builder = Types.primitive(BINARY, repetition).as(UTF8); } else if (type.equals(Schema.Type.RECORD)) { return new GroupType(repetition, fieldName, convertFields(schema.getFields())); } else if (type.equals(Schema.Type.ENUM)) { builder = Types.primitive(BINARY, repetition).as(ENUM); } else if (type.equals(Schema.Type.ARRAY)) { if (writeOldListStructure) { return ConversionPatterns.listType(repetition, fieldName, convertField("array", schema.getElementType(), REPEATED)); } else { return ConversionPatterns.listOfElements(repetition, fieldName, convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType())); } } else if (type.equals(Schema.Type.MAP)) { Type valType = convertField("value", schema.getValueType()); // avro map key type is always string return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType); } else if (type.equals(Schema.Type.FIXED)) { builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition) .length(schema.getFixedSize()); } else if (type.equals(Schema.Type.UNION)) { return convertUnion(fieldName, schema, repetition); } else { throw new UnsupportedOperationException("Cannot convert Avro type " + type); } // schema translation can only be done for known logical types because this // creates an equivalence if (logicalType != null && !(AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType) || AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalType))) { if (AvroTypeUtil.LOGICAL_TYPE_DECIMAL.equals(logicalType)) { builder = (Types.PrimitiveBuilder<PrimitiveType>) builder.as(DECIMAL) .precision(schema.getJsonProp(AvroTypeUtil.LOGICAL_TYPE_ATTR_PRECISION).getIntValue()) .scale(schema.getJsonProp(AvroTypeUtil.LOGICAL_TYPE_ATTR_SCALE).getIntValue()); } else { OriginalType annotation = convertLogicalTypeStr(logicalType); if (annotation != null) { builder.as(annotation); } } } return builder.named(fieldName); }
Example #7
Source File: AvroSchemaConverterLogicalTypesPre19.java From datacollector with Apache License 2.0 | 4 votes |
@SuppressWarnings("deprecation") private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) { Types.PrimitiveBuilder<PrimitiveType> builder; Schema.Type type = schema.getType(); if (type.equals(Schema.Type.BOOLEAN)) { builder = Types.primitive(BOOLEAN, repetition); } else if (type.equals(Schema.Type.INT)) { builder = Types.primitive(INT32, repetition); } else if (type.equals(Schema.Type.LONG)) { builder = Types.primitive(INT64, repetition); } else if (type.equals(Schema.Type.FLOAT)) { builder = Types.primitive(FLOAT, repetition); } else if (type.equals(Schema.Type.DOUBLE)) { builder = Types.primitive(DOUBLE, repetition); } else if (type.equals(Schema.Type.BYTES)) { builder = Types.primitive(BINARY, repetition); } else if (type.equals(Schema.Type.STRING)) { builder = Types.primitive(BINARY, repetition).as(UTF8); } else if (type.equals(Schema.Type.RECORD)) { return new GroupType(repetition, fieldName, convertFields(schema.getFields())); } else if (type.equals(Schema.Type.ENUM)) { builder = Types.primitive(BINARY, repetition).as(ENUM); } else if (type.equals(Schema.Type.ARRAY)) { if (writeOldListStructure) { return ConversionPatterns.listType(repetition, fieldName, convertField("array", schema.getElementType(), REPEATED)); } else { return ConversionPatterns.listOfElements(repetition, fieldName, convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType())); } } else if (type.equals(Schema.Type.MAP)) { Type valType = convertField("value", schema.getValueType()); // avro map key type is always string return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType); } else if (type.equals(Schema.Type.FIXED)) { builder = (Types.PrimitiveBuilder<PrimitiveType>) Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition).length(schema.getFixedSize()); } else if (type.equals(Schema.Type.UNION)) { return convertUnion(fieldName, schema, repetition); } else { throw new UnsupportedOperationException("Cannot convert Avro type " + type); } // schema translation can only be done for known logical types because this // creates an equivalence String logicalType = schema.getProp(LOGICAL_TYPE); if (logicalType != null) { if (LOGICAL_TYPE_DECIMAL.equals(logicalType)) { builder = (Types.PrimitiveBuilder<PrimitiveType>) builder.as(DECIMAL) .precision(schema.getJsonProp(LOGICAL_PROP_PRECISION).getIntValue()) .scale(schema.getJsonProp(LOGICAL_PROP_SCALE).getIntValue()); } else { OriginalType annotation = convertLogicalType(logicalType); if (annotation != null) { builder.as(annotation); } } } return builder.named(fieldName); }
Example #8
Source File: AvroSchemaConverter.java From parquet-mr with Apache License 2.0 | 4 votes |
@SuppressWarnings("deprecation") private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) { Types.PrimitiveBuilder<PrimitiveType> builder; Schema.Type type = schema.getType(); LogicalType logicalType = schema.getLogicalType(); if (type.equals(Schema.Type.BOOLEAN)) { builder = Types.primitive(BOOLEAN, repetition); } else if (type.equals(Schema.Type.INT)) { builder = Types.primitive(INT32, repetition); } else if (type.equals(Schema.Type.LONG)) { builder = Types.primitive(INT64, repetition); } else if (type.equals(Schema.Type.FLOAT)) { builder = Types.primitive(FLOAT, repetition); } else if (type.equals(Schema.Type.DOUBLE)) { builder = Types.primitive(DOUBLE, repetition); } else if (type.equals(Schema.Type.BYTES)) { builder = Types.primitive(BINARY, repetition); } else if (type.equals(Schema.Type.STRING)) { if (logicalType != null && logicalType.getName().equals(LogicalTypes.uuid().getName()) && writeParquetUUID) { builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition) .length(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES); } else { builder = Types.primitive(BINARY, repetition).as(stringType()); } } else if (type.equals(Schema.Type.RECORD)) { return new GroupType(repetition, fieldName, convertFields(schema.getFields())); } else if (type.equals(Schema.Type.ENUM)) { builder = Types.primitive(BINARY, repetition).as(enumType()); } else if (type.equals(Schema.Type.ARRAY)) { if (writeOldListStructure) { return ConversionPatterns.listType(repetition, fieldName, convertField("array", schema.getElementType(), REPEATED)); } else { return ConversionPatterns.listOfElements(repetition, fieldName, convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType())); } } else if (type.equals(Schema.Type.MAP)) { Type valType = convertField("value", schema.getValueType()); // avro map key type is always string return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType); } else if (type.equals(Schema.Type.FIXED)) { builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition) .length(schema.getFixedSize()); } else if (type.equals(Schema.Type.UNION)) { return convertUnion(fieldName, schema, repetition); } else { throw new UnsupportedOperationException("Cannot convert Avro type " + type); } // schema translation can only be done for known logical types because this // creates an equivalence if (logicalType != null) { if (logicalType instanceof LogicalTypes.Decimal) { LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType; builder = builder.as(decimalType(decimal.getScale(), decimal.getPrecision())); } else { LogicalTypeAnnotation annotation = convertLogicalType(logicalType); if (annotation != null) { builder.as(annotation); } } } return builder.named(fieldName); }