Java Code Examples for org.apache.parquet.schema.Types#PrimitiveBuilder

The following examples show how to use org.apache.parquet.schema.Types#PrimitiveBuilder . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestPigSchemaConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
@Test
public void testListsOfPrimitive() throws Exception {
  for (Type.Repetition repetition : Type.Repetition.values()) {
    for (Type.Repetition valueRepetition : Type.Repetition.values()) {
      for (PrimitiveType.PrimitiveTypeName primitiveTypeName : PrimitiveType.PrimitiveTypeName.values()) {
        if (primitiveTypeName != PrimitiveType.PrimitiveTypeName.INT96) { // INT96 is NYI
          Types.PrimitiveBuilder<PrimitiveType> value = Types.primitive(primitiveTypeName, valueRepetition);
          if (primitiveTypeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
            value.length(1);
          GroupType type = Types.buildGroup(repetition).addField(value.named("b")).as(OriginalType.LIST).named("a");
          pigSchemaConverter.convertField(type); // no exceptions, please
        }
      }
    }
  }
}
 
Example 2
Source File: MetadataReader.java    From presto with Apache License 2.0 5 votes vote down vote up
private static void readTypeSchema(Types.GroupBuilder<?> builder, Iterator<SchemaElement> schemaIterator, int typeCount)
{
    for (int i = 0; i < typeCount; i++) {
        SchemaElement element = schemaIterator.next();
        Types.Builder<?, ?> typeBuilder;
        if (element.type == null) {
            typeBuilder = builder.group(Repetition.valueOf(element.repetition_type.name()));
            readTypeSchema((Types.GroupBuilder<?>) typeBuilder, schemaIterator, element.num_children);
        }
        else {
            Types.PrimitiveBuilder<?> primitiveBuilder = builder.primitive(getTypeName(element.type), Repetition.valueOf(element.repetition_type.name()));
            if (element.isSetType_length()) {
                primitiveBuilder.length(element.type_length);
            }
            if (element.isSetPrecision()) {
                primitiveBuilder.precision(element.precision);
            }
            if (element.isSetScale()) {
                primitiveBuilder.scale(element.scale);
            }
            typeBuilder = primitiveBuilder;
        }

        if (element.isSetConverted_type()) {
            typeBuilder.as(getOriginalType(element.converted_type));
        }
        if (element.isSetField_id()) {
            typeBuilder.id(element.field_id);
        }
        typeBuilder.named(element.name.toLowerCase(Locale.ENGLISH));
    }
}
 
Example 3
Source File: AvroSchemaConverter190Int96Avro18.java    From datacollector with Apache License 2.0 4 votes vote down vote up
private Type convertFieldUsingLogicalType(String fieldName, Schema schema, Type.Repetition repetition) {
  LOG.debug("Converting field: {} using LogicalType", fieldName);
  Types.PrimitiveBuilder<PrimitiveType> builder;
  Schema.Type type = schema.getType();

  LogicalType logicalType = schema.getLogicalType();

  if (type.equals(Schema.Type.BOOLEAN)) {
    builder = Types.primitive(BOOLEAN, repetition);
  } else if (type.equals(Schema.Type.INT)) {
    builder = Types.primitive(INT32, repetition);
  } else if (type.equals(Schema.Type.LONG)) {
    // Special case handling timestamp until int96 fully supported or logical types correctly supported
    if (logicalType instanceof LogicalTypes.TimestampMillis || logicalType instanceof LogicalTypes.TimestampMicros) {
      LOG.debug("Logical type is a timestamp millis or micros");
      builder = Types.primitive(INT96, repetition);
    } else {
      builder = Types.primitive(INT64, repetition);
    }
  } else if (type.equals(Schema.Type.FLOAT)) {
    builder = Types.primitive(FLOAT, repetition);
  } else if (type.equals(Schema.Type.DOUBLE)) {
    builder = Types.primitive(DOUBLE, repetition);
  } else if (type.equals(Schema.Type.BYTES)) {
    builder = Types.primitive(BINARY, repetition);
  } else if (type.equals(Schema.Type.STRING)) {
    builder = Types.primitive(BINARY, repetition).as(UTF8);
  } else if (type.equals(Schema.Type.RECORD)) {
    return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
  } else if (type.equals(Schema.Type.ENUM)) {
    builder = Types.primitive(BINARY, repetition).as(ENUM);
  } else if (type.equals(Schema.Type.ARRAY)) {
    if (writeOldListStructure) {
      return ConversionPatterns.listType(repetition, fieldName,
          convertField("array", schema.getElementType(), REPEATED));
    } else {
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    }
  } else if (type.equals(Schema.Type.MAP)) {
    Type valType = convertField("value", schema.getValueType());
    // avro map key type is always string
    return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType);
  } else if (type.equals(Schema.Type.FIXED)) {
    builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
                   .length(schema.getFixedSize());
  } else if (type.equals(Schema.Type.UNION)) {
    return convertUnion(fieldName, schema, repetition);
  } else {
    throw new UnsupportedOperationException("Cannot convert Avro type " + type);
  }

  // schema translation can only be done for known logical types because this
  // creates an equivalence

  if (logicalType != null &&
      !(logicalType instanceof LogicalTypes.TimestampMillis || logicalType instanceof LogicalTypes.TimestampMicros)) {
    if (logicalType instanceof LogicalTypes.Decimal) {
      builder = builder.as(DECIMAL)
                       .precision(((LogicalTypes.Decimal) logicalType).getPrecision())
                       .scale(((LogicalTypes.Decimal) logicalType).getScale());

    } else {
      OriginalType annotation = convertLogicalType(logicalType);
      if (annotation != null) {
        builder.as(annotation);
      }
    }
  }

  return builder.named(fieldName);
}
 
Example 4
Source File: AvroSchemaConverter190Int96Avro17.java    From datacollector with Apache License 2.0 4 votes vote down vote up
private Type convertFieldWithoutUsingLogicalType(String fieldName, Schema schema, Type.Repetition repetition) {
  LOG.debug("Converting field: {} without using LogicalType", fieldName);
  Types.PrimitiveBuilder<PrimitiveType> builder;
  Schema.Type type = schema.getType();

  String logicalType = schema.getProp(AvroTypeUtil.LOGICAL_TYPE);

  if (type.equals(Schema.Type.BOOLEAN)) {
    builder = Types.primitive(BOOLEAN, repetition);
  } else if (type.equals(Schema.Type.INT)) {
    builder = Types.primitive(INT32, repetition);
  } else if (type.equals(Schema.Type.LONG)) {
    // Special case handling timestamp until int96 fully supported or logical types correctly supported
    if (AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType) ||
        AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalType)) {
      LOG.debug("Logical type is a timestamp millis or micros");
      builder = Types.primitive(INT96, repetition);
    } else {
      builder = Types.primitive(INT64, repetition);
    }
  } else if (type.equals(Schema.Type.FLOAT)) {
    builder = Types.primitive(FLOAT, repetition);
  } else if (type.equals(Schema.Type.DOUBLE)) {
    builder = Types.primitive(DOUBLE, repetition);
  } else if (type.equals(Schema.Type.BYTES)) {
    builder = Types.primitive(BINARY, repetition);
  } else if (type.equals(Schema.Type.STRING)) {
    builder = Types.primitive(BINARY, repetition).as(UTF8);
  } else if (type.equals(Schema.Type.RECORD)) {
    return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
  } else if (type.equals(Schema.Type.ENUM)) {
    builder = Types.primitive(BINARY, repetition).as(ENUM);
  } else if (type.equals(Schema.Type.ARRAY)) {
    if (writeOldListStructure) {
      return ConversionPatterns.listType(repetition, fieldName,
          convertField("array", schema.getElementType(), REPEATED));
    } else {
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    }
  } else if (type.equals(Schema.Type.MAP)) {
    Type valType = convertField("value", schema.getValueType());
    // avro map key type is always string
    return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType);
  } else if (type.equals(Schema.Type.FIXED)) {
    builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
                   .length(schema.getFixedSize());
  } else if (type.equals(Schema.Type.UNION)) {
    return convertUnion(fieldName, schema, repetition);
  } else {
    throw new UnsupportedOperationException("Cannot convert Avro type " + type);
  }

  // schema translation can only be done for known logical types because this
  // creates an equivalence

  if (logicalType != null &&
      !(AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType) || AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalType))) {
    if (AvroTypeUtil.LOGICAL_TYPE_DECIMAL.equals(logicalType)) {
      builder = (Types.PrimitiveBuilder<PrimitiveType>) builder.as(DECIMAL)
         .precision(schema.getJsonProp(AvroTypeUtil.LOGICAL_TYPE_ATTR_PRECISION).getIntValue())
         .scale(schema.getJsonProp(AvroTypeUtil.LOGICAL_TYPE_ATTR_SCALE).getIntValue());

    } else {
      OriginalType annotation = convertLogicalTypeStr(logicalType);
      if (annotation != null) {
        builder.as(annotation);
      }
    }
  }

  return builder.named(fieldName);
}
 
Example 5
Source File: AvroSchemaConverterLogicalTypesPre19.java    From datacollector with Apache License 2.0 4 votes vote down vote up
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) {
  Types.PrimitiveBuilder<PrimitiveType> builder;
  Schema.Type type = schema.getType();
  if (type.equals(Schema.Type.BOOLEAN)) {
    builder = Types.primitive(BOOLEAN, repetition);
  } else if (type.equals(Schema.Type.INT)) {
    builder = Types.primitive(INT32, repetition);
  } else if (type.equals(Schema.Type.LONG)) {
    builder = Types.primitive(INT64, repetition);
  } else if (type.equals(Schema.Type.FLOAT)) {
    builder = Types.primitive(FLOAT, repetition);
  } else if (type.equals(Schema.Type.DOUBLE)) {
    builder = Types.primitive(DOUBLE, repetition);
  } else if (type.equals(Schema.Type.BYTES)) {
    builder = Types.primitive(BINARY, repetition);
  } else if (type.equals(Schema.Type.STRING)) {
    builder = Types.primitive(BINARY, repetition).as(UTF8);
  } else if (type.equals(Schema.Type.RECORD)) {
    return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
  } else if (type.equals(Schema.Type.ENUM)) {
    builder = Types.primitive(BINARY, repetition).as(ENUM);
  } else if (type.equals(Schema.Type.ARRAY)) {
    if (writeOldListStructure) {
      return ConversionPatterns.listType(repetition, fieldName,
          convertField("array", schema.getElementType(), REPEATED));
    } else {
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    }
  } else if (type.equals(Schema.Type.MAP)) {
    Type valType = convertField("value", schema.getValueType());
    // avro map key type is always string
    return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType);
  } else if (type.equals(Schema.Type.FIXED)) {
    builder = (Types.PrimitiveBuilder<PrimitiveType>) Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition).length(schema.getFixedSize());
  } else if (type.equals(Schema.Type.UNION)) {
    return convertUnion(fieldName, schema, repetition);
  } else {
    throw new UnsupportedOperationException("Cannot convert Avro type " + type);
  }

  // schema translation can only be done for known logical types because this
  // creates an equivalence
  String logicalType = schema.getProp(LOGICAL_TYPE);
  if (logicalType != null) {
    if (LOGICAL_TYPE_DECIMAL.equals(logicalType)) {
      builder = (Types.PrimitiveBuilder<PrimitiveType>) builder.as(DECIMAL)
          .precision(schema.getJsonProp(LOGICAL_PROP_PRECISION).getIntValue())
          .scale(schema.getJsonProp(LOGICAL_PROP_SCALE).getIntValue());

    } else {
      OriginalType annotation = convertLogicalType(logicalType);
      if (annotation != null) {
        builder.as(annotation);
      }
    }
  }

  return builder.named(fieldName);
}
 
Example 6
Source File: ParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
private void buildChildren(Types.GroupBuilder builder,
                           Iterator<SchemaElement> schema,
                           int childrenCount,
                           List<ColumnOrder> columnOrders,
                           int columnCount) {
  for (int i = 0; i < childrenCount; i++) {
    SchemaElement schemaElement = schema.next();

    // Create Parquet Type.
    Types.Builder childBuilder;
    if (schemaElement.type != null) {
      Types.PrimitiveBuilder primitiveBuilder = builder.primitive(
          getPrimitive(schemaElement.type),
          fromParquetRepetition(schemaElement.repetition_type));
      if (schemaElement.isSetType_length()) {
        primitiveBuilder.length(schemaElement.type_length);
      }
      if (schemaElement.isSetPrecision()) {
        primitiveBuilder.precision(schemaElement.precision);
      }
      if (schemaElement.isSetScale()) {
        primitiveBuilder.scale(schemaElement.scale);
      }
      if (columnOrders != null) {
        org.apache.parquet.schema.ColumnOrder columnOrder = fromParquetColumnOrder(columnOrders.get(columnCount));
        // As per parquet format 2.4.0 no UNDEFINED order is supported. So, set undefined column order for the types
        // where ordering is not supported.
        if (columnOrder.getColumnOrderName() == ColumnOrderName.TYPE_DEFINED_ORDER
            && (schemaElement.type == Type.INT96 || schemaElement.converted_type == ConvertedType.INTERVAL)) {
          columnOrder = org.apache.parquet.schema.ColumnOrder.undefined();
        }
        primitiveBuilder.columnOrder(columnOrder);
      }
      childBuilder = primitiveBuilder;

    } else {
      childBuilder = builder.group(fromParquetRepetition(schemaElement.repetition_type));
      buildChildren((Types.GroupBuilder) childBuilder, schema, schemaElement.num_children, columnOrders, columnCount);
    }

    if (schemaElement.isSetLogicalType()) {
      childBuilder.as(getLogicalTypeAnnotation(schemaElement.logicalType));
    }
    if (schemaElement.isSetConverted_type()) {
      OriginalType originalType = getLogicalTypeAnnotation(schemaElement.converted_type, schemaElement).toOriginalType();
      OriginalType newOriginalType = (schemaElement.isSetLogicalType() && getLogicalTypeAnnotation(schemaElement.logicalType) != null) ?
         getLogicalTypeAnnotation(schemaElement.logicalType).toOriginalType() : null;
      if (!originalType.equals(newOriginalType)) {
        if (newOriginalType != null) {
          LOG.warn("Converted type and logical type metadata mismatch (convertedType: {}, logical type: {}). Using value in converted type.",
            schemaElement.converted_type, schemaElement.logicalType);
        }
        childBuilder.as(originalType);
      }
    }
    if (schemaElement.isSetField_id()) {
      childBuilder.id(schemaElement.field_id);
    }

    childBuilder.named(schemaElement.name);
    ++columnCount;
  }
}
 
Example 7
Source File: AvroSchemaConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) {
  Types.PrimitiveBuilder<PrimitiveType> builder;
  Schema.Type type = schema.getType();
  LogicalType logicalType = schema.getLogicalType();
  if (type.equals(Schema.Type.BOOLEAN)) {
    builder = Types.primitive(BOOLEAN, repetition);
  } else if (type.equals(Schema.Type.INT)) {
    builder = Types.primitive(INT32, repetition);
  } else if (type.equals(Schema.Type.LONG)) {
    builder = Types.primitive(INT64, repetition);
  } else if (type.equals(Schema.Type.FLOAT)) {
    builder = Types.primitive(FLOAT, repetition);
  } else if (type.equals(Schema.Type.DOUBLE)) {
    builder = Types.primitive(DOUBLE, repetition);
  } else if (type.equals(Schema.Type.BYTES)) {
    builder = Types.primitive(BINARY, repetition);
  } else if (type.equals(Schema.Type.STRING)) {
    if (logicalType != null && logicalType.getName().equals(LogicalTypes.uuid().getName()) && writeParquetUUID) {
      builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
          .length(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES);
    } else {
      builder = Types.primitive(BINARY, repetition).as(stringType());
    }
  } else if (type.equals(Schema.Type.RECORD)) {
    return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
  } else if (type.equals(Schema.Type.ENUM)) {
    builder = Types.primitive(BINARY, repetition).as(enumType());
  } else if (type.equals(Schema.Type.ARRAY)) {
    if (writeOldListStructure) {
      return ConversionPatterns.listType(repetition, fieldName,
          convertField("array", schema.getElementType(), REPEATED));
    } else {
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    }
  } else if (type.equals(Schema.Type.MAP)) {
    Type valType = convertField("value", schema.getValueType());
    // avro map key type is always string
    return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType);
  } else if (type.equals(Schema.Type.FIXED)) {
    builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
        .length(schema.getFixedSize());
  } else if (type.equals(Schema.Type.UNION)) {
    return convertUnion(fieldName, schema, repetition);
  } else {
    throw new UnsupportedOperationException("Cannot convert Avro type " + type);
  }

  // schema translation can only be done for known logical types because this
  // creates an equivalence
  if (logicalType != null) {
    if (logicalType instanceof LogicalTypes.Decimal) {
      LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType;
      builder = builder.as(decimalType(decimal.getScale(), decimal.getPrecision()));
    } else {
      LogicalTypeAnnotation annotation = convertLogicalType(logicalType);
      if (annotation != null) {
        builder.as(annotation);
      }
    }
  }

  return builder.named(fieldName);
}