parquet.schema.Types Java Examples

The following examples show how to use parquet.schema.Types. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JsonElementConversionFactory.java, from incubator-gobblin (Apache License 2.0), 6 votes
/**
 * Builds the Parquet schema for this column as a nested group.
 *
 * <p>The key converter's schema and the element converter's schema are placed in a repeated
 * group named {@code MAP_KEY}; that group is then wrapped in an optional or required group
 * named after the column, depending on what {@code optionalOrRequired} reports for the
 * JSON schema.
 *
 * @return the outer group type for the column
 * @throws RuntimeException if the resolved repetition is neither OPTIONAL nor REQUIRED
 */
@Override
protected Type buildSchema() {
  JsonElementConverter elementConverter = this.elementConverter;
  JsonElementConverter keyConverter = getKeyConverter();
  GroupType mapGroup =
      Types.repeatedGroup().addFields(keyConverter.schema(), elementConverter.schema()).named(MAP_KEY)
          .asGroupType();
  String columnName = this.jsonSchema.getColumnName();
  switch (optionalOrRequired(this.jsonSchema)) {
    case OPTIONAL:
      return Types.optionalGroup().addFields(mapGroup).named(columnName).asGroupType();
    case REQUIRED:
      return Types.requiredGroup().addFields(mapGroup).named(columnName).asGroupType();
    default:
      // Fail fast: a null schema would only surface later as an unrelated NullPointerException.
      throw new RuntimeException("Unsupported Repetition type");
  }
}
 
Example #2
Source File: ParquetMetadataReader.java, from paraflow (Apache License 2.0), 5 votes
/**
 * Rebuilds a {@link MessageType} from a flat list of schema elements.
 *
 * <p>The first element is treated as the root: its child count determines how many
 * top-level fields are read from the remaining elements, and its name becomes the
 * name of the resulting message type.
 *
 * @param schema the schema elements, root first
 * @return the message type assembled from the elements
 */
private static MessageType readParquetSchema(List<SchemaElement> schema)
{
    Iterator<SchemaElement> elements = schema.iterator();
    SchemaElement root = elements.next();

    Types.MessageTypeBuilder messageBuilder = Types.buildMessage();
    int topLevelFieldCount = root.getNum_children();
    readTypeSchema(messageBuilder, elements, topLevelFieldCount);

    return messageBuilder.named(root.name);
}
 
Example #3
Source File: ParquetMetadataReader.java, from paraflow (Apache License 2.0), 5 votes
/**
 * Consumes {@code typeCount} fields from the iterator and adds each one to {@code builder}.
 *
 * <p>An element with no primitive type is read as a group, and its children are consumed
 * recursively from the same iterator before the group is named. Any other element is read
 * as a primitive, with length, precision and scale applied when they are set. The converted
 * type and field id are applied to either kind when set.
 *
 * @param builder the group builder that receives the fields
 * @param schemaIterator the iterator positioned at the first field to read
 * @param typeCount the number of direct child fields to read
 */
private static void readTypeSchema(Types.GroupBuilder<?> builder, Iterator<SchemaElement> schemaIterator, int typeCount)
{
    for (int fieldIndex = 0; fieldIndex < typeCount; fieldIndex++) {
        SchemaElement current = schemaIterator.next();
        Types.Builder<?, ?> fieldBuilder;

        if (element_isPrimitive(current)) {
            Types.PrimitiveBuilder<?> leafBuilder = builder.primitive(getTypeName(current.type), Repetition.valueOf(current.repetition_type.name()));
            if (current.isSetType_length()) {
                leafBuilder.length(current.type_length);
            }
            if (current.isSetPrecision()) {
                leafBuilder.precision(current.precision);
            }
            if (current.isSetScale()) {
                leafBuilder.scale(current.scale);
            }
            fieldBuilder = leafBuilder;
        }
        else {
            fieldBuilder = builder.group(Repetition.valueOf(current.repetition_type.name()));
            // Children must be added to the nested group before it is named below.
            readTypeSchema((Types.GroupBuilder<?>) fieldBuilder, schemaIterator, current.num_children);
        }

        if (current.isSetConverted_type()) {
            fieldBuilder.as(getOriginalType(current.converted_type));
        }
        if (current.isSetField_id()) {
            fieldBuilder.id(current.field_id);
        }
        // Naming the field finishes it and attaches it to the parent builder.
        fieldBuilder.named(current.name);
    }
}

/** Returns whether the element carries a primitive type, i.e. is not a group. */
private static boolean element_isPrimitive(SchemaElement element)
{
    return element.type != null;
}
 
Example #4
Source File: JsonElementConversionFactory.java, from incubator-gobblin (Apache License 2.0), 5 votes
/**
 * Builds the Parquet schema for this column as a BINARY primitive annotated as UTF8.
 *
 * <p>A repeated converter always yields a repeated field; otherwise the repetition is
 * taken from what {@code optionalOrRequired} reports for the JSON schema.
 *
 * @return the primitive type named after the column
 * @throws RuntimeException if the resolved repetition is neither OPTIONAL nor REQUIRED
 */
@Override
protected Type buildSchema() {
  final String name = this.jsonSchema.getColumnName();

  if (!this.repeated) {
    switch (optionalOrRequired(this.jsonSchema)) {
      case REQUIRED:
        return Types.required(BINARY).as(UTF8).named(name);
      case OPTIONAL:
        return Types.optional(BINARY).as(UTF8).named(name);
      default:
        throw new RuntimeException("Unsupported Repetition type");
    }
  }

  return Types.repeated(BINARY).as(UTF8).named(name);
}
 
Example #5
Source File: PentahoParquetWriteSupport.java, from pentaho-hadoop-shims (Apache License 2.0), 4 votes
/**
 * Converts an output field description into the Parquet primitive type used to write it.
 *
 * <p>The repetition is OPTIONAL when the field allows nulls and REQUIRED otherwise. Decimal
 * variants differ only in their physical type (BINARY, INT32 or INT64) and carry the field's
 * precision and scale.
 *
 * @param f the output field to convert
 * @return the primitive type named after the field's format field name
 * @throws RuntimeException if the field's Parquet type is not handled here
 */
private PrimitiveType convertToPrimitiveType( IParquetOutputField f ) {
  Type.Repetition rep = f.getAllowNull() ? Type.Repetition.OPTIONAL : Type.Repetition.REQUIRED;
  String formatFieldName = f.getFormatFieldName();
  switch ( f.getParquetType() ) {
    case BINARY:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.BINARY, formatFieldName );
    case BOOLEAN:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.BOOLEAN, formatFieldName );
    case DOUBLE:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.DOUBLE, formatFieldName );
    case FLOAT:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.FLOAT, formatFieldName );
    case INT_32:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT32, formatFieldName );
    case UTF8:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.BINARY, formatFieldName, OriginalType.UTF8 );
    case INT_64:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT64, formatFieldName, OriginalType.INT_64 );
    case INT_96:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT96, formatFieldName );
    case DATE:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT32, formatFieldName, OriginalType.DATE );
    case DECIMAL:
      return buildDecimalType( PrimitiveType.PrimitiveTypeName.BINARY, rep, f, formatFieldName );
    case DECIMAL_INT_32:
      return buildDecimalType( PrimitiveType.PrimitiveTypeName.INT32, rep, f, formatFieldName );
    case DECIMAL_INT_64:
      return buildDecimalType( PrimitiveType.PrimitiveTypeName.INT64, rep, f, formatFieldName );
    case TIMESTAMP_MILLIS:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT64, formatFieldName,
        OriginalType.TIMESTAMP_MILLIS );
    default:
      throw new RuntimeException( "Unsupported output type: " + f.getParquetType() );
  }
}

/**
 * Builds a DECIMAL-annotated primitive of the given physical type, taking precision and scale
 * from the field. The already-resolved repetition is passed in, so callers need no separate
 * optional/required branches.
 */
private PrimitiveType buildDecimalType( PrimitiveType.PrimitiveTypeName physicalType, Type.Repetition rep,
                                        IParquetOutputField f, String formatFieldName ) {
  return Types.primitive( physicalType, rep ).as( OriginalType.DECIMAL )
    .precision( f.getPrecision() ).scale( f.getScale() ).named( formatFieldName );
}