parquet.schema.OriginalType Java Examples

The following examples show how to use parquet.schema.OriginalType. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ExaParquetWriterImpl.java    From hadoop-etl-udfs with MIT License 6 votes vote down vote up
/**
 * Builds one Parquet {@link PrimitiveType} per type descriptor, preserving input order.
 *
 * <p>A descriptor with a non-zero {@code length} is built with the constructor that takes
 * a length; otherwise the type is built from its name plus an optional original type
 * (passed as {@code null} when the descriptor carries none).
 *
 * @param exaParquetTypeInfos descriptors to convert
 * @return a new list holding the converted types
 * @throws IllegalArgumentException if a repetition, primitive type name or original type
 *         string does not match an enum constant (raised by the enum {@code valueOf} calls)
 */
static private List<Type> typeInfoToParquetTypes(final List<ExaParquetTypeInfo> exaParquetTypeInfos) {
    final List<Type> parquetTypes = new ArrayList<>();
    for (final ExaParquetTypeInfo info : exaParquetTypeInfos) {
        // Both constructors share the first two arguments; resolve them once, in the same order.
        final Type.Repetition repetition = Type.Repetition.valueOf(info.typeRepitition);
        final PrimitiveType.PrimitiveTypeName primitiveName =
                PrimitiveType.PrimitiveTypeName.valueOf(info.primitiveTypeName);

        final PrimitiveType converted;
        if (info.length == 0) {
            // No length given: annotate with the original type when one is present.
            OriginalType annotation = null;
            if (info.originalType != null) {
                annotation = OriginalType.valueOf(info.originalType);
            }
            converted = new PrimitiveType(repetition, primitiveName, info.name, annotation);
        } else {
            converted = new PrimitiveType(repetition, primitiveName, info.length, info.name);
        }
        parquetTypes.add(converted);
    }
    return parquetTypes;
}
 
Example #2
Source File: MetadataUtils.java    From parquet-tools with Apache License 2.0 6 votes vote down vote up
/**
 * Prints a one-line summary of a primitive field: its name prefixed with {@code depth} dots,
 * its repetition and primitive type name, the original type when present, and - when a
 * containing message type is supplied - the column's maximum repetition and definition levels.
 *
 * @param out       writer receiving the formatted line
 * @param type      primitive field to describe
 * @param depth     number of "." characters prepended to the field name
 * @param container message type used to look up the column descriptor; may be {@code null},
 *                  in which case the level information is omitted
 * @param cpath     path of enclosing field names; temporarily extended with this field's
 *                  name and restored before the lookup
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath) {
  final String label = Strings.repeat(".", depth) + type.getName();
  final OriginalType annotation = type.getOriginalType();
  final Repetition repetition = type.getRepetition();
  final PrimitiveTypeName primitive = type.getPrimitiveTypeName();

  out.format("%s: %s %s", label, repetition, primitive);
  if (annotation != null) {
    out.format(" O:%s", annotation);
  }

  if (container != null) {
    // Snapshot the full path including this field, then undo the temporary append.
    cpath.add(type.getName());
    final String[] columnPath = cpath.toArray(new String[cpath.size()]);
    cpath.remove(cpath.size() - 1);

    final ColumnDescriptor column = container.getColumnDescription(columnPath);
    final int maxDefinition = column.getMaxDefinitionLevel();
    final int maxRepetition = column.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", maxRepetition, maxDefinition);
  }
  out.println();
}
 
Example #3
Source File: SimpleRecordConverter.java    From parquet-tools with Apache License 2.0 6 votes vote down vote up
/**
 * Chooses the converter for a schema field.
 *
 * <p>Group fields get a nested {@code SimpleRecordConverter}. Primitive fields annotated
 * with {@code UTF8} get a {@code StringConverter}; every other primitive field (no
 * annotation, or any other original type) gets a {@code SimplePrimitiveConverter}.
 *
 * @param field schema field to build a converter for
 * @return the converter matching the field's kind and annotation
 */
private Converter createConverter(Type field) {
  if (!field.isPrimitive()) {
    return new SimpleRecordConverter(field.asGroupType(), field.getName(), this);
  }

  // UTF8 is the only annotation that changes which converter is used.
  final OriginalType annotation = field.getOriginalType();
  if (annotation == OriginalType.UTF8) {
    return new StringConverter(field.getName());
  }
  return new SimplePrimitiveConverter(field.getName());
}
 
Example #4
Source File: ParquetMetadataReader.java    From paraflow with Apache License 2.0 4 votes vote down vote up
/**
 * Maps a {@code ConvertedType} onto the {@link OriginalType} constant of the same name.
 *
 * @param type converted type to translate
 * @return the equally named {@code OriginalType}
 * @throws IllegalArgumentException if {@code type} is not one of the constants handled below
 */
private static OriginalType getOriginalType(ConvertedType type)
{
    final OriginalType mapped;
    switch (type) {
        case UTF8:
            mapped = OriginalType.UTF8;
            break;
        case MAP:
            mapped = OriginalType.MAP;
            break;
        case MAP_KEY_VALUE:
            mapped = OriginalType.MAP_KEY_VALUE;
            break;
        case LIST:
            mapped = OriginalType.LIST;
            break;
        case ENUM:
            mapped = OriginalType.ENUM;
            break;
        case DECIMAL:
            mapped = OriginalType.DECIMAL;
            break;
        case DATE:
            mapped = OriginalType.DATE;
            break;
        case TIME_MILLIS:
            mapped = OriginalType.TIME_MILLIS;
            break;
        case TIMESTAMP_MILLIS:
            mapped = OriginalType.TIMESTAMP_MILLIS;
            break;
        case INTERVAL:
            mapped = OriginalType.INTERVAL;
            break;
        case INT_8:
            mapped = OriginalType.INT_8;
            break;
        case INT_16:
            mapped = OriginalType.INT_16;
            break;
        case INT_32:
            mapped = OriginalType.INT_32;
            break;
        case INT_64:
            mapped = OriginalType.INT_64;
            break;
        case UINT_8:
            mapped = OriginalType.UINT_8;
            break;
        case UINT_16:
            mapped = OriginalType.UINT_16;
            break;
        case UINT_32:
            mapped = OriginalType.UINT_32;
            break;
        case UINT_64:
            mapped = OriginalType.UINT_64;
            break;
        case JSON:
            mapped = OriginalType.JSON;
            break;
        case BSON:
            mapped = OriginalType.BSON;
            break;
        default:
            // Anything not listed above is rejected rather than guessed at.
            throw new IllegalArgumentException("Unknown converted type " + type);
    }
    return mapped;
}
 
Example #5
Source File: PentahoParquetWriteSupport.java    From pentaho-hadoop-shims with Apache License 2.0 4 votes vote down vote up
/**
 * Translates an output field definition into the Parquet primitive type used to store it.
 *
 * <p>The repetition is {@code OPTIONAL} when the field allows nulls and {@code REQUIRED}
 * otherwise. The three decimal variants differ only in their physical type (BINARY, INT32,
 * INT64) and are built by {@link #buildDecimalType}.
 *
 * @param f output field describing the Parquet type, nullability, name, precision and scale
 * @return the Parquet primitive type for the field
 * @throws RuntimeException if the field's Parquet type is not handled by this method
 */
private PrimitiveType convertToPrimitiveType( IParquetOutputField f ) {
  Type.Repetition rep = f.getAllowNull() ? Type.Repetition.OPTIONAL : Type.Repetition.REQUIRED;
  String formatFieldName = f.getFormatFieldName();
  switch ( f.getParquetType() ) {
    case BINARY:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.BINARY, formatFieldName );
    case BOOLEAN:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.BOOLEAN, formatFieldName );
    case DOUBLE:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.DOUBLE, formatFieldName );
    case FLOAT:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.FLOAT, formatFieldName );
    case INT_32:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT32, formatFieldName );
    case UTF8:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.BINARY, formatFieldName, OriginalType.UTF8 );
    case INT_64:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT64, formatFieldName, OriginalType.INT_64 );
    case INT_96:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT96, formatFieldName );
    case DATE:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT32, formatFieldName, OriginalType.DATE );
    case DECIMAL:
      return buildDecimalType( f, PrimitiveType.PrimitiveTypeName.BINARY, formatFieldName );
    case DECIMAL_INT_32:
      return buildDecimalType( f, PrimitiveType.PrimitiveTypeName.INT32, formatFieldName );
    case DECIMAL_INT_64:
      return buildDecimalType( f, PrimitiveType.PrimitiveTypeName.INT64, formatFieldName );
    case TIMESTAMP_MILLIS:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT64, formatFieldName,
        OriginalType.TIMESTAMP_MILLIS );
    default:
      throw new RuntimeException( "Unsupported output type: " + f.getParquetType() );
  }
}

/**
 * Builds a DECIMAL-annotated primitive type on the given physical type, taking nullability,
 * precision and scale from the field.
 *
 * @param f               field supplying nullability, precision and scale
 * @param physicalType    physical Parquet type backing the decimal
 * @param formatFieldName name of the resulting Parquet field
 * @return the decimal primitive type
 */
private PrimitiveType buildDecimalType( IParquetOutputField f, PrimitiveType.PrimitiveTypeName physicalType,
                                        String formatFieldName ) {
  // Same builder entry points as before; only the optional/required choice is folded into one expression.
  return ( f.getAllowNull() ? Types.optional( physicalType ) : Types.required( physicalType ) )
    .as( OriginalType.DECIMAL )
    .precision( f.getPrecision() )
    .scale( f.getScale() )
    .named( formatFieldName );
}