Java Code Examples for org.apache.parquet.schema.PrimitiveType#getLogicalTypeAnnotation()
The following examples show how to use
org.apache.parquet.schema.PrimitiveType#getLogicalTypeAnnotation().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MetadataUtils.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Writes a one-line summary of a primitive field to {@code out}.
 *
 * <p>The line starts with {@code depth} dots followed by the field name, then the repetition
 * and the primitive type name. Depending on {@code showOriginalTypes} it is followed by either
 * the original type ({@code O:}) or the logical type annotation ({@code L:}), when one is
 * present. If {@code container} is non-null, the maximum repetition and definition levels of
 * the column are appended as well.
 *
 * @param out               writer receiving the formatted line
 * @param type              the primitive field to describe
 * @param depth             number of leading dots printed before the field name
 * @param container         schema used to look up the column descriptor, or {@code null} to
 *                          skip the repetition/definition levels
 * @param cpath             path of the enclosing fields; the field name is appended
 *                          temporarily and removed again before the lookup
 * @param showOriginalTypes {@code true} to print the original type instead of the logical
 *                          type annotation
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth,
    MessageType container, List<String> cpath, boolean showOriginalTypes) {
  out.format("%s: %s %s",
      Strings.repeat(".", depth) + type.getName(),
      type.getRepetition(),
      type.getPrimitiveTypeName());

  if (!showOriginalTypes) {
    LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
    if (logicalType != null) {
      out.format(" L:%s", logicalType);
    }
  } else {
    OriginalType legacyType = null;
    try {
      legacyType = type.getOriginalType();
    } catch (Exception ignored) {
      // Best effort: a failing lookup is treated the same as "no original type".
      legacyType = null;
    }
    if (legacyType != null) {
      out.format(" O:%s", legacyType);
    }
  }

  if (container != null) {
    // Build the full column path by pushing this field's name, snapshotting, then popping it.
    cpath.add(type.getName());
    String[] columnPath = cpath.toArray(new String[0]);
    cpath.remove(cpath.size() - 1);

    ColumnDescriptor column = container.getColumnDescription(columnPath);
    int maxDefinition = column.getMaxDefinitionLevel();
    int maxRepetition = column.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", maxRepetition, maxDefinition);
  }

  out.println();
}
Example 2
Source File: BinaryTruncator.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Selects the truncator to use for values of the given primitive type.
 *
 * <p>A {@code null} type and {@code INT96} map to the no-op truncator. For {@code BINARY} and
 * {@code FIXED_LEN_BYTE_ARRAY} the UTF-8 truncator is used when there is no logical type
 * annotation or when the annotation is string, enum, JSON or BSON; any other annotation falls
 * back to the no-op truncator.
 *
 * @param type the primitive type, may be {@code null}
 * @return the truncator for {@code type}
 * @throws IllegalArgumentException if the primitive type name is none of the handled ones
 */
public static BinaryTruncator getTruncator(PrimitiveType type) {
  if (type == null) {
    return NO_OP_TRUNCATOR;
  }

  // The switch only classifies the physical type; binary-like types are handled below.
  switch (type.getPrimitiveTypeName()) {
    case INT96:
      return NO_OP_TRUNCATOR;
    case BINARY:
    case FIXED_LEN_BYTE_ARRAY:
      break;
    default:
      throw new IllegalArgumentException("No truncator is available for the type: " + type);
  }

  LogicalTypeAnnotation annotation = type.getLogicalTypeAnnotation();
  if (annotation == null) {
    return DEFAULT_UTF8_TRUNCATOR;
  }

  LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<BinaryTruncator> utf8Selector =
      new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<BinaryTruncator>() {
        @Override
        public Optional<BinaryTruncator> visit(
            LogicalTypeAnnotation.StringLogicalTypeAnnotation string) {
          return Optional.of(DEFAULT_UTF8_TRUNCATOR);
        }

        @Override
        public Optional<BinaryTruncator> visit(
            LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumeration) {
          return Optional.of(DEFAULT_UTF8_TRUNCATOR);
        }

        @Override
        public Optional<BinaryTruncator> visit(
            LogicalTypeAnnotation.JsonLogicalTypeAnnotation json) {
          return Optional.of(DEFAULT_UTF8_TRUNCATOR);
        }

        @Override
        public Optional<BinaryTruncator> visit(
            LogicalTypeAnnotation.BsonLogicalTypeAnnotation bson) {
          return Optional.of(DEFAULT_UTF8_TRUNCATOR);
        }
      };

  return annotation.accept(utf8Selector).orElse(NO_OP_TRUNCATOR);
}
Example 3
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Determines the sort order that applications assume for a primitive type.
 *
 * <p>When the type carries a logical type annotation, the order is chosen from that annotation;
 * annotations the visitor does not handle, and types without an annotation, use the default
 * order of the underlying primitive type name.
 *
 * @param primitive a primitive type with a logical type annotation
 * @return the "correct" sort order of the type that applications assume
 */
private static SortOrder sortOrder(PrimitiveType primitive) {
  LogicalTypeAnnotation logicalType = primitive.getLogicalTypeAnnotation();
  if (logicalType == null) {
    return defaultSortOrder(primitive.getPrimitiveTypeName());
  }

  LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<SortOrder> orderSelector =
      new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<SortOrder>() {

        // --- order depends on the annotation's signedness ---

        @Override
        public Optional<SortOrder> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation integer) {
          return of(integer.isSigned() ? SortOrder.SIGNED : SortOrder.UNSIGNED);
        }

        // --- signed ---

        @Override
        public Optional<SortOrder> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation date) {
          return of(SortOrder.SIGNED);
        }

        @Override
        public Optional<SortOrder> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation time) {
          return of(SortOrder.SIGNED);
        }

        @Override
        public Optional<SortOrder> visit(
            LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestamp) {
          return of(SortOrder.SIGNED);
        }

        // --- unsigned ---

        @Override
        public Optional<SortOrder> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation string) {
          return of(SortOrder.UNSIGNED);
        }

        @Override
        public Optional<SortOrder> visit(
            LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumeration) {
          return of(SortOrder.UNSIGNED);
        }

        @Override
        public Optional<SortOrder> visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation json) {
          return of(SortOrder.UNSIGNED);
        }

        @Override
        public Optional<SortOrder> visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bson) {
          return of(SortOrder.UNSIGNED);
        }

        @Override
        public Optional<SortOrder> visit(UUIDLogicalTypeAnnotation uuid) {
          return of(SortOrder.UNSIGNED);
        }

        // --- unknown ---

        @Override
        public Optional<SortOrder> visit(
            LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimal) {
          return of(SortOrder.UNKNOWN);
        }

        @Override
        public Optional<SortOrder> visit(
            LogicalTypeAnnotation.IntervalLogicalTypeAnnotation interval) {
          return of(SortOrder.UNKNOWN);
        }

        @Override
        public Optional<SortOrder> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation list) {
          return of(SortOrder.UNKNOWN);
        }

        @Override
        public Optional<SortOrder> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation map) {
          return of(SortOrder.UNKNOWN);
        }

        @Override
        public Optional<SortOrder> visit(
            LogicalTypeAnnotation.MapKeyValueTypeAnnotation mapKeyValue) {
          return of(SortOrder.UNKNOWN);
        }
      };

  return logicalType.accept(orderSelector)
      .orElse(defaultSortOrder(primitive.getPrimitiveTypeName()));
}
Example 4
Source File: ShowDictionaryCommand.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Prints the dictionary entries of the configured column for each row group of one Parquet
 * file.
 *
 * <p>Exactly one target file must be given. For every row group the dictionary page of the
 * column is decoded and each entry is logged together with its index. BINARY entries are shown
 * as UTF-8 text when the column has a string logical type annotation and as raw bytes
 * otherwise; INT32, INT64, FLOAT and DOUBLE entries are shown as numbers.
 *
 * <p>The file reader is closed before the method returns, including when an exception is
 * thrown while reading.
 *
 * @return {@code 0} on success
 * @throws IOException propagated from opening, reading or closing the Parquet file
 * @throws IllegalArgumentException if the column's primitive type is not one of the types
 *         listed above
 */
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
  Preconditions.checkArgument(targets != null && targets.size() >= 1,
      "A Parquet file is required.");
  Preconditions.checkArgument(targets.size() == 1,
      "Cannot process multiple Parquet files.");

  String source = targets.get(0);

  // try-with-resources: the reader holds an open file handle and was previously never closed.
  try (ParquetFileReader reader = ParquetFileReader.open(getConf(), qualifiedPath(source))) {
    MessageType schema = reader.getFileMetaData().getSchema();
    ColumnDescriptor descriptor = Util.descriptor(column, schema);
    PrimitiveType type = Util.primitive(column, schema);
    Preconditions.checkNotNull(type);

    DictionaryPageReadStore dictionaryReader;
    int rowGroup = 0;
    while ((dictionaryReader = reader.getNextDictionaryReader()) != null) {
      DictionaryPage page = dictionaryReader.readDictionaryPage(descriptor);

      if (page == null) {
        // Per the DictionaryPageReadStore Javadoc, a column chunk that is not
        // dictionary-encoded yields null; report it instead of failing with an NPE.
        console.info("\nRow group {} has no dictionary for \"{}\"", rowGroup, column);
      } else {
        Dictionary dict = page.getEncoding().initDictionary(descriptor, page);

        console.info("\nRow group {} dictionary for \"{}\":", rowGroup, column);
        for (int i = 0; i <= dict.getMaxId(); i += 1) {
          String index = String.format("%6d", i);
          switch (type.getPrimitiveTypeName()) {
            case BINARY:
              if (type.getLogicalTypeAnnotation()
                  instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) {
                console.info("{}: {}", index,
                    Util.humanReadable(dict.decodeToBinary(i).toStringUsingUTF8(), 70));
              } else {
                console.info("{}: {}", index,
                    Util.humanReadable(dict.decodeToBinary(i).getBytesUnsafe(), 70));
              }
              break;
            case INT32:
              console.info("{}: {}", index, dict.decodeToInt(i));
              break;
            case INT64:
              console.info("{}: {}", index, dict.decodeToLong(i));
              break;
            case FLOAT:
              console.info("{}: {}", index, dict.decodeToFloat(i));
              break;
            case DOUBLE:
              console.info("{}: {}", index, dict.decodeToDouble(i));
              break;
            default:
              throw new IllegalArgumentException(
                  "Unknown dictionary type: " + type.getPrimitiveTypeName());
          }
        }
      }

      // Advance the underlying reader so the next dictionary reader belongs to the next group.
      reader.skipNextRowGroup();
      rowGroup += 1;
    }
  }

  console.info("");

  return 0;
}
Example 5
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0 | 3 votes |
/** * Returns whether to use signed order min and max with a type. It is safe to * use signed min and max when the type is a string type and contains only * ASCII characters (where the sign bit was 0). This checks whether the type * is a string type and uses {@code useSignedStringMinMax} to determine if * only ASCII characters were written. * * @param type a primitive type with a logical type annotation * @return true if signed order min/max can be used with this type */ private boolean overrideSortOrderToSigned(PrimitiveType type) { // even if the override is set, only return stats for string-ish types // a null type annotation is considered string-ish because some writers // failed to use the UTF8 annotation. LogicalTypeAnnotation annotation = type.getLogicalTypeAnnotation(); return useSignedStringMinMax && PrimitiveTypeName.BINARY == type.getPrimitiveTypeName() && (annotation == null || STRING_TYPES.contains(annotation.getClass())); }