Java Code Examples for org.apache.parquet.column.ColumnDescriptor#getMaxRepetitionLevel()
The following examples show how to use
org.apache.parquet.column.ColumnDescriptor#getMaxRepetitionLevel().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MetadataUtils.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Writes a one-line description of a primitive field to {@code out}.
 *
 * <p>The line contains the field name prefixed by {@code depth} dots, its repetition and
 * primitive type name, the original type (when non-null) and, when a {@code container}
 * schema is supplied, the column's maximum repetition and definition levels.
 *
 * @param out       writer that receives the formatted line
 * @param type      primitive field being described
 * @param depth     number of {@code "."} characters placed in front of the field name
 * @param container schema used to look up the column descriptor; when {@code null} the
 *                  R/D levels are not printed
 * @param cpath     path of the enclosing fields; the field name is appended only while the
 *                  full path array is built and is removed again before the lookup
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath) {
  String indentedName = Strings.repeat(".", depth) + type.getName();
  OriginalType originalType = type.getOriginalType();
  Repetition repetition = type.getRepetition();
  PrimitiveTypeName primitiveName = type.getPrimitiveTypeName();

  out.format("%s: %s %s", indentedName, repetition, primitiveName);
  if (originalType != null) {
    out.format(" O:%s", originalType);
  }

  if (container != null) {
    // Build the full column path, then restore cpath to its incoming state.
    cpath.add(type.getName());
    String[] fullPath = cpath.toArray(new String[0]);
    cpath.remove(cpath.size() - 1);

    ColumnDescriptor descriptor = container.getColumnDescription(fullPath);
    int maxDefinition = descriptor.getMaxDefinitionLevel();
    int maxRepetition = descriptor.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", maxRepetition, maxDefinition);
  }

  out.println();
}
Example 2
Source File: RichColumnDescriptor.java From presto with Apache License 2.0 | 5 votes |
public RichColumnDescriptor( ColumnDescriptor descriptor, PrimitiveType primitiveType) { super(descriptor.getPath(), primitiveType, descriptor.getMaxRepetitionLevel(), descriptor.getMaxDefinitionLevel()); this.required = primitiveType.getRepetition() != OPTIONAL; }
Example 3
Source File: DeprecatedParquetVectorizedReader.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Returns the data type length for a given {@link ColumnDescriptor} and its corresponding
 * {@link SchemaElement}. Neither is enough on its own: the max repetition level (which
 * indicates an array type) lives in the ColumnDescriptor, while the length of a fixed
 * width field is stored at the schema level.
 *
 * @param column column whose primitive type and max repetition level are inspected
 * @param se     schema element supplying {@code type_length} for FIXED_LEN_BYTE_ARRAY
 * @return -1 for BINARY columns and for columns with a max repetition level above zero;
 *         {@code type_length * 8} for FIXED_LEN_BYTE_ARRAY; otherwise the result of
 *         {@code getTypeLengthInBits} for the column type
 */
private int getDataTypeLength(ColumnDescriptor column, SchemaElement se) {
  // Variable-width and repeated columns have no fixed length.
  if (column.getType() == PrimitiveType.PrimitiveTypeName.BINARY
      || column.getMaxRepetitionLevel() > 0) {
    return -1;
  }

  if (column.getType() == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
    return se.getType_length() * 8;
  }

  return getTypeLengthInBits(column.getType());
}
Example 4
Source File: DeprecatedParquetVectorizedReader.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Maps a column's maximum repetition and definition levels to a data mode.
 *
 * @param column column descriptor to classify
 * @return REPEATED when the max repetition level is above zero; otherwise REQUIRED when
 *         the max definition level is zero; otherwise OPTIONAL
 */
private TypeProtos.DataMode getDataMode(ColumnDescriptor column) {
  if (column.getMaxRepetitionLevel() > 0) {
    return DataMode.REPEATED;
  }
  if (column.getMaxDefinitionLevel() == 0) {
    return TypeProtos.DataMode.REQUIRED;
  }
  return TypeProtos.DataMode.OPTIONAL;
}
Example 5
Source File: MetadataUtils.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Writes a single "column desc" line for the given descriptor: its dot-joined path
 * (null path elements are skipped), its type and its maximum repetition and
 * definition levels.
 *
 * @param out  writer that receives the formatted line
 * @param desc column descriptor to describe
 */
public static void showDetails(PrettyPrintWriter out, ColumnDescriptor desc) {
  Joiner dotJoiner = Joiner.on(".").skipNulls();
  String dottedPath = dotJoiner.join(desc.getPath());

  PrimitiveTypeName primitiveName = desc.getType();
  int maxDefinition = desc.getMaxDefinitionLevel();
  int maxRepetition = desc.getMaxRepetitionLevel();

  out.format(
      "column desc: %s T:%s R:%d D:%d%n",
      dottedPath, primitiveName, maxRepetition, maxDefinition);
}
Example 6
Source File: MetadataUtils.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Writes a one-line description of a primitive field to {@code out}.
 *
 * <p>The line contains the field name prefixed by {@code depth} dots, its repetition and
 * primitive type name, then either the original type ({@code O:}) or the logical type
 * annotation ({@code L:}) when one is present, and finally, when a {@code container}
 * schema is supplied, the column's maximum repetition and definition levels.
 *
 * @param out               writer that receives the formatted line
 * @param type              primitive field being described
 * @param depth             number of {@code "."} characters placed in front of the name
 * @param container         schema used to look up the column descriptor; when
 *                          {@code null} the R/D levels are not printed
 * @param cpath             path of the enclosing fields; the field name is appended only
 *                          while the full path array is built and is removed again
 *                          before the lookup
 * @param showOriginalTypes {@code true} to print the original type, {@code false} to
 *                          print the logical type annotation
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
  String indentedName = Strings.repeat(".", depth) + type.getName();
  Repetition repetition = type.getRepetition();
  PrimitiveTypeName primitiveName = type.getPrimitiveTypeName();
  out.format("%s: %s %s", indentedName, repetition, primitiveName);

  if (!showOriginalTypes) {
    LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
    if (logicalType != null) {
      out.format(" L:%s", logicalType);
    }
  } else {
    OriginalType originalType;
    try {
      originalType = type.getOriginalType();
    } catch (Exception ignored) {
      // Best effort: a failure to resolve the original type is treated as "none".
      originalType = null;
    }
    if (originalType != null) {
      out.format(" O:%s", originalType);
    }
  }

  if (container != null) {
    // Build the full column path, then restore cpath to its incoming state.
    cpath.add(type.getName());
    String[] fullPath = cpath.toArray(new String[0]);
    cpath.remove(cpath.size() - 1);

    ColumnDescriptor descriptor = container.getColumnDescription(fullPath);
    int maxDefinition = descriptor.getMaxDefinitionLevel();
    int maxRepetition = descriptor.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", maxRepetition, maxDefinition);
  }

  out.println();
}
Example 7
Source File: SchemaCompatibilityValidator.java From parquet-mr with Apache License 2.0 | 5 votes |
private <T extends Comparable<T>> void validateColumn(Column<T> column) { ColumnPath path = column.getColumnPath(); Class<?> alreadySeen = columnTypesEncountered.get(path); if (alreadySeen != null && !alreadySeen.equals(column.getColumnType())) { throw new IllegalArgumentException("Column: " + path.toDotString() + " was provided with different types in the same predicate." + " Found both: (" + alreadySeen + ", " + column.getColumnType() + ")"); } if (alreadySeen == null) { columnTypesEncountered.put(path, column.getColumnType()); } ColumnDescriptor descriptor = getColumnDescriptor(path); if (descriptor == null) { // the column is missing from the schema. evaluation uses calls // updateNull() a value is missing, so this will be handled correctly. return; } if (descriptor.getMaxRepetitionLevel() > 0) { throw new IllegalArgumentException("FilterPredicates do not currently support repeated columns. " + "Column " + path.toDotString() + " is repeated."); } ValidTypeMap.assertTypeValid(column, descriptor.getType()); }
Example 8
Source File: ParquetReaderUtility.java From Bats with Apache License 2.0 | 4 votes |
/** * Check whether any of columns in the given list is either nested or repetitive. * * @param footer Parquet file schema * @param columns list of query SchemaPath objects */ public static boolean containsComplexColumn(ParquetMetadata footer, List<SchemaPath> columns) { MessageType schema = footer.getFileMetaData().getSchema(); if (Utilities.isStarQuery(columns)) { for (Type type : schema.getFields()) { if (!type.isPrimitive()) { return true; } } for (ColumnDescriptor col : schema.getColumns()) { if (col.getMaxRepetitionLevel() > 0) { return true; } } return false; } else { Map<String, ColumnDescriptor> colDescMap = ParquetReaderUtility.getColNameToColumnDescriptorMapping(footer); Map<String, SchemaElement> schemaElements = ParquetReaderUtility.getColNameToSchemaElementMapping(footer); for (SchemaPath schemaPath : columns) { // Schema path which is non-leaf is complex column if (!schemaPath.isLeaf()) { logger.trace("rowGroupScan contains complex column: {}", schemaPath.getUnIndexed().toString()); return true; } // following column descriptor lookup failure may mean two cases, depending on subsequent SchemaElement lookup: // 1. success: queried column is complex, i.e. GroupType // 2. failure: queried column is not in schema and thus is non-complex ColumnDescriptor column = colDescMap.get(schemaPath.getUnIndexed().toString().toLowerCase()); if (column == null) { SchemaElement schemaElement = schemaElements.get(schemaPath.getUnIndexed().toString().toLowerCase()); if (schemaElement != null) { return true; } } else { if (column.getMaxRepetitionLevel() > 0) { logger.trace("rowGroupScan contains repetitive column: {}", schemaPath.getUnIndexed().toString()); return true; } } } } return false; }
Example 9
Source File: ColumnWriterV2.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Creates the writer used for repetition levels of the given column.
 *
 * @param props properties supplying the repetition level encoder
 * @param path  column descriptor whose max repetition level is inspected
 * @return {@code NULL_WRITER} when the column's max repetition level is zero, otherwise a
 *         new {@code RLEWriterForV2} wrapping the encoder obtained from {@code props}
 */
@Override
ValuesWriter createRLWriter(ParquetProperties props, ColumnDescriptor path) {
  if (path.getMaxRepetitionLevel() == 0) {
    return NULL_WRITER;
  }
  return new RLEWriterForV2(props.newRepetitionLevelEncoder(path));
}