Java Code Examples for org.apache.parquet.schema.GroupType#getOriginalType()
The following examples show how to use
org.apache.parquet.schema.GroupType#getOriginalType().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetTypeVisitor.java From iceberg with Apache License 2.0 | 5 votes |
public static <T> T visit(Type type, ParquetTypeVisitor<T> visitor) { if (type instanceof MessageType) { return visitor.message((MessageType) type, visitFields(type.asGroupType(), visitor)); } else if (type.isPrimitive()) { return visitor.primitive(type.asPrimitiveType()); } else { // if not a primitive, the typeId must be a group GroupType group = type.asGroupType(); OriginalType annotation = group.getOriginalType(); if (annotation != null) { switch (annotation) { case LIST: return visitList(group, visitor); case MAP: return visitMap(group, visitor); default: } } return visitor.struct(group, visitFields(group, visitor)); } }
Example 2
Source File: ParquetGroupConverter.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Builds the converter for a group field, narrowing the projected child path first.
 *
 * <p>For a LIST-annotated group whose next path segment is the named segment "list", that
 * segment is skipped. Any non-named segments that follow are skipped as well. If a named
 * segment remains, a {@link SchemaPath} built from it is passed on as the only projected child.
 *
 * @param fieldName name of the field being converted
 * @param mutator output mutator, used only when no arrow schema is supplied
 * @param arrowSchema arrow schema, or {@code null} to use the default converter
 * @param groupType the Parquet group being converted
 * @param colNextChild next segment of the projected column path, may be {@code null}
 * @return the converter chosen for this group
 */
private Converter groupConverter(String fieldName, OutputMutator mutator, List<Field> arrowSchema, GroupType groupType, PathSegment colNextChild) {
  PathSegment segment = colNextChild;

  // step over the synthetic "list" level of a logical list
  final boolean isLogicalList = groupType.getOriginalType() == OriginalType.LIST;
  if (isLogicalList
      && segment != null
      && segment.isNamed()
      && segment.getNameSegment().getPath().equals("list")) {
    segment = segment.getChild();
  }

  // advance to the first named segment, if any
  while (segment != null && !segment.isNamed()) {
    segment = segment.getChild();
  }

  final Collection<SchemaPath> projectedChildren = new ArrayList<>();
  if (segment != null) {
    projectedChildren.add(new SchemaPath(segment.getNameSegment()));
  }

  if (arrowSchema == null) {
    return defaultGroupConverter(fieldName, mutator, groupType, projectedChildren, null);
  }
  return groupConverterFromArrowSchema(fieldName, groupType.getName(), groupType, projectedChildren);
}
Example 3
Source File: ParquetGroupConverter.java From dremio-oss with Apache License 2.0 | 5 votes |
Converter defaultGroupConverter(String fieldName, OutputMutator mutator, GroupType groupType, Collection<SchemaPath> c, List<Field> arrowSchema) { if (groupType.getOriginalType() == OriginalType.LIST && LogicalListL1Converter.isSupportedSchema(groupType)) { return new LogicalListL1Converter( columnResolver, fieldName, mutator, getWriterProvider(), groupType, c, options, arrowSchema, schemaHelper ); } final String nameForChild = getNameForChild(columnResolver.getBatchSchemaColumnName(fieldName)); final StructWriter struct; if (groupType.isRepetition(REPEATED)) { if (arrowSchema != null) { //TODO assert this should never occur at this level // only parquet writer that writes arrowSchema doesn't write repeated fields except // as part of a LOGICAL LIST, thus this scenario (repeated + arrow schema present) can // only happen in LogicalList converter arrowSchema = handleRepeatedField(arrowSchema, groupType); } struct = list(nameForChild).struct(); } else { struct = getWriterProvider().struct(nameForChild); } return new StructGroupConverter(columnResolver, fieldName, mutator, struct, groupType, c, options, arrowSchema, schemaHelper); }
Example 4
Source File: ParquetTypeHelper.java From dremio-oss with Apache License 2.0 | 5 votes |
public static Optional<Field> toField(final Type parquetField, final SchemaDerivationHelper schemaHelper) { if (parquetField.isPrimitive()) { SchemaPath columnSchemaPath = SchemaPath.getCompoundPath(parquetField.getName()); return Optional.of(createField(columnSchemaPath, parquetField.asPrimitiveType(), parquetField.getOriginalType(), schemaHelper)); } // Handle non-primitive cases final GroupType complexField = (GroupType) parquetField; if (OriginalType.LIST == complexField.getOriginalType()) { GroupType repeatedField = (GroupType) complexField.getFields().get(0); // should have only one child field type if (repeatedField.isPrimitive() || !repeatedField.isRepetition(REPEATED) || repeatedField.asGroupType().getFields().size() != 1) { throw UserException.unsupportedError() .message("Parquet List Type is expected to contain only one sub type. Column '%s' contains %d", parquetField.getName(), complexField.getFieldCount()) .build(); } Optional<Field> subField = toField(repeatedField.getFields().get(0), schemaHelper); return subField.map(sf -> new Field(complexField.getName(), true, new ArrowType.List(), Arrays.asList(new Field[] {sf}))); } final boolean isStructType = complexField.getOriginalType() == null; if (isStructType) { // it is struct return toComplexField(complexField, new ArrowType.Struct(), schemaHelper); } // Unsupported complex type return Optional.empty(); }
Example 5
Source File: TestCTAS.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Asserts that the given field, and recursively every field nested under it, has a column id.
 *
 * <p>For a LIST-annotated group the check descends through the group's first child, so that
 * child's own fields are verified rather than the child itself.
 *
 * @param field the Parquet field to verify
 */
private void verifyFieldHasColumnId(Type field) {
  final String fieldName = field.getName();
  System.out.println("Verifying column " + fieldName);
  assertTrue("Field " + fieldName + " does not have column id", field.getId() != null);

  if (!(field instanceof GroupType)) {
    return;
  }

  final GroupType declaredGroup = (GroupType) field;
  final GroupType groupToDescend = declaredGroup.getOriginalType() == OriginalType.LIST
      ? declaredGroup.getFields().get(0).asGroupType()
      : declaredGroup;

  for (Type nested : groupToDescend.getFields()) {
    verifyFieldHasColumnId(nested);
  }
}
Example 6
Source File: List3Levels.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Wraps a Parquet list group after validating its structure: the group must be annotated
 * as LIST and hold exactly one field, and that field must be a repeated group that itself
 * holds exactly one field (the element).
 *
 * @param list the Parquet List
 * @throws IllegalArgumentException if the group does not have that structure
 */
public List3Levels(GroupType list) {
  final boolean isSingleFieldList =
      list.getOriginalType() == OriginalType.LIST && list.getFields().size() == 1;
  if (!isSingleFieldList) {
    throw new IllegalArgumentException("invalid list type: " + list);
  }

  final Type middleLevel = list.getFields().get(0);
  final boolean isSingleFieldRepeatedGroup =
      !middleLevel.isPrimitive()
          && middleLevel.isRepetition(REPEATED)
          && middleLevel.asGroupType().getFields().size() == 1;
  if (!isSingleFieldRepeatedGroup) {
    throw new IllegalArgumentException("invalid list type: " + list);
  }

  this.list = list;
  this.repeated = middleLevel.asGroupType();
  this.element = this.repeated.getFields().get(0);
}
Example 7
Source File: ParquetTypeVisitor.java From presto with Apache License 2.0 | 4 votes |
public static <T> T visit(Type type, ParquetTypeVisitor<T> visitor) { if (type instanceof MessageType) { return visitor.message((MessageType) type, visitFields(type.asGroupType(), visitor)); } else if (type.isPrimitive()) { return visitor.primitive(type.asPrimitiveType()); } else { // if not a primitive, the typeId must be a group GroupType group = type.asGroupType(); OriginalType annotation = group.getOriginalType(); if (annotation == LIST) { checkArgument(!group.isRepetition(REPEATED), "Invalid list: top-level group is repeated: " + group); checkArgument(group.getFieldCount() == 1, "Invalid list: does not contain single repeated field: " + group); GroupType repeatedElement = group.getFields().get(0).asGroupType(); checkArgument(repeatedElement.isRepetition(REPEATED), "Invalid list: inner group is not repeated"); checkArgument(repeatedElement.getFieldCount() <= 1, "Invalid list: repeated group is not a single field: " + group); visitor.fieldNames.push(repeatedElement.getName()); try { T elementResult = null; if (repeatedElement.getFieldCount() > 0) { elementResult = visitField(repeatedElement.getType(0), visitor); } return visitor.list(group, elementResult); } finally { visitor.fieldNames.pop(); } } else if (annotation == MAP) { checkArgument(!group.isRepetition(REPEATED), "Invalid map: top-level group is repeated: " + group); checkArgument(group.getFieldCount() == 1, "Invalid map: does not contain single repeated field: " + group); GroupType repeatedKeyValue = group.getType(0).asGroupType(); checkArgument(repeatedKeyValue.isRepetition(REPEATED), "Invalid map: inner group is not repeated"); checkArgument(repeatedKeyValue.getFieldCount() <= 2, "Invalid map: repeated group does not have 2 fields"); visitor.fieldNames.push(repeatedKeyValue.getName()); try { T keyResult = null; T valueResult = null; if (repeatedKeyValue.getFieldCount() == 2) { keyResult = visitField(repeatedKeyValue.getType(0), visitor); valueResult = visitField(repeatedKeyValue.getType(1), visitor); 
} else if (repeatedKeyValue.getFieldCount() == 1) { Type keyOrValue = repeatedKeyValue.getType(0); if (keyOrValue.getName().equalsIgnoreCase("key")) { keyResult = visitField(keyOrValue, visitor); // value result remains null } else { valueResult = visitField(keyOrValue, visitor); // key result remains null } } return visitor.map(group, keyResult, valueResult); } finally { visitor.fieldNames.pop(); } } return visitor.struct(group, visitFields(group, visitor)); } }
Example 8
Source File: ParquetTypeVisitor.java From iceberg with Apache License 2.0 | 4 votes |
public static <T> T visit(Type type, ParquetTypeVisitor<T> visitor) { if (type instanceof MessageType) { return visitor.message((MessageType) type, visitFields(type.asGroupType(), visitor)); } else if (type.isPrimitive()) { return visitor.primitive(type.asPrimitiveType()); } else { // if not a primitive, the typeId must be a group GroupType group = type.asGroupType(); OriginalType annotation = group.getOriginalType(); if (annotation != null) { switch (annotation) { case LIST: Preconditions.checkArgument(!group.isRepetition(REPEATED), "Invalid list: top-level group is repeated: " + group); Preconditions.checkArgument(group.getFieldCount() == 1, "Invalid list: does not contain single repeated field: " + group); GroupType repeatedElement = group.getFields().get(0).asGroupType(); Preconditions.checkArgument(repeatedElement.isRepetition(REPEATED), "Invalid list: inner group is not repeated"); Preconditions.checkArgument(repeatedElement.getFieldCount() <= 1, "Invalid list: repeated group is not a single field: " + group); visitor.fieldNames.push(repeatedElement.getName()); try { T elementResult = null; if (repeatedElement.getFieldCount() > 0) { elementResult = visitField(repeatedElement.getType(0), visitor); } return visitor.list(group, elementResult); } finally { visitor.fieldNames.pop(); } case MAP: Preconditions.checkArgument(!group.isRepetition(REPEATED), "Invalid map: top-level group is repeated: " + group); Preconditions.checkArgument(group.getFieldCount() == 1, "Invalid map: does not contain single repeated field: " + group); GroupType repeatedKeyValue = group.getType(0).asGroupType(); Preconditions.checkArgument(repeatedKeyValue.isRepetition(REPEATED), "Invalid map: inner group is not repeated"); Preconditions.checkArgument(repeatedKeyValue.getFieldCount() <= 2, "Invalid map: repeated group does not have 2 fields"); visitor.fieldNames.push(repeatedKeyValue.getName()); try { T keyResult = null; T valueResult = null; switch (repeatedKeyValue.getFieldCount()) { case 
2: // if there are 2 fields, both key and value are projected keyResult = visitField(repeatedKeyValue.getType(0), visitor); valueResult = visitField(repeatedKeyValue.getType(1), visitor); case 1: // if there is just one, use the name to determine what it is Type keyOrValue = repeatedKeyValue.getType(0); if (keyOrValue.getName().equalsIgnoreCase("key")) { keyResult = visitField(keyOrValue, visitor); // value result remains null } else { valueResult = visitField(keyOrValue, visitor); // key result remains null } default: // both results will remain null } return visitor.map(group, keyResult, valueResult); } finally { visitor.fieldNames.pop(); } default: } } return visitor.struct(group, visitFields(group, visitor)); } }