Java Code Examples for org.apache.parquet.schema.GroupType#getFields()
The following examples show how to use
org.apache.parquet.schema.GroupType#getFields().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroWriteSupportInt96Avro18.java From datacollector with Apache License 2.0 | 7 votes |
private void writeRecordFields(GroupType schema, Schema avroSchema, Object record) { List<Type> fields = schema.getFields(); List<Schema.Field> avroFields = avroSchema.getFields(); int index = 0; // parquet ignores Avro nulls, so index may differ for (int avroIndex = 0; avroIndex < avroFields.size(); avroIndex++) { Schema.Field avroField = avroFields.get(avroIndex); if (avroField.schema().getType().equals(Schema.Type.NULL)) { continue; } Type fieldType = fields.get(index); Object value = model.getField(record, avroField.name(), avroIndex); if (value != null) { recordConsumer.startField(fieldType.getName(), index); writeValue(fieldType, avroField.schema(), value); recordConsumer.endField(fieldType.getName(), index); } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) { throw new RuntimeException("Null-value for required field: " + avroField.name()); } index++; } }
Example 2
Source File: ColumnIOFactory.java From parquet-mr with Apache License 2.0 | 6 votes |
private void visitChildren(GroupColumnIO newIO, GroupType groupType, GroupType requestedGroupType) { GroupColumnIO oldIO = current; current = newIO; for (Type type : groupType.getFields()) { // if the file schema does not contain the field it will just stay null if (requestedGroupType.containsField(type.getName())) { currentRequestedIndex = requestedGroupType.getFieldIndex(type.getName()); currentRequestedType = requestedGroupType.getType(currentRequestedIndex); if (currentRequestedType.getRepetition().isMoreRestrictiveThan(type.getRepetition())) { incompatibleSchema(type, currentRequestedType); } type.accept(this); } } current = oldIO; }
Example 3
Source File: ThriftRecordConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
private boolean hasMissingRequiredFieldInGroupType(GroupType requested, GroupType fullSchema) { for (Type field : fullSchema.getFields()) { if (requested.containsField(field.getName())) { Type requestedType = requested.getType(field.getName()); // if a field is in requested schema and the type of it is a group type, then do recursive check if (!field.isPrimitive()) { if (hasMissingRequiredFieldInGroupType(requestedType.asGroupType(), field.asGroupType())) { return true; } else { continue;// check next field } } } else { if (field.getRepetition() == Type.Repetition.REQUIRED) { return true; // if a field is missing in requested schema and it's required } else { continue; // the missing field is not required, then continue checking next field } } } return false; }
Example 4
Source File: AvroWriteSupport.java From parquet-mr with Apache License 2.0 | 6 votes |
private void writeRecordFields(GroupType schema, Schema avroSchema, Object record) { List<Type> fields = schema.getFields(); List<Schema.Field> avroFields = avroSchema.getFields(); int index = 0; // parquet ignores Avro nulls, so index may differ for (int avroIndex = 0; avroIndex < avroFields.size(); avroIndex++) { Schema.Field avroField = avroFields.get(avroIndex); if (avroField.schema().getType().equals(Schema.Type.NULL)) { continue; } Type fieldType = fields.get(index); Object value = model.getField(record, avroField.name(), avroIndex); if (value != null) { recordConsumer.startField(fieldType.getName(), index); writeValue(fieldType, avroField.schema(), value); recordConsumer.endField(fieldType.getName(), index); } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) { throw new RuntimeException("Null-value for required field: " + avroField.name()); } index++; } }
Example 5
Source File: TajoWriteSupport.java From tajo with Apache License 2.0 | 6 votes |
private void writeRecordFields(GroupType schema, Schema tajoSchema, Tuple tuple) { List<Type> fields = schema.getFields(); // Parquet ignores Tajo NULL_TYPE columns, so the index may differ. int index = 0; for (int tajoIndex = 0; tajoIndex < tajoSchema.size(); ++tajoIndex) { Column column = tajoSchema.getColumn(tajoIndex); if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) { continue; } Type fieldType = fields.get(index); if (!tuple.isBlankOrNull(tajoIndex)) { recordConsumer.startField(fieldType.getName(), index); writeValue(column, tuple, tajoIndex); recordConsumer.endField(fieldType.getName(), index); } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) { throw new RuntimeException("Null-value for required field: " + column.getSimpleName()); } ++index; } }
Example 6
Source File: AvroWriteSupportInt96Avro17.java From datacollector with Apache License 2.0 | 6 votes |
private void writeRecordFields(GroupType schema, Schema avroSchema, Object record) { List<Type> fields = schema.getFields(); List<Schema.Field> avroFields = avroSchema.getFields(); int index = 0; // parquet ignores Avro nulls, so index may differ for (int avroIndex = 0; avroIndex < avroFields.size(); avroIndex++) { Schema.Field avroField = avroFields.get(avroIndex); if (avroField.schema().getType().equals(Schema.Type.NULL)) { continue; } Type fieldType = fields.get(index); Object value = model.getField(record, avroField.name(), avroIndex); if (value != null) { recordConsumer.startField(fieldType.getName(), index); writeValue(fieldType, avroField.schema(), value); recordConsumer.endField(fieldType.getName(), index); } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) { throw new RuntimeException("Null-value for required field: " + avroField.name()); } index++; } }
Example 7
Source File: HiveSchemaUtil.java From hudi with Apache License 2.0 | 6 votes |
/**
 * Builds the Hive {@code ARRAY<...>} type string for a Parquet list element.
 *
 * <p>A primitive element is converted directly. A group element is converted as a
 * whole when it has more than one field, or when it has exactly one field and is
 * named {@code "array"} or {@code elementName + "_tuple"}; otherwise its first
 * field is converted instead.
 *
 * @param elementType Parquet type of the list element
 * @param elementName name used to form the {@code "_tuple"} suffix check
 * @return the Hive array type string
 */
private static String createHiveArray(Type elementType, String elementName) {
  Type typeToConvert;
  if (elementType.isPrimitive()) {
    typeToConvert = elementType;
  } else {
    final List<Type> groupFields = elementType.asGroupType().getFields();
    final int fieldCount = groupFields.size();
    boolean keepGroup = fieldCount > 1
        || (fieldCount == 1
            && (elementType.getName().equals("array")
                || elementType.getName().equals(elementName + "_tuple")));
    // otherwise the group is treated as a wrapper and its first field is used
    typeToConvert = keepGroup ? elementType : groupFields.get(0);
  }
  StringBuilder hiveType = new StringBuilder("ARRAY< ");
  hiveType.append(convertField(typeToConvert));
  hiveType.append(">");
  return hiveType.toString();
}
Example 8
Source File: ParquetRecordWriter.java From dremio-oss with Apache License 2.0 | 6 votes |
/**
 * Changes the list inner '$data$' vector name to 'element' in the schema.
 *
 * <p>Returns a copy of {@code childType} named {@code "element"}. A primitive type
 * keeps its repetition, primitive type name, type length, original type, decimal
 * metadata and id. A group type keeps its repetition, original type and fields, and
 * its id when one is set.
 *
 * @param childType the type to rename
 * @return a new type equivalent to {@code childType} but named {@code "element"}
 */
private Type renameChildTypeToElement(Type childType) {
  if (childType.isPrimitive()) {
    PrimitiveType childPrimitiveType = childType.asPrimitiveType();
    return new PrimitiveType(childType.getRepetition(),
      childPrimitiveType.getPrimitiveTypeName(),
      childPrimitiveType.getTypeLength(),
      "element",
      childPrimitiveType.getOriginalType(),
      childPrimitiveType.getDecimalMetadata(),
      childPrimitiveType.getId());
  }

  GroupType childGroupType = childType.asGroupType();
  GroupType groupType = new GroupType(childType.getRepetition(),
    "element",
    childType.getOriginalType(),
    childGroupType.getFields());
  Type.ID id = childGroupType.getId();
  if (id != null) {
    // Read the numeric id through its accessor; hashCode() is not a value accessor
    // and only matched the id by implementation detail.
    groupType = groupType.withId(id.intValue());
  }
  return groupType;
}
Example 9
Source File: TestCTAS.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Asserts that {@code field} has an id, then checks its children recursively.
 *
 * <p>For a group whose original type is LIST, the children checked are those of the
 * group's first field rather than of the group itself.
 *
 * @param field the Parquet type to verify
 */
private void verifyFieldHasColumnId(Type field) {
  System.out.println("Verifying column " + field.getName());
  assertTrue("Field " + field.getName() + " does not have column id", field.getId() != null);
  if (!(field instanceof GroupType)) {
    return;
  }
  GroupType group = (GroupType) field;
  boolean isList = group.getOriginalType() == OriginalType.LIST;
  // for a LIST, step into the first field before walking the children
  GroupType parentOfChildren = isList ? group.getFields().get(0).asGroupType() : group;
  for (Type child : parentOfChildren.getFields()) {
    verifyFieldHasColumnId(child);
  }
}
Example 10
Source File: TypeWithSchemaVisitor.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Visits every field of {@code group}, pairing each with the struct field that has the same id.
 *
 * <p>The paired struct field is {@code null} when {@code struct} is {@code null},
 * when the Parquet field has no id, or when the id is negative.
 *
 * @param struct struct type used for id lookups; may be {@code null}
 * @param group Parquet group whose fields are visited
 * @param visitor visitor passed to {@code visitField}
 * @return the per-field results, in field order
 */
private static <T> List<T> visitFields(Types.StructType struct, GroupType group, TypeWithSchemaVisitor<T> visitor) {
  List<T> results = Lists.newArrayListWithExpectedSize(group.getFieldCount());
  for (Type field : group.getFields()) {
    Type.ID fieldId = field.getId();
    // -1 stands for "no id"
    int id = (fieldId != null) ? fieldId.intValue() : -1;
    Types.NestedField iField = null;
    if (struct != null && id >= 0) {
      iField = struct.field(id);
    }
    results.add(visitField(iField, field, visitor));
  }
  return results;
}
Example 11
Source File: ParquetTypeVisitor.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Applies {@code visitField} to each field of {@code group} and collects the results.
 *
 * @param group Parquet group whose fields are visited
 * @param visitor visitor passed to {@code visitField}
 * @return one result per field, in field order
 */
private static <T> List<T> visitFields(GroupType group, ParquetTypeVisitor<T> visitor) {
  List<Type> fields = group.getFields();
  List<T> results = Lists.newArrayListWithExpectedSize(group.getFieldCount());
  for (int i = 0; i < fields.size(); i++) {
    results.add(visitField(fields.get(i), visitor));
  }
  return results;
}
Example 12
Source File: TypeWithSchemaVisitor.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Visits the fields of {@code group} alongside their counterparts in {@code struct}.
 *
 * <p>A counterpart is looked up by field id, and only when {@code struct} is non-null
 * and the Parquet field carries a non-negative id; otherwise {@code null} is passed
 * to {@code visitField}.
 *
 * @param struct struct type to look fields up in; may be {@code null}
 * @param group Parquet group whose fields are iterated
 * @param visitor visitor forwarded to {@code visitField}
 * @return list of results, one per Parquet field
 */
private static <T> List<T> visitFields(Types.StructType struct, GroupType group, TypeWithSchemaVisitor<T> visitor) {
  int expectedSize = group.getFieldCount();
  List<T> visited = Lists.newArrayListWithExpectedSize(expectedSize);
  for (Type parquetField : group.getFields()) {
    int fieldId;
    if (parquetField.getId() == null) {
      fieldId = -1;
    } else {
      fieldId = parquetField.getId().intValue();
    }
    boolean canLookUp = struct != null && fieldId >= 0;
    Types.NestedField structField = canLookUp ? struct.field(fieldId) : null;
    visited.add(visitField(structField, parquetField, visitor));
  }
  return visited;
}
Example 13
Source File: ParquetTypeVisitor.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Visits each field of {@code group}, bracketing every visit with the visitor's
 * {@code beforeField} and {@code afterField} callbacks.
 *
 * <p>{@code afterField} runs in a {@code finally} block, so it is called even when
 * the visit of that field throws.
 *
 * @param group Parquet group whose fields are visited
 * @param visitor visitor receiving the callbacks and passed to {@code visit}
 * @return one result per field, in field order
 */
private static <T> List<T> visitFields(GroupType group, ParquetTypeVisitor<T> visitor) {
  List<T> results = Lists.newArrayListWithExpectedSize(group.getFieldCount());
  List<Type> fields = group.getFields();
  for (int i = 0; i < fields.size(); i++) {
    Type field = fields.get(i);
    visitor.beforeField(field);
    try {
      results.add(visit(field, visitor));
    } finally {
      visitor.afterField(field);
    }
  }
  return results;
}
Example 14
Source File: RowConverter.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a row converter with one converter slot per field of {@code schema}.
 *
 * <p>The slots are only populated when {@code typeInfo} has an arity of at least 1
 * and is a {@link CompositeType}; otherwise every slot is left {@code null}.
 *
 * @param schema Parquet group type describing the row
 * @param typeInfo type information for the row
 * @param parent holder stored as the parent data holder
 * @param pos position of this row in the parent row
 */
public RowConverter(GroupType schema, TypeInformation<?> typeInfo, ParentDataHolder parent, int pos) {
  this.typeInfo = typeInfo;
  this.parentDataHolder = parent;
  this.posInParentRow = pos;
  this.converters = new Converter[schema.getFieldCount()];

  if (typeInfo.getArity() < 1 || !(typeInfo instanceof CompositeType)) {
    return;
  }

  CompositeType<?> compositeType = (CompositeType<?>) typeInfo;
  int fieldIndex = 0;
  for (Type field : schema.getFields()) {
    converters[fieldIndex] = createConverter(field, fieldIndex, compositeType.getTypeAt(fieldIndex), this);
    fieldIndex++;
  }
}
Example 15
Source File: JsonRecordFormatter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Builds one formatter per field of {@code groupSchema}, keyed by field name.
 *
 * <p>Primitive fields get a {@code JsonPrimitiveWriter}; all other fields get a
 * {@code JsonGroupFormatter}. The map is a {@link LinkedHashMap}, so it iterates in
 * the order the fields were inserted.
 *
 * @param groupSchema group type whose fields are mapped
 * @return map from field name to its formatter
 */
private Map<String, JsonRecordFormatter> buildWriters(GroupType groupSchema) {
  Map<String, JsonRecordFormatter> formattersByName = new LinkedHashMap<String, JsonRecordFormatter>();
  for (Type field : groupSchema.getFields()) {
    final JsonRecordFormatter formatter;
    if (field.isPrimitive()) {
      formatter = new JsonPrimitiveWriter(field);
    } else {
      formatter = new JsonGroupFormatter((GroupType) field);
    }
    formattersByName.put(field.getName(), formatter);
  }
  return formattersByName;
}
Example 16
Source File: MetadataUtils.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Prints a summary line for {@code type} and then the details of each of its fields.
 *
 * <p>The line shows the type name prefixed with {@code depth} dots, its repetition
 * and its field count. The type name is appended to {@code cpath} while the fields
 * are processed and removed again afterwards.
 *
 * @param out writer the details are printed to
 * @param type group type to describe
 * @param depth nesting depth, used for the dot prefix
 * @param container message type passed through to the nested calls
 * @param cpath path of enclosing type names; modified during the call
 */
private static void showDetails(PrettyPrintWriter out, GroupType type, int depth, MessageType container, List<String> cpath) {
  String typeName = type.getName();
  String label = Strings.repeat(".", depth) + typeName;
  out.format("%s: %s F:%d%n", label, type.getRepetition(), type.getFieldCount());

  cpath.add(typeName);
  int childDepth = depth + 1;
  for (Type child : type.getFields()) {
    showDetails(out, child, childDepth, container, cpath);
  }
  // drop the entry pushed above
  cpath.remove(cpath.size() - 1);
}
Example 17
Source File: MetadataUtils.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Prints a one-line summary of {@code type}, then recurses into each of its fields.
 *
 * <p>The summary contains the dot-indented name, the repetition and the number of
 * fields. While the fields are being printed, the type's name is the last element
 * of {@code cpath}.
 *
 * @param out writer receiving the output
 * @param type group type to describe
 * @param depth nesting depth; determines how many dots precede the name
 * @param container message type forwarded unchanged to nested calls
 * @param cpath path of enclosing type names; pushed to and popped from here
 * @param showOriginalTypes flag forwarded unchanged to nested calls
 */
private static void showDetails(PrettyPrintWriter out, GroupType type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
  String indentedName = Strings.repeat(".", depth) + type.getName();
  Repetition repetition = type.getRepetition();
  int fieldCount = type.getFieldCount();
  out.format("%s: %s F:%d%n", indentedName, repetition, fieldCount);

  cpath.add(type.getName());
  for (Type fieldType : type.getFields()) {
    showDetails(out, fieldType, depth + 1, container, cpath, showOriginalTypes);
  }
  int lastIndex = cpath.size() - 1;
  cpath.remove(lastIndex);
}
Example 18
Source File: ProtoMessageConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates a converter for a protobuf message, with one child converter per Parquet field.
 *
 * <p>Each field of {@code parquetSchema} is looked up by name in the descriptor of
 * {@code builder}; the resulting converters are stored in Parquet field order.
 *
 * @param pvc parent value container; must not be {@code null}
 * @param builder builder of the message being converted
 * @param parquetSchema Parquet group type whose fields are converted
 * @throws IllegalStateException if {@code pvc} is {@code null}
 * @throws IncompatibleSchemaModificationException if a Parquet field has no
 *     protobuf field of the same name
 */
ProtoMessageConverter(ParentValueContainer pvc, Message.Builder builder, GroupType parquetSchema) {
  converters = new Converter[parquetSchema.getFieldCount()];
  this.parent = pvc;

  if (pvc == null) {
    throw new IllegalStateException("Missing parent value container");
  }

  myBuilder = builder;

  Descriptors.Descriptor protoDescriptor = builder.getDescriptorForType();

  int converterIndex = 0;
  for (Type parquetField : parquetSchema.getFields()) {
    Descriptors.FieldDescriptor protoField = protoDescriptor.findFieldByName(parquetField.getName());

    if (protoField == null) {
      String description = "Scheme mismatch \n\"" + parquetField + "\"" +
          "\n proto descriptor:\n" + protoDescriptor.toProto();
      throw new IncompatibleSchemaModificationException("Cant find \"" + parquetField.getName() + "\" " + description);
    }

    converters[converterIndex] = newMessageConverter(myBuilder, protoField, parquetField);
    converterIndex++;
  }
}
Example 19
Source File: RowConverter.java From flink with Apache License 2.0 | 5 votes |
/**
 * Builds a converter for a row described by {@code schema}.
 *
 * <p>The converter array is sized to the schema's field count. Field converters are
 * created only if {@code typeInfo} reports an arity of at least 1 and is a
 * {@link CompositeType}; in any other case the array entries remain {@code null}.
 *
 * @param schema Parquet group type of the row
 * @param typeInfo type information of the row
 * @param parent parent data holder to keep a reference to
 * @param pos index of this row within the parent row
 */
public RowConverter(GroupType schema, TypeInformation<?> typeInfo, ParentDataHolder parent, int pos) {
  this.typeInfo = typeInfo;
  this.parentDataHolder = parent;
  this.posInParentRow = pos;
  this.converters = new Converter[schema.getFieldCount()];

  boolean hasFields = typeInfo.getArity() >= 1;
  if (hasFields && (typeInfo instanceof CompositeType)) {
    CompositeType<?> rowType = (CompositeType<?>) typeInfo;
    int next = 0;
    for (Type field : schema.getFields()) {
      converters[next] = createConverter(field, next, rowType.getTypeAt(next), this);
      next++;
    }
  }
}
Example 20
Source File: ParquetTypeVisitor.java From presto with Apache License 2.0 | 5 votes |
/**
 * Runs {@code visitField} over every field of {@code group}.
 *
 * @param group Parquet group whose fields are visited
 * @param visitor visitor forwarded to {@code visitField}
 * @return the visit results, one per field and in field order
 */
private static <T> List<T> visitFields(GroupType group, ParquetTypeVisitor<T> visitor) {
  int expectedSize = group.getFieldCount();
  List<T> collected = Lists.newArrayListWithExpectedSize(expectedSize);
  for (Type child : group.getFields()) {
    T visited = visitField(child, visitor);
    collected.add(visited);
  }
  return collected;
}