Java Code Examples for org.apache.parquet.schema.Type#getName()
The following examples show how to use
org.apache.parquet.schema.Type#getName().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SingleLevelArrayMapKeyValuesSchemaConverter.java From presto with Apache License 2.0 | 6 votes |
public static GroupType mapType(Repetition repetition, String alias, String mapAlias, Type keyType, Type valueType) { //support projection only on key of a map if (valueType == null) { return listWrapper( repetition, alias, MAP_KEY_VALUE, new GroupType( Repetition.REPEATED, mapAlias, keyType)); } if (!valueType.getName().equals("value")) { throw new RuntimeException(valueType.getName() + " should be value"); } return listWrapper( repetition, alias, MAP_KEY_VALUE, new GroupType( Repetition.REPEATED, mapAlias, keyType, valueType)); }
Example 2
Source File: SimpleGroup.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Appends a textual dump of this group to {@code builder}, one line per value.
 * Fields whose value list is {@code null} or empty are skipped. Nested groups
 * are rendered recursively with {@code indent} extended by one space.
 *
 * @param builder destination buffer
 * @param indent  prefix written before every field name
 * @return the same {@code builder} instance
 */
private StringBuilder appendToString(StringBuilder builder, String indent) {
    int position = 0;
    for (Type field : schema.getFields()) {
        final String label = field.getName();
        final List<Object> fieldValues = data[position++];
        if (fieldValues == null || fieldValues.isEmpty()) {
            continue;
        }
        for (Object entry : fieldValues) {
            builder.append(indent).append(label);
            if (entry == null) {
                builder.append(": NULL\n");
                continue;
            }
            if (entry instanceof Group) {
                builder.append('\n');
                ((SimpleGroup) entry).appendToString(builder, indent + " ");
                continue;
            }
            builder.append(": ").append(entry.toString()).append('\n');
        }
    }
    return builder;
}
Example 3
Source File: GroupWriter.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Emits every populated field of {@code group} to the record consumer,
 * recursing into non-primitive fields. Fields with a repetition count of
 * zero produce no start/end field events.
 *
 * @param group the values to write
 * @param type  the schema describing {@code group}
 */
private void writeGroup(Group group, GroupType type) {
    final int totalFields = type.getFieldCount();
    for (int field = 0; field < totalFields; ++field) {
        final int repetitions = group.getFieldRepetitionCount(field);
        if (repetitions <= 0) {
            continue;
        }

        final Type fieldType = type.getType(field);
        final String fieldName = fieldType.getName();

        recordConsumer.startField(fieldName, field);
        for (int occurrence = 0; occurrence < repetitions; ++occurrence) {
            if (!fieldType.isPrimitive()) {
                recordConsumer.startGroup();
                writeGroup(group.getGroup(field, occurrence), fieldType.asGroupType());
                recordConsumer.endGroup();
            } else {
                group.writeValue(field, occurrence, recordConsumer);
            }
        }
        recordConsumer.endField(fieldName, field);
    }
}
Example 4
Source File: PruneColumnsCommand.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Prunes a single field, returning the field to keep or {@code null} when it
 * has to be dropped.
 *
 * <p>The field name is pushed onto {@code currentPath} for the duration of the
 * call so the full column path can be checked against {@code prunePaths}, and
 * is popped again before returning.
 *
 * @param field       the field to examine
 * @param currentPath path of the enclosing group; temporarily extended with this field's name
 * @param prunePaths  column paths that must be removed
 * @return {@code field} itself for a kept primitive, a copy with the surviving
 *         children for a kept group, or {@code null} if the field is pruned or
 *         is a group with no surviving children
 */
private Type pruneColumnsInField(Type field, List<String> currentPath, Set<ColumnPath> prunePaths) {
    String fieldName = field.getName();
    currentPath.add(fieldName);
    ColumnPath path = ColumnPath.get(currentPath.toArray(new String[0]));

    Type prunedField = null;
    if (!prunePaths.contains(path)) {
        if (field.isPrimitive()) {
            prunedField = field;
        } else {
            GroupType group = (GroupType) field;
            List<Type> prunedFields = pruneColumnsInFields(group.getFields(), currentPath, prunePaths);
            if (!prunedFields.isEmpty()) {
                prunedField = group.withNewFields(prunedFields);
            }
        }
    }

    // Pop by index, not by value: List.remove(Object) deletes the FIRST equal
    // element, which corrupts the path when an ancestor has the same name as
    // this field (e.g. a.b.a would leave [b, a] instead of [a, b]).
    currentPath.remove(currentPath.size() - 1);
    return prunedField;
}
Example 5
Source File: ParquetGroup.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Renders this group as text, one line per value, each prefixed by
 * {@code indent} and the field name. Nested groups are rendered recursively
 * with {@code indent} extended by one space.
 *
 * @param indent prefix written before every field name
 * @return the textual representation of this group
 */
public String toString(String indent) {
    final StringBuilder out = new StringBuilder();
    int position = 0;
    for (Type field : this.schema.getFields()) {
        final String label = field.getName();
        for (Object entry : this.data[position]) {
            final String rendered;
            if (entry == null) {
                rendered = ": NULL\n";
            } else if (entry instanceof Group) {
                rendered = "\n" + ((ParquetGroup) entry).toString(indent + " ");
            } else {
                rendered = ": " + entry.toString() + "\n";
            }
            out.append(indent).append(label).append(rendered);
        }
        position++;
    }
    return out.toString();
}
Example 6
Source File: SchemaIntersection.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Computes the intersection between the top-level fields of a file schema and
 * the requested fields. {@code Fields.UNKNOWN} is treated as {@code Fields.ALL}.
 *
 * <p>After construction, {@code sourceFields} holds the matching field names in
 * file-schema order and {@code requestedSchema} is a message type with the
 * file schema's name and only the matching fields.
 *
 * @param fileSchema      schema read from the file
 * @param requestedFields fields asked for by the caller
 */
public SchemaIntersection(MessageType fileSchema, Fields requestedFields) {
    final Fields wanted = (requestedFields == Fields.UNKNOWN) ? Fields.ALL : requestedFields;

    Fields matched = Fields.NONE;
    final List<Type> keptTypes = new ArrayList<Type>();
    final int fieldCount = fileSchema.getFieldCount();
    for (int idx = 0; idx < fieldCount; idx++) {
        final Type candidate = fileSchema.getType(idx);
        final Fields candidateName = new Fields(candidate.getName());
        if (!wanted.contains(candidateName)) {
            continue;
        }
        matched = matched.append(candidateName);
        keptTypes.add(candidate);
    }

    this.sourceFields = matched;
    this.requestedSchema = new MessageType(fileSchema.getName(), keptTypes);
}
Example 7
Source File: HiveSchemaUtil.java From hudi with Apache License 2.0 | 6 votes |
/**
 * Translates the top-level fields of a Parquet schema into Hive column types.
 *
 * <p>Repeated fields are converted with {@code createHiveArray}; all other
 * fields go through {@code convertField}. Column names are passed through
 * {@code hiveCompatibleFieldName}.
 *
 * @param messageType the Parquet schema
 * @return an insertion-ordered map from Hive column name to Hive type string
 * @throws IOException declared by the signature; presumably raised by the conversion helpers
 */
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType) throws IOException {
    final Map<String, String> hiveColumns = new LinkedHashMap<>();
    for (Type column : messageType.getFields()) {
        final StringBuilder hiveType = new StringBuilder();
        final String columnName = column.getName();
        if (column.isRepetition(Type.Repetition.REPEATED)) {
            hiveType.append(createHiveArray(column, ""));
        } else {
            hiveType.append(convertField(column));
        }
        hiveColumns.put(hiveCompatibleFieldName(columnName, false), hiveType.toString());
    }
    return hiveColumns;
}
Example 8
Source File: TupleWriter.java From hadoop-etl-udfs with MIT License | 6 votes |
private void writeTuple(Tuple tuple, GroupType type) { for (int index = 0; index < type.getFieldCount(); index++) { Type fieldType = type.getType(index); String fieldName = fieldType.getName(); // empty fields have to be omitted if (tuple.isNull(index)) continue; recordConsumer.startField(fieldName, index); if (fieldType.isPrimitive()) { tuple.writePrimitiveValue(recordConsumer, index, (PrimitiveType)fieldType); } else { recordConsumer.startGroup(); writeTuple(tuple.getTuple(index), fieldType.asGroupType()); recordConsumer.endGroup(); } recordConsumer.endField(fieldName, index); } }
Example 9
Source File: TestDataWritableWriter.java From presto with Apache License 2.0 | 6 votes |
/**
 * Writes all non-null fields of a struct value to the record consumer.
 * Does nothing when {@code value} is {@code null}.
 *
 * @param value     the struct whose fields are written
 * @param inspector object inspector used to read the struct's fields and their values
 * @param type      group schema describing the struct
 */
private void writeGroupFields(Object value, StructObjectInspector inspector, GroupType type) {
    if (value == null) {
        return;
    }

    final List<? extends StructField> structFields = inspector.getAllStructFieldRefs();
    final List<Object> structValues = inspector.getStructFieldsDataAsList(value);

    for (int i = 0; i < type.getFieldCount(); i++) {
        final Type fieldType = type.getType(i);
        final String fieldName = fieldType.getName();
        final Object fieldValue = structValues.get(i);
        if (fieldValue == null) {
            continue;
        }

        final ObjectInspector fieldInspector = structFields.get(i).getFieldObjectInspector();
        recordConsumer.startField(fieldName, i);
        writeValue(fieldValue, fieldInspector, fieldType);
        recordConsumer.endField(fieldName, i);
    }
}
Example 10
Source File: ParquetRowiseReader.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Builds a projection type that follows {@code pathSegments} from
 * {@code depth} down to the last segment. Each intermediate level becomes a
 * group with the original repetition, name and original type, containing only
 * the next level of the path.
 *
 * @param pathSegments full column path
 * @param depth        index of the segment to resolve in this call
 * @param schema       schema the path is resolved against
 * @return the schema type at the last segment, or a single-child group wrapping the rest of the path
 * @throws IllegalStateException if an intermediate segment resolves to a primitive type
 */
private static Type getType(String[] pathSegments, int depth, MessageType schema) {
    final int prefixLength = depth + 1;
    final Type type = schema.getType(Arrays.copyOfRange(pathSegments, 0, prefixLength));
    if (prefixLength == pathSegments.length) {
        return type;
    }

    Preconditions.checkState(!type.isPrimitive());
    final Type nested = getType(pathSegments, prefixLength, schema);
    return new GroupType(type.getRepetition(), type.getName(), type.getOriginalType(), nested);
}
Example 11
Source File: ProtoMessageConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates one child converter per field of {@code parquetSchema}, matching
 * each Parquet field to the protobuf field of the same name.
 *
 * @param pvc           parent container; must not be {@code null}
 * @param builder       protobuf builder whose descriptor supplies the proto fields
 * @param parquetSchema Parquet group whose fields are converted
 * @throws IllegalStateException if {@code pvc} is {@code null}
 * @throws IncompatibleSchemaModificationException if a Parquet field has no protobuf field of the same name
 */
ProtoMessageConverter(ParentValueContainer pvc, Message.Builder builder, GroupType parquetSchema) {
    final int fieldCount = parquetSchema.getFieldCount();
    converters = new Converter[fieldCount];
    this.parent = pvc;

    if (pvc == null) {
        throw new IllegalStateException("Missing parent value container");
    }

    myBuilder = builder;
    final Descriptors.Descriptor protoDescriptor = builder.getDescriptorForType();

    int slot = 0;
    for (Type parquetField : parquetSchema.getFields()) {
        final String parquetName = parquetField.getName();
        final Descriptors.FieldDescriptor protoField = protoDescriptor.findFieldByName(parquetName);

        if (protoField == null) {
            final String description = "Scheme mismatch \n\"" + parquetField + "\""
                + "\n proto descriptor:\n" + protoDescriptor.toProto();
            throw new IncompatibleSchemaModificationException("Cant find \"" + parquetName + "\" " + description);
        }

        converters[slot] = newMessageConverter(myBuilder, protoField, parquetField);
        slot++;
    }
}
Example 12
Source File: ParquetGroup.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Adds a value to the field at {@code fieldIndex}.
 *
 * @param fieldIndex index of the field within the schema
 * @param value      value to add
 * @throws IllegalStateException if the field is not repeated and already holds a value
 */
public void add(int fieldIndex, Primitive value) {
    final Type fieldType = this.schema.getType(fieldIndex);
    final List<Object> existing = this.data[fieldIndex];

    // A repeated field accepts any number of values; any other field accepts exactly one.
    if (fieldType.isRepetition(REPEATED) || existing.isEmpty()) {
        existing.add(value);
        return;
    }

    throw new IllegalStateException(
        "field " + fieldIndex + " (" + fieldType.getName() + ") can not have more than one value: " + existing);
}
Example 13
Source File: DataWritableWriter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Writes the fields of {@code arr} to the record consumer according to
 * {@code type}. A {@code null} array and {@code null} field values are skipped.
 * Primitive fields go through {@code writePrimitive}. Group fields must be
 * {@code ArrayWritable}s and are written with {@code writeArray} when the
 * group is repeated, otherwise recursively with this method.
 *
 * @param arr  the values to write, may be {@code null}
 * @param type the schema describing {@code arr}
 * @throws ParquetEncodingException if a group field's value is not an {@code ArrayWritable}
 */
private void writeData(final ArrayWritable arr, final GroupType type) {
    if (arr == null) {
        return;
    }

    final int fieldCount = type.getFieldCount();
    final Writable[] values = arr.get();

    for (int field = 0; field < fieldCount; ++field) {
        final Type fieldType = type.getType(field);
        final String fieldName = fieldType.getName();
        final Writable value = values[field];
        if (value == null) {
            continue;
        }

        recordConsumer.startField(fieldName, field);
        if (fieldType.isPrimitive()) {
            writePrimitive(value);
        } else {
            recordConsumer.startGroup();
            // value is known to be non-null here, so anything that is not an
            // ArrayWritable is rejected.
            if (!(value instanceof ArrayWritable)) {
                throw new ParquetEncodingException("This should be an ArrayWritable or MapWritable: " + value);
            }
            final ArrayWritable nested = (ArrayWritable) value;
            final GroupType nestedType = fieldType.asGroupType();
            if (nestedType.getRepetition().equals(Type.Repetition.REPEATED)) {
                writeArray(nested, nestedType);
            } else {
                writeData(nested, nestedType);
            }
            recordConsumer.endGroup();
        }
        recordConsumer.endField(fieldName, field);
    }
}
Example 14
Source File: SimpleRecordConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Chooses the converter for a schema field.
 *
 * <p>Primitive fields get a {@code StringConverter} or {@code DecimalConverter}
 * when annotated as string or decimal, otherwise a
 * {@code SimplePrimitiveConverter}. Group fields get a map or list record
 * converter when annotated as such, otherwise a plain
 * {@code SimpleRecordConverter}.
 *
 * @param field the schema field to convert
 * @return the converter for {@code field}
 */
private Converter createConverter(Type field) {
    final LogicalTypeAnnotation annotation = field.getLogicalTypeAnnotation();
    final String fieldName = field.getName();

    if (field.isPrimitive()) {
        if (annotation == null) {
            return new SimplePrimitiveConverter(fieldName);
        }
        final Optional<Converter> specialized = annotation.accept(
            new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
                @Override
                public Optional<Converter> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
                    return of(new StringConverter(fieldName));
                }

                @Override
                public Optional<Converter> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
                    return of(new DecimalConverter(fieldName, decimalLogicalType.getScale()));
                }
            });
        // The fallback is built eagerly, exactly as a plain orElse(...) argument would be.
        return specialized.orElse(new SimplePrimitiveConverter(fieldName));
    }

    final GroupType groupType = field.asGroupType();
    if (annotation == null) {
        return new SimpleRecordConverter(groupType, fieldName, this);
    }
    final Optional<Converter> specialized = annotation.accept(
        new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
            @Override
            public Optional<Converter> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
                return of(new SimpleMapRecordConverter(groupType, fieldName, SimpleRecordConverter.this));
            }

            @Override
            public Optional<Converter> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
                return of(new SimpleListRecordConverter(groupType, fieldName, SimpleRecordConverter.this));
            }
        });
    return specialized.orElse(new SimpleRecordConverter(groupType, fieldName, this));
}
Example 15
Source File: DrillParquetReader.java From Bats with Apache License 2.0 | 5 votes |
/**
 * Resolves the projection type for {@code pathSegments} starting at
 * {@code depth}. The last segment yields the schema's own type; every earlier
 * segment yields a group with the same repetition and name that contains only
 * the type built for the remaining segments.
 *
 * @param pathSegments full column path
 * @param depth        index of the segment handled by this call
 * @param schema       schema the path is resolved against
 * @return the projected type for the path from {@code depth} onward
 * @throws IllegalStateException if a non-final segment resolves to a primitive type
 */
private static Type getType(String[] pathSegments, int depth, MessageType schema) {
    final String[] prefix = Arrays.copyOfRange(pathSegments, 0, depth + 1);
    final Type type = schema.getType(prefix);

    final boolean lastSegment = (depth + 1 == pathSegments.length);
    if (!lastSegment) {
        Preconditions.checkState(!type.isPrimitive());
        return new GroupType(type.getRepetition(), type.getName(), getType(pathSegments, depth + 1, schema));
    }
    return type;
}
Example 16
Source File: SimpleGroup.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Appends {@code value} to the values stored for the field at {@code fieldIndex}.
 *
 * @param fieldIndex index of the field within the schema
 * @param value      value to store
 * @throws IllegalStateException if the field is not repeated and already has a value
 */
private void add(int fieldIndex, Primitive value) {
    final Type fieldType = schema.getType(fieldIndex);
    final List<Object> stored = data[fieldIndex];

    final boolean wouldDuplicate = !fieldType.isRepetition(Type.Repetition.REPEATED) && !stored.isEmpty();
    if (wouldDuplicate) {
        throw new IllegalStateException(
            "field " + fieldIndex + " (" + fieldType.getName() + ") can not have more than one value: " + stored);
    }
    stored.add(value);
}
Example 17
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Converts a Parquet type into a Pig field schema, dispatching on whether the
 * type is primitive.
 *
 * @param parquetType the Parquet type to convert
 * @return the field schema produced by the simple or complex conversion helper
 * @throws FrontendException declared by the signature; presumably raised by the conversion helpers
 */
private FieldSchema getFieldSchema(Type parquetType) throws FrontendException {
    final String fieldName = parquetType.getName();
    return parquetType.isPrimitive()
        ? getSimpleFieldSchema(fieldName, parquetType)
        : getComplexFieldSchema(fieldName, parquetType);
}
Example 18
Source File: ParquetGroupConverter.java From dremio-oss with Apache License 2.0 | 4 votes |
protected void addChildConverter(String fieldName, OutputMutator mutator, List<Field> arrowSchema, Iterator<SchemaPath> colIterator, Type type, Function<String, String> childNameResolver) { // Match the name of the field in the schema definition to the name of the field in the query. String name = null; SchemaPath col; PathSegment colPath; PathSegment colNextChild = null; if (colIterator.hasNext()) { col = colIterator.next(); colPath = col.getRootSegment(); colNextChild = colPath.getChild(); while (true) { if (colPath.isNamed() && (!colPath.getNameSegment().getPath().equals("*"))) { name = colPath.getNameSegment().getPath(); // We may have a field that does not exist in the schema if (name.equalsIgnoreCase(type.getName())) { break; } } name = null; colPath = colNextChild; if (colPath == null) { break; } else { colNextChild = colPath.getChild(); } } } if (name == null) { name = type.getName(); } final String nameForChild = childNameResolver.apply(name); final String fullChildName = fieldName.isEmpty() ? nameForChild : fieldName.concat(".").concat(nameForChild); final Converter converter = type.isPrimitive() ? getConverterForType(fullChildName, type.asPrimitiveType()) : groupConverter(fullChildName, mutator, arrowSchema, type.asGroupType(), colNextChild); converters.add(converter); }
Example 19
Source File: TestDataWritableWriter.java From presto with Apache License 2.0 | 4 votes |
/** * It writes a map type and its key-pair values to the Parquet RecordConsumer. * This is called when the original type (MAP) is detected by writeValue(). * This function assumes the following schema: * optional group mapCol (MAP) { * repeated group map (MAP_KEY_VALUE) { * required TYPE key; * optional TYPE value; * } * } * * @param value The object that contains the map key-values. * @param inspector The object inspector used to get the correct value type. * @param type Type that contains information about the group (MAP) schema. */ private void writeMap(Object value, MapObjectInspector inspector, GroupType type) { // Get the internal map structure (MAP_KEY_VALUE) GroupType repeatedType = type.getType(0).asGroupType(); recordConsumer.startGroup(); Map<?, ?> mapValues = inspector.getMap(value); if (mapValues != null && mapValues.size() > 0) { recordConsumer.startField(repeatedType.getName(), 0); Type keyType = repeatedType.getType(0); String keyName = keyType.getName(); ObjectInspector keyInspector = inspector.getMapKeyObjectInspector(); Type valuetype = repeatedType.getType(1); String valueName = valuetype.getName(); ObjectInspector valueInspector = inspector.getMapValueObjectInspector(); for (Map.Entry<?, ?> keyValue : mapValues.entrySet()) { recordConsumer.startGroup(); if (keyValue != null) { // write key element Object keyElement = keyValue.getKey(); recordConsumer.startField(keyName, 0); writeValue(keyElement, keyInspector, keyType); recordConsumer.endField(keyName, 0); // write value element Object valueElement = keyValue.getValue(); if (valueElement != null) { recordConsumer.startField(valueName, 1); writeValue(valueElement, valueInspector, valuetype); recordConsumer.endField(valueName, 1); } } recordConsumer.endGroup(); } recordConsumer.endField(repeatedType.getName(), 0); } recordConsumer.endGroup(); }
Example 20
Source File: ColumnIO.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Creates a column IO node for {@code type}, caching the type's name.
 *
 * @param type   schema type this node represents
 * @param parent enclosing group node
 * @param index  value stored as this node's index
 */
ColumnIO(Type type, GroupColumnIO parent, int index) {
    this.name = type.getName();
    this.type = type;
    this.index = index;
    this.parent = parent;
}