Java Code Examples for org.apache.parquet.schema.GroupType#getType()
The following examples show how to use
org.apache.parquet.schema.GroupType#getType().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestDataWritableWriter.java From presto with Apache License 2.0 | 6 votes |
private void writeSingleLevelArray(Object value, ListObjectInspector inspector, GroupType type) { // Get the internal array structure Type elementType = type.getType(0); recordConsumer.startGroup(); List<?> arrayValues = inspector.getList(value); if (!arrayValues.isEmpty()) { recordConsumer.startField(elementType.getName(), 0); ObjectInspector elementInspector = inspector.getListElementObjectInspector(); for (Object element : arrayValues) { if (element == null) { throw new IllegalArgumentException("Array elements are requires in given schema definition"); } writeValue(element, elementInspector, elementType); } recordConsumer.endField(elementType.getName(), 0); } recordConsumer.endGroup(); }
Example 2
Source File: AvroWriteSupport.java From parquet-mr with Apache License 2.0 | 6 votes |
@Override protected void writeCollection(GroupType type, Schema schema, Collection<?> collection) { if (collection.size() > 0) { recordConsumer.startField(LIST_REPEATED_NAME, 0); GroupType repeatedType = type.getType(0).asGroupType(); Type elementType = repeatedType.getType(0); for (Object element : collection) { recordConsumer.startGroup(); // repeated group array, middle layer if (element != null) { recordConsumer.startField(LIST_ELEMENT_NAME, 0); writeValue(elementType, schema.getElementType(), element); recordConsumer.endField(LIST_ELEMENT_NAME, 0); } else if (!elementType.isRepetition(Type.Repetition.OPTIONAL)) { throw new RuntimeException( "Null list element for " + schema.getName()); } recordConsumer.endGroup(); } recordConsumer.endField(LIST_REPEATED_NAME, 0); } }
Example 3
Source File: ColumnIOFactory.java From parquet-mr with Apache License 2.0 | 6 votes |
private void visitChildren(GroupColumnIO newIO, GroupType groupType, GroupType requestedGroupType) { GroupColumnIO oldIO = current; current = newIO; for (Type type : groupType.getFields()) { // if the file schema does not contain the field it will just stay null if (requestedGroupType.containsField(type.getName())) { currentRequestedIndex = requestedGroupType.getFieldIndex(type.getName()); currentRequestedType = requestedGroupType.getType(currentRequestedIndex); if (currentRequestedType.getRepetition().isMoreRestrictiveThan(type.getRepetition())) { incompatibleSchema(type, currentRequestedType); } type.accept(this); } } current = oldIO; }
Example 4
Source File: TestDataWritableWriter.java From presto with Apache License 2.0 | 6 votes |
/**
 * Writes every non-null field of a struct value to the RecordConsumer.
 * A null {@code value} writes nothing.
 *
 * @param value     the struct object holding the field values
 * @param inspector object inspector used to obtain the field references and field data
 * @param type      group type describing the fields; iterated by index
 */
private void writeGroupFields(Object value, StructObjectInspector inspector, GroupType type) {
  if (value == null) {
    return;
  }

  final List<? extends StructField> structFields = inspector.getAllStructFieldRefs();
  final List<Object> structValues = inspector.getStructFieldsDataAsList(value);

  final int index0 = 0;
  for (int index = index0; index < type.getFieldCount(); index++) {
    final Type fieldType = type.getType(index);
    final String fieldName = fieldType.getName();
    final Object fieldValue = structValues.get(index);

    // Null fields are omitted: no start/end markers are emitted for them.
    if (fieldValue == null) {
      continue;
    }

    final ObjectInspector fieldInspector = structFields.get(index).getFieldObjectInspector();
    recordConsumer.startField(fieldName, index);
    writeValue(fieldValue, fieldInspector, fieldType);
    recordConsumer.endField(fieldName, index);
  }
}
Example 5
Source File: PigParquetReader.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Builds the reader for a list column.
 *
 * @param expectedList  expected list type (not read by this method)
 * @param array         list group; its first field is taken as the repeated group
 * @param elementReader reader for the list elements
 * @return an {@code ArrayReader} wrapping the element reader in an option reader
 */
@Override
public ParquetValueReader<?> list(
    Types.ListType expectedList, GroupType array, ParquetValueReader<?> elementReader) {
  final GroupType repeatedGroup = array.getFields().get(0).asGroupType();

  // Levels of the repeated group are one less than the maximum levels reported for the current path.
  final String[] pathToRepeated = currentPath();
  final int repeatedDefLevel = type.getMaxDefinitionLevel(pathToRepeated) - 1;
  final int repeatedRepLevel = type.getMaxRepetitionLevel(pathToRepeated) - 1;

  final Type element = repeatedGroup.getType(0);
  final int elementDefLevel = type.getMaxDefinitionLevel(path(element.getName())) - 1;

  return new ArrayReader<>(
      repeatedDefLevel,
      repeatedRepLevel,
      ParquetValueReaders.option(element, elementDefLevel, elementReader));
}
Example 6
Source File: TupleConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
public TupleConverter(GroupType parquetSchema) { int schemaSize = parquetSchema.getFieldCount(); this.converters = new Converter[schemaSize]; for (int i = 0; i < schemaSize; i++) { Type type = parquetSchema.getType(i); converters[i] = newConverter(type, i); } }
Example 7
Source File: ThriftRecordConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates a converter for a map field.
 *
 * @param parentEvents  event list stored on this converter
 * @param parquetSchema map group type; must contain exactly one field
 * @param field         Thrift field whose type is cast to {@code MapType} to obtain key and value
 * @throws IllegalArgumentException if {@code parquetSchema} does not have exactly one field
 */
MapConverter(List<TProtocol> parentEvents, GroupType parquetSchema, ThriftField field) {
  this.parentEvents = parentEvents;

  final int fieldCount = parquetSchema.getFieldCount();
  if (fieldCount != 1) {
    throw new IllegalArgumentException("maps have only one field. " + parquetSchema + " size = " + parquetSchema.getFieldCount());
  }

  // The single field of the map group is the nested key/value type.
  final Type keyValueType = parquetSchema.getType(0);

  final ThriftField keyField = ((MapType) field.getType()).getKey();
  keyType = keyField.getType().getType().getThriftType();

  final ThriftField valueField = ((MapType) field.getType()).getValue();
  valueType = valueField.getType().getType().getThriftType();

  final MapKeyValueConverter keyValueConverter =
      new MapKeyValueConverter(mapEvents, keyValueType, keyField, valueField);
  child = new GroupCounter(keyValueConverter);
}
Example 8
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
private Type filterBag(GroupType bagType, FieldSchema bagFieldSchema) throws FrontendException { if (LOG.isDebugEnabled()) LOG.debug("filtering BAG schema:\n" + bagType + "\nwith:\n " + bagFieldSchema); if (bagType.getFieldCount() != 1) { throw new RuntimeException("not unwrapping the right type, this should be a Bag: " + bagType); } Type nested = bagType.getType(0); FieldSchema innerField = bagFieldSchema.schema.getField(0); if (nested.isPrimitive() || nested.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation || nested.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) { // Bags always contain tuples => we skip the extra tuple that was inserted in that case. innerField = innerField.schema.getField(0); } return bagType.withNewFields(filter(nested, innerField)); }
Example 9
Source File: GenericParquetReaders.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Builds the reader for a list column.
 *
 * @param expectedList  expected list type (not read by this method)
 * @param array         list group; its first field is taken as the repeated group
 * @param elementReader reader for the list elements
 * @return a {@code ListReader} wrapping the element reader in an option reader
 */
@Override
public ParquetValueReader<?> list(Types.ListType expectedList, GroupType array,
                                  ParquetValueReader<?> elementReader) {
  final GroupType repeatedGroup = array.getFields().get(0).asGroupType();

  // Levels of the repeated group are one less than the maximum levels reported for the current path.
  final String[] pathToRepeated = currentPath();
  final int repeatedDefLevel = type.getMaxDefinitionLevel(pathToRepeated) - 1;
  final int repeatedRepLevel = type.getMaxRepetitionLevel(pathToRepeated) - 1;

  final Type element = repeatedGroup.getType(0);
  final int elementDefLevel = type.getMaxDefinitionLevel(path(element.getName())) - 1;

  return new ListReader<>(
      repeatedDefLevel,
      repeatedRepLevel,
      option(element, elementDefLevel, elementReader));
}
Example 10
Source File: DataWritableWriter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Writes the fields of an {@code ArrayWritable} record according to the given
 * group type. Null records and null field values are skipped.
 *
 * @param arr  record whose values are written; ignored when null
 * @param type group type describing the fields, matched to values by index
 * @throws ParquetEncodingException if a non-primitive field holds a value that is not an ArrayWritable
 */
private void writeData(final ArrayWritable arr, final GroupType type) {
  if (arr == null) {
    return;
  }

  final int fieldCount = type.getFieldCount();
  final Writable[] values = arr.get();

  for (int index = 0; index < fieldCount; ++index) {
    final Type fieldType = type.getType(index);
    final String fieldName = fieldType.getName();
    final Writable value = values[index];

    if (value == null) {
      // Null fields are omitted from the output.
      continue;
    }

    recordConsumer.startField(fieldName, index);
    if (!fieldType.isPrimitive()) {
      recordConsumer.startGroup();
      if (!(value instanceof ArrayWritable)) {
        // value is known to be non-null here, so any other type is an error.
        throw new ParquetEncodingException("This should be an ArrayWritable or MapWritable: " + value);
      }
      final ArrayWritable nested = (ArrayWritable) value;
      if (fieldType.asGroupType().getRepetition().equals(Type.Repetition.REPEATED)) {
        writeArray(nested, fieldType.asGroupType());
      } else {
        writeData(nested, fieldType.asGroupType());
      }
      recordConsumer.endGroup();
    } else {
      writePrimitive(value);
    }
    recordConsumer.endField(fieldName, index);
  }
}
Example 11
Source File: ParquetTypeVisitor.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Visits a list group: validates its shape, visits the element field if one
 * is present, and returns the visitor's result for the list.
 *
 * @param list    list group; must not be repeated and must contain exactly one repeated group
 * @param visitor visitor receiving the before/after callbacks and producing the result
 * @return the value of {@code visitor.list(list, elementResult)}, where
 *         {@code elementResult} is null when the repeated group has no field
 * @throws IllegalArgumentException if the group does not have the expected list shape
 */
private static <T> T visitList(GroupType list, ParquetTypeVisitor<T> visitor) {
  Preconditions.checkArgument(
      !list.isRepetition(Type.Repetition.REPEATED),
      "Invalid list: top-level group is repeated: %s", list);
  Preconditions.checkArgument(
      list.getFieldCount() == 1,
      "Invalid list: does not contain single repeated field: %s", list);

  final GroupType repeatedGroup = list.getFields().get(0).asGroupType();
  Preconditions.checkArgument(
      repeatedGroup.isRepetition(Type.Repetition.REPEATED),
      "Invalid list: inner group is not repeated");
  Preconditions.checkArgument(
      repeatedGroup.getFieldCount() <= 1,
      "Invalid list: repeated group is not a single field: %s", list);

  visitor.beforeRepeatedElement(repeatedGroup);
  try {
    T elementResult = null;

    final boolean hasElementField = repeatedGroup.getFieldCount() > 0;
    if (hasElementField) {
      final Type element = repeatedGroup.getType(0);
      visitor.beforeElementField(element);
      try {
        elementResult = visit(element, visitor);
      } finally {
        // Always paired with beforeElementField, even when the visit throws.
        visitor.afterElementField(element);
      }
    }

    return visitor.list(list, elementResult);
  } finally {
    // Always paired with beforeRepeatedElement, even when the visit throws.
    visitor.afterRepeatedElement(repeatedGroup);
  }
}
Example 12
Source File: ParquetAvroWriter.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Builds the writer for a list column.
 *
 * @param array         list group; its first field is taken as the repeated group
 * @param elementWriter writer for the list elements
 * @return a collections writer wrapping the element writer in an option writer
 */
@Override
public ParquetValueWriter<?> list(GroupType array, ParquetValueWriter<?> elementWriter) {
  final GroupType repeatedGroup = array.getFields().get(0).asGroupType();

  // Levels of the repeated group are the maximum levels reported for the current path.
  final String[] pathToRepeated = currentPath();
  final int repeatedDefLevel = type.getMaxDefinitionLevel(pathToRepeated);
  final int repeatedRepLevel = type.getMaxRepetitionLevel(pathToRepeated);

  final org.apache.parquet.schema.Type element = repeatedGroup.getType(0);
  final int elementDefLevel = type.getMaxDefinitionLevel(path(element.getName()));

  return collections(
      repeatedDefLevel,
      repeatedRepLevel,
      option(element, elementDefLevel, elementWriter));
}
Example 13
Source File: AvroRecordConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates a converter for the single element field of a repeated group.
 *
 * @param repeatedType  repeated group; its field 0 is the element type
 * @param elementSchema element schema; its non-null form is used for the converter
 * @param model         data model passed through to the element converter
 */
public ElementConverter(GroupType repeatedType, Schema elementSchema, GenericData model) {
  final Type elementType = repeatedType.getType(0);
  final Schema nonNullSchema = AvroSchemaConverter.getNonNull(elementSchema);

  // Each converted value is stored in this converter's element field.
  final ParentValueContainer elementSink = new ParentValueContainer() {
    @Override
    @SuppressWarnings("unchecked")
    public void add(Object value) {
      ElementConverter.this.element = value;
    }
  };

  this.elementConverter = newConverter(nonNullSchema, elementType, model, elementSink);
}
Example 14
Source File: GenericParquetWriter.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Builds the writer for a list column.
 *
 * @param array         list group; its first field is taken as the repeated group
 * @param elementWriter writer for the list elements
 * @return a collections writer wrapping the element writer in an option writer
 */
@Override
public ParquetValueWriter<?> list(GroupType array, ParquetValueWriter<?> elementWriter) {
  final GroupType repeatedGroup = array.getFields().get(0).asGroupType();

  // Levels of the repeated group are the maximum levels reported for the current path.
  final String[] pathToRepeated = currentPath();
  final int repeatedDefLevel = type.getMaxDefinitionLevel(pathToRepeated);
  final int repeatedRepLevel = type.getMaxRepetitionLevel(pathToRepeated);

  final org.apache.parquet.schema.Type element = repeatedGroup.getType(0);
  final int elementDefLevel = type.getMaxDefinitionLevel(path(element.getName()));

  return ParquetValueWriters.collections(
      repeatedDefLevel,
      repeatedRepLevel,
      ParquetValueWriters.option(element, elementDefLevel, elementWriter));
}
Example 15
Source File: ParquetAvroValueReaders.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Builds the reader for a list column.
 *
 * @param expectedList  expected list type (not read by this method)
 * @param array         list group; its first field is taken as the repeated group
 * @param elementReader reader for the list elements
 * @return a {@code ListReader} wrapping the element reader in an option reader
 */
@Override
public ParquetValueReader<?> list(Types.ListType expectedList, GroupType array,
                                  ParquetValueReader<?> elementReader) {
  final GroupType repeatedGroup = array.getFields().get(0).asGroupType();

  // Levels of the repeated group are one less than the maximum levels reported for the current path.
  final String[] pathToRepeated = currentPath();
  final int repeatedDefLevel = type.getMaxDefinitionLevel(pathToRepeated) - 1;
  final int repeatedRepLevel = type.getMaxRepetitionLevel(pathToRepeated) - 1;

  final Type element = repeatedGroup.getType(0);
  final int elementDefLevel = type.getMaxDefinitionLevel(path(element.getName())) - 1;

  return new ListReader<>(
      repeatedDefLevel,
      repeatedRepLevel,
      option(element, elementDefLevel, elementReader));
}
Example 16
Source File: TupleConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates a converter for a bag field.
 *
 * @param parquetSchema        bag group type; must contain exactly one field
 * @param pigSchema            Pig field schema of the bag
 * @param parent               container stored on this converter
 * @param numbersDefaultToZero passed through to the child converter
 * @param columnIndexAccess    passed through to the child converter
 * @throws IllegalArgumentException if {@code parquetSchema} does not have exactly one field
 * @throws FrontendException        if a Pig schema lookup fails
 */
BagConverter(GroupType parquetSchema, FieldSchema pigSchema, ParentValueContainer parent, boolean numbersDefaultToZero, boolean columnIndexAccess) throws FrontendException {
  this.parent = parent;

  final boolean hasSingleField = parquetSchema.getFieldCount() == 1;
  if (!hasSingleField) {
    throw new IllegalArgumentException("bags have only one field. " + parquetSchema + " size = " + parquetSchema.getFieldCount());
  }

  final Type nestedType = parquetSchema.getType(0);
  final boolean wrapInTuple =
      nestedType.isPrimitive()
          || nestedType.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation
          || nestedType.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation;

  final ParentValueContainer childsParent;
  final FieldSchema pigField;
  if (!wrapInTuple) {
    // The nested value is already a tuple and is buffered as-is.
    childsParent = new ParentValueContainer() {
      @Override
      void add(Object value) {
        buffer.add((Tuple) value);
      }
    };
    pigField = pigSchema.schema.getField(0);
  } else {
    // Pig bags always contain tuples, so a bare value is wrapped in an extra tuple.
    childsParent = new ParentValueContainer() {
      @Override
      void add(Object value) {
        buffer.add(TF.newTuple(value));
      }
    };
    pigField = pigSchema.schema.getField(0).schema.getField(0);
  }

  child = newConverter(pigField, nestedType, childsParent, numbersDefaultToZero, columnIndexAccess);
}
Example 17
Source File: AvroWriteSupportInt96Avro17.java From datacollector with Apache License 2.0 | 5 votes |
private <V> void writeMap(GroupType schema, Schema avroSchema, Map<CharSequence, V> map) { GroupType innerGroup = schema.getType(0).asGroupType(); Type keyType = innerGroup.getType(0); Type valueType = innerGroup.getType(1); recordConsumer.startGroup(); // group wrapper (original type MAP) if (map.size() > 0) { recordConsumer.startField(MAP_REPEATED_NAME, 0); for (Map.Entry<CharSequence, V> entry : map.entrySet()) { recordConsumer.startGroup(); // repeated group key_value, middle layer recordConsumer.startField(MAP_KEY_NAME, 0); writeValue(keyType, MAP_KEY_SCHEMA, entry.getKey()); recordConsumer.endField(MAP_KEY_NAME, 0); V value = entry.getValue(); if (value != null) { recordConsumer.startField(MAP_VALUE_NAME, 1); writeValue(valueType, avroSchema.getValueType(), value); recordConsumer.endField(MAP_VALUE_NAME, 1); } else if (!valueType.isRepetition(Type.Repetition.OPTIONAL)) { throw new RuntimeException("Null map value for " + avroSchema.getName()); } recordConsumer.endGroup(); } recordConsumer.endField(MAP_REPEATED_NAME, 0); } recordConsumer.endGroup(); }
Example 18
Source File: AvroWriteSupportInt96Avro18.java From datacollector with Apache License 2.0 | 5 votes |
private <V> void writeMap(GroupType schema, Schema avroSchema, Map<CharSequence, V> map) { GroupType innerGroup = schema.getType(0).asGroupType(); Type keyType = innerGroup.getType(0); Type valueType = innerGroup.getType(1); recordConsumer.startGroup(); // group wrapper (original type MAP) if (map.size() > 0) { recordConsumer.startField(MAP_REPEATED_NAME, 0); for (Map.Entry<CharSequence, V> entry : map.entrySet()) { recordConsumer.startGroup(); // repeated group key_value, middle layer recordConsumer.startField(MAP_KEY_NAME, 0); writeValue(keyType, MAP_KEY_SCHEMA, entry.getKey()); recordConsumer.endField(MAP_KEY_NAME, 0); V value = entry.getValue(); if (value != null) { recordConsumer.startField(MAP_VALUE_NAME, 1); writeValue(valueType, avroSchema.getValueType(), value); recordConsumer.endField(MAP_VALUE_NAME, 1); } else if (!valueType.isRepetition(Type.Repetition.OPTIONAL)) { throw new RuntimeException("Null map value for " + avroSchema.getName()); } recordConsumer.endGroup(); } recordConsumer.endField(MAP_REPEATED_NAME, 0); } recordConsumer.endGroup(); }
Example 19
Source File: ParquetAsJsonInputFormat.java From iow-hadoop-streaming with Apache License 2.0 | 4 votes |
private void groupToJson(JsonGenerator currentGenerator, SimpleGroup grp) throws IOException { GroupType gt = grp.getType(); currentGenerator.writeStartObject(); for(int i = 0; i < gt.getFieldCount(); i ++) { String field = gt.getFieldName(i); try { Type t = gt.getType(i); int repetition = 1; boolean repeated = false; if (t.getRepetition() == Type.Repetition.REPEATED) { repeated = true; repetition = grp.getFieldRepetitionCount(i); currentGenerator.writeArrayFieldStart(field); } else currentGenerator.writeFieldName(field); for(int j = 0; j < repetition; j ++) { if (t.isPrimitive()) { switch (t.asPrimitiveType().getPrimitiveTypeName()) { case BINARY: currentGenerator.writeString(grp.getString(i, j)); break; case INT32: currentGenerator.writeNumber(grp.getInteger(i, j)); break; case INT96: case INT64: // clumsy way - TODO - Subclass SimpleGroup or something like that currentGenerator.writeNumber(Long.parseLong(grp.getValueToString(i, j))); break; case DOUBLE: case FLOAT: currentGenerator.writeNumber(Double.parseDouble(grp.getValueToString(i, j))); break; case BOOLEAN: currentGenerator.writeBoolean(grp.getBoolean(i, j)); break; default: throw new RuntimeException("Can't handle type " + gt.getType(i)); } } else { groupToJson(currentGenerator, (SimpleGroup) grp.getGroup(i, j)); } } if (repeated) currentGenerator.writeEndArray(); } catch (Exception e) { if (e.getMessage().startsWith("not found") && gt.getType(i).getRepetition() == Type.Repetition.OPTIONAL) currentGenerator.writeNull(); else throw new RuntimeException(e); } } currentGenerator.writeEndObject(); }
Example 20
Source File: ParquetTypeVisitor.java From iceberg with Apache License 2.0 | 4 votes |
public static <T> T visit(Type type, ParquetTypeVisitor<T> visitor) { if (type instanceof MessageType) { return visitor.message((MessageType) type, visitFields(type.asGroupType(), visitor)); } else if (type.isPrimitive()) { return visitor.primitive(type.asPrimitiveType()); } else { // if not a primitive, the typeId must be a group GroupType group = type.asGroupType(); OriginalType annotation = group.getOriginalType(); if (annotation != null) { switch (annotation) { case LIST: Preconditions.checkArgument(!group.isRepetition(REPEATED), "Invalid list: top-level group is repeated: " + group); Preconditions.checkArgument(group.getFieldCount() == 1, "Invalid list: does not contain single repeated field: " + group); GroupType repeatedElement = group.getFields().get(0).asGroupType(); Preconditions.checkArgument(repeatedElement.isRepetition(REPEATED), "Invalid list: inner group is not repeated"); Preconditions.checkArgument(repeatedElement.getFieldCount() <= 1, "Invalid list: repeated group is not a single field: " + group); visitor.fieldNames.push(repeatedElement.getName()); try { T elementResult = null; if (repeatedElement.getFieldCount() > 0) { elementResult = visitField(repeatedElement.getType(0), visitor); } return visitor.list(group, elementResult); } finally { visitor.fieldNames.pop(); } case MAP: Preconditions.checkArgument(!group.isRepetition(REPEATED), "Invalid map: top-level group is repeated: " + group); Preconditions.checkArgument(group.getFieldCount() == 1, "Invalid map: does not contain single repeated field: " + group); GroupType repeatedKeyValue = group.getType(0).asGroupType(); Preconditions.checkArgument(repeatedKeyValue.isRepetition(REPEATED), "Invalid map: inner group is not repeated"); Preconditions.checkArgument(repeatedKeyValue.getFieldCount() <= 2, "Invalid map: repeated group does not have 2 fields"); visitor.fieldNames.push(repeatedKeyValue.getName()); try { T keyResult = null; T valueResult = null; switch (repeatedKeyValue.getFieldCount()) { case 
2: // if there are 2 fields, both key and value are projected keyResult = visitField(repeatedKeyValue.getType(0), visitor); valueResult = visitField(repeatedKeyValue.getType(1), visitor); case 1: // if there is just one, use the name to determine what it is Type keyOrValue = repeatedKeyValue.getType(0); if (keyOrValue.getName().equalsIgnoreCase("key")) { keyResult = visitField(keyOrValue, visitor); // value result remains null } else { valueResult = visitField(keyOrValue, visitor); // key result remains null } default: // both results will remain null } return visitor.map(group, keyResult, valueResult); } finally { visitor.fieldNames.pop(); } default: } } return visitor.struct(group, visitFields(group, visitor)); } }