Java Code Examples for org.apache.parquet.schema.GroupType#getFieldCount()
The following examples show how to use
org.apache.parquet.schema.GroupType#getFieldCount().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DataWritableGroupConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Creates a group converter with one child converter per selected field.
 *
 * @param selectedGroupType   the fields that were requested
 * @param parent              the enclosing converter
 * @param index               position of this converter inside its parent
 * @param containingGroupType the full group the selected fields are taken from
 * @throws IllegalStateException if a selected field is not part of {@code containingGroupType}
 */
public DataWritableGroupConverter(final GroupType selectedGroupType, final HiveGroupConverter parent,
    final int index, final GroupType containingGroupType) {
  this.parent = parent;
  this.index = index;

  // The value array is sized by the containing group, the converter array by the selection.
  currentArr = new Object[containingGroupType.getFieldCount()];
  converters = new Converter[selectedGroupType.getFieldCount()];

  final List<Type> requested = selectedGroupType.getFields();
  for (int pos = 0; pos < converters.length; pos++) {
    final Type requestedField = requested.get(pos);
    if (!containingGroupType.getFields().contains(requestedField)) {
      throw new IllegalStateException("Group type [" + containingGroupType + "] does not contain requested field: " + requestedField);
    }
    // Child converters are addressed by the field's index inside the containing group.
    final int containingIndex = containingGroupType.getFieldIndex(requestedField.getName());
    converters[pos] = getConverterFromDescription(requestedField, containingIndex, this);
  }
}
Example 2
Source File: GroupWriter.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Emits every field of {@code group} that has at least one value to the record consumer,
 * recursing into nested groups.
 *
 * @param group the values to write
 * @param type  the schema describing {@code group}
 */
private void writeGroup(Group group, GroupType type) {
  final int total = type.getFieldCount();
  for (int fieldIdx = 0; fieldIdx < total; ++fieldIdx) {
    final int repetitions = group.getFieldRepetitionCount(fieldIdx);
    if (repetitions <= 0) {
      // Fields without values are never announced to the consumer.
      continue;
    }

    final Type fieldType = type.getType(fieldIdx);
    final String fieldName = fieldType.getName();
    recordConsumer.startField(fieldName, fieldIdx);
    for (int valueIdx = 0; valueIdx < repetitions; ++valueIdx) {
      if (!fieldType.isPrimitive()) {
        recordConsumer.startGroup();
        writeGroup(group.getGroup(fieldIdx, valueIdx), fieldType.asGroupType());
        recordConsumer.endGroup();
      } else {
        group.writeValue(fieldIdx, valueIdx, recordConsumer);
      }
    }
    recordConsumer.endField(fieldName, fieldIdx);
  }
}
Example 3
Source File: AvroRecordConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Creates one member converter per non-null branch of the Avro union.
 *
 * @param parent        container handed to the superclass constructor
 * @param parquetSchema Parquet type of the union; read as a group
 * @param avroSchema    Avro union schema whose branches are walked in order
 * @param model         data model forwarded to each member converter
 */
public AvroUnionConverter(ParentValueContainer parent, Type parquetSchema, Schema avroSchema, GenericData model) {
  super(parent);
  GroupType unionGroup = parquetSchema.asGroupType();
  this.memberConverters = new Converter[unionGroup.getFieldCount()];

  int nextParquetField = 0;
  for (Schema branch : avroSchema.getTypes()) {
    if (branch.getType().equals(Schema.Type.NULL)) {
      // Null branches do not advance the Parquet field index.
      continue;
    }
    Type branchType = unionGroup.getType(nextParquetField);
    ParentValueContainer singleValueSink = new ParentValueContainer() {
      @Override
      public void add(Object value) {
        Preconditions.checkArgument(
            AvroUnionConverter.this.memberValue == null,
            "Union is resolving to more than one type");
        memberValue = value;
      }
    };
    memberConverters[nextParquetField] = newConverter(branch, branchType, model, singleValueSink);
    nextParquetField++;
  }
}
Example 4
Source File: AvroIndexedRecordConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Creates one member converter per non-null branch of the Avro union.
 *
 * @param parent        container stored on this converter
 * @param parquetSchema Parquet type of the union; read as a group
 * @param avroSchema    Avro union schema whose branches are walked in order
 * @param model         data model forwarded to each member converter
 */
public AvroUnionConverter(ParentValueContainer parent, Type parquetSchema, Schema avroSchema, GenericData model) {
  this.parent = parent;
  GroupType unionGroup = parquetSchema.asGroupType();
  this.memberConverters = new Converter[unionGroup.getFieldCount()];

  int parquetSlot = 0;
  for (Schema branch : avroSchema.getTypes()) {
    if (branch.getType().equals(Schema.Type.NULL)) {
      // Null branches do not advance the Parquet field index.
      continue;
    }
    Type branchType = unionGroup.getType(parquetSlot);
    ParentValueContainer singleValueSink = new ParentValueContainer() {
      @Override
      public void add(Object value) {
        Preconditions.checkArgument(memberValue == null, "Union is resolving to more than one type");
        memberValue = value;
      }
    };
    memberConverters[parquetSlot] = newConverter(branch, branchType, model, singleValueSink);
    parquetSlot++;
  }
}
Example 5
Source File: ThriftRecordConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates a converter for a Thrift map backed by a single-field Parquet group.
 *
 * @param parentEvents  event list stored on this converter
 * @param parquetSchema Parquet group for the map; must have exactly one field
 * @param field         Thrift field whose type is cast to {@code MapType}
 * @throws IllegalArgumentException if {@code parquetSchema} does not have exactly one field
 */
MapConverter(List<TProtocol> parentEvents, GroupType parquetSchema, ThriftField field) {
  this.parentEvents = parentEvents;

  final int fieldCount = parquetSchema.getFieldCount();
  if (fieldCount != 1) {
    throw new IllegalArgumentException("maps have only one field. " + parquetSchema + " size = " + fieldCount);
  }
  final Type keyValueType = parquetSchema.getType(0);

  final MapType thriftMap = (MapType) field.getType();
  final ThriftField keyField = thriftMap.getKey();
  keyType = keyField.getType().getType().getThriftType();
  final ThriftField valueField = thriftMap.getValue();
  valueType = valueField.getType().getType().getThriftType();

  final MapKeyValueConverter entryConverter =
      new MapKeyValueConverter(mapEvents, keyValueType, keyField, valueField);
  child = new GroupCounter(entryConverter);
}
Example 6
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
private Type filterBag(GroupType bagType, FieldSchema bagFieldSchema) throws FrontendException { if (LOG.isDebugEnabled()) LOG.debug("filtering BAG schema:\n" + bagType + "\nwith:\n " + bagFieldSchema); if (bagType.getFieldCount() != 1) { throw new RuntimeException("not unwrapping the right type, this should be a Bag: " + bagType); } Type nested = bagType.getType(0); FieldSchema innerField = bagFieldSchema.schema.getField(0); if (nested.isPrimitive() || nested.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation || nested.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) { // Bags always contain tuples => we skip the extra tuple that was inserted in that case. innerField = innerField.schema.getField(0); } return bagType.withNewFields(filter(nested, innerField)); }
Example 7
Source File: SimpleRecordConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates a record converter holding one child converter per schema field.
 *
 * @param schema group whose fields each get a converter
 * @param name   name stored on this converter
 * @param parent enclosing converter stored on this converter
 */
public SimpleRecordConverter(GroupType schema, String name, SimpleRecordConverter parent) {
  final int fieldCount = schema.getFieldCount();
  this.converters = new Converter[fieldCount];
  this.parent = parent;
  this.name = name;

  // Converters are created in schema order, after parent and name have been assigned.
  for (int slot = 0; slot < fieldCount; slot++) {
    converters[slot] = createConverter(schema.getType(slot));
  }
}
Example 8
Source File: DataWritableWriter.java From parquet-mr with Apache License 2.0 | 5 votes |
private void writeArray(final ArrayWritable array, final GroupType type) { if (array == null) { return; } final Writable[] subValues = array.get(); final int fieldCount = type.getFieldCount(); for (int field = 0; field < fieldCount; ++field) { final Type subType = type.getType(field); recordConsumer.startField(subType.getName(), field); for (int i = 0; i < subValues.length; ++i) { final Writable subValue = subValues[i]; if (subValue != null) { if (subType.isPrimitive()) { if (subValue instanceof ArrayWritable) { writePrimitive(((ArrayWritable) subValue).get()[field]);// 0 ? } else { writePrimitive(subValue); } } else { if (!(subValue instanceof ArrayWritable)) { throw new RuntimeException("This should be a ArrayWritable: " + subValue); } else { recordConsumer.startGroup(); writeData((ArrayWritable) subValue, subType.asGroupType()); recordConsumer.endGroup(); } } } } recordConsumer.endField(subType.getName(), field); } }
Example 9
Source File: ParquetGroup.java From incubator-gobblin with Apache License 2.0 | 5 votes |
public ParquetGroup(GroupType schema) { this.schema = schema; this.data = new List[schema.getFields().size()]; for (int i = 0; i < schema.getFieldCount(); ++i) { this.data[i] = new ArrayList(); } }
Example 10
Source File: RowConverter.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a row converter with one converter slot per schema field.
 * The slots are filled only when {@code typeInfo} is a {@code CompositeType} with arity of at least 1.
 *
 * @param schema   Parquet group describing the row
 * @param typeInfo type information for the row
 * @param parent   holder stored on this converter
 * @param pos      position of this row inside the parent row
 */
public RowConverter(GroupType schema, TypeInformation<?> typeInfo, ParentDataHolder parent, int pos) {
  this.typeInfo = typeInfo;
  this.parentDataHolder = parent;
  this.posInParentRow = pos;
  this.converters = new Converter[schema.getFieldCount()];

  if (typeInfo.getArity() < 1 || !(typeInfo instanceof CompositeType)) {
    // Nothing to wire up; the converter slots stay unset.
    return;
  }

  final CompositeType<?> composite = (CompositeType<?>) typeInfo;
  int position = 0;
  for (Type field : schema.getFields()) {
    converters[position] = createConverter(field, position, composite.getTypeAt(position), this);
    position++;
  }
}
Example 11
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Selects and filters the fields of {@code schemaToFilter} that correspond to Pig fields.
 * With no required-field list, every Pig field is used at its own position.
 * Otherwise each required field is used at its declared index.
 * Indexes at or beyond the Parquet field count are ignored.
 *
 * @param schemaToFilter     Parquet group to pick fields from
 * @param pigSchema          Pig schema used to look fields up by alias
 * @param requiredFieldsList requested fields, or null for all Pig fields
 * @return the filtered Parquet fields, in request order
 * @throws RuntimeException wrapping any {@code FrontendException} raised during lookup or filtering
 */
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema pigSchema, RequiredFieldList requiredFieldsList) {
  final List<Type> kept = new ArrayList<Type>();
  final List<Pair<FieldSchema, Integer>> candidates = new ArrayList<Pair<FieldSchema, Integer>>();
  try {
    if (requiredFieldsList != null) {
      for (RequiredField required : requiredFieldsList.getFields()) {
        candidates.add(new Pair<FieldSchema, Integer>(pigSchema.getField(required.getAlias()), required.getIndex()));
      }
    } else {
      int position = 0;
      for (FieldSchema pigField : pigSchema.getFields()) {
        candidates.add(new Pair<FieldSchema, Integer>(pigField, position));
        position++;
      }
    }

    for (Pair<FieldSchema, Integer> candidate : candidates) {
      final FieldSchema pigField = pigSchema.getField(candidate.first.alias);
      if (candidate.second >= schemaToFilter.getFieldCount()) {
        // The Parquet schema has no field at this index.
        continue;
      }
      final Type parquetField = schemaToFilter.getFields().get(candidate.second);
      kept.add(filter(parquetField, pigField));
    }
  } catch (FrontendException e) {
    throw new RuntimeException("Failed to filter requested fields", e);
  }
  return kept;
}
Example 12
Source File: ThriftRecordConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
private StructConverter(List<TProtocol> events, GroupType parquetSchema, ThriftField field) { this.events = events; this.name = field.getName(); this.tStruct = new TStruct(name); this.thriftType = (StructType)field.getType(); this.schemaSize = parquetSchema.getFieldCount(); this.converters = new Converter[this.schemaSize]; List<ThriftField> thriftChildren = thriftType.getChildren(); for (int i = 0; i < schemaSize; i++) { Type schemaType = parquetSchema.getType(i); String fieldName = schemaType.getName(); ThriftField matchingThrift = null; for (ThriftField childField: thriftChildren) { String thriftChildName = childField.getName(); if (thriftChildName != null && thriftChildName.equalsIgnoreCase(fieldName)) { matchingThrift = childField; break; } } if (matchingThrift == null) { // this means the file did not contain that field // it will never be populated in this instance // other files might populate it continue; } if (schemaType.isPrimitive()) { converters[i] = new PrimitiveFieldHandler(newConverter(events, schemaType, matchingThrift).asPrimitiveConverter(), matchingThrift, events); } else { converters[i] = new GroupFieldhandler(newConverter(events, schemaType, matchingThrift).asGroupConverter(), matchingThrift, events); } } }
Example 13
Source File: ParquetTypeVisitor.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Validates a list-shaped group and visits its element, if one is present.
 * The before/after callbacks for the repeated group and the element field are always paired.
 *
 * @param list    the outer list group; must not be repeated and must have exactly one field
 * @param visitor receives the callbacks and builds the result
 * @return the value of {@code visitor.list}, called with a null element result when the
 *         repeated group has no field
 * @throws IllegalArgumentException if a structural check fails
 */
private static <T> T visitList(GroupType list, ParquetTypeVisitor<T> visitor) {
  Preconditions.checkArgument(!list.isRepetition(Type.Repetition.REPEATED),
      "Invalid list: top-level group is repeated: %s", list);
  Preconditions.checkArgument(list.getFieldCount() == 1,
      "Invalid list: does not contain single repeated field: %s", list);

  final GroupType repeatedGroup = list.getFields().get(0).asGroupType();
  Preconditions.checkArgument(repeatedGroup.isRepetition(Type.Repetition.REPEATED),
      "Invalid list: inner group is not repeated");
  Preconditions.checkArgument(repeatedGroup.getFieldCount() <= 1,
      "Invalid list: repeated group is not a single field: %s", list);

  visitor.beforeRepeatedElement(repeatedGroup);
  try {
    final boolean hasElement = repeatedGroup.getFieldCount() > 0;
    T visitedElement = null;
    if (hasElement) {
      final Type element = repeatedGroup.getType(0);
      visitor.beforeElementField(element);
      try {
        visitedElement = visit(element, visitor);
      } finally {
        visitor.afterElementField(element);
      }
    }
    return visitor.list(list, visitedElement);
  } finally {
    visitor.afterRepeatedElement(repeatedGroup);
  }
}
Example 14
Source File: RowConverter.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a row converter with one converter slot per schema field.
 * The slots are filled only when {@code typeInfo} is a {@code CompositeType} with arity of at least 1.
 *
 * @param schema   Parquet group describing the row
 * @param typeInfo type information for the row
 * @param parent   holder stored on this converter
 * @param pos      position of this row inside the parent row
 */
public RowConverter(GroupType schema, TypeInformation<?> typeInfo, ParentDataHolder parent, int pos) {
  this.typeInfo = typeInfo;
  this.parentDataHolder = parent;
  this.posInParentRow = pos;
  this.converters = new Converter[schema.getFieldCount()];

  final boolean hasCompositeFields = typeInfo.getArity() >= 1 && (typeInfo instanceof CompositeType);
  if (!hasCompositeFields) {
    // Nothing to wire up; the converter slots stay unset.
    return;
  }

  final CompositeType<?> rowType = (CompositeType<?>) typeInfo;
  int fieldPos = 0;
  for (Type field : schema.getFields()) {
    converters[fieldPos] = createConverter(field, fieldPos, rowType.getTypeAt(fieldPos), this);
    fieldPos++;
  }
}
Example 15
Source File: LogicalListL2Converter.java From dremio-oss with Apache License 2.0 | 4 votes |
/**
 * Reports whether {@code schema} has exactly one field.
 *
 * @param schema the group to inspect
 * @return true when the field count is 1
 */
private boolean isSupportedSchema(GroupType schema) {
  final int fieldCount = schema.getFieldCount();
  return fieldCount == 1;
}
Example 16
Source File: ParquetAsJsonInputFormat.java From iow-hadoop-streaming with Apache License 2.0 | 4 votes |
private void groupToJson(JsonGenerator currentGenerator, SimpleGroup grp) throws IOException { GroupType gt = grp.getType(); currentGenerator.writeStartObject(); for(int i = 0; i < gt.getFieldCount(); i ++) { String field = gt.getFieldName(i); try { Type t = gt.getType(i); int repetition = 1; boolean repeated = false; if (t.getRepetition() == Type.Repetition.REPEATED) { repeated = true; repetition = grp.getFieldRepetitionCount(i); currentGenerator.writeArrayFieldStart(field); } else currentGenerator.writeFieldName(field); for(int j = 0; j < repetition; j ++) { if (t.isPrimitive()) { switch (t.asPrimitiveType().getPrimitiveTypeName()) { case BINARY: currentGenerator.writeString(grp.getString(i, j)); break; case INT32: currentGenerator.writeNumber(grp.getInteger(i, j)); break; case INT96: case INT64: // clumsy way - TODO - Subclass SimpleGroup or something like that currentGenerator.writeNumber(Long.parseLong(grp.getValueToString(i, j))); break; case DOUBLE: case FLOAT: currentGenerator.writeNumber(Double.parseDouble(grp.getValueToString(i, j))); break; case BOOLEAN: currentGenerator.writeBoolean(grp.getBoolean(i, j)); break; default: throw new RuntimeException("Can't handle type " + gt.getType(i)); } } else { groupToJson(currentGenerator, (SimpleGroup) grp.getGroup(i, j)); } } if (repeated) currentGenerator.writeEndArray(); } catch (Exception e) { if (e.getMessage().startsWith("not found") && gt.getType(i).getRepetition() == Type.Repetition.OPTIONAL) currentGenerator.writeNull(); else throw new RuntimeException(e); } } currentGenerator.writeEndObject(); }
Example 17
Source File: ParquetTypeVisitor.java From iceberg with Apache License 2.0 | 4 votes |
private static <T> T visitMap(GroupType map, ParquetTypeVisitor<T> visitor) { Preconditions.checkArgument(!map.isRepetition(Type.Repetition.REPEATED), "Invalid map: top-level group is repeated: %s", map); Preconditions.checkArgument(map.getFieldCount() == 1, "Invalid map: does not contain single repeated field: %s", map); GroupType repeatedKeyValue = map.getType(0).asGroupType(); Preconditions.checkArgument(repeatedKeyValue.isRepetition(Type.Repetition.REPEATED), "Invalid map: inner group is not repeated"); Preconditions.checkArgument(repeatedKeyValue.getFieldCount() <= 2, "Invalid map: repeated group does not have 2 fields"); visitor.beforeRepeatedKeyValue(repeatedKeyValue); try { T keyResult = null; T valueResult = null; switch (repeatedKeyValue.getFieldCount()) { case 2: // if there are 2 fields, both key and value are projected Type keyType = repeatedKeyValue.getType(0); visitor.beforeKeyField(keyType); try { keyResult = visit(keyType, visitor); } finally { visitor.afterKeyField(keyType); } Type valueType = repeatedKeyValue.getType(1); visitor.beforeValueField(valueType); try { valueResult = visit(valueType, visitor); } finally { visitor.afterValueField(valueType); } break; case 1: // if there is just one, use the name to determine what it is Type keyOrValue = repeatedKeyValue.getType(0); if (keyOrValue.getName().equalsIgnoreCase("key")) { visitor.beforeKeyField(keyOrValue); try { keyResult = visit(keyOrValue, visitor); } finally { visitor.afterKeyField(keyOrValue); } // value result remains null } else { visitor.beforeValueField(keyOrValue); try { valueResult = visit(keyOrValue, visitor); } finally { visitor.afterValueField(keyOrValue); } // key result remains null } break; default: // both results will remain null } return visitor.map(map, keyResult, valueResult); } finally { visitor.afterRepeatedKeyValue(repeatedKeyValue); } }
Example 18
Source File: ParquetTypeVisitor.java From presto with Apache License 2.0 | 4 votes |
public static <T> T visit(Type type, ParquetTypeVisitor<T> visitor) { if (type instanceof MessageType) { return visitor.message((MessageType) type, visitFields(type.asGroupType(), visitor)); } else if (type.isPrimitive()) { return visitor.primitive(type.asPrimitiveType()); } else { // if not a primitive, the typeId must be a group GroupType group = type.asGroupType(); OriginalType annotation = group.getOriginalType(); if (annotation == LIST) { checkArgument(!group.isRepetition(REPEATED), "Invalid list: top-level group is repeated: " + group); checkArgument(group.getFieldCount() == 1, "Invalid list: does not contain single repeated field: " + group); GroupType repeatedElement = group.getFields().get(0).asGroupType(); checkArgument(repeatedElement.isRepetition(REPEATED), "Invalid list: inner group is not repeated"); checkArgument(repeatedElement.getFieldCount() <= 1, "Invalid list: repeated group is not a single field: " + group); visitor.fieldNames.push(repeatedElement.getName()); try { T elementResult = null; if (repeatedElement.getFieldCount() > 0) { elementResult = visitField(repeatedElement.getType(0), visitor); } return visitor.list(group, elementResult); } finally { visitor.fieldNames.pop(); } } else if (annotation == MAP) { checkArgument(!group.isRepetition(REPEATED), "Invalid map: top-level group is repeated: " + group); checkArgument(group.getFieldCount() == 1, "Invalid map: does not contain single repeated field: " + group); GroupType repeatedKeyValue = group.getType(0).asGroupType(); checkArgument(repeatedKeyValue.isRepetition(REPEATED), "Invalid map: inner group is not repeated"); checkArgument(repeatedKeyValue.getFieldCount() <= 2, "Invalid map: repeated group does not have 2 fields"); visitor.fieldNames.push(repeatedKeyValue.getName()); try { T keyResult = null; T valueResult = null; if (repeatedKeyValue.getFieldCount() == 2) { keyResult = visitField(repeatedKeyValue.getType(0), visitor); valueResult = visitField(repeatedKeyValue.getType(1), visitor); 
} else if (repeatedKeyValue.getFieldCount() == 1) { Type keyOrValue = repeatedKeyValue.getType(0); if (keyOrValue.getName().equalsIgnoreCase("key")) { keyResult = visitField(keyOrValue, visitor); // value result remains null } else { valueResult = visitField(keyOrValue, visitor); // key result remains null } } return visitor.map(group, keyResult, valueResult); } finally { visitor.fieldNames.pop(); } } return visitor.struct(group, visitFields(group, visitor)); } }
Example 19
Source File: DataWritableGroupConverter.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Creates a converter with no parent and index 0, then allocates one root slot per table field.
 *
 * @param requestedSchema the fields that were requested
 * @param tableSchema     the full table schema
 */
public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema) {
  this(requestedSchema, null, 0, tableSchema);
  // The root array is sized by the table schema, not by the requested schema.
  this.rootMap = new Writable[tableSchema.getFieldCount()];
}
Example 20
Source File: LogicalListL1Converter.java From dremio-oss with Apache License 2.0 | 3 votes |
/** * Checks if the schema is similar to the following: * <pre> * optional group <name> (LIST) { * repeated group <list-name> { * <element-repetition> <element-type> <element-name>; * } * } * </pre> * * @param schema parquet group type * @return true is supported */ public static boolean isSupportedSchema(GroupType schema) { if (schema.getFieldCount() == 1) { Type type = schema.getType(0); // check: repeated group if (type.isPrimitive() || !type.isRepetition(REPEATED) || type.getOriginalType() != null) { return false; } return type.asGroupType().getFieldCount() == 1; } return false; }