Java Code Examples for org.apache.parquet.schema.Type#isRepetition()
The following examples show how to use
org.apache.parquet.schema.Type#isRepetition().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroSchemaConverter190Int96Avro17.java From datacollector with Apache License 2.0 | 7 votes |
private Schema convertFields(String name, List<Type> parquetFields) { List<Schema.Field> fields = new ArrayList<Schema.Field>(); for (Type parquetType : parquetFields) { Schema fieldSchema = convertField(parquetType); if (parquetType.isRepetition(REPEATED)) { throw new UnsupportedOperationException("REPEATED not supported outside LIST or MAP. Type: " + parquetType); } else if (parquetType.isRepetition(Type.Repetition.OPTIONAL)) { fields.add(new Schema.Field( parquetType.getName(), optional(fieldSchema), null, NULL_VALUE)); } else { // REQUIRED fields.add(new Schema.Field( parquetType.getName(), fieldSchema, null, (Object) null)); } } Schema schema = Schema.createRecord(name, null, null, false); schema.setFields(fields); return schema; }
Example 2
Source File: AvroWriteSupportInt96Avro17.java From datacollector with Apache License 2.0 | 6 votes |
@Override protected void writeObjectArray(GroupType type, Schema schema, Object[] array) { if (array.length > 0) { recordConsumer.startField(LIST_REPEATED_NAME, 0); GroupType repeatedType = type.getType(0).asGroupType(); Type elementType = repeatedType.getType(0); for (Object element : array) { recordConsumer.startGroup(); // repeated group array, middle layer if (element != null) { recordConsumer.startField(LIST_ELEMENT_NAME, 0); writeValue(elementType, schema.getElementType(), element); recordConsumer.endField(LIST_ELEMENT_NAME, 0); } else if (!elementType.isRepetition(Type.Repetition.OPTIONAL)) { throw new RuntimeException( "Null list element for " + schema.getName()); } recordConsumer.endGroup(); } recordConsumer.endField(LIST_REPEATED_NAME, 0); } }
Example 3
Source File: AvroWriteSupport.java From parquet-mr with Apache License 2.0 | 6 votes |
private void writeRecordFields(GroupType schema, Schema avroSchema, Object record) { List<Type> fields = schema.getFields(); List<Schema.Field> avroFields = avroSchema.getFields(); int index = 0; // parquet ignores Avro nulls, so index may differ for (int avroIndex = 0; avroIndex < avroFields.size(); avroIndex++) { Schema.Field avroField = avroFields.get(avroIndex); if (avroField.schema().getType().equals(Schema.Type.NULL)) { continue; } Type fieldType = fields.get(index); Object value = model.getField(record, avroField.name(), avroIndex); if (value != null) { recordConsumer.startField(fieldType.getName(), index); writeValue(fieldType, avroField.schema(), value); recordConsumer.endField(fieldType.getName(), index); } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) { throw new RuntimeException("Null-value for required field: " + avroField.name()); } index++; } }
Example 4
Source File: AvroSchemaConverter190Int96Avro18.java From datacollector with Apache License 2.0 | 6 votes |
private Schema convertFields(String name, List<Type> parquetFields) { List<Schema.Field> fields = new ArrayList<Schema.Field>(); for (Type parquetType : parquetFields) { Schema fieldSchema = convertField(parquetType); if (parquetType.isRepetition(REPEATED)) { throw new UnsupportedOperationException("REPEATED not supported outside LIST or MAP. Type: " + parquetType); } else if (parquetType.isRepetition(Type.Repetition.OPTIONAL)) { fields.add(new Schema.Field( parquetType.getName(), optional(fieldSchema), null, NULL_VALUE)); } else { // REQUIRED fields.add(new Schema.Field( parquetType.getName(), fieldSchema, null, (Object) null)); } } Schema schema = Schema.createRecord(name, null, null, false); schema.setFields(fields); return schema; }
Example 5
Source File: TajoWriteSupport.java From tajo with Apache License 2.0 | 6 votes |
private void writeRecordFields(GroupType schema, Schema tajoSchema, Tuple tuple) { List<Type> fields = schema.getFields(); // Parquet ignores Tajo NULL_TYPE columns, so the index may differ. int index = 0; for (int tajoIndex = 0; tajoIndex < tajoSchema.size(); ++tajoIndex) { Column column = tajoSchema.getColumn(tajoIndex); if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) { continue; } Type fieldType = fields.get(index); if (!tuple.isBlankOrNull(tajoIndex)) { recordConsumer.startField(fieldType.getName(), index); writeValue(column, tuple, tajoIndex); recordConsumer.endField(fieldType.getName(), index); } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) { throw new RuntimeException("Null-value for required field: " + column.getSimpleName()); } ++index; } }
Example 6
Source File: AvroWriteSupportInt96Avro18.java From datacollector with Apache License 2.0 | 6 votes |
@Override protected void writeObjectArray(GroupType type, Schema schema, Object[] array) { if (array.length > 0) { recordConsumer.startField(LIST_REPEATED_NAME, 0); GroupType repeatedType = type.getType(0).asGroupType(); Type elementType = repeatedType.getType(0); for (Object element : array) { recordConsumer.startGroup(); // repeated group array, middle layer if (element != null) { recordConsumer.startField(LIST_ELEMENT_NAME, 0); writeValue(elementType, schema.getElementType(), element); recordConsumer.endField(LIST_ELEMENT_NAME, 0); } else if (!elementType.isRepetition(Type.Repetition.OPTIONAL)) { throw new RuntimeException( "Null list element for " + schema.getName()); } recordConsumer.endGroup(); } recordConsumer.endField(LIST_REPEATED_NAME, 0); } }
Example 7
Source File: ThriftRecordConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Creates a converter for the elements of a list.
 *
 * <p>When the element type is OPTIONAL, the outcome depends on {@code ignoreNullElements}:
 * if set, a warning is logged; otherwise construction fails.
 *
 * @param listName list name used in the log and error messages
 * @param listEvents event list stored on this converter
 * @param repeatedType repeated group whose first child is the element type
 * @param thriftElement Thrift field passed to {@code newConverter} for the element
 * @throws ParquetDecodingException if the element type is OPTIONAL and null elements are
 *     not ignored
 */
public ElementConverter(String listName, List<TProtocol> listEvents, GroupType repeatedType, ThriftField thriftElement) {
  this.listEvents = listEvents;
  this.elementEvents = new ArrayList<TProtocol>();

  Type itemType = repeatedType.getType(0);
  if (itemType.isRepetition(Type.Repetition.OPTIONAL)) {
    if (!ignoreNullElements) {
      throw new ParquetDecodingException("Cannot read list " + listName
          + " with optional elements: set " + IGNORE_NULL_LIST_ELEMENTS
          + " to ignore nulls.");
    }
    LOG.warn("List " + listName + " has optional elements: null elements are ignored.");
  }

  elementConverter = newConverter(elementEvents, itemType, thriftElement);
}
Example 8
Source File: AvroWriteSupportInt96Avro18.java From datacollector with Apache License 2.0 | 6 votes |
@Override protected void writeCollection(GroupType type, Schema schema, Collection<?> collection) { if (collection.size() > 0) { recordConsumer.startField(LIST_REPEATED_NAME, 0); GroupType repeatedType = type.getType(0).asGroupType(); Type elementType = repeatedType.getType(0); for (Object element : collection) { recordConsumer.startGroup(); // repeated group array, middle layer if (element != null) { recordConsumer.startField(LIST_ELEMENT_NAME, 0); writeValue(elementType, schema.getElementType(), element); recordConsumer.endField(LIST_ELEMENT_NAME, 0); } else if (!elementType.isRepetition(Type.Repetition.OPTIONAL)) { throw new RuntimeException( "Null list element for " + schema.getName()); } recordConsumer.endGroup(); } recordConsumer.endField(LIST_REPEATED_NAME, 0); } }
Example 9
Source File: SimpleGroup.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Appends {@code value} to the values stored for the field at {@code fieldIndex}.
 *
 * @param fieldIndex index of the field in the schema and in the data array
 * @param value value to append
 * @throws IllegalStateException if the field is not REPEATED and already holds a value
 */
private void add(int fieldIndex, Primitive value) {
  Type fieldType = schema.getType(fieldIndex);
  List<Object> values = data[fieldIndex];

  boolean singleValued = !fieldType.isRepetition(Type.Repetition.REPEATED);
  if (singleValued && !values.isEmpty()) {
    throw new IllegalStateException("field "+fieldIndex+" (" + fieldType.getName() + ") can not have more than one value: " + values);
  }
  values.add(value);
}
Example 10
Source File: ParquetGroup.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Appends {@code value} to the values stored for the field at {@code fieldIndex}.
 *
 * @param fieldIndex index of the field in the schema and in the data array
 * @param value value to append
 * @throws IllegalStateException if the field is not REPEATED and already holds a value
 */
public void add(int fieldIndex, Primitive value) {
  Type fieldType = this.schema.getType(fieldIndex);
  List<Object> values = this.data[fieldIndex];

  // A non-repeated field may hold at most one value.
  if (!fieldType.isRepetition(REPEATED) && !values.isEmpty()) {
    throw new IllegalStateException(
        "field " + fieldIndex + " (" + fieldType.getName() + ") can not have more than one value: " + values);
  }
  values.add(value);
}
Example 11
Source File: AvroWriteSupportInt96Avro17.java From datacollector with Apache License 2.0 | 5 votes |
private <V> void writeMap(GroupType schema, Schema avroSchema, Map<CharSequence, V> map) { GroupType innerGroup = schema.getType(0).asGroupType(); Type keyType = innerGroup.getType(0); Type valueType = innerGroup.getType(1); recordConsumer.startGroup(); // group wrapper (original type MAP) if (map.size() > 0) { recordConsumer.startField(MAP_REPEATED_NAME, 0); for (Map.Entry<CharSequence, V> entry : map.entrySet()) { recordConsumer.startGroup(); // repeated group key_value, middle layer recordConsumer.startField(MAP_KEY_NAME, 0); writeValue(keyType, MAP_KEY_SCHEMA, entry.getKey()); recordConsumer.endField(MAP_KEY_NAME, 0); V value = entry.getValue(); if (value != null) { recordConsumer.startField(MAP_VALUE_NAME, 1); writeValue(valueType, avroSchema.getValueType(), value); recordConsumer.endField(MAP_VALUE_NAME, 1); } else if (!valueType.isRepetition(Type.Repetition.OPTIONAL)) { throw new RuntimeException("Null map value for " + avroSchema.getName()); } recordConsumer.endGroup(); } recordConsumer.endField(MAP_REPEATED_NAME, 0); } recordConsumer.endGroup(); }
Example 12
Source File: AvroWriteSupport.java From parquet-mr with Apache License 2.0 | 5 votes |
private <V> void writeMap(GroupType schema, Schema avroSchema, Map<CharSequence, V> map) { GroupType innerGroup = schema.getType(0).asGroupType(); Type keyType = innerGroup.getType(0); Type valueType = innerGroup.getType(1); recordConsumer.startGroup(); // group wrapper (original type MAP) if (map.size() > 0) { recordConsumer.startField(MAP_REPEATED_NAME, 0); for (Map.Entry<CharSequence, V> entry : map.entrySet()) { recordConsumer.startGroup(); // repeated group key_value, middle layer recordConsumer.startField(MAP_KEY_NAME, 0); writeValue(keyType, MAP_KEY_SCHEMA, entry.getKey()); recordConsumer.endField(MAP_KEY_NAME, 0); V value = entry.getValue(); if (value != null) { recordConsumer.startField(MAP_VALUE_NAME, 1); writeValue(valueType, avroSchema.getValueType(), value); recordConsumer.endField(MAP_VALUE_NAME, 1); } else if (!valueType.isRepetition(Type.Repetition.OPTIONAL)) { throw new RuntimeException("Null map value for " + avroSchema.getName()); } recordConsumer.endGroup(); } recordConsumer.endField(MAP_REPEATED_NAME, 0); } recordConsumer.endGroup(); }
Example 13
Source File: TestStatistics.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Writes {@code recordCount} generated groups to {@code writer}.
 *
 * <p>For each column a value is taken from the matching entry of {@code randomGenerators}
 * and appended under the column name. An OPTIONAL column is left out of the group when its
 * generator reports that a null should be generated.
 *
 * @param writer destination for the generated groups
 * @throws IOException declared by this method; the only call here that takes the writer
 *     is {@code writer.write}
 */
@Override
public void write(ParquetWriter<Group> writer) throws IOException {
  int written = 0;
  while (written < recordCount) {
    Group row = new SimpleGroup(super.schema);
    int columnCnt = schema.getFieldCount();
    for (int col = 0; col < columnCnt; ++col) {
      Type colType = schema.getType(col);
      RandomValueGenerator<?> source = randomGenerators.get(col);
      if (colType.isRepetition(OPTIONAL) && source.shouldGenerateNull()) {
        continue;
      }

      switch (colType.asPrimitiveType().getPrimitiveTypeName()) {
        case BINARY:
        case FIXED_LEN_BYTE_ARRAY:
        case INT96:
          row.append(colType.getName(), ((RandomBinaryBase<?>) source).nextBinaryValue());
          break;
        case INT32:
          row.append(colType.getName(), (Integer) source.nextValue());
          break;
        case INT64:
          row.append(colType.getName(), (Long) source.nextValue());
          break;
        case FLOAT:
          row.append(colType.getName(), (Float) source.nextValue());
          break;
        case DOUBLE:
          row.append(colType.getName(), (Double) source.nextValue());
          break;
        case BOOLEAN:
          row.append(colType.getName(), (Boolean) source.nextValue());
          break;
      }
    }
    writer.write(row);
    written++;
  }
}
Example 14
Source File: TupleConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Starts a new tuple of {@code schemaSize} fields.
 *
 * <p>When {@code elephantBirdCompatible} is set, every primitive OPTIONAL field of type
 * INT32, INT64, FLOAT, DOUBLE or BOOLEAN is pre-filled with the corresponding zero constant;
 * BOOLEAN uses {@code I32_ZERO}, as INT32 does. Other fields are left untouched.
 *
 * @throws RuntimeException wrapping any {@code ExecException} raised while setting a field
 */
@Override
public final void start() {
  currentTuple = TF.newTuple(schemaSize);
  if (!elephantBirdCompatible) {
    return;
  }

  try {
    int position = 0;
    for (Type field : parquetSchema.getFields()) {
      if (field.isPrimitive() && field.isRepetition(Repetition.OPTIONAL)) {
        PrimitiveType primitive = field.asPrimitiveType();
        switch (primitive.getPrimitiveTypeName()) {
          case INT32:
          case BOOLEAN:
            // Both share the 32-bit integer zero constant.
            currentTuple.set(position, I32_ZERO);
            break;
          case INT64:
            currentTuple.set(position, I64_ZERO);
            break;
          case FLOAT:
            currentTuple.set(position, FLOAT_ZERO);
            break;
          case DOUBLE:
            currentTuple.set(position, DOUBLE_ZERO);
            break;
        }
      }
      position++;
    }
  } catch (ExecException e) {
    throw new RuntimeException(e);
  }
}
Example 15
Source File: TajoSchemaConverter.java From tajo with Apache License 2.0 | 5 votes |
/**
 * Converts a list of Parquet fields into a schema built from the converted columns.
 *
 * <p>Fields are processed in order; each is checked for REPEATED before it is passed to
 * {@code convertField}.
 *
 * @param parquetFields Parquet fields to convert
 * @return schema built by {@code SchemaBuilder} from all converted columns
 * @throws RuntimeException if a field is REPEATED
 */
private Schema convertFields(List<Type> parquetFields) {
  List<Column> converted = new ArrayList<>();
  for (Type field : parquetFields) {
    boolean repeated = field.isRepetition(Type.Repetition.REPEATED);
    if (repeated) {
      throw new RuntimeException("REPEATED not supported outside LIST or" + " MAP. Type: " + field);
    }
    Column column = convertField(field);
    converted.add(column);
  }

  Column[] columnArray = converted.toArray(new Column[converted.size()]);
  return SchemaBuilder.builder().addAll(columnArray).build();
}
Example 16
Source File: ParquetValueReaders.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Wraps {@code reader} in an {@code OptionReader} when {@code type} is OPTIONAL.
 *
 * @param type Parquet type whose repetition decides whether wrapping happens
 * @param definitionLevel definition level passed to the {@code OptionReader}
 * @param reader reader to wrap or return
 * @param <T> type of the values read
 * @return {@code reader} itself when the type is not OPTIONAL, otherwise a new
 *     {@code OptionReader} around it
 */
public static <T> ParquetValueReader<T> option(Type type, int definitionLevel, ParquetValueReader<T> reader) {
  boolean optional = type.isRepetition(Type.Repetition.OPTIONAL);
  if (!optional) {
    return reader;
  }
  return new OptionReader<>(definitionLevel, reader);
}
Example 17
Source File: ParquetValueWriters.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Wraps {@code writer} in an {@code OptionWriter} when {@code type} is OPTIONAL.
 *
 * @param type Parquet type whose repetition decides whether wrapping happens
 * @param definitionLevel definition level passed to the {@code OptionWriter}
 * @param writer writer to wrap or return
 * @param <T> type of the values written
 * @return {@code writer} itself when the type is not OPTIONAL, otherwise a new
 *     {@code OptionWriter} around it
 */
public static <T> ParquetValueWriter<T> option(Type type, int definitionLevel, ParquetValueWriter<T> writer) {
  boolean optional = type.isRepetition(Type.Repetition.OPTIONAL);
  if (!optional) {
    return writer;
  }
  return new OptionWriter<>(definitionLevel, writer);
}
Example 18
Source File: TestColumnIndexes.java From parquet-mr with Apache License 2.0 | 5 votes |
private Group createGroup(List<Supplier<?>> generators, Random random) { Group group = FACTORY.newGroup(); for (int column = 0, columnCnt = SCHEMA.getFieldCount(); column < columnCnt; ++column) { Type type = SCHEMA.getType(column); Supplier<?> generator = generators.get(column); // 2% chance of null value for an optional column if (generator == null || (type.isRepetition(OPTIONAL) && random.nextInt(50) == 0)) { continue; } switch (type.asPrimitiveType().getPrimitiveTypeName()) { case BINARY: case FIXED_LEN_BYTE_ARRAY: case INT96: group.append(type.getName(), (Binary) generator.get()); break; case INT32: group.append(type.getName(), (Integer) generator.get()); break; case INT64: group.append(type.getName(), (Long) generator.get()); break; case FLOAT: group.append(type.getName(), (Float) generator.get()); break; case DOUBLE: group.append(type.getName(), (Double) generator.get()); break; case BOOLEAN: group.append(type.getName(), (Boolean) generator.get()); break; } } return group; }
Example 19
Source File: HiveClient.java From garmadon with Apache License 2.0 | 5 votes |
/**
 * Maps a Parquet field to a Hive type name.
 *
 * <p>The primitive type name is mapped as BINARY to {@code string}, INT32 to {@code int},
 * INT64 to {@code bigint}, FLOAT to {@code float}, DOUBLE to {@code double} and BOOLEAN to
 * {@code boolean}. A REPEATED field has its mapped type wrapped as {@code array<...>}.
 *
 * <p>NOTE(review): {@code field.asPrimitiveType()} is called with no {@code isPrimitive()}
 * check; behaviour for group types depends on that call. Confirm callers pass primitives only.
 *
 * @param field Parquet field to map
 * @return the Hive type name
 * @throws Exception if the primitive type name is none of the mapped ones
 */
protected String inferHiveType(Type field) throws Exception {
  final String hiveType;
  switch (field.asPrimitiveType().getPrimitiveTypeName().name()) {
    case "BINARY":
      hiveType = "string";
      break;
    case "INT32":
      hiveType = "int";
      break;
    case "INT64":
      hiveType = "bigint";
      break;
    case "FLOAT":
      hiveType = "float";
      break;
    case "DOUBLE":
      hiveType = "double";
      break;
    case "BOOLEAN":
      hiveType = "boolean";
      break;
    default:
      throw new Exception("Unsupported Data Type: " + field.asPrimitiveType().getPrimitiveTypeName().name());
  }

  return field.isRepetition(Type.Repetition.REPEATED)
      ? "array<" + hiveType + ">"
      : hiveType;
}
Example 20
Source File: MapKeyValuesSchemaConverter.java From presto with Apache License 2.0 | 5 votes |
/**
 * Creates a group type that wraps a repeated nested type.
 *
 * @param repetition repetition of the new group
 * @param alias name of the new group
 * @param originalType original type annotation of the new group
 * @param nested type placed inside the group; must be REPEATED
 * @return the new group type containing {@code nested}
 * @throws IllegalArgumentException if {@code nested} is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
  if (nested.isRepetition(Repetition.REPEATED)) {
    return new GroupType(repetition, alias, originalType, nested);
  }
  throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}