org.apache.parquet.schema.Type Java Examples
The following examples show how to use org.apache.parquet.schema.Type.
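Before the project examples, here is a minimal, self-contained sketch (hypothetical code, not taken from any project below) showing the role Type plays: it is the common supertype of PrimitiveType and GroupType, so traversing a schema is a matter of testing isPrimitive() and recursing through asGroupType().

import java.util.List;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.parquet.schema.Type;

public class TypeTour {
  public static void main(String[] args) {
    // Parse a small schema; every node of the resulting tree is a Type.
    MessageType schema = MessageTypeParser.parseMessageType(
        "message Document { "
            + "required int64 DocId; "
            + "optional group Links { repeated int64 Forward; } "
            + "}");
    printFields(schema, "");
  }

  private static void printFields(GroupType group, String indent) {
    for (Type field : group.getFields()) {
      System.out.println(indent + field.getName()
          + " (" + field.getRepetition() + ")");
      if (!field.isPrimitive()) {
        printFields(field.asGroupType(), indent + "  "); // recurse into nested groups
      }
    }
  }
}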
Example #1
Source File: AvroRecordConverter.java From parquet-mr with Apache License 2.0
public AvroCollectionConverter(ParentValueContainer parent, GroupType type,
                               Schema avroSchema, GenericData model,
                               Class<?> containerClass) {
  this.parent = parent;
  this.avroSchema = avroSchema;
  this.containerClass = containerClass;
  Schema elementSchema = AvroSchemaConverter.getNonNull(avroSchema.getElementType());
  Type repeatedType = type.getType(0);
  // always determine whether the repeated type is the element type by
  // matching it against the element schema.
  if (isElementType(repeatedType, elementSchema)) {
    // the element type is the repeated type (and required)
    converter = newConverter(elementSchema, repeatedType, model, new ParentValueContainer() {
      @Override
      @SuppressWarnings("unchecked")
      public void add(Object value) {
        container.add(value);
      }
    });
  } else {
    // the element is wrapped in a synthetic group and may be optional
    converter = new ElementConverter(repeatedType.asGroupType(), elementSchema, model);
  }
}
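The isElementType check above exists because Parquet has two ways to lay out LIST elements: the repeated field may itself be the element type, or it may be a synthetic single-field wrapper group. The following hypothetical schemas (a hedged illustration, not from parquet-mr) show both shapes:

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.parquet.schema.Type;

public class ListShapes {
  public static void main(String[] args) {
    // Older writers: the repeated group IS the element type (a struct with
    // "latitude"/"longitude" fields), with no synthetic wrapper.
    MessageType direct = MessageTypeParser.parseMessageType(
        "message m1 { required group locations (LIST) { "
            + "repeated group element { required double latitude; required double longitude; } "
            + "} }");

    // Standard three-level layout: the repeated "list" group is a synthetic
    // wrapper around a single (possibly optional) element field.
    MessageType wrapped = MessageTypeParser.parseMessageType(
        "message m2 { required group locations (LIST) { "
            + "repeated group list { optional binary element (UTF8); } "
            + "} }");

    Type directRepeated = direct.getType(0).asGroupType().getType(0);
    Type wrappedRepeated = wrapped.getType(0).asGroupType().getType(0);
    // A converter must decide which shape it is looking at; AvroRecordConverter
    // does so by matching the repeated type against the Avro element schema.
    System.out.println(directRepeated);
    System.out.println(wrappedRepeated);
  }
}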
Example #2
Source File: TestPruneColumnsCommand.java From parquet-mr with Apache License 2.0
@Test
public void testPruneMultiColumns() throws Exception {
  // Create the Parquet file
  String inputFile = createParquetFile("input");
  String outputFile = createTempFile("output");

  // Remove columns
  String[] cargs = {inputFile, outputFile, "Name", "Gender"};
  executeCommandLine(cargs);

  // Verify the schema is not changed for the columns that were not pruned
  ParquetMetadata pmd = ParquetFileReader.readFooter(conf, new Path(outputFile), ParquetMetadataConverter.NO_FILTER);
  MessageType schema = pmd.getFileMetaData().getSchema();
  List<Type> fields = schema.getFields();
  assertEquals(fields.size(), 2);
  assertEquals(fields.get(0).getName(), "DocId");
  assertEquals(fields.get(1).getName(), "Links");
  List<Type> subFields = fields.get(1).asGroupType().getFields();
  assertEquals(subFields.size(), 2);
  assertEquals(subFields.get(0).getName(), "Backward");
  assertEquals(subFields.get(1).getName(), "Forward");

  // Verify the data is not changed for the columns that were not pruned
  List<String> prunePaths = Arrays.asList("Name", "Gender");
  validateColumns(inputFile, prunePaths);
}
Example #3
Source File: ThriftSchemaConverter.java From parquet-mr with Apache License 2.0
/**
 * Returns whether the given type is the element type of a list or is a
 * synthetic group with one field that is the element type. This is
 * determined by checking whether the type can be a synthetic group and by
 * checking whether a potential synthetic group matches the expected
 * ThriftField.
 * <p>
 * This method never guesses because the expected ThriftField is known.
 *
 * @param repeatedType a type that may be the element type
 * @param thriftElement the expected Schema for list elements
 * @return {@code true} if the repeatedType is the element schema
 */
static boolean isListElementType(Type repeatedType, ThriftField thriftElement) {
  if (repeatedType.isPrimitive() ||
      (repeatedType.asGroupType().getFieldCount() != 1) ||
      (repeatedType.asGroupType().getType(0).isRepetition(REPEATED))) {
    // The repeated type must be the element type because it is an invalid
    // synthetic wrapper. Must be a group with one optional or required field
    return true;
  } else if (thriftElement != null && thriftElement.getType() instanceof StructType) {
    Set<String> fieldNames = new HashSet<String>();
    for (ThriftField field : ((StructType) thriftElement.getType()).getChildren()) {
      fieldNames.add(field.getName());
    }
    // If the repeated type is a subset of the structure of the ThriftField,
    // then it must be the element type.
    return fieldNames.contains(repeatedType.asGroupType().getFieldName(0));
  }
  return false;
}
Example #4
Source File: ThriftRecordConverter.java From parquet-mr with Apache License 2.0
public ElementConverter(String listName, List<TProtocol> listEvents,
                        GroupType repeatedType, ThriftField thriftElement) {
  this.listEvents = listEvents;
  this.elementEvents = new ArrayList<TProtocol>();
  Type elementType = repeatedType.getType(0);
  if (elementType.isRepetition(Type.Repetition.OPTIONAL)) {
    if (ignoreNullElements) {
      LOG.warn("List " + listName + " has optional elements: null elements are ignored.");
    } else {
      throw new ParquetDecodingException("Cannot read list " + listName +
          " with optional elements: set " + IGNORE_NULL_LIST_ELEMENTS +
          " to ignore nulls.");
    }
  }
  elementConverter = newConverter(elementEvents, elementType, thriftElement);
}
Example #5
Source File: SimpleGroupConverter.java From parquet-mr with Apache License 2.0
SimpleGroupConverter(SimpleGroupConverter parent, int index, GroupType schema) {
  this.parent = parent;
  this.index = index;
  converters = new Converter[schema.getFieldCount()];
  for (int i = 0; i < converters.length; i++) {
    final Type type = schema.getType(i);
    if (type.isPrimitive()) {
      converters[i] = new SimplePrimitiveConverter(this, i);
    } else {
      converters[i] = new SimpleGroupConverter(this, i, type.asGroupType());
    }
  }
}
Example #6
Source File: GroupWriter.java From parquet-mr with Apache License 2.0
private void writeGroup(Group group, GroupType type) {
  int fieldCount = type.getFieldCount();
  for (int field = 0; field < fieldCount; ++field) {
    int valueCount = group.getFieldRepetitionCount(field);
    if (valueCount > 0) {
      Type fieldType = type.getType(field);
      String fieldName = fieldType.getName();
      recordConsumer.startField(fieldName, field);
      for (int index = 0; index < valueCount; ++index) {
        if (fieldType.isPrimitive()) {
          group.writeValue(field, index, recordConsumer);
        } else {
          recordConsumer.startGroup();
          writeGroup(group.getGroup(field, index), fieldType.asGroupType());
          recordConsumer.endGroup();
        }
      }
      recordConsumer.endField(fieldName, field);
    }
  }
}
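This method is the write-side driver used by the example object model. As a hedged usage sketch (the output path and field names are invented for illustration), Groups built with SimpleGroupFactory pass through this recursion when handed to an ExampleParquetWriter:

import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class WriteGroups {
  public static void main(String[] args) throws Exception {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message user { required int64 id; optional group name { optional binary first (UTF8); } }");
    SimpleGroupFactory factory = new SimpleGroupFactory(schema);

    Group user = factory.newGroup().append("id", 42L);
    user.addGroup("name").append("first", "Ada"); // nested group, emitted via startGroup()/endGroup()

    try (ParquetWriter<Group> writer = ExampleParquetWriter
        .builder(new Path("/tmp/users.parquet")) // hypothetical output path
        .withType(schema)
        .build()) {
      writer.write(user); // driven internally by a writeGroup recursion like the one above
    }
  }
}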
Example #7
Source File: ProtoWriteSupport.java From parquet-mr with Apache License 2.0
private FieldWriter createWriter(FieldDescriptor fieldDescriptor, Type type) {
  switch (fieldDescriptor.getJavaType()) {
    case STRING: return new StringWriter();
    case MESSAGE: return createMessageWriter(fieldDescriptor, type);
    case INT: return new IntWriter();
    case LONG: return new LongWriter();
    case FLOAT: return new FloatWriter();
    case DOUBLE: return new DoubleWriter();
    case ENUM: return new EnumWriter();
    case BOOLEAN: return new BooleanWriter();
    case BYTE_STRING: return new BinaryWriter();
  }
  return unknownType(fieldDescriptor); // should not be executed, always throws exception.
}
Example #8
Source File: ParquetSchemaUtil.java From iceberg with Apache License 2.0
/**
 * Prunes columns from a Parquet file schema that was written without field ids.
 * <p>
 * Files that were written without field ids are read assuming that schema evolution preserved
 * column order. Deleting columns was not allowed.
 * <p>
 * The order of columns in the resulting Parquet schema matches the Parquet file.
 *
 * @param fileSchema schema from a Parquet file that does not have field ids.
 * @param expectedSchema expected schema
 * @return a parquet schema pruned using the expected schema
 */
public static MessageType pruneColumnsFallback(MessageType fileSchema, Schema expectedSchema) {
  Set<Integer> selectedIds = Sets.newHashSet();

  for (Types.NestedField field : expectedSchema.columns()) {
    selectedIds.add(field.fieldId());
  }

  MessageTypeBuilder builder = org.apache.parquet.schema.Types.buildMessage();

  int ordinal = 1;
  for (Type type : fileSchema.getFields()) {
    if (selectedIds.contains(ordinal)) {
      builder.addField(type.withId(ordinal));
    }
    ordinal += 1;
  }

  return builder.named(fileSchema.getName());
}
Example #9
Source File: DataWritableGroupConverter.java From parquet-mr with Apache License 2.0
public DataWritableGroupConverter(final GroupType selectedGroupType, final HiveGroupConverter parent,
    final int index, final GroupType containingGroupType) {
  this.parent = parent;
  this.index = index;
  final int totalFieldCount = containingGroupType.getFieldCount();
  final int selectedFieldCount = selectedGroupType.getFieldCount();

  currentArr = new Object[totalFieldCount];
  converters = new Converter[selectedFieldCount];

  List<Type> selectedFields = selectedGroupType.getFields();
  for (int i = 0; i < selectedFieldCount; i++) {
    Type subtype = selectedFields.get(i);
    if (containingGroupType.getFields().contains(subtype)) {
      converters[i] = getConverterFromDescription(subtype,
          containingGroupType.getFieldIndex(subtype.getName()), this);
    } else {
      throw new IllegalStateException("Group type [" + containingGroupType +
          "] does not contain requested field: " + subtype);
    }
  }
}
Example #10
Source File: HiveClientTest.java From garmadon with Apache License 2.0
@Test
public void createTableWithoutIssue() throws SQLException {
  PrimitiveType appId = new PrimitiveType(Type.Repetition.OPTIONAL,
      PrimitiveType.PrimitiveTypeName.BINARY, "app_id");
  MessageType schema = new MessageType("fs", appId);
  String table = "fs";
  String location = "file:" + hdfsTemp + "/garmadon_database/fs";

  HiveClient hiveClient = new HiveClient(driverName, "jdbc:hive2://localhost:" + port,
      "garmadon", hdfsTemp + "/garmadon_database");
  hiveClient.createTableIfNotExist(table, schema, location);

  HashMap<String, String> result = getResultHashTableDesc(hiveClient, table);
  assertEquals(location, result.get("Location"));
  assertEquals("EXTERNAL_TABLE", result.get("Table Type").trim());
  assertEquals("string", result.get("day"));
  assertEquals("string", result.get("app_id"));
}
Example #11
Source File: ParquetGroup.java From incubator-gobblin with Apache License 2.0
public String toString(String indent) {
  StringBuilder result = new StringBuilder();
  int i = 0;
  for (Type field : this.schema.getFields()) {
    String name = field.getName();
    List<Object> values = this.data[i];
    for (Object value : values) {
      result.append(indent).append(name);
      if (value == null) {
        result.append(": NULL\n");
      } else if (value instanceof Group) {
        result.append("\n").append(((ParquetGroup) value).toString(indent + " "));
      } else {
        result.append(": ").append(value.toString()).append("\n");
      }
    }
    i++;
  }
  return result.toString();
}
Example #12
Source File: LogicalListL2Converter.java From dremio-oss with Apache License 2.0
@Override
protected void addChildConverter(String fieldName, OutputMutator mutator,
    List<Field> arrowSchema, Iterator<SchemaPath> colIterator, Type type,
    Function<String, String> childNameResolver) {
  final String nameForChild = "inner";
  // Column name to ID mapping creates child entry as 'columnName'.list.element
  // So, we will append 'list.element' so that name to ID matching works correctly
  final String fullChildName = fieldName.concat(".").concat("list.element");
  if (type.isPrimitive()) {
    converters.add(getConverterForType(fullChildName, type.asPrimitiveType()));
  } else {
    final GroupType groupType = type.asGroupType();
    Collection<SchemaPath> c = Lists.newArrayList(colIterator);
    if (arrowSchema != null) {
      converters.add(groupConverterFromArrowSchema(fullChildName, "$data$", groupType, c));
    } else {
      converters.add(defaultGroupConverter(fullChildName, mutator, groupType, c, null));
    }
  }
}
Example #13
Source File: AvroWriteSupport.java From parquet-mr with Apache License 2.0
private void writeRecordFields(GroupType schema, Schema avroSchema, Object record) {
  List<Type> fields = schema.getFields();
  List<Schema.Field> avroFields = avroSchema.getFields();
  int index = 0; // parquet ignores Avro nulls, so index may differ
  for (int avroIndex = 0; avroIndex < avroFields.size(); avroIndex++) {
    Schema.Field avroField = avroFields.get(avroIndex);
    if (avroField.schema().getType().equals(Schema.Type.NULL)) {
      continue;
    }
    Type fieldType = fields.get(index);
    Object value = model.getField(record, avroField.name(), avroIndex);
    if (value != null) {
      recordConsumer.startField(fieldType.getName(), index);
      writeValue(fieldType, avroField.schema(), value);
      recordConsumer.endField(fieldType.getName(), index);
    } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) {
      throw new RuntimeException("Null-value for required field: " + avroField.name());
    }
    index++;
  }
}
Example #14
Source File: AvroSchemaConverterLogicalTypesPre19.java From datacollector with Apache License 2.0
private Schema convertFields(String name, List<Type> parquetFields) {
  List<Schema.Field> fields = new ArrayList<Schema.Field>();
  for (Type parquetType : parquetFields) {
    Schema fieldSchema = convertField(parquetType);
    if (parquetType.isRepetition(REPEATED)) {
      throw new UnsupportedOperationException("REPEATED not supported outside LIST or MAP. Type: " + parquetType);
    } else if (parquetType.isRepetition(Type.Repetition.OPTIONAL)) {
      fields.add(new Schema.Field(parquetType.getName(), optional(fieldSchema), null, NullNode.getInstance()));
    } else { // REQUIRED
      fields.add(new Schema.Field(parquetType.getName(), fieldSchema, null, null));
    }
  }
  Schema schema = Schema.createRecord(name, null, null, false);
  schema.setFields(fields);
  return schema;
}
Example #15
Source File: SimpleGroup.java From parquet-mr with Apache License 2.0
private StringBuilder appendToString(StringBuilder builder, String indent) {
  int i = 0;
  for (Type field : schema.getFields()) {
    String name = field.getName();
    List<Object> values = data[i];
    ++i;
    if (values != null && !values.isEmpty()) {
      for (Object value : values) {
        builder.append(indent).append(name);
        if (value == null) {
          builder.append(": NULL\n");
        } else if (value instanceof Group) {
          builder.append('\n');
          ((SimpleGroup) value).appendToString(builder, indent + " ");
        } else {
          builder.append(": ").append(value.toString()).append('\n');
        }
      }
    }
  }
  return builder;
}
Example #16
Source File: ParquetValueReaders.java From iceberg with Apache License 2.0
public static <T> ParquetValueReader<T> option(Type type, int definitionLevel,
                                               ParquetValueReader<T> reader) {
  if (type.isRepetition(Type.Repetition.OPTIONAL)) {
    return new OptionReader<>(definitionLevel, reader);
  }
  return reader;
}
Example #17
Source File: AvroSchemaConverterLogicalTypesPre19.java From datacollector with Apache License 2.0
public MessageType convert(Schema avroSchema) {
  LOG.info("Using customized AvroSchemaConverter utility to convert: " + avroSchema.toString());
  if (!avroSchema.getType().equals(Schema.Type.RECORD)) {
    throw new IllegalArgumentException("Avro schema must be a record.");
  }
  return new MessageType(avroSchema.getFullName(), convertFields(avroSchema.getFields()));
}
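For comparison with this customized converter, the stock parquet-avro AvroSchemaConverter is driven the same way; a minimal hedged sketch (the Avro schema contents are invented for illustration):

import org.apache.avro.Schema;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.schema.MessageType;

public class ConvertAvro {
  public static void main(String[] args) {
    // Invented record schema with one required and one nullable field.
    Schema avro = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
            + "{\"name\":\"id\",\"type\":\"long\"},"
            + "{\"name\":\"email\",\"type\":[\"null\",\"string\"],\"default\":null}]}");
    MessageType parquet = new AvroSchemaConverter().convert(avro);
    System.out.println(parquet); // the null union becomes an optional Parquet field
  }
}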
Example #18
Source File: TajoRecordConverter.java From tajo with Apache License 2.0
/**
 * Creates a new TajoRecordConverter.
 *
 * @param parquetSchema The Parquet schema of the projection.
 * @param tajoReadSchema The Tajo schema of the table.
 * @param projectionMap An array mapping the projection column to the column
 *                      index in the table.
 */
public TajoRecordConverter(GroupType parquetSchema, Schema tajoReadSchema, int[] projectionMap) {
  this.parquetSchema = parquetSchema;
  this.tajoReadSchema = tajoReadSchema;
  this.projectionMap = projectionMap;
  this.tupleSize = tajoReadSchema.size();

  // The projectionMap.length does not match parquetSchema.getFieldCount()
  // when the projection contains NULL_TYPE columns. We will skip over the
  // NULL_TYPE columns when we construct the converters and populate the
  // NULL_TYPE columns with NullDatums in start().
  int index = 0;
  this.converters = new Converter[parquetSchema.getFieldCount()];
  for (int i = 0; i < projectionMap.length; ++i) {
    final int projectionIndex = projectionMap[i];
    Column column = tajoReadSchema.getColumn(projectionIndex);
    if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) {
      continue;
    }
    Type type = parquetSchema.getType(index);
    final int writeIndex = i;
    converters[index] = newConverter(column, type, new ParentValueContainer() {
      @Override
      void add(Object value) {
        TajoRecordConverter.this.set(writeIndex, value);
      }
    });
    ++index;
  }
}
Example #19
Source File: ParquetFileAccessor.java From pxf with Apache License 2.0
/**
 * Returns the parquet record filter for the given filter string
 *
 * @param filterString the filter string
 * @param originalFieldsMap a map of field names to types
 * @param schema the parquet schema
 * @return the parquet record filter for the given filter string
 */
private FilterCompat.Filter getRecordFilter(String filterString,
    Map<String, Type> originalFieldsMap, MessageType schema) {
  if (StringUtils.isBlank(filterString)) {
    return FilterCompat.NOOP;
  }

  ParquetRecordFilterBuilder filterBuilder = new ParquetRecordFilterBuilder(
      context.getTupleDescription(), originalFieldsMap);
  TreeVisitor pruner = new ParquetOperatorPrunerAndTransformer(
      context.getTupleDescription(), originalFieldsMap, SUPPORTED_OPERATORS);

  try {
    // Parse the filter string into an expression tree Node
    Node root = new FilterParser().parse(filterString);
    // Prune the parsed tree down to the supported operators, then traverse
    // the pruned tree with the ParquetRecordFilterBuilder to produce a
    // record filter for parquet
    TRAVERSER.traverse(root, pruner, filterBuilder);
    return filterBuilder.getRecordFilter();
  } catch (Exception e) {
    LOG.error(String.format("%s-%d: %s--%s Unable to generate Parquet Record Filter for filter",
        context.getTransactionId(), context.getSegmentId(), context.getDataSource(),
        context.getFilterString()), e);
    return FilterCompat.NOOP;
  }
}
Example #20
Source File: ParquetTypeVisitor.java From iceberg with Apache License 2.0
private static <T> T visitField(Type field, ParquetTypeVisitor<T> visitor) {
  visitor.fieldNames.push(field.getName());
  try {
    return visit(field, visitor);
  } finally {
    visitor.fieldNames.pop();
  }
}
Example #21
Source File: JsonRecordFormatter.java From parquet-mr with Apache License 2.0
@Override
protected Object formatResults(List<SimpleRecord> values) {
  if (super.typeInfo.getRepetition() == Type.Repetition.REPEATED) {
    List<Object> results = new ArrayList<Object>();
    for (SimpleRecord object : values) {
      results.add(add(object));
    }
    return results;
  } else {
    return add(values.get(SINGLE_VALUE));
  }
}
Example #22
Source File: DataWritableWriter.java From parquet-mr with Apache License 2.0
private void writeArray(final ArrayWritable array, final GroupType type) {
  if (array == null) {
    return;
  }
  final Writable[] subValues = array.get();
  final int fieldCount = type.getFieldCount();
  for (int field = 0; field < fieldCount; ++field) {
    final Type subType = type.getType(field);
    recordConsumer.startField(subType.getName(), field);
    for (int i = 0; i < subValues.length; ++i) {
      final Writable subValue = subValues[i];
      if (subValue != null) {
        if (subType.isPrimitive()) {
          if (subValue instanceof ArrayWritable) {
            writePrimitive(((ArrayWritable) subValue).get()[field]); // 0 ?
          } else {
            writePrimitive(subValue);
          }
        } else {
          if (!(subValue instanceof ArrayWritable)) {
            throw new RuntimeException("This should be an ArrayWritable: " + subValue);
          } else {
            recordConsumer.startGroup();
            writeData((ArrayWritable) subValue, subType.asGroupType());
            recordConsumer.endGroup();
          }
        }
      }
    }
    recordConsumer.endField(subType.getName(), field);
  }
}
Example #23
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0
private Type[] convertTypes(Schema pigSchema) {
  List<FieldSchema> fields = pigSchema.getFields();
  Type[] types = new Type[fields.size()];
  for (int i = 0; i < types.length; i++) {
    types[i] = convert(fields.get(i), i);
  }
  return types;
}
Example #24
Source File: HiveClientTest.java From garmadon with Apache License 2.0
@Test(expected = Exception.class)
public void shouldThrowExceptionForUnknownParquetType() throws Exception {
  HiveClient hiveClient = new HiveClient(driverName, "jdbc:hive2://localhost:" + port,
      "garmadon", hdfsTemp + "/garmadon_database");

  PrimitiveType unsupported = new PrimitiveType(Type.Repetition.OPTIONAL,
      PrimitiveType.PrimitiveTypeName.INT96, "unsupported");
  hiveClient.inferHiveType(unsupported);
}
Example #25
Source File: TestParquetVectorizedReads.java From iceberg with Apache License 2.0
@Test
@Override
public void testNestedStruct() {
  AssertHelpers.assertThrows(
      "Vectorized reads are not supported yet for struct fields",
      UnsupportedOperationException.class,
      "Vectorized reads are not supported yet for struct fields",
      () -> VectorizedSparkParquetReaders.buildReader(
          TypeUtil.assignIncreasingFreshIds(new Schema(required(1, "struct", SUPPORTED_PRIMITIVES))),
          new MessageType("struct", new GroupType(Type.Repetition.OPTIONAL, "struct").withId(1)),
          false));
}
Example #26
Source File: RowConverter.java From flink with Apache License 2.0
ArrayConverter(Type elementType, Class elementClass, TypeInformation elementTypeInfo,
    ParentDataHolder parentDataHolder, int pos) {
  this.elementClass = elementClass;
  this.parentDataHolder = parentDataHolder;
  this.pos = pos;
  if (elementClass.equals(Row.class)) {
    this.elementConverter = createConverter(elementType, 0, elementTypeInfo, this);
  } else {
    this.elementConverter = new RowConverter.RowPrimitiveConverter(elementType, this, 0);
  }
}
Example #27
Source File: ThriftRecordConverter.java From parquet-mr with Apache License 2.0
private StructConverter(List<TProtocol> events, GroupType parquetSchema, ThriftField field) {
  this.events = events;
  this.name = field.getName();
  this.tStruct = new TStruct(name);
  this.thriftType = (StructType) field.getType();
  this.schemaSize = parquetSchema.getFieldCount();
  this.converters = new Converter[this.schemaSize];
  List<ThriftField> thriftChildren = thriftType.getChildren();
  for (int i = 0; i < schemaSize; i++) {
    Type schemaType = parquetSchema.getType(i);
    String fieldName = schemaType.getName();
    ThriftField matchingThrift = null;
    for (ThriftField childField : thriftChildren) {
      String thriftChildName = childField.getName();
      if (thriftChildName != null && thriftChildName.equalsIgnoreCase(fieldName)) {
        matchingThrift = childField;
        break;
      }
    }
    if (matchingThrift == null) {
      // this means the file did not contain that field;
      // it will never be populated in this instance,
      // but other files might populate it
      continue;
    }
    if (schemaType.isPrimitive()) {
      converters[i] = new PrimitiveFieldHandler(
          newConverter(events, schemaType, matchingThrift).asPrimitiveConverter(),
          matchingThrift, events);
    } else {
      converters[i] = new GroupFieldhandler(
          newConverter(events, schemaType, matchingThrift).asGroupConverter(),
          matchingThrift, events);
    }
  }
}
Example #28
Source File: ParquetFileAccessor.java From pxf with Apache License 2.0
/**
 * Opens the resource for read.
 *
 * @throws IOException if opening the resource failed
 */
@Override
public boolean openForRead() throws IOException {
  file = new Path(context.getDataSource());
  FileSplit fileSplit = HdfsUtilities.parseFileSplit(context);

  // Read the original schema from the parquet file
  MessageType originalSchema = getSchema(file, fileSplit);
  // Get a map of the column name to Types for the given schema
  Map<String, Type> originalFieldsMap = getOriginalFieldsMap(originalSchema);
  // Get the read schema. This is either the full set or a subset (in
  // case of column projection) of the greenplum schema.
  MessageType readSchema = buildReadSchema(originalFieldsMap, originalSchema);
  // Get the record filter in case of predicate push-down
  FilterCompat.Filter recordFilter = getRecordFilter(context.getFilterString(), originalFieldsMap, readSchema);

  // add column projection
  configuration.set(PARQUET_READ_SCHEMA, readSchema.toString());

  fileReader = ParquetReader.builder(new GroupReadSupport(), file)
      .withConf(configuration)
      // Create reader for a given split, read a range in file
      .withFileRange(fileSplit.getStart(), fileSplit.getStart() + fileSplit.getLength())
      .withFilter(recordFilter)
      .build();
  context.setMetadata(readSchema);
  return true;
}
Example #29
Source File: GenericParquetReaders.java From iceberg with Apache License 2.0
@Override
public ParquetValueReader<?> list(Types.ListType expectedList, GroupType array,
                                  ParquetValueReader<?> elementReader) {
  GroupType repeated = array.getFields().get(0).asGroupType();
  String[] repeatedPath = currentPath();

  int repeatedD = type.getMaxDefinitionLevel(repeatedPath) - 1;
  int repeatedR = type.getMaxRepetitionLevel(repeatedPath) - 1;

  Type elementType = repeated.getType(0);
  int elementD = type.getMaxDefinitionLevel(path(elementType.getName())) - 1;

  return new ListReader<>(repeatedD, repeatedR, option(elementType, elementD, elementReader));
}
Example #30
Source File: ParquetSchemaConverter.java From flink with Apache License 2.0
private static TypeInformation<?> convertParquetPrimitiveListToFlinkArray(Type type) {
  // For backward compatibility, a list whose element group doesn't exist is also allowed
  TypeInformation<?> flinkType = convertParquetTypeToTypeInfo(type);
  if (flinkType.isBasicType()) {
    return BasicArrayTypeInfo.getInfoFor(Array.newInstance(flinkType.getTypeClass(), 0).getClass());
  } else {
    // flinkType here can be either SqlTimeTypeInfo or BasicTypeInfo.BIG_DEC_TYPE_INFO,
    // so it should be converted to ObjectArrayTypeInfo
    return ObjectArrayTypeInfo.getInfoFor(flinkType);
  }
}