org.apache.parquet.schema.PrimitiveType Java Examples
The following examples show how to use org.apache.parquet.schema.PrimitiveType.
Example #1
Source File: From Bats with Apache License 2.0 | 6 votes |
@Override public void serialize(ColumnMetadata_v2 value, JsonGenerator jgen, SerializerProvider provider) throws IOException, JsonProcessingException { jgen.writeStartObject(); jgen.writeArrayFieldStart("name"); for (String n : { jgen.writeString(n); } jgen.writeEndArray(); if (value.mxValue != null) { Object val; if (value.primitiveType == PrimitiveType.PrimitiveTypeName.BINARY && value.mxValue != null) { val = new String(((Binary) value.mxValue).getBytes()); } else { val = value.mxValue; } jgen.writeObjectField("mxValue", val); } if (value.nulls != null) { jgen.writeObjectField("nulls", value.nulls); } jgen.writeEndObject(); }
Example #2
Source File: From pxf with Apache License 2.0 | 6 votes |
private MessageType getParquetSchemaForPrimitiveTypes(Type.Repetition repetition, boolean readCase) { List<Type> fields = new ArrayList<>(); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "s1", OriginalType.UTF8)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "s2", OriginalType.UTF8)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT32, "n1", null)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.DOUBLE, "d1", null)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, 16, "dc1", OriginalType.DECIMAL, new DecimalMetadata(38, 18), null)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT96, "tm", null)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.FLOAT, "f", null)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT64, "bg", null)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.BOOLEAN, "b", null)); // GPDB only has int16 and not int8 type, so for write tiny numbers int8 are still treated as shorts in16 OriginalType tinyType = readCase ? OriginalType.INT_8 : OriginalType.INT_16; fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT32, "tn", tinyType)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT32, "sml", OriginalType.INT_16)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "vc1", OriginalType.UTF8)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "c1", OriginalType.UTF8)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "bin", null)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT96, "tmtz", null)); fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT96, "tmtz2", null)); return new MessageType("hive_schema", fields); }
Example #3
Source File: From datacollector with Apache License 2.0 | 6 votes |
/**
 * Attaches the logical type derived from a Parquet annotation to the given schema.
 *
 * <p>The schema is returned untouched when no logical type results from the conversion, or
 * when the annotation is DECIMAL and the primitive type is neither BINARY nor
 * FIXED_LEN_BYTE_ARRAY.
 *
 * @param schema schema to decorate
 * @param annotation Parquet original-type annotation of the field
 * @param asPrimitive the field as a primitive type; supplies the decimal metadata
 * @param parquetPrimitiveTypeName primitive type name of the field
 * @return the schema, with the logical type added when applicable
 */
private Schema addLogicalTypeToSchema(
    Schema schema,
    OriginalType annotation,
    PrimitiveType asPrimitive,
    PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName
) {
  LogicalType logicalType =
      convertOriginalTypeToLogicalType(annotation, asPrimitive.getDecimalMetadata());
  if (logicalType == null) {
    return schema;
  }

  boolean decimalOnOtherPrimitive = annotation == DECIMAL
      && parquetPrimitiveTypeName != BINARY
      && parquetPrimitiveTypeName != FIXED_LEN_BYTE_ARRAY;
  if (decimalOnOtherPrimitive) {
    return schema;
  }

  return logicalType.addToSchema(schema);
}
Example #4
Source File: From presto with Apache License 2.0 | 6 votes |
/**
 * Maps a schema-level primitive type name to the corresponding format-level type constant.
 *
 * @param type primitive type name to translate
 * @return the matching {@code Type} constant
 * @throws RuntimeException if the type name has no mapping
 */
private static org.apache.parquet.format.Type getType(PrimitiveType.PrimitiveTypeName type) {
  switch (type) {
    case BOOLEAN:
      return Type.BOOLEAN;
    case INT32:
      return Type.INT32;
    case INT64:
      return Type.INT64;
    case INT96:
      return Type.INT96;
    case FLOAT:
      return Type.FLOAT;
    case DOUBLE:
      return Type.DOUBLE;
    case BINARY:
      return Type.BYTE_ARRAY;
    case FIXED_LEN_BYTE_ARRAY:
      return Type.FIXED_LEN_BYTE_ARRAY;
    default:
      break;
  }
  throw new RuntimeException("Unknown primitive type " + type);
}
Example #5
Source File: From presto with Apache License 2.0 | 6 votes |
/**
 * Checks that a not-null domain on a map column yields an "all" Parquet tuple domain.
 */
@Test
public void testParquetTupleDomainMap() {
  MapType mapType = new MapType(
      INTEGER,
      INTEGER,
      methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"),
      methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"),
      methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"),
      methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"));

  HiveColumnHandle mapColumn = createBaseColumn(
      "my_map", 0, HiveType.valueOf("map<int,int>"), mapType, REGULAR, Optional.empty());
  TupleDomain<HiveColumnHandle> hiveDomain =
      withColumnDomains(ImmutableMap.of(mapColumn, Domain.notNull(mapType)));

  // File schema: optional group my_map { repeated group map { required key; optional value } }
  PrimitiveType keyType = new PrimitiveType(REQUIRED, INT32, "key");
  PrimitiveType valueType = new PrimitiveType(OPTIONAL, INT32, "value");
  GroupType entries = new GroupType(REPEATED, "map", keyType, valueType);
  MessageType fileSchema = new MessageType("hive_schema", new GroupType(OPTIONAL, "my_map", entries));

  Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
  TupleDomain<ColumnDescriptor> parquetDomain =
      getParquetTupleDomain(descriptorsByPath, hiveDomain, fileSchema, true);

  assertTrue(parquetDomain.isAll());
}
Example #6
Source File: From Bats with Apache License 2.0 | 6 votes |
/**
 * Descends through group types along {@code path}, starting at index {@code depth}, until a
 * primitive type is reached, then collects that column's type information.
 *
 * @param schema schema used to look up the maximum repetition and definition levels of the path
 * @param type type to start from
 * @param path column path; {@code path[depth]} names the child to follow at each group level
 * @param depth index into {@code path} corresponding to {@code type}'s children
 * @return original type, decimal precision/scale (0 when there is no decimal metadata) and levels
 */
private ColTypeInfo getColTypeInfo(MessageType schema, Type type, String[] path, int depth) {
  Type current = type;
  int level = depth;
  // Walk down one path segment per group level until the leaf is found.
  while (!current.isPrimitive()) {
    current = ((GroupType) current).getType(path[level]);
    level++;
  }

  PrimitiveType leaf = (PrimitiveType) current;
  int precision = 0;
  int scale = 0;
  if (leaf.getDecimalMetadata() != null) {
    precision = leaf.getDecimalMetadata().getPrecision();
    scale = leaf.getDecimalMetadata().getScale();
  }

  int repetitionLevel = schema.getMaxRepetitionLevel(path);
  int definitionLevel = schema.getMaxDefinitionLevel(path);
  return new ColTypeInfo(current.getOriginalType(), precision, scale, repetitionLevel, definitionLevel);
}
Example #7
Source File: From presto with Apache License 2.0 | 6 votes |
/**
 * Checks that INT64 statistics are read into a {@code LongStatistics} whose null count, min and
 * max match the fixture (13 nulls, min -10, max 42042).
 */
@Test(dataProvider = "allCreatedBy")
public void testReadStatsInt64(Optional<String> fileCreatedBy) {
  Statistics fileStatistics = new Statistics();
  fileStatistics.setNull_count(13);
  fileStatistics.setMin(fromHex("F6FFFFFFFFFFFFFF"));
  fileStatistics.setMax(fromHex("3AA4000000000000"));

  assertThat(
      MetadataReader.readStats(
          fileCreatedBy,
          Optional.of(fileStatistics),
          new PrimitiveType(OPTIONAL, INT64, "Test column")))
      .isInstanceOfSatisfying(LongStatistics.class, longStatistics -> {
        assertEquals(longStatistics.getNumNulls(), 13);
        assertEquals(longStatistics.getMin(), -10);
        assertEquals(longStatistics.getMax(), 42042);
        assertEquals(longStatistics.genericGetMin(), Long.valueOf(-10L));
        assertEquals(longStatistics.genericGetMax(), Long.valueOf(42042L));
      });
}
Example #8
Source File: From presto with Apache License 2.0 | 6 votes |
/**
 * Checks that a not-null domain on a struct column yields an "all" Parquet tuple domain.
 */
@Test
public void testParquetTupleDomainStruct() {
  RowType structType = rowType(
      RowType.field("a", INTEGER),
      RowType.field("b", INTEGER));

  HiveColumnHandle structColumn = createBaseColumn(
      "my_struct", 0, HiveType.valueOf("struct<a:int,b:int>"), structType, REGULAR, Optional.empty());
  TupleDomain<HiveColumnHandle> hiveDomain =
      withColumnDomains(ImmutableMap.of(structColumn, Domain.notNull(structType)));

  // File schema: optional group my_struct { optional int32 a; optional int32 b }
  PrimitiveType fieldA = new PrimitiveType(OPTIONAL, INT32, "a");
  PrimitiveType fieldB = new PrimitiveType(OPTIONAL, INT32, "b");
  MessageType fileSchema =
      new MessageType("hive_schema", new GroupType(OPTIONAL, "my_struct", fieldA, fieldB));

  Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
  TupleDomain<ColumnDescriptor> parquetDomain =
      getParquetTupleDomain(descriptorsByPath, hiveDomain, fileSchema, true);

  assertTrue(parquetDomain.isAll());
}
Example #9
Source File: From presto with Apache License 2.0 | 6 votes |
/**
 * Checks that a single-value domain on a bigint column is carried over to the Parquet tuple
 * domain: exactly one entry, keyed by the "my_primitive" path, with the same domain.
 */
@Test
public void testParquetTupleDomainPrimitive() {
  HiveColumnHandle bigintColumn = createBaseColumn(
      "my_primitive", 0, HiveType.valueOf("bigint"), BIGINT, REGULAR, Optional.empty());
  Domain expectedDomain = Domain.singleValue(BIGINT, 123L);
  TupleDomain<HiveColumnHandle> hiveDomain =
      withColumnDomains(ImmutableMap.of(bigintColumn, expectedDomain));

  MessageType fileSchema =
      new MessageType("hive_schema", new PrimitiveType(OPTIONAL, INT64, "my_primitive"));
  Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);

  TupleDomain<ColumnDescriptor> parquetDomain =
      getParquetTupleDomain(descriptorsByPath, hiveDomain, fileSchema, true);

  assertEquals(parquetDomain.getDomains().get().size(), 1);

  ColumnDescriptor onlyDescriptor = parquetDomain.getDomains().get().keySet().iterator().next();
  assertEquals(onlyDescriptor.getPath().length, 1);
  assertEquals(onlyDescriptor.getPath()[0], "my_primitive");

  Domain actualDomain = Iterables.getOnlyElement(parquetDomain.getDomains().get().values());
  assertEquals(actualDomain, expectedDomain);
}
Example #10
Source File: From parquet-mr with Apache License 2.0 | 6 votes |
@Test public void testReadUsingSchemaWithRequiredFieldThatWasOptional(){ MessageType originalSchema = new MessageType("schema", new PrimitiveType(OPTIONAL, INT32, "e")); MemPageStore store = new MemPageStore(1); SimpleGroupFactory groupFactory = new SimpleGroupFactory(originalSchema); writeGroups(originalSchema, store, groupFactory.newGroup().append("e", 4)); try { MessageType schemaWithRequiredFieldThatWasOptional = new MessageType("schema", new PrimitiveType(REQUIRED, INT32, "e")); // Incompatible schema: required when it was optional readGroups(store, originalSchema, schemaWithRequiredFieldThatWasOptional, 1); fail("should have thrown an incompatible schema exception"); } catch (ParquetDecodingException e) { assertEquals("The requested schema is not compatible with the file schema. incompatible types: required int32 e != optional int32 e", e.getMessage()); } }
Example #11
Source File: From garmadon with Apache License 2.0 | 6 votes |
/**
 * Creates table "fs" through {@code HiveClient.createTableIfNotExist} and checks the location,
 * table type and the "day" / "app_id" column types reported by the table description.
 */
@Test
public void createTableWithoutIssue() throws SQLException {
  String table = "fs";
  String location = "file:" + hdfsTemp + "/garmadon_database/fs";
  MessageType schema = new MessageType(
      "fs",
      new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.BINARY, "app_id"));

  HiveClient hiveClient = new HiveClient(
      driverName,
      "jdbc:hive2://localhost:" + port,
      "garmadon",
      hdfsTemp + "/garmadon_database");
  hiveClient.createTableIfNotExist(table, schema, location);

  HashMap<String, String> description = getResultHashTableDesc(hiveClient, table);
  assertEquals(location, description.get("Location"));
  assertEquals("EXTERNAL_TABLE", description.get("Table Type").trim());
  assertEquals("string", description.get("day"));
  assertEquals("string", description.get("app_id"));
}
Example #12
Source File: From garmadon with Apache License 2.0 | 6 votes |
/**
 * Checks the Hive type name that {@code HiveClient.inferHiveType} reports for each supported
 * Parquet primitive type, including a repeated BINARY mapped to {@code array<string>}.
 */
@Test
public void shouldProvideHiveTypeFromParquetType() throws Exception {
  HiveClient hiveClient = new HiveClient(
      driverName,
      "jdbc:hive2://localhost:" + port,
      "garmadon",
      hdfsTemp + "/garmadon_database");

  assertEquals("string", hiveClient.inferHiveType(
      new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.BINARY, "name")));

  assertEquals("array<string>", hiveClient.inferHiveType(
      new PrimitiveType(Type.Repetition.REPEATED, PrimitiveType.PrimitiveTypeName.BINARY, "name")));

  assertEquals("int", hiveClient.inferHiveType(
      new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.INT32, "name")));

  assertEquals("bigint", hiveClient.inferHiveType(
      new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.INT64, "name")));

  assertEquals("float", hiveClient.inferHiveType(
      new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.FLOAT, "name")));

  assertEquals("double", hiveClient.inferHiveType(
      new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.DOUBLE, "name")));

  assertEquals("boolean", hiveClient.inferHiveType(
      new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.BOOLEAN, "name")));
}
Example #13
Source File: From iceberg with Apache License 2.0 | 6 votes |
/**
 * Evaluates the expression against one row group.
 *
 * <p>Row groups with no rows short-circuit to {@code ROWS_CANNOT_MATCH}. Otherwise the
 * per-field-id statistics, value counts and converters are rebuilt from the row group's columns
 * (columns whose type has no id are skipped) before the expression is visited.
 *
 * @param fileSchema schema used to resolve each column path to its primitive type
 * @param rowGroup row group metadata to evaluate
 * @return the result of visiting the expression with this evaluator
 */
private boolean eval(MessageType fileSchema, BlockMetaData rowGroup) {
  if (rowGroup.getRowCount() <= 0) {
    return ROWS_CANNOT_MATCH;
  }

  this.stats = Maps.newHashMap();
  this.valueCounts = Maps.newHashMap();
  this.conversions = Maps.newHashMap();

  for (ColumnChunkMetaData chunk : rowGroup.getColumns()) {
    PrimitiveType chunkType = fileSchema.getType(chunk.getPath().toArray()).asPrimitiveType();
    if (chunkType.getId() == null) {
      continue;
    }
    int fieldId = chunkType.getId().intValue();
    stats.put(fieldId, chunk.getStatistics());
    valueCounts.put(fieldId, chunk.getValueCount());
    conversions.put(fieldId, ParquetConversions.converterFromParquet(chunkType));
  }

  return ExpressionVisitors.visitEvaluator(expr, this);
}
Example #14
Source File: From Bats with Apache License 2.0 | 6 votes |
/**
 * Builds the Parquet primitive type for a materialized field.
 *
 * <p>For decimal fields the primitive type name is INT32 or INT64 when primitive types are
 * enabled for decimals and the precision fits, otherwise {@code logicalTypeForDecimals}; the
 * length is then derived from the precision.
 *
 * @param field field to convert
 * @return the primitive type describing the field
 */
protected PrimitiveType getPrimitiveType(MaterializedField field) {
  MinorType minorType = field.getType().getMinorType();
  String name = field.getName();
  int length = ParquetTypeHelper.getLengthForMinorType(minorType);
  PrimitiveTypeName primitiveTypeName = ParquetTypeHelper.getPrimitiveTypeNameForMinorType(minorType);

  if (Types.isDecimalType(minorType)) {
    int precision = field.getPrecision();
    if (usePrimitiveTypesForDecimals
        && precision <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT32)) {
      primitiveTypeName = PrimitiveTypeName.INT32;
    } else if (usePrimitiveTypesForDecimals
        && precision <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT64)) {
      primitiveTypeName = PrimitiveTypeName.INT64;
    } else {
      primitiveTypeName = logicalTypeForDecimals;
    }
    length = DecimalUtility.getMaxBytesSizeForPrecision(precision);
  }

  Repetition repetition = ParquetTypeHelper.getRepetitionForDataMode(field.getDataMode());
  OriginalType originalType = ParquetTypeHelper.getOriginalTypeForMinorType(minorType);
  DecimalMetadata decimalMetadata = ParquetTypeHelper.getDecimalMetadataForField(field);
  return new PrimitiveType(
      repetition, primitiveTypeName, length, name, originalType, decimalMetadata, null);
}
Example #15
Source File: From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a reader for a FIXED_LEN_BYTE_ARRAY column.
 *
 * @param descriptor descriptor of the column, passed to the superclass
 * @param pageReader page reader, passed to the superclass
 * @param precision precision stored on this reader
 * @throws IOException declared by this constructor; see the superclass constructor
 */
public FixedLenBytesColumnReader(
    ColumnDescriptor descriptor,
    PageReader pageReader,
    int precision) throws IOException {
  super(descriptor, pageReader);
  // Validate the column's primitive type before recording the precision.
  checkTypeName(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY);
  this.precision = precision;
}
Example #16
Source File: From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Dispatches to the group-type or primitive-type {@code showDetails} overload depending on the
 * runtime type; any other kind of type is ignored.
 *
 * @param out writer receiving the output
 * @param type type to describe
 * @param depth nesting depth forwarded to the overload
 * @param container enclosing message type forwarded to the overload
 * @param cpath column path forwarded to the overload
 */
private static void showDetails(PrettyPrintWriter out, Type type, int depth, MessageType container, List<String> cpath) {
  if (type instanceof GroupType) {
    showDetails(out, type.asGroupType(), depth, container, cpath);
  } else if (type instanceof PrimitiveType) {
    showDetails(out, type.asPrimitiveType(), depth, container, cpath);
  }
}
Example #17
Source File: From Bats with Apache License 2.0 | 5 votes |
/**
 * Returns the data type length for this column, combining its {@code ColumnDescriptor} with its
 * corresponding {@code SchemaElement}. Neither is sufficient alone: the maximum repetition
 * level (which indicates an array type) lives in the ColumnDescriptor, while the length of a
 * fixed-width field is stored at the schema level.
 *
 * @return the length if the column is fixed width and not repeated, otherwise
 *     {@code UNDEFINED_LENGTH} (-1)
 */
public int getDataTypeLength() {
  if (!isFixedLength() || isRepeated()) {
    return UNDEFINED_LENGTH;
  }
  if (column.getType() == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
    // The schema element stores the length; scale it by 8 (presumably bytes to bits, matching
    // getTypeLengthInBits below -- confirm).
    return se.getType_length() * 8;
  }
  return getTypeLengthInBits(column.getType());
}
Example #18
Source File: From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Starts a new tuple. In elephant-bird compatible mode, every optional primitive field of type
 * INT32, INT64, FLOAT, DOUBLE or BOOLEAN is pre-populated with the matching zero constant.
 *
 * @throws RuntimeException wrapping any {@code ExecException} raised while setting a field
 */
@Override
public final void start() {
  currentTuple = TF.newTuple(schemaSize);
  if (!elephantBirdCompatible) {
    return;
  }

  try {
    int position = 0;
    for (Type field : parquetSchema.getFields()) {
      if (field.isPrimitive() && field.isRepetition(Repetition.OPTIONAL)) {
        switch (field.asPrimitiveType().getPrimitiveTypeName()) {
          case INT32:
          case BOOLEAN:
            // Booleans share the 32-bit integer zero.
            currentTuple.set(position, I32_ZERO);
            break;
          case INT64:
            currentTuple.set(position, I64_ZERO);
            break;
          case FLOAT:
            currentTuple.set(position, FLOAT_ZERO);
            break;
          case DOUBLE:
            currentTuple.set(position, DOUBLE_ZERO);
            break;
          default:
            break;
        }
      }
      position++;
    }
  } catch (ExecException e) {
    throw new RuntimeException(e);
  }
}
Example #19
Source File: From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Converts a {@code BigInteger} into a {@code Binary} suited to the given primitive type.
 *
 * <p>FIXED_LEN_BYTE_ARRAY uses the type's declared length, INT96 uses 12 bytes, and BINARY wraps
 * {@code bigInt.toByteArray()} as is.
 *
 * @param type primitive type that determines the binary layout
 * @param bigInt value to convert
 * @return the binary representation
 * @throws IllegalArgumentException if the type cannot be represented by a Binary
 */
public static Binary getFixedBinary(PrimitiveType type, BigInteger bigInt) {
  switch (type.getPrimitiveTypeName()) {
    case BINARY:
      return Binary.fromConstantByteArray(bigInt.toByteArray());
    case INT96:
      return getFixedBinary(12, bigInt);
    case FIXED_LEN_BYTE_ARRAY:
      return getFixedBinary(type.getTypeLength(), bigInt);
    default:
      break;
  }
  throw new IllegalArgumentException("Type " + type + " cannot be represented by a Binary");
}
Example #20
Source File: From presto with Apache License 2.0 | 5 votes |
/**
 * Checks that a varchar predicate on "Test" matches statistics whose min and max are both
 * "Test" and which report one null.
 */
@Test
public void testVarcharMatchesWithStatistics() throws ParquetCorruptionException {
  String value = "Test";

  RichColumnDescriptor column = new RichColumnDescriptor(
      new ColumnDescriptor(new String[] {"path"}, BINARY, 0, 0),
      new PrimitiveType(OPTIONAL, BINARY, "Test column"));
  TupleDomain<ColumnDescriptor> effectivePredicate =
      getEffectivePredicate(column, createVarcharType(255), utf8Slice(value));
  TupleDomainParquetPredicate parquetPredicate =
      new TupleDomainParquetPredicate(effectivePredicate, singletonList(column));

  Statistics<?> columnStats = getStatsBasedOnType(column.getPrimitiveType().getPrimitiveTypeName());
  columnStats.setNumNulls(1L);
  columnStats.setMinMaxFromBytes(value.getBytes(UTF_8), value.getBytes(UTF_8));

  assertTrue(parquetPredicate.matches(2, ImmutableMap.of(column, columnStats), ID, true));
}
Example #21
Source File: From parquet-mr with Apache License 2.0 | 5 votes |
private void testTruncator(PrimitiveType type, boolean strict) { BinaryTruncator truncator = BinaryTruncator.getTruncator(type); Comparator<Binary> comparator = type.comparator(); checkContract(truncator, comparator, Binary.fromString("aaaaaaaaaa"), strict, strict); checkContract(truncator, comparator, Binary.fromString("árvÃztűrÅ‘ tükörfúrógép"), strict, strict); checkContract(truncator, comparator, Binary.fromString("aaaaaaaaaa" + UTF8_3BYTES_MAX_CHAR), strict, strict); checkContract(truncator, comparator, Binary.fromString("a" + UTF8_3BYTES_MAX_CHAR + UTF8_1BYTE_MAX_CHAR), strict, strict); checkContract(truncator, comparator, Binary.fromConstantByteArray(new byte[] { (byte) 0xFE, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, (byte) 0xFF }), strict, strict); // Edge case: zero length -> unable to truncate checkContract(truncator, comparator, Binary.fromString(""), false, false); // Edge case: containing only UTF-8 max characters -> unable to truncate for max checkContract(truncator, comparator, Binary.fromString( UTF8_1BYTE_MAX_CHAR + UTF8_4BYTES_MAX_CHAR + UTF8_3BYTES_MAX_CHAR + UTF8_4BYTES_MAX_CHAR + UTF8_2BYTES_MAX_CHAR + UTF8_3BYTES_MAX_CHAR + UTF8_3BYTES_MAX_CHAR + UTF8_1BYTE_MAX_CHAR + UTF8_2BYTES_MAX_CHAR + UTF8_3BYTES_MAX_CHAR + UTF8_4BYTES_MAX_CHAR), strict, false); // Edge case: non-UTF-8; max bytes -> unable to truncate for max checkContract( truncator, comparator, binary(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF), strict, false); }
Example #22
Source File: From presto with Apache License 2.0 | 5 votes |
/**
 * Checks that a varchar predicate on the empty slice matches a one-entry dictionary page built
 * from four zero bytes.
 */
@Test
public void testVarcharMatchesWithDictionaryDescriptor() {
  RichColumnDescriptor column = new RichColumnDescriptor(
      new ColumnDescriptor(new String[] {"path"}, BINARY, 0, 0),
      new PrimitiveType(OPTIONAL, BINARY, "Test column"));

  TupleDomain<ColumnDescriptor> effectivePredicate =
      getEffectivePredicate(column, createVarcharType(255), EMPTY_SLICE);
  TupleDomainParquetPredicate parquetPredicate =
      new TupleDomainParquetPredicate(effectivePredicate, singletonList(column));

  DictionaryPage dictionaryPage =
      new DictionaryPage(Slices.wrappedBuffer(new byte[] {0, 0, 0, 0}), 1, PLAIN_DICTIONARY);
  DictionaryDescriptor dictionary = new DictionaryDescriptor(column, Optional.of(dictionaryPage));

  assertTrue(parquetPredicate.matches(dictionary));
}
Example #23
Source File: From garmadon with Apache License 2.0 | 5 votes |
/**
 * Expects {@code HiveClient.inferHiveType} to throw when given an INT96 primitive type.
 */
@Test(expected = Exception.class)
public void shouldThrowExceptionForUnknownParquetType() throws Exception {
  HiveClient hiveClient = new HiveClient(
      driverName,
      "jdbc:hive2://localhost:" + port,
      "garmadon",
      hdfsTemp + "/garmadon_database");

  hiveClient.inferHiveType(
      new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.INT96, "unsupported"));
}
Example #24
Source File: From iceberg with Apache License 2.0 | 5 votes |
/**
 * Tells whether a primitive type should be treated as an int.
 *
 * <p>Without an original-type annotation the answer is whether the primitive type name is INT32.
 * With one, only INT_8, INT_16, INT_32 and DATE count as int.
 *
 * @param primitiveType type to inspect
 * @return {@code true} if the type is considered an int type
 */
public static boolean isIntType(PrimitiveType primitiveType) {
  if (primitiveType.getOriginalType() == null) {
    return primitiveType.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT32;
  }

  switch (primitiveType.getOriginalType()) {
    case INT_8:
    case INT_16:
    case INT_32:
    case DATE:
      return true;
    default:
      return false;
  }
}
Example #25
Source File: From presto with Apache License 2.0 | 5 votes |
@Test(dataProvider = "allCreatedBy") public void testReadNullStats(Optional<String> fileCreatedBy) { // integer assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), new PrimitiveType(OPTIONAL, INT32, "Test column"))) .isInstanceOfSatisfying( IntStatistics.class, columnStatistics -> assertTrue(columnStatistics.isEmpty())); // bigint assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), new PrimitiveType(OPTIONAL, INT64, "Test column"))) .isInstanceOfSatisfying( LongStatistics.class, columnStatistics -> assertTrue(columnStatistics.isEmpty())); // varchar assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), new PrimitiveType(OPTIONAL, BINARY, "Test column", OriginalType.UTF8))) .isInstanceOfSatisfying( BinaryStatistics.class, columnStatistics -> assertTrue(columnStatistics.isEmpty())); // varbinary assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), new PrimitiveType(OPTIONAL, BINARY, "Test column"))) .isInstanceOfSatisfying( BinaryStatistics.class, columnStatistics -> assertTrue(columnStatistics.isEmpty())); }
Example #26
Source File: From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates the float column index from the collected min and max values.
 *
 * @param type primitive type the index is created for
 * @return the populated index, or {@code null} when this builder is flagged invalid
 */
@Override
ColumnIndexBase<Float> createColumnIndex(PrimitiveType type) {
  if (invalid) {
    return null;
  }

  FloatColumnIndex floatIndex = new FloatColumnIndex(type);
  floatIndex.minValues = minValues.toFloatArray();
  floatIndex.maxValues = maxValues.toFloatArray();
  return floatIndex;
}
Example #27
Source File: From parquet-mr with Apache License 2.0 | 5 votes |
private ConvertedField visitPrimitiveType(PrimitiveTypeName type, LogicalTypeAnnotation orig, State state) { PrimitiveBuilder<PrimitiveType> b = primitive(type, state.repetition); if (orig != null) { b =; } if (fieldProjectionFilter.keep(state.path)) { return new Keep(state.path, b.named(; } else { return new Drop(state.path); } }
Example #28
Source File: From parquet-mr with Apache License 2.0 | 5 votes |
/** * * It creates the readContext for Parquet side with the requested schema during the init phase. * * @param configuration needed to get the wanted columns * @param keyValueMetaData // unused * @param fileSchema parquet file schema * @return the parquet ReadContext */ @Override public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(final Configuration configuration, final Map<String, String> keyValueMetaData, final MessageType fileSchema) { final String columns = configuration.get(IOConstants.COLUMNS); final Map<String, String> contextMetadata = new HashMap<String, String>(); if (columns != null) { final List<String> listColumns = getColumns(columns); final List<Type> typeListTable = new ArrayList<Type>(); for (final String col : listColumns) { // listColumns contains partition columns which are metadata only if (fileSchema.containsField(col)) { typeListTable.add(fileSchema.getType(col)); } else { // below allows schema evolution typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, col)); } } MessageType tableSchema = new MessageType(TABLE_SCHEMA, typeListTable); contextMetadata.put(HIVE_SCHEMA_KEY, tableSchema.toString()); MessageType requestedSchemaByUser = tableSchema; final List<Integer> indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration); final List<Type> typeListWanted = new ArrayList<Type>(); for (final Integer idx : indexColumnsWanted) { typeListWanted.add(tableSchema.getType(listColumns.get(idx))); } requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(), typeListWanted), fileSchema, configuration); return new ReadContext(requestedSchemaByUser, contextMetadata); } else { contextMetadata.put(HIVE_SCHEMA_KEY, fileSchema.toString()); return new ReadContext(fileSchema, contextMetadata); } }
Example #29
Source File: From Bats with Apache License 2.0 | 5 votes |
/**
 * Creates column metadata from its name, primitive type, value bounds and null count.
 *
 * @param name name segments of the column
 * @param primitiveType Parquet primitive type of the column
 * @param minValue minimum value recorded for the column
 * @param maxValue maximum value recorded for the column
 * @param nulls null count recorded for the column
 */
public ColumnMetadata_v3(String[] name, PrimitiveType.PrimitiveTypeName primitiveType, Object minValue,
    Object maxValue, Long nulls) {
  // The assignment target was missing from this snippet ("= name;"); restored.
  this.name = name;
  this.minValue = minValue;
  this.maxValue = maxValue;
  this.nulls = nulls;
  this.primitiveType = primitiveType;
}
Example #30
Source File: From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Visits a primitive type of the file schema: reports an incompatible schema when the currently
 * requested type is not primitive, or (under strict type checking) has a different primitive
 * type name; then registers a new primitive column IO as a child of the current node and as a
 * leaf.
 *
 * @param primitiveType primitive type being visited
 */
@Override
public void visit(PrimitiveType primitiveType) {
  if (!currentRequestedType.isPrimitive()) {
    incompatibleSchema(primitiveType, currentRequestedType);
  } else if (this.strictTypeChecking
      && currentRequestedType.asPrimitiveType().getPrimitiveTypeName() != primitiveType.getPrimitiveTypeName()) {
    incompatibleSchema(primitiveType, currentRequestedType);
  }

  // The leaf's index is the number of leaves registered so far.
  PrimitiveColumnIO leafIO =
      new PrimitiveColumnIO(primitiveType, current, currentRequestedIndex, leaves.size());
  current.add(leafIO);
  leaves.add(leafIO);
}