org.apache.iceberg.PartitionField Java Examples
The following examples show how to use
org.apache.iceberg.PartitionField.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Projections.java From iceberg with Apache License 2.0 | 6 votes |
@Override @SuppressWarnings("unchecked") public <T> Expression predicate(BoundPredicate<T> pred) { Collection<PartitionField> parts = spec().getFieldsBySourceId(pred.ref().fieldId()); if (parts == null) { // the predicate has no partition column return Expressions.alwaysTrue(); } Expression result = Expressions.alwaysTrue(); for (PartitionField part : parts) { // consider (d = 2019-01-01) with bucket(7, d) and bucket(5, d) // projections: b1 = bucket(7, '2019-01-01') = 5, b2 = bucket(5, '2019-01-01') = 0 // any value where b1 != 5 or any value where b2 != 0 cannot be the '2019-01-01' // // similarly, if partitioning by day(ts) and hour(ts), the more restrictive // projection should be used. ts = 2019-01-01T01:00:00 produces day=2019-01-01 and // hour=2019-01-01-01. the value will be in 2019-01-01-01 and not in 2019-01-01-02. UnboundPredicate<?> inclusiveProjection = ((Transform<T, ?>) part.transform()).project(part.name(), pred); if (inclusiveProjection != null) { result = Expressions.and(result, inclusiveProjection); } } return result; }
Example #2
Source File: HiveTypeConverter.java From metacat with Apache License 2.0 | 6 votes |
/**
 * Converts an iceberg schema into a list of field DTOs, one per top-level schema column.
 *
 * <p>A column is flagged as a partition key when its name equals the name of the source
 * column of any of the supplied partition fields.
 *
 * @param schema          schema
 * @param partitionFields partitioned fields
 * @return list of field Info
 */
public List<FieldInfo> icebergeSchemaTofieldDtos(final Schema schema,
                                                 final List<PartitionField> partitionFields) {
    final List<FieldInfo> result = Lists.newArrayList();

    // Resolve each partition field to the name of the schema column it is derived from.
    final List<String> partitionSourceNames = Lists.newArrayList();
    for (PartitionField partitionField : partitionFields) {
        partitionSourceNames.add(schema.findField(partitionField.sourceId()).name());
    }

    for (Types.NestedField column : schema.columns()) {
        final String columnName = column.name();
        final org.apache.iceberg.types.Type columnType = column.type();

        final FieldInfo info = new FieldInfo();
        info.setName(columnName);
        info.setSourceType(columnType.toString());
        info.setType(toMetacatType(fromIcebergToHiveType(columnType)));
        info.setIsNullable(column.isOptional());
        info.setComment(column.doc());
        info.setPartitionKey(partitionSourceNames.contains(column.name()));
        result.add(info);
    }

    return result;
}
Example #3
Source File: PartitionKey.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Creates a partition key for the given spec, reading source values from rows laid out
 * according to {@code inputSchema}.
 *
 * <p>For every partition field, the accessor for its source column and its transform are
 * stored at the field's position in the spec.
 *
 * @param spec the partition spec describing the key
 * @param inputSchema the schema used to build row accessors
 * @throws RuntimeException if no accessor can be built for a partition field's source column
 */
@SuppressWarnings("unchecked")
PartitionKey(PartitionSpec spec, Schema inputSchema) {
  this.spec = spec;

  List<PartitionField> partitionFields = spec.fields();
  this.size = partitionFields.size();
  this.partitionTuple = new Object[size];
  this.transforms = new Transform[size];
  this.accessors = (Accessor<InternalRow>[]) Array.newInstance(Accessor.class, size);

  Schema specSchema = spec.schema();
  Map<Integer, Accessor<InternalRow>> accessorsById = buildAccessors(inputSchema);

  int position = 0;
  for (PartitionField partitionField : partitionFields) {
    Accessor<InternalRow> sourceAccessor = accessorsById.get(partitionField.sourceId());
    if (sourceAccessor == null) {
      throw new RuntimeException(
          "Cannot build accessor for field: " + specSchema.findField(partitionField.sourceId()));
    }

    this.accessors[position] = sourceAccessor;
    this.transforms[position] = partitionField.transform();
    position += 1;
  }
}
Example #4
Source File: Projections.java From iceberg with Apache License 2.0 | 6 votes |
@Override @SuppressWarnings("unchecked") public <T> Expression predicate(BoundPredicate<T> pred) { Collection<PartitionField> parts = spec().getFieldsBySourceId(pred.ref().fieldId()); if (parts == null) { // the predicate has no partition column return Expressions.alwaysFalse(); } Expression result = Expressions.alwaysFalse(); for (PartitionField part : parts) { // consider (ts > 2019-01-01T01:00:00) with day(ts) and hour(ts) // projections: d >= 2019-01-02 and h >= 2019-01-01-02 (note the inclusive bounds). // any timestamp where either projection predicate is true must match the original // predicate. For example, ts = 2019-01-01T03:00:00 matches the hour projection but not // the day, but does match the original predicate. UnboundPredicate<?> strictProjection = ((Transform<T, ?>) part.transform()).projectStrict(part.name(), pred); if (strictProjection != null) { result = Expressions.or(result, strictProjection); } } return result; }
Example #5
Source File: ManifestsTable.java From presto with Apache License 2.0 | 6 votes |
/**
 * Writes one row entry per partition field summary into {@code blockBuilder}.
 *
 * Each entry holds, in order: whether the summary contains null, the human-readable lower
 * bound, and the human-readable upper bound. The summary at index i is paired with the
 * partition field and partition type field at the same index of {@code partitionSpec}.
 */
private static void writePartitionSummaries(BlockBuilder blockBuilder, List<PartitionFieldSummary> summaries, PartitionSpec partitionSpec)
{
    for (int position = 0; position < summaries.size(); position++) {
        PartitionFieldSummary fieldSummary = summaries.get(position);
        PartitionField partitionField = partitionSpec.fields().get(position);
        Type boundType = partitionSpec.partitionType().fields().get(position).type();

        BlockBuilder entryBuilder = blockBuilder.beginBlockEntry();
        BOOLEAN.writeBoolean(entryBuilder, fieldSummary.containsNull());

        // bounds are stored as serialized buffers; decode them with the partition field's type
        String lowerBoundText = partitionField.transform().toHumanString(
                Conversions.fromByteBuffer(boundType, fieldSummary.lowerBound()));
        VARCHAR.writeString(entryBuilder, lowerBoundText);

        String upperBoundText = partitionField.transform().toHumanString(
                Conversions.fromByteBuffer(boundType, fieldSummary.upperBound()));
        VARCHAR.writeString(entryBuilder, upperBoundText);

        blockBuilder.closeEntry();
    }
}
Example #6
Source File: IcebergUtil.java From presto with Apache License 2.0 | 5 votes |
public static Map<PartitionField, Integer> getIdentityPartitions(PartitionSpec partitionSpec) { // TODO: expose transform information in Iceberg library ImmutableMap.Builder<PartitionField, Integer> columns = ImmutableMap.builder(); for (int i = 0; i < partitionSpec.fields().size(); i++) { PartitionField field = partitionSpec.fields().get(i); if (field.transform().toString().equals("identity")) { columns.put(field, i); } } return columns.build(); }
Example #7
Source File: PartitionFields.java From presto with Apache License 2.0 | 5 votes |
/**
 * Renders a partition field as text, based on the string form of its transform.
 *
 * Identity transforms render as the bare source column name; year/month/day/hour render as
 * {@code transform(column)}; bucket and truncate transforms are recognized through
 * ICEBERG_BUCKET_PATTERN and ICEBERG_TRUNCATE_PATTERN and render with the column name
 * followed by the pattern's first capture group.
 *
 * @throws UnsupportedOperationException if the transform matches none of the known forms
 */
private static String toPartitionField(PartitionSpec spec, PartitionField field)
{
    String columnName = spec.schema().findColumnName(field.sourceId());
    String transformName = field.transform().toString();

    if (transformName.equals("identity")) {
        return columnName;
    }

    boolean temporal = transformName.equals("year")
            || transformName.equals("month")
            || transformName.equals("day")
            || transformName.equals("hour");
    if (temporal) {
        return format("%s(%s)", transformName, columnName);
    }

    Matcher bucketMatcher = ICEBERG_BUCKET_PATTERN.matcher(transformName);
    if (bucketMatcher.matches()) {
        return format("bucket(%s, %s)", columnName, bucketMatcher.group(1));
    }

    Matcher truncateMatcher = ICEBERG_TRUNCATE_PATTERN.matcher(transformName);
    if (truncateMatcher.matches()) {
        return format("truncate(%s, %s)", columnName, truncateMatcher.group(1));
    }

    throw new UnsupportedOperationException("Unsupported partition transform: " + field);
}
Example #8
Source File: PartitionSpecVisitor.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Walks the fields of a partition spec and dispatches each one to the matching visitor
 * callback, collecting the callback results in field order.
 *
 * <p>Fields whose transform matches none of the handled kinds are skipped, so the returned
 * list may be shorter than the spec's field list.
 *
 * @param schema the schema used to resolve source column names
 * @param spec the partition spec to visit
 * @param visitor the visitor receiving one call per recognized field
 * @param <R> the visitor's result type
 * @return the visitor results for all recognized fields
 */
static <R> List<R> visit(Schema schema, PartitionSpec spec, PartitionSpecVisitor<R> visitor) {
  List<R> visited = Lists.newArrayListWithExpectedSize(spec.fields().size());

  for (PartitionField partitionField : spec.fields()) {
    int sourceId = partitionField.sourceId();
    String columnName = schema.findColumnName(sourceId);
    Transform<?, ?> transform = partitionField.transform();

    R outcome;
    if (transform instanceof Identity) {
      outcome = visitor.identity(columnName, sourceId);
    } else if (transform instanceof Bucket) {
      outcome = visitor.bucket(columnName, sourceId, ((Bucket<?>) transform).numBuckets());
    } else if (transform instanceof Truncate) {
      outcome = visitor.truncate(columnName, sourceId, ((Truncate<?>) transform).width());
    } else if (transform == Dates.YEAR || transform == Timestamps.YEAR) {
      outcome = visitor.year(columnName, sourceId);
    } else if (transform == Dates.MONTH || transform == Timestamps.MONTH) {
      outcome = visitor.month(columnName, sourceId);
    } else if (transform == Dates.DAY || transform == Timestamps.DAY) {
      outcome = visitor.day(columnName, sourceId);
    } else if (transform == Timestamps.HOUR) {
      outcome = visitor.hour(columnName, sourceId);
    } else {
      // no visitor callback exists for this transform; leave it out of the results
      continue;
    }

    visited.add(outcome);
  }

  return visited;
}
Example #9
Source File: PartitionTable.java From presto with Apache License 2.0 | 5 votes |
/**
 * Builds column metadata for the given partition fields.
 *
 * Each column is named after the partition field; its type is the transform's result type
 * for the source column's schema type, converted through toPrestoType.
 */
private List<ColumnMetadata> getPartitionColumnsMetadata(List<PartitionField> fields, Schema schema)
{
    return fields.stream()
            .map(partitionField -> {
                String columnName = partitionField.name();
                return new ColumnMetadata(
                        columnName,
                        toPrestoType(
                                partitionField.transform().getResultType(schema.findType(partitionField.sourceId())),
                                typeManager));
            })
            .collect(toImmutableList());
}
Example #10
Source File: IcebergPageSink.java From presto with Apache License 2.0 | 5 votes |
/**
 * Creates a partition column description.
 *
 * Every reference argument is passed through requireNonNull with a "... is null" message
 * before being stored.
 *
 * @param field the partition field this column represents
 * @param sourceChannel the source channel index, stored as given
 * @param sourceType the type of the source values
 * @param resultType the type of the values after the transform
 * @param blockTransform the function mapping a source block to a result block
 */
public PartitionColumn(
        PartitionField field,
        int sourceChannel,
        Type sourceType,
        Type resultType,
        Function<Block, Block> blockTransform)
{
    this.field = requireNonNull(field, "field is null");
    this.sourceChannel = sourceChannel;
    this.sourceType = requireNonNull(sourceType, "sourceType is null");
    this.resultType = requireNonNull(resultType, "resultType is null");
    this.blockTransform = requireNonNull(blockTransform, "blockTransform is null");
}
Example #11
Source File: SparkTableUtil.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Lists the visible files of one partition directory as Avro data files.
 *
 * <p>Every file gets the same partition path, built from the spec's field names and the
 * values in {@code partitionPath} as {@code name=value} segments joined by "/". The path does
 * not depend on the file, so it is computed once rather than once per listed file.
 *
 * @param partitionPath partition values keyed by partition field name
 * @param partitionUri location of the partition directory
 * @param spec the partition spec of the table
 * @param conf the Hadoop configuration used to resolve the file system
 * @return one data file per regular file accepted by HIDDEN_PATH_FILTER
 */
private static List<DataFile> listAvroPartition(
    Map<String, String> partitionPath, String partitionUri, PartitionSpec spec, Configuration conf) {
  try {
    Path partition = new Path(partitionUri);
    FileSystem fs = partition.getFileSystem(conf);

    // identical for every file in this directory, so build it outside the per-file lambda
    String partitionKey = spec.fields().stream()
        .map(PartitionField::name)
        .map(name -> String.format("%s=%s", name, partitionPath.get(name)))
        .collect(Collectors.joining("/"));

    return Arrays.stream(fs.listStatus(partition, HIDDEN_PATH_FILTER))
        .filter(FileStatus::isFile)
        .map(stat -> {
          // a fresh Metrics instance per file, constructed exactly as before
          Metrics metrics = new Metrics(-1L, null, null, null);

          return DataFiles.builder(spec)
              .withPath(stat.getPath().toString())
              .withFormat("avro")
              .withFileSizeInBytes(stat.getLen())
              .withMetrics(metrics)
              .withPartitionPath(partitionKey)
              .build();
        }).collect(Collectors.toList());
  } catch (IOException e) {
    throw SparkExceptionUtil.toUncheckedException(e, "Unable to list files in partition: %s", partitionUri);
  }
}
Example #12
Source File: SparkTableUtil.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Lists the visible files of one partition directory as ORC data files.
 *
 * <p>Metrics are read per file through {@code OrcMetrics.fromInputFile}. The partition path is
 * the same for every file: it is built from the spec's field names and the values in
 * {@code partitionPath} as {@code name=value} segments joined by "/". It is therefore computed
 * once rather than once per listed file.
 *
 * @param partitionPath partition values keyed by partition field name
 * @param partitionUri location of the partition directory
 * @param spec the partition spec of the table
 * @param conf the Hadoop configuration used to resolve the file system and open files
 * @return one data file per regular file accepted by HIDDEN_PATH_FILTER
 */
private static List<DataFile> listOrcPartition(
    Map<String, String> partitionPath, String partitionUri, PartitionSpec spec, Configuration conf) {
  try {
    Path partition = new Path(partitionUri);
    FileSystem fs = partition.getFileSystem(conf);

    // identical for every file in this directory, so build it outside the per-file lambda
    String partitionKey = spec.fields().stream()
        .map(PartitionField::name)
        .map(name -> String.format("%s=%s", name, partitionPath.get(name)))
        .collect(Collectors.joining("/"));

    return Arrays.stream(fs.listStatus(partition, HIDDEN_PATH_FILTER))
        .filter(FileStatus::isFile)
        .map(stat -> {
          Metrics metrics = OrcMetrics.fromInputFile(HadoopInputFile.fromPath(stat.getPath(), conf));

          return DataFiles.builder(spec)
              .withPath(stat.getPath().toString())
              .withFormat("orc")
              .withFileSizeInBytes(stat.getLen())
              .withMetrics(metrics)
              .withPartitionPath(partitionKey)
              .build();
        }).collect(Collectors.toList());
  } catch (IOException e) {
    throw SparkExceptionUtil.toUncheckedException(e, "Unable to list files in partition: %s", partitionUri);
  }
}
Example #13
Source File: IcebergSplitSource.java From presto with Apache License 2.0 | 5 votes |
private static Map<Integer, String> getPartitionKeys(FileScanTask scanTask) { StructLike partition = scanTask.file().partition(); PartitionSpec spec = scanTask.spec(); Map<PartitionField, Integer> fieldToIndex = getIdentityPartitions(spec); Map<Integer, String> partitionKeys = new HashMap<>(); fieldToIndex.forEach((field, index) -> { int id = field.sourceId(); Type type = spec.schema().findType(id); Class<?> javaClass = type.typeId().javaClass(); Object value = partition.get(index, javaClass); if (value == null) { partitionKeys.put(id, null); } else { String partitionValue; if (type.typeId() == FIXED || type.typeId() == BINARY) { // this is safe because Iceberg PartitionData directly wraps the byte array partitionValue = new String(((ByteBuffer) value).array(), UTF_8); } else { partitionValue = value.toString(); } partitionKeys.put(id, partitionValue); } }); return Collections.unmodifiableMap(partitionKeys); }
Example #14
Source File: SparkUtil.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Check whether the partition transforms in a spec can be used to write data.
 *
 * <p>The spec is rejected when any of its fields uses an {@code UnknownTransform}; the error
 * message lists the string form of every such transform, separated by ", ".
 *
 * @param spec a PartitionSpec
 * @throws UnsupportedOperationException if the spec contains unknown partition transforms
 */
public static void validatePartitionTransforms(PartitionSpec spec) {
  boolean hasUnknownTransform = spec.fields().stream()
      .anyMatch(field -> field.transform() instanceof UnknownTransform);
  if (!hasUnknownTransform) {
    return;
  }

  String unsupported = spec.fields().stream()
      .map(PartitionField::transform)
      .filter(transform -> transform instanceof UnknownTransform)
      .map(Transform::toString)
      .collect(Collectors.joining(", "));

  throw new UnsupportedOperationException(
      String.format("Cannot write using unsupported transforms: %s", unsupported));
}
Example #15
Source File: DataAdditionCmdHandler.java From dremio-oss with Apache License 2.0 | 5 votes |
public void validateIcebergSchemaForInsertCommand(List<String> fieldNames) { IcebergTableProps icebergTableProps = icebergCreateTableEntry.getIcebergTableProps(); Preconditions.checkState(icebergTableProps.getIcebergOpType() == IcebergOperation.Type.INSERT, "unexpected state found"); BatchSchema querySchema = icebergTableProps.getFullSchema(); IcebergTableOperations tableOperations = new IcebergTableOperations( new org.apache.hadoop.fs.Path(icebergTableProps.getTableLocation()), icebergCreateTableEntry.getPlugin().getFsConfCopy()); BatchSchema icebergSchema = new SchemaConverter().fromIceberg( tableOperations.current().schema()); // this check can be removed once we support schema evolution in dremio. if (!icebergSchema.equalsIgnoreCase(tableSchemaFromKVStore)) { throw UserException.validationError().message("The schema for table %s does not match with the iceberg %s.", tableSchemaFromKVStore, icebergSchema).buildSilently(); } List<String> icebergPartitionColumns = tableOperations.current().spec().fields().stream() .map(PartitionField::name).collect(Collectors.toList()); // this check can be removed once we support partition spec evolution in dremio. if (!comparePartitionColumnLists(icebergPartitionColumns)) { throw UserException.validationError().message("The table partition columns %s do not match with the iceberg partition columns %s.", partitionColumns.toString(), icebergPartitionColumns.toString()).buildSilently(); } BatchSchema partSchemaWithSelectedFields = tableSchemaFromKVStore.subset(fieldNames).orElse(tableSchemaFromKVStore); if (!querySchema.equalsIgnoreCase(partSchemaWithSelectedFields)) { throw UserException.validationError().message("Table %s doesn't match with query %s.", partSchemaWithSelectedFields, querySchema).buildSilently(); } }
Example #16
Source File: IcebergTableWrapper.java From dremio-oss with Apache License 2.0 | 5 votes |
private void buildPartitionColumns() { partitionColumns = table .spec() .fields() .stream() .map(PartitionField::sourceId) .map(schema::findColumnName) // column name from schema .collect(Collectors.toList()); }
Example #17
Source File: PartitionTable.java From presto with Apache License 2.0 | 5 votes |
/**
 * Computes the result type of each partition field's transform, in field order.
 *
 * The source type of a field is looked up in idToTypeMapping by the field's source id.
 */
private List<Type> partitionTypes(List<PartitionField> partitionFields)
{
    ImmutableList.Builder<Type> resultTypes = ImmutableList.builder();
    for (PartitionField current : partitionFields) {
        Type.PrimitiveType inputType = idToTypeMapping.get(current.sourceId());
        resultTypes.add(current.transform().getResultType(inputType));
    }
    return resultTypes.build();
}
Example #18
Source File: IcebergPageSink.java From presto with Apache License 2.0 | 4 votes |
/**
 * Returns the partition field stored in this object.
 */
public PartitionField getField()
{
    return this.field;
}
Example #19
Source File: ResidualEvaluator.java From iceberg with Apache License 2.0 | 4 votes |
@Override @SuppressWarnings("unchecked") public <T> Expression predicate(BoundPredicate<T> pred) { // Get the strict projection and inclusive projection of this predicate in partition data, // then use them to determine whether to return the original predicate. The strict projection // returns true iff the original predicate would have returned true, so the predicate can be // eliminated if the strict projection evaluates to true. Similarly the inclusive projection // returns false iff the original predicate would have returned false, so the predicate can // also be eliminated if the inclusive projection evaluates to false. // If there is no strict projection or if it evaluates to false, then return the predicate. List<PartitionField> parts = spec.getFieldsBySourceId(pred.ref().fieldId()); if (parts == null) { return pred; // not associated inclusive a partition field, can't be evaluated } for (PartitionField part : parts) { // checking the strict projection UnboundPredicate<?> strictProjection = ((Transform<T, ?>) part.transform()).projectStrict(part.name(), pred); Expression strictResult = null; if (strictProjection != null) { Expression bound = strictProjection.bind(spec.partitionType(), caseSensitive); if (bound instanceof BoundPredicate) { strictResult = super.predicate((BoundPredicate<?>) bound); } else { // if the result is not a predicate, then it must be a constant like alwaysTrue or alwaysFalse strictResult = bound; } } if (strictResult != null && strictResult.op() == Expression.Operation.TRUE) { // If strict is true, returning true return Expressions.alwaysTrue(); } // checking the inclusive projection UnboundPredicate<?> inclusiveProjection = ((Transform<T, ?>) part.transform()).project(part.name(), pred); Expression inclusiveResult = null; if (inclusiveProjection != null) { Expression boundInclusive = inclusiveProjection.bind(spec.partitionType(), caseSensitive); if (boundInclusive instanceof BoundPredicate) { // using predicate method specific to 
inclusive inclusiveResult = super.predicate((BoundPredicate<?>) boundInclusive); } else { // if the result is not a predicate, then it must be a constant like alwaysTrue or alwaysFalse inclusiveResult = boundInclusive; } } if (inclusiveResult != null && inclusiveResult.op() == Expression.Operation.FALSE) { // If inclusive is false, returning false return Expressions.alwaysFalse(); } } // neither strict not inclusive predicate was conclusive, returning the original pred return pred; }