org.apache.iceberg.transforms.Transforms Java Examples
The following examples show how to use
org.apache.iceberg.transforms.Transforms.
You can vote up the examples you find useful or vote down those you don't,
and you can view each example in its original project or source file by following the link above it. You may also check out the related API usage in the sidebar.
Example #1
Source File: UnboundTransform.java From iceberg with Apache License 2.0 | 8 votes |
@SuppressWarnings("unchecked") @Override public BoundTransform<S, T> bind(Types.StructType struct, boolean caseSensitive) { BoundReference<S> boundRef = ref.bind(struct, caseSensitive); Transform<S, T> typeTransform; try { // TODO: Avoid using toString/fromString typeTransform = (Transform<S, T>) Transforms.fromString(boundRef.type(), transform.toString()); ValidationException.check(typeTransform.canTransform(boundRef.type()), "Cannot bind: %s cannot transform %s values from '%s'", transform, boundRef.type(), ref.name()); } catch (IllegalArgumentException e) { throw new ValidationException( "Cannot bind: %s cannot transform %s values from '%s'", transform, boundRef.type(), ref.name()); } return new BoundTransform<>(boundRef, typeTransform); }
Example #2
Source File: TestFilteredScan.java From iceberg with Apache License 2.0 | 7 votes |
@BeforeClass public static void startSpark() { TestFilteredScan.spark = SparkSession.builder().master("local[2]").getOrCreate(); // define UDFs used by partition tests Transform<Long, Integer> bucket4 = Transforms.bucket(Types.LongType.get(), 4); spark.udf().register("bucket4", (UDF1<Long, Integer>) bucket4::apply, IntegerType$.MODULE$); Transform<Long, Integer> day = Transforms.day(Types.TimestampType.withZone()); spark.udf().register("ts_day", (UDF1<Timestamp, Integer>) timestamp -> day.apply((Long) fromJavaTimestamp(timestamp)), IntegerType$.MODULE$); Transform<Long, Integer> hour = Transforms.hour(Types.TimestampType.withZone()); spark.udf().register("ts_hour", (UDF1<Timestamp, Integer>) timestamp -> hour.apply((Long) fromJavaTimestamp(timestamp)), IntegerType$.MODULE$); spark.udf().register("data_ident", (UDF1<String, String>) data -> data, StringType$.MODULE$); spark.udf().register("id_ident", (UDF1<Long, Long>) id -> id, LongType$.MODULE$); }
Example #3
Source File: TestFilteredScan.java From iceberg with Apache License 2.0 | 6 votes |
@BeforeClass public static void startSpark() { TestFilteredScan.spark = SparkSession.builder().master("local[2]").getOrCreate(); // define UDFs used by partition tests Transform<Long, Integer> bucket4 = Transforms.bucket(Types.LongType.get(), 4); spark.udf().register("bucket4", (UDF1<Long, Integer>) bucket4::apply, IntegerType$.MODULE$); Transform<Long, Integer> day = Transforms.day(Types.TimestampType.withZone()); spark.udf().register("ts_day", (UDF1<Timestamp, Integer>) timestamp -> day.apply((Long) fromJavaTimestamp(timestamp)), IntegerType$.MODULE$); Transform<Long, Integer> hour = Transforms.hour(Types.TimestampType.withZone()); spark.udf().register("ts_hour", (UDF1<Timestamp, Integer>) timestamp -> hour.apply((Long) fromJavaTimestamp(timestamp)), IntegerType$.MODULE$); spark.udf().register("data_ident", (UDF1<String, String>) data -> data, StringType$.MODULE$); spark.udf().register("id_ident", (UDF1<Long, Long>) id -> id, LongType$.MODULE$); }
Example #4
Source File: PartitionSpec.java From iceberg with Apache License 2.0 | 5 votes |
Builder identity(String sourceName, String targetName) {
  // Resolve the source column, reserve the target name, then add the identity field.
  Types.NestedField column = findSourceColumn(sourceName);
  checkAndAddPartitionName(targetName, column.fieldId());
  PartitionField identityField = new PartitionField(
      column.fieldId(), nextFieldId(), targetName, Transforms.identity(column.type()));
  fields.add(identityField);
  return this;
}
Example #5
Source File: PartitionSpec.java From iceberg with Apache License 2.0 | 5 votes |
Builder add(int sourceId, int fieldId, String name, String transform) {
  Types.NestedField column = schema.findField(sourceId);
  // BUG FIX: validate the lookup BEFORE dereferencing. The original called
  // column.fieldId() (via checkAndAddPartitionName) before checkNotNull, so an
  // unknown source id threw NullPointerException instead of the intended
  // "Cannot find source column" message.
  Preconditions.checkNotNull(column, "Cannot find source column: %s", sourceId);
  checkAndAddPartitionName(name, column.fieldId());
  fields.add(new PartitionField(
      sourceId, fieldId, name, Transforms.fromString(column.type(), transform)));
  // Track the highest field id handed out so later auto-assigned ids don't collide.
  lastAssignedFieldId.getAndAccumulate(fieldId, Math::max);
  return this;
}
Example #6
Source File: PartitionSpec.java From iceberg with Apache License 2.0 | 5 votes |
public Builder truncate(String sourceName, int width, String targetName) {
  // Reserve the partition name, then add a truncate[width] field on the source column.
  checkAndAddPartitionName(targetName);
  Types.NestedField column = findSourceColumn(sourceName);
  PartitionField truncateField = new PartitionField(
      column.fieldId(), nextFieldId(), targetName, Transforms.truncate(column.type(), width));
  fields.add(truncateField);
  return this;
}
Example #7
Source File: PartitionSpec.java From iceberg with Apache License 2.0 | 5 votes |
public Builder bucket(String sourceName, int numBuckets, String targetName) {
  // Reserve the partition name, then add a bucket[numBuckets] field on the source column.
  checkAndAddPartitionName(targetName);
  Types.NestedField column = findSourceColumn(sourceName);
  PartitionField bucketField = new PartitionField(
      column.fieldId(), nextFieldId(), targetName, Transforms.bucket(column.type(), numBuckets));
  fields.add(bucketField);
  return this;
}
Example #8
Source File: PartitionSpec.java From iceberg with Apache License 2.0 | 5 votes |
public Builder hour(String sourceName, String targetName) {
  checkAndAddPartitionName(targetName);
  Types.NestedField column = findSourceColumn(sourceName);
  PartitionField hourField = new PartitionField(
      column.fieldId(), nextFieldId(), targetName, Transforms.hour(column.type()));
  // Reject specs that already partition this column by a time transform.
  checkForRedundantPartitions(hourField);
  fields.add(hourField);
  return this;
}
Example #9
Source File: PartitionSpec.java From iceberg with Apache License 2.0 | 5 votes |
public Builder day(String sourceName, String targetName) {
  checkAndAddPartitionName(targetName);
  Types.NestedField column = findSourceColumn(sourceName);
  PartitionField dayField = new PartitionField(
      column.fieldId(), nextFieldId(), targetName, Transforms.day(column.type()));
  // Reject specs that already partition this column by a time transform.
  checkForRedundantPartitions(dayField);
  fields.add(dayField);
  return this;
}
Example #10
Source File: PartitionSpec.java From iceberg with Apache License 2.0 | 5 votes |
public Builder month(String sourceName, String targetName) {
  checkAndAddPartitionName(targetName);
  Types.NestedField column = findSourceColumn(sourceName);
  PartitionField monthField = new PartitionField(
      column.fieldId(), nextFieldId(), targetName, Transforms.month(column.type()));
  // Reject specs that already partition this column by a time transform.
  checkForRedundantPartitions(monthField);
  fields.add(monthField);
  return this;
}
Example #11
Source File: PartitionSpec.java From iceberg with Apache License 2.0 | 5 votes |
public Builder year(String sourceName, String targetName) {
  checkAndAddPartitionName(targetName);
  Types.NestedField column = findSourceColumn(sourceName);
  PartitionField yearField = new PartitionField(
      column.fieldId(), nextFieldId(), targetName, Transforms.year(column.type()));
  // Reject specs that already partition this column by a time transform.
  checkForRedundantPartitions(yearField);
  fields.add(yearField);
  return this;
}
Example #12
Source File: TestPartitionTransforms.java From presto with Apache License 2.0 | 5 votes |
@Test
public void testToStringMatchesSpecification() {
  // Transform names are part of the Iceberg partition-spec serialization format,
  // so each toString() must match the spec exactly.

  // Time-based transforms render as bare names.
  assertEquals(Transforms.year(DateType.get()).toString(), "year");
  assertEquals(Transforms.month(DateType.get()).toString(), "month");
  assertEquals(Transforms.day(DateType.get()).toString(), "day");
  assertEquals(Transforms.hour(TimestampType.withoutZone()).toString(), "hour");

  // Parameterized transforms embed their argument in brackets.
  assertEquals(Transforms.bucket(StringType.get(), 13).toString(), "bucket[13]");
  assertEquals(Transforms.truncate(StringType.get(), 19).toString(), "truncate[19]");

  // Identity has no parameters.
  assertEquals(Transforms.identity(StringType.get()).toString(), "identity");
}
Example #13
Source File: FilesTable.java From presto with Apache License 2.0 | 4 votes |
/**
 * Builds the result pages for the $files metadata table: one row per data file in
 * the scan, with per-column size/count/bound statistics where present.
 */
private static List<Page> buildPages(ConnectorTableMetadata tableMetadata, ConnectorSession session,
    Table icebergTable, Optional<Long> snapshotId) {
  PageListBuilder pagesBuilder = PageListBuilder.forTable(tableMetadata);
  TableScan tableScan =
      getTableScan(session, TupleDomain.all(), snapshotId, icebergTable).includeColumnStats();
  // Field id -> Iceberg type, needed to deserialize the raw bound byte buffers.
  Map<Integer, Type> idToTypeMapping = getIcebergIdToTypeMapping(icebergTable.schema());

  tableScan.planFiles().forEach(fileScanTask -> {
    DataFile dataFile = fileScanTask.file();
    pagesBuilder.beginRow();
    pagesBuilder.appendVarchar(dataFile.path().toString());
    pagesBuilder.appendVarchar(dataFile.format().name());
    pagesBuilder.appendBigint(dataFile.recordCount());
    pagesBuilder.appendBigint(dataFile.fileSizeInBytes());
    // checkNonNull appends SQL NULL and returns false when the stat is absent.
    if (checkNonNull(dataFile.columnSizes(), pagesBuilder)) {
      pagesBuilder.appendIntegerBigintMap(dataFile.columnSizes());
    }
    if (checkNonNull(dataFile.valueCounts(), pagesBuilder)) {
      pagesBuilder.appendIntegerBigintMap(dataFile.valueCounts());
    }
    if (checkNonNull(dataFile.nullValueCounts(), pagesBuilder)) {
      pagesBuilder.appendIntegerBigintMap(dataFile.nullValueCounts());
    }
    if (checkNonNull(dataFile.lowerBounds(), pagesBuilder)) {
      pagesBuilder.appendIntegerVarcharMap(
          toHumanReadableBounds(dataFile.lowerBounds(), idToTypeMapping));
    }
    if (checkNonNull(dataFile.upperBounds(), pagesBuilder)) {
      pagesBuilder.appendIntegerVarcharMap(
          toHumanReadableBounds(dataFile.upperBounds(), idToTypeMapping));
    }
    if (checkNonNull(dataFile.keyMetadata(), pagesBuilder)) {
      pagesBuilder.appendVarbinary(Slices.wrappedBuffer(dataFile.keyMetadata()));
    }
    if (checkNonNull(dataFile.splitOffsets(), pagesBuilder)) {
      pagesBuilder.appendBigintArray(dataFile.splitOffsets());
    }
    pagesBuilder.endRow();
  });

  return pagesBuilder.build();
}

/**
 * Converts raw serialized column bounds (field id -> bytes) to human-readable
 * strings, deserializing each value with the field's Iceberg type. Extracted
 * because the original inlined this identical lambda twice (lower and upper bounds).
 */
private static Map<Integer, String> toHumanReadableBounds(Map<Integer, ByteBuffer> bounds,
    Map<Integer, Type> idToTypeMapping) {
  return bounds.entrySet().stream()
      .collect(toImmutableMap(
          Map.Entry<Integer, ByteBuffer>::getKey,
          entry -> Transforms.identity(idToTypeMapping.get(entry.getKey())).toHumanString(
              Conversions.fromByteBuffer(idToTypeMapping.get(entry.getKey()), entry.getValue()))));
}
Example #14
Source File: Expressions.java From iceberg with Apache License 2.0 | 4 votes |
public static <T> UnboundTerm<T> truncate(String name, int width) {
  // The long type here is only a placeholder; binding re-resolves the transform
  // against the referenced column's actual type (see UnboundTransform.bind).
  Transform<Long, Long> transform = Transforms.truncate(Types.LongType.get(), width);
  return new UnboundTransform<>(ref(name), transform);
}
Example #15
Source File: Expressions.java From iceberg with Apache License 2.0 | 4 votes |
@SuppressWarnings("unchecked")
public static <T> UnboundTerm<T> hour(String name) {
  // Timestamp is only a placeholder source type; binding re-resolves the transform
  // against the referenced column's actual type (see UnboundTransform.bind).
  Transform<?, T> hourTransform = (Transform<?, T>) Transforms.hour(Types.TimestampType.withZone());
  return new UnboundTransform<>(ref(name), hourTransform);
}
Example #16
Source File: Expressions.java From iceberg with Apache License 2.0 | 4 votes |
@SuppressWarnings("unchecked")
public static <T> UnboundTerm<T> day(String name) {
  // Timestamp is only a placeholder source type; binding re-resolves the transform
  // against the referenced column's actual type (see UnboundTransform.bind).
  Transform<?, T> dayTransform = (Transform<?, T>) Transforms.day(Types.TimestampType.withZone());
  return new UnboundTransform<>(ref(name), dayTransform);
}
Example #17
Source File: Expressions.java From iceberg with Apache License 2.0 | 4 votes |
@SuppressWarnings("unchecked")
public static <T> UnboundTerm<T> month(String name) {
  // Timestamp is only a placeholder source type; binding re-resolves the transform
  // against the referenced column's actual type (see UnboundTransform.bind).
  Transform<?, T> monthTransform = (Transform<?, T>) Transforms.month(Types.TimestampType.withZone());
  return new UnboundTransform<>(ref(name), monthTransform);
}
Example #18
Source File: Expressions.java From iceberg with Apache License 2.0 | 4 votes |
@SuppressWarnings("unchecked")
public static <T> UnboundTerm<T> year(String name) {
  // Timestamp is only a placeholder source type; binding re-resolves the transform
  // against the referenced column's actual type (see UnboundTransform.bind).
  Transform<?, T> yearTransform = (Transform<?, T>) Transforms.year(Types.TimestampType.withZone());
  return new UnboundTransform<>(ref(name), yearTransform);
}
Example #19
Source File: PartitionSpec.java From iceberg with Apache License 2.0 | 4 votes |
public Builder alwaysNull(String sourceName, String targetName) {
  // Partition field whose value is always null (used to void a dropped partition field).
  checkAndAddPartitionName(targetName);
  Types.NestedField column = findSourceColumn(sourceName);
  PartitionField nullField =
      new PartitionField(column.fieldId(), nextFieldId(), targetName, Transforms.alwaysNull());
  fields.add(nullField);
  return this;
}
Example #20
Source File: Expressions.java From iceberg with Apache License 2.0 | 4 votes |
@SuppressWarnings("unchecked")
public static <T> UnboundTerm<T> bucket(String name, int numBuckets) {
  // String is only a placeholder source type; binding re-resolves the transform
  // against the referenced column's actual type (see UnboundTransform.bind).
  Transform<?, T> bucketTransform =
      (Transform<?, T>) Transforms.bucket(Types.StringType.get(), numBuckets);
  return new UnboundTransform<>(ref(name), bucketTransform);
}