org.apache.iceberg.expressions.Expressions Java Examples
The following examples show how to use
org.apache.iceberg.expressions.Expressions.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestMetadataTableScans.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Checks scans of the data files metadata table both without and with
 * {@code ignoreResiduals()}, delegating the residual assertions to
 * {@code validateTaskScanResiduals}.
 */
@Test
public void testDataFilesTableHonorsIgnoreResiduals() throws IOException {
  table.newFastAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

  Table dataFilesTable = new DataFilesTable(table.ops(), table);

  // plain filtered scan, validated with ignoreResiduals == false
  TableScan keepingResiduals = dataFilesTable.newScan().filter(Expressions.equal("id", 5));
  validateTaskScanResiduals(keepingResiduals, false);

  // same filter, but with ignoreResiduals() requested on the scan
  TableScan droppingResiduals =
      dataFilesTable.newScan().filter(Expressions.equal("id", 5)).ignoreResiduals();
  validateTaskScanResiduals(droppingResiduals, true);
}
Example #2
Source File: ScanSummary.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Walks {@code expression}, splitting AND nodes recursively. Unbound predicates whose term is a
 * named reference listed in {@code TIMESTAMP_NAMES} are converted to a {@code "timestamp_ms"}
 * predicate and handed to {@code addTimestampFilter}; every other expression is appended to
 * {@code expressions}.
 *
 * @param expressions accumulator receiving the expressions that are not timestamp filters
 * @param expression expression to inspect
 */
private void removeTimeFilters(List<Expression> expressions, Expression expression) {
  if (expression.op() == Operation.AND) {
    // descend into both sides of the conjunction; nothing is added for the AND node itself
    And conjunction = (And) expression;
    removeTimeFilters(expressions, conjunction.left());
    removeTimeFilters(expressions, conjunction.right());
    return;
  }

  if (expression instanceof UnboundPredicate) {
    UnboundPredicate predicate = (UnboundPredicate) expression;
    if (predicate.term() instanceof NamedReference) {
      NamedReference<?> column = (NamedReference<?>) predicate.term();
      // the literal is fetched before the name check, as in the original statement order
      Literal<?> literal = predicate.literal();
      if (TIMESTAMP_NAMES.contains(column.name())) {
        Literal<Long> asTimestamp = literal.to(Types.TimestampType.withoutZone());
        long timestampMillis = toMillis(asTimestamp.value());
        addTimestampFilter(Expressions.predicate(predicate.op(), "timestamp_ms", timestampMillis));
        return;
      }
    }
  }

  expressions.add(expression);
}
Example #3
Source File: Truncate.java From iceberg with Apache License 2.0 | 6 votes |
@Override public UnboundPredicate<Integer> projectStrict(String name, BoundPredicate<Integer> pred) { if (pred.term() instanceof BoundTransform) { return ProjectionUtil.projectTransformPredicate(this, name, pred); } // TODO: for integers, can this return the original predicate? // No. the predicate needs to be in terms of the applied value. For all x, apply(x) <= x. // Therefore, the lower bound can be transformed outside of a greater-than bound. if (pred instanceof BoundUnaryPredicate) { return Expressions.predicate(pred.op(), name); } else if (pred instanceof BoundLiteralPredicate) { return ProjectionUtil.truncateIntegerStrict(name, pred.asLiteralPredicate(), this); } else if (pred.isSetPredicate() && pred.op() == Expression.Operation.NOT_IN) { return ProjectionUtil.transformSet(name, pred.asSetPredicate(), this); } return null; }
Example #4
Source File: TestFindFiles.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Appends four files and expects a case-insensitive metadata search on {@code "FILE_PATH"} with
 * the prefix {@code "/path/to/data-a"} to return only {@code FILE_A}.
 */
@Test
public void testCaseSensitivity() {
  table.newAppend()
      .appendFile(FILE_A)
      .appendFile(FILE_B)
      .appendFile(FILE_C)
      .appendFile(FILE_D)
      .commit();

  // the column name is given in upper case while caseInsensitive() is enabled on the search
  Iterable<DataFile> matches =
      FindFiles.in(table)
          .caseInsensitive()
          .withMetadataMatching(Expressions.startsWith("FILE_PATH", "/path/to/data-a"))
          .collect();

  Assert.assertEquals(pathSet(FILE_A), pathSet(matches));
}
Example #5
Source File: TestMetadataTableScans.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Checks scans of the all-entries metadata table both without and with
 * {@code ignoreResiduals()}, delegating the residual assertions to
 * {@code validateTaskScanResiduals}.
 */
@Test
public void testAllEntriesTableHonorsIgnoreResiduals() throws IOException {
  table.newFastAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

  Table allEntriesTable = new AllEntriesTable(table.ops(), table);

  // plain filtered scan, validated with ignoreResiduals == false
  TableScan keepingResiduals = allEntriesTable.newScan().filter(Expressions.equal("id", 5));
  validateTaskScanResiduals(keepingResiduals, false);

  // same filter, but with ignoreResiduals() requested on the scan
  TableScan droppingResiduals =
      allEntriesTable.newScan().filter(Expressions.equal("id", 5)).ignoreResiduals();
  validateTaskScanResiduals(droppingResiduals, true);
}
Example #6
Source File: Truncate.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Builds an unbound predicate on {@code name} from a bound {@code CharSequence} predicate.
 *
 * <p>For STARTS_WITH literals the result depends on the literal length relative to
 * {@code width()}: shorter literals keep the STARTS_WITH operation, literals of exactly
 * {@code width()} characters become an equality, and longer literals yield {@code null}.
 *
 * @param name reference name used by the returned predicate
 * @param predicate bound predicate to convert
 * @return the converted predicate, or {@code null} when no branch applies
 */
@Override
public UnboundPredicate<CharSequence> projectStrict(String name, BoundPredicate<CharSequence> predicate) {
  if (predicate.term() instanceof BoundTransform) {
    return ProjectionUtil.projectTransformPredicate(this, name, predicate);
  }

  if (predicate instanceof BoundUnaryPredicate) {
    return Expressions.predicate(predicate.op(), name);
  }

  if (predicate instanceof BoundLiteralPredicate) {
    BoundLiteralPredicate<CharSequence> literalPredicate = predicate.asLiteralPredicate();
    if (literalPredicate.op() != Expression.Operation.STARTS_WITH) {
      return ProjectionUtil.truncateArrayStrict(name, literalPredicate, this);
    }

    if (literalPredicate.literal().value().length() < width()) {
      return Expressions.predicate(literalPredicate.op(), name, literalPredicate.literal().value());
    }

    if (literalPredicate.literal().value().length() == width()) {
      return Expressions.equal(name, literalPredicate.literal().value());
    }

    // STARTS_WITH literal longer than width(): nothing is returned for it
    return null;
  }

  if (predicate.isSetPredicate() && predicate.op() == Expression.Operation.NOT_IN) {
    return ProjectionUtil.transformSet(name, predicate.asSetPredicate(), this);
  }

  return null;
}
Example #7
Source File: Truncate.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Builds an unbound predicate on {@code name} from a bound {@code ByteBuffer} predicate.
 *
 * @param name reference name used by the returned predicate
 * @param pred bound predicate to convert
 * @return the converted predicate, or {@code null} when the predicate is neither a transform
 *     term, unary, literal, nor an IN set predicate
 */
@Override
public UnboundPredicate<ByteBuffer> project(String name, BoundPredicate<ByteBuffer> pred) {
  if (pred.term() instanceof BoundTransform) {
    return ProjectionUtil.projectTransformPredicate(this, name, pred);
  }

  if (pred.isUnaryPredicate()) {
    return Expressions.predicate(pred.op(), name);
  }

  if (pred.isLiteralPredicate()) {
    return ProjectionUtil.truncateArray(name, pred.asLiteralPredicate(), this);
  }

  // of the set predicates, only IN is handled here
  if (pred.isSetPredicate() && pred.op() == Expression.Operation.IN) {
    return ProjectionUtil.transformSet(name, pred.asSetPredicate(), this);
  }

  return null;
}
Example #8
Source File: Truncate.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Builds an unbound predicate on {@code name} from a bound {@code ByteBuffer} predicate, using
 * the strict helpers of {@code ProjectionUtil}.
 *
 * @param name reference name used by the returned predicate
 * @param pred bound predicate to convert
 * @return the converted predicate, or {@code null} when the predicate is neither a transform
 *     term, unary, literal, nor a NOT_IN set predicate
 */
@Override
public UnboundPredicate<ByteBuffer> projectStrict(String name, BoundPredicate<ByteBuffer> pred) {
  if (pred.term() instanceof BoundTransform) {
    return ProjectionUtil.projectTransformPredicate(this, name, pred);
  }

  if (pred.isUnaryPredicate()) {
    return Expressions.predicate(pred.op(), name);
  }

  if (pred.isLiteralPredicate()) {
    return ProjectionUtil.truncateArrayStrict(name, pred.asLiteralPredicate(), this);
  }

  // of the set predicates, only NOT_IN is handled here
  if (pred.isSetPredicate() && pred.op() == Expression.Operation.NOT_IN) {
    return ProjectionUtil.transformSet(name, pred.asSetPredicate(), this);
  }

  return null;
}
Example #9
Source File: Truncate.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Builds an unbound predicate on {@code name} from a bound {@code BigDecimal} predicate.
 *
 * @param name reference name used by the returned predicate
 * @param pred bound predicate to convert
 * @return the converted predicate, or {@code null} when the predicate is neither a transform
 *     term, unary, literal, nor an IN set predicate
 */
@Override
public UnboundPredicate<BigDecimal> project(String name, BoundPredicate<BigDecimal> pred) {
  if (pred.term() instanceof BoundTransform) {
    return ProjectionUtil.projectTransformPredicate(this, name, pred);
  }

  if (pred.isUnaryPredicate()) {
    return Expressions.predicate(pred.op(), name);
  }

  if (pred.isLiteralPredicate()) {
    return ProjectionUtil.truncateDecimal(name, pred.asLiteralPredicate(), this);
  }

  // of the set predicates, only IN is handled here
  if (pred.isSetPredicate() && pred.op() == Expression.Operation.IN) {
    return ProjectionUtil.transformSet(name, pred.asSetPredicate(), this);
  }

  return null;
}
Example #10
Source File: Bucket.java From iceberg with Apache License 2.0 | 6 votes |
@Override public UnboundPredicate<Integer> project(String name, BoundPredicate<T> predicate) { if (predicate.term() instanceof BoundTransform) { return ProjectionUtil.projectTransformPredicate(this, name, predicate); } if (predicate.isUnaryPredicate()) { return Expressions.predicate(predicate.op(), name); } else if (predicate.isLiteralPredicate() && predicate.op() == Expression.Operation.EQ) { return Expressions.predicate( predicate.op(), name, apply(predicate.asLiteralPredicate().literal().value())); } else if (predicate.isSetPredicate() && predicate.op() == Expression.Operation.IN) { // notIn can't be projected return ProjectionUtil.transformSet(name, predicate.asSetPredicate(), this); } // comparison predicates can't be projected, notEq can't be projected // TODO: small ranges can be projected. // for example, (x > 0) and (x < 3) can be turned into in({1, 2}) and projected. return null; }
Example #11
Source File: Bucket.java From iceberg with Apache License 2.0 | 6 votes |
@Override public UnboundPredicate<Integer> projectStrict(String name, BoundPredicate<T> predicate) { if (predicate.term() instanceof BoundTransform) { return ProjectionUtil.projectTransformPredicate(this, name, predicate); } if (predicate.isUnaryPredicate()) { return Expressions.predicate(predicate.op(), name); } else if (predicate.isLiteralPredicate() && predicate.op() == Expression.Operation.NOT_EQ) { // TODO: need to translate not(eq(...)) into notEq in expressions return Expressions.predicate(predicate.op(), name, apply(predicate.asLiteralPredicate().literal().value())); } else if (predicate.isSetPredicate() && predicate.op() == Expression.Operation.NOT_IN) { return ProjectionUtil.transformSet(name, predicate.asSetPredicate(), this); } // no strict projection for comparison or equality return null; }
Example #12
Source File: TestIcebergInputFormat.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Writes two data files under different {@code Row} values, configures the input format with an
 * equality filter on {@code "date"}, and validates the job against the records of the first
 * file.
 */
@Test
public void testFilterExp() throws Exception {
  File location = temp.newFolder(format.name());
  Assert.assertTrue(location.delete());

  Table table = tables.create(
      SCHEMA,
      SPEC,
      ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, format.name()),
      location.toString());

  // both expected records get the date value that the filter below selects
  List<Record> expectedRecords = RandomGenericData.generate(table.schema(), 2, 0L);
  for (int pos = 0; pos < 2; pos += 1) {
    expectedRecords.get(pos).set(2, "2020-03-20");
  }

  DataFile dataFile1 = writeFile(table, Row.of("2020-03-20", 0), format, expectedRecords);
  DataFile dataFile2 = writeFile(
      table, Row.of("2020-03-21", 0), format, RandomGenericData.generate(table.schema(), 2, 0L));

  table.newAppend().appendFile(dataFile1).appendFile(dataFile2).commit();

  Job job = Job.getInstance(conf);
  IcebergInputFormat.ConfigBuilder configBuilder = IcebergInputFormat.configure(job);
  configBuilder
      .readFrom(location.toString())
      .filter(Expressions.equal("date", "2020-03-20"));

  validate(job, expectedRecords);
}
Example #13
Source File: TestFindFiles.java From iceberg with Apache License 2.0 | 6 votes |
@Test public void testWithRecordsMatching() { table.newAppend() .appendFile(DataFiles.builder(SPEC) .withInputFile(Files.localInput("/path/to/data-e.parquet")) .withPartitionPath("data_bucket=4") .withMetrics(new Metrics(3L, null, // no column sizes ImmutableMap.of(1, 3L), // value count ImmutableMap.of(1, 0L), // null count ImmutableMap.of(1, Conversions.toByteBuffer(Types.IntegerType.get(), 1)), // lower bounds ImmutableMap.of(1, Conversions.toByteBuffer(Types.IntegerType.get(), 5)))) // lower bounds .build()) .commit(); final Iterable<DataFile> files = FindFiles.in(table) .withRecordsMatching(Expressions.equal("id", 1)) .collect(); Assert.assertEquals(Sets.newHashSet("/path/to/data-e.parquet"), pathSet(files)); }
Example #14
Source File: IcebergStorage.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Translates a comparison of a column against a constant into the matching Iceberg expression.
 *
 * @param op comparison operator
 * @param col column whose name is used as the expression reference
 * @param constant constant whose value is used as the expression literal
 * @return the Iceberg expression for the six handled operators
 * @throws RuntimeException if {@code op} is not one of the handled operators
 */
private org.apache.iceberg.expressions.Expression convert(OpType op, Column col, Const constant) {
  final String columnName = col.getName();
  final Object literal = constant.getValue();

  switch (op) {
    case OP_EQ:
      return Expressions.equal(columnName, literal);
    case OP_NE:
      return Expressions.notEqual(columnName, literal);
    case OP_LT:
      return Expressions.lessThan(columnName, literal);
    case OP_LE:
      return Expressions.lessThanOrEqual(columnName, literal);
    case OP_GT:
      return Expressions.greaterThan(columnName, literal);
    case OP_GE:
      return Expressions.greaterThanOrEqual(columnName, literal);
    default:
      // any other operator cannot be pushed down
      throw new RuntimeException(
          String.format("[%s]: Failed to pushdown expression: %s %s %s", signature, col, op, constant));
  }
}
Example #15
Source File: TestMetadataTableScans.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Checks scans of the all-manifests metadata table both without and with
 * {@code ignoreResiduals()}, delegating the residual assertions to
 * {@code validateTaskScanResiduals}.
 */
@Test
public void testAllManifestsTableHonorsIgnoreResiduals() throws IOException {
  table.newFastAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

  Table allManifestsTable = new AllManifestsTable(table.ops(), table);

  // plain filtered scan, validated with ignoreResiduals == false
  TableScan keepingResiduals = allManifestsTable.newScan().filter(Expressions.equal("id", 5));
  validateTaskScanResiduals(keepingResiduals, false);

  // same filter, but with ignoreResiduals() requested on the scan
  TableScan droppingResiduals =
      allManifestsTable.newScan().filter(Expressions.equal("id", 5)).ignoreResiduals();
  validateTaskScanResiduals(droppingResiduals, true);
}
Example #16
Source File: TestResiduals.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * For an unpartitioned spec, expects the residual evaluator to hand back each input expression
 * unchanged.
 */
@Test
public void testUnpartitionedResiduals() {
  Expression[] candidates = new Expression[] {
      Expressions.alwaysTrue(),
      Expressions.alwaysFalse(),
      Expressions.lessThan("a", 5),
      Expressions.greaterThanOrEqual("b", 16),
      Expressions.notNull("c"),
      Expressions.isNull("d"),
      Expressions.in("e", 1, 2, 3),
      Expressions.notIn("f", 1, 2, 3)
  };

  for (int i = 0; i < candidates.length; i += 1) {
    Expression candidate = candidates[i];
    ResidualEvaluator evaluator =
        ResidualEvaluator.of(PartitionSpec.unpartitioned(), candidate, true);
    Assert.assertEquals("Should return expression", candidate, evaluator.residualFor(Row.of()));
  }
}
Example #17
Source File: TestFilterFiles.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Appends one file whose metrics carry bounds of 1 and 2 for field id 1, then checks that
 * case-insensitive scans filtering on {@code "ID"} plan no files for the value 5 and one file
 * for the value 1.
 *
 * @param table table to append the file to and scan
 */
private void testCaseInsensitiveFilterFiles(Table table) {
  Map<Integer, ByteBuffer> lowerBounds = new HashMap<>();
  lowerBounds.put(1, Conversions.toByteBuffer(Types.IntegerType.get(), 1));

  Map<Integer, ByteBuffer> upperBounds = new HashMap<>();
  upperBounds.put(1, Conversions.toByteBuffer(Types.IntegerType.get(), 2));

  Metrics metrics = new Metrics(
      2L, Maps.newHashMap(), Maps.newHashMap(), Maps.newHashMap(), lowerBounds, upperBounds);

  DataFile file = DataFiles.builder(table.spec())
      .withPath("/path/to/file.parquet")
      .withFileSizeInBytes(0)
      .withMetrics(metrics)
      .build();

  table.newAppend().appendFile(file).commit();
  table.refresh();

  // 5 lies outside the recorded bounds, so no file is expected
  TableScan outOfBounds = table.newScan()
      .caseSensitive(false)
      .filter(Expressions.equal("ID", 5));
  assertEquals(0, Iterables.size(outOfBounds.planFiles()));

  // 1 lies inside the recorded bounds, so the single file is expected
  TableScan withinBounds = table.newScan()
      .caseSensitive(false)
      .filter(Expressions.equal("ID", 1));
  assertEquals(1, Iterables.size(withinBounds.planFiles()));
}
Example #18
Source File: TestMetadataTableScans.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Expects every file scan task planned from the manifests metadata table to carry
 * {@code Expressions.alwaysTrue()} as its residual, even though the scan has a filter and never
 * requests {@code ignoreResiduals()}.
 */
@Test
public void testManifestsTableAlwaysIgnoresResiduals() throws IOException {
  table.newFastAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

  Table manifestsTable = new ManifestsTable(table.ops(), table);
  TableScan filteredScan = manifestsTable.newScan().filter(Expressions.equal("id", 5));

  try (CloseableIterable<FileScanTask> fileTasks = filteredScan.planFiles()) {
    Assert.assertTrue("Tasks should not be empty", Iterables.size(fileTasks) > 0);
    fileTasks.forEach(fileTask ->
        Assert.assertEquals("Residuals must be ignored", Expressions.alwaysTrue(), fileTask.residual()));
  }
}
Example #19
Source File: DataFilesTable.java From iceberg with Apache License 2.0 | 6 votes |
@Override protected CloseableIterable<FileScanTask> planFiles( TableOperations ops, Snapshot snapshot, Expression rowFilter, boolean ignoreResiduals, boolean caseSensitive, boolean colStats) { CloseableIterable<ManifestFile> manifests = CloseableIterable.withNoopClose(snapshot.dataManifests()); String schemaString = SchemaParser.toJson(schema()); String specString = PartitionSpecParser.toJson(PartitionSpec.unpartitioned()); Expression filter = ignoreResiduals ? Expressions.alwaysTrue() : rowFilter; ResidualEvaluator residuals = ResidualEvaluator.unpartitioned(filter); // Data tasks produce the table schema, not the projection schema and projection is done by processing engines. // This data task needs to use the table schema, which may not include a partition schema to avoid having an // empty struct in the schema for unpartitioned tables. Some engines, like Spark, can't handle empty structs in // all cases. return CloseableIterable.transform(manifests, manifest -> new ManifestReadTask(ops.io(), manifest, fileSchema, schemaString, specString, residuals)); }
Example #20
Source File: ManifestReader.java From iceberg with Apache License 2.0 | 6 votes |
CloseableIterable<ManifestEntry<F>> entries() { if ((rowFilter != null && rowFilter != Expressions.alwaysTrue()) || (partFilter != null && partFilter != Expressions.alwaysTrue())) { Evaluator evaluator = evaluator(); InclusiveMetricsEvaluator metricsEvaluator = metricsEvaluator(); // ensure stats columns are present for metrics evaluation boolean requireStatsProjection = requireStatsProjection(rowFilter, columns); Collection<String> projectColumns = requireStatsProjection ? withStatsColumns(columns) : columns; return CloseableIterable.filter( open(projection(fileSchema, fileProjection, projectColumns, caseSensitive)), entry -> entry != null && evaluator.eval(entry.file().partition()) && metricsEvaluator.eval(entry.file())); } else { return open(projection(fileSchema, fileProjection, columns, caseSensitive)); } }
Example #21
Source File: TestLocalScan.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Reads with two filters on {@code "id"} while selecting only {@code "data"}, then checks the
 * size of each returned record and the set of {@code "data"} values against the values
 * computed from the source records.
 */
@Test
public void testProjectWithMissingFilterColumn() {
  Iterable<Record> results = IcebergGenerics.read(sharedTable)
      .where(Expressions.greaterThanOrEqual("id", 1))
      .where(Expressions.lessThan("id", 21))
      .select("data")
      .build();

  // compute the expected data values by applying the same id range to the source records
  Set<String> expected = Sets.newHashSet();
  for (Record source : concat(file1Records, file2Records, file3Records)) {
    Long id = (Long) source.getField("id");
    if (id >= 1 && id < 21) {
      expected.add(source.getField("data").toString());
    }
  }

  for (Record projected : results) {
    Assert.assertEquals("Record should have two projected fields", 2, projected.size());
  }

  Assert.assertEquals(
      "Should project correct rows",
      expected,
      Sets.newHashSet(transform(results, projected -> projected.getField("data").toString())));
}
Example #22
Source File: ManifestReader.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Returns the partition evaluator, creating and caching it in {@code lazyEvaluator} on first
 * use.
 *
 * <p>The evaluator is built from the inclusive projection of {@code rowFilter} AND-ed with
 * {@code partFilter}; if that combination is {@code null}, {@code Expressions.alwaysTrue()} is
 * used instead.
 *
 * @return the cached evaluator
 */
private Evaluator evaluator() {
  if (lazyEvaluator != null) {
    return lazyEvaluator;
  }

  Expression projectedRowFilter = Projections.inclusive(spec, caseSensitive).project(rowFilter);
  Expression combinedFilter = Expressions.and(projectedRowFilter, partFilter);

  // fall back to a filter that accepts everything when the combination is null
  this.lazyEvaluator = new Evaluator(
      spec.partitionType(),
      combinedFilter != null ? combinedFilter : Expressions.alwaysTrue(),
      caseSensitive);

  return lazyEvaluator;
}
Example #23
Source File: VectorizedParquetReader.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Stores the reader configuration.
 *
 * @param input file to read
 * @param expectedSchema schema stored as the expected schema
 * @param options Parquet read options
 * @param readerFunc function stored as the batch reader factory
 * @param nameMapping name mapping stored on the reader
 * @param filter row filter; {@code Expressions.alwaysTrue()} is stored as {@code null}
 * @param reuseContainers stored as-is
 * @param caseSensitive stored as-is
 * @param maxRecordsPerBatch stored as the batch size
 */
public VectorizedParquetReader(
    InputFile input, Schema expectedSchema, ParquetReadOptions options,
    Function<MessageType, VectorizedReader<?>> readerFunc, NameMapping nameMapping, Expression filter,
    boolean reuseContainers, boolean caseSensitive, int maxRecordsPerBatch) {
  this.input = input;
  this.expectedSchema = expectedSchema;
  this.options = options;
  this.batchReaderFunc = readerFunc;
  // replace alwaysTrue with null to avoid extra work evaluating a trivial filter
  this.filter = filter == Expressions.alwaysTrue() ? null : filter;
  this.reuseContainers = reuseContainers;
  this.caseSensitive = caseSensitive;
  this.batchSize = maxRecordsPerBatch;
  this.nameMapping = nameMapping;
}
Example #24
Source File: SparkFilters.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Converts each filter with {@code convert(Filter)} and AND-s the results together, starting
 * from {@code Expressions.alwaysTrue()}.
 *
 * @param filters filters to convert
 * @return the conjunction of all converted filters
 * @throws IllegalArgumentException presumably, via {@code Preconditions.checkArgument}, when a
 *     filter converts to {@code null}
 */
public static Expression convert(Filter[] filters) {
  Expression combined = Expressions.alwaysTrue();
  for (int i = 0; i < filters.length; i += 1) {
    Filter current = filters[i];
    Expression convertedFilter = convert(current);
    Preconditions.checkArgument(convertedFilter != null, "Cannot convert filter to Iceberg: %s", current);
    combined = Expressions.and(combined, convertedFilter);
  }
  return combined;
}
Example #25
Source File: ManifestEntriesTable.java From iceberg with Apache License 2.0 | 5 votes |
@Override protected CloseableIterable<FileScanTask> planFiles( TableOperations ops, Snapshot snapshot, Expression rowFilter, boolean ignoreResiduals, boolean caseSensitive, boolean colStats) { // return entries from both data and delete manifests CloseableIterable<ManifestFile> manifests = CloseableIterable.withNoopClose(snapshot.allManifests()); Schema fileSchema = new Schema(schema().findType("data_file").asStructType().fields()); String schemaString = SchemaParser.toJson(schema()); String specString = PartitionSpecParser.toJson(PartitionSpec.unpartitioned()); Expression filter = ignoreResiduals ? Expressions.alwaysTrue() : rowFilter; ResidualEvaluator residuals = ResidualEvaluator.unpartitioned(filter); return CloseableIterable.transform(manifests, manifest -> new ManifestReadTask(ops.io(), manifest, fileSchema, schemaString, specString, residuals)); }
Example #26
Source File: BaseReplacePartitions.java From iceberg with Apache License 2.0 | 5 votes |
@Override public List<ManifestFile> apply(TableMetadata base) { if (writeSpec().fields().size() <= 0) { // replace all data in an unpartitioned table deleteByRowFilter(Expressions.alwaysTrue()); } try { return super.apply(base); } catch (ManifestFilterManager.DeleteException e) { throw new ValidationException( "Cannot commit file that conflicts with existing partition: %s", e.partition()); } }
Example #27
Source File: SparkWriteBuilder.java From iceberg with Apache License 2.0 | 5 votes |
@Override public WriteBuilder overwrite(Filter[] filters) { this.overwriteExpr = SparkFilters.convert(filters); if (overwriteExpr == Expressions.alwaysTrue() && "dynamic".equals(overwriteMode)) { // use the write option to override truncating the table. use dynamic overwrite instead. this.overwriteDynamic = true; } else { Preconditions.checkState(!overwriteDynamic, "Cannot overwrite dynamically and by filter: %s", overwriteExpr); this.overwriteByFilter = true; } return this; }
Example #28
Source File: TestMetadataTableScans.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Plans the scan's combined tasks, asserts that there is at least one, and checks the residual
 * of every file scan task against {@code Expressions.alwaysTrue()}.
 *
 * @param scan scan to plan
 * @param ignoreResiduals when true, every residual must equal {@code alwaysTrue()}; when false,
 *     no residual may equal it
 * @throws IOException if closing the planned tasks fails
 */
private void validateTaskScanResiduals(TableScan scan, boolean ignoreResiduals) throws IOException {
  try (CloseableIterable<CombinedScanTask> combinedTasks = scan.planTasks()) {
    Assert.assertTrue("Tasks should not be empty", Iterables.size(combinedTasks) > 0);

    for (CombinedScanTask combined : combinedTasks) {
      for (FileScanTask fileTask : combined.files()) {
        if (!ignoreResiduals) {
          Assert.assertNotEquals("Residuals must be preserved", Expressions.alwaysTrue(), fileTask.residual());
          continue;
        }

        Assert.assertEquals("Residuals must be ignored", Expressions.alwaysTrue(), fileTask.residual());
      }
    }
  }
}
Example #29
Source File: ManifestGroup.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Creates a group over {@code manifests} with every filter set to
 * {@code Expressions.alwaysTrue()}, every ignore flag off, all columns selected, case-sensitive
 * matching, and predicates that accept everything.
 *
 * @param io file IO stored on the group
 * @param manifests manifests copied into a new hash set
 */
ManifestGroup(FileIO io, Iterable<ManifestFile> manifests) {
  this.io = io;
  this.manifests = Sets.newHashSet(manifests);

  // filters: accept everything
  this.dataFilter = Expressions.alwaysTrue();
  this.fileFilter = Expressions.alwaysTrue();
  this.partitionFilter = Expressions.alwaysTrue();

  // predicates: accept everything
  this.manifestPredicate = m -> true;
  this.manifestEntryPredicate = e -> true;

  // flags: nothing is ignored
  this.ignoreDeleted = false;
  this.ignoreExisting = false;
  this.ignoreResiduals = false;

  this.columns = ManifestReader.ALL_COLUMNS;
  this.caseSensitive = true;
}
Example #30
Source File: ParquetReader.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Stores the reader configuration.
 *
 * @param input file to read
 * @param expectedSchema schema stored as the expected schema
 * @param options Parquet read options
 * @param readerFunc function stored as the value reader factory
 * @param nameMapping name mapping stored on the reader
 * @param filter row filter; {@code Expressions.alwaysTrue()} is stored as {@code null}
 * @param reuseContainers stored as-is
 * @param caseSensitive stored as-is
 */
public ParquetReader(InputFile input, Schema expectedSchema, ParquetReadOptions options,
                     Function<MessageType, ParquetValueReader<?>> readerFunc, NameMapping nameMapping,
                     Expression filter, boolean reuseContainers, boolean caseSensitive) {
  this.input = input;
  this.expectedSchema = expectedSchema;
  this.options = options;
  this.readerFunc = readerFunc;
  // replace alwaysTrue with null to avoid extra work evaluating a trivial filter
  this.filter = filter == Expressions.alwaysTrue() ? null : filter;
  this.reuseContainers = reuseContainers;
  this.caseSensitive = caseSensitive;
  this.nameMapping = nameMapping;
}