org.apache.iceberg.expressions.Expressions#alwaysTrue

Source File: TestResiduals.java From iceberg with Apache License 2.0

6 votes

/**
 * Checks that a residual evaluator built for an unpartitioned spec returns each
 * input expression as its own residual.
 */
@Test
public void testUnpartitionedResiduals() {
  Expression[] candidates = {
      Expressions.alwaysTrue(),
      Expressions.alwaysFalse(),
      Expressions.lessThan("a", 5),
      Expressions.greaterThanOrEqual("b", 16),
      Expressions.notNull("c"),
      Expressions.isNull("d"),
      Expressions.in("e", 1, 2, 3),
      Expressions.notIn("f", 1, 2, 3)
  };

  for (int i = 0; i < candidates.length; i += 1) {
    Expression expected = candidates[i];
    ResidualEvaluator evaluator = ResidualEvaluator.of(PartitionSpec.unpartitioned(), expected, true);
    // evaluate against an empty row; the expression should come back as-is
    Expression actual = evaluator.residualFor(Row.of());
    Assert.assertEquals("Should return expression", expected, actual);
  }
}

Source File: ManifestReader.java From iceberg with Apache License 2.0

6 votes

/**
 * Opens the manifest and returns its entries.
 *
 * <p>When neither the row filter nor the partition filter is set to something other than
 * {@code alwaysTrue()}, the projected entries are returned unfiltered. Otherwise null entries
 * are dropped and the remaining entries must pass both the partition evaluator and the
 * metrics evaluator.
 */
CloseableIterable<ManifestEntry<F>> entries() {
  boolean hasRowFilter = rowFilter != null && rowFilter != Expressions.alwaysTrue();
  boolean hasPartFilter = partFilter != null && partFilter != Expressions.alwaysTrue();

  if (!hasRowFilter && !hasPartFilter) {
    // no effective filter: hand back every projected entry
    return open(projection(fileSchema, fileProjection, columns, caseSensitive));
  }

  Evaluator partitionEval = evaluator();
  InclusiveMetricsEvaluator metricsEval = metricsEvaluator();

  // the metrics evaluator needs stats columns, so add them when the requested projection lacks them
  Collection<String> projectColumns;
  if (requireStatsProjection(rowFilter, columns)) {
    projectColumns = withStatsColumns(columns);
  } else {
    projectColumns = columns;
  }

  return CloseableIterable.filter(
      open(projection(fileSchema, fileProjection, projectColumns, caseSensitive)),
      candidate -> candidate != null &&
          partitionEval.eval(candidate.file().partition()) &&
          metricsEval.eval(candidate.file()));
}

Source File: DataFilesTable.java From iceberg with Apache License 2.0

6 votes

/**
 * Plans one {@code ManifestReadTask} per data manifest of the given snapshot.
 *
 * <p>The tasks carry an unpartitioned residual evaluator built from {@code rowFilter}, or from
 * {@code alwaysTrue()} when {@code ignoreResiduals} is set.
 */
@Override
protected CloseableIterable<FileScanTask> planFiles(
    TableOperations ops, Snapshot snapshot, Expression rowFilter,
    boolean ignoreResiduals, boolean caseSensitive, boolean colStats) {
  CloseableIterable<ManifestFile> dataManifests = CloseableIterable.withNoopClose(snapshot.dataManifests());
  String schemaJson = SchemaParser.toJson(schema());
  String specJson = PartitionSpecParser.toJson(PartitionSpec.unpartitioned());

  Expression residualFilter;
  if (ignoreResiduals) {
    residualFilter = Expressions.alwaysTrue();
  } else {
    residualFilter = rowFilter;
  }
  ResidualEvaluator residuals = ResidualEvaluator.unpartitioned(residualFilter);

  // Each data task emits rows in the table schema rather than the projection schema; engines apply the
  // projection themselves. The table schema is used because it may omit the partition schema, which keeps
  // an empty struct out of the schema for unpartitioned tables -- some engines (Spark, for one) cannot
  // always handle empty structs.
  return CloseableIterable.transform(dataManifests, manifestFile ->
      new ManifestReadTask(ops.io(), manifestFile, fileSchema, schemaJson, specJson, residuals));
}

Source File: AllDataFilesTable.java From iceberg with Apache License 2.0

6 votes

/**
 * Plans one {@code DataFilesTable.ManifestReadTask} per manifest returned by
 * {@code allDataManifestFiles} for the current table metadata's snapshots.
 *
 * <p>The tasks carry an unpartitioned residual evaluator built from {@code rowFilter}, or from
 * {@code alwaysTrue()} when {@code ignoreResiduals} is set.
 */
@Override
protected CloseableIterable<FileScanTask> planFiles(
    TableOperations ops, Snapshot snapshot, Expression rowFilter,
    boolean ignoreResiduals, boolean caseSensitive, boolean colStats) {
  CloseableIterable<ManifestFile> allManifests = allDataManifestFiles(ops.current().snapshots());
  String schemaJson = SchemaParser.toJson(schema());
  String specJson = PartitionSpecParser.toJson(PartitionSpec.unpartitioned());

  Expression residualFilter;
  if (ignoreResiduals) {
    residualFilter = Expressions.alwaysTrue();
  } else {
    residualFilter = rowFilter;
  }
  ResidualEvaluator residuals = ResidualEvaluator.unpartitioned(residualFilter);

  // Each data task emits rows in the table schema rather than the projection schema; engines apply the
  // projection themselves. The table schema is used because it may omit the partition schema, which keeps
  // an empty struct out of the schema for unpartitioned tables -- some engines (Spark, for one) cannot
  // always handle empty structs.
  return CloseableIterable.transform(allManifests, manifestFile ->
      new DataFilesTable.ManifestReadTask(ops.io(), manifestFile, fileSchema, schemaJson, specJson, residuals));
}

Source File: ManifestGroup.java From iceberg with Apache License 2.0

5 votes

/**
 * Creates a manifest group over a copy of the given manifests with permissive defaults:
 * all three filters are {@code alwaysTrue()}, nothing is ignored, all columns are selected,
 * matching is case sensitive, and both predicates accept everything.
 *
 * @param io file IO stored on this group
 * @param manifests manifests to include; copied into a new hash set
 */
ManifestGroup(FileIO io, Iterable<ManifestFile> manifests) {
  // inputs
  this.io = io;
  this.manifests = Sets.newHashSet(manifests);

  // filters start out matching everything
  this.partitionFilter = Expressions.alwaysTrue();
  this.fileFilter = Expressions.alwaysTrue();
  this.dataFilter = Expressions.alwaysTrue();
  this.manifestEntryPredicate = entry -> true;
  this.manifestPredicate = manifest -> true;

  // flags and projection defaults
  this.ignoreResiduals = false;
  this.ignoreExisting = false;
  this.ignoreDeleted = false;
  this.caseSensitive = true;
  this.columns = ManifestReader.ALL_COLUMNS;
}

Source File: ManifestReader.java From iceberg with Apache License 2.0

5 votes

/**
 * Returns the cached metrics evaluator, creating it on first use.
 *
 * <p>The evaluator is built over {@code spec.schema()} with the row filter, or with
 * {@code alwaysTrue()} when no row filter is set.
 */
private InclusiveMetricsEvaluator metricsEvaluator() {
  if (lazyMetricsEvaluator != null) {
    return lazyMetricsEvaluator;
  }

  Expression metricsFilter = rowFilter != null ? rowFilter : Expressions.alwaysTrue();
  this.lazyMetricsEvaluator = new InclusiveMetricsEvaluator(spec.schema(), metricsFilter, caseSensitive);
  return lazyMetricsEvaluator;
}

Source File: ManifestReader.java From iceberg with Apache License 2.0

5 votes

/**
 * Returns the cached partition evaluator, creating it on first use.
 *
 * <p>The row filter is projected with {@code Projections.inclusive} for this spec and combined
 * with the partition filter via {@code Expressions.and}; if that combination is null,
 * {@code alwaysTrue()} is used instead.
 */
private Evaluator evaluator() {
  if (lazyEvaluator != null) {
    return lazyEvaluator;
  }

  Expression projectedRowFilter = Projections.inclusive(spec, caseSensitive).project(rowFilter);
  Expression combined = Expressions.and(projectedRowFilter, partFilter);
  Expression effectiveFilter = combined != null ? combined : Expressions.alwaysTrue();

  this.lazyEvaluator = new Evaluator(spec.partitionType(), effectiveFilter, caseSensitive);
  return lazyEvaluator;
}

Source File: TableScanContext.java From iceberg with Apache License 2.0

5 votes

/**
 * Creates a context with default settings: no snapshot ids, an {@code alwaysTrue()} row filter,
 * no column selection, empty options, case-sensitive matching, and residuals and column stats
 * flags both off.
 */
TableScanContext() {
  // snapshot selection is unset
  this.snapshotId = null;
  this.fromSnapshotId = null;
  this.toSnapshotId = null;

  // filtering and projection defaults
  this.rowFilter = Expressions.alwaysTrue();
  this.selectedColumns = null;
  this.caseSensitive = true;

  // flags and options
  this.ignoreResiduals = false;
  this.colStats = false;
  this.options = ImmutableMap.of();
}

Source File: ManifestReader.java From iceberg with Apache License 2.0

5 votes

/**
 * Decides whether stats should be dropped for the given filter and column selection.
 *
 * <p>Returns true only when the row filter is not {@code alwaysTrue()}, the selection does not
 * contain all of {@code ManifestReader.ALL_COLUMNS}, and the selection shares no column with
 * {@code STATS_COLUMNS}.
 *
 * @param rowFilter row filter in effect
 * @param columns selected column names
 * @return true if all three conditions above hold
 */
static boolean dropStats(Expression rowFilter, Collection<String> columns) {
  if (rowFilter == Expressions.alwaysTrue()) {
    return false;
  }

  if (columns.containsAll(ManifestReader.ALL_COLUMNS)) {
    return false;
  }

  // Only drop stats when the caller selected none of them; a selection that includes even some
  // stats columns keeps its stats.
  return Sets.intersection(Sets.newHashSet(columns), STATS_COLUMNS).isEmpty();
}

Source File: ParquetFilters.java From iceberg with Apache License 2.0

5 votes

/**
 * Binds an unbound predicate and converts the result to a Parquet filter predicate.
 *
 * @throws UnsupportedOperationException if the bound result is neither a bound predicate nor
 *     one of the {@code alwaysTrue()} / {@code alwaysFalse()} expressions
 */
@Override
public <T> FilterPredicate predicate(UnboundPredicate<T> pred) {
  Expression bound = bind(pred);

  if (bound instanceof BoundPredicate) {
    return predicate((BoundPredicate<?>) bound);
  }

  // binding produced a constant expression instead of a predicate
  if (bound == Expressions.alwaysTrue()) {
    return AlwaysTrue.INSTANCE;
  }

  if (bound == Expressions.alwaysFalse()) {
    return AlwaysFalse.INSTANCE;
  }

  throw new UnsupportedOperationException("Cannot convert to Parquet filter: " + pred);
}

Source File: ParquetReader.java From iceberg with Apache License 2.0

5 votes

/**
 * Stores the reader configuration. A filter equal to {@code Expressions.alwaysTrue()} is
 * stored as null; any other filter is stored unchanged.
 */
public ParquetReader(InputFile input, Schema expectedSchema, ParquetReadOptions options,
                     Function<MessageType, ParquetValueReader<?>> readerFunc, NameMapping nameMapping,
                     Expression filter, boolean reuseContainers, boolean caseSensitive) {
  this.input = input;
  this.options = options;
  this.expectedSchema = expectedSchema;
  this.nameMapping = nameMapping;
  this.readerFunc = readerFunc;
  this.caseSensitive = caseSensitive;
  this.reuseContainers = reuseContainers;

  if (filter == Expressions.alwaysTrue()) {
    // a trivial filter is replaced with null so no work is spent evaluating it
    this.filter = null;
  } else {
    this.filter = filter;
  }
}

Source File: OrcIterable.java From iceberg with Apache License 2.0

5 votes

/**
 * Stores the iterable's configuration. A filter equal to {@code Expressions.alwaysTrue()} is
 * stored as null; any other filter is stored unchanged.
 */
OrcIterable(InputFile file, Configuration config, Schema schema,
            Long start, Long length,
            Function<TypeDescription, OrcRowReader<?>> readerFunction, boolean caseSensitive, Expression filter) {
  this.file = file;
  this.config = config;
  this.schema = schema;
  this.start = start;
  this.length = length;
  this.readerFunction = readerFunction;
  this.caseSensitive = caseSensitive;

  if (filter == Expressions.alwaysTrue()) {
    this.filter = null;
  } else {
    this.filter = filter;
  }
}

Source File: IcebergFilterGenerator.java From metacat with Apache License 2.0

5 votes

/**
 * Evaluates the first child of the node and maps the result to a constant expression.
 *
 * @param node compare node whose first child is visited
 * @param data visitor data passed through to the child
 * @return {@code alwaysTrue()} when the child yields a non-null value whose string form parses
 *     as boolean true; {@code alwaysFalse()} otherwise
 */
private Expression evalSingleTerm(final ASTCOMPARE node, final Object data) {
    final Object childResult = node.jjtGetChild(0).jjtAccept(this, data);
    final boolean isTrue = childResult != null && Boolean.parseBoolean(childResult.toString());
    if (isTrue) {
        return Expressions.alwaysTrue();
    }
    return Expressions.alwaysFalse();
}

Source File: SparkWriteBuilder.java From iceberg with Apache License 2.0

5 votes

/**
 * Converts the Spark filters to an overwrite expression and selects the overwrite strategy.
 *
 * <p>Dynamic overwrite is chosen when the expression is {@code alwaysTrue()} and the overwrite
 * mode is {@code "dynamic"}; otherwise overwrite-by-filter is chosen, which fails if dynamic
 * overwrite was already enabled.
 */
@Override
public WriteBuilder overwrite(Filter[] filters) {
  this.overwriteExpr = SparkFilters.convert(filters);

  boolean matchesEverything = overwriteExpr == Expressions.alwaysTrue();
  boolean dynamicModeRequested = "dynamic".equals(overwriteMode);

  if (matchesEverything && dynamicModeRequested) {
    // the write option overrides truncating the table: switch to dynamic overwrite
    this.overwriteDynamic = true;
    return this;
  }

  Preconditions.checkState(!overwriteDynamic, "Cannot overwrite dynamically and by filter: %s", overwriteExpr);
  this.overwriteByFilter = true;
  return this;
}

Source File: SparkStreamingWrite.java From iceberg with Apache License 2.0

5 votes

/**
 * Creates a streaming write by delegating to the superclass constructor and recording the
 * truncate flag and query id on this instance.
 */
SparkStreamingWrite(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager,
                    CaseInsensitiveStringMap options, boolean truncateBatches, String queryId,
                    String applicationId, String wapId, Schema writeSchema, StructType dsSchema) {
  // NOTE(review): the literal false and alwaysTrue() arguments presumably configure the parent's
  // overwrite behaviour -- confirm against the superclass constructor signature
  super(
      table, io, encryptionManager, options,
      false, truncateBatches, Expressions.alwaysTrue(),
      applicationId, wapId, writeSchema, dsSchema);
  this.queryId = queryId;
  this.truncateBatches = truncateBatches;
}

Source File: RewriteDataFilesAction.java From iceberg with Apache License 2.0

5 votes

/**
 * Creates a rewrite action for the table, defaulting the filter to {@code alwaysTrue()} and
 * reading split and target-size settings from the table properties.
 *
 * @param spark session providing the Spark context and the {@code spark.sql.caseSensitive} setting
 * @param table table whose spec, properties, file IO and encryption manager are captured
 */
RewriteDataFilesAction(SparkSession spark, Table table) {
  this.sparkContext = new JavaSparkContext(spark.sparkContext());
  this.table = table;
  this.spec = table.spec();
  this.filter = Expressions.alwaysTrue();

  String caseSensitiveConf = spark.conf().get("spark.sql.caseSensitive", "false");
  this.caseSensitive = Boolean.parseBoolean(caseSensitiveConf);

  // the target size is the smaller of the configured split size and write target file size
  long configuredSplitSize = PropertyUtil.propertyAsLong(
      table.properties(), TableProperties.SPLIT_SIZE, TableProperties.SPLIT_SIZE_DEFAULT);
  long configuredTargetFileSize = PropertyUtil.propertyAsLong(
      table.properties(),
      TableProperties.WRITE_TARGET_FILE_SIZE_BYTES, TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT);
  this.targetSizeInBytes = Math.min(configuredSplitSize, configuredTargetFileSize);

  this.splitLookback = PropertyUtil.propertyAsInt(
      table.properties(), TableProperties.SPLIT_LOOKBACK, TableProperties.SPLIT_LOOKBACK_DEFAULT);
  this.splitOpenFileCost = PropertyUtil.propertyAsLong(
      table.properties(), TableProperties.SPLIT_OPEN_FILE_COST, TableProperties.SPLIT_OPEN_FILE_COST_DEFAULT);

  this.fileIO = SparkUtil.serializableFileIO(table);
  this.encryptionManager = table.encryption();
}

Source File: IcebergInputFormat.java From iceberg with Apache License 2.0

5 votes

/**
 * Filters the records by the residual expression unless filtering is skipped or unnecessary.
 *
 * <p>The input is returned unchanged when the {@code SKIP_RESIDUAL_FILTERING} configuration
 * flag is true, or when the residual is null or {@code alwaysTrue()}.
 *
 * @param iter records to filter
 * @param residual residual expression to apply, may be null
 * @param readSchema schema whose struct type the residual is evaluated against
 * @return the filtered records, or {@code iter} itself when no filtering applies
 */
private CloseableIterable<T> applyResidualFiltering(CloseableIterable<T> iter, Expression residual,
                                                    Schema readSchema) {
  boolean skipResidual = context.getConfiguration().getBoolean(SKIP_RESIDUAL_FILTERING, false);
  if (skipResidual || residual == null || residual == Expressions.alwaysTrue()) {
    return iter;
  }

  Evaluator residualEval = new Evaluator(readSchema.asStruct(), residual, caseSensitive);
  return CloseableIterable.filter(iter, row -> residualEval.eval((StructLike) row));
}

Source File: ScanSummary.java From iceberg with Apache License 2.0

5 votes

/**
 * Combines the expressions into one by folding them left to right with
 * {@code Expressions.and}, starting from {@code alwaysTrue()}.
 *
 * @param expressions expressions to combine
 * @return the combined expression; the starting {@code alwaysTrue()} value for an empty list
 */
static Expression joinFilters(List<Expression> expressions) {
  // sequential reduce: identical to a left-to-right loop over the list
  return expressions.stream().reduce(Expressions.alwaysTrue(), Expressions::and);
}

Source File: ManifestReader.java From iceberg with Apache License 2.0

4 votes

/**
 * Decides whether stats columns must be added to the projection for metrics evaluation.
 *
 * @param rowFilter row filter in effect
 * @param columns selected column names
 * @return true when the filter is not {@code alwaysTrue()} and the selection contains neither
 *     all of {@code ManifestReader.ALL_COLUMNS} nor all of {@code STATS_COLUMNS}
 */
private static boolean requireStatsProjection(Expression rowFilter, Collection<String> columns) {
  if (rowFilter == Expressions.alwaysTrue()) {
    return false;
  }

  if (columns.containsAll(ManifestReader.ALL_COLUMNS)) {
    return false;
  }

  // the metrics evaluator needs every stats column, so require them unless all are already selected
  return !columns.containsAll(STATS_COLUMNS);
}

Source File: Reader.java From iceberg with Apache License 2.0

4 votes

/**
 * Returns the conjunction of the configured filter expressions, folding them left to right
 * with {@code Expressions.and} from {@code alwaysTrue()}; returns {@code alwaysTrue()} when
 * no filter expressions are set.
 */
private Expression filterExpression() {
  if (filterExpressions == null) {
    return Expressions.alwaysTrue();
  }

  Expression combined = Expressions.alwaysTrue();
  for (Expression next : filterExpressions) {
    combined = Expressions.and(combined, next);
  }
  return combined;
}

Java Code Examples for org.apache.iceberg.expressions.Expressions#alwaysTrue()