io.prestosql.spi.predicate.TupleDomain#filter

Source File: WindowFilterPushDown.java From presto with Apache License 2.0

6 votes

/**
 * Rebuilds the filter placed on top of {@code source} after checking the row number bound.
 * <p>
 * When {@code allRowNumberValuesInDomain} rejects the extracted tuple domain, the original
 * predicate is re-applied unchanged. Otherwise the entry for {@code rowNumberSymbol} is dropped
 * from the tuple domain and the predicate is rebuilt from the remaining domain plus the
 * expression that could not be converted into a domain.
 *
 * @param filterNode the filter whose predicate is analysed
 * @param source the node the resulting filter (if any) is placed on
 * @param rowNumberSymbol the symbol whose domain is removed from the predicate
 * @param upperBound the bound forwarded to {@code allRowNumberValuesInDomain}
 * @return {@code source} itself when the rebuilt predicate equals {@code TRUE},
 *         otherwise a new {@link FilterNode} reusing the id of {@code filterNode}
 */
private PlanNode rewriteFilterSource(FilterNode filterNode, PlanNode source, Symbol rowNumberSymbol, int upperBound)
{
    ExtractionResult extraction = fromPredicate(metadata, session, filterNode.getPredicate(), types);
    TupleDomain<Symbol> extractedDomain = extraction.getTupleDomain();

    if (allRowNumberValuesInDomain(extractedDomain, rowNumberSymbol, upperBound)) {
        // The row number domain is absorbed into the node, so it no longer belongs in the filter
        TupleDomain<Symbol> domainWithoutRowNumber = extractedDomain.filter(
                (symbol, domain) -> !symbol.equals(rowNumberSymbol));
        Expression domainPredicate = domainTranslator.toPredicate(domainWithoutRowNumber);
        Expression rebuiltPredicate = ExpressionUtils.combineConjuncts(
                metadata,
                extraction.getRemainingExpression(),
                domainPredicate);

        // A trivially true predicate needs no filter at all
        return rebuiltPredicate.equals(BooleanLiteral.TRUE_LITERAL)
                ? source
                : new FilterNode(filterNode.getId(), source, rebuiltPredicate);
    }

    // Row number constraint cannot be dropped: keep the predicate as it was
    return new FilterNode(filterNode.getId(), source, filterNode.getPredicate());
}

Source File: TpchIndexMetadata.java From presto with Apache License 2.0

5 votes

/**
 * Tries to resolve an index for the given table over the indexable columns plus any
 * columns that the tuple domain pins to a single non-null value.
 *
 * @param session the connector session (not read by this method)
 * @param tableHandle the table to look up; cast to {@link TpchTableHandle}
 * @param indexableColumns columns the caller can supply lookup values for
 * @param outputColumns columns requested as output (not read by this method)
 * @param tupleDomain constraint on the table's columns
 * @return the resolved index together with the part of {@code tupleDomain} that is not
 *         covered by the fixed values, or empty when {@code indexedData} has no matching index
 */
@Override
public Optional<ConnectorResolvedIndex> resolveIndex(
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        Set<ColumnHandle> indexableColumns,
        Set<ColumnHandle> outputColumns,
        TupleDomain<ColumnHandle> tupleDomain)
{
    TpchTableHandle tpchTable = (TpchTableHandle) tableHandle;

    // Retain only the fixed values on columns that are not already indexable.
    // Note: the overlapping columns could technically be used more efficiently, but this is simpler for now.
    // Null values are dropped as well since they are meaningless in index join lookups.
    Map<ColumnHandle, NullableValue> extractedFixedValues = TupleDomain.extractFixedValues(tupleDomain).orElse(ImmutableMap.of());
    Map<ColumnHandle, NullableValue> fixedValues = extractedFixedValues.entrySet().stream()
            .filter(entry -> !indexableColumns.contains(entry.getKey()) && !entry.getValue().isNull())
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

    // Every column usable for the lookup: the indexable ones followed by the fixed-value ones
    ImmutableSet.Builder<String> lookupColumnNamesBuilder = ImmutableSet.<String>builder();
    lookupColumnNamesBuilder.addAll(handleToNames(ImmutableList.copyOf(indexableColumns)));
    lookupColumnNamesBuilder.addAll(handleToNames(ImmutableList.copyOf(fixedValues.keySet())));
    Set<String> lookupColumnNames = lookupColumnNamesBuilder.build();

    // Bail out when no index exists for this combination of columns
    if (indexedData.getIndexedTable(tpchTable.getTableName(), tpchTable.getScaleFactor(), lookupColumnNames).isEmpty()) {
        return Optional.empty();
    }

    // Domains for the fixed-value columns are carried by the index handle, so strip them here
    TupleDomain<ColumnHandle> unresolvedDomain = tupleDomain.filter((column, domain) -> !fixedValues.containsKey(column));
    TpchIndexHandle indexHandle = new TpchIndexHandle(
            tpchTable.getTableName(),
            tpchTable.getScaleFactor(),
            lookupColumnNames,
            TupleDomain.fromFixedValues(fixedValues));
    return Optional.of(new ConnectorResolvedIndex(indexHandle, unresolvedDomain));
}

Source File: PredicateUtils.java From presto with Apache License 2.0

4 votes

/**
 * Restricts {@code predicate} to the columns accepted by {@code filterPredicate}.
 * <p>
 * Each column handle is cast to {@link TpchColumnHandle} before it is tested, so a handle
 * of any other type fails with a {@link ClassCastException}.
 *
 * @param predicate the tuple domain to restrict
 * @param filterPredicate decides, per column, whether its domain is kept
 * @return the result of {@code predicate.filter(...)} using the column test above
 */
public static TupleDomain<ColumnHandle> filterColumns(TupleDomain<ColumnHandle> predicate, Predicate<TpchColumnHandle> filterPredicate)
{
    return predicate.filter((handle, ignoredDomain) -> {
        TpchColumnHandle tpchColumn = (TpchColumnHandle) handle;
        return filterPredicate.test(tpchColumn);
    });
}

Source File: S3SelectRecordCursorProvider.java From presto with Apache License 2.0

4 votes

/**
 * Creates an S3 Select backed record cursor for the given split, when pushdown is enabled
 * and the table's deserializer is one of {@code CSV_SERDES}.
 *
 * @return the cursor paired with the reader projections, or empty when pushdown is disabled
 *         or the serde is not supported
 * @throws PrestoException with {@code HIVE_FILESYSTEM_ERROR} when the file system for
 *         {@code path} cannot be obtained
 */
@Override
public Optional<ReaderRecordCursorWithProjections> createRecordCursor(
        Configuration configuration,
        ConnectorSession session,
        Path path,
        long start,
        long length,
        long fileSize,
        Properties schema,
        List<HiveColumnHandle> columns,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager,
        boolean s3SelectPushdownEnabled)
{
    if (!s3SelectPushdownEnabled) {
        return Optional.empty();
    }

    // The returned file system is not used; the call only surfaces access failures early
    try {
        hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_FILESYSTEM_ERROR, "Failed getting FileSystem: " + path, e);
    }

    Optional<ReaderProjections> readerProjections = projectBaseColumns(columns);
    // Predicates on partial (non-base) columns are ignored for now
    TupleDomain<HiveColumnHandle> baseColumnPredicate = effectivePredicate.filter((column, domain) -> column.isBaseColumn());

    if (!CSV_SERDES.contains(getDeserializerClassName(schema))) {
        // unsupported serdes
        return Optional.empty();
    }

    // Read the projected base columns when there are projections, otherwise the requested columns
    List<HiveColumnHandle> readerColumns = readerProjections
            .map(ReaderProjections::getReaderColumns)
            .orElse(columns);

    String ionSqlQuery = new IonSqlQueryBuilder(typeManager).buildSql(readerColumns, baseColumnPredicate);
    S3SelectLineRecordReader lineReader = new S3SelectCsvRecordReader(configuration, path, start, length, schema, ionSqlQuery, s3ClientFactory);
    RecordCursor recordCursor = new S3SelectRecordCursor<>(configuration, path, lineReader, length, schema, readerColumns, hiveStorageTimeZone);

    return Optional.of(new ReaderRecordCursorWithProjections(recordCursor, readerProjections));
}

Source File: HivePartitionManager.java From presto with Apache License 2.0

4 votes

/**
 * Computes the partitions of a Hive table that satisfy the given constraint.
 * <p>
 * The effective predicate is the constraint summary intersected with the constraint already
 * enforced on the table handle. Partitions come from the table handle when it already carries
 * them, otherwise from the metastore via {@code getFilteredPartitionNames}; in the latter case
 * they are parsed and filtered lazily, each time the returned iterable is iterated.
 *
 * @param metastore the metastore used to look up the table and its partition names
 * @param identity the identity passed to the metastore calls
 * @param tableHandle the table to inspect; cast to {@link HiveTableHandle}
 * @param constraint the constraint whose summary and optional predicate select partitions
 * @return the matching partitions together with the compacted, remaining and enforced tuple domains
 * @throws TableNotFoundException when the metastore does not return the table
 */
public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, HiveIdentity identity, ConnectorTableHandle tableHandle, Constraint constraint)
{
    HiveTableHandle handle = (HiveTableHandle) tableHandle;
    TupleDomain<ColumnHandle> effectivePredicate = constraint.getSummary().intersect(handle.getEnforcedConstraint());

    List<HiveColumnHandle> partitionColumns = handle.getPartitionColumns();
    Optional<HiveBucketHandle> bucketHandle = handle.getBucketHandle();
    SchemaTableName tableName = handle.getSchemaTableName();

    // A "none" predicate cannot match anything, so skip the metastore entirely
    if (effectivePredicate.isNone()) {
        return new HivePartitionResult(partitionColumns, ImmutableList.of(), none(), none(), none(), bucketHandle, Optional.empty());
    }

    Table table = metastore
            .getTable(identity, tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));

    Optional<HiveBucketFilter> bucketFilter = getHiveBucketFilter(table, effectivePredicate);
    TupleDomain<HiveColumnHandle> compactEffectivePredicate = effectivePredicate
            .transform(HiveColumnHandle.class::cast)
            .simplify(domainCompactionThreshold);

    // No partition columns: report a single partition built from the table name alone
    if (partitionColumns.isEmpty()) {
        return new HivePartitionResult(
                partitionColumns,
                ImmutableList.of(new HivePartition(tableName)),
                compactEffectivePredicate,
                effectivePredicate,
                TupleDomain.all(),
                bucketHandle,
                bucketFilter);
    }

    List<Type> partitionTypes = partitionColumns.stream()
            .map(HiveColumnHandle::getType)
            .collect(toList());

    // Without an explicit constraint predicate every candidate is accepted by this check
    Predicate<Map<ColumnHandle, NullableValue>> valuesPredicate = constraint.predicate().orElse(value -> true);

    Iterable<HivePartition> matchingPartitions;
    if (!handle.getPartitions().isPresent()) {
        List<String> partitionNames = getFilteredPartitionNames(metastore, identity, tableName, partitionColumns, effectivePredicate);
        // Apply the extra filters which could not be done by getFilteredPartitionNames
        matchingPartitions = () -> partitionNames.stream()
                .map(name -> parseValuesAndFilterPartition(tableName, name, partitionColumns, partitionTypes, effectivePredicate, valuesPredicate))
                .filter(Optional::isPresent)
                .map(Optional::get)
                .iterator();
    }
    else {
        // The handle already carries its partitions; just re-check them against the predicate
        matchingPartitions = handle.getPartitions().get().stream()
                .filter(candidate -> partitionMatches(partitionColumns, effectivePredicate, valuesPredicate, candidate))
                .collect(toImmutableList());
    }

    // All partition key domains will be fully evaluated, so they are reported as enforced
    // and left out of the remaining domain
    TupleDomain<ColumnHandle> enforcedTupleDomain = effectivePredicate.filter((column, domain) -> partitionColumns.contains(column));
    TupleDomain<ColumnHandle> remainingTupleDomain = effectivePredicate.filter((column, domain) -> !partitionColumns.contains(column));

    return new HivePartitionResult(
            partitionColumns,
            matchingPartitions,
            compactEffectivePredicate,
            remainingTupleDomain,
            enforcedTupleDomain,
            bucketHandle,
            bucketFilter);
}

Java Code Examples for io.prestosql.spi.predicate.TupleDomain#filter()