org.apache.parquet.filter2.predicate.Operators Java Examples
The following examples show how to use
org.apache.parquet.filter2.predicate.Operators.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParquetFilters.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Translates a single comparison operation into the matching Parquet filter predicate.
 *
 * <p>{@code IS_NULL} and {@code NOT_NULL} are expressed as equality / inequality against
 * {@code null}; {@code value} is not used for those two operations.
 *
 * @param op the operation to translate
 * @param col the Parquet column the predicate is evaluated against
 * @param value the literal operand of the comparison
 * @return the Parquet predicate equivalent to {@code op} applied to {@code col} and {@code value}
 * @throws UnsupportedOperationException if {@code op} is not one of the handled operations
 */
@SuppressWarnings("checkstyle:MethodTypeParameterName")
private static <C extends Comparable<C>, COL extends Operators.Column<C> & Operators.SupportsLtGt>
    FilterPredicate pred(Operation op, COL col, C value) {
  final FilterPredicate translated;
  switch (op) {
    case IS_NULL:
      translated = FilterApi.eq(col, null);
      break;
    case NOT_NULL:
      translated = FilterApi.notEq(col, null);
      break;
    case EQ:
      translated = FilterApi.eq(col, value);
      break;
    case NOT_EQ:
      translated = FilterApi.notEq(col, value);
      break;
    case GT:
      translated = FilterApi.gt(col, value);
      break;
    case GT_EQ:
      translated = FilterApi.gtEq(col, value);
      break;
    case LT:
      translated = FilterApi.lt(col, value);
      break;
    case LT_EQ:
      translated = FilterApi.ltEq(col, value);
      break;
    default:
      throw new UnsupportedOperationException("Unsupported predicate operation: " + op);
  }
  return translated;
}
Example #2
Source File: ParquetRecordFilterBuilder.java From pxf with Apache License 2.0 | 6 votes |
/** * Returns the FilterPredicate function that supports equals and not equals * for the given operator * * @param operator the operator * @param <T> the type * @param <C> the column type * @return the FilterPredicate function */ private static <T extends Comparable<T>, C extends Operators.Column<T> & Operators.SupportsEqNotEq> BiFunction<C, T, FilterPredicate> getOperatorWithEqNotEqSupport(Operator operator) { switch (operator) { case IS_NULL: case EQUALS: case NOOP: return FilterApi::eq; // NOT boolean wraps a NOOP // NOT // | // NOOP // | // --------- // | | // 4 true // that needs to be replaced with equals case IS_NOT_NULL: case NOT_EQUALS: return FilterApi::notEq; default: throw new UnsupportedOperationException("not supported " + operator); } }
Example #3
Source File: ParquetRecordFilterBuilder.java From pxf with Apache License 2.0 | 6 votes |
/**
 * Picks the {@link FilterApi} factory for an ordering comparison (less than, greater than and
 * their or-equal variants). Any other operator is delegated to
 * {@code getOperatorWithEqNotEqSupport}.
 *
 * @param operator the operator to map
 * @param <T> the value type of the column
 * @param <C> the column type, which must support less-than / greater-than
 * @return a function that builds the FilterPredicate from a column and a value
 */
private static <T extends Comparable<T>, C extends Operators.Column<T> & Operators.SupportsLtGt>
    BiFunction<C, T, FilterPredicate> getOperatorWithLtGtSupport(Operator operator) {
  switch (operator) {
    // strict comparisons
    case GREATER_THAN:
      return FilterApi::gt;
    case LESS_THAN:
      return FilterApi::lt;

    // inclusive comparisons
    case GREATER_THAN_OR_EQUAL:
      return FilterApi::gtEq;
    case LESS_THAN_OR_EQUAL:
      return FilterApi::ltEq;

    // everything else is handled by the equals / not-equals mapping
    default:
      return getOperatorWithEqNotEqSupport(operator);
  }
}
Example #4
Source File: ParquetFilters.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Converts one comparison operation on a column into a Parquet {@code FilterPredicate}.
 *
 * <p>The null checks are built as {@code eq(col, null)} and {@code notEq(col, null)}, so
 * {@code value} plays no part in them.
 *
 * @param op the operation to convert
 * @param col the Parquet column the predicate refers to
 * @param value the literal operand of the comparison
 * @return the Parquet predicate for {@code op}
 * @throws UnsupportedOperationException if {@code op} is not handled by this method
 */
private static <C extends Comparable<C>, COL extends Operators.Column<C> & Operators.SupportsLtGt>
    FilterPredicate pred(Operation op, COL col, C value) {
  switch (op) {
    // null checks: compare against null, ignoring the supplied value
    case NOT_NULL:
      return FilterApi.notEq(col, null);
    case IS_NULL:
      return FilterApi.eq(col, null);

    // equality
    case NOT_EQ:
      return FilterApi.notEq(col, value);
    case EQ:
      return FilterApi.eq(col, value);

    // ordering
    case LT:
      return FilterApi.lt(col, value);
    case LT_EQ:
      return FilterApi.ltEq(col, value);
    case GT:
      return FilterApi.gt(col, value);
    case GT_EQ:
      return FilterApi.gtEq(col, value);

    default:
      throw new UnsupportedOperationException("Unsupported predicate operation: " + op);
  }
}
Example #5
Source File: ParquetLoader.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Builds a Parquet filter predicate for one column/operator/constant triple, choosing the
 * Parquet column kind from the type of the matching field in {@code schema}.
 *
 * <p>Boolean columns only accept {@code OP_EQ} and {@code OP_NE}; every other supported type is
 * handed to the {@code op(...)} helper.
 *
 * @param op the comparison operator
 * @param col the column being filtered
 * @param value the constant operand
 * @return the Parquet filter predicate
 * @throws RuntimeException if the operation is not supported for a boolean column, if the field
 *         type is not supported, or if looking up the field raises a FrontendException
 */
private FilterPredicate buildFilter(OpType op, Column col, Const value) {
  final String name = col.getName();
  try {
    final FieldSchema field = schema.getField(name);
    switch (field.type) {
      case DataType.BOOLEAN: {
        final Operators.BooleanColumn column = booleanColumn(name);
        switch (op) {
          case OP_EQ:
            return eq(column, getValue(value, column.getColumnType()));
          case OP_NE:
            return notEq(column, getValue(value, column.getColumnType()));
          default:
            throw new RuntimeException(
                "Operation " + op + " not supported for boolean column: " + name);
        }
      }
      case DataType.INTEGER:
        return op(op, intColumn(name), value);
      case DataType.LONG:
        return op(op, longColumn(name), value);
      case DataType.FLOAT:
        return op(op, floatColumn(name), value);
      case DataType.DOUBLE:
        return op(op, doubleColumn(name), value);
      case DataType.CHARARRAY:
        return op(op, binaryColumn(name), value);
      default:
        throw new RuntimeException("Unsupported type " + field.type + " for field: " + name);
    }
  } catch (FrontendException e) {
    throw new RuntimeException("Error processing pushdown for column:" + col, e);
  }
}
Example #6
Source File: BloomFilterImpl.java From parquet-mr with Apache License 2.0 | 5 votes |
@Override public <T extends Comparable<T>> Boolean visit(Operators.Eq<T> eq) { T value = eq.getValue(); if (value == null) { // the bloom filter bitset contains only non-null values so isn't helpful. this // could check the column stats, but the StatisticsFilter is responsible return BLOCK_MIGHT_MATCH; } Operators.Column<T> filterColumn = eq.getColumn(); ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath()); if (meta == null) { // the column isn't in this file so all values are null, but the value // must be non-null because of the above check. return BLOCK_CANNOT_MATCH; } try { BloomFilter bloomFilter = bloomFilterReader.readBloomFilter(meta); if (bloomFilter != null && !bloomFilter.findHash(bloomFilter.hash(value))) { return BLOCK_CANNOT_MATCH; } } catch (RuntimeException e) { LOG.warn(e.getMessage()); return BLOCK_MIGHT_MATCH; } return BLOCK_MIGHT_MATCH; }
Example #7
Source File: ParquetFilters.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Converts a bound predicate into the equivalent Parquet filter predicate, selecting the Parquet
 * column kind from the type of the referenced field.
 *
 * <p>Boolean columns only support {@code EQ} and {@code NOT_EQ}; all other handled types are
 * delegated to {@code pred(...)}.
 *
 * @param pred the bound predicate to convert
 * @return the Parquet filter predicate
 * @throws UnsupportedOperationException if the field type is not handled, or the operation is
 *         not supported for a boolean column
 */
@Override
public <T> FilterPredicate predicate(BoundPredicate<T> pred) {
  Operation op = pred.op();
  BoundReference<T> ref = pred.ref();
  Literal<T> lit = pred.literal();
  String path = schema.idToAlias(ref.fieldId());

  switch (ref.type().typeId()) {
    case BOOLEAN:
      Operators.BooleanColumn col = FilterApi.booleanColumn(path);
      switch (op) {
        case EQ:
          return FilterApi.eq(col, getParquetPrimitive(lit));
        case NOT_EQ:
          // must be notEq: building eq here would invert the meaning of the filter
          return FilterApi.notEq(col, getParquetPrimitive(lit));
      }
      // leave the outer switch so that unsupported boolean operations reach the exception
      // below instead of falling through into the INTEGER case
      break;
    case INTEGER:
    case DATE:
      return pred(op, FilterApi.intColumn(path), getParquetPrimitive(lit));
    case LONG:
    case TIME:
    case TIMESTAMP:
      return pred(op, FilterApi.longColumn(path), getParquetPrimitive(lit));
    case FLOAT:
      return pred(op, FilterApi.floatColumn(path), getParquetPrimitive(lit));
    case DOUBLE:
      return pred(op, FilterApi.doubleColumn(path), getParquetPrimitive(lit));
    case STRING:
    case UUID:
    case FIXED:
    case BINARY:
    case DECIMAL:
      return pred(op, FilterApi.binaryColumn(path), getParquetPrimitive(lit));
  }

  throw new UnsupportedOperationException("Cannot convert to Parquet filter: " + pred);
}
Example #8
Source File: ParquetRecordFilterBuilder.java From pxf with Apache License 2.0 | 4 votes |
/** * Handles simple column-operator-constant expressions. * * @param operatorNode the operator node */ private void processSimpleColumnOperator(OperatorNode operatorNode) { Operator operator = operatorNode.getOperator(); ColumnIndexOperandNode columnIndexOperand = operatorNode.getColumnIndexOperand(); OperandNode valueOperand = null; if (operator != Operator.IS_NULL && operator != Operator.IS_NOT_NULL) { valueOperand = operatorNode.getValueOperand(); if (valueOperand == null) { throw new IllegalArgumentException( String.format("Operator %s does not contain an operand", operator)); } } ColumnDescriptor columnDescriptor = columnDescriptors.get(columnIndexOperand.index()); String filterColumnName = columnDescriptor.columnName(); Type type = fields.get(filterColumnName); // INT96 and FIXED_LEN_BYTE_ARRAY cannot be pushed down // for more details look at org.apache.parquet.filter2.dictionarylevel.DictionaryFilter#expandDictionary // where INT96 and FIXED_LEN_BYTE_ARRAY are not dictionary values FilterPredicate simpleFilter; switch (type.asPrimitiveType().getPrimitiveTypeName()) { case INT32: simpleFilter = ParquetRecordFilterBuilder.<Integer, Operators.IntColumn>getOperatorWithLtGtSupport(operator) .apply(intColumn(type.getName()), getIntegerForINT32(type.getOriginalType(), valueOperand)); break; case INT64: simpleFilter = ParquetRecordFilterBuilder.<Long, Operators.LongColumn>getOperatorWithLtGtSupport(operator) .apply(longColumn(type.getName()), valueOperand == null ? null : Long.parseLong(valueOperand.toString())); break; case BINARY: simpleFilter = ParquetRecordFilterBuilder.<Binary, Operators.BinaryColumn>getOperatorWithLtGtSupport(operator) .apply(binaryColumn(type.getName()), valueOperand == null ? 
null : Binary.fromString(valueOperand.toString())); break; case BOOLEAN: // Boolean does not SupportsLtGt simpleFilter = ParquetRecordFilterBuilder.<Boolean, Operators.BooleanColumn>getOperatorWithEqNotEqSupport(operator) .apply(booleanColumn(type.getName()), valueOperand == null ? null : Boolean.parseBoolean(valueOperand.toString())); break; case FLOAT: simpleFilter = ParquetRecordFilterBuilder.<Float, Operators.FloatColumn>getOperatorWithLtGtSupport(operator) .apply(floatColumn(type.getName()), valueOperand == null ? null : Float.parseFloat(valueOperand.toString())); break; case DOUBLE: simpleFilter = ParquetRecordFilterBuilder.<Double, Operators.DoubleColumn>getOperatorWithLtGtSupport(operator) .apply(doubleColumn(type.getName()), valueOperand == null ? null : Double.parseDouble(valueOperand.toString())); break; default: throw new UnsupportedOperationException(String.format("Column %s of type %s is not supported", type.getName(), type.asPrimitiveType().getPrimitiveTypeName())); } filterQueue.push(simpleFilter); }
Example #9
Source File: BloomFilterImpl.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Handles a not-equals predicate. The predicate is never inspected; the answer is always
 * {@code BLOCK_MIGHT_MATCH}.
 *
 * @param notEq the not-equals predicate (unused)
 * @return {@code BLOCK_MIGHT_MATCH}
 */
@Override
public <T extends Comparable<T>> Boolean visit(Operators.NotEq<T> notEq) {
  return BLOCK_MIGHT_MATCH;
}
Example #10
Source File: BloomFilterImpl.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Handles a less-than predicate. The predicate is never inspected; the answer is always
 * {@code BLOCK_MIGHT_MATCH}.
 *
 * @param lt the less-than predicate (unused)
 * @return {@code BLOCK_MIGHT_MATCH}
 */
@Override
public <T extends Comparable<T>> Boolean visit(Operators.Lt<T> lt) {
  return BLOCK_MIGHT_MATCH;
}
Example #11
Source File: BloomFilterImpl.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Handles a less-than-or-equal predicate. The predicate is never inspected; the answer is
 * always {@code BLOCK_MIGHT_MATCH}.
 *
 * @param ltEq the less-than-or-equal predicate (unused)
 * @return {@code BLOCK_MIGHT_MATCH}
 */
@Override
public <T extends Comparable<T>> Boolean visit(Operators.LtEq<T> ltEq) {
  return BLOCK_MIGHT_MATCH;
}
Example #12
Source File: BloomFilterImpl.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Handles a greater-than predicate. The predicate is never inspected; the answer is always
 * {@code BLOCK_MIGHT_MATCH}.
 *
 * @param gt the greater-than predicate (unused)
 * @return {@code BLOCK_MIGHT_MATCH}
 */
@Override
public <T extends Comparable<T>> Boolean visit(Operators.Gt<T> gt) {
  return BLOCK_MIGHT_MATCH;
}
Example #13
Source File: BloomFilterImpl.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Handles a greater-than-or-equal predicate. The predicate is never inspected; the answer is
 * always {@code BLOCK_MIGHT_MATCH}.
 *
 * @param gtEq the greater-than-or-equal predicate (unused)
 * @return {@code BLOCK_MIGHT_MATCH}
 */
@Override
public <T extends Comparable<T>> Boolean visit(Operators.GtEq<T> gtEq) {
  return BLOCK_MIGHT_MATCH;
}
Example #14
Source File: BloomFilterImpl.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Combines the results of both operands of an AND with a logical OR: the result is true as soon
 * as either operand's visit yields true. The right operand is only visited when the left one
 * yields false.
 *
 * @param and the AND predicate
 * @return true if the left or the right operand evaluates to true, false otherwise
 */
@Override
public Boolean visit(Operators.And and) {
  final boolean leftResult = and.getLeft().accept(this);
  if (leftResult) {
    return true;
  }
  final boolean rightResult = and.getRight().accept(this);
  return rightResult;
}
Example #15
Source File: BloomFilterImpl.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Combines the results of both operands of an OR with a logical AND: the result is true only
 * when both operands' visits yield true. The right operand is only visited when the left one
 * yields true.
 *
 * @param or the OR predicate
 * @return true if both the left and the right operand evaluate to true, false otherwise
 */
@Override
public Boolean visit(Operators.Or or) {
  final boolean leftResult = or.getLeft().accept(this);
  if (!leftResult) {
    return false;
  }
  final boolean rightResult = or.getRight().accept(this);
  return rightResult;
}
Example #16
Source File: BloomFilterImpl.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Rejects NOT predicates: this visitor never evaluates them and always throws.
 *
 * @param not the NOT predicate
 * @return never returns normally
 * @throws IllegalArgumentException always; the message suggests running the predicate through
 *         LogicalInverseRewriter first
 */
@Override
public Boolean visit(Operators.Not not) {
  final String message =
      "This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter? "
          + not;
  throw new IllegalArgumentException(message);
}
Example #17
Source File: BloomFilterImpl.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Shared handler for user-defined predicates, plain or logically inverted. Neither argument is
 * inspected; the answer is always {@code BLOCK_MIGHT_MATCH}.
 *
 * @param ud the user-defined predicate (unused)
 * @param inverted whether the predicate is wrapped in a logical NOT (unused)
 * @return {@code BLOCK_MIGHT_MATCH}
 */
private <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(
    Operators.UserDefined<T, U> ud, boolean inverted) {
  return BLOCK_MIGHT_MATCH;
}
Example #18
Source File: BloomFilterImpl.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Handles a user-defined predicate by delegating to the two-argument overload with
 * {@code inverted} set to {@code false}.
 *
 * @param udp the user-defined predicate
 * @return the result of the two-argument {@code visit} overload
 */
@Override
public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(
    Operators.UserDefined<T, U> udp) {
  final boolean inverted = false;
  return visit(udp, inverted);
}
Example #19
Source File: BloomFilterImpl.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Handles a logically inverted user-defined predicate by unwrapping it and delegating to the
 * two-argument overload with {@code inverted} set to {@code true}.
 *
 * @param udp the inverted user-defined predicate
 * @return the result of the two-argument {@code visit} overload
 */
@Override
public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(
    Operators.LogicalNotUserDefined<T, U> udp) {
  final Operators.UserDefined<T, U> wrapped = udp.getUserDefined();
  return visit(wrapped, true);
}
Example #20
Source File: ParquetFilters.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Converts a bound predicate into the equivalent Parquet filter predicate.
 *
 * <p>Only predicates whose term is a {@code BoundReference} are handled. Unary predicates are
 * converted with a {@code null} literal; literal predicates use their own literal. Boolean
 * columns only support {@code EQ} and {@code NOT_EQ}.
 *
 * @param pred the bound predicate to convert
 * @return the Parquet filter predicate
 * @throws UnsupportedOperationException if the term is not a reference, the predicate is neither
 *         unary nor a literal predicate, the field type is not handled, or the operation is not
 *         supported for a boolean column
 */
@Override
public <T> FilterPredicate predicate(BoundPredicate<T> pred) {
  if (!(pred.term() instanceof BoundReference)) {
    throw new UnsupportedOperationException(
        "Cannot convert non-reference to Parquet filter: " + pred.term());
  }

  final Operation op = pred.op();
  final BoundReference<T> ref = (BoundReference<T>) pred.term();
  final String path = schema.idToAlias(ref.fieldId());

  final Literal<T> lit;
  if (pred.isUnaryPredicate()) {
    // unary predicates carry no literal
    lit = null;
  } else {
    if (!pred.isLiteralPredicate()) {
      throw new UnsupportedOperationException("Cannot convert to Parquet filter: " + pred);
    }
    lit = pred.asLiteralPredicate().literal();
  }

  switch (ref.type().typeId()) {
    case BOOLEAN: {
      final Operators.BooleanColumn booleanCol = FilterApi.booleanColumn(path);
      switch (op) {
        case EQ:
          return FilterApi.eq(booleanCol, getParquetPrimitive(lit));
        case NOT_EQ:
          return FilterApi.notEq(booleanCol, getParquetPrimitive(lit));
      }
      // any other operation on a boolean column ends up at the exception below
      break;
    }

    case INTEGER:
    case DATE:
      return pred(op, FilterApi.intColumn(path), getParquetPrimitive(lit));

    case LONG:
    case TIME:
    case TIMESTAMP:
      return pred(op, FilterApi.longColumn(path), getParquetPrimitive(lit));

    case FLOAT:
      return pred(op, FilterApi.floatColumn(path), getParquetPrimitive(lit));

    case DOUBLE:
      return pred(op, FilterApi.doubleColumn(path), getParquetPrimitive(lit));

    case STRING:
    case UUID:
    case FIXED:
    case BINARY:
    case DECIMAL:
      return pred(op, FilterApi.binaryColumn(path), getParquetPrimitive(lit));
  }

  throw new UnsupportedOperationException("Cannot convert to Parquet filter: " + pred);
}