org.apache.parquet.filter2.predicate.FilterPredicate Java Examples

Source File: TestBloomFiltering.java From parquet-mr with Apache License 2.0

6 votes

private void assertCorrectFiltering(Predicate<PhoneBookWriter.User> expectedFilter, FilterPredicate actualFilter)
  throws IOException {
  // Check with only bloom filter based filtering
  List<PhoneBookWriter.User> result = readUsers(actualFilter, false, true);

  assertTrue("Bloom filtering should drop some row groups", result.size() < DATA.size());
  LOGGER.info("{}/{} records read; filtering ratio: {}%", result.size(), DATA.size(),
    100 * result.size() / DATA.size());
  // Asserts that all the required records are in the result
  assertContains(DATA.stream().filter(expectedFilter), result);
  // Asserts that all the retrieved records are in the file (validating non-matching records)
  assertContains(result.stream(), DATA);

  // Check with all the filtering filtering to ensure the result contains exactly the required values
  result = readUsers(actualFilter, true, false);
  assertEquals(DATA.stream().filter(expectedFilter).collect(Collectors.toList()), result);
}

Source File: TestStatisticsFilter.java From parquet-mr with Apache License 2.0

6 votes

@Test
public void testClearExceptionForNots() {
  List<ColumnChunkMetaData> columnMetas = Arrays.asList(
      getDoubleColumnMeta(new DoubleStatistics(), 0L),
      getIntColumnMeta(new IntStatistics(), 0L));

  FilterPredicate pred = and(not(eq(doubleColumn, 12.0)), eq(intColumn, 17));

  try {
    canDrop(pred, columnMetas);
    fail("This should throw");
  } catch (IllegalArgumentException e) {
    assertEquals("This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter?"
        + " not(eq(double.column, 12.0))", e.getMessage());
  }
}

Source File: ParquetFilters.java From iceberg with Apache License 2.0

6 votes

private static
<C extends Comparable<C>, COL extends Operators.Column<C> & Operators.SupportsLtGt>
FilterPredicate pred(Operation op, COL col, C value) {
  switch (op) {
    case IS_NULL:
      return FilterApi.eq(col, null);
    case NOT_NULL:
      return FilterApi.notEq(col, null);
    case EQ:
      return FilterApi.eq(col, value);
    case NOT_EQ:
      return FilterApi.notEq(col, value);
    case GT:
      return FilterApi.gt(col, value);
    case GT_EQ:
      return FilterApi.gtEq(col, value);
    case LT:
      return FilterApi.lt(col, value);
    case LT_EQ:
      return FilterApi.ltEq(col, value);
    default:
      throw new UnsupportedOperationException("Unsupported predicate operation: " + op);
  }
}

Source File: ParquetTableSource.java From flink with Apache License 2.0

6 votes

@Nullable
private FilterPredicate lessThan(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof LessThan, "exp has to be LessThan");

	if (columnPair.f0 instanceof IntColumn) {
		return FilterApi.lt((IntColumn) columnPair.f0, (Integer) columnPair.f1);
	} else if (columnPair.f0 instanceof LongColumn) {
		return FilterApi.lt((LongColumn) columnPair.f0, (Long) columnPair.f1);
	} else if (columnPair.f0 instanceof DoubleColumn) {
		return FilterApi.lt((DoubleColumn) columnPair.f0, (Double) columnPair.f1);
	} else if (columnPair.f0 instanceof FloatColumn) {
		return FilterApi.lt((FloatColumn) columnPair.f0, (Float) columnPair.f1);
	}

	return null;
}

Source File: TestColumnIndexFiltering.java From parquet-mr with Apache License 2.0

6 votes

private void assertCorrectFiltering(Predicate<User> expectedFilter, FilterPredicate actualFilter)
    throws IOException {
  // Check with only column index based filtering
  List<User> result = readUsers(actualFilter, false);

  assertTrue("Column-index filtering should drop some pages", result.size() < DATA.size());
  LOGGER.info("{}/{} records read; filtering ratio: {}%", result.size(), DATA.size(),
      100 * result.size() / DATA.size());
  // Asserts that all the required records are in the result
  assertContains(DATA.stream().filter(expectedFilter), result);
  // Asserts that all the retrieved records are in the file (validating non-matching records)
  assertContains(result.stream(), DATA);

  // Check with all the filtering filtering to ensure the result contains exactly the required values
  result = readUsers(actualFilter, true);
  assertEquals(DATA.stream().filter(expectedFilter).collect(Collectors.toList()), result);
}

Source File: TestRecordLevelFilters.java From parquet-mr with Apache License 2.0

6 votes

@Test
public void testUserDefinedByInstance() throws Exception {
  LongColumn name = longColumn("id");

  final HashSet<Long> h = new HashSet<Long>();
  h.add(20L); 
  h.add(27L);
  h.add(28L);
  
  FilterPredicate pred = userDefined(name, new SetInFilter(h));

  List<Group> found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(pred));

  assertFilter(found, new UserFilter() {
    @Override
    public boolean keep(User u) {
      return u != null && h.contains(u.getId());
    }
  });
}

Source File: ParquetRecordFilterBuilder.java From pxf with Apache License 2.0

6 votes

/**
 * Returns the FilterPredicate function that supports equals and not equals
 * for the given operator
 *
 * @param operator the operator
 * @param <T>      the type
 * @param <C>      the column type
 * @return the FilterPredicate function
 */
private static <T extends Comparable<T>, C extends Operators.Column<T> & Operators.SupportsEqNotEq> BiFunction<C, T, FilterPredicate> getOperatorWithEqNotEqSupport(Operator operator) {
    switch (operator) {
        case IS_NULL:
        case EQUALS:
        case NOOP:
            return FilterApi::eq;
        // NOT boolean wraps a NOOP
        //       NOT
        //        |
        //       NOOP
        //        |
        //    ---------
        //   |         |
        //   4        true
        // that needs to be replaced with equals
        case IS_NOT_NULL:
        case NOT_EQUALS:
            return FilterApi::notEq;

        default:
            throw new UnsupportedOperationException("not supported " + operator);
    }
}

Source File: DictionaryFilterTest.java From parquet-mr with Apache License 2.0

6 votes

@Test
public void testAnd() throws Exception {
  BinaryColumn col = binaryColumn("binary_field");

  // both evaluate to false (no upper-case letters are in the dictionary)
  FilterPredicate B = eq(col, Binary.fromString("B"));
  FilterPredicate C = eq(col, Binary.fromString("C"));

  // both evaluate to true (all lower-case letters are in the dictionary)
  FilterPredicate x = eq(col, Binary.fromString("x"));
  FilterPredicate y = eq(col, Binary.fromString("y"));

  assertTrue("Should drop when either predicate must be false",
      canDrop(and(B, y), ccmd, dictionaries));
  assertTrue("Should drop when either predicate must be false",
      canDrop(and(x, C), ccmd, dictionaries));
  assertTrue("Should drop when either predicate must be false",
      canDrop(and(B, C), ccmd, dictionaries));
  assertFalse("Should not drop when either predicate could be true",
      canDrop(and(x, y), ccmd, dictionaries));
}

Source File: ParquetTableSource.java From flink with Apache License 2.0

6 votes

@Nullable
private FilterPredicate lessThan(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof LessThan, "exp has to be LessThan");

	if (columnPair.f0 instanceof IntColumn) {
		return FilterApi.lt((IntColumn) columnPair.f0, (Integer) columnPair.f1);
	} else if (columnPair.f0 instanceof LongColumn) {
		return FilterApi.lt((LongColumn) columnPair.f0, (Long) columnPair.f1);
	} else if (columnPair.f0 instanceof DoubleColumn) {
		return FilterApi.lt((DoubleColumn) columnPair.f0, (Double) columnPair.f1);
	} else if (columnPair.f0 instanceof FloatColumn) {
		return FilterApi.lt((FloatColumn) columnPair.f0, (Float) columnPair.f1);
	}

	return null;
}

Source File: DictionaryFilterTest.java From parquet-mr with Apache License 2.0

6 votes

@Test
public void testInverseUdp() throws Exception {
  InInt32UDP droppable = new InInt32UDP(ImmutableSet.of(42));
  InInt32UDP undroppable = new InInt32UDP(ImmutableSet.of(205));
  Set<Integer> allValues = ImmutableSet.copyOf(Ints.asList(intValues));
  InInt32UDP completeMatch = new InInt32UDP(allValues);

  FilterPredicate inverse =
    LogicalInverseRewriter.rewrite(not(userDefined(intColumn("int32_field"), droppable)));
  FilterPredicate inverse1 =
    LogicalInverseRewriter.rewrite(not(userDefined(intColumn("int32_field"), undroppable)));
  FilterPredicate inverse2 =
    LogicalInverseRewriter.rewrite(not(userDefined(intColumn("int32_field"), completeMatch)));

  assertFalse("Should not drop block for inverse of non-matching UDP",
    canDrop(inverse, ccmd, dictionaries));

  assertFalse("Should not drop block for inverse of UDP with some matches",
    canDrop(inverse1, ccmd, dictionaries));

  assertTrue("Should drop block for inverse of UDP with all matches",
    canDrop(inverse2, ccmd, dictionaries));
}

Source File: DictionaryFilterTest.java From parquet-mr with Apache License 2.0

6 votes

@Test
public void testOr() throws Exception {
  BinaryColumn col = binaryColumn("binary_field");

  // both evaluate to false (no upper-case letters are in the dictionary)
  FilterPredicate B = eq(col, Binary.fromString("B"));
  FilterPredicate C = eq(col, Binary.fromString("C"));

  // both evaluate to true (all lower-case letters are in the dictionary)
  FilterPredicate x = eq(col, Binary.fromString("x"));
  FilterPredicate y = eq(col, Binary.fromString("y"));

  assertFalse("Should not drop when one predicate could be true",
      canDrop(or(B, y), ccmd, dictionaries));
  assertFalse("Should not drop when one predicate could be true",
      canDrop(or(x, C), ccmd, dictionaries));
  assertTrue("Should drop when both predicates must be false",
      canDrop(or(B, C), ccmd, dictionaries));
  assertFalse("Should not drop when one predicate could be true",
      canDrop(or(x, y), ccmd, dictionaries));
}

Source File: ParquetFilters.java From iceberg with Apache License 2.0

5 votes

@Override
public FilterPredicate or(FilterPredicate left, FilterPredicate right) {
  if (left == AlwaysTrue.INSTANCE || right == AlwaysTrue.INSTANCE) {
    return AlwaysTrue.INSTANCE;
  } else if (left == AlwaysFalse.INSTANCE) {
    return right;
  } else if (right == AlwaysFalse.INSTANCE) {
    return left;
  }
  return FilterApi.or(left, right);
}

Source File: ParquetFilters.java From iceberg with Apache License 2.0

5 votes

@Override
@SuppressWarnings("unchecked")
public <T> FilterPredicate predicate(UnboundPredicate<T> pred) {
  Expression bound = bind(pred);
  if (bound instanceof BoundPredicate) {
    return predicate((BoundPredicate<?>) bound);
  } else if (bound == Expressions.alwaysTrue()) {
    return AlwaysTrue.INSTANCE;
  } else if (bound == Expressions.alwaysFalse()) {
    return AlwaysFalse.INSTANCE;
  }
  throw new UnsupportedOperationException("Cannot convert to Parquet filter: " + pred);
}

Source File: ParquetTableSource.java From flink with Apache License 2.0

5 votes

@Override
public TableSource<Row> applyPredicate(List<Expression> predicates) {

	// try to convert Flink filter expressions to Parquet FilterPredicates
	List<FilterPredicate> convertedPredicates = new ArrayList<>(predicates.size());
	List<Expression> unsupportedExpressions = new ArrayList<>(predicates.size());

	for (Expression toConvert : predicates) {
		FilterPredicate convertedPredicate = toParquetPredicate(toConvert);
		if (convertedPredicate != null) {
			convertedPredicates.add(convertedPredicate);
		} else {
			unsupportedExpressions.add(toConvert);
		}
	}

	// update list of Flink expressions to unsupported expressions
	predicates.clear();
	predicates.addAll(unsupportedExpressions);

	// construct single Parquet FilterPredicate
	FilterPredicate parquetPredicate = null;
	if (!convertedPredicates.isEmpty()) {
		// concat converted predicates with AND
		parquetPredicate = convertedPredicates.get(0);

		for (FilterPredicate converted : convertedPredicates.subList(1, convertedPredicates.size())) {
			parquetPredicate = FilterApi.and(parquetPredicate, converted);
		}
	}

	// create and return a new ParquetTableSource with Parquet FilterPredicate
	return new ParquetTableSource(path, parquetSchema, this.parquetConfig, recursiveEnumeration, selectedFields, parquetPredicate);
}

Source File: TestRecordLevelFilters.java From parquet-mr with Apache License 2.0

5 votes

@Test
public void testNameNotStartWithP() throws Exception {
  BinaryColumn name = binaryColumn("name");

  FilterPredicate pred = not(userDefined(name, StartWithP.class));

  List<Group> found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(pred));

  assertFilter(found, new UserFilter() {
    @Override
    public boolean keep(User u) {
      return u.getName() == null || !u.getName().startsWith("p");
    }
  });
}

Source File: ParquetLoader.java From parquet-mr with Apache License 2.0

5 votes

private FilterPredicate buildFilter(OpType op, Column col, Const value) {
  String name = col.getName();
  try {
    FieldSchema f = schema.getField(name);
    switch (f.type) {
      case DataType.BOOLEAN:
        Operators.BooleanColumn boolCol = booleanColumn(name);
        switch(op) {
          case OP_EQ: return eq(boolCol, getValue(value, boolCol.getColumnType()));
          case OP_NE: return notEq(boolCol, getValue(value, boolCol.getColumnType()));
          default: throw new RuntimeException(
              "Operation " + op + " not supported for boolean column: " + name);
        }
      case DataType.INTEGER:
        Operators.IntColumn intCol = intColumn(name);
        return op(op, intCol, value);
      case DataType.LONG:
        Operators.LongColumn longCol = longColumn(name);
        return op(op, longCol, value);
      case DataType.FLOAT:
        Operators.FloatColumn floatCol = floatColumn(name);
        return op(op, floatCol, value);
      case DataType.DOUBLE:
        Operators.DoubleColumn doubleCol = doubleColumn(name);
        return op(op, doubleCol, value);
      case DataType.CHARARRAY:
        Operators.BinaryColumn binaryCol = binaryColumn(name);
        return op(op, binaryCol, value);
      default:
        throw new RuntimeException("Unsupported type " + f.type + " for field: " + name);
    }
  } catch (FrontendException e) {
    throw new RuntimeException("Error processing pushdown for column:" + col, e);
  }
}

Source File: TestRecordLevelFilters.java From parquet-mr with Apache License 2.0

5 votes

@Test
public void testAllFilter() throws Exception {
  BinaryColumn name = binaryColumn("name");

  FilterPredicate pred = eq(name, Binary.fromString("no matches"));

  List<Group> found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(pred));
  assertEquals(new ArrayList<Group>(), found);
}

Source File: ParquetFilters.java From iceberg with Apache License 2.0

5 votes

static FilterCompat.Filter convert(Schema schema, Expression expr, boolean caseSensitive) {
  FilterPredicate pred = ExpressionVisitors.visit(expr, new ConvertFilterToParquet(schema, caseSensitive));
  // TODO: handle AlwaysFalse.INSTANCE
  if (pred != null && pred != AlwaysTrue.INSTANCE) {
    // FilterCompat will apply LogicalInverseRewriter
    return FilterCompat.get(pred);
  } else {
    return FilterCompat.NOOP;
  }
}

Source File: ParquetTableSource.java From flink with Apache License 2.0

5 votes

@Nullable
private FilterPredicate lessThanOrEqual(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof LessThanOrEqual, "exp has to be LessThanOrEqual");
	if (columnPair.f0 instanceof IntColumn) {
		return FilterApi.ltEq((IntColumn) columnPair.f0, (Integer) columnPair.f1);
	} else if (columnPair.f0 instanceof LongColumn) {
		return FilterApi.ltEq((LongColumn) columnPair.f0, (Long) columnPair.f1);
	} else if (columnPair.f0 instanceof DoubleColumn) {
		return FilterApi.ltEq((DoubleColumn) columnPair.f0, (Double) columnPair.f1);
	} else if (columnPair.f0 instanceof FloatColumn) {
		return FilterApi.ltEq((FloatColumn) columnPair.f0, (Float) columnPair.f1);
	}

	return null;
}

Source File: ParquetTableSource.java From flink with Apache License 2.0

5 votes

private ParquetTableSource(String path, MessageType parquetSchema, Configuration configuration,
								boolean recursiveEnumeration, @Nullable int[] selectedFields, @Nullable FilterPredicate predicate) {
	Preconditions.checkNotNull(path, "Path must not be null.");
	Preconditions.checkNotNull(parquetSchema, "ParquetSchema must not be null.");
	Preconditions.checkNotNull(configuration, "Configuration must not be null");
	this.path = path;
	this.parquetSchema = parquetSchema;
	this.parquetConfig = configuration;
	this.selectedFields = selectedFields;
	this.predicate = predicate;
	this.recursiveEnumeration = recursiveEnumeration;

	if (predicate != null) {
		this.isFilterPushedDown = true;
	}
	// determine the type information from the Parquet schema
	RowTypeInfo typeInfoFromSchema = (RowTypeInfo) ParquetSchemaConverter.fromParquetType(parquetSchema);

	// set return type info
	if (selectedFields == null) {
		this.typeInfo = typeInfoFromSchema;
	} else {
		this.typeInfo = RowTypeInfo.projectFields(typeInfoFromSchema, selectedFields);
	}

	// create a TableSchema that corresponds to the Parquet schema
	this.tableSchema = new TableSchema(
		typeInfoFromSchema.getFieldNames(),
		typeInfoFromSchema.getFieldTypes()
	);
}

Source File: ParquetLoader.java From parquet-mr with Apache License 2.0

5 votes

private void setInput(String location, Job job) throws IOException {
  this.setLocationHasBeenCalled  = true;
  this.location = location;
  setInputPaths(job, location);

  //This is prior to load because the initial value comes from the constructor
  //not file metadata or pig framework and would get overwritten in initSchema().
  if(UDFContext.getUDFContext().isFrontend()) {
    storeInUDFContext(PARQUET_COLUMN_INDEX_ACCESS, Boolean.toString(columnIndexAccess));
  }

  schema = PigSchemaConverter.parsePigSchema(getPropertyFromUDFContext(PARQUET_PIG_SCHEMA));
  requiredFieldList = PigSchemaConverter.deserializeRequiredFieldList(getPropertyFromUDFContext(PARQUET_PIG_REQUIRED_FIELDS));
  columnIndexAccess = Boolean.parseBoolean(getPropertyFromUDFContext(PARQUET_COLUMN_INDEX_ACCESS));

  initSchema(job);

  if(UDFContext.getUDFContext().isFrontend()) {
    //Setting for task-side loading via initSchema()
    storeInUDFContext(PARQUET_PIG_SCHEMA, pigSchemaToString(schema));
    storeInUDFContext(PARQUET_PIG_REQUIRED_FIELDS, serializeRequiredFieldList(requiredFieldList));
  }

  //Used by task-side loader via TupleReadSupport
  getConfiguration(job).set(PARQUET_PIG_SCHEMA, pigSchemaToString(schema));
  getConfiguration(job).set(PARQUET_PIG_REQUIRED_FIELDS, serializeRequiredFieldList(requiredFieldList));
  getConfiguration(job).set(PARQUET_COLUMN_INDEX_ACCESS, Boolean.toString(columnIndexAccess));

  FilterPredicate filterPredicate = (FilterPredicate) getFromUDFContext(ParquetInputFormat.FILTER_PREDICATE);
  if(filterPredicate != null) {
    ParquetInputFormat.setFilterPredicate(getConfiguration(job), filterPredicate);
  }
}

Source File: ParquetTableSource.java From flink with Apache License 2.0

5 votes

@Nullable
private FilterPredicate greaterThan(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof GreaterThan, "exp has to be GreaterThan");
	if (columnPair.f0 instanceof IntColumn) {
		return FilterApi.gt((IntColumn) columnPair.f0, (Integer) columnPair.f1);
	} else if (columnPair.f0 instanceof LongColumn) {
		return FilterApi.gt((LongColumn) columnPair.f0, (Long) columnPair.f1);
	} else if (columnPair.f0 instanceof DoubleColumn) {
		return FilterApi.gt((DoubleColumn) columnPair.f0, (Double) columnPair.f1);
	} else if (columnPair.f0 instanceof FloatColumn) {
		return FilterApi.gt((FloatColumn) columnPair.f0, (Float) columnPair.f1);
	}

	return null;
}

Source File: ParquetTableSource.java From flink with Apache License 2.0

5 votes

@Nullable
private FilterPredicate greaterThanOrEqual(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof GreaterThanOrEqual, "exp has to be GreaterThanOrEqual");
	if (columnPair.f0 instanceof IntColumn) {
		return FilterApi.gtEq((IntColumn) columnPair.f0, (Integer) columnPair.f1);
	} else if (columnPair.f0 instanceof LongColumn) {
		return FilterApi.gtEq((LongColumn) columnPair.f0, (Long) columnPair.f1);
	} else if (columnPair.f0 instanceof DoubleColumn) {
		return FilterApi.gtEq((DoubleColumn) columnPair.f0, (Double) columnPair.f1);
	} else if (columnPair.f0 instanceof FloatColumn) {
		return FilterApi.gtEq((FloatColumn) columnPair.f0, (Float) columnPair.f1);
	}

	return null;
}

Source File: TestBloomFiltering.java From parquet-mr with Apache License 2.0

5 votes

private List<PhoneBookWriter.User> readUsers(FilterPredicate filter, boolean useOtherFiltering,
                                             boolean useBloomFilter) throws IOException {
  return PhoneBookWriter.readUsers(ParquetReader.builder(new GroupReadSupport(), file)
    .withFilter(FilterCompat.get(filter))
    .useDictionaryFilter(useOtherFiltering)
    .useStatsFilter(useOtherFiltering)
    .useRecordFilter(useOtherFiltering)
    .useBloomFilter(useBloomFilter)
    .useColumnIndexFilter(useOtherFiltering));
}

Source File: RowGroupFilter.java From parquet-mr with Apache License 2.0

5 votes

@Override
public List<BlockMetaData> visit(FilterCompat.FilterPredicateCompat filterPredicateCompat) {
  FilterPredicate filterPredicate = filterPredicateCompat.getFilterPredicate();

  // check that the schema of the filter matches the schema of the file
  SchemaCompatibilityValidator.validate(filterPredicate, schema);

  List<BlockMetaData> filteredBlocks = new ArrayList<BlockMetaData>();

  for (BlockMetaData block : blocks) {
    boolean drop = false;

    if(levels.contains(FilterLevel.STATISTICS)) {
      drop = StatisticsFilter.canDrop(filterPredicate, block.getColumns());
    }

    if(!drop && levels.contains(FilterLevel.DICTIONARY)) {
      drop = DictionaryFilter.canDrop(filterPredicate, block.getColumns(), reader.getDictionaryReader(block));
    }

    if (!drop && levels.contains(FilterLevel.BLOOMFILTER)) {
      drop = BloomFilterImpl.canDrop(filterPredicate, block.getColumns(), reader.getBloomFilterDataReader(block));
    }

    if(!drop) {
      filteredBlocks.add(block);
    }
  }

  return filteredBlocks;
}

Source File: ParquetInputFormat.java From parquet-mr with Apache License 2.0

5 votes

private static FilterPredicate getFilterPredicate(Configuration configuration) {
  try {
    return SerializationUtil.readObjectFromConfAsBase64(FILTER_PREDICATE, configuration);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}

Source File: ParquetInputFormat.java From parquet-mr with Apache License 2.0

5 votes

public static void setFilterPredicate(Configuration configuration, FilterPredicate filterPredicate) {
  checkArgument(getUnboundRecordFilter(configuration) == null,
      "You cannot provide a FilterPredicate after providing an UnboundRecordFilter");

  configuration.set(FILTER_PREDICATE + ".human.readable", filterPredicate.toString());
  try {
    SerializationUtil.writeObjectToConfAsBase64(FILTER_PREDICATE, filterPredicate, configuration);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}

Source File: FilteringBenchmarks.java From parquet-mr with Apache License 2.0

5 votes

private void benchmark(Blackhole blackhole, BaseContext context) throws Exception {
  FilterPredicate filter = FilterApi.eq(BaseContext.COLUMN, context.getRandom().nextLong());
  try (ParquetReader<Group> reader = context.createReaderBuilder()
      .withFilter(FilterCompat.get(filter))
      .build()) {
    blackhole.consume(reader.read());
  }
}

Source File: FilterCompat.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Given a FilterPredicate, return a Filter that wraps it.
 * This method also logs the filter being used and rewrites
 * the predicate to not include the not() operator.
 *
 * @param filterPredicate a filter predicate
 * @return a filter for the given predicate
 */
public static Filter get(FilterPredicate filterPredicate) {
  Objects.requireNonNull(filterPredicate, "filterPredicate cannot be null");

  LOG.info("Filtering using predicate: {}", filterPredicate);

  // rewrite the predicate to not include the not() operator
  FilterPredicate collapsedPredicate = LogicalInverseRewriter.rewrite(filterPredicate);

  if (!filterPredicate.equals(collapsedPredicate)) {
    LOG.info("Predicate has been collapsed to: {}", collapsedPredicate);
  }

  return new FilterPredicateCompat(collapsedPredicate);
}

Source File: FilterCompat.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Given either a FilterPredicate or the class of an UnboundRecordFilter, or neither (but not both)
 * return a Filter that wraps whichever was provided.
 * <p>
 * Either filterPredicate or unboundRecordFilterClass must be null, or an exception is thrown.
 * <p>
 * If both are null, the no op filter will be returned.
 *
 * @param filterPredicate a filter predicate, or null
 * @param unboundRecordFilter an unbound record filter, or null
 * @return a Filter wrapping either the predicate or the unbound record filter (from the old API)
 */
public static Filter get(FilterPredicate filterPredicate, UnboundRecordFilter unboundRecordFilter) {
  checkArgument(filterPredicate == null || unboundRecordFilter == null,
      "Cannot provide both a FilterPredicate and an UnboundRecordFilter");

  if (filterPredicate != null) {
    return get(filterPredicate);
  }

  if (unboundRecordFilter != null) {
    return get(unboundRecordFilter);
  }

  return NOOP;
}

org.apache.parquet.filter2.predicate.FilterPredicate Java Examples