Java Code Examples for org.apache.parquet.column.statistics.IntStatistics#setMinMax()

The following examples show how to use org.apache.parquet.column.statistics.IntStatistics#setMinMax(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: TestStatisticsFilter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the drop decision for {@code eq(column, null)} predicates.
 *
 * <p>Two int-statistics fixtures share the range 10..100 and differ only in
 * their null count (0 versus 3). The test asserts that the fixture without
 * nulls can be dropped, that the fixture with nulls cannot, and that neither
 * the missing column nor the missing-min/max metadata lead to a drop.
 */
@Test
public void testEqNull() {
  // Range 10..100 with a null count of zero.
  IntStatistics rangeWithoutNulls = new IntStatistics();
  rangeWithoutNulls.setMinMax(10, 100);
  rangeWithoutNulls.setNumNulls(0);

  // Same range, but with three nulls recorded.
  IntStatistics rangeWithNulls = new IntStatistics();
  rangeWithNulls.setMinMax(10, 100);
  rangeWithNulls.setNumNulls(3);

  // NOTE(review): 177L is presumably the value count of the chunk - confirm against the helper.
  assertTrue(
      canDrop(
          eq(intColumn, null),
          Arrays.asList(
              getIntColumnMeta(rangeWithoutNulls, 177L),
              getDoubleColumnMeta(doubleStats, 177L))));

  assertFalse(
      canDrop(
          eq(intColumn, null),
          Arrays.asList(
              getIntColumnMeta(rangeWithNulls, 177L),
              getDoubleColumnMeta(doubleStats, 177L))));

  // A column absent from the metadata must not be dropped for eq(null).
  assertFalse(canDrop(eq(missingColumn, null), columnMetas));

  // Metadata from the missingMinMaxColumnMetas fixture must not be dropped either.
  assertFalse(canDrop(eq(intColumn, null), missingMinMaxColumnMetas));
  assertFalse(canDrop(eq(doubleColumn, null), missingMinMaxColumnMetas));
}
 
Example 2
Source File: TestStatisticsFilter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the drop decision for {@code notEq(column, value)} with non-null values.
 *
 * <p>Asserts that none of the probe values 9, 10, 100 and 101 lead to a drop
 * against the shared {@code columnMetas} fixture, that a chunk whose min and
 * max are both 7 can be dropped for {@code notEq(intColumn, 7)} until a null
 * count is set on it, and that the missing column and the missing-min/max
 * metadata are never dropped.
 */
@Test
public void testNotEqNonNull() {
  // None of these probe values may cause a drop against the shared fixture.
  for (int probe : new int[] {9, 10, 100, 101}) {
    assertFalse(canDrop(notEq(intColumn, probe), columnMetas));
  }

  // Min and max are both 7 and no null count has been set yet.
  IntStatistics onlySevens = new IntStatistics();
  onlySevens.setMinMax(7, 7);
  assertTrue(
      canDrop(
          notEq(intColumn, 7),
          Arrays.asList(
              getIntColumnMeta(onlySevens, 177L),
              getDoubleColumnMeta(doubleStats, 177L))));

  // After recording 100 nulls on the same statistics the chunk must be kept.
  onlySevens.setNumNulls(100L);
  assertFalse(
      canDrop(
          notEq(intColumn, 7),
          Arrays.asList(
              getIntColumnMeta(onlySevens, 177L),
              getDoubleColumnMeta(doubleStats, 177L))));

  // Same expectation with a null count of 177, matching the 177L passed to the meta helper.
  onlySevens.setNumNulls(177L);
  assertFalse(
      canDrop(
          notEq(intColumn, 7),
          Arrays.asList(
              getIntColumnMeta(onlySevens, 177L),
              getDoubleColumnMeta(doubleStats, 177L))));

  assertFalse(canDrop(notEq(missingColumn, fromString("any")), columnMetas));

  assertFalse(canDrop(notEq(intColumn, 50), missingMinMaxColumnMetas));
  assertFalse(canDrop(notEq(doubleColumn, 50.0), missingMinMaxColumnMetas));
}
 
Example 3
Source File: TestStatisticsFilter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the drop decision for {@code notEq(column, null)} predicates.
 *
 * <p>Three int-statistics fixtures are used: range 10..100 with 0 nulls, the
 * same range with 3 nulls, and a 0..0 range with 177 nulls. Only the last one
 * and the missing column are asserted to be droppable.
 */
@Test
public void testNotEqNull() {
  IntStatistics rangeWithoutNulls = new IntStatistics();
  rangeWithoutNulls.setMinMax(10, 100);
  rangeWithoutNulls.setNumNulls(0);

  IntStatistics rangeWithNulls = new IntStatistics();
  rangeWithNulls.setMinMax(10, 100);
  rangeWithNulls.setNumNulls(3);

  // Null count equals the 177L handed to getIntColumnMeta below
  // (presumably the chunk's value count - confirm against the helper).
  IntStatistics nothingButNulls = new IntStatistics();
  nothingButNulls.setMinMax(0, 0);
  nothingButNulls.setNumNulls(177);

  boolean dropsWithoutNulls = canDrop(notEq(intColumn, null), Arrays.asList(
      getIntColumnMeta(rangeWithoutNulls, 177L),
      getDoubleColumnMeta(doubleStats, 177L)));
  assertFalse(dropsWithoutNulls);

  boolean dropsWithSomeNulls = canDrop(notEq(intColumn, null), Arrays.asList(
      getIntColumnMeta(rangeWithNulls, 177L),
      getDoubleColumnMeta(doubleStats, 177L)));
  assertFalse(dropsWithSomeNulls);

  boolean dropsWithOnlyNulls = canDrop(notEq(intColumn, null), Arrays.asList(
      getIntColumnMeta(nothingButNulls, 177L),
      getDoubleColumnMeta(doubleStats, 177L)));
  assertTrue(dropsWithOnlyNulls);

  // A column absent from the metadata is expected to be droppable for notEq(null).
  boolean dropsMissingColumn = canDrop(notEq(missingColumn, null), columnMetas);
  assertTrue(dropsMissingColumn);

  boolean dropsIntWithoutMinMax = canDrop(notEq(intColumn, null), missingMinMaxColumnMetas);
  assertFalse(dropsIntWithoutMinMax);

  boolean dropsDoubleWithoutMinMax = canDrop(notEq(doubleColumn, null), missingMinMaxColumnMetas);
  assertFalse(dropsDoubleWithoutMinMax);
}
 
Example 4
Source File: TestTupleDomainParquetPredicate.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an {@link IntStatistics} whose min/max are set to the given bounds.
 *
 * @param minimum value passed as the first argument of {@code setMinMax}
 * @param maximum value passed as the second argument of {@code setMinMax}
 * @return a newly created statistics object with the bounds applied
 */
private static IntStatistics intColumnStats(int minimum, int maximum)
{
    IntStatistics bounded = new IntStatistics();
    bounded.setMinMax(minimum, maximum);
    return bounded;
}
 
Example 5
Source File: TestStatisticsFilter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Checks the drop decision for user-defined predicates and their inverses.
 *
 * <p>Six predicates are built: {@code SevensAndEightsUdp} on the int column,
 * {@code DropNullUdp} and {@code SevensAndEightsUdp} on {@code missingColumn2},
 * each together with its {@code LogicalInverseRewriter}-rewritten negation.
 * Every predicate is evaluated against three int-statistics fixtures
 * (7..7, 8..8 and 1..2). A final assertion runs {@code AllPositiveUdp} on the
 * double column against the missing-min/max metadata.
 */
@Test
public void testUdp() {
  FilterPredicate sevensAndEights = userDefined(intColumn, SevensAndEightsUdp.class);
  FilterPredicate notSevensAndEights =
      LogicalInverseRewriter.rewrite(not(userDefined(intColumn, SevensAndEightsUdp.class)));

  FilterPredicate dropNullOnMissing = userDefined(missingColumn2, DropNullUdp.class);
  FilterPredicate notDropNullOnMissing =
      LogicalInverseRewriter.rewrite(not(userDefined(missingColumn2, DropNullUdp.class)));

  FilterPredicate sevensAndEightsOnMissing = userDefined(missingColumn2, SevensAndEightsUdp.class);
  FilterPredicate notSevensAndEightsOnMissing =
      LogicalInverseRewriter.rewrite(not(userDefined(missingColumn2, SevensAndEightsUdp.class)));

  FilterPredicate allPositive = userDefined(doubleColumn, AllPositiveUdp.class);

  // Statistics fixtures: min == max == 7, min == max == 8, and the range 1..2.
  IntStatistics onlySeven = new IntStatistics();
  onlySeven.setMinMax(7, 7);

  IntStatistics onlyEight = new IntStatistics();
  onlyEight.setMinMax(8, 8);

  IntStatistics oneToTwo = new IntStatistics();
  oneToTwo.setMinMax(1 , 2);

  // SevensAndEightsUdp on the int column: only the 7..7 fixture is expected to drop.
  assertTrue(canDrop(
      sevensAndEights,
      Arrays.asList(getIntColumnMeta(onlySeven, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  assertFalse(canDrop(
      sevensAndEights,
      Arrays.asList(getIntColumnMeta(onlyEight, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  assertFalse(canDrop(
      sevensAndEights,
      Arrays.asList(getIntColumnMeta(oneToTwo, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  // Inverted SevensAndEightsUdp on the int column: only the 8..8 fixture is expected to drop.
  assertFalse(canDrop(
      notSevensAndEights,
      Arrays.asList(getIntColumnMeta(onlySeven, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  assertTrue(canDrop(
      notSevensAndEights,
      Arrays.asList(getIntColumnMeta(onlyEight, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  assertFalse(canDrop(
      notSevensAndEights,
      Arrays.asList(getIntColumnMeta(oneToTwo, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  // DropNullUdp on the missing column drops the null column for every fixture.
  assertTrue(canDrop(
      dropNullOnMissing,
      Arrays.asList(getIntColumnMeta(onlySeven, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  assertTrue(canDrop(
      dropNullOnMissing,
      Arrays.asList(getIntColumnMeta(onlyEight, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  assertTrue(canDrop(
      dropNullOnMissing,
      Arrays.asList(getIntColumnMeta(oneToTwo, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  // The negation of DropNullUdp on the missing column keeps the null column.
  assertFalse(canDrop(
      notDropNullOnMissing,
      Arrays.asList(getIntColumnMeta(onlySeven, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  assertFalse(canDrop(
      notDropNullOnMissing,
      Arrays.asList(getIntColumnMeta(onlyEight, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  assertFalse(canDrop(
      notDropNullOnMissing,
      Arrays.asList(getIntColumnMeta(oneToTwo, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  // SevensAndEightsUdp on the missing column keeps the null column.
  assertFalse(canDrop(
      sevensAndEightsOnMissing,
      Arrays.asList(getIntColumnMeta(onlySeven, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  assertFalse(canDrop(
      sevensAndEightsOnMissing,
      Arrays.asList(getIntColumnMeta(onlyEight, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  assertFalse(canDrop(
      sevensAndEightsOnMissing,
      Arrays.asList(getIntColumnMeta(oneToTwo, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  // The negation of SevensAndEightsUdp on the missing column drops the null column.
  assertTrue(canDrop(
      notSevensAndEightsOnMissing,
      Arrays.asList(getIntColumnMeta(onlySeven, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  assertTrue(canDrop(
      notSevensAndEightsOnMissing,
      Arrays.asList(getIntColumnMeta(onlyEight, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  assertTrue(canDrop(
      notSevensAndEightsOnMissing,
      Arrays.asList(getIntColumnMeta(oneToTwo, 177L), getDoubleColumnMeta(doubleStats, 177L))));

  // AllPositiveUdp on the double column must not drop the missing-min/max metadata.
  assertFalse(canDrop(allPositive, missingMinMaxColumnMetas));
}
 
Example 6
Source File: TestRowGroupFilter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Runs {@code RowGroupFilter.filterRowGroups} over six blocks built from int
 * statistics and asserts which blocks remain for each predicate on column
 * {@code foo}: {@code eq 50}, {@code notEq 50}, {@code eq null},
 * {@code notEq null} and {@code eq 0}.
 */
@Test
public void testApplyRowGroupFilters() {

  // NOTE(review): the second argument of makeBlockFromStats is presumably the
  // block's value count - confirm against the helper. For block 4 it equals
  // the null count; for block 6 it does not.
  IntStatistics range10To100 = new IntStatistics();
  range10To100.setMinMax(10, 100);
  range10To100.setNumNulls(4);
  BlockMetaData b1 = makeBlockFromStats(range10To100, 301);

  IntStatistics range8To102 = new IntStatistics();
  range8To102.setMinMax(8, 102);
  range8To102.setNumNulls(0);
  BlockMetaData b2 = makeBlockFromStats(range8To102, 302);

  IntStatistics range100To102 = new IntStatistics();
  range100To102.setMinMax(100, 102);
  range100To102.setNumNulls(12);
  BlockMetaData b3 = makeBlockFromStats(range100To102, 303);

  IntStatistics zerosWith304Nulls = new IntStatistics();
  zerosWith304Nulls.setMinMax(0, 0);
  zerosWith304Nulls.setNumNulls(304);
  BlockMetaData b4 = makeBlockFromStats(zerosWith304Nulls, 304);

  IntStatistics onlyFifty = new IntStatistics();
  onlyFifty.setMinMax(50, 50);
  onlyFifty.setNumNulls(7);
  BlockMetaData b5 = makeBlockFromStats(onlyFifty, 305);

  IntStatistics zerosWith12Nulls = new IntStatistics();
  zerosWith12Nulls.setMinMax(0, 0);
  zerosWith12Nulls.setNumNulls(12);
  BlockMetaData b6 = makeBlockFromStats(zerosWith12Nulls, 306);

  // Blocks are listed in creation order; the expected results below rely on that order.
  List<BlockMetaData> blocks =
      new ArrayList<BlockMetaData>(Arrays.asList(b1, b2, b3, b4, b5, b6));

  MessageType schema = MessageTypeParser.parseMessageType("message Document { optional int32 foo; }");
  IntColumn foo = intColumn("foo");

  // foo == 50
  List<BlockMetaData> keptForEqFifty =
      RowGroupFilter.filterRowGroups(FilterCompat.get(eq(foo, 50)), blocks, schema);
  assertEquals(Arrays.asList(b1, b2, b5), keptForEqFifty);

  // foo != 50
  List<BlockMetaData> keptForNotEqFifty =
      RowGroupFilter.filterRowGroups(FilterCompat.get(notEq(foo, 50)), blocks, schema);
  assertEquals(Arrays.asList(b1, b2, b3, b4, b5, b6), keptForNotEqFifty);

  // foo == null
  List<BlockMetaData> keptForEqNull =
      RowGroupFilter.filterRowGroups(FilterCompat.get(eq(foo, null)), blocks, schema);
  assertEquals(Arrays.asList(b1, b3, b4, b5, b6), keptForEqNull);

  // foo != null
  List<BlockMetaData> keptForNotEqNull =
      RowGroupFilter.filterRowGroups(FilterCompat.get(notEq(foo, null)), blocks, schema);
  assertEquals(Arrays.asList(b1, b2, b3, b5, b6), keptForNotEqNull);

  // foo == 0
  List<BlockMetaData> keptForEqZero =
      RowGroupFilter.filterRowGroups(FilterCompat.get(eq(foo, 0)), blocks, schema);
  assertEquals(Arrays.asList(b6), keptForEqZero);
}