org.apache.parquet.filter2.predicate.Operators.IntColumn Java Examples

The following examples show how to use org.apache.parquet.filter2.predicate.Operators.IntColumn. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParquetTableSource.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a {@code LessThan} expression into a Parquet {@code lt} predicate.
 *
 * @param exp expression being translated; checked to be a {@code LessThan}
 * @param columnPair the column ({@code f0}) and the literal to compare against ({@code f1})
 * @return the predicate, or {@code null} if the column is not an int, long, double or float column
 */
@Nullable
private FilterPredicate lessThan(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof LessThan, "exp has to be LessThan");

	final Column column = columnPair.f0;
	final Comparable literal = columnPair.f1;

	if (column instanceof IntColumn) {
		return FilterApi.lt((IntColumn) column, (Integer) literal);
	}
	if (column instanceof LongColumn) {
		return FilterApi.lt((LongColumn) column, (Long) literal);
	}
	if (column instanceof DoubleColumn) {
		return FilterApi.lt((DoubleColumn) column, (Double) literal);
	}
	if (column instanceof FloatColumn) {
		return FilterApi.lt((FloatColumn) column, (Float) literal);
	}

	// Any other column kind has no translation here.
	return null;
}
 
Example #2
Source File: ParquetTableSource.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Parquet {@code lt} filter that corresponds to a {@code LessThan} expression.
 *
 * @param exp expression being converted; checked to be a {@code LessThan}
 * @param columnPair pair of target column ({@code f0}) and comparison value ({@code f1})
 * @return the filter, or {@code null} when the column is none of int, long, double or float
 */
@Nullable
private FilterPredicate lessThan(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof LessThan, "exp has to be LessThan");

	final Column target = columnPair.f0;
	final Comparable bound = columnPair.f1;

	if (target instanceof IntColumn) {
		return FilterApi.lt((IntColumn) target, (Integer) bound);
	}
	if (target instanceof LongColumn) {
		return FilterApi.lt((LongColumn) target, (Long) bound);
	}
	if (target instanceof DoubleColumn) {
		return FilterApi.lt((DoubleColumn) target, (Double) bound);
	}
	if (target instanceof FloatColumn) {
		return FilterApi.lt((FloatColumn) target, (Float) bound);
	}

	// No matching column type: nothing to push down.
	return null;
}
 
Example #3
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks {@code lt} predicates on {@code int32_field} against the smallest value
 * found in {@code intValues}.
 */
@Test
public void testLtInt() throws Exception {
  final IntColumn column = intColumn("int32_field");

  // Find the smallest fixture value.
  int minimum = Integer.MAX_VALUE;
  for (int candidate : intValues) {
    if (candidate < minimum) {
      minimum = candidate;
    }
  }

  final boolean dropsBelowMinimum = canDrop(lt(column, minimum), ccmd, dictionaries);
  assertTrue("Should drop: < lowest value", dropsBelowMinimum);

  final boolean dropsJustAboveMinimum = canDrop(lt(column, minimum + 1), ccmd, dictionaries);
  assertFalse("Should not drop: < (lowest value + 1)", dropsJustAboveMinimum);

  final boolean dropsBelowMaxInt = canDrop(lt(column, Integer.MAX_VALUE), ccmd, dictionaries);
  assertFalse("Should not drop: contains matching values", dropsBelowMaxInt);
}
 
Example #4
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Runs each comparison predicate against {@code plain_int32_field} and expects that
 * none of them drops the block and that the mocked dictionary store is never touched.
 */
@Test
public void testColumnWithoutDictionary() throws Exception {
  final String message = "Should never drop block using plain encoding";
  final IntColumn plain = intColumn("plain_int32_field");
  final DictionaryPageReadStore dictionaryStore = mock(DictionaryPageReadStore.class);

  // Probe values used on the low and the high side of the comparisons.
  final int low = -10;
  final int high = nElements + 10;

  assertFalse(message, canDrop(eq(plain, low), ccmd, dictionaryStore));
  assertFalse(message, canDrop(lt(plain, low), ccmd, dictionaryStore));
  assertFalse(message, canDrop(ltEq(plain, low), ccmd, dictionaryStore));
  assertFalse(message, canDrop(gt(plain, high), ccmd, dictionaryStore));
  assertFalse(message, canDrop(gtEq(plain, high), ccmd, dictionaryStore));
  assertFalse(message, canDrop(notEq(plain, high), ccmd, dictionaryStore));

  verifyZeroInteractions(dictionaryStore);
}
 
Example #5
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Runs each comparison predicate against {@code fallback_binary_field} and expects that
 * none of them drops the block and that the mocked dictionary store is never touched.
 */
@Test
public void testColumnWithDictionaryAndPlainEncodings() throws Exception {
  final String message = "Should never drop block using plain encoding";
  final IntColumn plain = intColumn("fallback_binary_field");
  final DictionaryPageReadStore dictionaryStore = mock(DictionaryPageReadStore.class);

  // Probe values used on the low and the high side of the comparisons.
  final int low = -10;
  final int high = nElements + 10;

  assertFalse(message, canDrop(eq(plain, low), ccmd, dictionaryStore));
  assertFalse(message, canDrop(lt(plain, low), ccmd, dictionaryStore));
  assertFalse(message, canDrop(ltEq(plain, low), ccmd, dictionaryStore));
  assertFalse(message, canDrop(gt(plain, high), ccmd, dictionaryStore));
  assertFalse(message, canDrop(gtEq(plain, high), ccmd, dictionaryStore));
  assertFalse(message, canDrop(notEq(plain, high), ccmd, dictionaryStore));

  verifyZeroInteractions(dictionaryStore);
}
 
Example #6
Source File: ParquetTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code GreaterThanOrEqual} expression into a Parquet {@code gtEq} predicate.
 *
 * @param exp expression being translated; checked to be a {@code GreaterThanOrEqual}
 * @param columnPair the column ({@code f0}) and the literal to compare against ({@code f1})
 * @return the predicate, or {@code null} if the column is not an int, long, double or float column
 */
@Nullable
private FilterPredicate greaterThanOrEqual(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof GreaterThanOrEqual, "exp has to be GreaterThanOrEqual");

	final Column column = columnPair.f0;
	final Comparable literal = columnPair.f1;

	if (column instanceof IntColumn) {
		return FilterApi.gtEq((IntColumn) column, (Integer) literal);
	}
	if (column instanceof LongColumn) {
		return FilterApi.gtEq((LongColumn) column, (Long) literal);
	}
	if (column instanceof DoubleColumn) {
		return FilterApi.gtEq((DoubleColumn) column, (Double) literal);
	}
	if (column instanceof FloatColumn) {
		return FilterApi.gtEq((FloatColumn) column, (Float) literal);
	}

	// Any other column kind has no translation here.
	return null;
}
 
Example #7
Source File: ParquetTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code LessThanOrEqual} expression into a Parquet {@code ltEq} predicate.
 *
 * @param exp expression being translated; checked to be a {@code LessThanOrEqual}
 * @param columnPair the column ({@code f0}) and the literal to compare against ({@code f1})
 * @return the predicate, or {@code null} if the column is not an int, long, double or float column
 */
@Nullable
private FilterPredicate lessThanOrEqual(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof LessThanOrEqual, "exp has to be LessThanOrEqual");

	final Column column = columnPair.f0;
	final Comparable literal = columnPair.f1;

	if (column instanceof IntColumn) {
		return FilterApi.ltEq((IntColumn) column, (Integer) literal);
	}
	if (column instanceof LongColumn) {
		return FilterApi.ltEq((LongColumn) column, (Long) literal);
	}
	if (column instanceof DoubleColumn) {
		return FilterApi.ltEq((DoubleColumn) column, (Double) literal);
	}
	if (column instanceof FloatColumn) {
		return FilterApi.ltEq((FloatColumn) column, (Float) literal);
	}

	// Any other column kind has no translation here.
	return null;
}
 
Example #8
Source File: ParquetTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code GreaterThan} expression into a Parquet {@code gt} predicate.
 *
 * @param exp expression being translated; checked to be a {@code GreaterThan}
 * @param columnPair the column ({@code f0}) and the literal to compare against ({@code f1})
 * @return the predicate, or {@code null} if the column is not an int, long, double or float column
 */
@Nullable
private FilterPredicate greaterThan(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof GreaterThan, "exp has to be GreaterThan");

	final Column column = columnPair.f0;
	final Comparable literal = columnPair.f1;

	if (column instanceof IntColumn) {
		return FilterApi.gt((IntColumn) column, (Integer) literal);
	}
	if (column instanceof LongColumn) {
		return FilterApi.gt((LongColumn) column, (Long) literal);
	}
	if (column instanceof DoubleColumn) {
		return FilterApi.gt((DoubleColumn) column, (Double) literal);
	}
	if (column instanceof FloatColumn) {
		return FilterApi.gt((FloatColumn) column, (Float) literal);
	}

	// Any other column kind has no translation here.
	return null;
}
 
Example #9
Source File: ParquetTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Parquet {@code gtEq} filter that corresponds to a {@code GreaterThanOrEqual} expression.
 *
 * @param exp expression being converted; checked to be a {@code GreaterThanOrEqual}
 * @param columnPair pair of target column ({@code f0}) and comparison value ({@code f1})
 * @return the filter, or {@code null} when the column is none of int, long, double or float
 */
@Nullable
private FilterPredicate greaterThanOrEqual(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof GreaterThanOrEqual, "exp has to be GreaterThanOrEqual");

	final Column target = columnPair.f0;
	final Comparable bound = columnPair.f1;

	if (target instanceof IntColumn) {
		return FilterApi.gtEq((IntColumn) target, (Integer) bound);
	}
	if (target instanceof LongColumn) {
		return FilterApi.gtEq((LongColumn) target, (Long) bound);
	}
	if (target instanceof DoubleColumn) {
		return FilterApi.gtEq((DoubleColumn) target, (Double) bound);
	}
	if (target instanceof FloatColumn) {
		return FilterApi.gtEq((FloatColumn) target, (Float) bound);
	}

	// No matching column type: nothing to push down.
	return null;
}
 
Example #10
Source File: ParquetTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Parquet {@code ltEq} filter that corresponds to a {@code LessThanOrEqual} expression.
 *
 * @param exp expression being converted; checked to be a {@code LessThanOrEqual}
 * @param columnPair pair of target column ({@code f0}) and comparison value ({@code f1})
 * @return the filter, or {@code null} when the column is none of int, long, double or float
 */
@Nullable
private FilterPredicate lessThanOrEqual(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof LessThanOrEqual, "exp has to be LessThanOrEqual");

	final Column target = columnPair.f0;
	final Comparable bound = columnPair.f1;

	if (target instanceof IntColumn) {
		return FilterApi.ltEq((IntColumn) target, (Integer) bound);
	}
	if (target instanceof LongColumn) {
		return FilterApi.ltEq((LongColumn) target, (Long) bound);
	}
	if (target instanceof DoubleColumn) {
		return FilterApi.ltEq((DoubleColumn) target, (Double) bound);
	}
	if (target instanceof FloatColumn) {
		return FilterApi.ltEq((FloatColumn) target, (Float) bound);
	}

	// No matching column type: nothing to push down.
	return null;
}
 
Example #11
Source File: ParquetTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Parquet {@code gt} filter that corresponds to a {@code GreaterThan} expression.
 *
 * @param exp expression being converted; checked to be a {@code GreaterThan}
 * @param columnPair pair of target column ({@code f0}) and comparison value ({@code f1})
 * @return the filter, or {@code null} when the column is none of int, long, double or float
 */
@Nullable
private FilterPredicate greaterThan(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof GreaterThan, "exp has to be GreaterThan");

	final Column target = columnPair.f0;
	final Comparable bound = columnPair.f1;

	if (target instanceof IntColumn) {
		return FilterApi.gt((IntColumn) target, (Integer) bound);
	}
	if (target instanceof LongColumn) {
		return FilterApi.gt((LongColumn) target, (Long) bound);
	}
	if (target instanceof DoubleColumn) {
		return FilterApi.gt((DoubleColumn) target, (Double) bound);
	}
	if (target instanceof FloatColumn) {
		return FilterApi.gt((FloatColumn) target, (Float) bound);
	}

	// No matching column type: nothing to push down.
	return null;
}
 
Example #12
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Applies inverted user-defined predicates (rewritten with {@code LogicalInverseRewriter})
 * to the column {@code missing_column} and checks the drop decision for a predicate
 * built from a set containing {@code null} and one built from a set containing only 42.
 */
@Test
public void testInverseUdpMissingColumn() throws Exception {
  final InInt32UDP rejectsNull = new InInt32UDP(ImmutableSet.of(42));
  final InInt32UDP acceptsNull = new InInt32UDP(Sets.newHashSet((Integer) null));
  final IntColumn missing = intColumn("missing_column");

  final boolean dropsInvertedAccepting =
    canDrop(LogicalInverseRewriter.rewrite(not(userDefined(missing, acceptsNull))), ccmd, dictionaries);
  assertTrue("Should drop block for null accepting udp", dropsInvertedAccepting);

  final boolean dropsInvertedRejecting =
    canDrop(LogicalInverseRewriter.rewrite(not(userDefined(missing, rejectsNull))), ccmd, dictionaries);
  assertFalse("Should not drop block for null rejecting udp", dropsInvertedRejecting);
}
 
Example #13
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Applies user-defined predicates to the column {@code missing_column} and checks the
 * drop decision for a predicate built from a set containing only 42 and one built from
 * a set containing {@code null}.
 */
@Test
public void testUdpMissingColumn() throws Exception {
  final InInt32UDP rejectsNull = new InInt32UDP(ImmutableSet.of(42));
  final InInt32UDP acceptsNull = new InInt32UDP(Sets.newHashSet((Integer) null));
  final IntColumn missing = intColumn("missing_column");

  final boolean dropsRejecting = canDrop(userDefined(missing, rejectsNull), ccmd, dictionaries);
  assertTrue("Should drop block for null rejecting udp", dropsRejecting);

  final boolean dropsAccepting = canDrop(userDefined(missing, acceptsNull), ccmd, dictionaries);
  assertFalse("Should not drop block for null accepting udp", dropsAccepting);
}
 
Example #14
Source File: TestRowGroupFilter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Builds six blocks from int statistics with different min/max and null counts, then
 * checks which of them {@code RowGroupFilter.filterRowGroups} keeps for several
 * {@code eq}/{@code notEq} predicates on the column {@code foo}.
 */
@Test
public void testApplyRowGroupFilters() {
  // min 10, max 100, 4 nulls
  final IntStatistics firstStats = new IntStatistics();
  firstStats.setMinMax(10, 100);
  firstStats.setNumNulls(4);
  final BlockMetaData b1 = makeBlockFromStats(firstStats, 301);

  // min 8, max 102, no nulls
  final IntStatistics secondStats = new IntStatistics();
  secondStats.setMinMax(8, 102);
  secondStats.setNumNulls(0);
  final BlockMetaData b2 = makeBlockFromStats(secondStats, 302);

  // min 100, max 102, 12 nulls
  final IntStatistics thirdStats = new IntStatistics();
  thirdStats.setMinMax(100, 102);
  thirdStats.setNumNulls(12);
  final BlockMetaData b3 = makeBlockFromStats(thirdStats, 303);

  // min 0, max 0, 304 nulls (same number as the second makeBlockFromStats argument)
  final IntStatistics fourthStats = new IntStatistics();
  fourthStats.setMinMax(0, 0);
  fourthStats.setNumNulls(304);
  final BlockMetaData b4 = makeBlockFromStats(fourthStats, 304);

  // min 50, max 50, 7 nulls
  final IntStatistics fifthStats = new IntStatistics();
  fifthStats.setMinMax(50, 50);
  fifthStats.setNumNulls(7);
  final BlockMetaData b5 = makeBlockFromStats(fifthStats, 305);

  // min 0, max 0, 12 nulls
  final IntStatistics sixthStats = new IntStatistics();
  sixthStats.setMinMax(0, 0);
  sixthStats.setNumNulls(12);
  final BlockMetaData b6 = makeBlockFromStats(sixthStats, 306);

  final List<BlockMetaData> blocks = new ArrayList<BlockMetaData>(Arrays.asList(b1, b2, b3, b4, b5, b6));

  final MessageType schema = MessageTypeParser.parseMessageType("message Document { optional int32 foo; }");
  final IntColumn foo = intColumn("foo");

  final List<BlockMetaData> keptForEq50 =
      RowGroupFilter.filterRowGroups(FilterCompat.get(eq(foo, 50)), blocks, schema);
  assertEquals(Arrays.asList(b1, b2, b5), keptForEq50);

  final List<BlockMetaData> keptForNotEq50 =
      RowGroupFilter.filterRowGroups(FilterCompat.get(notEq(foo, 50)), blocks, schema);
  assertEquals(Arrays.asList(b1, b2, b3, b4, b5, b6), keptForNotEq50);

  final List<BlockMetaData> keptForEqNull =
      RowGroupFilter.filterRowGroups(FilterCompat.get(eq(foo, null)), blocks, schema);
  assertEquals(Arrays.asList(b1, b3, b4, b5, b6), keptForEqNull);

  final List<BlockMetaData> keptForNotEqNull =
      RowGroupFilter.filterRowGroups(FilterCompat.get(notEq(foo, null)), blocks, schema);
  assertEquals(Arrays.asList(b1, b2, b3, b5, b6), keptForNotEqNull);

  final List<BlockMetaData> keptForEq0 =
      RowGroupFilter.filterRowGroups(FilterCompat.get(eq(foo, 0)), blocks, schema);
  assertEquals(Arrays.asList(b6), keptForEq0);
}
 
Example #15
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises the column index builder obtained for a required INT32 column annotated
 * as UINT_8.
 *
 * <p>After checking the builder type and that building with no pages yields
 * {@code null}, three page sequences are added in turn. For each one the test asserts
 * the page count, the min/max size, the boundary order (UNORDERED, then ASCENDING,
 * then DESCENDING), the null counts, the null pages, the max/min values and the
 * result of a series of filter predicates on {@code col}.
 */
@Test
public void testBuildUInt8() {
  PrimitiveType type = Types.required(INT32).as(UINT_8).named("test_uint8");
  ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
  assertThat(builder, instanceOf(IntColumnIndexBuilder.class));
  // Nothing has been added yet, so build() is expected to return null.
  assertNull(builder.build());
  IntColumn col = intColumn("test_col");

  // Sequence 1: six pages, expected boundary order UNORDERED.
  // NOTE(review): the null arguments to sb.stats(...) appear to stand for null values
  // in the page (they line up with the null counts asserted below) - confirm against StatsBuilder.
  StatsBuilder sb = new StatsBuilder();
  builder.add(sb.stats(type, 4, 10));
  builder.add(sb.stats(type, 11, 17, null));
  builder.add(sb.stats(type, 2, 2, null, null));
  builder.add(sb.stats(type, null, null, null));
  builder.add(sb.stats(type, 1, 0xFF));
  builder.add(sb.stats(type, 0xEF, 0xFA));
  assertEquals(6, builder.getPageCount());
  assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
  ColumnIndex columnIndex = builder.build();
  assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
  assertCorrectNullCounts(columnIndex, 0, 1, 2, 3, 0, 0);
  assertCorrectNullPages(columnIndex, false, false, false, true, false, false);
  assertCorrectValues(columnIndex.getMaxValues(), 10, 17, 2, null, 0xFF, 0xFA);
  assertCorrectValues(columnIndex.getMinValues(), 4, 11, 2, null, 1, 0xEF);
  // NOTE(review): the trailing ints presumably list the expected matching page indexes - confirm
  // against assertCorrectFiltering.
  assertCorrectFiltering(columnIndex, eq(col, 2), 2, 4);
  assertCorrectFiltering(columnIndex, eq(col, null), 1, 2, 3);
  assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5);
  assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
  assertCorrectFiltering(columnIndex, gt(col, 2), 0, 1, 4, 5);
  assertCorrectFiltering(columnIndex, gtEq(col, 2), 0, 1, 2, 4, 5);
  assertCorrectFiltering(columnIndex, lt(col, 0xEF), 0, 1, 2, 4);
  assertCorrectFiltering(columnIndex, ltEq(col, 0xEF), 0, 1, 2, 4, 5);
  assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 0, 1, 4, 5);
  assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5);

  // Sequence 2: fresh builder and stats, nine pages, expected boundary order ASCENDING.
  builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
  sb = new StatsBuilder();
  builder.add(sb.stats(type, null, null));
  builder.add(sb.stats(type, 0, 0, null, null));
  builder.add(sb.stats(type, 0, 42, null));
  builder.add(sb.stats(type, null, null));
  builder.add(sb.stats(type, null, null, null));
  builder.add(sb.stats(type, 42, 0xEE));
  builder.add(sb.stats(type, null, null));
  builder.add(sb.stats(type, 0xEF, 0xFF));
  builder.add(sb.stats(type, null, null));
  assertEquals(9, builder.getPageCount());
  assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
  columnIndex = builder.build();
  assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
  assertCorrectNullCounts(columnIndex, 2, 2, 1, 2, 3, 0, 2, 0, 2);
  assertCorrectNullPages(columnIndex, true, false, false, true, true, false, true, false, true);
  assertCorrectValues(columnIndex.getMaxValues(), null, 0, 42, null, null, 0xEE, null, 0xFF, null);
  assertCorrectValues(columnIndex.getMinValues(), null, 0, 0, null, null, 42, null, 0xEF, null);
  assertCorrectFiltering(columnIndex, eq(col, 2), 2);
  assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 4, 6, 8);
  assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
  assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
  assertCorrectFiltering(columnIndex, gt(col, 0xEE), 7);
  assertCorrectFiltering(columnIndex, gtEq(col, 0xEE), 5, 7);
  assertCorrectFiltering(columnIndex, lt(col, 42), 1, 2);
  assertCorrectFiltering(columnIndex, ltEq(col, 42), 1, 2, 5);
  assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 1, 2, 5, 7);
  assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7,
      8);

  // Sequence 3: fresh builder and stats, nine pages, expected boundary order DESCENDING.
  builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
  sb = new StatsBuilder();
  builder.add(sb.stats(type, null, null, null, null, null));
  builder.add(sb.stats(type, 0xFF, 0xFF));
  builder.add(sb.stats(type, null, null, null));
  builder.add(sb.stats(type, 0xEF, 0xEA, null));
  builder.add(sb.stats(type, null, null));
  builder.add(sb.stats(type, 0xEE, 42));
  builder.add(sb.stats(type, null, null));
  builder.add(sb.stats(type, null, null));
  builder.add(sb.stats(type, 41, 0));
  assertEquals(9, builder.getPageCount());
  assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
  columnIndex = builder.build();
  assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
  assertCorrectNullCounts(columnIndex, 5, 0, 3, 1, 2, 0, 2, 2, 0);
  assertCorrectNullPages(columnIndex, true, false, true, false, true, false, true, true, false);
  assertCorrectValues(columnIndex.getMaxValues(), null, 0xFF, null, 0xEF, null, 0xEE, null, null, 41);
  assertCorrectValues(columnIndex.getMinValues(), null, 0xFF, null, 0xEA, null, 42, null, null, 0);
  assertCorrectFiltering(columnIndex, eq(col, 0xAB), 5);
  assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 4, 6, 7);
  assertCorrectFiltering(columnIndex, notEq(col, 0xFF), 0, 2, 3, 4, 5, 6, 7, 8);
  assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
  // No trailing values here: gt(col, 0xFF) is asserted with an empty expected list.
  assertCorrectFiltering(columnIndex, gt(col, 0xFF));
  assertCorrectFiltering(columnIndex, gtEq(col, 0xFF), 1);
  assertCorrectFiltering(columnIndex, lt(col, 42), 8);
  assertCorrectFiltering(columnIndex, ltEq(col, 42), 5, 8);
  assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 1, 3, 5, 8);
  assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 2, 3, 4, 5, 6, 7,
      8);
}
 
Example #16
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises the column index builder obtained for a required INT32 column.
 *
 * <p>After checking the builder type and that building with no pages yields
 * {@code null}, three page sequences are added in turn. For each one the test asserts
 * the page count, the min/max size, the boundary order (UNORDERED, then ASCENDING,
 * then DESCENDING), the null counts, the null pages, the max/min values and the
 * result of a series of filter predicates on {@code col}.
 */
@Test
public void testBuildInt32() {
  PrimitiveType type = Types.required(INT32).named("test_int32");
  ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
  assertThat(builder, instanceOf(IntColumnIndexBuilder.class));
  // Nothing has been added yet, so build() is expected to return null.
  assertNull(builder.build());
  IntColumn col = intColumn("test_col");

  // Sequence 1: six pages, expected boundary order UNORDERED.
  // NOTE(review): the null arguments to sb.stats(...) appear to stand for null values
  // in the page (they line up with the null counts asserted below) - confirm against StatsBuilder.
  StatsBuilder sb = new StatsBuilder();
  builder.add(sb.stats(type, -4, 10));
  builder.add(sb.stats(type, -11, 7, null));
  builder.add(sb.stats(type, 2, 2, null, null));
  builder.add(sb.stats(type, null, null, null));
  builder.add(sb.stats(type, 1, 2));
  builder.add(sb.stats(type, -21, 8));
  assertEquals(6, builder.getPageCount());
  assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
  ColumnIndex columnIndex = builder.build();
  assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
  assertCorrectNullCounts(columnIndex, 0, 1, 2, 3, 0, 0);
  assertCorrectNullPages(columnIndex, false, false, false, true, false, false);
  assertCorrectValues(columnIndex.getMaxValues(), 10, 7, 2, null, 2, 8);
  assertCorrectValues(columnIndex.getMinValues(), -4, -11, 2, null, 1, -21);
  // NOTE(review): the trailing ints presumably list the expected matching page indexes - confirm
  // against assertCorrectFiltering.
  assertCorrectFiltering(columnIndex, eq(col, 2), 0, 1, 2, 4, 5);
  assertCorrectFiltering(columnIndex, eq(col, null), 1, 2, 3);
  assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5);
  assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
  assertCorrectFiltering(columnIndex, gt(col, 2), 0, 1, 5);
  assertCorrectFiltering(columnIndex, gtEq(col, 2), 0, 1, 2, 4, 5);
  assertCorrectFiltering(columnIndex, lt(col, 2), 0, 1, 4, 5);
  assertCorrectFiltering(columnIndex, ltEq(col, 2), 0, 1, 2, 4, 5);
  assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 0, 1, 5);
  assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5);

  // Sequence 2: fresh builder and stats, nine pages, expected boundary order ASCENDING.
  builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
  sb = new StatsBuilder();
  builder.add(sb.stats(type, null, null));
  builder.add(sb.stats(type, -532, -345, null, null));
  builder.add(sb.stats(type, -500, -42, null));
  builder.add(sb.stats(type, null, null));
  builder.add(sb.stats(type, null, null, null));
  builder.add(sb.stats(type, -42, 2));
  builder.add(sb.stats(type, null, null));
  builder.add(sb.stats(type, 3, 42));
  builder.add(sb.stats(type, null, null));
  assertEquals(9, builder.getPageCount());
  assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
  columnIndex = builder.build();
  assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
  assertCorrectNullCounts(columnIndex, 2, 2, 1, 2, 3, 0, 2, 0, 2);
  assertCorrectNullPages(columnIndex, true, false, false, true, true, false, true, false, true);
  assertCorrectValues(columnIndex.getMaxValues(), null, -345, -42, null, null, 2, null, 42, null);
  assertCorrectValues(columnIndex.getMinValues(), null, -532, -500, null, null, -42, null, 3, null);
  assertCorrectFiltering(columnIndex, eq(col, 2), 5);
  assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 4, 6, 8);
  assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
  assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
  assertCorrectFiltering(columnIndex, gt(col, 2), 7);
  assertCorrectFiltering(columnIndex, gtEq(col, 2), 5, 7);
  assertCorrectFiltering(columnIndex, lt(col, 2), 1, 2, 5);
  assertCorrectFiltering(columnIndex, ltEq(col, 2), 1, 2, 5);
  assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 1, 2, 5, 7);
  assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7,
      8);

  // Sequence 3: fresh builder and stats, nine pages, expected boundary order DESCENDING.
  builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
  sb = new StatsBuilder();
  builder.add(sb.stats(type, null, null, null, null, null));
  builder.add(sb.stats(type, 532, 345));
  builder.add(sb.stats(type, null, null, null));
  builder.add(sb.stats(type, 234, 42, null));
  builder.add(sb.stats(type, null, null));
  builder.add(sb.stats(type, 42, -2));
  builder.add(sb.stats(type, null, null));
  builder.add(sb.stats(type, null, null));
  builder.add(sb.stats(type, -3, -42));
  assertEquals(9, builder.getPageCount());
  assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
  columnIndex = builder.build();
  assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
  assertCorrectNullCounts(columnIndex, 5, 0, 3, 1, 2, 0, 2, 2, 0);
  assertCorrectNullPages(columnIndex, true, false, true, false, true, false, true, true, false);
  assertCorrectValues(columnIndex.getMaxValues(), null, 532, null, 234, null, 42, null, null, -3);
  assertCorrectValues(columnIndex.getMinValues(), null, 345, null, 42, null, -2, null, null, -42);
  assertCorrectFiltering(columnIndex, eq(col, 2), 5);
  assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 4, 6, 7);
  assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
  assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
  assertCorrectFiltering(columnIndex, gt(col, 2), 1, 3, 5);
  assertCorrectFiltering(columnIndex, gtEq(col, 2), 1, 3, 5);
  assertCorrectFiltering(columnIndex, lt(col, 2), 5, 8);
  assertCorrectFiltering(columnIndex, ltEq(col, 2), 5, 8);
  assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 1, 3, 5, 8);
  assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7,
      8);
}
 
Example #17
Source File: FilterApi.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an {@code IntColumn} for the given column path.
 *
 * @param columnPath the column path string, parsed with {@code ColumnPath.fromDotString}
 * @return a new {@code IntColumn} wrapping the parsed path
 */
public static IntColumn intColumn(String columnPath) {
  final ColumnPath parsedPath = ColumnPath.fromDotString(columnPath);
  return new IntColumn(parsedPath);
}