org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code DataToSketchUDAF} in {@code Mode.COMPLETE} on two rows of (float value, int 400)
 * and checks the sketch deserialized from the terminated result. The int argument is presumably
 * the sketch parameter k, going by the test name and the rank-error comparison below.
 */
@Test
public void completeModeGivenK() throws Exception {
  final int givenK = 400;
  final ObjectInspector[] argInspectors = { floatInspector, intInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToSketchUDAF().getEvaluator(paramInfo)) {
    checkResultInspector(evaluator.init(Mode.COMPLETE, argInspectors));

    final SketchState buffer = (SketchState) evaluator.getNewAggregationBuffer();
    // feed the values 1 and 2, each accompanied by the same second argument
    for (float value : new float[] { 1, 2 }) {
      evaluator.iterate(buffer, new Object[] { new FloatWritable(value), new IntWritable(givenK) });
    }

    final BytesWritable serialized = (BytesWritable) evaluator.terminate(buffer);
    final KllFloatsSketch sketch =
        KllFloatsSketch.heapify(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(
        sketch.getNormalizedRankError(false),
        KllFloatsSketch.getNormalizedRankError(givenK, false));
    Assert.assertEquals(sketch.getNumRetained(), 2);
    Assert.assertEquals(sketch.getMinValue(), 1f);
    Assert.assertEquals(sketch.getMaxValue(), 2f);
  }
}
 
Example #2
Source File: DataToArrayOfDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code DataToArrayOfDoublesSketchUDAF} in {@code Mode.PARTIAL1} on two rows of
 * (int key, double value) and inspects the intermediate result, which is expected to be a
 * three-element list: an {@code IntWritable} equal to {@code DEFAULT_NOMINAL_ENTRIES}, an
 * {@code IntWritable} equal to 1, and the serialized sketch bytes.
 */
@Test
public void partial1ModeIntKeysDefaultParams() throws Exception {
  final ObjectInspector[] argInspectors = { intInspector, doubleInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToArrayOfDoublesSketchUDAF().getEvaluator(paramInfo)) {
    checkIntermediateResultInspector(evaluator.init(Mode.PARTIAL1, argInspectors));

    final ArrayOfDoublesState buffer = (ArrayOfDoublesState) evaluator.getNewAggregationBuffer();
    // keys 1 and 2, both carrying the value 1.0
    for (int key = 1; key <= 2; key++) {
      evaluator.iterate(buffer, new Object[] {new IntWritable(key), new DoubleWritable(1.0)});
    }

    final Object partial = evaluator.terminatePartial(buffer);
    Assert.assertNotNull(partial);
    Assert.assertTrue(partial instanceof List);
    final List<?> fields = (List<?>) partial;
    Assert.assertEquals(fields.size(), 3);
    Assert.assertEquals(((IntWritable) fields.get(0)).get(), DEFAULT_NOMINAL_ENTRIES);
    Assert.assertEquals(((IntWritable) fields.get(1)).get(), 1);

    final BytesWritable sketchBytes = (BytesWritable) fields.get(2);
    final ArrayOfDoublesSketch sketch =
        ArrayOfDoublesSketches.wrapSketch(BytesWritableHelper.wrapAsMemory(sketchBytes));
    Assert.assertFalse(sketch.isEstimationMode());
    Assert.assertEquals(sketch.getEstimate(), 2.0);
  }
}
 
Example #3
Source File: DataToArrayOfDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code DataToArrayOfDoublesSketchUDAF} in {@code Mode.PARTIAL1} with five arguments per
 * row: a string key, two double values, an int (32) and a float (0.99). The intermediate result
 * is expected to be a three-element list holding 32, 2 and the serialized sketch, with the
 * sketch in estimation mode.
 */
@Test
public void partial1ModeStringKeysExplicitParams() throws Exception {
  final ObjectInspector[] argInspectors =
      { stringInspector, doubleInspector, doubleInspector, intInspector, floatInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToArrayOfDoublesSketchUDAF().getEvaluator(paramInfo)) {
    checkIntermediateResultInspector(evaluator.init(Mode.PARTIAL1, argInspectors));

    final ArrayOfDoublesState buffer = (ArrayOfDoublesState) evaluator.getNewAggregationBuffer();
    // two distinct keys; every other argument is identical across rows
    for (String key : new String[] { "a", "b" }) {
      evaluator.iterate(buffer, new Object[] {
          new Text(key), new DoubleWritable(1), new DoubleWritable(2), new IntWritable(32), new FloatWritable(0.99f)});
    }

    final Object partial = evaluator.terminatePartial(buffer);
    Assert.assertNotNull(partial);
    Assert.assertTrue(partial instanceof List);
    final List<?> fields = (List<?>) partial;
    Assert.assertEquals(fields.size(), 3);
    Assert.assertEquals(((IntWritable) fields.get(0)).get(), 32);
    Assert.assertEquals(((IntWritable) fields.get(1)).get(), 2);

    final BytesWritable sketchBytes = (BytesWritable) fields.get(2);
    final ArrayOfDoublesSketch sketch =
        ArrayOfDoublesSketches.wrapSketch(BytesWritableHelper.wrapAsMemory(sketchBytes));
    // because of sampling probability < 1
    Assert.assertTrue(sketch.isEstimationMode());
    Assert.assertEquals(sketch.getEstimate(), 2.0, 0.05);
  }
}
 
Example #4
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code DataToSketchUDAF} in {@code Mode.PARTIAL1} on two rows of (float value, int 400)
 * and checks the sketch deserialized from the partial result. The int argument is presumably
 * the sketch parameter k, going by the test name.
 */
@Test
public void partial1ModeGivenK() throws Exception {
  final int givenK = 400;
  final ObjectInspector[] argInspectors = { floatInspector, intInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToSketchUDAF().getEvaluator(paramInfo)) {
    checkResultInspector(evaluator.init(Mode.PARTIAL1, argInspectors));

    final SketchState buffer = (SketchState) evaluator.getNewAggregationBuffer();
    // feed the values 1 and 2, each accompanied by the same second argument
    for (float value : new float[] { 1, 2 }) {
      evaluator.iterate(buffer, new Object[] { new FloatWritable(value), new IntWritable(givenK) });
    }

    final BytesWritable serialized = (BytesWritable) evaluator.terminatePartial(buffer);
    final KllFloatsSketch sketch =
        KllFloatsSketch.heapify(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(
        sketch.getNormalizedRankError(false),
        KllFloatsSketch.getNormalizedRankError(givenK, false));
    Assert.assertEquals(sketch.getNumRetained(), 2);
    Assert.assertEquals(sketch.getMinValue(), 1f);
    Assert.assertEquals(sketch.getMaxValue(), 2f);
  }
}
 
Example #5
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code DataToSketchUDAF} in {@code Mode.COMPLETE} on two int values with no extra
 * arguments, expects an estimate of 2.0 from the terminated sketch, and then verifies that
 * terminating again after {@code reset} yields {@code null}.
 */
@Test
public void completeModeIntValuesDefaultParams() throws Exception {
  final ObjectInspector[] argInspectors = { intInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToSketchUDAF().getEvaluator(paramInfo)) {
    checkFinalResultInspector(evaluator.init(Mode.COMPLETE, argInspectors));

    final UnionState buffer = (UnionState) evaluator.getNewAggregationBuffer();
    for (int value = 1; value <= 2; value++) {
      evaluator.iterate(buffer, new Object[] {new IntWritable(value)});
    }

    final Object finalResult = evaluator.terminate(buffer);
    Assert.assertNotNull(finalResult);
    Assert.assertTrue(finalResult instanceof BytesWritable);
    final Sketch sketch =
        Sketches.wrapSketch(BytesWritableHelper.wrapAsMemory((BytesWritable) finalResult));
    Assert.assertEquals(sketch.getEstimate(), 2.0);

    // a reset buffer must terminate to null
    evaluator.reset(buffer);
    Assert.assertNull(evaluator.terminate(buffer));
  }
}
 
Example #6
Source File: DataToDoubleSummarySketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code DataToDoubleSummarySketchUDAF} in {@code Mode.COMPLETE} on two rows of
 * (int key, double value), expects an estimate of 2.0 from the heapified result, and then
 * verifies that terminating again after {@code reset} yields {@code null}.
 */
@Test
public void completeModeIntKeysDefaultParams() throws Exception {
  final ObjectInspector[] argInspectors = { intInspector, doubleInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToDoubleSummarySketchUDAF().getEvaluator(paramInfo)) {
    checkFinalResultInspector(evaluator.init(Mode.COMPLETE, argInspectors));

    @SuppressWarnings("unchecked")
    final State<DoubleSummary> buffer = (State<DoubleSummary>) evaluator.getNewAggregationBuffer();
    // keys 1 and 2, both carrying the value 1
    for (int key = 1; key <= 2; key++) {
      evaluator.iterate(buffer, new Object[] {new IntWritable(key), new DoubleWritable(1)});
    }

    final Object finalResult = evaluator.terminate(buffer);
    Assert.assertNotNull(finalResult);
    Assert.assertTrue(finalResult instanceof BytesWritable);
    final Sketch<DoubleSummary> sketch = Sketches.heapifySketch(
        BytesWritableHelper.wrapAsMemory((BytesWritable) finalResult),
        new DoubleSummaryDeserializer());
    Assert.assertEquals(sketch.getEstimate(), 2.0);

    // a reset buffer must terminate to null
    evaluator.reset(buffer);
    Assert.assertNull(evaluator.terminate(buffer));
  }
}
 
Example #7
Source File: UnionDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code UnionDoublesSketchUDAF} in {@code Mode.PARTIAL1}: two single-value sketches built
 * with k = 256 are passed in as bytes together with an int argument of 256, and the sketch
 * wrapped from the partial result is checked for k, item count, minimum and maximum.
 */
@Test
public void partia1ModelGivenK() throws Exception {
  final int givenK = 256;
  final ObjectInspector[] argInspectors = { binaryInspector, intInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new UnionDoublesSketchUDAF().getEvaluator(paramInfo)) {
    DataToDoublesSketchUDAFTest.checkResultInspector(evaluator.init(Mode.PARTIAL1, argInspectors));

    final DoublesUnionState buffer = (DoublesUnionState) evaluator.getNewAggregationBuffer();

    // one input sketch per value, each holding exactly that value
    for (double value : new double[] { 1.0, 2.0 }) {
      final UpdateDoublesSketch input = DoublesSketch.builder().setK(givenK).build();
      input.update(value);
      evaluator.iterate(buffer, new Object[] { new BytesWritable(input.toByteArray()), new IntWritable(givenK) });
    }

    final BytesWritable serialized = (BytesWritable) evaluator.terminatePartial(buffer);
    final DoublesSketch merged = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(merged.getK(), 256);
    Assert.assertEquals(merged.getRetainedItems(), 2);
    Assert.assertEquals(merged.getMinValue(), 1.0);
    Assert.assertEquals(merged.getMaxValue(), 2.0);
  }
}
 
Example #8
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code DataToSketchUDAF} in {@code Mode.PARTIAL1} on two int values with no extra
 * arguments. The intermediate result is expected to be a three-element list: an
 * {@code IntWritable} equal to {@code SketchEvaluator.DEFAULT_LG_K}, a {@code LongWritable}
 * equal to {@code DEFAULT_UPDATE_SEED}, and bytes that heapify to a {@code CpcSketch}.
 */
@Test
public void partial1ModeIntKeysDefaultParams() throws Exception {
  final ObjectInspector[] argInspectors = { intInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToSketchUDAF().getEvaluator(paramInfo)) {
    checkIntermediateResultInspector(evaluator.init(Mode.PARTIAL1, argInspectors));

    final State buffer = (State) evaluator.getNewAggregationBuffer();
    for (int value = 1; value <= 2; value++) {
      evaluator.iterate(buffer, new Object[] {new IntWritable(value)});
    }

    final Object partial = evaluator.terminatePartial(buffer);
    Assert.assertNotNull(partial);
    Assert.assertTrue(partial instanceof List);
    final List<?> fields = (List<?>) partial;
    Assert.assertEquals(fields.size(), 3);
    Assert.assertEquals(((IntWritable) fields.get(0)).get(), SketchEvaluator.DEFAULT_LG_K);
    Assert.assertEquals(((LongWritable) fields.get(1)).get(), DEFAULT_UPDATE_SEED);

    final BytesWritable sketchBytes = (BytesWritable) fields.get(2);
    final CpcSketch sketch = CpcSketch.heapify(BytesWritableHelper.wrapAsMemory(sketchBytes));
    Assert.assertEquals(sketch.getEstimate(), 2.0, 0.01);
  }
}
 
Example #9
Source File: DataToDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code DataToDoublesSketchUDAF} in {@code Mode.PARTIAL1} on two rows of
 * (double value, int 256) and checks that the sketch wrapped from the partial result reports
 * k = 256, two retained items, minimum 1.0 and maximum 2.0.
 */
@Test
public void partial1ModeGivenK() throws Exception {
  final int givenK = 256;
  final ObjectInspector[] argInspectors = { doubleInspector, intInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToDoublesSketchUDAF().getEvaluator(paramInfo)) {
    checkResultInspector(evaluator.init(Mode.PARTIAL1, argInspectors));

    final DoublesUnionState buffer = (DoublesUnionState) evaluator.getNewAggregationBuffer();
    for (double value : new double[] { 1.0, 2.0 }) {
      evaluator.iterate(buffer, new Object[] { new DoubleWritable(value), new IntWritable(givenK) });
    }

    final BytesWritable serialized = (BytesWritable) evaluator.terminatePartial(buffer);
    final DoublesSketch sketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(sketch.getK(), 256);
    Assert.assertEquals(sketch.getRetainedItems(), 2);
    Assert.assertEquals(sketch.getMinValue(), 1.0);
    Assert.assertEquals(sketch.getMaxValue(), 2.0);
  }
}
 
Example #10
Source File: UnionDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code UnionDoublesSketchUDAF} in {@code Mode.PARTIAL2}: two single-value sketches built
 * with k = 256 are merged as raw bytes, and the sketch wrapped from the partial result is
 * checked for k, item count, minimum and maximum.
 */
@Test
public void partial2Mode() throws Exception {
  final ObjectInspector[] argInspectors = { binaryInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new UnionDoublesSketchUDAF().getEvaluator(paramInfo)) {
    DataToDoublesSketchUDAFTest.checkResultInspector(evaluator.init(Mode.PARTIAL2, argInspectors));

    final DoublesUnionState buffer = (DoublesUnionState) evaluator.getNewAggregationBuffer();

    // one input sketch per value, each holding exactly that value
    for (double value : new double[] { 1.0, 2.0 }) {
      final UpdateDoublesSketch input = DoublesSketch.builder().setK(256).build();
      input.update(value);
      evaluator.merge(buffer, new BytesWritable(input.toByteArray()));
    }

    final BytesWritable serialized = (BytesWritable) evaluator.terminatePartial(buffer);
    final DoublesSketch merged = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(merged.getK(), 256);
    Assert.assertEquals(merged.getRetainedItems(), 2);
    Assert.assertEquals(merged.getMinValue(), 1.0);
    Assert.assertEquals(merged.getMaxValue(), 2.0);
  }
}
 
Example #11
Source File: DataToDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code DataToDoublesSketchUDAF} in {@code Mode.PARTIAL1} on two double values with no
 * extra arguments and checks that the sketch wrapped from the partial result reports k = 128,
 * two retained items, minimum 1.0 and maximum 2.0.
 */
@Test
public void partial1ModeDefaultK() throws Exception {
  final ObjectInspector[] argInspectors = { doubleInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToDoublesSketchUDAF().getEvaluator(paramInfo)) {
    checkResultInspector(evaluator.init(Mode.PARTIAL1, argInspectors));

    final DoublesUnionState buffer = (DoublesUnionState) evaluator.getNewAggregationBuffer();
    for (double value : new double[] { 1.0, 2.0 }) {
      evaluator.iterate(buffer, new Object[] { new DoubleWritable(value) });
    }

    final BytesWritable serialized = (BytesWritable) evaluator.terminatePartial(buffer);
    final DoublesSketch sketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(sketch.getK(), 128);
    Assert.assertEquals(sketch.getRetainedItems(), 2);
    Assert.assertEquals(sketch.getMinValue(), 1.0);
    Assert.assertEquals(sketch.getMaxValue(), 2.0);
  }
}
 
Example #12
Source File: IntersectSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code IntersectSketchUDAF} in {@code Mode.FINAL}: two partial results, each a list of
 * (seed, compact sketch bytes), are merged. The input sketches hold {1, 2, 3} and {2, 3, 4},
 * and the terminated sketch is expected to retain two entries with an estimate of 2.0.
 */
@Test
public void finalMode() throws Exception {
  final ObjectInspector[] argInspectors = { binaryInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new IntersectSketchUDAF().getEvaluator(paramInfo)) {
    // init is given the struct inspector here, not the raw argument inspectors
    DataToSketchUDAFTest.checkFinalResultInspector(
        evaluator.init(Mode.FINAL, new ObjectInspector[] {structInspector}));

    final IntersectSketchUDAF.IntersectSketchUDAFEvaluator.IntersectionState buffer =
        (IntersectSketchUDAF.IntersectSketchUDAFEvaluator.IntersectionState) evaluator.getNewAggregationBuffer();

    // first input: {1, 2, 3}
    final UpdateSketch first = UpdateSketch.builder().build();
    for (int item = 1; item <= 3; item++) {
      first.update(item);
    }
    evaluator.merge(buffer, Arrays.asList(
      new LongWritable(DEFAULT_UPDATE_SEED),
      new BytesWritable(first.compact().toByteArray())
    ));

    // second input: {2, 3, 4}
    final UpdateSketch second = UpdateSketch.builder().build();
    for (int item = 2; item <= 4; item++) {
      second.update(item);
    }
    evaluator.merge(buffer, Arrays.asList(
      new LongWritable(DEFAULT_UPDATE_SEED),
      new BytesWritable(second.compact().toByteArray())
    ));

    final BytesWritable serialized = (BytesWritable) evaluator.terminate(buffer);
    final Sketch intersection = Sketches.wrapSketch(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(intersection.getRetainedEntries(true), 2);
    Assert.assertEquals(intersection.getEstimate(), 2.0);
  }
}
 
Example #13
Source File: UnionDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code UnionDoublesSketchUDAF} in {@code Mode.COMPLETE}: two single-value sketches built
 * with the builder defaults are passed in as bytes. The result is read through
 * {@code terminatePartial} and expected to report k = 128, two retained items, minimum 1.0 and
 * maximum 2.0; after {@code reset}, {@code terminate} must return {@code null}.
 */
@Test
public void completeModelDefaultK() throws Exception {
  final ObjectInspector[] argInspectors = { binaryInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new UnionDoublesSketchUDAF().getEvaluator(paramInfo)) {
    DataToDoublesSketchUDAFTest.checkResultInspector(evaluator.init(Mode.COMPLETE, argInspectors));

    final DoublesUnionState buffer = (DoublesUnionState) evaluator.getNewAggregationBuffer();

    // one input sketch per value, each holding exactly that value
    for (double value : new double[] { 1.0, 2.0 }) {
      final UpdateDoublesSketch input = DoublesSketch.builder().build();
      input.update(value);
      evaluator.iterate(buffer, new Object[] { new BytesWritable(input.toByteArray()) });
    }

    final BytesWritable serialized = (BytesWritable) evaluator.terminatePartial(buffer);
    final DoublesSketch merged = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(merged.getK(), 128);
    Assert.assertEquals(merged.getRetainedItems(), 2);
    Assert.assertEquals(merged.getMinValue(), 1.0);
    Assert.assertEquals(merged.getMaxValue(), 2.0);

    // a reset buffer must terminate to null
    evaluator.reset(buffer);
    Assert.assertNull(evaluator.terminate(buffer));
  }
}
 
Example #14
Source File: DataToDoubleSummarySketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code DataToDoubleSummarySketchUDAF} in {@code Mode.COMPLETE} with four arguments per
 * row: a double key, a double value, an int (32) and a float (0.99). The heapified result is
 * expected to be in estimation mode with an estimate near 2.0; after {@code reset},
 * {@code terminate} must return {@code null}.
 */
@Test
public void completeModeDoubleKeysExplicitParams() throws Exception {
  final ObjectInspector[] argInspectors =
      { doubleInspector, doubleInspector, intInspector, floatInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToDoubleSummarySketchUDAF().getEvaluator(paramInfo)) {
    checkFinalResultInspector(evaluator.init(Mode.COMPLETE, argInspectors));

    @SuppressWarnings("unchecked")
    final State<DoubleSummary> buffer = (State<DoubleSummary>) evaluator.getNewAggregationBuffer();
    // keys 1 and 2; every other argument is identical across rows
    for (int key = 1; key <= 2; key++) {
      evaluator.iterate(buffer, new Object[] {
          new DoubleWritable(key), new DoubleWritable(1), new IntWritable(32), new FloatWritable(0.99f)});
    }

    final Object finalResult = evaluator.terminate(buffer);
    Assert.assertNotNull(finalResult);
    Assert.assertTrue(finalResult instanceof BytesWritable);
    final Sketch<DoubleSummary> sketch = Sketches.heapifySketch(
        BytesWritableHelper.wrapAsMemory((BytesWritable) finalResult),
        new DoubleSummaryDeserializer());
    // because of sampling probability < 1
    Assert.assertTrue(sketch.isEstimationMode());
    Assert.assertEquals(sketch.getEstimate(), 2.0, 0.05);

    // a reset buffer must terminate to null
    evaluator.reset(buffer);
    Assert.assertNull(evaluator.terminate(buffer));
  }
}
 
Example #15
Source File: UnionStringsSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Expects {@code UnionStringsSketchUDAF.getEvaluator} to throw
 * {@code UDFArgumentTypeException} when its only argument is described by a struct inspector
 * (a single field "a" backed by {@code binaryInspector}).
 */
@Test(expectedExceptions = UDFArgumentTypeException.class)
public void getEvaluatorWrongCategory() throws Exception {
  final ObjectInspector structArgument = ObjectInspectorFactory.getStandardStructObjectInspector(
    Arrays.asList("a"),
    Arrays.asList(binaryInspector)
  );
  final GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(
      new ObjectInspector[] { structArgument }, false, false, false);
  new UnionStringsSketchUDAF().getEvaluator(paramInfo);
}
 
Example #16
Source File: UnionArrayOfDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code UnionArrayOfDoublesSketchUDAF} in {@code Mode.FINAL}: two partial results, each a
 * list of ({@code DEFAULT_NOMINAL_ENTRIES}, 1, compact sketch bytes), are merged. Each input
 * sketch holds one key (1 and 2 respectively), and the terminated sketch is expected to
 * estimate 2.0.
 */
@Test
public void finalMode() throws Exception {
  final ObjectInspector[] argInspectors = { binaryInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new UnionArrayOfDoublesSketchUDAF().getEvaluator(paramInfo)) {
    // init is given the struct inspector here, not the raw argument inspectors
    DataToArrayOfDoublesSketchUDAFTest.checkFinalResultInspector(
        evaluator.init(Mode.FINAL, new ObjectInspector[] {structInspector}));

    final ArrayOfDoublesState buffer = (ArrayOfDoublesState) evaluator.getNewAggregationBuffer();

    // one input sketch per key, each carrying the single value 1
    for (int key = 1; key <= 2; key++) {
      final ArrayOfDoublesUpdatableSketch input = new ArrayOfDoublesUpdatableSketchBuilder().build();
      input.update(key, new double[] {1});
      evaluator.merge(buffer, Arrays.asList(
        new IntWritable(DEFAULT_NOMINAL_ENTRIES),
        new IntWritable(1),
        new BytesWritable(input.compact().toByteArray()))
      );
    }

    final Object finalResult = evaluator.terminate(buffer);
    Assert.assertNotNull(finalResult);
    Assert.assertTrue(finalResult instanceof BytesWritable);
    final ArrayOfDoublesSketch union = ArrayOfDoublesSketches.wrapSketch(
        BytesWritableHelper.wrapAsMemory((BytesWritable) finalResult));
    Assert.assertEquals(union.getEstimate(), 2.0);
  }
}
 
Example #17
Source File: IntersectSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code IntersectSketchUDAF} in {@code Mode.COMPLETE} with only the sketch argument:
 * compact sketches holding {1, 2, 3} and {2, 3, 4} are passed in, the terminated sketch is
 * expected to estimate 2.0, and after {@code reset}, {@code terminate} must return {@code null}.
 */
@Test
public void completeModeDefaultSeed() throws Exception {
  final ObjectInspector[] argInspectors = { binaryInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new IntersectSketchUDAF().getEvaluator(paramInfo)) {
    DataToSketchUDAFTest.checkFinalResultInspector(evaluator.init(Mode.COMPLETE, argInspectors));

    final IntersectSketchUDAF.IntersectSketchUDAFEvaluator.IntersectionState buffer =
        (IntersectSketchUDAF.IntersectSketchUDAFEvaluator.IntersectionState) evaluator.getNewAggregationBuffer();

    // first input: {1, 2, 3}
    final UpdateSketch first = UpdateSketch.builder().build();
    for (int item = 1; item <= 3; item++) {
      first.update(item);
    }
    evaluator.iterate(buffer, new Object[] {new BytesWritable(first.compact().toByteArray())});

    // second input: {2, 3, 4}
    final UpdateSketch second = UpdateSketch.builder().build();
    for (int item = 2; item <= 4; item++) {
      second.update(item);
    }
    evaluator.iterate(buffer, new Object[] {new BytesWritable(second.compact().toByteArray())});

    final Object finalResult = evaluator.terminate(buffer);
    Assert.assertNotNull(finalResult);
    Assert.assertTrue(finalResult instanceof BytesWritable);
    final Sketch intersection =
        Sketches.wrapSketch(BytesWritableHelper.wrapAsMemory((BytesWritable) finalResult));
    Assert.assertEquals(intersection.getEstimate(), 2.0);

    // a reset buffer must terminate to null
    evaluator.reset(buffer);
    Assert.assertNull(evaluator.terminate(buffer));
  }
}
 
Example #18
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Expects {@code DataToSketchUDAF.getEvaluator} to throw {@code UDFArgumentTypeException} when
 * the fourth argument is described by {@code structInspector}.
 */
@Test(expectedExceptions = { UDFArgumentTypeException.class })
public void initInvalidCategoryArg4() throws SemanticException {
  final ObjectInspector[] argInspectors = {
      stringInspector, intConstantInspector, floatConstantInspector, structInspector
  };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  new DataToSketchUDAF().getEvaluator(paramInfo);
}
 
Example #19
Source File: MergeTest.java    From hive-funnel-udf with Apache License 2.0 5 votes vote down vote up
/**
 * Expects {@code Merge.getEvaluator} to throw {@code UDFArgumentLengthException} when it is
 * given two arguments (both lists of longs).
 */
@Test(expected = UDFArgumentLengthException.class)
public void testInvalidNumberOfParams() throws HiveException {
    Merge udaf = new Merge();
    ObjectInspector[] inputObjectInspectorList = new ObjectInspector[]{
            ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaLongObjectInspector),
            ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaLongObjectInspector)
    };

    GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(inputObjectInspectorList, false, false);
    // The call is expected to throw, so its return value is deliberately not stored.
    udaf.getEvaluator(paramInfo);
}
 
Example #20
Source File: MergeTest.java    From hive-funnel-udf with Apache License 2.0 5 votes vote down vote up
/**
 * Expects {@code Merge.getEvaluator} to throw {@code UDFArgumentTypeException} when its single
 * argument is described by a primitive (Java string) inspector.
 */
@Test(expected = UDFArgumentTypeException.class)
public void testPrimitiveParam() throws HiveException {
    Merge udaf = new Merge();
    ObjectInspector[] inputObjectInspectorList = new ObjectInspector[]{
        PrimitiveObjectInspectorFactory.javaStringObjectInspector
    };

    GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(inputObjectInspectorList, false, false);
    // The call is expected to throw, so its return value is deliberately not stored.
    udaf.getEvaluator(paramInfo);
}
 
Example #21
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code DataToSketchUDAF} in {@code Mode.FINAL}: two partial results, each a list of
 * ({@code DEFAULT_LG_K}, {@code DEFAULT_HLL_TYPE} as text, compact {@code HllSketch} bytes),
 * are merged. Each input sketch holds one value (1 and 2 respectively), and the terminated
 * sketch is expected to estimate 2.0 within 0.01.
 */
@Test
public void finalMode() throws Exception {
  final ObjectInspector[] argInspectors = { intInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToSketchUDAF().getEvaluator(paramInfo)) {
    // init is given the struct inspector here, not the raw argument inspectors
    checkFinalResultInspector(evaluator.init(Mode.FINAL, new ObjectInspector[] {structInspector}));

    final State buffer = (State) evaluator.getNewAggregationBuffer();

    // one input sketch per value, each holding exactly that value
    for (int item = 1; item <= 2; item++) {
      final HllSketch input = new HllSketch(SketchEvaluator.DEFAULT_LG_K);
      input.update(item);
      evaluator.merge(buffer, Arrays.asList(
        new IntWritable(SketchEvaluator.DEFAULT_LG_K),
        new Text(SketchEvaluator.DEFAULT_HLL_TYPE.toString()),
        new BytesWritable(input.toCompactByteArray()))
      );
    }

    final Object finalResult = evaluator.terminate(buffer);
    Assert.assertNotNull(finalResult);
    Assert.assertTrue(finalResult instanceof BytesWritable);
    final HllSketch merged =
        HllSketch.heapify(BytesWritableHelper.wrapAsMemory((BytesWritable) finalResult));
    Assert.assertEquals(merged.getEstimate(), 2.0, 0.01);
  }
}
 
Example #22
Source File: UnionSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code UnionSketchUDAF} in {@code Mode.PARTIAL1} with three arguments per row: compact
 * sketch bytes, an int (16) and a long seed (1). The intermediate result is expected to be a
 * three-element list holding 16, the seed and the serialized union, which must be in exact
 * (non-estimation) mode with an estimate of 2.0.
 */
@Test
public void partial1ModeExplicitParams() throws Exception {
  final ObjectInspector[] argInspectors = { binaryInspector, intInspector, longInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new UnionSketchUDAF().getEvaluator(paramInfo)) {
    DataToSketchUDAFTest.checkIntermediateResultInspector(evaluator.init(Mode.PARTIAL1, argInspectors));

    final int nomEntries = 16;
    final long seed = 1;
    final UnionState buffer = (UnionState) evaluator.getNewAggregationBuffer();

    // one input sketch per value, each built with the custom seed
    for (int item = 1; item <= 2; item++) {
      final UpdateSketch input = UpdateSketch.builder().setSeed(seed).build();
      input.update(item);
      evaluator.iterate(buffer, new Object[] {
          new BytesWritable(input.compact().toByteArray()), new IntWritable(nomEntries), new LongWritable(seed)});
    }

    final Object partial = evaluator.terminatePartial(buffer);
    Assert.assertNotNull(partial);
    Assert.assertTrue(partial instanceof List);
    final List<?> fields = (List<?>) partial;
    Assert.assertEquals(fields.size(), 3);
    Assert.assertEquals(((IntWritable) fields.get(0)).get(), nomEntries);
    Assert.assertEquals(((LongWritable) fields.get(1)).get(), seed);

    final BytesWritable sketchBytes = (BytesWritable) fields.get(2);
    final Sketch union = Sketches.wrapSketch(BytesWritableHelper.wrapAsMemory(sketchBytes), seed);
    Assert.assertFalse(union.isEstimationMode());
    Assert.assertEquals(union.getEstimate(), 2.0);
  }
}
 
Example #23
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code DataToSketchUDAF} in {@code Mode.FINAL} with a non-default seed (123): two
 * partial results, each a list of ({@code DEFAULT_LG_K}, seed, {@code CpcSketch} bytes), are
 * merged. The terminated sketch is heapified with the same seed and expected to estimate 2.0
 * within 0.01.
 */
@Test
public void finalModeCustomSeed() throws Exception {
  final ObjectInspector[] argInspectors = { intInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToSketchUDAF().getEvaluator(paramInfo)) {
    // init is given the struct inspector here, not the raw argument inspectors
    checkFinalResultInspector(evaluator.init(Mode.FINAL, new ObjectInspector[] {structInspector}));

    final long seed = 123;
    final State buffer = (State) evaluator.getNewAggregationBuffer();

    // one input sketch per value, each built with the custom seed
    for (int item = 1; item <= 2; item++) {
      final CpcSketch input = new CpcSketch(SketchEvaluator.DEFAULT_LG_K, seed);
      input.update(item);
      evaluator.merge(buffer, Arrays.asList(
        new IntWritable(SketchEvaluator.DEFAULT_LG_K),
        new LongWritable(seed),
        new BytesWritable(input.toByteArray()))
      );
    }

    final Object finalResult = evaluator.terminate(buffer);
    Assert.assertNotNull(finalResult);
    Assert.assertTrue(finalResult instanceof BytesWritable);
    final CpcSketch merged =
        CpcSketch.heapify(BytesWritableHelper.wrapAsMemory((BytesWritable) finalResult), seed);
    Assert.assertEquals(merged.getEstimate(), 2.0, 0.01);
  }
}
 
Example #24
Source File: UnionSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code UnionSketchUDAF} in {@code Mode.FINAL}: two partial results, each a list of
 * ({@code DEFAULT_LG_K}, {@code DEFAULT_HLL_TYPE} as text, compact {@code HllSketch} bytes),
 * are merged. Each input sketch holds one value (1 and 2 respectively), and the terminated
 * sketch is expected to estimate 2.0 within 0.01.
 */
@Test
public void finalMode() throws Exception {
  final ObjectInspector[] argInspectors = { binaryInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new UnionSketchUDAF().getEvaluator(paramInfo)) {
    // init is given the struct inspector here, not the raw argument inspectors
    DataToSketchUDAFTest.checkFinalResultInspector(
        evaluator.init(Mode.FINAL, new ObjectInspector[] {structInspector}));

    final State buffer = (State) evaluator.getNewAggregationBuffer();

    // one input sketch per value, each holding exactly that value
    for (int item = 1; item <= 2; item++) {
      final HllSketch input = new HllSketch(SketchEvaluator.DEFAULT_LG_K);
      input.update(item);
      evaluator.merge(buffer, Arrays.asList(
        new IntWritable(SketchEvaluator.DEFAULT_LG_K),
        new Text(SketchEvaluator.DEFAULT_HLL_TYPE.toString()),
        new BytesWritable(input.toCompactByteArray()))
      );
    }

    final Object finalResult = evaluator.terminate(buffer);
    Assert.assertNotNull(finalResult);
    Assert.assertTrue(finalResult instanceof BytesWritable);
    final HllSketch merged =
        HllSketch.heapify(BytesWritableHelper.wrapAsMemory((BytesWritable) finalResult));
    Assert.assertEquals(merged.getEstimate(), 2.0, 0.01);
  }
}
 
Example #25
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the evaluator in PARTIAL1 mode with string keys and explicit lgK and
 * seed arguments: after iterating over "a" and "b", the partial result is
 * expected to be a three-element list (lgK, seed, serialized sketch) whose
 * sketch reports the given lgK and an estimate of 2 (within 0.01).
 */
@Test
public void partial1ModeStringKeysExplicitParams() throws Exception {
  final int explicitLgK = 10;
  final long explicitSeed = 123;
  final ObjectInspector[] inputInspectors =
      { stringInspector, intConstantInspector, longConstantInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(inputInspectors, false, false, false);

  try (GenericUDAFEvaluator evaluator = new DataToSketchUDAF().getEvaluator(paramInfo)) {
    checkIntermediateResultInspector(evaluator.init(Mode.PARTIAL1, inputInspectors));

    final State buffer = (State) evaluator.getNewAggregationBuffer();
    // Each row carries the key plus the lgK and seed arguments.
    for (String key : new String[] {"a", "b"}) {
      evaluator.iterate(buffer,
          new Object[] {new Text(key), new IntWritable(explicitLgK), new LongWritable(explicitSeed)});
    }

    final Object partial = evaluator.terminatePartial(buffer);
    Assert.assertNotNull(partial);
    Assert.assertTrue(partial instanceof List);

    final List<?> fields = (List<?>) partial;
    Assert.assertEquals(fields.size(), 3);
    Assert.assertEquals(((IntWritable) fields.get(0)).get(), explicitLgK);
    Assert.assertEquals(((LongWritable) fields.get(1)).get(), explicitSeed);

    final BytesWritable serialized = (BytesWritable) fields.get(2);
    final CpcSketch sketch = CpcSketch.heapify(BytesWritableHelper.wrapAsMemory(serialized), explicitSeed);
    Assert.assertEquals(sketch.getLgK(), explicitLgK);
    Assert.assertEquals(sketch.getEstimate(), 2.0, 0.01);
  }
}
 
Example #26
Source File: UnionSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the union evaluator in PARTIAL1 mode with explicit lgK and HLL type
 * arguments: two serialized single-value sketches are iterated over, and the
 * partial result is expected to be a three-element list (lgK, HLL type name,
 * serialized sketch) whose sketch reports the given lgK and type and an
 * estimate of 2 (within 0.01).
 */
@Test
public void partial1ModeExplicitParams() throws Exception {
  final int explicitLgK = 10;
  final TgtHllType explicitType = TgtHllType.HLL_6;
  final ObjectInspector[] inputInspectors =
      { binaryInspector, intConstantInspector, stringConstantInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(inputInspectors, false, false, false);

  try (GenericUDAFEvaluator evaluator = new UnionSketchUDAF().getEvaluator(paramInfo)) {
    DataToSketchUDAFTest.checkIntermediateResultInspector(
        evaluator.init(Mode.PARTIAL1, inputInspectors));

    final State buffer = (State) evaluator.getNewAggregationBuffer();

    // Feed one single-value sketch per distinct value (1, then 2).
    for (int value = 1; value <= 2; value++) {
      final HllSketch single = new HllSketch(explicitLgK, explicitType);
      single.update(value);
      final Object[] row = {
          new BytesWritable(single.toCompactByteArray()),
          new IntWritable(explicitLgK),
          new Text(explicitType.toString())
      };
      evaluator.iterate(buffer, row);
    }

    final Object partial = evaluator.terminatePartial(buffer);
    Assert.assertNotNull(partial);
    Assert.assertTrue(partial instanceof List);

    final List<?> fields = (List<?>) partial;
    Assert.assertEquals(fields.size(), 3);
    Assert.assertEquals(((IntWritable) fields.get(0)).get(), explicitLgK);
    Assert.assertEquals(((Text) fields.get(1)).toString(), explicitType.toString());

    final BytesWritable serialized = (BytesWritable) fields.get(2);
    final HllSketch union = HllSketch.heapify(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(union.getLgConfigK(), explicitLgK);
    Assert.assertEquals(union.getTgtHllType(), explicitType);
    Assert.assertEquals(union.getEstimate(), 2.0, 0.01);
  }
}
 
Example #27
Source File: MergeTest.java    From hive-funnel-udf with Apache License 2.0 5 votes vote down vote up
/**
 * Expects {@link UDFArgumentTypeException} when, in COMPLETE mode, the
 * evaluator is fed two funnels of different lengths (three elements, then two).
 *
 * <p>Note: {@code expected = ...} is satisfied by the exception being thrown
 * from any statement in this method, not only the final {@code iterate} call.
 *
 * @throws HiveException declared by the evaluator calls made here
 */
@Test(expected = UDFArgumentTypeException.class)
public void testCompleteFunnelSizeMismatch() throws HiveException {
    Merge udaf = new Merge();
    ObjectInspector[] inputObjectInspectorList = new ObjectInspector[]{
            ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaLongObjectInspector)
    };

    GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(inputObjectInspectorList, false, false);
    GenericUDAFEvaluator udafEvaluator = udaf.getEvaluator(paramInfo);

    // Initialize for COMPLETE mode; the returned output inspector is not needed by this test.
    udafEvaluator.init(Mode.COMPLETE, inputObjectInspectorList);

    // Setup two funnels, different sizes.
    List<Long> funnel1 = new ArrayList<>();
    funnel1.add(1L);
    funnel1.add(1L);
    funnel1.add(0L);

    List<Long> funnel2 = new ArrayList<>();
    funnel2.add(1L);
    funnel2.add(0L);

    Object[] parameters1 = new Object[]{funnel1};
    Object[] parameters2 = new Object[]{funnel2};

    // Aggregating funnels of different sizes into the same buffer should cause an error.
    AggregationBuffer agg = udafEvaluator.getNewAggregationBuffer();
    udafEvaluator.reset(agg);
    udafEvaluator.iterate(agg, parameters1);
    udafEvaluator.iterate(agg, parameters2);
}
 
Example #28
Source File: DataToStringsSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the evaluator in PARTIAL2 mode, merges two serialized
 * single-item sketches ("a" and "b") into a freshly created buffer, and
 * checks the terminated sketch: stream length 2, two active items, and an
 * estimate of 1 for each item.
 */
@Test
public void mergeTerminateEmptyState() throws Exception {
  final ObjectInspector[] inputInspectors = { stringInspector, intInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(inputInspectors, false, false, false);

  try (GenericUDAFEvaluator evaluator = new DataToStringsSketchUDAF().getEvaluator(paramInfo)) {
    checkResultInspector(evaluator.init(Mode.PARTIAL2, new ObjectInspector[] { binaryInspector }));

    @SuppressWarnings("unchecked")
    final ItemsState<String> buffer = (ItemsState<String>) evaluator.getNewAggregationBuffer();

    // Merge one single-item sketch per item, in the order "a", "b".
    for (String item : new String[] {"a", "b"}) {
      final ItemsSketch<String> single = new ItemsSketch<>(256);
      single.update(item);
      evaluator.merge(buffer, new BytesWritable(single.toByteArray(serDe)));
    }

    final BytesWritable serialized = (BytesWritable) evaluator.terminate(buffer);
    final ItemsSketch<String> merged =
        ItemsSketch.getInstance(BytesWritableHelper.wrapAsMemory(serialized), serDe);
    Assert.assertEquals(merged.getStreamLength(), 2);
    Assert.assertEquals(merged.getNumActiveItems(), 2);
    Assert.assertEquals(merged.getEstimate("a"), 1);
    Assert.assertEquals(merged.getEstimate("b"), 1);
  }
}
 
Example #29
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the evaluator in PARTIAL1 mode with string keys and explicit lgK and
 * HLL type arguments: after iterating over "a" and "b", the partial result is
 * expected to be a three-element list (lgK, HLL type name, serialized sketch)
 * whose sketch reports the given lgK and type and an estimate of 2 (within 0.01).
 */
@Test
public void partial1ModeStringKeysExplicitParams() throws Exception {
  final int explicitLgK = 10;
  final TgtHllType explicitType = TgtHllType.HLL_8;
  final ObjectInspector[] inputInspectors =
      { stringInspector, intConstantInspector, stringConstantInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(inputInspectors, false, false, false);

  try (GenericUDAFEvaluator evaluator = new DataToSketchUDAF().getEvaluator(paramInfo)) {
    checkIntermediateResultInspector(evaluator.init(Mode.PARTIAL1, inputInspectors));

    final State buffer = (State) evaluator.getNewAggregationBuffer();
    // Each row carries the key plus the lgK and HLL type arguments.
    for (String key : new String[] {"a", "b"}) {
      evaluator.iterate(buffer,
          new Object[] {new Text(key), new IntWritable(explicitLgK), new Text(explicitType.toString())});
    }

    final Object partial = evaluator.terminatePartial(buffer);
    Assert.assertNotNull(partial);
    Assert.assertTrue(partial instanceof List);

    final List<?> fields = (List<?>) partial;
    Assert.assertEquals(fields.size(), 3);
    Assert.assertEquals(((IntWritable) fields.get(0)).get(), explicitLgK);
    Assert.assertEquals(((Text) fields.get(1)).toString(), explicitType.toString());

    final BytesWritable serialized = (BytesWritable) fields.get(2);
    final HllSketch sketch = HllSketch.heapify(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(sketch.getLgConfigK(), explicitLgK);
    Assert.assertEquals(sketch.getTgtHllType(), explicitType);
    Assert.assertEquals(sketch.getEstimate(), 2.0, 0.01);
  }
}
 
Example #30
Source File: DataToDoubleSummarySketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the evaluator in COMPLETE mode over 10000 distinct integer keys, each
 * with a summary value of 1, and checks that the terminated sketch estimates
 * about 10000 (within 3%) while retaining at most 4096 entries. Afterwards the
 * buffer is reset and terminating it again is expected to yield {@code null}.
 */
@Test
public void completeModeCheckTrimmingToNominal() throws Exception {
  final int distinctKeys = 10000;
  final ObjectInspector[] inputInspectors = { intInspector, doubleInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(inputInspectors, false, false, false);

  try (GenericUDAFEvaluator evaluator = new DataToDoubleSummarySketchUDAF().getEvaluator(paramInfo)) {
    checkFinalResultInspector(evaluator.init(Mode.COMPLETE, inputInspectors));

    @SuppressWarnings("unchecked")
    final State<DoubleSummary> buffer = (State<DoubleSummary>) evaluator.getNewAggregationBuffer();

    int key = 0;
    while (key < distinctKeys) {
      evaluator.iterate(buffer, new Object[] {new IntWritable(key), new DoubleWritable(1)});
      key++;
    }

    final Object finalResult = evaluator.terminate(buffer);
    Assert.assertNotNull(finalResult);
    Assert.assertTrue(finalResult instanceof BytesWritable);

    final BytesWritable serialized = (BytesWritable) finalResult;
    final Sketch<DoubleSummary> sketch = Sketches.heapifySketch(
        BytesWritableHelper.wrapAsMemory(serialized), new DoubleSummaryDeserializer());
    Assert.assertEquals(sketch.getEstimate(), 10000.0, distinctKeys * 0.03);
    Assert.assertTrue(sketch.getRetainedEntries() <= 4096, "retained entries: " + sketch.getRetainedEntries());

    // After a reset the same buffer is expected to terminate to null.
    evaluator.reset(buffer);
    Assert.assertNull(evaluator.terminate(buffer));
  }
}