com.tdunning.math.stats.MergingDigest Java Examples
The following examples show how to use
com.tdunning.math.stats.MergingDigest.
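For orientation before the project-specific examples, here is a minimal, self-contained sketch of the basic MergingDigest workflow: create a digest, add values, and query quantiles. The compression value of 100 and the random input are illustrative choices, not taken from any of the examples below.

import com.tdunning.math.stats.MergingDigest;
import java.util.Random;

public class MergingDigestBasics {
    public static void main(String[] args) {
        // compression controls the accuracy/size trade-off; 100 is a common choice
        MergingDigest digest = new MergingDigest(100);

        Random random = new Random(42);
        for (int i = 0; i < 100_000; i++) {
            digest.add(random.nextDouble());
        }

        // estimated median and estimated CDF at 0.5
        System.out.printf("quantile(0.5) = %.4f%n", digest.quantile(0.5));
        System.out.printf("cdf(0.5)      = %.4f%n", digest.cdf(0.5));
    }
}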
Example #1
Source File: MergeBench.java From t-digest with Apache License 2.0 | 6 votes |
@Setup
public void setup() {
    data = new double[10000000];
    for (int i = 0; i < data.length; i++) {
        data[i] = gen.nextDouble();
    }
    td = new MergingDigest(compression, (factor + 1) * compression, compression);
    td.setScaleFunction(ScaleFunction.valueOf(scaleFunction));

    // First values are very cheap to add, we are more interested in the steady state,
    // when the summary is full. Summaries are expected to contain about 0.6*compression
    // centroids, hence the 5 * compression * (factor+1)
    for (int i = 0; i < 5 * compression * (factor + 1); ++i) {
        td.add(gen.nextDouble());
    }
}
Example #2
Source File: TDigestQuantilesTest.java From beam with Apache License 2.0 | 6 votes |
private <T> boolean encodeDecodeEquals(MergingDigest tDigest) throws IOException {
    MergingDigest decoded = CoderUtils.clone(new MergingDigestCoder(), tDigest);
    boolean equal = true;
    // the only way to compare the two sketches is to compare them centroid by centroid.
    // Indeed, the means are doubles but are encoded as float and cast during decoding.
    // This entails a small approximation that makes the centroids different after decoding.
    Iterator<Centroid> it1 = decoded.centroids().iterator();
    Iterator<Centroid> it2 = tDigest.centroids().iterator();
    for (int i = 0; i < decoded.centroids().size(); i++) {
        Centroid c1 = it1.next();
        Centroid c2 = it2.next();
        if ((float) c1.mean() != (float) c2.mean() || c1.count() != c2.count()) {
            equal = false;
            break;
        }
    }
    return equal;
}
Example #3
Source File: ComparisonTest.java From t-digest with Apache License 2.0 | 6 votes |
private void compareQD(PrintWriter out, AbstractContinousDistribution gen, String tag, long scale) {
    for (double compression : new double[]{10, 20, 50, 100, 200, 500, 1000, 2000}) {
        QDigest qd = new QDigest(compression);
        TDigest dist = new MergingDigest(compression);
        double[] data = new double[100000];
        for (int i = 0; i < 100000; i++) {
            double x = gen.nextDouble();
            dist.add(x);
            qd.offer((long) (x * scale));
            data[i] = x;
        }
        dist.compress();
        Arrays.sort(data);
        for (double q : new double[]{1e-5, 1e-4, 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999, 0.9999, 0.99999}) {
            double x1 = dist.quantile(q);
            double x2 = (double) qd.getQuantile(q) / scale;
            double e1 = Dist.cdf(x1, data) - q;
            double e2 = Dist.cdf(x2, data) - q;
            out.printf("%s,%.0f,%.8f,%.10g,%.10g,%d,%d\n",
                    tag, compression, q, e1, e2, dist.smallByteSize(), QDigest.serialize(qd).length);
        }
    }
}
Example #4
Source File: TDigestQuantiles.java From beam with Apache License 2.0 | 5 votes |
@Override
public void encode(MergingDigest value, OutputStream outStream) throws IOException {
    if (value == null) {
        throw new CoderException("cannot encode a null T-Digest sketch");
    }
    ByteBuffer buf = ByteBuffer.allocate(value.byteSize());
    value.asBytes(buf);
    BYTE_ARRAY_CODER.encode(buf.array(), outStream);
}
Example #5
Source File: TDigestNumericHistogram.java From geowave with Apache License 2.0 | 5 votes |
@Override
public void fromBinary(final ByteBuffer buffer) {
    tdigest = MergingDigest.fromBytes(buffer);
    final byte[] remaining = new byte[buffer.remaining()];
    buffer.get(remaining);
    count = ByteArrayUtils.variableLengthDecode(remaining);
}
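Examples #4 and #5 show the two halves of serialization. As a rough standalone sketch of the full round trip (the class name and sample data are illustrative; the byteSize(), asBytes() and fromBytes() calls are the ones used in those examples):

import com.tdunning.math.stats.MergingDigest;
import java.nio.ByteBuffer;

public class RoundTripSketch {
    public static void main(String[] args) {
        MergingDigest original = new MergingDigest(100);
        for (int i = 0; i < 1_000; i++) {
            original.add(i);
        }

        // serialize: ask the digest how many bytes it needs, then write into a buffer
        ByteBuffer buffer = ByteBuffer.allocate(original.byteSize());
        original.asBytes(buffer);
        buffer.flip();

        // deserialize from the same bytes and compare a quantile estimate
        MergingDigest restored = MergingDigest.fromBytes(buffer);
        System.out.printf("median before: %.3f, after: %.3f%n",
                original.quantile(0.5), restored.quantile(0.5));
    }
}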
Example #6
Source File: TDigestQuantilesTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testMergeAccum() {
    Random rd = new Random(1234);
    List<MergingDigest> accums = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
        MergingDigest std = new MergingDigest(100);
        for (int j = 0; j < 1000; j++) {
            std.add(rd.nextDouble());
        }
        accums.add(std);
    }
    TDigestQuantilesFn fn = TDigestQuantilesFn.create(100);
    MergingDigest res = fn.mergeAccumulators(accums);
}
Example #7
Source File: TDigestQuantilesTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testCoder() throws Exception {
    MergingDigest tDigest = new MergingDigest(1000);
    for (int i = 0; i < 10; i++) {
        tDigest.add(2.4 + i);
    }
    Assert.assertTrue("Encode and Decode", encodeDecodeEquals(tDigest));
}
Example #8
Source File: TDigestQuantiles.java From beam with Apache License 2.0 | 5 votes |
@Override
protected long getEncodedElementByteSize(MergingDigest value) throws IOException {
    if (value == null) {
        throw new CoderException("cannot encode a null T-Digest sketch");
    }
    return value.byteSize();
}
Example #9
Source File: ComparisonTest.java From t-digest with Apache License 2.0 | 5 votes |
private void compareSQ(PrintWriter out, AbstractContinousDistribution gen, String tag) {
    double[] quantiles = {0.001, 0.01, 0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 0.99, 0.999};
    for (double compression : new double[]{10, 20, 50, 100, 200, 500, 1000, 2000}) {
        QuantileEstimator sq = new QuantileEstimator(1001);
        TDigest dist = new MergingDigest(compression);
        double[] data = new double[100000];
        for (int i = 0; i < 100000; i++) {
            double x = gen.nextDouble();
            dist.add(x);
            sq.add(x);
            data[i] = x;
        }
        dist.compress();
        Arrays.sort(data);
        List<Double> qz = sq.getQuantiles();
        for (double q : quantiles) {
            double x1 = dist.quantile(q);
            double x2 = qz.get((int) (q * 1000 + 0.5));
            double e1 = Dist.cdf(x1, data) - q;
            double e2 = Dist.cdf(x2, data) - q;
            out.printf("%s,%.0f,%.8f,%.10g,%.10g,%d,%d\n",
                    tag, compression, q, e1, e2, dist.smallByteSize(), sq.serializedSize());
        }
    }
}
Example #10
Source File: BinFill.java From t-digest with Apache License 2.0 | 5 votes |
@Test
public void sampleFill() {
    System.out.printf("scale,delta,centroid,mean,count\n");
    for (double delta : new double[]{5, 10}) {
        double[] data = {0, 0, 3, 4, 1, 6, 0, 5, 2, 0, 3, 3, 2, 3, 0, 2, 5, 0, 3, 1};
        MergingDigest t1 = new MergingDigest(delta);
        t1.setScaleFunction(ScaleFunction.K_1);
        MergingDigest t2 = new MergingDigest(delta);
        t2.setScaleFunction(ScaleFunction.K_2);
        MergingDigest t3 = new MergingDigest(delta);
        t3.setScaleFunction(ScaleFunction.K_3);
        for (double x : data) {
            t1.add(x);
            t2.add(x);
            t3.add(x);
        }
        int i = 1;
        for (MergingDigest t : Lists.newArrayList(t1, t2, t3)) {
            System.out.printf("> %d, %.0f, %.5f, %.5f\n", i, delta, t.quantile(0.65), Dist.quantile(0.65, data));
            int j = 0;
            for (Centroid centroid : t.centroids()) {
                System.out.printf("%d,%.0f,%d,%.5f,%d\n", i, delta, j, centroid.mean(), centroid.count());
                j++;
            }
            i++;
        }
    }
}
Example #11
Source File: TDigestQuantiles.java From beam with Apache License 2.0 | 5 votes |
@Override
public MergingDigest mergeAccumulators(Iterable<MergingDigest> accumulators) {
    Iterator<MergingDigest> it = accumulators.iterator();
    MergingDigest merged = it.next();
    while (it.hasNext()) {
        merged.add(it.next());
    }
    return merged;
}
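Example #11 works because one digest can absorb another via add(TDigest), as seen in the merged.add(it.next()) call above. A minimal sketch of that pattern outside Beam (names, seeds and data are illustrative only):

import com.tdunning.math.stats.MergingDigest;
import java.util.Random;

public class MergeSketch {
    public static void main(String[] args) {
        Random random = new Random(7);
        MergingDigest first = new MergingDigest(100);
        MergingDigest second = new MergingDigest(100);
        for (int i = 0; i < 10_000; i++) {
            first.add(random.nextGaussian());
            second.add(random.nextGaussian() + 1.0);
        }

        // add(TDigest) folds the other digest's centroids into the receiver,
        // so 'first' now summarizes both streams
        first.add(second);
        System.out.printf("combined median = %.4f%n", first.quantile(0.5));
    }
}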
Example #12
Source File: Util.java From t-digest with Apache License 2.0 | 4 votes |
TDigest create(double compression, int bufferSize) {
    TDigest digest = new MergingDigest(compression, bufferSize);
    ((MergingDigest) digest).useAlternatingSort = false;
    ((MergingDigest) digest).useTwoLevelCompression = false;
    return digest;
}
Example #13
Source File: BinFill.java From t-digest with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws FileNotFoundException {
    try (PrintWriter out = new PrintWriter("bin-fill.csv")) {
        out.printf("iteration,dist,algo,scale,q,x,k0,k1,dk,q0,q1,count,max0,max1\n");

        // for all scale functions except the non-normalized ones
        for (ScaleFunction f : ScaleFunction.values()) {
            if (f.toString().contains("NO_NORM")) {
                continue;
            }
            System.out.printf("%s\n", f);

            // for all kinds of t-digests
            for (Util.Factory factory : Util.Factory.values()) {
                // for different distributions of values
                for (Util.Distribution distribution : Util.Distribution.values()) {
                    AbstractDistribution gen = distribution.create(new Random());
                    // do multiple passes
                    for (int i = 0; i < 10; i++) {
                        TDigest dist = factory.create();
                        if (dist instanceof MergingDigest) {
                            // can only set scale function on merging digest right now ...
                            // ability for TreeDigest coming soon
                            dist.setScaleFunction(f);
                        }
                        for (int j = 0; j < N; j++) {
                            dist.add(gen.nextDouble());
                        }

                        // now dump stats for the centroids
                        double q0 = 0;
                        double k0 = 0;
                        for (Centroid c : dist.centroids()) {
                            double q1 = q0 + (double) c.count() / N;
                            double k1 = f.k(q1, dist.compression(), dist.size());
                            out.printf("%d,%s,%s,%s,%.7f,%.7f,%.7f,%.7f,%.7f,%.7f,%.7f,%d,%.1f,%.1f\n",
                                    i, distribution, factory, f, (q0 + q1) / 2, c.mean(),
                                    k0, k1, k1 - k0, q0, q1, c.count(),
                                    dist.size() * f.max(q0, dist.compression(), dist.size()),
                                    dist.size() * f.max(q1, dist.compression(), dist.size()));
                            q0 = q1;
                            k0 = k1;
                        }
                    }
                }
            }
        }
    }
}
Example #14
Source File: Util.java From t-digest with Apache License 2.0 | 4 votes |
TDigest create(double compression) {
    TDigest digest = new MergingDigest(compression, (int) (10 * compression));
    ((MergingDigest) digest).useAlternatingSort = true;
    ((MergingDigest) digest).useTwoLevelCompression = true;
    return digest;
}
Example #15
Source File: Util.java From t-digest with Apache License 2.0 | 4 votes |
TDigest create(double compression, int bufferSize) {
    TDigest digest = new MergingDigest(compression, bufferSize);
    ((MergingDigest) digest).useAlternatingSort = true;
    ((MergingDigest) digest).useTwoLevelCompression = true;
    return digest;
}
Example #16
Source File: Util.java From t-digest with Apache License 2.0 | 4 votes |
TDigest create(double compression) {
    TDigest digest = new MergingDigest(compression, (int) (10 * compression));
    ((MergingDigest) digest).useAlternatingSort = false;
    ((MergingDigest) digest).useTwoLevelCompression = false;
    return digest;
}
Example #17
Source File: TDigestQuantiles.java From beam with Apache License 2.0 | 4 votes |
@Override
public PCollection<MergingDigest> expand(PCollection<Double> input) {
    return input.apply(
        "Compute T-Digest Structure",
        Combine.globally(TDigestQuantilesFn.create(this.compression())));
}
Example #18
Source File: TDigestBench.java From t-digest with Apache License 2.0 | 4 votes |
@Override
TDigest create(double compression) {
    return new MergingDigest(compression, (int) (10 * compression));
}
Example #19
Source File: TDigestQuantiles.java From beam with Apache License 2.0 | 4 votes |
@Override
public boolean isRegisterByteSizeObserverCheap(MergingDigest value) {
    return true;
}
Example #20
Source File: TDigestQuantiles.java From beam with Apache License 2.0 | 4 votes |
@Override
public MergingDigest decode(InputStream inStream) throws IOException {
    byte[] bytes = BYTE_ARRAY_CODER.decode(inStream);
    ByteBuffer buf = ByteBuffer.wrap(bytes);
    return MergingDigest.fromBytes(buf);
}
Example #21
Source File: TDigestQuantiles.java From beam with Apache License 2.0 | 4 votes |
@Override
public Coder<MergingDigest> getDefaultOutputCoder(CoderRegistry registry, Coder inputCoder) {
    return new MergingDigestCoder();
}
Example #22
Source File: TDigestQuantiles.java From beam with Apache License 2.0 | 4 votes |
@Override
public Coder<MergingDigest> getAccumulatorCoder(CoderRegistry registry, Coder inputCoder) {
    return new MergingDigestCoder();
}
Example #23
Source File: TDigestQuantiles.java From beam with Apache License 2.0 | 4 votes |
/** Output the whole structure so it can be queried, reused or stored easily. */
@Override
public MergingDigest extractOutput(MergingDigest accum) {
    return accum;
}
Example #24
Source File: TDigestQuantiles.java From beam with Apache License 2.0 | 4 votes |
@Override
public MergingDigest addInput(MergingDigest accum, Double value) {
    accum.add(value);
    return accum;
}
Example #25
Source File: TDigestQuantiles.java From beam with Apache License 2.0 | 4 votes |
@Override
public MergingDigest createAccumulator() {
    return new MergingDigest(compression);
}
Example #26
Source File: TDigestQuantiles.java From beam with Apache License 2.0 | 4 votes |
@Override
public PCollection<KV<K, MergingDigest>> expand(PCollection<KV<K, Double>> input) {
    return input.apply(
        "Compute T-Digest Structure",
        Combine.perKey(TDigestQuantilesFn.create(this.compression())));
}
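Examples #11, #23, #24 and #25 are fragments of the same Beam combiner. Pieced together, a simplified sketch of such a CombineFn looks roughly like the following (the real TDigestQuantilesFn also wires up the MergingDigestCoder shown in Examples #21 and #22; class name and structure here are illustrative only):

import com.tdunning.math.stats.MergingDigest;
import org.apache.beam.sdk.transforms.Combine;

// Simplified combiner: Double inputs, MergingDigest accumulator and output.
public class TDigestCombineFnSketch
        extends Combine.CombineFn<Double, MergingDigest, MergingDigest> {

    private final double compression;

    public TDigestCombineFnSketch(double compression) {
        this.compression = compression;
    }

    @Override
    public MergingDigest createAccumulator() {
        return new MergingDigest(compression);
    }

    @Override
    public MergingDigest addInput(MergingDigest accum, Double value) {
        accum.add(value);
        return accum;
    }

    @Override
    public MergingDigest mergeAccumulators(Iterable<MergingDigest> accumulators) {
        MergingDigest merged = null;
        for (MergingDigest accum : accumulators) {
            if (merged == null) {
                merged = accum;
            } else {
                // fold each subsequent accumulator into the first one
                merged.add(accum);
            }
        }
        return merged == null ? createAccumulator() : merged;
    }

    @Override
    public MergingDigest extractOutput(MergingDigest accum) {
        return accum;
    }
}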