com.tdunning.math.stats.Centroid Java Examples
The following examples show how to use
com.tdunning.math.stats.Centroid.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TDigestQuantilesTest.java From beam with Apache License 2.0 | 6 votes |
private <T> boolean encodeDecodeEquals(MergingDigest tDigest) throws IOException { MergingDigest decoded = CoderUtils.clone(new MergingDigestCoder(), tDigest); boolean equal = true; // the only way to compare the two sketches is to compare them centroid by centroid. // Indeed, the means are doubles but are encoded as float and cast during decoding. // This entails a small approximation that makes the centroids different after decoding. Iterator<Centroid> it1 = decoded.centroids().iterator(); Iterator<Centroid> it2 = tDigest.centroids().iterator(); for (int i = 0; i < decoded.centroids().size(); i++) { Centroid c1 = it1.next(); Centroid c2 = it2.next(); if ((float) c1.mean() != (float) c2.mean() || c1.count() != c2.count()) { equal = false; break; } } return equal; }
Example #2
Source File: TDigestState.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Writes a digest state to the given stream.
 *
 * <p>Fields are written in this order: the compression as a double, the centroid count as a
 * variable-length int, then for each centroid its mean as a double followed by its count as a
 * variable-length long.
 *
 * @param state the digest state to serialize
 * @param out the stream to write to
 * @throws IOException declared for the underlying stream writes
 */
public static void write(TDigestState state, StreamOutput out) throws IOException {
  // Header: compression followed by the number of centroid records.
  out.writeDouble(state.compression);
  out.writeVInt(state.centroidCount());

  // Body: one (mean, count) pair per centroid.
  for (Centroid c : state.centroids()) {
    final double mean = c.mean();
    out.writeDouble(mean);
    final long weight = c.count();
    out.writeVLong(weight);
  }
}
Example #3
Source File: BinFill.java From t-digest with Apache License 2.0 | 5 votes |
/**
 * Prints a CSV-style dump of the centroids produced by three {@code MergingDigest}s (scale
 * functions K_1, K_2 and K_3) fed with the same small data set, for compression values 5 and
 * 10.
 *
 * <p>For each digest a line starting with {@code >} is printed first, holding the digest's 0.65
 * quantile next to {@code Dist.quantile(0.65, data)}; it is followed by one line per centroid.
 */
@Test
public void sampleFill() {
  System.out.printf("scale,delta,centroid,mean,count\n");

  // Position in this table + 1 is the "scale" number printed in the output.
  final ScaleFunction[] scales = {ScaleFunction.K_1, ScaleFunction.K_2, ScaleFunction.K_3};

  for (double delta : new double[]{5, 10}) {
    double[] data = {0, 0, 3, 4, 1, 6, 0, 5, 2, 0, 3, 3, 2, 3, 0, 2, 5, 0, 3, 1};

    // Build and fill every digest before anything is printed.
    MergingDigest[] digests = new MergingDigest[scales.length];
    for (int s = 0; s < scales.length; s++) {
      MergingDigest digest = new MergingDigest(delta);
      digest.setScaleFunction(scales[s]);
      for (double x : data) {
        digest.add(x);
      }
      digests[s] = digest;
    }

    for (int s = 0; s < digests.length; s++) {
      final int scaleNumber = s + 1;
      final MergingDigest digest = digests[s];

      System.out.printf("> %d, %.0f, %.5f, %.5f\n",
          scaleNumber, delta, digest.quantile(0.65), Dist.quantile(0.65, data));

      int centroidIndex = 0;
      for (Centroid centroid : digest.centroids()) {
        System.out.printf("%d,%.0f,%d,%.5f,%d\n",
            scaleNumber, delta, centroidIndex, centroid.mean(), centroid.count());
        centroidIndex++;
      }
    }
  }
}
Example #4
Source File: TDigestState.java From crate with Apache License 2.0 | 5 votes |
/**
 * Writes a digest state to the given stream.
 *
 * <p>Fields are written in this order: the compression as a double, the {@code fractions}
 * array as a double array, the centroid count as a variable-length int, then for each centroid
 * its mean as a double followed by its count as a variable-length long.
 *
 * @param state the digest state to serialize
 * @param out the stream to write to
 * @throws IOException declared for the underlying stream writes
 */
public static void write(TDigestState state, StreamOutput out) throws IOException {
  // Header: compression, fractions, then the number of centroid records.
  out.writeDouble(state.compression);
  out.writeDoubleArray(state.fractions);
  out.writeVInt(state.centroidCount());

  // Body: one (mean, count) pair per centroid.
  for (Centroid c : state.centroids()) {
    final double mean = c.mean();
    out.writeDouble(mean);
    final long weight = c.count();
    out.writeVLong(weight);
  }
}
Example #5
Source File: BinFill.java From t-digest with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws FileNotFoundException { try (PrintWriter out = new PrintWriter("bin-fill.csv")) { out.printf("iteration,dist,algo,scale,q,x,k0,k1,dk,q0,q1,count,max0,max1\n"); // for all scale functions except the non-normalized ones for (ScaleFunction f : ScaleFunction.values()) { if (f.toString().contains("NO_NORM")) { continue; } System.out.printf("%s\n", f); // for all kinds of t-digests for (Util.Factory factory : Util.Factory.values()) { // for different distributions of values for (Util.Distribution distribution : Util.Distribution.values()) { AbstractDistribution gen = distribution.create(new Random()); // do multiple passes for (int i = 0; i < 10; i++) { TDigest dist = factory.create(); if (dist instanceof MergingDigest) { // can only set scale function on merging digest right now ... // ability for TreeDigest coming soon dist.setScaleFunction(f); } for (int j = 0; j < N; j++) { dist.add(gen.nextDouble()); } // now dump stats for the centroids double q0 = 0; double k0 = 0; for (Centroid c : dist.centroids()) { double q1 = q0 + (double) c.count() / N; double k1 = f.k(q1, dist.compression(), dist.size()); out.printf("%d,%s,%s,%s,%.7f,%.7f,%.7f,%.7f,%.7f,%.7f,%.7f,%d,%.1f,%.1f\n", i, distribution, factory, f, (q0 + q1) / 2, c.mean(), k0, k1, k1 - k0, q0, q1, c.count(), dist.size() * f.max(q0, dist.compression(), dist.size()), dist.size() * f.max(q1, dist.compression(), dist.size()) ); q0 = q1; k0 = k1; } } } } } } }