com.tdunning.math.stats.Centroid Java Examples

The following examples show how to use com.tdunning.math.stats.Centroid. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: TDigestQuantilesTest.java From beam with Apache License 2.0

6 votes

/**
 * Round-trips a digest through {@code MergingDigestCoder} and checks that the decoded copy
 * matches the original centroid by centroid.
 *
 * <p>The sketches are compared centroid by centroid: the means are doubles but are encoded as
 * float and cast back during decoding, so means are compared at float precision.
 *
 * @param tDigest the digest to clone through the coder
 * @return {@code true} if both digests yield the same number of centroids and every pair of
 *     centroids agrees on its float-precision mean and its count; {@code false} otherwise
 * @throws IOException declared for failures raised while cloning through the coder
 */
private <T> boolean encodeDecodeEquals(MergingDigest tDigest) throws IOException {
  MergingDigest decoded = CoderUtils.clone(new MergingDigestCoder(), tDigest);

  Iterator<Centroid> decodedIt = decoded.centroids().iterator();
  Iterator<Centroid> originalIt = tDigest.centroids().iterator();

  // Walk both sequences in lock-step so that neither iterator is advanced past its end.
  while (decodedIt.hasNext() && originalIt.hasNext()) {
    Centroid c1 = decodedIt.next();
    Centroid c2 = originalIt.next();
    if ((float) c1.mean() != (float) c2.mean() || c1.count() != c2.count()) {
      return false;
    }
  }
  // A leftover centroid on either side means the two digests differ in size.
  return !decodedIt.hasNext() && !originalIt.hasNext();
}

Example #2

Source File: TDigestState.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Serializes a digest state to the given stream.
 *
 * <p>Layout: the compression as a double, the centroid count as a variable-length int, then for
 * each centroid its mean as a double followed by its count as a variable-length long.
 *
 * @param state the digest state to serialize
 * @param out the stream that receives the serialized form
 * @throws IOException if writing to the stream fails
 */
public static void write(TDigestState state, StreamOutput out) throws IOException {
    out.writeDouble(state.compression);

    final int centroidTotal = state.centroidCount();
    out.writeVInt(centroidTotal);

    for (final Centroid entry : state.centroids()) {
        final double mean = entry.mean();
        out.writeDouble(mean);
        final long weight = entry.count();
        out.writeVLong(weight);
    }
}

Example #3

Source File: BinFill.java From t-digest with Apache License 2.0

5 votes

/**
 * Prints a CSV-style dump of how a small fixed sample fills the centroids of merging digests.
 *
 * <p>For each delta in {5, 10}, one {@code MergingDigest} is built per scale function
 * (K_1, K_2, K_3) and fed the same sample values. For every digest a summary line starting
 * with {@code ">"} is printed with the digest's 0.65 quantile next to
 * {@code Dist.quantile(0.65, data)}, followed by one row per centroid.
 */
@Test
public void sampleFill() {
    System.out.printf("scale,delta,centroid,mean,count\n");
    final ScaleFunction[] scales = {ScaleFunction.K_1, ScaleFunction.K_2, ScaleFunction.K_3};

    for (double delta : new double[]{5, 10}) {
        double[] data = {0, 0, 3, 4, 1, 6, 0, 5, 2, 0, 3, 3, 2, 3, 0, 2, 5, 0, 3, 1};

        // one digest per scale function, all sharing the same delta
        MergingDigest[] digests = new MergingDigest[scales.length];
        for (int s = 0; s < scales.length; s++) {
            digests[s] = new MergingDigest(delta);
            digests[s].setScaleFunction(scales[s]);
        }

        // every sample goes into every digest
        for (double x : data) {
            for (MergingDigest digest : digests) {
                digest.add(x);
            }
        }

        for (int s = 0; s < digests.length; s++) {
            MergingDigest digest = digests[s];
            int scaleIndex = s + 1; // rows are labelled starting at 1
            System.out.printf("> %d, %.0f, %.5f, %.5f\n",
                    scaleIndex, delta, digest.quantile(0.65), Dist.quantile(0.65, data));

            int centroidIndex = 0;
            for (Centroid centroid : digest.centroids()) {
                System.out.printf("%d,%.0f,%d,%.5f,%d\n",
                        scaleIndex, delta, centroidIndex, centroid.mean(), centroid.count());
                centroidIndex++;
            }
        }
    }
}

Example #4

Source File: TDigestState.java From crate with Apache License 2.0

5 votes

/**
 * Serializes a digest state to the given stream.
 *
 * <p>Layout: the compression as a double, the fractions as a double array, the centroid count
 * as a variable-length int, then for each centroid its mean as a double followed by its count
 * as a variable-length long.
 *
 * @param state the digest state to serialize
 * @param out the stream that receives the serialized form
 * @throws IOException if writing to the stream fails
 */
public static void write(TDigestState state, StreamOutput out) throws IOException {
    // header: compression and fractions
    out.writeDouble(state.compression);
    out.writeDoubleArray(state.fractions);

    // body: centroid count, then one (mean, count) pair per centroid
    final int centroidTotal = state.centroidCount();
    out.writeVInt(centroidTotal);

    for (final Centroid entry : state.centroids()) {
        final double mean = entry.mean();
        out.writeDouble(mean);
        final long weight = entry.count();
        out.writeVLong(weight);
    }
}

Example #5

Source File: BinFill.java From t-digest with Apache License 2.0

4 votes

/**
 * Writes per-centroid statistics to {@code bin-fill.csv}.
 *
 * <p>For every scale function whose name does not contain {@code NO_NORM}, every digest factory
 * and every distribution, ten digests are each filled with {@code N} samples and one CSV row is
 * written per centroid. The name of each processed scale function is echoed to standard output.
 *
 * @param args unused
 * @throws FileNotFoundException if {@code bin-fill.csv} cannot be opened for writing
 */
public static void main(String[] args) throws FileNotFoundException {
    try (PrintWriter out = new PrintWriter("bin-fill.csv")) {
        out.printf("iteration,dist,algo,scale,q,x,k0,k1,dk,q0,q1,count,max0,max1\n");

        for (ScaleFunction f : ScaleFunction.values()) {
            // the non-normalized scale functions are left out
            if (f.toString().contains("NO_NORM")) {
                continue;
            }
            System.out.printf("%s\n", f);

            // every kind of t-digest ...
            for (Util.Factory factory : Util.Factory.values()) {
                // ... against every distribution of values
                for (Util.Distribution distribution : Util.Distribution.values()) {
                    // one generator per distribution, shared by all passes below
                    AbstractDistribution gen = distribution.create(new Random());
                    for (int pass = 0; pass < 10; pass++) {
                        TDigest digest = factory.create();
                        if (digest instanceof MergingDigest) {
                            // can only set scale function on merging digest right now ...
                            // ability for TreeDigest coming soon
                            digest.setScaleFunction(f);
                        }
                        for (int sample = 0; sample < N; sample++) {
                            digest.add(gen.nextDouble());
                        }

                        writeCentroidRows(out, pass, distribution, factory, f, digest);
                    }
                }
            }
        }
    }
}

/**
 * Writes one CSV row per centroid of {@code digest}, tracking the running quantile range
 * (q0 to q1) and the scale-function value range (k0 to k1) covered by each centroid.
 */
private static void writeCentroidRows(PrintWriter out, int pass, Util.Distribution distribution,
                                      Util.Factory factory, ScaleFunction f, TDigest digest) {
    double qLow = 0;
    double kLow = 0;
    for (Centroid c : digest.centroids()) {
        double qHigh = qLow + (double) c.count() / N;
        double kHigh = f.k(qHigh, digest.compression(), digest.size());
        out.printf("%d,%s,%s,%s,%.7f,%.7f,%.7f,%.7f,%.7f,%.7f,%.7f,%d,%.1f,%.1f\n",
                pass, distribution, factory, f, (qLow + qHigh) / 2, c.mean(),
                kLow, kHigh, kHigh - kLow, qLow, qHigh, c.count(),
                digest.size() * f.max(qLow, digest.compression(), digest.size()),
                digest.size() * f.max(qHigh, digest.compression(), digest.size())
        );
        // the upper edge of this centroid is the lower edge of the next one
        qLow = qHigh;
        kLow = kHigh;
    }
}