org.apache.commons.math3.stat.descriptive.moment.StandardDeviation Java Exaples

Source File: UserProfileEigenModeler.java From Eagle with Apache License 2.0

6 votes

private void computeStats(RealMatrix m){

        if(m.getColumnDimension() != this.cmdTypes.length){
            LOG.error("Please fix the commands list in config file");
            return;
        }
        statistics = new UserCommandStatistics[m.getColumnDimension()];
        for(int i=0; i<m.getColumnDimension(); i++){
            UserCommandStatistics stats = new UserCommandStatistics();
            stats.setCommandName(this.cmdTypes[i]);
            RealVector colData = m.getColumnVector(i);
            StandardDeviation deviation = new StandardDeviation();
            double stddev = deviation.evaluate(colData.toArray());
            //LOG.info("stddev is nan?" + (stddev == Double.NaN? "yes":"no"));
            if(stddev <= lowVarianceVal)
                stats.setLowVariant(true);
            else
                stats.setLowVariant(false);
            stats.setStddev(stddev);
            Mean mean = new Mean();
            double mu = mean.evaluate(colData.toArray());
            //LOG.info("mu is nan?" + (mu == Double.NaN? "yes":"no"));
            stats.setMean(mu);
            statistics[i] = stats;
        }
    }

Source File: GCBiasCorrectorUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

@Test
public void testGCCorrection() {
    final int numSamples = 5;
    final int numIntervals = 10000;

    final Pair<RealMatrix, double[]> data = simulateData(numSamples, numIntervals);
    final RealMatrix readCounts = data.getLeft();
    final double[] intervalGCContent = data.getRight();

    final RealMatrix correctedCoverage = readCounts.copy();
    GCBiasCorrector.correctGCBias(correctedCoverage, intervalGCContent);
    Utils.nonNull(correctedCoverage);
    final StandardDeviation stdevEvaluator = new StandardDeviation();
    final double[] stdDevsBySample = IntStream.range(0, correctedCoverage.getRowDimension())
            .mapToDouble(r -> stdevEvaluator.evaluate(correctedCoverage.getRow(r))).toArray();
    Arrays.stream(stdDevsBySample).forEach(x -> Assert.assertTrue(x < NON_GC_BIAS_NOISE_LEVEL * MEAN_READ_DEPTH));

    //check that GC-bias correction is approximately idempotent -- if you correct again, very little should happen
    final RealMatrix recorrectedCoverage = correctedCoverage.copy();
    GCBiasCorrector.correctGCBias(recorrectedCoverage, intervalGCContent);
    final double correctedChange = correctedCoverage.subtract(readCounts).getFrobeniusNorm();
    final double recorrectedChange = recorrectedCoverage.subtract(correctedCoverage).getFrobeniusNorm();
    Assert.assertTrue(recorrectedChange < correctedChange / 10.);
}

Source File: SliceSamplerUnitTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Test slice sampling of a normal distribution.  Checks that input mean and standard deviation are recovered
 * by 10000 samples to a relative error of 0.5% and 2%, respectively.
 */
@Test
public void testSliceSamplingOfNormalDistribution() {
    rng.setSeed(RANDOM_SEED);

    final double mean = 5.;
    final double standardDeviation = 0.75;
    final NormalDistribution normalDistribution = new NormalDistribution(mean, standardDeviation);
    final Function<Double, Double> normalLogPDF = normalDistribution::logDensity;

    final double xInitial = 1.;
    final double xMin = Double.NEGATIVE_INFINITY;
    final double xMax = Double.POSITIVE_INFINITY;
    final double width = 0.5;
    final int numSamples = 10000;
    final SliceSampler normalSampler = new SliceSampler(rng, normalLogPDF, xMin, xMax, width);
    final double[] samples = Doubles.toArray(normalSampler.sample(xInitial, numSamples));

    final double sampleMean = new Mean().evaluate(samples);
    final double sampleStandardDeviation = new StandardDeviation().evaluate(samples);
    Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.005);
    Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation), 0., 0.02);
}

Source File: QueryManager.java From bullet-core with Apache License 2.0

6 votes

/**
 * Gets some statistics about the current state of partitioning and queries in this manager.
 *
 * @return A {@link Map} of {@link PartitionStat} to their values for the current state of the manager.
 */
public Map<PartitionStat, Object> getStats() {
    Map<PartitionStat, Object> stats = new HashMap<>();
    List<Partition> sorted = partitioning.entrySet().stream().map(Partition::new).sorted().collect(Collectors.toList());
    int size = sorted.size();
    stats.put(PartitionStat.QUERY_COUNT, queries.size());
    stats.put(PartitionStat.PARTITION_COUNT, size);
    stats.put(PartitionStat.ACTUAL_QUERIES_SEEN, queriesSeen);
    stats.put(PartitionStat.EXPECTED_QUERIES_SEEN, expectedQueriesSeen);
    if (size > 0) {
        stats.put(PartitionStat.LARGEST_PARTITION, sorted.get(size - 1).toString());
        stats.put(PartitionStat.SMALLEST_PARTITION, sorted.get(0).toString());
        double[] sizes = sorted.stream().mapToDouble(p -> (double) p.count).toArray();
        stats.put(PartitionStat.STDDEV_PARTITION_SIZE, new StandardDeviation().evaluate(sizes));
        stats.put(PartitionStat.DISTRIBUTION_PARTITION_SIZE, getDistributions(sorted));
    }
    return stats;
}

Source File: MinibatchSliceSamplerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Tests slice sampling of a uniform prior with no data points.
 * Checks that mean and standard deviation are recovered from the samples
 * by 500 burn-in samples + 1000 samples to a relative error of 1% and 5%, respectively.
 */
@Test
public void testSliceSamplingOfUniformPriorWithNoData() {
    rng.setSeed(RANDOM_SEED);

    final double mean = 0.5;
    final double standardDeviation = 1. / Math.sqrt(12.);
    final List<Double> data = Collections.emptyList();

    final double xInitial = 0.5;
    final double xMin = 0.;
    final double xMax = 1.;
    final double width = 0.1;
    final int numBurnInSamples = 500;
    final int numSamples = 1500;
    final MinibatchSliceSampler<Double> uniformSampler = new MinibatchSliceSampler<>(
            rng, data, UNIFORM_LOG_PRIOR, (d, x) -> 0.,
            xMin, xMax, width, MINIBATCH_SIZE, APPROX_THRESHOLD);
    final double[] samples = Doubles.toArray(uniformSampler.sample(xInitial, numSamples).subList(numBurnInSamples, numSamples));

    final double sampleMean = new Mean().evaluate(samples);
    final double sampleStandardDeviation = new StandardDeviation().evaluate(samples);
    Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.01);
    Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation), 0., 0.05);
}

Source File: ReCapSegCaller.java From gatk-protected with BSD 3-Clause "New" or "Revised" License

6 votes

private static double calculateT(final ReadCountCollection tangentNormalizedCoverage, final List<ModeledSegment> segments) {

        //Get the segments that are likely copy neutral.
        // Math.abs removed to mimic python...
        final List<ModeledSegment> copyNeutralSegments = segments.stream().filter(s -> s.getSegmentMean() < COPY_NEUTRAL_CUTOFF).collect(Collectors.toList());

        // Get the targets that correspond to the copyNeutralSegments... note that individual targets, due to noise,
        //  can be far away from copy neutral
        final TargetCollection<ReadCountRecord.SingleSampleRecord> targetsWithCoverage =
                new HashedListTargetCollection<>(tangentNormalizedCoverage.records().stream().map(ReadCountRecord::asSingleSampleRecord).collect(Collectors.toList()));
        final double[] copyNeutralTargetsCopyRatio = copyNeutralSegments.stream()
                .flatMap(s -> targetsWithCoverage.targets(s).stream())
                .mapToDouble(ReadCountRecord.SingleSampleRecord::getCount).toArray();

        final double meanCopyNeutralTargets = new Mean().evaluate(copyNeutralTargetsCopyRatio);
        final double sigmaCopyNeutralTargets = new StandardDeviation().evaluate(copyNeutralTargetsCopyRatio);

        // Now we filter outliers by only including those w/in 2 standard deviations.
        final double [] filteredCopyNeutralTargetsCopyRatio = Arrays.stream(copyNeutralTargetsCopyRatio)
                .filter(c -> Math.abs(c - meanCopyNeutralTargets) < sigmaCopyNeutralTargets * Z_THRESHOLD).toArray();

        return new StandardDeviation().evaluate(filteredCopyNeutralTargetsCopyRatio);
    }

Source File: ThirdOctaveBandsFilteringTest.java From NoiseCapture with GNU General Public License v3.0

6 votes

@Test
public void testPinkNoise() {
    short[] pinkNoise = SOSSignalProcessing.makePinkNoise(441000, (short)2500, 0);
    FFTSignalProcessing fftSignalProcessing = new FFTSignalProcessing(44100,
            ThirdOctaveBandsFiltering.STANDARD_FREQUENCIES_REDUCED, pinkNoise.length);
    fftSignalProcessing.addSample(pinkNoise);
    FFTSignalProcessing.ProcessingResult result = fftSignalProcessing.processSample(FFTSignalProcessing.WINDOW_TYPE.RECTANGULAR,
            false,
            false);

    // Compute
    StandardDeviation standardDeviation = new StandardDeviation();
    double[] dArray = new double[result.dBaLevels.length];
    for(int i = 0; i < result.dBaLevels.length; i++) {
        dArray[i] = result.dBaLevels[i];
    }
    assertEquals(0, standardDeviation.evaluate(dArray), 0.25);
}

Source File: SliceSamplerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Tests slice sampling of a normal distribution.  Checks that input mean and standard deviation are recovered
 * by 10000 samples to a relative error of 0.5% and 2%, respectively.
 */
@Test
public void testSliceSamplingOfNormalDistribution() {
    rng.setSeed(RANDOM_SEED);

    final double mean = 5.;
    final double standardDeviation = 0.75;
    final NormalDistribution normalDistribution = new NormalDistribution(mean, standardDeviation);
    final Function<Double, Double> normalLogPDF = normalDistribution::logDensity;

    final double xInitial = 1.;
    final double xMin = Double.NEGATIVE_INFINITY;
    final double xMax = Double.POSITIVE_INFINITY;
    final double width = 0.5;
    final int numSamples = 10000;
    final SliceSampler normalSampler = new SliceSampler(rng, normalLogPDF, xMin, xMax, width);
    final double[] samples = Doubles.toArray(normalSampler.sample(xInitial, numSamples));

    final double sampleMean = new Mean().evaluate(samples);
    final double sampleStandardDeviation = new StandardDeviation().evaluate(samples);
    Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.005);
    Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation), 0., 0.02);
}

Source File: PosteriorSummaryUtils.java From gatk-protected with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Given a list of posterior samples, returns an estimate of the posterior mode (using
 * mllib kernel density estimation in {@link KernelDensity} and {@link BrentOptimizer}).
 * Note that estimate may be poor if number of samples is small (resulting in poor kernel density estimation),
 * or if posterior is not unimodal (or is sufficiently pathological otherwise). If the samples contain
 * {@link Double#NaN}, {@link Double#NaN} will be returned.
 * @param samples   posterior samples, cannot be {@code null} and number of samples must be greater than 0
 * @param ctx       {@link JavaSparkContext} used by {@link KernelDensity} for mllib kernel density estimation
 */
public static double calculatePosteriorMode(final List<Double> samples, final JavaSparkContext ctx) {
    Utils.nonNull(samples);
    Utils.validateArg(samples.size() > 0, "Number of samples must be greater than zero.");

    //calculate sample min, max, mean, and standard deviation
    final double sampleMin = Collections.min(samples);
    final double sampleMax = Collections.max(samples);
    final double sampleMean = new Mean().evaluate(Doubles.toArray(samples));
    final double sampleStandardDeviation = new StandardDeviation().evaluate(Doubles.toArray(samples));

    //if samples are all the same or contain NaN, can simply return mean
    if (sampleStandardDeviation == 0. || Double.isNaN(sampleMean)) {
        return sampleMean;
    }

    //use Silverman's rule to set bandwidth for kernel density estimation from sample standard deviation
    //see https://en.wikipedia.org/wiki/Kernel_density_estimation#Practical_estimation_of_the_bandwidth
    final double bandwidth =
            SILVERMANS_RULE_CONSTANT * sampleStandardDeviation * Math.pow(samples.size(), SILVERMANS_RULE_EXPONENT);

    //use kernel density estimation to approximate posterior from samples
    final KernelDensity pdf = new KernelDensity().setSample(ctx.parallelize(samples, 1)).setBandwidth(bandwidth);

    //use Brent optimization to find mode (i.e., maximum) of kernel-density-estimated posterior
    final BrentOptimizer optimizer =
            new BrentOptimizer(RELATIVE_TOLERANCE, RELATIVE_TOLERANCE * (sampleMax - sampleMin));
    final UnivariateObjectiveFunction objective =
            new UnivariateObjectiveFunction(f -> pdf.estimate(new double[] {f})[0]);
    //search for mode within sample range, start near sample mean
    final SearchInterval searchInterval = new SearchInterval(sampleMin, sampleMax, sampleMean);
    return optimizer.optimize(objective, GoalType.MAXIMIZE, searchInterval, BRENT_MAX_EVAL).getPoint();
}

Source File: MathUtilities.java From StatsAgg with Apache License 2.0

6 votes

public static BigDecimal computePopulationStandardDeviationOfBigDecimals(List<BigDecimal> numbers) {
    
    if ((numbers == null) || numbers.isEmpty()) {
        return null;
    }
    
    try {
        double[] doublesArray = new double[numbers.size()];

        for (int i = 0; i < doublesArray.length; i++) {
            doublesArray[i] = numbers.get(i).doubleValue();
        }

        StandardDeviation standardDeviation = new StandardDeviation();
        standardDeviation.setBiasCorrected(false);
        BigDecimal standardDeviationResult = new BigDecimal(standardDeviation.evaluate(doublesArray));

        return standardDeviationResult;
    }
    catch (Exception e) {
        logger.error(e.toString() + System.lineSeparator() + StackTrace.getStringFromStackTrace(e));
        return null;
    }
}

Source File: DescriptiveStatisticsHistogramStatistics.java From flink with Apache License 2.0

6 votes

@Override
public double evaluate(double[] values, int begin, int length)
		throws MathIllegalArgumentException {
	this.count = length;
	percentilesImpl.setData(values, begin, length);

	SimpleStats secondMoment = new SimpleStats();
	secondMoment.evaluate(values, begin, length);
	this.mean = secondMoment.getMean();
	this.min = secondMoment.getMin();
	this.max = secondMoment.getMax();

	this.stddev = new StandardDeviation(secondMoment).getResult();

	return Double.NaN;
}

Source File: bedGraphMath.java From HMMRATAC with GNU General Public License v3.0

6 votes

private void setMeanAndStd(){
	Mean mu = new Mean();
	StandardDeviation dev = new StandardDeviation();
	for (String chr : bedgraph.keySet()){
		
		ArrayList<TagNode> inTemp = bedgraph.get(chr);
		Collections.sort(inTemp,  TagNode.basepairComparator);
		for (int i = 0; i < inTemp.size();i++){
			int length = inTemp.get(i).getLength();
			double value = inTemp.get(i).getScore2();
			for (int a = 0; a < length;a++){
				mu.increment(value);
				dev.increment(value);
			}
		}
	}
	mean = mu.getResult();
	std = dev.getResult();
}

Source File: MeanAndStd.java From HMMRATAC with GNU General Public License v3.0

6 votes

/**
 * Set the data across a specific region
 * @param node a TagNode representing a specific region for calculation
 * @throws IOException
 */
private void SetMeanAndStd2(TagNode node) throws IOException{
	BBFileReader wigReader = new BBFileReader(wigFile);
	String chrom = node.getChrom();
	int begin = node.getStart();
	int end = node.getStop();
	BigWigIterator iter = wigReader.getBigWigIterator(chrom, begin, chrom, end, false);
	Mean mu = new Mean();
	StandardDeviation dev = new StandardDeviation();
	while(iter.hasNext()){
		WigItem item = iter.next();
		int start = item.getStartBase();
		int stop = item.getEndBase();
		double value = item.getWigValue();
		for (int i = start; i < stop;i++){
			mu.increment(value);
			dev.increment(value);
		}
		
	}
	mean = mu.getResult();
	std = dev.getResult();
	wigReader=null;
}

Source File: MeanAndStd.java From HMMRATAC with GNU General Public License v3.0

6 votes

/**
 * Set the data across the entire genome
 * @throws IOException
 */
private void SetMeanAndStd() throws IOException{
	BBFileReader wigReader = new BBFileReader(wigFile);
	BigWigIterator iter = wigReader.getBigWigIterator();
	Mean mu = new Mean();
	StandardDeviation dev = new StandardDeviation();
	while(iter.hasNext()){
		WigItem item = iter.next();
		int start = item.getStartBase();
		int stop = item.getEndBase();
		double value = item.getWigValue();
		for (int i = start; i < stop;i++){
			mu.increment(value);
			dev.increment(value);
		}
		
	}
	mean = mu.getResult();
	std = dev.getResult();
}

Source File: PosteriorSummaryUtils.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Given a list of posterior samples, returns an estimate of the posterior mode (using
 * mllib kernel density estimation in {@link KernelDensity} and {@link BrentOptimizer}).
 * Note that estimate may be poor if number of samples is small (resulting in poor kernel density estimation),
 * or if posterior is not unimodal (or is sufficiently pathological otherwise). If the samples contain
 * {@link Double#NaN}, {@link Double#NaN} will be returned.
 * @param samples   posterior samples, cannot be {@code null} and number of samples must be greater than 0
 * @param ctx       {@link JavaSparkContext} used by {@link KernelDensity} for mllib kernel density estimation
 */
public static double calculatePosteriorMode(final List<Double> samples, final JavaSparkContext ctx) {
    Utils.nonNull(samples);
    Utils.validateArg(samples.size() > 0, "Number of samples must be greater than zero.");

    //calculate sample min, max, mean, and standard deviation
    final double sampleMin = Collections.min(samples);
    final double sampleMax = Collections.max(samples);
    final double sampleMean = new Mean().evaluate(Doubles.toArray(samples));
    final double sampleStandardDeviation = new StandardDeviation().evaluate(Doubles.toArray(samples));

    //if samples are all the same or contain NaN, can simply return mean
    if (sampleStandardDeviation == 0. || Double.isNaN(sampleMean)) {
        return sampleMean;
    }

    //use Silverman's rule to set bandwidth for kernel density estimation from sample standard deviation
    //see https://en.wikipedia.org/wiki/Kernel_density_estimation#Practical_estimation_of_the_bandwidth
    final double bandwidth =
            SILVERMANS_RULE_CONSTANT * sampleStandardDeviation * Math.pow(samples.size(), SILVERMANS_RULE_EXPONENT);

    //use kernel density estimation to approximate posterior from samples
    final KernelDensity pdf = new KernelDensity().setSample(ctx.parallelize(samples, 1)).setBandwidth(bandwidth);

    //use Brent optimization to find mode (i.e., maximum) of kernel-density-estimated posterior
    final BrentOptimizer optimizer =
            new BrentOptimizer(RELATIVE_TOLERANCE, RELATIVE_TOLERANCE * (sampleMax - sampleMin));
    final UnivariateObjectiveFunction objective =
            new UnivariateObjectiveFunction(f -> pdf.estimate(new double[] {f})[0]);
    //search for mode within sample range, start near sample mean
    final SearchInterval searchInterval = new SearchInterval(sampleMin, sampleMax, sampleMean);
    return optimizer.optimize(objective, GoalType.MAXIMIZE, searchInterval, BRENT_MAX_EVAL).getPoint();
}

Source File: UserProfileKDEModeler.java From Eagle with Apache License 2.0

5 votes

private void computeStats(RealMatrix m){
    if(m.getColumnDimension() !=  this.cmdTypes.length){
        LOG.error("Please fix the commands list in config file");
    }

    statistics = new UserCommandStatistics[m.getColumnDimension()];

    for(int i=0; i<m.getColumnDimension(); i++){
        UserCommandStatistics stats = new UserCommandStatistics();
        stats.setCommandName(this.cmdTypes[i]);
        RealVector colData = m.getColumnVector(i);
        StandardDeviation deviation = new StandardDeviation();
        double stddev = deviation.evaluate(colData.toArray());

        if(LOG.isDebugEnabled()) LOG.debug("Stddev is NAN ? " + (Double.isNaN(stddev) ? "yes" : "no"));
        if(stddev <= lowVarianceVal)
            stats.setLowVariant(true);
        else
            stats.setLowVariant(false);

        stats.setStddev(stddev);
        Mean mean = new Mean();
        double mu = mean.evaluate(colData.toArray());
        if(LOG.isDebugEnabled()) LOG.debug("mu is NAN ? " + (Double.isNaN(mu)? "yes":"no"));

        stats.setMean(mu);
        statistics[i]=stats;
    }
}

Source File: MessagePackDataformatTestBase.java From jackson-dataformat-msgpack with Apache License 2.0

5 votes

protected void printStat(String label, double[] values) {
    StandardDeviation standardDeviation = new StandardDeviation();
    System.out.println(label + ":");
    System.out.println(String.format("  mean : %.2f", StatUtils.mean(values)));
    System.out.println(String.format("  min  : %.2f", StatUtils.min(values)));
    System.out.println(String.format("  max  : %.2f", StatUtils.max(values)));
    System.out.println(String.format("  stdev: %.2f", standardDeviation.evaluate(values)));
    System.out.println("");
}

Source File: TestDoubleStdDevPopAggregation.java From presto with Apache License 2.0

5 votes

@Override
protected Number getExpectedValue(int start, int length)
{
    if (length == 0) {
        return null;
    }

    double[] values = new double[length];
    for (int i = 0; i < length; i++) {
        values[i] = start + i;
    }

    StandardDeviation stdDev = new StandardDeviation(false);
    return stdDev.evaluate(values);
}

Source File: MatrixSummaryUtils.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Return an array containing the variance for each row in the given matrix.
 * @param m Not {@code null}.  Size MxN.    If any entry is NaN, the corresponding rows will have a
 *          variance of NaN.
 * @return array of size M.  Never {@code null}  IF there is only one column (or only one entry
 */
public static double[] getRowVariances(final RealMatrix m) {
    Utils.nonNull(m, "Cannot calculate medians on a null matrix.");
    final StandardDeviation std = new StandardDeviation();
    return IntStream.range(0, m.getRowDimension()).boxed()
            .mapToDouble(i -> Math.pow(std.evaluate(m.getRow(i)), 2)).toArray();
}

Source File: RandomDNAUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

public void checkResults(final int[] results, final int n, final int m) {
    final double mean = Arrays.stream(results).average().getAsDouble();
    final double std = new StandardDeviation().evaluate(Arrays.stream(results).asDoubleStream().toArray());
    final double expectedMean = (n*m)/4.0;
    final double s = std; // not really because it's the population not the sample dtd but it'll do
    Assert.assertTrue(mean < expectedMean + 2 * s / Math.sqrt(n * m), "unexpected mean:" + mean);
    Assert.assertTrue(mean > expectedMean-2*s/Math.sqrt(n*m), "unexpected mean:" +mean);
}

Source File: MinibatchSliceSamplerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Tests slice sampling of a normal posterior = uniform prior x normal likelihood from 1000 data points.
 * Checks that input mean and standard deviation are recovered from the posterior of the mean parameter
 * by 500 burn-in samples + 1000 samples to a relative error of 1% and 5%, respectively.
 */
@Test
public void testSliceSamplingOfNormalPosterior() {
    rng.setSeed(RANDOM_SEED);

    final double mean = 5.;
    final double standardDeviation = 0.75;
    final NormalDistribution normalDistribution = new NormalDistribution(rng, mean, standardDeviation);
    final BiFunction<Double, Double, Double> normalLogLikelihood =
            (d, x) -> new NormalDistribution(null, x, standardDeviation).logDensity(d);
    final List<Double> data = Doubles.asList(normalDistribution.sample(NUM_DATA_POINTS));

    final double xInitial = 1.;
    final double xMin = Double.NEGATIVE_INFINITY;
    final double xMax = Double.POSITIVE_INFINITY;
    final double width = 0.5;
    final int numBurnInSamples = 500;
    final int numSamples = 1500;
    final MinibatchSliceSampler<Double> normalSampler = new MinibatchSliceSampler<>(
            rng, data, UNIFORM_LOG_PRIOR, normalLogLikelihood,
            xMin, xMax, width, MINIBATCH_SIZE, APPROX_THRESHOLD);
    final double[] samples = Doubles.toArray(normalSampler.sample(xInitial, numSamples).subList(numBurnInSamples, numSamples));

    final double sampleMean = new Mean().evaluate(samples);
    final double sampleStandardDeviation = new StandardDeviation().evaluate(samples);
    Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.01);
    Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation / Math.sqrt(NUM_DATA_POINTS)), 0., 0.05);
}

Source File: MinibatchSliceSamplerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Tests slice sampling of a beta posterior = uniform prior x binomial likelihood from 1000 data points.
 * Checks that input mean and standard deviation are recovered from the posterior of the mean parameter
 * by 500 burn-in samples + 1000 samples to a relative error of 1% and 5%, respectively.
 */
@Test
public void testSliceSamplingOfBetaPosterior() {
    rng.setSeed(RANDOM_SEED);

    final int numTrials = 100;
    final double p = 0.9;
    final BinomialDistribution binomialDistribution = new BinomialDistribution(rng, numTrials, p);
    final BiFunction<Integer, Double, Double> binomialLogLikelihood =
            (d, x) -> new BinomialDistribution(null, numTrials, x).logProbability(d);
    final List<Integer> data = Ints.asList(binomialDistribution.sample(NUM_DATA_POINTS));
    final double numSuccessesTotal = data.stream().mapToDouble(x -> x).sum();
    final double alpha = 1. + numSuccessesTotal;
    final double beta = 1. + (numTrials * NUM_DATA_POINTS - numSuccessesTotal);
    final double variance = new BetaDistribution(null, alpha, beta).getNumericalVariance();

    final double xInitial = 0.5;
    final double xMin = 0.;
    final double xMax = 1.;
    final double width = 0.1;
    final int numBurnInSamples = 500;
    final int numSamples = 1500;
    final MinibatchSliceSampler<Integer> betaSampler = new MinibatchSliceSampler<>(
            rng, data, UNIFORM_LOG_PRIOR, binomialLogLikelihood,
            xMin, xMax, width, MINIBATCH_SIZE, APPROX_THRESHOLD);
    final double[] samples = Doubles.toArray(betaSampler.sample(xInitial, numSamples).subList(numBurnInSamples, numSamples));

    final double sampleMean = new Mean().evaluate(samples);
    final double sampleStandardDeviation = new StandardDeviation().evaluate(samples);
    Assert.assertEquals(relativeError(sampleMean, p), 0., 0.01);
    Assert.assertEquals(relativeError(sampleStandardDeviation, Math.sqrt(variance)), 0., 0.05);
}

Source File: GibbsSamplerSingleGaussianUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Tests Bayesian inference of a Gaussian model via MCMC.  Recovery of input values for the variance and mean
 * global parameters is checked.  In particular, the mean and standard deviation of the posteriors for
 * both parameters must be recovered to within a relative error of 1% and 10%, respectively, in 250 samples
 * (after 250 burn-in samples have been discarded).
 */
@Test
public void testRunMCMCOnSingleGaussianModel() {
    //Create new instance of the Modeller helper class, passing all quantities needed to initialize state and data.
    final GaussianModeller modeller = new GaussianModeller(VARIANCE_INITIAL, MEAN_INITIAL, datapointsList);
    //Create a GibbsSampler, passing the total number of samples (including burn-in samples)
    //and the model held by the Modeller.
    final GibbsSampler<GaussianParameter, ParameterizedState<GaussianParameter>, GaussianDataCollection> gibbsSampler =
            new GibbsSampler<>(NUM_SAMPLES, modeller.model);
    //Run the MCMC.
    gibbsSampler.runMCMC();

    //Get the samples of each of the parameter posteriors (discarding burn-in samples) by passing the
    //parameter name, type, and burn-in number to the getSamples method.
    final double[] varianceSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.VARIANCE, Double.class, NUM_BURN_IN));
    final double[] meanSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.MEAN, Double.class, NUM_BURN_IN));

    //Check that the statistics---i.e., the means and standard deviations---of the posteriors
    //agree with those found by emcee/analytically to a relative error of 1% and 10%, respectively.
    final double variancePosteriorCenter = new Mean().evaluate(varianceSamples);
    final double variancePosteriorStandardDeviation = new StandardDeviation().evaluate(varianceSamples);
    Assert.assertEquals(relativeError(variancePosteriorCenter, VARIANCE_TRUTH),
            0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
    Assert.assertEquals(
            relativeError(variancePosteriorStandardDeviation, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH),
            0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
    final double meanPosteriorCenter = new Mean().evaluate(meanSamples);
    final double meanPosteriorStandardDeviation = new StandardDeviation().evaluate(meanSamples);
    Assert.assertEquals(relativeError(meanPosteriorCenter, MEAN_TRUTH),
            0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
    Assert.assertEquals(
            relativeError(meanPosteriorStandardDeviation, MEAN_POSTERIOR_STANDARD_DEVIATION_TRUTH),
            0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
}

Source File: Endpoint.java From oryx with Apache License 2.0

5 votes

protected Endpoint(String path, double relativeProb) {
  Preconditions.checkArgument(relativeProb > 0.0);
  this.path = path;
  this.relativeProb = relativeProb;
  meanTimeNanos = new Mean();
  stdevTimeNanos = new StandardDeviation();
}

Source File: StandardDeviationFunction.java From jpmml-evaluator with GNU Affero General Public License v3.0

5 votes

public Double evaluate(Collection<?> values, boolean biasCorrected){
	StandardDeviation statistic = new StandardDeviation();
	statistic.setBiasCorrected(biasCorrected);

	for(Object value : values){
		Number number = (Number)TypeUtil.parseOrCast(DataType.DOUBLE, value);

		statistic.increment(number.doubleValue());
	}

	return statistic.getResult();
}

Source File: TestLongStdDevPopAggregation.java From presto with Apache License 2.0

5 votes

@Override
protected Number getExpectedValue(int start, int length)
{
    if (length == 0) {
        return null;
    }

    double[] values = new double[length];
    for (int i = 0; i < length; i++) {
        values[i] = start + i;
    }

    StandardDeviation stdDev = new StandardDeviation(false);
    return stdDev.evaluate(values);
}

Source File: MatrixSummaryUtils.java From gatk-protected with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Return an array containing the variance for each row in the given matrix.
 * @param m Not {@code null}.  Size MxN.    If any entry is NaN, the corresponding rows will have a
 *          variance of NaN.
 * @return array of size M.  Never {@code null}  IF there is only one column (or only one entry
 */
public static double[] getRowVariances(final RealMatrix m) {
    Utils.nonNull(m, "Cannot calculate medians on a null matrix.");
    final StandardDeviation std = new StandardDeviation();
    return IntStream.range(0, m.getRowDimension()).boxed()
            .mapToDouble(i -> Math.pow(std.evaluate(m.getRow(i)), 2)).toArray();
}

Source File: HMMR_EM.java From HMMRATAC with GNU General Public License v3.0

5 votes

/**
 * Perform one iteration of the EM algorithm
 */
public void iterate(){
	
	double[] temp = new double[mu.length];
	double[] counter = new double[mu.length];
	double total = 0.0;
	Mean[] means = new Mean[mu.length];
	StandardDeviation[] std = new StandardDeviation[mu.length];
	for (int i = 0; i < means.length;i++){
		means[i] = new Mean();
		std[i] = new StandardDeviation();
	}
	for (int i = 0;i < data.length;i++){
		
		for (int l = 0;l < mu.length;l++){
			temp[l] = getWeightedDensity(data[i],mu[l],lamda[l],weights[l]);
			
		}
		int index = returnGreater(temp);
		if (index != -1){
			means[index].increment(data[i]);
			std[index].increment(data[i]);
			counter[index]+=1.0;
			total+=1.0;
		}
	}
	for (int i = 0;i < mu.length;i++){
		mu[i] = mu[i] + (jump *(means[i].getResult()-mu[i]));
		lamda[i] = lamda[i] + (jump *(std[i].getResult() - lamda[i]));
		weights[i] = counter[i] / total;
	}
		
	
	
}

Source File: TestMfsBlockLoader.java From dremio-oss with Apache License 2.0

5 votes

private static double stdDev(List<Long> data) {
  double[] asDouble = new double[data.size()];
  int i = 0;
  for (Long l : data) {
    asDouble[i++] = (double) l;
  }
  return new StandardDeviation().evaluate(asDouble);
}

Source File: DBIDRouterTest.java From SearchServices with GNU Lesser General Public License v3.0

5 votes

@Test
public void multipleShardsInTheCluster_shouldBalanceNodes()
{
    int [] shardIdentifiers = range(0,15).toArray();
    int shardCount = shardIdentifiers.length;
    int howManyDocumentsPerShard = 10000;

    Map<Integer, Integer> nodeDistributionMap = new HashMap<>();

    range(0, shardCount * howManyDocumentsPerShard)
            .mapToLong(Long::valueOf)
            .forEach(id -> {
                Node node = new Node();
                node.setId(id);
                stream(shardIdentifiers)
                        .forEach(shardId -> {
                            if (router.routeNode(shardCount, shardId, node))
                            {
                                nodeDistributionMap.merge(shardId, 1, Integer::sum);
                            }
                        });
            });

    StandardDeviation sd = new StandardDeviation();
    double deviation = sd.evaluate(nodeDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray());
    double norm = deviation/(howManyDocumentsPerShard) * 100;

    assertEquals(shardIdentifiers.length, nodeDistributionMap.size());

    // Asserts the standard deviation of the distribution map is in percentage lesser than 30%
    assertTrue(
            nodeDistributionMap.values().toString() + ", SD = " + deviation + ", SD_NORM = " + norm + "%",
            norm < 30);
}

org.apache.commons.math3.stat.descriptive.moment.StandardDeviation Java Examples