org.apache.commons.math3.stat.descriptive.moment.StandardDeviation Java Examples
The following examples show how to use
org.apache.commons.math3.stat.descriptive.moment.StandardDeviation.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: UserProfileEigenModeler.java From Eagle with Apache License 2.0 | 6 votes |
private void computeStats(RealMatrix m){ if(m.getColumnDimension() != this.cmdTypes.length){ LOG.error("Please fix the commands list in config file"); return; } statistics = new UserCommandStatistics[m.getColumnDimension()]; for(int i=0; i<m.getColumnDimension(); i++){ UserCommandStatistics stats = new UserCommandStatistics(); stats.setCommandName(this.cmdTypes[i]); RealVector colData = m.getColumnVector(i); StandardDeviation deviation = new StandardDeviation(); double stddev = deviation.evaluate(colData.toArray()); //LOG.info("stddev is nan?" + (stddev == Double.NaN? "yes":"no")); if(stddev <= lowVarianceVal) stats.setLowVariant(true); else stats.setLowVariant(false); stats.setStddev(stddev); Mean mean = new Mean(); double mu = mean.evaluate(colData.toArray()); //LOG.info("mu is nan?" + (mu == Double.NaN? "yes":"no")); stats.setMean(mu); statistics[i] = stats; } }
Example #2
Source File: GCBiasCorrectorUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testGCCorrection() { final int numSamples = 5; final int numIntervals = 10000; final Pair<RealMatrix, double[]> data = simulateData(numSamples, numIntervals); final RealMatrix readCounts = data.getLeft(); final double[] intervalGCContent = data.getRight(); final RealMatrix correctedCoverage = readCounts.copy(); GCBiasCorrector.correctGCBias(correctedCoverage, intervalGCContent); Utils.nonNull(correctedCoverage); final StandardDeviation stdevEvaluator = new StandardDeviation(); final double[] stdDevsBySample = IntStream.range(0, correctedCoverage.getRowDimension()) .mapToDouble(r -> stdevEvaluator.evaluate(correctedCoverage.getRow(r))).toArray(); Arrays.stream(stdDevsBySample).forEach(x -> Assert.assertTrue(x < NON_GC_BIAS_NOISE_LEVEL * MEAN_READ_DEPTH)); //check that GC-bias correction is approximately idempotent -- if you correct again, very little should happen final RealMatrix recorrectedCoverage = correctedCoverage.copy(); GCBiasCorrector.correctGCBias(recorrectedCoverage, intervalGCContent); final double correctedChange = correctedCoverage.subtract(readCounts).getFrobeniusNorm(); final double recorrectedChange = recorrectedCoverage.subtract(correctedCoverage).getFrobeniusNorm(); Assert.assertTrue(recorrectedChange < correctedChange / 10.); }
Example #3
Source File: SliceSamplerUnitTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Test slice sampling of a normal distribution. Checks that input mean and standard deviation are recovered * by 10000 samples to a relative error of 0.5% and 2%, respectively. */ @Test public void testSliceSamplingOfNormalDistribution() { rng.setSeed(RANDOM_SEED); final double mean = 5.; final double standardDeviation = 0.75; final NormalDistribution normalDistribution = new NormalDistribution(mean, standardDeviation); final Function<Double, Double> normalLogPDF = normalDistribution::logDensity; final double xInitial = 1.; final double xMin = Double.NEGATIVE_INFINITY; final double xMax = Double.POSITIVE_INFINITY; final double width = 0.5; final int numSamples = 10000; final SliceSampler normalSampler = new SliceSampler(rng, normalLogPDF, xMin, xMax, width); final double[] samples = Doubles.toArray(normalSampler.sample(xInitial, numSamples)); final double sampleMean = new Mean().evaluate(samples); final double sampleStandardDeviation = new StandardDeviation().evaluate(samples); Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.005); Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation), 0., 0.02); }
Example #4
Source File: QueryManager.java From bullet-core with Apache License 2.0 | 6 votes |
/** * Gets some statistics about the current state of partitioning and queries in this manager. * * @return A {@link Map} of {@link PartitionStat} to their values for the current state of the manager. */ public Map<PartitionStat, Object> getStats() { Map<PartitionStat, Object> stats = new HashMap<>(); List<Partition> sorted = partitioning.entrySet().stream().map(Partition::new).sorted().collect(Collectors.toList()); int size = sorted.size(); stats.put(PartitionStat.QUERY_COUNT, queries.size()); stats.put(PartitionStat.PARTITION_COUNT, size); stats.put(PartitionStat.ACTUAL_QUERIES_SEEN, queriesSeen); stats.put(PartitionStat.EXPECTED_QUERIES_SEEN, expectedQueriesSeen); if (size > 0) { stats.put(PartitionStat.LARGEST_PARTITION, sorted.get(size - 1).toString()); stats.put(PartitionStat.SMALLEST_PARTITION, sorted.get(0).toString()); double[] sizes = sorted.stream().mapToDouble(p -> (double) p.count).toArray(); stats.put(PartitionStat.STDDEV_PARTITION_SIZE, new StandardDeviation().evaluate(sizes)); stats.put(PartitionStat.DISTRIBUTION_PARTITION_SIZE, getDistributions(sorted)); } return stats; }
Example #5
Source File: MinibatchSliceSamplerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Tests slice sampling of a uniform prior with no data points. * Checks that mean and standard deviation are recovered from the samples * by 500 burn-in samples + 1000 samples to a relative error of 1% and 5%, respectively. */ @Test public void testSliceSamplingOfUniformPriorWithNoData() { rng.setSeed(RANDOM_SEED); final double mean = 0.5; final double standardDeviation = 1. / Math.sqrt(12.); final List<Double> data = Collections.emptyList(); final double xInitial = 0.5; final double xMin = 0.; final double xMax = 1.; final double width = 0.1; final int numBurnInSamples = 500; final int numSamples = 1500; final MinibatchSliceSampler<Double> uniformSampler = new MinibatchSliceSampler<>( rng, data, UNIFORM_LOG_PRIOR, (d, x) -> 0., xMin, xMax, width, MINIBATCH_SIZE, APPROX_THRESHOLD); final double[] samples = Doubles.toArray(uniformSampler.sample(xInitial, numSamples).subList(numBurnInSamples, numSamples)); final double sampleMean = new Mean().evaluate(samples); final double sampleStandardDeviation = new StandardDeviation().evaluate(samples); Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.01); Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation), 0., 0.05); }
Example #6
Source File: ReCapSegCaller.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
private static double calculateT(final ReadCountCollection tangentNormalizedCoverage, final List<ModeledSegment> segments) { //Get the segments that are likely copy neutral. // Math.abs removed to mimic python... final List<ModeledSegment> copyNeutralSegments = segments.stream().filter(s -> s.getSegmentMean() < COPY_NEUTRAL_CUTOFF).collect(Collectors.toList()); // Get the targets that correspond to the copyNeutralSegments... note that individual targets, due to noise, // can be far away from copy neutral final TargetCollection<ReadCountRecord.SingleSampleRecord> targetsWithCoverage = new HashedListTargetCollection<>(tangentNormalizedCoverage.records().stream().map(ReadCountRecord::asSingleSampleRecord).collect(Collectors.toList())); final double[] copyNeutralTargetsCopyRatio = copyNeutralSegments.stream() .flatMap(s -> targetsWithCoverage.targets(s).stream()) .mapToDouble(ReadCountRecord.SingleSampleRecord::getCount).toArray(); final double meanCopyNeutralTargets = new Mean().evaluate(copyNeutralTargetsCopyRatio); final double sigmaCopyNeutralTargets = new StandardDeviation().evaluate(copyNeutralTargetsCopyRatio); // Now we filter outliers by only including those w/in 2 standard deviations. final double [] filteredCopyNeutralTargetsCopyRatio = Arrays.stream(copyNeutralTargetsCopyRatio) .filter(c -> Math.abs(c - meanCopyNeutralTargets) < sigmaCopyNeutralTargets * Z_THRESHOLD).toArray(); return new StandardDeviation().evaluate(filteredCopyNeutralTargetsCopyRatio); }
Example #7
Source File: ThirdOctaveBandsFilteringTest.java From NoiseCapture with GNU General Public License v3.0 | 6 votes |
@Test public void testPinkNoise() { short[] pinkNoise = SOSSignalProcessing.makePinkNoise(441000, (short)2500, 0); FFTSignalProcessing fftSignalProcessing = new FFTSignalProcessing(44100, ThirdOctaveBandsFiltering.STANDARD_FREQUENCIES_REDUCED, pinkNoise.length); fftSignalProcessing.addSample(pinkNoise); FFTSignalProcessing.ProcessingResult result = fftSignalProcessing.processSample(FFTSignalProcessing.WINDOW_TYPE.RECTANGULAR, false, false); // Compute StandardDeviation standardDeviation = new StandardDeviation(); double[] dArray = new double[result.dBaLevels.length]; for(int i = 0; i < result.dBaLevels.length; i++) { dArray[i] = result.dBaLevels[i]; } assertEquals(0, standardDeviation.evaluate(dArray), 0.25); }
Example #8
Source File: SliceSamplerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Tests slice sampling of a normal distribution. Checks that input mean and standard deviation are recovered * by 10000 samples to a relative error of 0.5% and 2%, respectively. */ @Test public void testSliceSamplingOfNormalDistribution() { rng.setSeed(RANDOM_SEED); final double mean = 5.; final double standardDeviation = 0.75; final NormalDistribution normalDistribution = new NormalDistribution(mean, standardDeviation); final Function<Double, Double> normalLogPDF = normalDistribution::logDensity; final double xInitial = 1.; final double xMin = Double.NEGATIVE_INFINITY; final double xMax = Double.POSITIVE_INFINITY; final double width = 0.5; final int numSamples = 10000; final SliceSampler normalSampler = new SliceSampler(rng, normalLogPDF, xMin, xMax, width); final double[] samples = Doubles.toArray(normalSampler.sample(xInitial, numSamples)); final double sampleMean = new Mean().evaluate(samples); final double sampleStandardDeviation = new StandardDeviation().evaluate(samples); Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.005); Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation), 0., 0.02); }
Example #9
Source File: PosteriorSummaryUtils.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Given a list of posterior samples, returns an estimate of the posterior mode (using * mllib kernel density estimation in {@link KernelDensity} and {@link BrentOptimizer}). * Note that estimate may be poor if number of samples is small (resulting in poor kernel density estimation), * or if posterior is not unimodal (or is sufficiently pathological otherwise). If the samples contain * {@link Double#NaN}, {@link Double#NaN} will be returned. * @param samples posterior samples, cannot be {@code null} and number of samples must be greater than 0 * @param ctx {@link JavaSparkContext} used by {@link KernelDensity} for mllib kernel density estimation */ public static double calculatePosteriorMode(final List<Double> samples, final JavaSparkContext ctx) { Utils.nonNull(samples); Utils.validateArg(samples.size() > 0, "Number of samples must be greater than zero."); //calculate sample min, max, mean, and standard deviation final double sampleMin = Collections.min(samples); final double sampleMax = Collections.max(samples); final double sampleMean = new Mean().evaluate(Doubles.toArray(samples)); final double sampleStandardDeviation = new StandardDeviation().evaluate(Doubles.toArray(samples)); //if samples are all the same or contain NaN, can simply return mean if (sampleStandardDeviation == 0. || Double.isNaN(sampleMean)) { return sampleMean; } //use Silverman's rule to set bandwidth for kernel density estimation from sample standard deviation //see https://en.wikipedia.org/wiki/Kernel_density_estimation#Practical_estimation_of_the_bandwidth final double bandwidth = SILVERMANS_RULE_CONSTANT * sampleStandardDeviation * Math.pow(samples.size(), SILVERMANS_RULE_EXPONENT); //use kernel density estimation to approximate posterior from samples final KernelDensity pdf = new KernelDensity().setSample(ctx.parallelize(samples, 1)).setBandwidth(bandwidth); //use Brent optimization to find mode (i.e., maximum) of kernel-density-estimated posterior final BrentOptimizer optimizer = new BrentOptimizer(RELATIVE_TOLERANCE, RELATIVE_TOLERANCE * (sampleMax - sampleMin)); final UnivariateObjectiveFunction objective = new UnivariateObjectiveFunction(f -> pdf.estimate(new double[] {f})[0]); //search for mode within sample range, start near sample mean final SearchInterval searchInterval = new SearchInterval(sampleMin, sampleMax, sampleMean); return optimizer.optimize(objective, GoalType.MAXIMIZE, searchInterval, BRENT_MAX_EVAL).getPoint(); }
Example #10
Source File: MathUtilities.java From StatsAgg with Apache License 2.0 | 6 votes |
public static BigDecimal computePopulationStandardDeviationOfBigDecimals(List<BigDecimal> numbers) { if ((numbers == null) || numbers.isEmpty()) { return null; } try { double[] doublesArray = new double[numbers.size()]; for (int i = 0; i < doublesArray.length; i++) { doublesArray[i] = numbers.get(i).doubleValue(); } StandardDeviation standardDeviation = new StandardDeviation(); standardDeviation.setBiasCorrected(false); BigDecimal standardDeviationResult = new BigDecimal(standardDeviation.evaluate(doublesArray)); return standardDeviationResult; } catch (Exception e) { logger.error(e.toString() + System.lineSeparator() + StackTrace.getStringFromStackTrace(e)); return null; } }
Example #11
Source File: DescriptiveStatisticsHistogramStatistics.java From flink with Apache License 2.0 | 6 votes |
@Override public double evaluate(double[] values, int begin, int length) throws MathIllegalArgumentException { this.count = length; percentilesImpl.setData(values, begin, length); SimpleStats secondMoment = new SimpleStats(); secondMoment.evaluate(values, begin, length); this.mean = secondMoment.getMean(); this.min = secondMoment.getMin(); this.max = secondMoment.getMax(); this.stddev = new StandardDeviation(secondMoment).getResult(); return Double.NaN; }
Example #12
Source File: bedGraphMath.java From HMMRATAC with GNU General Public License v3.0 | 6 votes |
private void setMeanAndStd(){ Mean mu = new Mean(); StandardDeviation dev = new StandardDeviation(); for (String chr : bedgraph.keySet()){ ArrayList<TagNode> inTemp = bedgraph.get(chr); Collections.sort(inTemp, TagNode.basepairComparator); for (int i = 0; i < inTemp.size();i++){ int length = inTemp.get(i).getLength(); double value = inTemp.get(i).getScore2(); for (int a = 0; a < length;a++){ mu.increment(value); dev.increment(value); } } } mean = mu.getResult(); std = dev.getResult(); }
Example #13
Source File: MeanAndStd.java From HMMRATAC with GNU General Public License v3.0 | 6 votes |
/** * Set the data across a specific region * @param node a TagNode representing a specific region for calculation * @throws IOException */ private void SetMeanAndStd2(TagNode node) throws IOException{ BBFileReader wigReader = new BBFileReader(wigFile); String chrom = node.getChrom(); int begin = node.getStart(); int end = node.getStop(); BigWigIterator iter = wigReader.getBigWigIterator(chrom, begin, chrom, end, false); Mean mu = new Mean(); StandardDeviation dev = new StandardDeviation(); while(iter.hasNext()){ WigItem item = iter.next(); int start = item.getStartBase(); int stop = item.getEndBase(); double value = item.getWigValue(); for (int i = start; i < stop;i++){ mu.increment(value); dev.increment(value); } } mean = mu.getResult(); std = dev.getResult(); wigReader=null; }
Example #14
Source File: MeanAndStd.java From HMMRATAC with GNU General Public License v3.0 | 6 votes |
/** * Set the data across the entire genome * @throws IOException */ private void SetMeanAndStd() throws IOException{ BBFileReader wigReader = new BBFileReader(wigFile); BigWigIterator iter = wigReader.getBigWigIterator(); Mean mu = new Mean(); StandardDeviation dev = new StandardDeviation(); while(iter.hasNext()){ WigItem item = iter.next(); int start = item.getStartBase(); int stop = item.getEndBase(); double value = item.getWigValue(); for (int i = start; i < stop;i++){ mu.increment(value); dev.increment(value); } } mean = mu.getResult(); std = dev.getResult(); }
Example #15
Source File: PosteriorSummaryUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Given a list of posterior samples, returns an estimate of the posterior mode (using * mllib kernel density estimation in {@link KernelDensity} and {@link BrentOptimizer}). * Note that estimate may be poor if number of samples is small (resulting in poor kernel density estimation), * or if posterior is not unimodal (or is sufficiently pathological otherwise). If the samples contain * {@link Double#NaN}, {@link Double#NaN} will be returned. * @param samples posterior samples, cannot be {@code null} and number of samples must be greater than 0 * @param ctx {@link JavaSparkContext} used by {@link KernelDensity} for mllib kernel density estimation */ public static double calculatePosteriorMode(final List<Double> samples, final JavaSparkContext ctx) { Utils.nonNull(samples); Utils.validateArg(samples.size() > 0, "Number of samples must be greater than zero."); //calculate sample min, max, mean, and standard deviation final double sampleMin = Collections.min(samples); final double sampleMax = Collections.max(samples); final double sampleMean = new Mean().evaluate(Doubles.toArray(samples)); final double sampleStandardDeviation = new StandardDeviation().evaluate(Doubles.toArray(samples)); //if samples are all the same or contain NaN, can simply return mean if (sampleStandardDeviation == 0. || Double.isNaN(sampleMean)) { return sampleMean; } //use Silverman's rule to set bandwidth for kernel density estimation from sample standard deviation //see https://en.wikipedia.org/wiki/Kernel_density_estimation#Practical_estimation_of_the_bandwidth final double bandwidth = SILVERMANS_RULE_CONSTANT * sampleStandardDeviation * Math.pow(samples.size(), SILVERMANS_RULE_EXPONENT); //use kernel density estimation to approximate posterior from samples final KernelDensity pdf = new KernelDensity().setSample(ctx.parallelize(samples, 1)).setBandwidth(bandwidth); //use Brent optimization to find mode (i.e., maximum) of kernel-density-estimated posterior final BrentOptimizer optimizer = new BrentOptimizer(RELATIVE_TOLERANCE, RELATIVE_TOLERANCE * (sampleMax - sampleMin)); final UnivariateObjectiveFunction objective = new UnivariateObjectiveFunction(f -> pdf.estimate(new double[] {f})[0]); //search for mode within sample range, start near sample mean final SearchInterval searchInterval = new SearchInterval(sampleMin, sampleMax, sampleMean); return optimizer.optimize(objective, GoalType.MAXIMIZE, searchInterval, BRENT_MAX_EVAL).getPoint(); }
Example #16
Source File: UserProfileKDEModeler.java From Eagle with Apache License 2.0 | 5 votes |
private void computeStats(RealMatrix m){ if(m.getColumnDimension() != this.cmdTypes.length){ LOG.error("Please fix the commands list in config file"); } statistics = new UserCommandStatistics[m.getColumnDimension()]; for(int i=0; i<m.getColumnDimension(); i++){ UserCommandStatistics stats = new UserCommandStatistics(); stats.setCommandName(this.cmdTypes[i]); RealVector colData = m.getColumnVector(i); StandardDeviation deviation = new StandardDeviation(); double stddev = deviation.evaluate(colData.toArray()); if(LOG.isDebugEnabled()) LOG.debug("Stddev is NAN ? " + (Double.isNaN(stddev) ? "yes" : "no")); if(stddev <= lowVarianceVal) stats.setLowVariant(true); else stats.setLowVariant(false); stats.setStddev(stddev); Mean mean = new Mean(); double mu = mean.evaluate(colData.toArray()); if(LOG.isDebugEnabled()) LOG.debug("mu is NAN ? " + (Double.isNaN(mu)? "yes":"no")); stats.setMean(mu); statistics[i]=stats; } }
Example #17
Source File: MessagePackDataformatTestBase.java From jackson-dataformat-msgpack with Apache License 2.0 | 5 votes |
protected void printStat(String label, double[] values) { StandardDeviation standardDeviation = new StandardDeviation(); System.out.println(label + ":"); System.out.println(String.format(" mean : %.2f", StatUtils.mean(values))); System.out.println(String.format(" min : %.2f", StatUtils.min(values))); System.out.println(String.format(" max : %.2f", StatUtils.max(values))); System.out.println(String.format(" stdev: %.2f", standardDeviation.evaluate(values))); System.out.println(""); }
Example #18
Source File: TestDoubleStdDevPopAggregation.java From presto with Apache License 2.0 | 5 votes |
@Override protected Number getExpectedValue(int start, int length) { if (length == 0) { return null; } double[] values = new double[length]; for (int i = 0; i < length; i++) { values[i] = start + i; } StandardDeviation stdDev = new StandardDeviation(false); return stdDev.evaluate(values); }
Example #19
Source File: MatrixSummaryUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Return an array containing the variance for each row in the given matrix. * @param m Not {@code null}. Size MxN. If any entry is NaN, the corresponding rows will have a * variance of NaN. * @return array of size M. Never {@code null} IF there is only one column (or only one entry */ public static double[] getRowVariances(final RealMatrix m) { Utils.nonNull(m, "Cannot calculate medians on a null matrix."); final StandardDeviation std = new StandardDeviation(); return IntStream.range(0, m.getRowDimension()).boxed() .mapToDouble(i -> Math.pow(std.evaluate(m.getRow(i)), 2)).toArray(); }
Example #20
Source File: RandomDNAUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
public void checkResults(final int[] results, final int n, final int m) { final double mean = Arrays.stream(results).average().getAsDouble(); final double std = new StandardDeviation().evaluate(Arrays.stream(results).asDoubleStream().toArray()); final double expectedMean = (n*m)/4.0; final double s = std; // not really because it's the population not the sample dtd but it'll do Assert.assertTrue(mean < expectedMean + 2 * s / Math.sqrt(n * m), "unexpected mean:" + mean); Assert.assertTrue(mean > expectedMean-2*s/Math.sqrt(n*m), "unexpected mean:" +mean); }
Example #21
Source File: MinibatchSliceSamplerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Tests slice sampling of a normal posterior = uniform prior x normal likelihood from 1000 data points. * Checks that input mean and standard deviation are recovered from the posterior of the mean parameter * by 500 burn-in samples + 1000 samples to a relative error of 1% and 5%, respectively. */ @Test public void testSliceSamplingOfNormalPosterior() { rng.setSeed(RANDOM_SEED); final double mean = 5.; final double standardDeviation = 0.75; final NormalDistribution normalDistribution = new NormalDistribution(rng, mean, standardDeviation); final BiFunction<Double, Double, Double> normalLogLikelihood = (d, x) -> new NormalDistribution(null, x, standardDeviation).logDensity(d); final List<Double> data = Doubles.asList(normalDistribution.sample(NUM_DATA_POINTS)); final double xInitial = 1.; final double xMin = Double.NEGATIVE_INFINITY; final double xMax = Double.POSITIVE_INFINITY; final double width = 0.5; final int numBurnInSamples = 500; final int numSamples = 1500; final MinibatchSliceSampler<Double> normalSampler = new MinibatchSliceSampler<>( rng, data, UNIFORM_LOG_PRIOR, normalLogLikelihood, xMin, xMax, width, MINIBATCH_SIZE, APPROX_THRESHOLD); final double[] samples = Doubles.toArray(normalSampler.sample(xInitial, numSamples).subList(numBurnInSamples, numSamples)); final double sampleMean = new Mean().evaluate(samples); final double sampleStandardDeviation = new StandardDeviation().evaluate(samples); Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.01); Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation / Math.sqrt(NUM_DATA_POINTS)), 0., 0.05); }
Example #22
Source File: MinibatchSliceSamplerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Tests slice sampling of a beta posterior = uniform prior x binomial likelihood from 1000 data points. * Checks that input mean and standard deviation are recovered from the posterior of the mean parameter * by 500 burn-in samples + 1000 samples to a relative error of 1% and 5%, respectively. */ @Test public void testSliceSamplingOfBetaPosterior() { rng.setSeed(RANDOM_SEED); final int numTrials = 100; final double p = 0.9; final BinomialDistribution binomialDistribution = new BinomialDistribution(rng, numTrials, p); final BiFunction<Integer, Double, Double> binomialLogLikelihood = (d, x) -> new BinomialDistribution(null, numTrials, x).logProbability(d); final List<Integer> data = Ints.asList(binomialDistribution.sample(NUM_DATA_POINTS)); final double numSuccessesTotal = data.stream().mapToDouble(x -> x).sum(); final double alpha = 1. + numSuccessesTotal; final double beta = 1. + (numTrials * NUM_DATA_POINTS - numSuccessesTotal); final double variance = new BetaDistribution(null, alpha, beta).getNumericalVariance(); final double xInitial = 0.5; final double xMin = 0.; final double xMax = 1.; final double width = 0.1; final int numBurnInSamples = 500; final int numSamples = 1500; final MinibatchSliceSampler<Integer> betaSampler = new MinibatchSliceSampler<>( rng, data, UNIFORM_LOG_PRIOR, binomialLogLikelihood, xMin, xMax, width, MINIBATCH_SIZE, APPROX_THRESHOLD); final double[] samples = Doubles.toArray(betaSampler.sample(xInitial, numSamples).subList(numBurnInSamples, numSamples)); final double sampleMean = new Mean().evaluate(samples); final double sampleStandardDeviation = new StandardDeviation().evaluate(samples); Assert.assertEquals(relativeError(sampleMean, p), 0., 0.01); Assert.assertEquals(relativeError(sampleStandardDeviation, Math.sqrt(variance)), 0., 0.05); }
Example #23
Source File: GibbsSamplerSingleGaussianUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Tests Bayesian inference of a Gaussian model via MCMC. Recovery of input values for the variance and mean * global parameters is checked. In particular, the mean and standard deviation of the posteriors for * both parameters must be recovered to within a relative error of 1% and 10%, respectively, in 250 samples * (after 250 burn-in samples have been discarded). */ @Test public void testRunMCMCOnSingleGaussianModel() { //Create new instance of the Modeller helper class, passing all quantities needed to initialize state and data. final GaussianModeller modeller = new GaussianModeller(VARIANCE_INITIAL, MEAN_INITIAL, datapointsList); //Create a GibbsSampler, passing the total number of samples (including burn-in samples) //and the model held by the Modeller. final GibbsSampler<GaussianParameter, ParameterizedState<GaussianParameter>, GaussianDataCollection> gibbsSampler = new GibbsSampler<>(NUM_SAMPLES, modeller.model); //Run the MCMC. gibbsSampler.runMCMC(); //Get the samples of each of the parameter posteriors (discarding burn-in samples) by passing the //parameter name, type, and burn-in number to the getSamples method. final double[] varianceSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.VARIANCE, Double.class, NUM_BURN_IN)); final double[] meanSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.MEAN, Double.class, NUM_BURN_IN)); //Check that the statistics---i.e., the means and standard deviations---of the posteriors //agree with those found by emcee/analytically to a relative error of 1% and 10%, respectively. final double variancePosteriorCenter = new Mean().evaluate(varianceSamples); final double variancePosteriorStandardDeviation = new StandardDeviation().evaluate(varianceSamples); Assert.assertEquals(relativeError(variancePosteriorCenter, VARIANCE_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS); Assert.assertEquals( relativeError(variancePosteriorStandardDeviation, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS); final double meanPosteriorCenter = new Mean().evaluate(meanSamples); final double meanPosteriorStandardDeviation = new StandardDeviation().evaluate(meanSamples); Assert.assertEquals(relativeError(meanPosteriorCenter, MEAN_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS); Assert.assertEquals( relativeError(meanPosteriorStandardDeviation, MEAN_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS); }
Example #24
Source File: Endpoint.java From oryx with Apache License 2.0 | 5 votes |
protected Endpoint(String path, double relativeProb) { Preconditions.checkArgument(relativeProb > 0.0); this.path = path; this.relativeProb = relativeProb; meanTimeNanos = new Mean(); stdevTimeNanos = new StandardDeviation(); }
Example #25
Source File: StandardDeviationFunction.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 5 votes |
public Double evaluate(Collection<?> values, boolean biasCorrected){ StandardDeviation statistic = new StandardDeviation(); statistic.setBiasCorrected(biasCorrected); for(Object value : values){ Number number = (Number)TypeUtil.parseOrCast(DataType.DOUBLE, value); statistic.increment(number.doubleValue()); } return statistic.getResult(); }
Example #26
Source File: TestLongStdDevPopAggregation.java From presto with Apache License 2.0 | 5 votes |
@Override protected Number getExpectedValue(int start, int length) { if (length == 0) { return null; } double[] values = new double[length]; for (int i = 0; i < length; i++) { values[i] = start + i; } StandardDeviation stdDev = new StandardDeviation(false); return stdDev.evaluate(values); }
Example #27
Source File: MatrixSummaryUtils.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Return an array containing the variance for each row in the given matrix. * @param m Not {@code null}. Size MxN. If any entry is NaN, the corresponding rows will have a * variance of NaN. * @return array of size M. Never {@code null} IF there is only one column (or only one entry */ public static double[] getRowVariances(final RealMatrix m) { Utils.nonNull(m, "Cannot calculate medians on a null matrix."); final StandardDeviation std = new StandardDeviation(); return IntStream.range(0, m.getRowDimension()).boxed() .mapToDouble(i -> Math.pow(std.evaluate(m.getRow(i)), 2)).toArray(); }
Example #28
Source File: HMMR_EM.java From HMMRATAC with GNU General Public License v3.0 | 5 votes |
/** * Perform one iteration of the EM algorithm */ public void iterate(){ double[] temp = new double[mu.length]; double[] counter = new double[mu.length]; double total = 0.0; Mean[] means = new Mean[mu.length]; StandardDeviation[] std = new StandardDeviation[mu.length]; for (int i = 0; i < means.length;i++){ means[i] = new Mean(); std[i] = new StandardDeviation(); } for (int i = 0;i < data.length;i++){ for (int l = 0;l < mu.length;l++){ temp[l] = getWeightedDensity(data[i],mu[l],lamda[l],weights[l]); } int index = returnGreater(temp); if (index != -1){ means[index].increment(data[i]); std[index].increment(data[i]); counter[index]+=1.0; total+=1.0; } } for (int i = 0;i < mu.length;i++){ mu[i] = mu[i] + (jump *(means[i].getResult()-mu[i])); lamda[i] = lamda[i] + (jump *(std[i].getResult() - lamda[i])); weights[i] = counter[i] / total; } }
Example #29
Source File: TestMfsBlockLoader.java From dremio-oss with Apache License 2.0 | 5 votes |
private static double stdDev(List<Long> data) { double[] asDouble = new double[data.size()]; int i = 0; for (Long l : data) { asDouble[i++] = (double) l; } return new StandardDeviation().evaluate(asDouble); }
Example #30
Source File: DBIDRouterTest.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
@Test public void multipleShardsInTheCluster_shouldBalanceNodes() { int [] shardIdentifiers = range(0,15).toArray(); int shardCount = shardIdentifiers.length; int howManyDocumentsPerShard = 10000; Map<Integer, Integer> nodeDistributionMap = new HashMap<>(); range(0, shardCount * howManyDocumentsPerShard) .mapToLong(Long::valueOf) .forEach(id -> { Node node = new Node(); node.setId(id); stream(shardIdentifiers) .forEach(shardId -> { if (router.routeNode(shardCount, shardId, node)) { nodeDistributionMap.merge(shardId, 1, Integer::sum); } }); }); StandardDeviation sd = new StandardDeviation(); double deviation = sd.evaluate(nodeDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray()); double norm = deviation/(howManyDocumentsPerShard) * 100; assertEquals(shardIdentifiers.length, nodeDistributionMap.size()); // Asserts the standard deviation of the distribution map is in percentage lesser than 30% assertTrue( nodeDistributionMap.values().toString() + ", SD = " + deviation + ", SD_NORM = " + norm + "%", norm < 30); }