Java Code Examples for org.apache.commons.math3.stat.descriptive.SummaryStatistics#getN()
The following examples show how to use
org.apache.commons.math3.stat.descriptive.SummaryStatistics#getN().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Stats.java From tablesaw with Apache License 2.0 | 6 votes |
/**
 * Builds a {@code Stats} object labelled with the column's name and copies every
 * aggregate exposed by the given {@link SummaryStatistics} into it.
 *
 * @param values column whose name is used in the label
 * @param summaryStatistics source of the aggregate values
 * @return the populated {@code Stats}
 */
private static Stats getStats(NumericColumn<?> values, SummaryStatistics summaryStatistics) {
    final Stats result = new Stats("Column: " + values.name());

    // Sample size and extremes.
    result.n = summaryStatistics.getN();
    result.min = summaryStatistics.getMin();
    result.max = summaryStatistics.getMax();

    // Sums.
    result.sum = summaryStatistics.getSum();
    result.sumOfLogs = summaryStatistics.getSumOfLogs();
    result.sumOfSquares = summaryStatistics.getSumsq();

    // Means.
    result.mean = summaryStatistics.getMean();
    result.geometricMean = summaryStatistics.getGeometricMean();
    result.quadraticMean = summaryStatistics.getQuadraticMean();

    // Dispersion.
    result.variance = summaryStatistics.getVariance();
    result.populationVariance = summaryStatistics.getPopulationVariance();
    result.standardDeviation = summaryStatistics.getStandardDeviation();
    result.secondMoment = summaryStatistics.getSecondMoment();

    return result;
}
Example 2
Source File: EmpiricalDistribution.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Generates a random value from this distribution. * <strong>Preconditions:</strong><ul> * <li>the distribution must be loaded before invoking this method</li></ul> * @return the random value. * @throws MathIllegalStateException if the distribution has not been loaded */ public double getNextValue() throws MathIllegalStateException { if (!loaded) { throw new MathIllegalStateException(LocalizedFormats.DISTRIBUTION_NOT_LOADED); } // Start with a uniformly distributed random number in (0,1) double x = randomData.nextUniform(0,1); // Use this to select the bin and generate a Gaussian within the bin for (int i = 0; i < binCount; i++) { if (x <= upperBounds[i]) { SummaryStatistics stats = binStats.get(i); if (stats.getN() > 0) { if (stats.getStandardDeviation() > 0) { // more than one obs return randomData.nextGaussian (stats.getMean(),stats.getStandardDeviation()); } else { return stats.getMean(); // only one obs in bin } } } } throw new MathIllegalStateException(LocalizedFormats.NO_BIN_SELECTED); }
Example 3
Source File: EmpiricalDistribution.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Generates a random value from this distribution. * <strong>Preconditions:</strong><ul> * <li>the distribution must be loaded before invoking this method</li></ul> * @return the random value. * @throws MathIllegalStateException if the distribution has not been loaded */ public double getNextValue() throws MathIllegalStateException { if (!loaded) { throw new MathIllegalStateException(LocalizedFormats.DISTRIBUTION_NOT_LOADED); } // Start with a uniformly distributed random number in (0,1) final double x = randomData.nextUniform(0,1); // Use this to select the bin and generate a Gaussian within the bin for (int i = 0; i < binCount; i++) { if (x <= upperBounds[i]) { SummaryStatistics stats = binStats.get(i); if (stats.getN() > 0) { if (stats.getStandardDeviation() > 0) { // more than one obs return getKernel(stats).sample(); } else { return stats.getMean(); // only one obs in bin } } } } throw new MathIllegalStateException(LocalizedFormats.NO_BIN_SELECTED); }
Example 4
Source File: EmpiricalDistribution.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Generates a random value from this distribution. * <strong>Preconditions:</strong><ul> * <li>the distribution must be loaded before invoking this method</li></ul> * @return the random value. * @throws MathIllegalStateException if the distribution has not been loaded */ public double getNextValue() throws MathIllegalStateException { if (!loaded) { throw new MathIllegalStateException(LocalizedFormats.DISTRIBUTION_NOT_LOADED); } // Start with a uniformly distributed random number in (0,1) final double x = randomData.nextUniform(0,1); // Use this to select the bin and generate a Gaussian within the bin for (int i = 0; i < binCount; i++) { if (x <= upperBounds[i]) { SummaryStatistics stats = binStats.get(i); if (stats.getN() > 0) { if (stats.getStandardDeviation() > 0) { // more than one obs return getKernel(stats).sample(); } else { return stats.getMean(); // only one obs in bin } } } } throw new MathIllegalStateException(LocalizedFormats.NO_BIN_SELECTED); }
Example 5
Source File: EmpiricalDistribution.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Generates a random value from this distribution. * <strong>Preconditions:</strong><ul> * <li>the distribution must be loaded before invoking this method</li></ul> * @return the random value. * @throws MathIllegalStateException if the distribution has not been loaded */ public double getNextValue() throws MathIllegalStateException { if (!loaded) { throw new MathIllegalStateException(LocalizedFormats.DISTRIBUTION_NOT_LOADED); } // Start with a uniformly distributed random number in (0,1) double x = randomData.nextUniform(0,1); // Use this to select the bin and generate a Gaussian within the bin for (int i = 0; i < binCount; i++) { if (x <= upperBounds[i]) { SummaryStatistics stats = binStats.get(i); if (stats.getN() > 0) { if (stats.getStandardDeviation() > 0) { // more than one obs return randomData.nextGaussian (stats.getMean(),stats.getStandardDeviation()); } else { return stats.getMean(); // only one obs in bin } } } } throw new MathIllegalStateException(LocalizedFormats.NO_BIN_SELECTED); }
Example 6
Source File: RandomDataTest.java From astor with GNU General Public License v2.0 | 6 votes |
/** test failure modes and distribution of nextGaussian() */ @Test public void testNextGaussian() { try { randomData.nextGaussian(0, 0); Assert.fail("zero sigma -- MathIllegalArgumentException expected"); } catch (MathIllegalArgumentException ex) { // ignored } SummaryStatistics u = new SummaryStatistics(); for (int i = 0; i < largeSampleSize; i++) { u.addValue(randomData.nextGaussian(0, 1)); } double xbar = u.getMean(); double s = u.getStandardDeviation(); double n = u.getN(); /* * t-test at .001-level TODO: replace with externalized t-test, with * test statistic defined in TestStatistic */ Assert.assertTrue(FastMath.abs(xbar) / (s / FastMath.sqrt(n)) < 3.29); }
Example 7
Source File: AbstractOwlSim.java From owltools with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * This function will take an aggregated collection of Summary Statistics * and will generate a derived {@link SummaryStatistic} based on a flag for the * desired summation. This is particularly helpful for finding out the * means of the individual statistics of the collection. * For example, if you wanted to find out the mean of means of the collection * you would call this function like <p> * getSummaryStatisticsForCollection(aggregate,1).getMean(); <p> * Or if you wanted to determine the max number of annotations per * individual, you could call: <p> * getSummaryStatisticsForCollection(aggregate,5).getMax(); <p> * The stat flag should be set to the particular individual statistic that should * be summarized over. * * @param aggregate The aggregated collection of summary statistics * @param stat Integer flag for the statistic (1:mean ; 2:sum; 3:min; 4:max; 5:N) * @return {@link SummaryStatistics} of the selected statistic */ public SummaryStatistics getSummaryStatisticsForCollection(Collection<SummaryStatistics> aggregate, Stat stat) { //LOG.info("Computing stats over collection of "+aggregate.size()+" elements ("+stat+"):"); //TODO: turn stat into enum int x = 0; //To save memory, I am using SummaryStatistics, which does not store the values, //but this could be changed to DescriptiveStatistics to see values //as well as other statistical functions like distributions SummaryStatistics stats = new SummaryStatistics(); Double v = 0.0; ArrayList<String> vals = new ArrayList(); for (SummaryStatistics s : aggregate) { switch (stat) { case MEAN : v= s.getMean(); stats.addValue(s.getMean()); break; case SUM : v=s.getSum(); stats.addValue(s.getSum()); break; case MIN : v=s.getMin(); stats.addValue(s.getMin()); break; case MAX : v=s.getMax(); stats.addValue(s.getMax()); break; case N : v= ((int)s.getN())*1.0; stats.addValue(s.getN()); break; }; //vals.add(v.toString()); }; //LOG.info("vals: "+vals.toString()); return stats; }
Example 8
Source File: Stats.java From tablesaw with Apache License 2.0 | 6 votes |
/**
 * Builds a {@code Stats} object labelled with the column's name and copies every
 * aggregate exposed by the given {@link SummaryStatistics} into it.
 *
 * @param values column whose name is used in the label
 * @param summaryStatistics source of the aggregate values
 * @return the populated {@code Stats}
 */
private static Stats getStats(NumericColumn<?> values, SummaryStatistics summaryStatistics) {
    final Stats result = new Stats("Column: " + values.name());

    // Sample size and extremes.
    result.n = summaryStatistics.getN();
    result.min = summaryStatistics.getMin();
    result.max = summaryStatistics.getMax();

    // Sums.
    result.sum = summaryStatistics.getSum();
    result.sumOfLogs = summaryStatistics.getSumOfLogs();
    result.sumOfSquares = summaryStatistics.getSumsq();

    // Means.
    result.mean = summaryStatistics.getMean();
    result.geometricMean = summaryStatistics.getGeometricMean();
    result.quadraticMean = summaryStatistics.getQuadraticMean();

    // Dispersion.
    result.variance = summaryStatistics.getVariance();
    result.populationVariance = summaryStatistics.getPopulationVariance();
    result.standardDeviation = summaryStatistics.getStandardDeviation();
    result.secondMoment = summaryStatistics.getSecondMoment();

    return result;
}
Example 9
Source File: AbstractOwlSim.java From owltools with BSD 3-Clause "New" or "Revised" License | 5 votes |
public void computeSystemStatsForSubgraph(OWLClass c) throws UnknownOWLClassException { Set<OWLNamedIndividual> insts = this.getAllElements(); LOG.info("Computing system stats for subgraph rooted at" + c.toString() +" with "+ insts.size() + " individuals"); // LOG.info("Creating singular stat scores for all IDspaces"); Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>(); SummaryStatistics subgraphStats = new SummaryStatistics(); for (OWLNamedIndividual i : insts) { SummaryStatistics statsPerIndividual = computeIndividualStatsForSubgraph(i,c); //put this individual into the aggregate if (statsPerIndividual.getN() == 0) { //LOG.info("No annotations found in this subgraph for Individual "+i.toStringID()); } else { //LOG.info(statsPerIndividual.getN()+" Annotations found in this subgraph for Individual "+i.toStringID()); aggregate.add(statsPerIndividual); } //TODO: put this individual into an idSpace aggregate //String idSpace = i.getIRI().getNamespace(); subgraphStats.addValue(statsPerIndividual.getMean()); } StatsPerIndividual myStats = new StatsPerIndividual(); myStats.mean = getSummaryStatisticsForCollection(aggregate,Stat.MEAN); myStats.sum = getSummaryStatisticsForCollection(aggregate,Stat.SUM); myStats.min = getSummaryStatisticsForCollection(aggregate,Stat.MIN); myStats.max = getSummaryStatisticsForCollection(aggregate,Stat.MAX); myStats.n = getSummaryStatisticsForCollection(aggregate,Stat.N); this.subgraphSummaryStatsPerIndividual.put(c, myStats); LOG.info("Finished omputing system stats for subgraph rooted at" + c.toString()); }
Example 10
Source File: AbstractOwlSim.java From owltools with BSD 3-Clause "New" or "Revised" License | 5 votes |
public void computeSystemStats() throws UnknownOWLClassException { Set<OWLNamedIndividual> insts = this.getAllElements(); LOG.info("Computing system stats for " + insts.size() + " individuals"); LOG.info("Creating singular stat scores for all IDspaces"); Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>(); this.overallStats = new SummaryStatistics(); int counter = 0; for (OWLNamedIndividual i : insts) { counter++; SummaryStatistics statsPerIndividual = computeIndividualStats(i); //put this individual into the aggregate if (statsPerIndividual.getN() == 0) { LOG.error("No annotations found for Individual "+i.toStringID()); } else { aggregate.add(statsPerIndividual); } //TODO: put this individual into an idSpace aggregate // String idSpace = i.getIRI().getNamespace(); this.overallStats.addValue(statsPerIndividual.getMean()); if (counter % 1000 == 0) { LOG.info("Finished "+counter+" individuals"); } } // this.aggregateStatsPerIndividual = AggregateSummaryStatistics.aggregate(aggregate); StatsPerIndividual myStats = new StatsPerIndividual(); myStats.mean = getSummaryStatisticsForCollection(aggregate,Stat.MEAN); myStats.sum = getSummaryStatisticsForCollection(aggregate,Stat.SUM); myStats.min = getSummaryStatisticsForCollection(aggregate,Stat.MIN); myStats.max = getSummaryStatisticsForCollection(aggregate,Stat.MAX); myStats.n = getSummaryStatisticsForCollection(aggregate,Stat.N); myStats.aggregate = AggregateSummaryStatistics.aggregate(aggregate); this.overallSummaryStatsPerIndividual = myStats; LOG.info("Finished computing overall statsPerIndividual:\n"+this.getSummaryStatistics().toString()); }
Example 11
Source File: NumberMapFunctions.java From tablesaw with Apache License 2.0 | 5 votes |
/**
 * Loads this column's values into an {@link EmpiricalDistribution} with the requested
 * number of bins and returns a column holding the number of observations in each bin.
 *
 * @param binCount number of bins to create
 * @return a {@code DoubleColumn} named {@code name() + "[binned]"} with one count per bin
 */
default DoubleColumn bin(int binCount) {
    final double[] counts = new double[binCount];

    final EmpiricalDistribution empirical = new EmpiricalDistribution(binCount);
    empirical.load(asDoubleArray());

    int slot = 0;
    for (SummaryStatistics binSummary : empirical.getBinStats()) {
        counts[slot] = binSummary.getN();
        slot++;
    }

    return DoubleColumn.create(name() + "[binned]", counts);
}
Example 12
Source File: EmpiricalDistribution.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * The within-bin smoothing kernel. Returns a Gaussian distribution
 * parameterized by {@code bStats}, unless the bin contains only one
 * observation or its observations have zero variance, in which case a
 * constant distribution is returned.
 *
 * @param bStats summary statistics for the bin
 * @return within-bin kernel parameterized by bStats
 */
protected RealDistribution getKernel(SummaryStatistics bStats) {
    // A bin whose observations are all identical has zero variance; a normal
    // distribution needs a strictly positive standard deviation, so such a bin
    // is treated like a single-observation bin.
    if (bStats.getN() == 1 || bStats.getVariance() == 0) {
        return new ConstantRealDistribution(bStats.getMean());
    }
    return new NormalDistribution(randomData.getRandomGenerator(),
            bStats.getMean(), bStats.getStandardDeviation(),
            NormalDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
}
Example 13
Source File: EstimateRepairability.java From BART with MIT License | 5 votes |
/**
 * Computes the t-based margin {@code critVal * sd / sqrt(n)} for the given statistics,
 * where {@code critVal} is the inverse cumulative probability of a t-distribution with
 * {@code n - 1} degrees of freedom at {@code 1 - (1 - level) / 2}.
 *
 * @param stats summary statistics of the sample
 * @param level confidence level used to derive the critical value
 * @return the margin, or {@code Double.NaN} when a {@link MathIllegalArgumentException}
 *         is raised during the computation
 */
private static double calcMeanCI(SummaryStatistics stats, double level) {
    try {
        final TDistribution studentT = new TDistribution(stats.getN() - 1);
        final double cumulativeProbability = 1.0 - (1 - level) / 2;
        final double criticalValue = studentT.inverseCumulativeProbability(cumulativeProbability);
        final double deviation = stats.getStandardDeviation();
        final double rootN = Math.sqrt(stats.getN());
        return criticalValue * deviation / rootN;
    } catch (MathIllegalArgumentException e) {
        return Double.NaN;
    }
}
Example 14
Source File: EmpiricalDistribution.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * The within-bin smoothing kernel. Returns a Gaussian distribution
 * parameterized by {@code bStats}, unless the bin contains only one
 * observation or its observations have zero variance, in which case a
 * constant distribution is returned.
 *
 * @param bStats summary statistics for the bin
 * @return within-bin kernel parameterized by bStats
 */
protected RealDistribution getKernel(SummaryStatistics bStats) {
    // A bin whose observations are all identical has zero variance; a normal
    // distribution needs a strictly positive standard deviation, so such a bin
    // is treated like a single-observation bin.
    if (bStats.getN() == 1 || bStats.getVariance() == 0) {
        return new ConstantRealDistribution(bStats.getMean());
    }
    return new NormalDistribution(randomData.getRandomGenerator(),
            bStats.getMean(), bStats.getStandardDeviation(),
            NormalDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
}
Example 15
Source File: NumberMapFunctions.java From tablesaw with Apache License 2.0 | 5 votes |
/**
 * Loads this column's values into an {@link EmpiricalDistribution} with the requested
 * number of bins and returns a column holding the number of observations in each bin.
 *
 * @param binCount number of bins to create
 * @return a {@code DoubleColumn} named {@code name() + "[binned]"} with one count per bin
 */
default DoubleColumn bin(int binCount) {
    final double[] counts = new double[binCount];

    final EmpiricalDistribution empirical = new EmpiricalDistribution(binCount);
    empirical.load(asDoubleArray());

    int slot = 0;
    for (SummaryStatistics binSummary : empirical.getBinStats()) {
        counts[slot] = binSummary.getN();
        slot++;
    }

    return DoubleColumn.create(name() + "[binned]", counts);
}
Example 16
Source File: InMemoryCacheStatistics.java From alfresco-repository with GNU Lesser General Public License v3.0 | 4 votes |
@Override public void add(String cacheName, TransactionStats txStats) { boolean registerCacheStats = false; WriteLock writeLock = getWriteLock(cacheName); writeLock.lock(); try { // Are we adding new stats for a previously unseen cache? registerCacheStats = !cacheToStatsMap.containsKey(cacheName); if (registerCacheStats) { // There are no statistics yet for this cache. cacheToStatsMap.put(cacheName, new HashMap<OpType, OperationStats>()); } Map<OpType, OperationStats> cacheStats = cacheToStatsMap.get(cacheName); for (OpType opType : OpType.values()) { SummaryStatistics txOpSummary = txStats.getTimings(opType); long count = txOpSummary.getN(); double totalTime = txOpSummary.getSum(); OperationStats oldStats = cacheStats.get(opType); OperationStats newStats; if (oldStats == null) { newStats = new OperationStats(totalTime, count); } else { newStats = new OperationStats(oldStats, totalTime, count); } cacheStats.put(opType, newStats); } } finally { writeLock.unlock(); } if (registerCacheStats) { // We've added stats for a previously unseen cache, raise an event // so that an MBean for the cache may be registered, for example. applicationContext.publishEvent(new CacheStatisticsCreated(this, cacheName)); } }
Example 17
Source File: OneWayAnova.java From astor with GNU General Public License v2.0 | 4 votes |
/** * This method actually does the calculations (except P-value). * * @param categoryData <code>Collection</code> of <code>double[]</code> * arrays each containing data for one category * @param allowOneElementData if true, allow computation for one catagory * only or for one data element per category * @return computed AnovaStats * @throws NullArgumentException if <code>categoryData</code> is <code>null</code> * @throws DimensionMismatchException if <code>allowOneElementData</code> is false and the number of * categories is less than 2 or a contained SummaryStatistics does not contain * at least two values */ private AnovaStats anovaStats(final Collection<SummaryStatistics> categoryData, final boolean allowOneElementData) throws NullArgumentException, DimensionMismatchException { MathUtils.checkNotNull(categoryData); if (!allowOneElementData) { // check if we have enough categories if (categoryData.size() < 2) { throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED, categoryData.size(), 2); } // check if each category has enough data for (final SummaryStatistics array : categoryData) { if (array.getN() <= 1) { throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED, (int) array.getN(), 2); } } } int dfwg = 0; double sswg = 0; double totsum = 0; double totsumsq = 0; int totnum = 0; for (final SummaryStatistics data : categoryData) { final double sum = data.getSum(); final double sumsq = data.getSumsq(); final int num = (int) data.getN(); totnum += num; totsum += sum; totsumsq += sumsq; dfwg += num - 1; final double ss = sumsq - ((sum * sum) / num); sswg += ss; } final double sst = totsumsq - ((totsum * totsum) / totnum); final double ssbg = sst - sswg; final int dfbg = categoryData.size() - 1; final double msbg = ssbg / dfbg; final double mswg = sswg / dfwg; final double F = msbg / mswg; return new AnovaStats(dfbg, dfwg, F); }
Example 18
Source File: OneWayAnova.java From astor with GNU General Public License v2.0 | 4 votes |
/** * This method actually does the calculations (except P-value). * * @param categoryData <code>Collection</code> of <code>double[]</code> * arrays each containing data for one category * @param allowOneElementData if true, allow computation for one catagory * only or for one data element per category * @return computed AnovaStats * @throws NullArgumentException if <code>categoryData</code> is <code>null</code> * @throws DimensionMismatchException if <code>allowOneElementData</code> is false and the number of * categories is less than 2 or a contained SummaryStatistics does not contain * at least two values */ private AnovaStats anovaStats(final Collection<SummaryStatistics> categoryData, final boolean allowOneElementData) throws NullArgumentException, DimensionMismatchException { MathUtils.checkNotNull(categoryData); if (!allowOneElementData) { // check if we have enough categories if (categoryData.size() < 2) { throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED, categoryData.size(), 2); } // check if each category has enough data for (final SummaryStatistics array : categoryData) { if (array.getN() <= 1) { throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED, (int) array.getN(), 2); } } } int dfwg = 0; double sswg = 0; double totsum = 0; double totsumsq = 0; int totnum = 0; for (final SummaryStatistics data : categoryData) { final double sum = data.getSum(); final double sumsq = data.getSumsq(); final int num = (int) data.getN(); totnum += num; totsum += sum; totsumsq += sumsq; dfwg += num - 1; final double ss = sumsq - ((sum * sum) / num); sswg += ss; } final double sst = totsumsq - ((totsum * totsum) / totnum); final double ssbg = sst - sswg; final int dfbg = categoryData.size() - 1; final double msbg = ssbg / dfbg; final double mswg = sswg / dfwg; final double F = msbg / mswg; return new AnovaStats(dfbg, dfwg, F); }
Example 19
Source File: OneWayAnova.java From astor with GNU General Public License v2.0 | 4 votes |
/** * This method actually does the calculations (except P-value). * * @param categoryData <code>Collection</code> of <code>double[]</code> * arrays each containing data for one category * @param allowOneElementData if true, allow computation for one catagory * only or for one data element per category * @return computed AnovaStats * @throws NullArgumentException if <code>categoryData</code> is <code>null</code> * @throws DimensionMismatchException if <code>allowOneElementData</code> is false and the number of * categories is less than 2 or a contained SummaryStatistics does not contain * at least two values */ private AnovaStats anovaStats(final Collection<SummaryStatistics> categoryData, final boolean allowOneElementData) throws NullArgumentException, DimensionMismatchException { MathUtils.checkNotNull(categoryData); if (!allowOneElementData) { // check if we have enough categories if (categoryData.size() < 2) { throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED, categoryData.size(), 2); } // check if each category has enough data for (final SummaryStatistics array : categoryData) { if (array.getN() <= 1) { throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED, (int) array.getN(), 2); } } } int dfwg = 0; double sswg = 0; double totsum = 0; double totsumsq = 0; int totnum = 0; for (final SummaryStatistics data : categoryData) { final double sum = data.getSum(); final double sumsq = data.getSumsq(); final int num = (int) data.getN(); totnum += num; totsum += sum; totsumsq += sumsq; dfwg += num - 1; final double ss = sumsq - ((sum * sum) / num); sswg += ss; } final double sst = totsumsq - ((totsum * totsum) / totnum); final double ssbg = sst - sswg; final int dfbg = categoryData.size() - 1; final double msbg = ssbg / dfbg; final double mswg = sswg / dfwg; final double F = msbg / mswg; return new AnovaStats(dfbg, dfwg, F); }
Example 20
Source File: DepthEstimator.java From cryptotrader with GNU Affero General Public License v3.0 | 3 votes |
/**
 * Estimates a deviation figure from recent trades: {@code |mean| + stddev * t}, where
 * mean and stddev are taken over the non-null returns computed from collapsed trade
 * prices and {@code t} is the inverse cumulative probability of a t-distribution with
 * {@code n - 1} degrees of freedom at {@code PROBABILITY}.
 *
 * @param context source of the trades
 * @param request supplies the current and target times
 * @return the figure scaled to {@code SCALE} with {@code HALF_UP} rounding, or
 *         {@code null} when at most one return is available or the result is not finite
 */
@VisibleForTesting
BigDecimal calculateDeviation(Context context, Request request) {

    // The sampling window ends at the current time and spans getSamples() intervals,
    // each as long as the gap between the current and the target time.
    Instant windowEnd = request.getCurrentTime();
    Duration step = Duration.between(windowEnd, request.getTargetTime());
    Instant windowStart = request.getCurrentTime().minus(step.toMillis() * getSamples(), MILLIS);

    // Trades are fetched from one extra interval before the window start.
    List<Trade> trades = context.listTrades(getKey(context, request), windowStart.minus(step));
    NavigableMap<Instant, BigDecimal> prices = collapsePrices(trades, step, windowStart, windowEnd, false);
    NavigableMap<Instant, BigDecimal> returns = calculateReturns(prices);

    SummaryStatistics stats = new SummaryStatistics();
    for (BigDecimal value : returns.values()) {
        if (value != null) {
            stats.addValue(value.doubleValue());
        }
    }

    long observations = stats.getN();
    if (observations <= 1) {
        return null;
    }

    double mean = stats.getMean();
    double deviation = stats.getStandardDeviation();
    double quantile = new TDistribution(observations - 1).inverseCumulativeProbability(PROBABILITY);
    double total = Math.abs(mean) + (deviation * quantile);

    if (!Double.isFinite(total)) {
        return null;
    }
    return BigDecimal.valueOf(total).setScale(SCALE, HALF_UP);
}