Java Code Examples for org.apache.commons.math3.stat.descriptive.DescriptiveStatistics#addValue()
The following examples show how to use
org.apache.commons.math3.stat.descriptive.DescriptiveStatistics#addValue().
You can go to the original project or source file by following the link above each example.
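Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: create a DescriptiveStatistics instance, feed it observations one at a time with addValue(), then read the derived statistics. The class name and sample values below are illustrative only.

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

public class AddValueSketch {
    public static void main(String[] args) {
        // Feed observations one at a time; statistics can be read at any point.
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (double value : new double[] {2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0}) {
            stats.addValue(value);
        }
        System.out.println("mean   = " + stats.getMean());
        System.out.println("stddev = " + stats.getStandardDeviation());
        System.out.println("median = " + stats.getPercentile(50));
    }
}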
Example 1
Source File: BollingerBandProcessor.java From FX-AlgorithmTrading with MIT License | 6 votes |
/**
 * Calculates the moving average (MA) and sigma values for the Bollinger Band.
 *
 * @param calcPeriod period to calculate over
 * @param closeList  list of close prices
 * @param symbol     symbol used for rounding
 * @param indicator  indicator to which the results are added
 */
private void calcBollingerBand(BollingerPeriod calcPeriod, List<Double> closeList, Symbol symbol, BollingerBandIndicator indicator) {
    // Calculate using the last N periods of data
    if (closeList.size() < calcPeriod.getPeriod()) {
        // Not enough data for the period: store NaN
        indicator.addValueData(calcPeriod, Double.NaN, Double.NaN, Double.NaN, Double.NaN);
        return;
    }
    List<Double> subList = CollectionUtility.lastSubListView(closeList, calcPeriod.getPeriod());
    DescriptiveStatistics statistics = new DescriptiveStatistics(calcPeriod.getPeriod());
    for (Double d : subList) {
        statistics.addValue(d);
    }
    double sma = symbol.roundSubPips(statistics.getMean());
    double sigma = symbol.roundSubPips(statistics.getStandardDeviation());
    double plus = sma + sigma;
    double minus = sma - sigma;

    // add data
    indicator.addValueData(calcPeriod, sma, sigma, plus, minus);
}
Example 2
Source File: StatUtilsTest.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Run with 77 random values, assuming that the outcome has a mean of 0 and
 * a standard deviation of 1 with a precision of 1E-10.
 */
@Test
public void testNormalize2() {
    // create a sample with 77 values
    int length = 77;
    double sample[] = new double[length];
    for (int i = 0; i < length; i++) {
        sample[i] = Math.random();
    }

    // normalize this sample
    double standardizedSample[] = StatUtils.normalize(sample);

    DescriptiveStatistics stats = new DescriptiveStatistics();
    // Add the data from the array
    for (int i = 0; i < length; i++) {
        stats.addValue(standardizedSample[i]);
    }

    // the calculations do have a limited precision
    double distance = 1E-10;

    // check the mean and standard deviation
    Assert.assertEquals(0.0, stats.getMean(), distance);
    Assert.assertEquals(1.0, stats.getStandardDeviation(), distance);
}
Example 3
Source File: Spectrum.java From cineast with MIT License | 6 votes |
/**
 * Finds local maxima in the spectrum and returns them as a list of
 * frequency/value pairs.
 *
 * @param threshold Threshold for the search. Values below that threshold won't be considered.
 * @param significant If true, only peaks at least two standard deviations above the mean peak value are kept.
 * @return List of frequency/value pairs for the local maxima.
 */
public List<Pair<Float, Double>> findLocalMaxima(double threshold, boolean significant) {
    List<Pair<Float, Double>> peaks = new ArrayList<>();
    for (int i = 1; i < this.spectrum.length - 1; i++) {
        if (this.spectrum[i] < threshold) {
            continue;
        }
        if (spectrum[i] > Math.max(spectrum[i + 1], spectrum[i - 1])) {
            peaks.add(this.get(i));
        }
    }

    if (significant) {
        DescriptiveStatistics statistics = new DescriptiveStatistics();
        for (Pair<Float, Double> peak : peaks) {
            statistics.addValue(peak.second);
        }
        final double mean = statistics.getMean();
        final double stddev = statistics.getStandardDeviation();
        peaks.removeIf(p -> p.second < (mean + stddev * 2));
    }

    return peaks;
}
Example 4
Source File: StatUtils.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Normalize (standardize) the sample, so it has a mean of 0 and a standard deviation of 1.
 *
 * @param sample Sample to normalize.
 * @return normalized (standardized) sample.
 * @since 2.2
 */
public static double[] normalize(final double[] sample) {
    DescriptiveStatistics stats = new DescriptiveStatistics();

    // Add the data from the series to stats
    for (int i = 0; i < sample.length; i++) {
        stats.addValue(sample[i]);
    }

    // Compute mean and standard deviation
    double mean = stats.getMean();
    double standardDeviation = stats.getStandardDeviation();

    // initialize the standardizedSample, which has the same length as the sample
    double[] standardizedSample = new double[sample.length];

    for (int i = 0; i < sample.length; i++) {
        // z = (x - mean) / standardDeviation
        standardizedSample[i] = (sample[i] - mean) / standardDeviation;
    }
    return standardizedSample;
}
Example 5
Source File: ChoquisticRelevanceLoss.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
@Override
public double loss(final List<? extends int[]> expected, final List<? extends IMultiLabelClassification> actual) {
    this.checkConsistency(expected, actual);
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int i = 0; i < expected.size(); i++) {
        stats.addValue(this.instanceLoss(expected.get(i), actual.get(i).getPrediction()));
    }
    return stats.getMean();
}
Example 6
Source File: MovingMedianEvaluator.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override
public Object doWork(Object first, Object second) throws IOException {
    if (null == first) {
        throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - null found for the first value", toExpression(constructingFactory)));
    }
    if (null == second) {
        throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - null found for the second value", toExpression(constructingFactory)));
    }
    if (!(first instanceof List<?>)) {
        throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the first value, expecting a List", toExpression(constructingFactory), first.getClass().getSimpleName()));
    }
    if (!(second instanceof Number)) {
        throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the second value, expecting a Number", toExpression(constructingFactory), second.getClass().getSimpleName()));
    }

    List<?> values = (List<?>) first;
    int window = ((Number) second).intValue();

    List<Number> moving = new ArrayList<>();
    DescriptiveStatistics slider = new DescriptiveStatistics(window);
    Percentile percentile = new Percentile();
    for (Object value : values) {
        slider.addValue(((Number) value).doubleValue());
        if (slider.getN() >= window) {
            double median = percentile.evaluate(slider.getValues(), 50);
            moving.add(median);
        }
    }

    return moving;
}
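A note on the constructor used above: passing a window size to DescriptiveStatistics makes it a rolling window, so once capacity is reached each addValue() call discards the oldest stored value, and getN() and getValues() reflect only the most recent window. A small sketch of that behaviour (the values and window size are arbitrary):

// Rolling window of size 3: only the 3 most recent values are retained.
DescriptiveStatistics slider = new DescriptiveStatistics(3);
for (double v : new double[] {1.0, 2.0, 3.0, 4.0, 5.0}) {
    slider.addValue(v);
}
// After adding 5 values, the window holds {3.0, 4.0, 5.0}
System.out.println(slider.getN());    // 3
System.out.println(slider.getMean()); // 4.0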
Example 7
Source File: MovingMADEvaluator.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override
public Object doWork(Object first, Object second) throws IOException {
    if (null == first) {
        throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - null found for the first value", toExpression(constructingFactory)));
    }
    if (null == second) {
        throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - null found for the second value", toExpression(constructingFactory)));
    }
    if (!(first instanceof List<?>)) {
        throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the first value, expecting a List", toExpression(constructingFactory), first.getClass().getSimpleName()));
    }
    if (!(second instanceof Number)) {
        throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the second value, expecting a Number", toExpression(constructingFactory), second.getClass().getSimpleName()));
    }

    List<?> values = (List<?>) first;
    int window = ((Number) second).intValue();

    List<Number> moving = new ArrayList<>();
    DescriptiveStatistics slider = new DescriptiveStatistics(window);
    for (Object value : values) {
        slider.addValue(((Number) value).doubleValue());
        if (slider.getN() >= window) {
            double[] doubles = slider.getValues();
            double mean = slider.getMean();
            double total = 0;
            for (double d : doubles) {
                total += Math.abs(d - mean);
            }
            moving.add(total / window);
        }
    }

    return moving;
}
Example 8
Source File: StellarStatisticsFunctionsTest.java From metron with Apache License 2.0 | 5 votes |
@Test
public void testMergeProviders() {
    List<StatisticsProvider> providers = new ArrayList<>();

    /*
     * Create 10 providers, each with a sample drawn from a gaussian distribution.
     * Update the reference stats from commons-math as well, so the merged provider
     * can be validated against them.
     */
    GaussianRandomGenerator gaussian = new GaussianRandomGenerator(new MersenneTwister(1L));
    SummaryStatistics sStatistics = new SummaryStatistics();
    DescriptiveStatistics dStatistics = new DescriptiveStatistics();
    for (int i = 0; i < 10; ++i) {
        List<Double> sample = new ArrayList<>();
        for (int j = 0; j < 100; ++j) {
            double s = gaussian.nextNormalizedDouble();
            sample.add(s);
            sStatistics.addValue(s);
            dStatistics.addValue(s);
        }
        StatisticsProvider provider =
            (StatisticsProvider) run("STATS_ADD(STATS_INIT(), " + Joiner.on(",").join(sample) + ")", new HashMap<>());
        providers.add(provider);
    }

    /* Merge the providers and validate */
    Map<String, Object> providerVariables = new HashMap<>();
    for (int i = 0; i < providers.size(); ++i) {
        providerVariables.put("provider_" + i, providers.get(i));
    }
    StatisticsProvider mergedProvider =
        (StatisticsProvider) run("STATS_MERGE([" + Joiner.on(",").join(providerVariables.keySet()) + "])", providerVariables);
    OnlineStatisticsProviderTest.validateStatisticsProvider(mergedProvider, sStatistics, dStatistics);
}
Example 9
Source File: FixedCommitmentPolicy.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
@Override
public void updatePath(final ILabeledPath<N, A> path, final Double playout, final int pathLength) {
    for (N node : path.getNodes()) {
        DescriptiveStatistics statsOfNode = this.observationsPerNode.computeIfAbsent(node, n -> new DescriptiveStatistics());
        statsOfNode.addValue(playout);
    }
}
Example 10
Source File: DNGMCTSPluginModel.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
public DescriptiveStatistics getObservationStatisticsOfNode(final String node) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (double val : this.listOfObersvationsPerNode.get(node)) {
        stats.addValue(val);
    }
    return stats;
}
Example 11
Source File: OWARelevanceLoss.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
@Override
public double loss(final List<? extends int[]> expected, final List<? extends IMultiLabelClassification> actual) {
    this.checkConsistency(expected, actual);
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int i = 0; i < expected.size(); i++) {
        stats.addValue(this.instanceLoss(expected.get(i), actual.get(i).getPrediction()));
    }
    return stats.getMean();
}
Example 12
Source File: PLNetDyadRanker.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Computes the average error on a set of dyad rankings in terms of the negative
 * log likelihood (NLL).
 *
 * @param drTest Test data on which the error should be computed, given as a
 *               {@link List} of {@link IDyadRankingInstance}
 * @return Average error on the given test data
 */
private double computeAvgError(final List<INDArray> drTest) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (INDArray dyadRankingInstance : drTest) {
        INDArray outputs = this.plNet.output(dyadRankingInstance);
        outputs = outputs.transpose();
        double score = PLNetLoss.computeLoss(outputs).getDouble(0);
        stats.addValue(score);
    }
    return stats.getMean();
}
Example 13
Source File: ColumnarStructureX.java From mmtf-spark with Apache License 2.0 | 5 votes |
/**
 * Returns z-scores for B-factors (normalized B-factors).
 *
 * Critical z-score values:
 *   Confidence level   Tail area   z critical
 *   90%                0.05        +-1.645
 *   95%                0.025       +-1.96
 *   99%                0.005       +-2.576
 *
 * @return normalized B-factors
 */
public float[] getNormalizedbFactors() {
    if (normalizedbFactors == null) {
        normalizedbFactors = new float[getNumAtoms()];

        float[] bFactors = getbFactors();
        String[] types = getEntityTypes();

        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (int i = 0; i < getNumAtoms(); i++) {
            if (!(types[i].equals("WAT"))) {
                stats.addValue(bFactors[i]);
            }
        }
        double mean = stats.getMean();
        double stddev = stats.getStandardDeviation();

        if (stddev > EPSILON) {
            for (int i = 0; i < getNumAtoms(); i++) {
                normalizedbFactors[i] = (float) ((bFactors[i] - mean) / stddev);
            }
        } else {
            Arrays.fill(normalizedbFactors, Float.MAX_VALUE);
        }
    }
    return normalizedbFactors;
}
Example 14
Source File: Main.java From Java-for-Data-Science with MIT License | 5 votes |
public void usingApacaheCommonstoCalculateMedian() {
    // Using Apache Commons to find the median
    double[] testData = {12.5, 18.3, 11.2, 19.0, 22.1, 14.3, 16.2, 12.5, 17.8, 16.5, 12.5};
    DescriptiveStatistics statTest = new SynchronizedDescriptiveStatistics();
    for (double num : testData) {
        statTest.addValue(num);
    }
    out.println("The median is " + statTest.getPercentile(50));
}
Example 15
Source File: Writer.java From metron with Apache License 2.0 | 5 votes |
public DescriptiveStatistics getStats(List<Double> avg) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (Double d : avg) {
        if (d == null || Double.isNaN(d)) {
            continue;
        }
        stats.addValue(d);
    }
    return stats;
}
Example 16
Source File: StableRandomGeneratorTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * If alpha = 1, then it must be a Cauchy distribution.
 */
@Test
public void testCauchyCase() {
    StableRandomGenerator generator = new StableRandomGenerator(rg, 1d, 0.0);
    DescriptiveStatistics summary = new DescriptiveStatistics();

    for (int i = 0; i < sampleSize; ++i) {
        double sample = generator.nextNormalizedDouble();
        summary.addValue(sample);
    }

    // Standard Cauchy distribution should have zero median and mode
    double median = summary.getPercentile(50);
    Assert.assertEquals(0.0, median, 0.2);
}
Example 17
Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0 | 4 votes |
/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 */
@Test
public void testGLSEfficiency() {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance

    // Assume model has 16 observations (will use Longley data). Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }

    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }

    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();

    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
        1.0e-12 * cov.getNorm(), rawGenerator);

    // Now start generating models. Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta. Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();

    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());

    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();

    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // verify that GLS estimates are better on average
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {

        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).toArray();

        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();

        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();

        // Record deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);
    }

    // Verify that GLS is on average more efficient, lower variance
    assert (olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
    assert (olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());
}
Example 18
Source File: SalesforceSource.java From incubator-gobblin with Apache License 2.0 | 4 votes |
String generateSpecifiedPartitions(Histogram histogram, int minTargetPartitionSize, int maxPartitions,
    long lowWatermark, long expectedHighWatermark) {

    int interval = computeTargetPartitionSize(histogram, minTargetPartitionSize, maxPartitions);
    int totalGroups = histogram.getGroups().size();

    log.info("Histogram total record count: " + histogram.totalRecordCount);
    log.info("Histogram total groups: " + totalGroups);
    log.info("maxPartitions: " + maxPartitions);
    log.info("interval: " + interval);

    List<HistogramGroup> groups = histogram.getGroups();
    List<String> partitionPoints = new ArrayList<>();
    DescriptiveStatistics statistics = new DescriptiveStatistics();

    int count = 0;
    HistogramGroup group;
    Iterator<HistogramGroup> it = groups.iterator();

    while (it.hasNext()) {
        group = it.next();
        if (count == 0) {
            // Add a new partition point
            partitionPoints.add(Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
        }

        /**
         * Greedy algorithm: keep adding groups until the partition exceeds twice the interval size.
         * Proof: assuming the nth group violates 2 x interval size, then all groups from the 0th to the (n-1)th, plus the nth group,
         * have a total size larger than or equal to interval x 2. Hence, we are saturating all intervals (with the original size)
         * without leaving any unused space in between. We could choose x3, x4... but it is not space efficient.
         */
        if (count != 0 && count + group.count >= 2 * interval) {
            // Summarize current group
            statistics.addValue(count);
            // A step-in start
            partitionPoints.add(Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
            count = group.count;
        } else {
            // Add group into current partition
            count += group.count;
        }

        if (count >= interval) {
            // Summarize current group
            statistics.addValue(count);
            // A fresh start next time
            count = 0;
        }
    }

    if (partitionPoints.isEmpty()) {
        throw new RuntimeException("Unexpected empty partition list");
    }

    if (count > 0) {
        // Summarize last group
        statistics.addValue(count);
    }

    // Add global high watermark as last point
    partitionPoints.add(Long.toString(expectedHighWatermark));

    log.info("Dynamic partitioning statistics: ");
    log.info("data: " + Arrays.toString(statistics.getValues()));
    log.info(statistics.toString());
    String specifiedPartitions = Joiner.on(",").join(partitionPoints);
    log.info("Calculated specified partitions: " + specifiedPartitions);
    return specifiedPartitions;
}