Java Code Examples for org.apache.commons.math.stat.descriptive.DescriptiveStatistics#addValue()
The following examples show how to use
org.apache.commons.math.stat.descriptive.DescriptiveStatistics#addValue() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HeatMapTask.java From mzmine3 with GNU General Public License v2.0 | 6 votes |
/**
 * Scales each column of the peak-list matrix in place by its standard deviation
 * (unit-variance scaling, no mean-centering). Infinite and NaN entries are
 * excluded from the standard-deviation estimate but are left untouched in the
 * matrix. Columns with zero standard deviation are left unchanged.
 *
 * @param peakList matrix indexed as [column][row]; modified in place
 */
private void scale(double[][] peakList) {
    DescriptiveStatistics stdDevStats = new DescriptiveStatistics();
    for (int col = 0; col < peakList.length; col++) {
        stdDevStats.clear();
        for (int row = 0; row < peakList[col].length; row++) {
            double value = peakList[col][row];
            if (!Double.isInfinite(value) && !Double.isNaN(value)) {
                stdDevStats.addValue(value);
            }
        }
        double stdDev = stdDevStats.getStandardDeviation();
        // Hoisted out of the row loop: the zero check is loop-invariant, and
        // skipping the whole loop avoids pointless per-element divisions.
        if (stdDev != 0) {
            for (int row = 0; row < peakList[col].length; row++) {
                peakList[col][row] = peakList[col][row] / stdDev;
            }
        }
    }
}
Example 2
Source File: StatUtilsTest.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Run with 77 random values, assuming that the outcome has a mean of 0 and a standard deviation of 1 with a * precision of 1E-10. */ @Test public void testNormalize2() { // create an sample with 77 values int length = 77; double sample[] = new double[length]; for (int i = 0; i < length; i++) { sample[i] = Math.random(); } // normalize this sample double standardizedSample[] = StatUtils.normalize(sample); DescriptiveStatistics stats = new DescriptiveStatistics(); // Add the data from the array for (int i = 0; i < length; i++) { stats.addValue(standardizedSample[i]); } // the calculations do have a limited precision double distance = 1E-10; // check the mean an standard deviation Assert.assertEquals(0.0, stats.getMean(), distance); Assert.assertEquals(1.0, stats.getStandardDeviation(), distance); }
Example 3
Source File: CollectionUtils.java From argument-reasoning-comprehension-task with Apache License 2.0 | 6 votes |
/**
 * Expands a {@link Frequency} table into a {@link DescriptiveStatistics}
 * instance by adding each numeric value once per its observed count.
 * Values that are not {@link Number}s are skipped.
 *
 * @param frequency frequency table; values are expected to be Numbers
 * @return statistics over the expanded observations
 */
public static DescriptiveStatistics frequencyToStatistics(Frequency frequency) {
    Iterator<Comparable<?>> comparableIterator = frequency.valuesIterator();
    DescriptiveStatistics result = new DescriptiveStatistics();
    while (comparableIterator.hasNext()) {
        Comparable<?> next = comparableIterator.next();
        // Hoisted the instanceof test out of the count loop — it is invariant
        // per value, and skipping non-numbers avoids iterating for nothing.
        if (next instanceof Number) {
            double value = ((Number) next).doubleValue();
            long count = frequency.getCount(next);
            // Loop index is long to match getCount()'s return type; an int
            // index would wrap (and loop forever) for counts > Integer.MAX_VALUE.
            for (long i = 0; i < count; i++) {
                result.addValue(value);
            }
        }
    }
    return result;
}
Example 4
Source File: StatUtils.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Normalize (standardize) the series, so in the end it is having a mean of 0 and a standard deviation of 1. * * @param sample Sample to normalize. * @return normalized (standardized) sample. * @since 2.2 */ public static double[] normalize(final double[] sample) { DescriptiveStatistics stats = new DescriptiveStatistics(); // Add the data from the series to stats for (int i = 0; i < sample.length; i++) { stats.addValue(sample[i]); } // Compute mean and standard deviation double mean = stats.getMean(); double standardDeviation = stats.getStandardDeviation(); // initialize the standardizedSample, which has the same length as the sample double[] standardizedSample = new double[sample.length]; for (int i = 0; i < sample.length; i++) { // z = (x- mean)/standardDeviation standardizedSample[i] = (sample[i] - mean) / standardDeviation; } return standardizedSample; }
Example 5
Source File: SimpleRPNDHeterogeneousReductionStumpExperimentRunner.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Runs a single one-step reduction experiment and writes summary statistics
 * (min/max/mean/std of error rate and training time) back to the experiment
 * record.
 *
 * @param exp the experiment record to run and update
 * @throws Exception propagated from experiment execution or the update call
 */
public void conductExperiment(final MySQLReductionExperiment exp) throws Exception {
    List<Map<String, Object>> mccvResults = Util.conductSingleOneStepReductionExperiment(exp.getExperiment());
    DescriptiveStatistics errorRate = new DescriptiveStatistics();
    DescriptiveStatistics runtime = new DescriptiveStatistics();
    for (Map<String, Object> result : mccvResults) {
        // NOTE(review): unchecked casts — assumes "errorRate" is stored as a
        // Double and "trainTime" as a Long; verify against Util's result map.
        errorRate.addValue((double) result.get("errorRate"));
        runtime.addValue((long) result.get("trainTime"));
    }
    /* prepare values for experiment update */
    Map<String, Object> values = new HashMap<>();
    values.put(ERROR_RATE_MIN, errorRate.getMin());
    values.put(ERROR_RATE_MAX, errorRate.getMax());
    values.put(ERROR_RATE_MEAN, errorRate.getMean());
    values.put(ERROR_RATE_STD, errorRate.getStandardDeviation());
    values.put(RUNTIME_MIN, runtime.getMin());
    values.put(RUNTIME_MAX, runtime.getMax());
    values.put(RUNTIME_MEAN, runtime.getMean());
    values.put(RUNTIME_STD, runtime.getStandardDeviation());
    this.updateExperiment(exp, values);
}
Example 6
Source File: StatUtilsTest.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Run with 77 random values, assuming that the outcome has a mean of 0 and a
 * standard deviation of 1 with a precision of 1E-10.
 */
@Test
public void testNormalize2() {
    // create a sample with 77 values
    int length = 77;
    double sample[] = new double[length];
    for (int i = 0; i < length; i++) {
        sample[i] = Math.random();
    }
    // normalize this sample
    double standardizedSample[] = StatUtils.normalize(sample);
    DescriptiveStatistics stats = new DescriptiveStatistics();
    // Add the data from the array
    for (int i = 0; i < length; i++) {
        stats.addValue(standardizedSample[i]);
    }
    // the calculations do have a limited precision
    double distance = 1E-10;
    // check the mean and standard deviation
    Assert.assertEquals(0.0, stats.getMean(), distance);
    Assert.assertEquals(1.0, stats.getStandardDeviation(), distance);
}
Example 7
Source File: StatUtils.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Normalize (standardize) the series, so in the end it is having a mean of 0
 * and a standard deviation of 1.
 *
 * <p>NOTE(review): if the sample has zero standard deviation (all values
 * equal) the division below yields NaN/Infinity — confirm callers never pass
 * a constant series.
 *
 * @param sample Sample to normalize.
 * @return normalized (standardized) sample.
 * @since 2.2
 */
public static double[] normalize(final double[] sample) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    // Add the data from the series to stats
    for (int i = 0; i < sample.length; i++) {
        stats.addValue(sample[i]);
    }
    // Compute mean and standard deviation
    double mean = stats.getMean();
    double standardDeviation = stats.getStandardDeviation();
    // initialize the standardizedSample, which has the same length as the sample
    double[] standardizedSample = new double[sample.length];
    for (int i = 0; i < sample.length; i++) {
        // z = (x - mean) / standardDeviation
        standardizedSample[i] = (sample[i] - mean) / standardDeviation;
    }
    return standardizedSample;
}
Example 8
Source File: HeatMapTask.java From mzmine2 with GNU General Public License v2.0 | 6 votes |
/**
 * Scales each column of the peak-list matrix in place by its standard deviation
 * (unit-variance scaling, no mean-centering). Infinite and NaN entries are
 * excluded from the standard-deviation estimate but are left untouched in the
 * matrix. Columns with zero standard deviation are left unchanged.
 *
 * @param peakList matrix indexed as [column][row]; modified in place
 */
private void scale(double[][] peakList) {
    DescriptiveStatistics stdDevStats = new DescriptiveStatistics();
    for (int col = 0; col < peakList.length; col++) {
        stdDevStats.clear();
        for (int row = 0; row < peakList[col].length; row++) {
            double value = peakList[col][row];
            if (!Double.isInfinite(value) && !Double.isNaN(value)) {
                stdDevStats.addValue(value);
            }
        }
        double stdDev = stdDevStats.getStandardDeviation();
        // Hoisted out of the row loop: the zero check is loop-invariant, and
        // skipping the whole loop avoids pointless per-element divisions.
        if (stdDev != 0) {
            for (int row = 0; row < peakList[col].length; row++) {
                peakList[col][row] = peakList[col][row] / stdDev;
            }
        }
    }
}
Example 9
Source File: StatUtilsTest.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Run with 77 random values, assuming that the outcome has a mean of 0 and a
 * standard deviation of 1 with a precision of 1E-10.
 * (JUnit 3 style: no @Test annotation, inherited assertEquals.)
 */
public void testNormalize2() {
    // create a sample with 77 values
    int length = 77;
    double sample[] = new double[length];
    for (int i = 0; i < length; i++) {
        sample[i] = Math.random();
    }
    // normalize this sample
    double standardizedSample[] = StatUtils.normalize(sample);
    DescriptiveStatistics stats = new DescriptiveStatistics();
    // Add the data from the array
    for (int i = 0; i < length; i++) {
        stats.addValue(standardizedSample[i]);
    }
    // the calculations do have a limited precision
    double distance = 1E-10;
    // check the mean and standard deviation
    assertEquals(0.0, stats.getMean(), distance);
    assertEquals(1.0, stats.getStandardDeviation(), distance);
}
Example 10
Source File: StatUtils.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Normalize (standardize) the series, so in the end it is having a mean of 0
 * and a standard deviation of 1.
 *
 * <p>NOTE(review): if the sample has zero standard deviation (all values
 * equal) the division below yields NaN/Infinity — confirm callers never pass
 * a constant series.
 *
 * @param sample Sample to normalize.
 * @return normalized (standardized) sample.
 * @since 2.2
 */
public static double[] normalize(final double[] sample) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    // Add the data from the series to stats
    for (int i = 0; i < sample.length; i++) {
        stats.addValue(sample[i]);
    }
    // Compute mean and standard deviation
    double mean = stats.getMean();
    double standardDeviation = stats.getStandardDeviation();
    // initialize the standardizedSample, which has the same length as the sample
    double[] standardizedSample = new double[sample.length];
    for (int i = 0; i < sample.length; i++) {
        // z = (x - mean) / standardDeviation
        standardizedSample[i] = (sample[i] - mean) / standardDeviation;
    }
    return standardizedSample;
}
Example 11
Source File: ExpressionFigure.java From biojava with GNU Lesser General Public License v2.1 | 5 votes |
/**
 * Populates the figure with survival data for the given metadata variable,
 * sorts the records by that variable's value, and derives the axis ranges
 * (X from the record count, Y from the value range rounded outward to whole
 * numbers). Triggers a repaint when done.
 *
 * @param title    column titles for the figure
 * @param _siList  survival records to display; copied into an internal list
 * @param variable name of the metadata variable plotted on the Y axis
 */
public void setSurvivalInfo(ArrayList<String> title, ArrayList<SurvivalInfo> _siList, String variable) {
    this.siList = new ArrayList<SurvivalInfo>();
    this.title = title;
    this.variable = variable;
    minX = 0.0;
    maxX = (double) _siList.size();
    minY = 0.0;
    maxY = null;
    DescriptiveStatistics ds = new DescriptiveStatistics();
    for (SurvivalInfo si : _siList) {
        this.siList.add(si);
        // NOTE(review): parseDouble throws NumberFormatException (or NPE for a
        // missing value) if the metadata is non-numeric — confirm callers
        // guarantee this variable is always present and numeric.
        String v = si.getOriginalMetaData(variable);
        Double value = Double.parseDouble(v);
        ds.addValue(value);
        if (maxTime == null || maxTime < si.getTime()) {
            maxTime = si.getTime();
        }
    }
    // Sort the internal copy by the selected variable's value.
    SurvivalInfoValueComparator sivc = new SurvivalInfoValueComparator(variable);
    Collections.sort(this.siList, sivc);
    mean = ds.getMean();
    minY = ds.getMin();
    maxY = ds.getMax();
    // Round the Y range outward to whole numbers. (Redundant (double) casts
    // removed: Math.floor/Math.ceil already return double.)
    minY = Math.floor(minY);
    maxY = Math.ceil(maxY);
    this.repaint();
}
Example 12
Source File: APARegionStatistics.java From Juicebox with MIT License | 5 votes |
/**
 * Collects every entry of the given matrix into a single
 * {@link DescriptiveStatistics} accumulator.
 *
 * @param x matrix of values (may be ragged)
 * @return statistics over all entries of {@code x}
 */
public static DescriptiveStatistics statistics(double[][] x) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int i = 0; i < x.length; i++) {
        for (int j = 0; j < x[i].length; j++) {
            stats.addValue(x[i][j]);
        }
    }
    return stats;
}
Example 13
Source File: APARegionStatistics.java From JuiceboxLegacy with MIT License | 5 votes |
/**
 * Folds all values of the matrix into one {@link DescriptiveStatistics}
 * instance, row by row.
 *
 * @param x matrix of values (may be ragged)
 * @return statistics over all entries of {@code x}
 */
public static DescriptiveStatistics statistics(double[][] x) {
    DescriptiveStatistics result = new DescriptiveStatistics();
    for (double[] row : x) {
        for (double value : row) {
            result.addValue(value);
        }
    }
    return result;
}
Example 14
Source File: MySQLEnsembleOfSimpleOneStepReductionsExperimentRunner.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Runs an ensemble-of-one-step-reductions experiment and stores the mean
 * error rate back into the experiment record.
 *
 * @param exp the experiment record to run and update
 * @throws Exception propagated from experiment execution or the update call
 */
public void conductExperiment(final MySQLEnsembleOfSimpleOneStepReductionsExperiment exp) throws Exception {
    List<Map<String, Object>> mccvResults = Util.conductEnsembleOfOneStepReductionsExperiment(exp.getExperiment());
    DescriptiveStatistics errorRate = new DescriptiveStatistics();
    DescriptiveStatistics runtime = new DescriptiveStatistics();
    for (Map<String, Object> result : mccvResults) {
        // NOTE(review): unchecked casts — assumes the error rate is stored as a
        // Double and "trainTime" as a Long; verify against Util's result map.
        errorRate.addValue((double) result.get(KEY_ERROR_RATE));
        runtime.addValue((long) result.get("trainTime"));
    }
    /* prepare values for experiment update */
    Map<String, Object> values = new HashMap<>();
    // NOTE(review): only the mean error rate is persisted; the runtime
    // statistics gathered above are never written — confirm this is intended.
    values.put(KEY_ERROR_RATE, errorRate.getMean());
    this.updateExperiment(exp, values);
}
Example 15
Source File: Step2dGoldReasonStatistics.java From argument-reasoning-comprehension-task with Apache License 2.0 | 5 votes |
/**
 * Reads the gold-annotated argument corpus, counts the number of Premise
 * annotations per argument, and prints a frequency table plus summary
 * statistics (sum, mean, standard deviation) of premises per argument.
 *
 * @param args unused
 * @throws IOException if the input corpus cannot be read
 */
public static void main(String[] args) throws IOException {
    File inputFile = new File(
            "mturk/annotation-task/data/32-reasons-batch-0001-5000-2026args-gold.xml.gz");
    // read all arguments from the original corpus
    List<StandaloneArgument> arguments = XStreamSerializer
            .deserializeArgumentListFromXML(inputFile);
    System.out.println("Arguments: " + arguments.size());
    Frequency frequency = new Frequency();
    DescriptiveStatistics statistics = new DescriptiveStatistics();
    for (StandaloneArgument argument : arguments) {
        JCas jCas = argument.getJCas();
        Collection<Premise> premises = JCasUtil.select(jCas, Premise.class);
        // track both the distribution (frequency) and the moments (statistics)
        frequency.addValue(premises.size());
        statistics.addValue(premises.size());
    }
    System.out.println(frequency);
    System.out.println(statistics.getSum());
    System.out.println(statistics.getMean());
    System.out.println(statistics.getStandardDeviation());
}
Example 16
Source File: Step3dExportGistToXMIFiles.java From argument-reasoning-comprehension-task with Apache License 2.0 | 5 votes |
/**
 * Exports arguments (reason annotations merged with their gists) as XMI files
 * plus a metadata CSV, and prints premise-count statistics.
 *
 * @param argumentsWithReasonsFile input file with reason annotations
 * @param argumentsWithGistFile    input file with gist annotations
 * @param outputDir                directory for the XMI files and metadata.csv
 * @throws Exception propagated from deserialization, pipeline, or file I/O
 */
private static void export(File argumentsWithReasonsFile, File argumentsWithGistFile,
        File outputDir) throws Exception {
    List<StandaloneArgument> arguments = ExportHelper.copyReasonAnnotationsWithGistOnly(
            argumentsWithReasonsFile, argumentsWithGistFile);
    String metaDataCSV = ExportHelper.exportMetaDataToCSV(arguments);
    FileUtils.write(new File(outputDir, "metadata.csv"), metaDataCSV, "utf-8");
    Frequency premisesFrequency = new Frequency();
    DescriptiveStatistics premisesStatistics = new DescriptiveStatistics();
    // and export them all as XMI files using standard DKPro pipeline
    for (StandaloneArgument argument : arguments) {
        JCas jCas = argument.getJCas();
        // NOTE(review): the engine description is rebuilt on every iteration
        // although its parameters never change — presumably it could be created
        // once before the loop; confirm the pipeline has no per-run state.
        SimplePipeline.runPipeline(jCas,
                AnalysisEngineFactory.createEngineDescription(
                        XmiWriter.class,
                        XmiWriter.PARAM_TARGET_LOCATION, outputDir,
                        XmiWriter.PARAM_USE_DOCUMENT_ID, true,
                        XmiWriter.PARAM_OVERWRITE, true
                ));
        // count all premises
        int count = JCasUtil.select(jCas, Premise.class).size();
        premisesStatistics.addValue(count);
        premisesFrequency.addValue(count);
    }
    System.out.println("Premises total: " + premisesStatistics.getSum());
    System.out.println("Argument: " + arguments.size());
    System.out.println(premisesFrequency);
}
Example 17
Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0 | 4 votes |
/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 */
@Test
public void testGLSEfficiency() throws Exception {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200); // Seed has been selected to generate non-trivial covariance

    // Assume model has 16 observations (will use Longley data). Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }

    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }

    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();

    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs]; // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
            1.0e-12 * cov.getNorm(), rawGenerator);

    // Now start generating models. Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta. Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.X.copy();

    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());

    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();

    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // verify that the GLS estimates are better (the asserts below check GLS,
    // not OLS, superiority)
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {
        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).getData();

        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();

        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();

        // Record squared deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);
    }

    // Verify that GLS is on average more efficient, lower variance.
    // NOTE(review): Java's `assert` keyword is silently skipped unless the JVM
    // runs with -ea; consider Assert.assertTrue so the check always executes.
    assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
    assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());
}
Example 18
Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0 | 4 votes |
/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 */
@Test
public void testGLSEfficiency() throws Exception {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200); // Seed has been selected to generate non-trivial covariance

    // Assume model has 16 observations (will use Longley data). Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }

    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }

    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();

    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs]; // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
            1.0e-12 * cov.getNorm(), rawGenerator);

    // Now start generating models. Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta. Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.X.copy();

    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());

    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();

    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // verify that the GLS estimates are better (the asserts below check GLS,
    // not OLS, superiority)
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {
        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).getData();

        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();

        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();

        // Record squared deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);
    }

    // Verify that GLS is on average more efficient, lower variance.
    // NOTE(review): Java's `assert` keyword is silently skipped unless the JVM
    // runs with -ea; consider Assert.assertTrue so the check always executes.
    assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
    assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());
}
Example 19
Source File: Step7aOriginalWarrantHITCreator.java From argument-reasoning-comprehension-task with Apache License 2.0 | 4 votes |
/**
 * Validates ID uniqueness, gathers score statistics, builds a (never shown)
 * scatter plot of logic vs. hard scores, and returns only the containers
 * whose logic score is at least 0.68.
 *
 * @param list candidate containers; must have pairwise-distinct IDs
 * @return containers with logicScore >= 0.68
 * @throws IllegalStateException if two containers share an ID
 */
@Override
protected List<ReasonClaimWarrantContainer> preFilterList(
        List<ReasonClaimWarrantContainer> list) {
    Set<String> uniqueIDs = new TreeSet<>();
    // check we have unique ID's!!!
    // NOTE(review): contains-then-add could be collapsed to !uniqueIDs.add(id)
    for (ReasonClaimWarrantContainer claimWarrantContainer : list) {
        String id = claimWarrantContainer.getReasonClaimWarrantId();
        if (uniqueIDs.contains(id)) {
            throw new IllegalStateException("Repeated ID: " + id);
        }
        uniqueIDs.add(id);
    }
    // do some statistics first
    DescriptiveStatistics statHard = new DescriptiveStatistics();
    DescriptiveStatistics statLogic = new DescriptiveStatistics();
    XYSeriesCollection dataset = new XYSeriesCollection();
    XYSeries series = new XYSeries("Random");
    for (ReasonClaimWarrantContainer container : list) {
        series.add(container.getLogicScore(), container.getHardScore());
        statHard.addValue(container.getHardScore());
        statLogic.addValue(container.getLogicScore());
    }
    dataset.addSeries(series);
    // System.out.println(statHard);
    // System.out.println(statLogic);
    // NOTE(review): everything from here to the filter builds a Swing chart
    // that is never displayed (pack/setVisible are commented out) — presumably
    // leftover debugging aid; confirm before relying on any side effect.
    JFrame jFrame = new JFrame();
    // This will create the dataset
    // based on the dataset we create the chart
    JFreeChart chart = ChartFactory
            .createScatterPlot("title", "Logic", "Hard", dataset, PlotOrientation.HORIZONTAL,
                    false, false, false);
    // we put the chart into a panel
    ChartPanel chartPanel = new ChartPanel(chart);
    // default size
    chartPanel.setPreferredSize(new java.awt.Dimension(500, 270));
    // add it to our application
    jFrame.setContentPane(chartPanel);
    // jFrame.pack();
    // jFrame.setVisible(true);
    // ok, take only those with logic >= 0.68
    return list.stream()
            .filter(reasonClaimWarrantContainer -> reasonClaimWarrantContainer.getLogicScore()
                    >= 0.68).collect(Collectors.toList());
}
Example 20
Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0 | 4 votes |
/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 */
@Test
public void testGLSEfficiency() throws Exception {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200); // Seed has been selected to generate non-trivial covariance

    // Assume model has 16 observations (will use Longley data). Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }

    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }

    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();

    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs]; // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
            1.0e-12 * cov.getNorm(), rawGenerator);

    // Now start generating models. Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta. Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.X.copy();

    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());

    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();

    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // verify that the GLS estimates are better (the asserts below check GLS,
    // not OLS, superiority)
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {
        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).getData();

        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();

        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();

        // Record squared deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);
    }

    // Verify that GLS is on average more efficient, lower variance.
    // NOTE(review): Java's `assert` keyword is silently skipped unless the JVM
    // runs with -ea; consider Assert.assertTrue so the check always executes.
    assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
    assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());
}