org.apache.commons.math3.random.RandomGenerator#setSeed

Source File: PercentileTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Checks that a percentile computed from data stored on the statistic
 * equals the one computed by passing the same data directly to
 * {@code evaluate(double[])}, for every sample size, for the 50th and 95th
 * percentiles, and for every estimation type.
 */
@Test
public void testStoredVsDirect() {
    // Fixed seed so the sampled data is the same on every run.
    final RandomGenerator generator = new JDKRandomGenerator();
    generator.setSeed(Long.MAX_VALUE);
    final double[] quantiles = { 50d, 95d };
    for (final int sampleSize : sampleSizes) {
        final NormalDistribution normal = new NormalDistribution(generator, 4000, 50);
        final double[] data = normal.sample(sampleSize);
        for (final double quantile : quantiles) {
            for (final Percentile.EstimationType type : Percentile.EstimationType.values()) {
                reset(quantile, type);

                // Path 1: data stored on the statistic, then evaluated.
                final Percentile stored = getUnivariateStatistic();
                stored.setData(data);
                final double storedResult = stored.evaluate();
                stored.setData(null);

                // Path 2: data handed straight to evaluate().
                final Percentile direct = getUnivariateStatistic();
                final double directResult = direct.evaluate(data);

                final String label = "Sample=" + sampleSize + ",P=" + quantile + " e=" + type;
                Assert.assertEquals(label, storedResult, directResult, 0d);
            }
        }
    }
}

Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Ranks each fixture array with a {@code NaturalRanking} built from
 * {@code NaNStrategy.FIXED} and a seeded generator, and compares the
 * result with the expected ranks using a zero tolerance.
 */
@Test
public void testNaNsFixedTiesRandom() {
    // Fixed seed so the generator-driven part of the ranking is repeatable.
    final RandomGenerator seeded = new JDKRandomGenerator();
    seeded.setSeed(1000);
    final NaturalRanking naturalRanking = new NaturalRanking(NaNStrategy.FIXED, seeded);

    // Every call goes through the same seeded ranking instance; keep this order.
    TestUtils.assertEquals(new double[] { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 },
            naturalRanking.rank(exampleData), 0d);
    TestUtils.assertEquals(new double[] { 1, 2, 4, 3, 5 },
            naturalRanking.rank(tiesFirst), 0d);
    TestUtils.assertEquals(new double[] { 3, 3, 2, 1 },
            naturalRanking.rank(tiesLast), 0d);
    TestUtils.assertEquals(new double[] { 1, 2, Double.NaN, Double.NaN },
            naturalRanking.rank(multipleNaNs), 0d);
    TestUtils.assertEquals(new double[] { 3, 2, 4, 4, 6, 7, 1 },
            naturalRanking.rank(multipleTies), 0d);
    TestUtils.assertEquals(new double[] { 2, 3, 3, 3 },
            naturalRanking.rank(allSame), 0d);
}

Source File: PercentileTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Checks that a percentile computed from data stored on the statistic
 * equals the one computed by passing the same data directly to
 * {@code evaluate(double[])}, for every sample size, for the 50th and 95th
 * percentiles, and for every estimation type.
 */
@Test
public void testStoredVsDirect() {
    // Fixed seed so the sampled data is the same on every run.
    final RandomGenerator generator = new JDKRandomGenerator();
    generator.setSeed(Long.MAX_VALUE);
    final double[] quantiles = { 50d, 95d };
    for (final int sampleSize : sampleSizes) {
        final NormalDistribution normal = new NormalDistribution(generator, 4000, 50);
        final double[] data = normal.sample(sampleSize);
        for (final double quantile : quantiles) {
            for (final Percentile.EstimationType type : Percentile.EstimationType.values()) {
                reset(quantile, type);

                // Path 1: data stored on the statistic, then evaluated.
                final Percentile stored = getUnivariateStatistic();
                stored.setData(data);
                final double storedResult = stored.evaluate();
                stored.setData(null);

                // Path 2: data handed straight to evaluate().
                final Percentile direct = getUnivariateStatistic();
                final double directResult = direct.evaluate(data);

                final String label = "Sample=" + sampleSize + ",P=" + quantile + " e=" + type;
                Assert.assertEquals(label, storedResult, directResult, 0d);
            }
        }
    }
}

Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Ranks each fixture array with a {@code NaturalRanking} built from
 * {@code NaNStrategy.FIXED} and a seeded generator, and compares the
 * result with the expected ranks using a zero tolerance.
 */
@Test
public void testNaNsFixedTiesRandom() {
    // Fixed seed so the generator-driven part of the ranking is repeatable.
    final RandomGenerator seeded = new JDKRandomGenerator();
    seeded.setSeed(1000);
    final NaturalRanking naturalRanking = new NaturalRanking(NaNStrategy.FIXED, seeded);

    // Every call goes through the same seeded ranking instance; keep this order.
    TestUtils.assertEquals(new double[] { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 },
            naturalRanking.rank(exampleData), 0d);
    TestUtils.assertEquals(new double[] { 1, 2, 4, 3, 5 },
            naturalRanking.rank(tiesFirst), 0d);
    TestUtils.assertEquals(new double[] { 3, 3, 2, 1 },
            naturalRanking.rank(tiesLast), 0d);
    TestUtils.assertEquals(new double[] { 1, 2, Double.NaN, Double.NaN },
            naturalRanking.rank(multipleNaNs), 0d);
    TestUtils.assertEquals(new double[] { 3, 2, 4, 4, 6, 7, 1 },
            naturalRanking.rank(multipleTies), 0d);
    TestUtils.assertEquals(new double[] { 2, 3, 3, 3 },
            naturalRanking.rank(allSame), 0d);
}

Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Ranks each fixture array with a {@code NaturalRanking} built from
 * {@code NaNStrategy.FIXED} and a seeded generator, and compares the
 * result with the expected ranks using a zero tolerance.
 */
@Test
public void testNaNsFixedTiesRandom() {
    // Fixed seed so the generator-driven part of the ranking is repeatable.
    final RandomGenerator seeded = new JDKRandomGenerator();
    seeded.setSeed(1000);
    final NaturalRanking naturalRanking = new NaturalRanking(NaNStrategy.FIXED, seeded);

    // Every call goes through the same seeded ranking instance; keep this order.
    TestUtils.assertEquals(new double[] { 5, 4, 6, 7, 3, 8, Double.NaN, 1, 4 },
            naturalRanking.rank(exampleData), 0d);
    TestUtils.assertEquals(new double[] { 1, 1, 4, 3, 5 },
            naturalRanking.rank(tiesFirst), 0d);
    TestUtils.assertEquals(new double[] { 3, 4, 2, 1 },
            naturalRanking.rank(tiesLast), 0d);
    TestUtils.assertEquals(new double[] { 1, 2, Double.NaN, Double.NaN },
            naturalRanking.rank(multipleNaNs), 0d);
    TestUtils.assertEquals(new double[] { 3, 2, 5, 5, 7, 6, 1 },
            naturalRanking.rank(multipleTies), 0d);
    TestUtils.assertEquals(new double[] { 1, 3, 4, 4 },
            naturalRanking.rank(allSame), 0d);
}

Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Ranks each fixture array with a {@code NaturalRanking} built from
 * {@code NaNStrategy.FIXED} and a seeded generator, and compares the
 * result with the expected ranks using a zero tolerance.
 */
@Test
public void testNaNsFixedTiesRandom() {
    // Fixed seed so the generator-driven part of the ranking is repeatable.
    final RandomGenerator seeded = new JDKRandomGenerator();
    seeded.setSeed(1000);
    final NaturalRanking naturalRanking = new NaturalRanking(NaNStrategy.FIXED, seeded);

    // Every call goes through the same seeded ranking instance; keep this order.
    TestUtils.assertEquals(new double[] { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 },
            naturalRanking.rank(exampleData), 0d);
    TestUtils.assertEquals(new double[] { 1, 2, 4, 3, 5 },
            naturalRanking.rank(tiesFirst), 0d);
    TestUtils.assertEquals(new double[] { 3, 3, 2, 1 },
            naturalRanking.rank(tiesLast), 0d);
    TestUtils.assertEquals(new double[] { 1, 2, Double.NaN, Double.NaN },
            naturalRanking.rank(multipleNaNs), 0d);
    TestUtils.assertEquals(new double[] { 3, 2, 4, 4, 6, 7, 1 },
            naturalRanking.rank(multipleTies), 0d);
    TestUtils.assertEquals(new double[] { 2, 3, 3, 3 },
            naturalRanking.rank(allSame), 0d);
}

Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Ranks each fixture array with a {@code NaturalRanking} built from
 * {@code NaNStrategy.FIXED} and a seeded generator, and compares the
 * result with the expected ranks using a zero tolerance.
 */
@Test
public void testNaNsFixedTiesRandom() {
    // Fixed seed so the generator-driven part of the ranking is repeatable.
    final RandomGenerator seeded = new JDKRandomGenerator();
    seeded.setSeed(1000);
    final NaturalRanking naturalRanking = new NaturalRanking(NaNStrategy.FIXED, seeded);

    // Every call goes through the same seeded ranking instance; keep this order.
    TestUtils.assertEquals(new double[] { 5, 4, 6, 7, 3, 8, Double.NaN, 1, 4 },
            naturalRanking.rank(exampleData), 0d);
    TestUtils.assertEquals(new double[] { 1, 1, 4, 3, 5 },
            naturalRanking.rank(tiesFirst), 0d);
    TestUtils.assertEquals(new double[] { 3, 4, 2, 1 },
            naturalRanking.rank(tiesLast), 0d);
    TestUtils.assertEquals(new double[] { 1, 2, Double.NaN, Double.NaN },
            naturalRanking.rank(multipleNaNs), 0d);
    TestUtils.assertEquals(new double[] { 3, 2, 5, 5, 7, 6, 1 },
            naturalRanking.rank(multipleTies), 0d);
    TestUtils.assertEquals(new double[] { 1, 3, 4, 4 },
            naturalRanking.rank(allSame), 0d);
}

Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Ranks each fixture array with a {@code NaturalRanking} built from
 * {@code NaNStrategy.FIXED} and a seeded generator, and compares the
 * result with the expected ranks using a zero tolerance.
 */
@Test
public void testNaNsFixedTiesRandom() {
    // Fixed seed so the generator-driven part of the ranking is repeatable.
    final RandomGenerator seeded = new JDKRandomGenerator();
    seeded.setSeed(1000);
    final NaturalRanking naturalRanking = new NaturalRanking(NaNStrategy.FIXED, seeded);

    // Every call goes through the same seeded ranking instance; keep this order.
    TestUtils.assertEquals(new double[] { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 },
            naturalRanking.rank(exampleData), 0d);
    TestUtils.assertEquals(new double[] { 1, 2, 4, 3, 5 },
            naturalRanking.rank(tiesFirst), 0d);
    TestUtils.assertEquals(new double[] { 3, 3, 2, 1 },
            naturalRanking.rank(tiesLast), 0d);
    TestUtils.assertEquals(new double[] { 1, 2, Double.NaN, Double.NaN },
            naturalRanking.rank(multipleNaNs), 0d);
    TestUtils.assertEquals(new double[] { 3, 2, 4, 4, 6, 7, 1 },
            naturalRanking.rank(multipleTies), 0d);
    TestUtils.assertEquals(new double[] { 2, 3, 3, 3 },
            naturalRanking.rank(allSame), 0d);
}

Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Ranks each fixture array with a {@code NaturalRanking} built from
 * {@code NaNStrategy.FIXED} and a seeded generator, and compares the
 * result with the expected ranks using a zero tolerance.
 */
@Test
public void testNaNsFixedTiesRandom() {
    // Fixed seed so the generator-driven part of the ranking is repeatable.
    final RandomGenerator seeded = new JDKRandomGenerator();
    seeded.setSeed(1000);
    final NaturalRanking naturalRanking = new NaturalRanking(NaNStrategy.FIXED, seeded);

    // Every call goes through the same seeded ranking instance; keep this order.
    TestUtils.assertEquals(new double[] { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 },
            naturalRanking.rank(exampleData), 0d);
    TestUtils.assertEquals(new double[] { 1, 2, 4, 3, 5 },
            naturalRanking.rank(tiesFirst), 0d);
    TestUtils.assertEquals(new double[] { 3, 3, 2, 1 },
            naturalRanking.rank(tiesLast), 0d);
    TestUtils.assertEquals(new double[] { 1, 2, Double.NaN, Double.NaN },
            naturalRanking.rank(multipleNaNs), 0d);
    TestUtils.assertEquals(new double[] { 3, 2, 4, 4, 6, 7, 1 },
            naturalRanking.rank(multipleTies), 0d);
    TestUtils.assertEquals(new double[] { 2, 3, 3, 3 },
            naturalRanking.rank(allSame), 0d);
}

Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Ranks each fixture array with a {@code NaturalRanking} built from
 * {@code NaNStrategy.FIXED} and a seeded generator, and compares the
 * result with the expected ranks using a zero tolerance.
 */
@Test
public void testNaNsFixedTiesRandom() {
    // Fixed seed so the generator-driven part of the ranking is repeatable.
    final RandomGenerator seeded = new JDKRandomGenerator();
    seeded.setSeed(1000);
    final NaturalRanking naturalRanking = new NaturalRanking(NaNStrategy.FIXED, seeded);

    // Every call goes through the same seeded ranking instance; keep this order.
    TestUtils.assertEquals(new double[] { 5, 4, 6, 7, 3, 8, Double.NaN, 1, 4 },
            naturalRanking.rank(exampleData), 0d);
    TestUtils.assertEquals(new double[] { 1, 1, 4, 3, 5 },
            naturalRanking.rank(tiesFirst), 0d);
    TestUtils.assertEquals(new double[] { 3, 4, 2, 1 },
            naturalRanking.rank(tiesLast), 0d);
    TestUtils.assertEquals(new double[] { 1, 2, Double.NaN, Double.NaN },
            naturalRanking.rank(multipleNaNs), 0d);
    TestUtils.assertEquals(new double[] { 3, 2, 5, 5, 7, 6, 1 },
            naturalRanking.rank(multipleTies), 0d);
    TestUtils.assertEquals(new double[] { 1, 3, 4, 4 },
            naturalRanking.rank(allSame), 0d);
}

Source File: PartitionTest.java From sequence-mining with GNU General Public License v3.0

5 votes

/**
 * Samples 700000 transactions from four equally weighted sequences, each
 * fixed to exactly one repetition, and checks that the number of distinct
 * transactions agrees with {@code modP} for those sequences within
 * {@code EPS}.
 */
@Test
public void testInterleavingGenerator() {

	// Separately seeded random sources keep the sampling reproducible
	final Random random = new Random(1);
	final Random randomI = new Random(10);
	final RandomGenerator randomC = new JDKRandomGenerator();
	randomC.setSeed(100);

	// The sequences to interleave
	final Multiset<Sequence> sequences = HashMultiset.create();
	sequences.add(new Sequence(1, 2, 3));
	sequences.add(new Sequence(4, 5));
	sequences.add(new Sequence(6));
	sequences.add(new Sequence(7));

	// Every distinct sequence gets the same weight of 1.0
	final HashMap<Sequence, Double> weights = new HashMap<>();
	for (final Sequence sequence : sequences.elementSet()) {
		weights.put(sequence, 1.0);
	}

	// Count distribution that always yields 1, shared by all four sequences
	final EnumeratedIntegerDistribution exactlyOnce = new EnumeratedIntegerDistribution(randomC,
			new int[] { 1 }, new double[] { 1.0 });
	final Map<Sequence, EnumeratedIntegerDistribution> countDists = new HashMap<>();
	final Sequence[] countKeys = { new Sequence(1, 2, 3), new Sequence(4, 5), new Sequence(6),
			new Sequence(7) };
	for (final Sequence key : countKeys) {
		countDists.put(key, exactlyOnce);
	}

	// A set, so repeated samples of the same transaction collapse into one entry
	final HashSet<Transaction> sampled = new HashSet<>();
	for (int draw = 0; draw < 700000; draw++) {
		final Transaction transaction = TransactionGenerator.sampleFromDistribution(random, weights,
				countDists, new HashMap<>(), randomI);
		sampled.add(transaction);
	}

	// Note that upper bound is exact when there are no repetitions
	assertEquals(sampled.size(), modP(sequences.iterator()), EPS);
}

Source File: RandomManager.java From myrrix-recommender with Apache License 2.0

5 votes

/**
 * Puts random number generation into deterministic test mode: every
 * {@link RandomGenerator} currently tracked, and those created afterwards, start
 * from one fixed seed. Intended for making tests repeatable.
 */
public static void useTestSeed() {
  // Raise the flag first, before touching the tracked generators.
  useTestSeed = true;
  synchronized (INSTANCES) {
    // Re-seed every tracked generator, then stop tracking them.
    for (RandomGenerator generator : INSTANCES.keySet()) {
      generator.setSeed(TEST_SEED);
    }
    INSTANCES.clear();
  }
}

Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0

4 votes

/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 * <p>
 * The closing checks throw {@link AssertionError} explicitly instead of using
 * the {@code assert} keyword, so they are enforced even when the JVM runs
 * with assertions disabled.
 */
@Test
public void testGLSEfficiency() throws Exception {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance

    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }

    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }

    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();

    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);

    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();

    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());

    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();

    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // verify that GLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {

        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).toArray();

        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();

        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();

        // Record squared deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);

    }

    // Verify that GLS is on average more efficient, lower variance.
    // Written as !(a > b) so that a NaN statistic also fails the check.
    final double olsMean = olsBetaStats.getMean();
    final double glsMean = glsBetaStats.getMean();
    if (!(olsMean > 1.5 * glsMean)) {
        throw new AssertionError("Expected OLS mean squared error (" + olsMean
                + ") to exceed 1.5 x GLS mean squared error (" + glsMean + ")");
    }
    final double olsStdDev = olsBetaStats.getStandardDeviation();
    final double glsStdDev = glsBetaStats.getStandardDeviation();
    if (!(olsStdDev > glsStdDev)) {
        throw new AssertionError("Expected OLS standard deviation (" + olsStdDev
                + ") to exceed GLS standard deviation (" + glsStdDev + ")");
    }
}

Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0

4 votes

/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 * <p>
 * The closing checks throw {@link AssertionError} explicitly instead of using
 * the {@code assert} keyword, so they are enforced even when the JVM runs
 * with assertions disabled.
 */
@Test
public void testGLSEfficiency() {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance

    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }

    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }

    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();

    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);

    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();

    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());

    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();

    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // verify that GLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {

        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).toArray();

        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();

        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();

        // Record squared deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);

    }

    // Verify that GLS is on average more efficient, lower variance.
    // Written as !(a > b) so that a NaN statistic also fails the check.
    final double olsMean = olsBetaStats.getMean();
    final double glsMean = glsBetaStats.getMean();
    if (!(olsMean > 1.5 * glsMean)) {
        throw new AssertionError("Expected OLS mean squared error (" + olsMean
                + ") to exceed 1.5 x GLS mean squared error (" + glsMean + ")");
    }
    final double olsStdDev = olsBetaStats.getStandardDeviation();
    final double glsStdDev = glsBetaStats.getStandardDeviation();
    if (!(olsStdDev > glsStdDev)) {
        throw new AssertionError("Expected OLS standard deviation (" + olsStdDev
                + ") to exceed GLS standard deviation (" + glsStdDev + ")");
    }
}

Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0

4 votes

/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 * <p>
 * The closing checks throw {@link AssertionError} explicitly instead of using
 * the {@code assert} keyword, so they are enforced even when the JVM runs
 * with assertions disabled.
 */
@Test
public void testGLSEfficiency() {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance

    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }

    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }

    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();

    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);

    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();

    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());

    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();

    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // verify that GLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {

        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).toArray();

        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();

        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();

        // Record squared deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);

    }

    // Verify that GLS is on average more efficient, lower variance.
    // Written as !(a > b) so that a NaN statistic also fails the check.
    final double olsMean = olsBetaStats.getMean();
    final double glsMean = glsBetaStats.getMean();
    if (!(olsMean > 1.5 * glsMean)) {
        throw new AssertionError("Expected OLS mean squared error (" + olsMean
                + ") to exceed 1.5 x GLS mean squared error (" + glsMean + ")");
    }
    final double olsStdDev = olsBetaStats.getStandardDeviation();
    final double glsStdDev = glsBetaStats.getStandardDeviation();
    if (!(olsStdDev > glsStdDev)) {
        throw new AssertionError("Expected OLS standard deviation (" + olsStdDev
                + ") to exceed GLS standard deviation (" + glsStdDev + ")");
    }
}

Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0

4 votes

/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 * <p>
 * The closing checks throw {@link AssertionError} explicitly instead of using
 * the {@code assert} keyword, so they are enforced even when the JVM runs
 * with assertions disabled.
 */
@Test
public void testGLSEfficiency() {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance

    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }

    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }

    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();

    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);

    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();

    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());

    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();

    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // verify that GLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {

        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).toArray();

        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();

        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();

        // Record squared deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);

    }

    // Verify that GLS is on average more efficient, lower variance.
    // Written as !(a > b) so that a NaN statistic also fails the check.
    final double olsMean = olsBetaStats.getMean();
    final double glsMean = glsBetaStats.getMean();
    if (!(olsMean > 1.5 * glsMean)) {
        throw new AssertionError("Expected OLS mean squared error (" + olsMean
                + ") to exceed 1.5 x GLS mean squared error (" + glsMean + ")");
    }
    final double olsStdDev = olsBetaStats.getStandardDeviation();
    final double glsStdDev = glsBetaStats.getStandardDeviation();
    if (!(olsStdDev > glsStdDev)) {
        throw new AssertionError("Expected OLS standard deviation (" + olsStdDev
                + ") to exceed GLS standard deviation (" + glsStdDev + ")");
    }
}

Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0

4 votes

/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 * <p>
 * The closing checks throw {@link AssertionError} explicitly instead of using
 * the {@code assert} keyword, so they are enforced even when the JVM runs
 * with assertions disabled.
 */
@Test
public void testGLSEfficiency() {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance

    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }

    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }

    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();

    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);

    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();

    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());

    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();

    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // verify that GLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {

        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).toArray();

        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();

        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();

        // Record squared deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);

    }

    // Verify that GLS is on average more efficient, lower variance.
    // Written as !(a > b) so that a NaN statistic also fails the check.
    final double olsMean = olsBetaStats.getMean();
    final double glsMean = glsBetaStats.getMean();
    if (!(olsMean > 1.5 * glsMean)) {
        throw new AssertionError("Expected OLS mean squared error (" + olsMean
                + ") to exceed 1.5 x GLS mean squared error (" + glsMean + ")");
    }
    final double olsStdDev = olsBetaStats.getStandardDeviation();
    final double glsStdDev = glsBetaStats.getStandardDeviation();
    if (!(olsStdDev > glsStdDev)) {
        throw new AssertionError("Expected OLS standard deviation (" + olsStdDev
                + ") to exceed GLS standard deviation (" + glsStdDev + ")");
    }
}

Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0

4 votes

/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 * <p>
 * The final checks throw {@link AssertionError} explicitly instead of using the
 * {@code assert} keyword, so they are enforced even when the JVM runs with
 * assertions disabled.
 */
@Test
public void testGLSEfficiency() {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance
    
    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }
    
    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }
    
    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();
      
    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);
    
    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();
    
    // XB is the same for every simulated model, so compute it once
    final RealVector xb = x.operate(b);
    
    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());
    
    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();
    
    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // verify that GLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {
        
        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(xb).toArray();
        
        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();
        
        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();
        
        // Record squared deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);
        
    }
    
    // Verify that GLS is on average more efficient, lower variance.
    // The negated comparisons also fail when either statistic is NaN.
    final double olsMean = olsBetaStats.getMean();
    final double glsMean = glsBetaStats.getMean();
    if (!(olsMean > 1.5 * glsMean)) {
        throw new AssertionError("Mean squared OLS beta error " + olsMean
                + " should exceed 1.5 times the GLS value " + glsMean);
    }
    final double olsSd = olsBetaStats.getStandardDeviation();
    final double glsSd = glsBetaStats.getStandardDeviation();
    if (!(olsSd > glsSd)) {
        throw new AssertionError("Std. dev. of squared OLS beta error " + olsSd
                + " should exceed the GLS value " + glsSd);
    }
}

Source File: TransactionGenerator.java From sequence-mining with GNU General Public License v3.0

4 votes

/**
 * Generate transactions from set of interesting sequences and write them to
 * {@code outFile}.
 * <p>
 * Each non-empty transaction is written on its own line: every item is
 * followed by {@code " -1 "} and the line is terminated by {@code "-2"}.
 * Empty transactions are not written and do not count towards
 * {@code noTransactions}.
 *
 * @param sequences
 *            interesting sequences; one sampling distribution is built per key
 * @param probabilities
 *            table whose row for a sequence supplies the integer values and
 *            the matching probabilities of that sequence's distribution
 * @param noTransactions
 *            number of non-empty transactions to write
 * @param outFile
 *            destination file, written as UTF-8
 * @return set of sequences added to transaction
 * @throws IOException
 *             if the output file cannot be opened or read back
 */
public static HashMap<Sequence, Double> generateTransactionDatabase(final Map<Sequence, Double> sequences,
		final Table<Sequence, Integer, Double> probabilities, final int noTransactions, final File outFile)
				throws IOException {

	// Set random number seeds (fixed, so repeated runs use the same random streams)
	final Random random = new Random(1);
	final Random randomI = new Random(10);
	final RandomGenerator randomC = new JDKRandomGenerator();
	randomC.setSeed(100);

	// Storage for sequences actually added
	final HashMap<Sequence, Double> addedSequences = new HashMap<>();

	// Add to distribution class for easy sampling
	final Map<Sequence, EnumeratedIntegerDistribution> dists = new HashMap<>();
	for (final Sequence seq : sequences.keySet()) {
		final List<Integer> singletons = new ArrayList<>();
		final List<Double> probs = new ArrayList<>();
		for (final Entry<Integer, Double> entry : probabilities.row(seq).entrySet()) {
			singletons.add(entry.getKey());
			probs.add(entry.getValue());
		}
		final EnumeratedIntegerDistribution dist = new EnumeratedIntegerDistribution(randomC,
				Ints.toArray(singletons), Doubles.toArray(probs));
		dists.put(seq, dist);
	}

	// Generate transaction database; try-with-resources closes the writer
	// even if sampling throws part-way through
	try (PrintWriter out = new PrintWriter(outFile, "UTF-8")) {
		int count = 0;
		while (count < noTransactions) {

			// Generate transaction from distribution
			final Transaction transaction = sampleFromDistribution(random, sequences, dists, addedSequences,
					randomI);
			for (final int item : transaction) {
				out.print(item + " -1 ");
			}
			// Only non-empty transactions are terminated and counted
			if (!transaction.isEmpty()) {
				out.print("-2");
				out.println();
				count++;
			}

		}
	}

	// Print file to screen, decoding with the same charset it was written in
	if (VERBOSE) {
		try (java.io.Reader reader = new java.io.InputStreamReader(new java.io.FileInputStream(outFile),
				"UTF-8")) {
			final LineIterator it = new LineIterator(reader);
			while (it.hasNext()) {
				System.out.println(it.nextLine());
			}
		}
	}

	return addedSequences;
}

Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0

4 votes

/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 * <p>
 * The final checks throw {@link AssertionError} explicitly instead of using the
 * {@code assert} keyword, so they are enforced even when the JVM runs with
 * assertions disabled.
 */
@Test
public void testGLSEfficiency() {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance
    
    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }
    
    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }
    
    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();
      
    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);
    
    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();
    
    // XB is the same for every simulated model, so compute it once
    final RealVector xb = x.operate(b);
    
    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());
    
    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();
    
    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // verify that GLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {
        
        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(xb).toArray();
        
        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();
        
        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();
        
        // Record squared deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);
        
    }
    
    // Verify that GLS is on average more efficient, lower variance.
    // The negated comparisons also fail when either statistic is NaN.
    final double olsMean = olsBetaStats.getMean();
    final double glsMean = glsBetaStats.getMean();
    if (!(olsMean > 1.5 * glsMean)) {
        throw new AssertionError("Mean squared OLS beta error " + olsMean
                + " should exceed 1.5 times the GLS value " + glsMean);
    }
    final double olsSd = olsBetaStats.getStandardDeviation();
    final double glsSd = glsBetaStats.getStandardDeviation();
    if (!(olsSd > glsSd)) {
        throw new AssertionError("Std. dev. of squared OLS beta error " + olsSd
                + " should exceed the GLS value " + glsSd);
    }
}

Java Code Examples for org.apache.commons.math3.random.RandomGenerator#setSeed()