Java Code Examples for org.apache.commons.math3.random.RandomGenerator#setSeed()
The following examples show how to use
org.apache.commons.math3.random.RandomGenerator#setSeed() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PercentileTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void testStoredVsDirect() { final RandomGenerator rand= new JDKRandomGenerator(); rand.setSeed(Long.MAX_VALUE); for (final int sampleSize:sampleSizes) { final double[] data = new NormalDistribution(rand,4000, 50) .sample(sampleSize); for (final double p:new double[] {50d,95d}) { for (final Percentile.EstimationType e : Percentile.EstimationType.values()) { reset(p, e); final Percentile pStoredData = getUnivariateStatistic(); pStoredData.setData(data); final double storedDataResult=pStoredData.evaluate(); pStoredData.setData(null); final Percentile pDirect = getUnivariateStatistic(); Assert.assertEquals("Sample="+sampleSize+",P="+p+" e="+e, storedDataResult, pDirect.evaluate(data),0d); } } } }
Example 2
Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void testNaNsFixedTiesRandom() { RandomGenerator randomGenerator = new JDKRandomGenerator(); randomGenerator.setSeed(1000); NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED, randomGenerator); double[] ranks = ranking.rank(exampleData); double[] correctRanks = { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesFirst); correctRanks = new double[] { 1, 2, 4, 3, 5 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesLast); correctRanks = new double[] { 3, 3, 2, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleNaNs); correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleTies); correctRanks = new double[] { 3, 2, 4, 4, 6, 7, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(allSame); correctRanks = new double[] { 2, 3, 3, 3 }; TestUtils.assertEquals(correctRanks, ranks, 0d); }
Example 3
Source File: PercentileTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void testStoredVsDirect() { final RandomGenerator rand= new JDKRandomGenerator(); rand.setSeed(Long.MAX_VALUE); for (final int sampleSize:sampleSizes) { final double[] data = new NormalDistribution(rand,4000, 50) .sample(sampleSize); for (final double p:new double[] {50d,95d}) { for (final Percentile.EstimationType e : Percentile.EstimationType.values()) { reset(p, e); final Percentile pStoredData = getUnivariateStatistic(); pStoredData.setData(data); final double storedDataResult=pStoredData.evaluate(); pStoredData.setData(null); final Percentile pDirect = getUnivariateStatistic(); Assert.assertEquals("Sample="+sampleSize+",P="+p+" e="+e, storedDataResult, pDirect.evaluate(data),0d); } } } }
Example 4
Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void testNaNsFixedTiesRandom() { RandomGenerator randomGenerator = new JDKRandomGenerator(); randomGenerator.setSeed(1000); NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED, randomGenerator); double[] ranks = ranking.rank(exampleData); double[] correctRanks = { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesFirst); correctRanks = new double[] { 1, 2, 4, 3, 5 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesLast); correctRanks = new double[] { 3, 3, 2, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleNaNs); correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleTies); correctRanks = new double[] { 3, 2, 4, 4, 6, 7, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(allSame); correctRanks = new double[] { 2, 3, 3, 3 }; TestUtils.assertEquals(correctRanks, ranks, 0d); }
Example 5
Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void testNaNsFixedTiesRandom() { RandomGenerator randomGenerator = new JDKRandomGenerator(); randomGenerator.setSeed(1000); NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED, randomGenerator); double[] ranks = ranking.rank(exampleData); double[] correctRanks = { 5, 4, 6, 7, 3, 8, Double.NaN, 1, 4 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesFirst); correctRanks = new double[] { 1, 1, 4, 3, 5 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesLast); correctRanks = new double[] { 3, 4, 2, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleNaNs); correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleTies); correctRanks = new double[] { 3, 2, 5, 5, 7, 6, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(allSame); correctRanks = new double[] { 1, 3, 4, 4 }; TestUtils.assertEquals(correctRanks, ranks, 0d); }
Example 6
Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void testNaNsFixedTiesRandom() { RandomGenerator randomGenerator = new JDKRandomGenerator(); randomGenerator.setSeed(1000); NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED, randomGenerator); double[] ranks = ranking.rank(exampleData); double[] correctRanks = { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesFirst); correctRanks = new double[] { 1, 2, 4, 3, 5 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesLast); correctRanks = new double[] { 3, 3, 2, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleNaNs); correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleTies); correctRanks = new double[] { 3, 2, 4, 4, 6, 7, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(allSame); correctRanks = new double[] { 2, 3, 3, 3 }; TestUtils.assertEquals(correctRanks, ranks, 0d); }
Example 7
Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void testNaNsFixedTiesRandom() { RandomGenerator randomGenerator = new JDKRandomGenerator(); randomGenerator.setSeed(1000); NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED, randomGenerator); double[] ranks = ranking.rank(exampleData); double[] correctRanks = { 5, 4, 6, 7, 3, 8, Double.NaN, 1, 4 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesFirst); correctRanks = new double[] { 1, 1, 4, 3, 5 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesLast); correctRanks = new double[] { 3, 4, 2, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleNaNs); correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleTies); correctRanks = new double[] { 3, 2, 5, 5, 7, 6, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(allSame); correctRanks = new double[] { 1, 3, 4, 4 }; TestUtils.assertEquals(correctRanks, ranks, 0d); }
Example 8
Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void testNaNsFixedTiesRandom() { RandomGenerator randomGenerator = new JDKRandomGenerator(); randomGenerator.setSeed(1000); NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED, randomGenerator); double[] ranks = ranking.rank(exampleData); double[] correctRanks = { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesFirst); correctRanks = new double[] { 1, 2, 4, 3, 5 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesLast); correctRanks = new double[] { 3, 3, 2, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleNaNs); correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleTies); correctRanks = new double[] { 3, 2, 4, 4, 6, 7, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(allSame); correctRanks = new double[] { 2, 3, 3, 3 }; TestUtils.assertEquals(correctRanks, ranks, 0d); }
Example 9
Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void testNaNsFixedTiesRandom() { RandomGenerator randomGenerator = new JDKRandomGenerator(); randomGenerator.setSeed(1000); NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED, randomGenerator); double[] ranks = ranking.rank(exampleData); double[] correctRanks = { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesFirst); correctRanks = new double[] { 1, 2, 4, 3, 5 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesLast); correctRanks = new double[] { 3, 3, 2, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleNaNs); correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleTies); correctRanks = new double[] { 3, 2, 4, 4, 6, 7, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(allSame); correctRanks = new double[] { 2, 3, 3, 3 }; TestUtils.assertEquals(correctRanks, ranks, 0d); }
Example 10
Source File: NaturalRankingTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void testNaNsFixedTiesRandom() { RandomGenerator randomGenerator = new JDKRandomGenerator(); randomGenerator.setSeed(1000); NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED, randomGenerator); double[] ranks = ranking.rank(exampleData); double[] correctRanks = { 5, 4, 6, 7, 3, 8, Double.NaN, 1, 4 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesFirst); correctRanks = new double[] { 1, 1, 4, 3, 5 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(tiesLast); correctRanks = new double[] { 3, 4, 2, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleNaNs); correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(multipleTies); correctRanks = new double[] { 3, 2, 5, 5, 7, 6, 1 }; TestUtils.assertEquals(correctRanks, ranks, 0d); ranks = ranking.rank(allSame); correctRanks = new double[] { 1, 3, 4, 4 }; TestUtils.assertEquals(correctRanks, ranks, 0d); }
Example 11
Source File: PartitionTest.java From sequence-mining with GNU General Public License v3.0 | 5 votes |
@Test public void testInterleavingGenerator() { final Random random = new Random(1); final Random randomI = new Random(10); final RandomGenerator randomC = new JDKRandomGenerator(); randomC.setSeed(100); final Multiset<Sequence> seqsI = HashMultiset.create(); seqsI.add(new Sequence(1, 2, 3)); seqsI.add(new Sequence(4, 5)); seqsI.add(new Sequence(6)); seqsI.add(new Sequence(7)); final HashMap<Sequence, Double> seqsG = new HashMap<>(); for (final Sequence seq : seqsI.elementSet()) { seqsG.put(seq, 1.0); } final Map<Sequence, EnumeratedIntegerDistribution> countDists = new HashMap<>(); final EnumeratedIntegerDistribution oneRepeat = new EnumeratedIntegerDistribution(randomC, new int[] { 1 }, new double[] { 1.0 }); countDists.put(new Sequence(1, 2, 3), oneRepeat); countDists.put(new Sequence(4, 5), oneRepeat); countDists.put(new Sequence(6), oneRepeat); countDists.put(new Sequence(7), oneRepeat); final HashSet<Transaction> transG = new HashSet<>(); for (int i = 0; i < 700000; i++) transG.add( TransactionGenerator.sampleFromDistribution(random, seqsG, countDists, new HashMap<>(), randomI)); // Note that upper bound is exact when there are no repetitions assertEquals(transG.size(), modP(seqsI.iterator()), EPS); }
Example 12
Source File: RandomManager.java From myrrix-recommender with Apache License 2.0 | 5 votes |
/** * Causes all known instances of {@link RandomGenerator}, and future ones, to be started from a fixed * seed. This is useful for making tests deterministic. */ public static void useTestSeed() { useTestSeed = true; synchronized (INSTANCES) { for (RandomGenerator random : INSTANCES.keySet()) { random.setSeed(TEST_SEED); } INSTANCES.clear(); } }
Example 13
Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0 | 4 votes |
/** * Generate an error covariance matrix and sample data representing models * with this error structure. Then verify that GLS estimated coefficients, * on average, perform better than OLS. */ @Test public void testGLSEfficiency() throws Exception { RandomGenerator rg = new JDKRandomGenerator(); rg.setSeed(200); // Seed has been selected to generate non-trivial covariance // Assume model has 16 observations (will use Longley data). Start by generating // non-constant variances for the 16 error terms. final int nObs = 16; double[] sigma = new double[nObs]; for (int i = 0; i < nObs; i++) { sigma[i] = 10 * rg.nextDouble(); } // Now generate 1000 error vectors to use to estimate the covariance matrix // Columns are draws on N(0, sigma[col]) final int numSeeds = 1000; RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs); for (int i = 0; i < numSeeds; i++) { for (int j = 0; j < nObs; j++) { errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]); } } // Get covariance matrix for columns RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix(); // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg); double[] errorMeans = new double[nObs]; // Counting on init to 0 here CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov, 1.0e-12 * cov.getNorm(), rawGenerator); // Now start generating models. Use Longley X matrix on LHS // and Longley OLS beta vector as "true" beta. Generate // Y values by XB + u where u is a CorrelatedRandomVector generated // from cov. OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); ols.newSampleData(longley, nObs, 6); final RealVector b = ols.calculateBeta().copy(); final RealMatrix x = ols.getX().copy(); // Create a GLS model to reuse GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression(); gls.newSampleData(longley, nObs, 6); gls.newCovarianceData(cov.getData()); // Create aggregators for stats measuring model performance DescriptiveStatistics olsBetaStats = new DescriptiveStatistics(); DescriptiveStatistics glsBetaStats = new DescriptiveStatistics(); // Generate Y vectors for 10000 models, estimate GLS and OLS and // Verify that OLS estimates are better final int nModels = 10000; for (int i = 0; i < nModels; i++) { // Generate y = xb + u with u cov RealVector u = MatrixUtils.createRealVector(gen.nextVector()); double[] y = u.add(x.operate(b)).toArray(); // Estimate OLS parameters ols.newYSampleData(y); RealVector olsBeta = ols.calculateBeta(); // Estimate GLS parameters gls.newYSampleData(y); RealVector glsBeta = gls.calculateBeta(); // Record deviations from "true" beta double dist = olsBeta.getDistance(b); olsBetaStats.addValue(dist * dist); dist = glsBeta.getDistance(b); glsBetaStats.addValue(dist * dist); } // Verify that GLS is on average more efficient, lower variance assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean()); assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation()); }
Example 14
Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0 | 4 votes |
/** * Generate an error covariance matrix and sample data representing models * with this error structure. Then verify that GLS estimated coefficients, * on average, perform better than OLS. */ @Test public void testGLSEfficiency() { RandomGenerator rg = new JDKRandomGenerator(); rg.setSeed(200); // Seed has been selected to generate non-trivial covariance // Assume model has 16 observations (will use Longley data). Start by generating // non-constant variances for the 16 error terms. final int nObs = 16; double[] sigma = new double[nObs]; for (int i = 0; i < nObs; i++) { sigma[i] = 10 * rg.nextDouble(); } // Now generate 1000 error vectors to use to estimate the covariance matrix // Columns are draws on N(0, sigma[col]) final int numSeeds = 1000; RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs); for (int i = 0; i < numSeeds; i++) { for (int j = 0; j < nObs; j++) { errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]); } } // Get covariance matrix for columns RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix(); // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg); double[] errorMeans = new double[nObs]; // Counting on init to 0 here CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov, 1.0e-12 * cov.getNorm(), rawGenerator); // Now start generating models. Use Longley X matrix on LHS // and Longley OLS beta vector as "true" beta. Generate // Y values by XB + u where u is a CorrelatedRandomVector generated // from cov. OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); ols.newSampleData(longley, nObs, 6); final RealVector b = ols.calculateBeta().copy(); final RealMatrix x = ols.getX().copy(); // Create a GLS model to reuse GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression(); gls.newSampleData(longley, nObs, 6); gls.newCovarianceData(cov.getData()); // Create aggregators for stats measuring model performance DescriptiveStatistics olsBetaStats = new DescriptiveStatistics(); DescriptiveStatistics glsBetaStats = new DescriptiveStatistics(); // Generate Y vectors for 10000 models, estimate GLS and OLS and // Verify that OLS estimates are better final int nModels = 10000; for (int i = 0; i < nModels; i++) { // Generate y = xb + u with u cov RealVector u = MatrixUtils.createRealVector(gen.nextVector()); double[] y = u.add(x.operate(b)).toArray(); // Estimate OLS parameters ols.newYSampleData(y); RealVector olsBeta = ols.calculateBeta(); // Estimate GLS parameters gls.newYSampleData(y); RealVector glsBeta = gls.calculateBeta(); // Record deviations from "true" beta double dist = olsBeta.getDistance(b); olsBetaStats.addValue(dist * dist); dist = glsBeta.getDistance(b); glsBetaStats.addValue(dist * dist); } // Verify that GLS is on average more efficient, lower variance assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean()); assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation()); }
Example 15
Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0 | 4 votes |
/** * Generate an error covariance matrix and sample data representing models * with this error structure. Then verify that GLS estimated coefficients, * on average, perform better than OLS. */ @Test public void testGLSEfficiency() { RandomGenerator rg = new JDKRandomGenerator(); rg.setSeed(200); // Seed has been selected to generate non-trivial covariance // Assume model has 16 observations (will use Longley data). Start by generating // non-constant variances for the 16 error terms. final int nObs = 16; double[] sigma = new double[nObs]; for (int i = 0; i < nObs; i++) { sigma[i] = 10 * rg.nextDouble(); } // Now generate 1000 error vectors to use to estimate the covariance matrix // Columns are draws on N(0, sigma[col]) final int numSeeds = 1000; RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs); for (int i = 0; i < numSeeds; i++) { for (int j = 0; j < nObs; j++) { errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]); } } // Get covariance matrix for columns RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix(); // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg); double[] errorMeans = new double[nObs]; // Counting on init to 0 here CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov, 1.0e-12 * cov.getNorm(), rawGenerator); // Now start generating models. Use Longley X matrix on LHS // and Longley OLS beta vector as "true" beta. Generate // Y values by XB + u where u is a CorrelatedRandomVector generated // from cov. OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); ols.newSampleData(longley, nObs, 6); final RealVector b = ols.calculateBeta().copy(); final RealMatrix x = ols.getX().copy(); // Create a GLS model to reuse GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression(); gls.newSampleData(longley, nObs, 6); gls.newCovarianceData(cov.getData()); // Create aggregators for stats measuring model performance DescriptiveStatistics olsBetaStats = new DescriptiveStatistics(); DescriptiveStatistics glsBetaStats = new DescriptiveStatistics(); // Generate Y vectors for 10000 models, estimate GLS and OLS and // Verify that OLS estimates are better final int nModels = 10000; for (int i = 0; i < nModels; i++) { // Generate y = xb + u with u cov RealVector u = MatrixUtils.createRealVector(gen.nextVector()); double[] y = u.add(x.operate(b)).toArray(); // Estimate OLS parameters ols.newYSampleData(y); RealVector olsBeta = ols.calculateBeta(); // Estimate GLS parameters gls.newYSampleData(y); RealVector glsBeta = gls.calculateBeta(); // Record deviations from "true" beta double dist = olsBeta.getDistance(b); olsBetaStats.addValue(dist * dist); dist = glsBeta.getDistance(b); glsBetaStats.addValue(dist * dist); } // Verify that GLS is on average more efficient, lower variance assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean()); assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation()); }
Example 16
Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0 | 4 votes |
/** * Generate an error covariance matrix and sample data representing models * with this error structure. Then verify that GLS estimated coefficients, * on average, perform better than OLS. */ @Test public void testGLSEfficiency() { RandomGenerator rg = new JDKRandomGenerator(); rg.setSeed(200); // Seed has been selected to generate non-trivial covariance // Assume model has 16 observations (will use Longley data). Start by generating // non-constant variances for the 16 error terms. final int nObs = 16; double[] sigma = new double[nObs]; for (int i = 0; i < nObs; i++) { sigma[i] = 10 * rg.nextDouble(); } // Now generate 1000 error vectors to use to estimate the covariance matrix // Columns are draws on N(0, sigma[col]) final int numSeeds = 1000; RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs); for (int i = 0; i < numSeeds; i++) { for (int j = 0; j < nObs; j++) { errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]); } } // Get covariance matrix for columns RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix(); // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg); double[] errorMeans = new double[nObs]; // Counting on init to 0 here CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov, 1.0e-12 * cov.getNorm(), rawGenerator); // Now start generating models. Use Longley X matrix on LHS // and Longley OLS beta vector as "true" beta. Generate // Y values by XB + u where u is a CorrelatedRandomVector generated // from cov. OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); ols.newSampleData(longley, nObs, 6); final RealVector b = ols.calculateBeta().copy(); final RealMatrix x = ols.getX().copy(); // Create a GLS model to reuse GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression(); gls.newSampleData(longley, nObs, 6); gls.newCovarianceData(cov.getData()); // Create aggregators for stats measuring model performance DescriptiveStatistics olsBetaStats = new DescriptiveStatistics(); DescriptiveStatistics glsBetaStats = new DescriptiveStatistics(); // Generate Y vectors for 10000 models, estimate GLS and OLS and // Verify that OLS estimates are better final int nModels = 10000; for (int i = 0; i < nModels; i++) { // Generate y = xb + u with u cov RealVector u = MatrixUtils.createRealVector(gen.nextVector()); double[] y = u.add(x.operate(b)).toArray(); // Estimate OLS parameters ols.newYSampleData(y); RealVector olsBeta = ols.calculateBeta(); // Estimate GLS parameters gls.newYSampleData(y); RealVector glsBeta = gls.calculateBeta(); // Record deviations from "true" beta double dist = olsBeta.getDistance(b); olsBetaStats.addValue(dist * dist); dist = glsBeta.getDistance(b); glsBetaStats.addValue(dist * dist); } // Verify that GLS is on average more efficient, lower variance assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean()); assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation()); }
Example 17
Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0 | 4 votes |
/** * Generate an error covariance matrix and sample data representing models * with this error structure. Then verify that GLS estimated coefficients, * on average, perform better than OLS. */ @Test public void testGLSEfficiency() { RandomGenerator rg = new JDKRandomGenerator(); rg.setSeed(200); // Seed has been selected to generate non-trivial covariance // Assume model has 16 observations (will use Longley data). Start by generating // non-constant variances for the 16 error terms. final int nObs = 16; double[] sigma = new double[nObs]; for (int i = 0; i < nObs; i++) { sigma[i] = 10 * rg.nextDouble(); } // Now generate 1000 error vectors to use to estimate the covariance matrix // Columns are draws on N(0, sigma[col]) final int numSeeds = 1000; RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs); for (int i = 0; i < numSeeds; i++) { for (int j = 0; j < nObs; j++) { errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]); } } // Get covariance matrix for columns RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix(); // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg); double[] errorMeans = new double[nObs]; // Counting on init to 0 here CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov, 1.0e-12 * cov.getNorm(), rawGenerator); // Now start generating models. Use Longley X matrix on LHS // and Longley OLS beta vector as "true" beta. Generate // Y values by XB + u where u is a CorrelatedRandomVector generated // from cov. OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); ols.newSampleData(longley, nObs, 6); final RealVector b = ols.calculateBeta().copy(); final RealMatrix x = ols.getX().copy(); // Create a GLS model to reuse GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression(); gls.newSampleData(longley, nObs, 6); gls.newCovarianceData(cov.getData()); // Create aggregators for stats measuring model performance DescriptiveStatistics olsBetaStats = new DescriptiveStatistics(); DescriptiveStatistics glsBetaStats = new DescriptiveStatistics(); // Generate Y vectors for 10000 models, estimate GLS and OLS and // Verify that OLS estimates are better final int nModels = 10000; for (int i = 0; i < nModels; i++) { // Generate y = xb + u with u cov RealVector u = MatrixUtils.createRealVector(gen.nextVector()); double[] y = u.add(x.operate(b)).toArray(); // Estimate OLS parameters ols.newYSampleData(y); RealVector olsBeta = ols.calculateBeta(); // Estimate GLS parameters gls.newYSampleData(y); RealVector glsBeta = gls.calculateBeta(); // Record deviations from "true" beta double dist = olsBeta.getDistance(b); olsBetaStats.addValue(dist * dist); dist = glsBeta.getDistance(b); glsBetaStats.addValue(dist * dist); } // Verify that GLS is on average more efficient, lower variance assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean()); assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation()); }
Example 18
Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0 | 4 votes |
/** * Generate an error covariance matrix and sample data representing models * with this error structure. Then verify that GLS estimated coefficients, * on average, perform better than OLS. */ @Test public void testGLSEfficiency() { RandomGenerator rg = new JDKRandomGenerator(); rg.setSeed(200); // Seed has been selected to generate non-trivial covariance // Assume model has 16 observations (will use Longley data). Start by generating // non-constant variances for the 16 error terms. final int nObs = 16; double[] sigma = new double[nObs]; for (int i = 0; i < nObs; i++) { sigma[i] = 10 * rg.nextDouble(); } // Now generate 1000 error vectors to use to estimate the covariance matrix // Columns are draws on N(0, sigma[col]) final int numSeeds = 1000; RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs); for (int i = 0; i < numSeeds; i++) { for (int j = 0; j < nObs; j++) { errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]); } } // Get covariance matrix for columns RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix(); // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg); double[] errorMeans = new double[nObs]; // Counting on init to 0 here CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov, 1.0e-12 * cov.getNorm(), rawGenerator); // Now start generating models. Use Longley X matrix on LHS // and Longley OLS beta vector as "true" beta. Generate // Y values by XB + u where u is a CorrelatedRandomVector generated // from cov. OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); ols.newSampleData(longley, nObs, 6); final RealVector b = ols.calculateBeta().copy(); final RealMatrix x = ols.getX().copy(); // Create a GLS model to reuse GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression(); gls.newSampleData(longley, nObs, 6); gls.newCovarianceData(cov.getData()); // Create aggregators for stats measuring model performance DescriptiveStatistics olsBetaStats = new DescriptiveStatistics(); DescriptiveStatistics glsBetaStats = new DescriptiveStatistics(); // Generate Y vectors for 10000 models, estimate GLS and OLS and // Verify that OLS estimates are better final int nModels = 10000; for (int i = 0; i < nModels; i++) { // Generate y = xb + u with u cov RealVector u = MatrixUtils.createRealVector(gen.nextVector()); double[] y = u.add(x.operate(b)).toArray(); // Estimate OLS parameters ols.newYSampleData(y); RealVector olsBeta = ols.calculateBeta(); // Estimate GLS parameters gls.newYSampleData(y); RealVector glsBeta = gls.calculateBeta(); // Record deviations from "true" beta double dist = olsBeta.getDistance(b); olsBetaStats.addValue(dist * dist); dist = glsBeta.getDistance(b); glsBetaStats.addValue(dist * dist); } // Verify that GLS is on average more efficient, lower variance assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean()); assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation()); }
Example 19
Source File: TransactionGenerator.java From sequence-mining with GNU General Public License v3.0 | 4 votes |
/** * Generate transactions from set of interesting sequences * * @return set of sequences added to transaction */ public static HashMap<Sequence, Double> generateTransactionDatabase(final Map<Sequence, Double> sequences, final Table<Sequence, Integer, Double> probabilities, final int noTransactions, final File outFile) throws IOException { // Set random number seeds final Random random = new Random(1); final Random randomI = new Random(10); final RandomGenerator randomC = new JDKRandomGenerator(); randomC.setSeed(100); // Storage for sequences actually added final HashMap<Sequence, Double> addedSequences = new HashMap<>(); // Set output file final PrintWriter out = new PrintWriter(outFile, "UTF-8"); // Add to distribution class for easy sampling final Map<Sequence, EnumeratedIntegerDistribution> dists = new HashMap<>(); for (final Sequence seq : sequences.keySet()) { final List<Integer> singletons = new ArrayList<>(); final List<Double> probs = new ArrayList<>(); for (final Entry<Integer, Double> entry : probabilities.row(seq).entrySet()) { singletons.add(entry.getKey()); probs.add(entry.getValue()); } final EnumeratedIntegerDistribution dist = new EnumeratedIntegerDistribution(randomC, Ints.toArray(singletons), Doubles.toArray(probs)); dists.put(seq, dist); } // Generate transaction database int count = 0; while (count < noTransactions) { // Generate transaction from distribution final Transaction transaction = sampleFromDistribution(random, sequences, dists, addedSequences, randomI); for (final int item : transaction) { out.print(item + " -1 "); } if (!transaction.isEmpty()) { out.print("-2"); out.println(); count++; } } out.close(); // Print file to screen if (VERBOSE) { final FileReader reader = new FileReader(outFile); final LineIterator it = new LineIterator(reader); while (it.hasNext()) { System.out.println(it.nextLine()); } LineIterator.closeQuietly(it); } return addedSequences; }
Example 20
Source File: GLSMultipleLinearRegressionTest.java From astor with GNU General Public License v2.0 | 4 votes |
/** * Generate an error covariance matrix and sample data representing models * with this error structure. Then verify that GLS estimated coefficients, * on average, perform better than OLS. */ @Test public void testGLSEfficiency() { RandomGenerator rg = new JDKRandomGenerator(); rg.setSeed(200); // Seed has been selected to generate non-trivial covariance // Assume model has 16 observations (will use Longley data). Start by generating // non-constant variances for the 16 error terms. final int nObs = 16; double[] sigma = new double[nObs]; for (int i = 0; i < nObs; i++) { sigma[i] = 10 * rg.nextDouble(); } // Now generate 1000 error vectors to use to estimate the covariance matrix // Columns are draws on N(0, sigma[col]) final int numSeeds = 1000; RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs); for (int i = 0; i < numSeeds; i++) { for (int j = 0; j < nObs; j++) { errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]); } } // Get covariance matrix for columns RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix(); // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg); double[] errorMeans = new double[nObs]; // Counting on init to 0 here CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov, 1.0e-12 * cov.getNorm(), rawGenerator); // Now start generating models. Use Longley X matrix on LHS // and Longley OLS beta vector as "true" beta. Generate // Y values by XB + u where u is a CorrelatedRandomVector generated // from cov. OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); ols.newSampleData(longley, nObs, 6); final RealVector b = ols.calculateBeta().copy(); final RealMatrix x = ols.getX().copy(); // Create a GLS model to reuse GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression(); gls.newSampleData(longley, nObs, 6); gls.newCovarianceData(cov.getData()); // Create aggregators for stats measuring model performance DescriptiveStatistics olsBetaStats = new DescriptiveStatistics(); DescriptiveStatistics glsBetaStats = new DescriptiveStatistics(); // Generate Y vectors for 10000 models, estimate GLS and OLS and // Verify that OLS estimates are better final int nModels = 10000; for (int i = 0; i < nModels; i++) { // Generate y = xb + u with u cov RealVector u = MatrixUtils.createRealVector(gen.nextVector()); double[] y = u.add(x.operate(b)).toArray(); // Estimate OLS parameters ols.newYSampleData(y); RealVector olsBeta = ols.calculateBeta(); // Estimate GLS parameters gls.newYSampleData(y); RealVector glsBeta = gls.calculateBeta(); // Record deviations from "true" beta double dist = olsBeta.getDistance(b); olsBetaStats.addValue(dist * dist); dist = glsBeta.getDistance(b); glsBetaStats.addValue(dist * dist); } // Verify that GLS is on average more efficient, lower variance assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean()); assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation()); }