org.apache.commons.math3.distribution.TDistribution#cumulativeProbability

Source File: PearsonsCorrelationTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Checks that p-values recomputed by hand from the correlation coefficients
 * and their standard errors agree with the p-values the correlation
 * instance reports.
 */
@Test
public void testStdErrorConsistency() {
    TDistribution referenceT = new TDistribution(45);
    RealMatrix data = createRealMatrix(swissData, 47, 5);
    PearsonsCorrelation correlation = new PearsonsCorrelation(data);
    RealMatrix coefficients = correlation.getCorrelationMatrix();
    RealMatrix reportedP = correlation.getCorrelationPValues();
    RealMatrix errors = correlation.getCorrelationStandardErrors();
    // Only entries strictly below the diagonal are compared; row 0 has none.
    for (int row = 1; row < 5; row++) {
        for (int col = 0; col < row; col++) {
            double absR = FastMath.abs(coefficients.getEntry(row, col));
            double tStat = absR / errors.getEntry(row, col);
            // Two-tailed probability: twice the upper tail beyond tStat.
            double expectedP = 2 * (1 - referenceT.cumulativeProbability(tStat));
            Assert.assertEquals(expectedP, reportedP.getEntry(row, col), 10E-15);
        }
    }
}

Source File: PearsonsCorrelationTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Recomputes each p-value directly from the t statistic
 * (|r| divided by its standard error) and asserts that it matches the
 * value returned by {@code getCorrelationPValues()}.
 */
@Test
public void testStdErrorConsistency() {
    TDistribution tDist = new TDistribution(45);
    RealMatrix input = createRealMatrix(swissData, 47, 5);
    PearsonsCorrelation pearsons = new PearsonsCorrelation(input);
    RealMatrix r = pearsons.getCorrelationMatrix();
    RealMatrix reported = pearsons.getCorrelationPValues();
    RealMatrix se = pearsons.getCorrelationStandardErrors();
    for (int row = 0; row < 5; row++) {
        // Inner loop stops before the diagonal, so only the lower triangle is checked.
        for (int col = 0; col < row; col++) {
            double numerator = FastMath.abs(r.getEntry(row, col));
            double statistic = numerator / se.getEntry(row, col);
            double cdf = tDist.cumulativeProbability(statistic);
            double twoSided = 2 * (1 - cdf);
            Assert.assertEquals(twoSided, reported.getEntry(row, col), 10E-15);
        }
    }
}

Source File: PearsonsCorrelation.java From astor with GNU General Public License v2.0

6 votes

/**
 * Builds the matrix of two-sided p-values for the null hypothesis that each
 * pairwise correlation coefficient is zero.
 * <p>For <code>i != j</code>, entry <code>(i, j)</code> is twice the
 * probability that a t-distributed variable with <code>nObs - 2</code>
 * degrees of freedom is at most <code>-t</code>, where <br>
 * <code>t = |r ((nObs - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup>|</code>
 * and <code>r</code> is the correlation matrix entry at <code>(i, j)</code>.
 * Diagonal entries are 0.</p>
 * <p>These values are sometimes called the <i>significance</i> of the
 * corresponding correlation coefficients.</p>
 *
 * @return matrix of p-values
 * @throws org.apache.commons.math3.exception.MaxCountExceededException
 * if an error occurs estimating probabilities
 */
public RealMatrix getCorrelationPValues() {
    TDistribution studentT = new TDistribution(nObs - 2);
    int dim = correlationMatrix.getColumnDimension();
    double[][] pValues = new double[dim][dim];
    for (int row = 0; row < dim; row++) {
        for (int col = 0; col < dim; col++) {
            if (row == col) {
                // Diagonal keeps the array's default value of 0.
                continue;
            }
            double r = correlationMatrix.getEntry(row, col);
            double scale = FastMath.sqrt((nObs - 2) / (1 - r * r));
            double t = FastMath.abs(r * scale);
            // Lower tail at -t, doubled to cover both tails.
            pValues[row][col] = 2 * studentT.cumulativeProbability(-t);
        }
    }
    return new BlockRealMatrix(pValues);
}

Source File: PearsonsCorrelation.java From astor with GNU General Public License v2.0

6 votes

/**
 * Returns the two-sided p-values associated with the null hypothesis that
 * the corresponding correlation coefficient is zero.
 * <p>Each off-diagonal entry is computed from the correlation coefficient
 * <code>r</code> at the same position: the statistic <br>
 * <code>t = |r ((nObs - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup>|</code><br>
 * is formed and the entry is set to
 * <code>2 * P(T &lt;= -t)</code> for a t distribution constructed with
 * <code>nObs - 2</code> degrees of freedom. Diagonal entries are 0.</p>
 * <p>The values in the matrix are sometimes referred to as the
 * <i>significance</i> of the corresponding correlation coefficients.</p>
 *
 * @return matrix of p-values
 * @throws org.apache.commons.math3.exception.MaxCountExceededException
 * if an error occurs estimating probabilities
 */
public RealMatrix getCorrelationPValues() {
    TDistribution dist = new TDistribution(nObs - 2);
    int size = correlationMatrix.getColumnDimension();
    double[][] significance = new double[size][size];
    for (int a = 0; a < size; a++) {
        double[] rowOut = significance[a];
        for (int b = 0; b < size; b++) {
            if (a != b) {
                double coeff = correlationMatrix.getEntry(a, b);
                double ratio = (nObs - 2) / (1 - coeff * coeff);
                double tAbs = FastMath.abs(coeff * FastMath.sqrt(ratio));
                rowOut[b] = 2 * dist.cumulativeProbability(-tAbs);
            } else {
                rowOut[b] = 0d;
            }
        }
    }
    return new BlockRealMatrix(significance);
}

Source File: PearsonsCorrelationTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Confirms that a t-test built by hand from each correlation coefficient
 * and its standard error yields the same p-value that the correlation
 * instance reports.
 */
@Test
public void testStdErrorConsistency() {
    TDistribution tReference = new TDistribution(45);
    RealMatrix sample = createRealMatrix(swissData, 47, 5);
    PearsonsCorrelation corr = new PearsonsCorrelation(sample);
    RealMatrix correlations = corr.getCorrelationMatrix();
    RealMatrix significance = corr.getCorrelationPValues();
    RealMatrix standardErrors = corr.getCorrelationStandardErrors();
    for (int i = 0; i < 5; i++) {
        for (int j = 0; j < i; j++) {
            // t statistic: magnitude of r scaled by its standard error.
            double magnitude = FastMath.abs(correlations.getEntry(i, j));
            double tValue = magnitude / standardErrors.getEntry(i, j);
            double upperTail = 1 - tReference.cumulativeProbability(tValue);
            double handComputed = 2 * upperTail;
            Assert.assertEquals(handComputed, significance.getEntry(i, j), 10E-15);
        }
    }
}

Source File: PearsonsCorrelationTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Cross-checks the reported correlation p-values against values derived
 * directly from the standard error estimates via a t distribution.
 *
 * @throws Exception declared by the test signature; no statement visible
 * here throws a checked exception
 */
@Test
public void testStdErrorConsistency() throws Exception {
    TDistribution tModel = new TDistribution(45);
    RealMatrix observations = createRealMatrix(swissData, 47, 5);
    PearsonsCorrelation instance = new PearsonsCorrelation(observations);
    RealMatrix rMatrix = instance.getCorrelationMatrix();
    RealMatrix pMatrix = instance.getCorrelationPValues();
    RealMatrix seMatrix = instance.getCorrelationStandardErrors();
    // Walk the strict lower triangle; row 0 contributes no pairs.
    for (int row = 1; row < 5; row++) {
        for (int col = 0; col < row; col++) {
            double rAbs = FastMath.abs(rMatrix.getEntry(row, col));
            double tScore = rAbs / seMatrix.getEntry(row, col);
            double recomputed = 2 * (1 - tModel.cumulativeProbability(tScore));
            Assert.assertEquals(recomputed, pMatrix.getEntry(row, col), 10E-15);
        }
    }
}

Source File: XDataFrameLeastSquares.java From morpheus-core with Apache License 2.0

5 votes

/**
 * Computes the T-stats, P-Values and confidence bounds for all regression parameters
 * and writes them, together with the parameter estimates, into the intercept and
 * beta result frames.
 * <p>For each parameter the t statistic is the estimate divided by its stored
 * standard error, the p-value is twice the lower-tail probability at
 * <code>-|t|</code>, and the confidence half-width is the standard error times the
 * <code>1 - alpha / 2</code> quantile of the t distribution.</p>
 *
 * @param betaVector the estimated parameters; entry 0 is read as the intercept
 *                   estimate and regressor {@code i} is read from index
 *                   {@code i + offset}
 * @throws DataFrameException if any step of the computation fails
 */
private void computeParameterSignificance(RealVector betaVector) {
    try {
        // NOTE(review): one degree of freedom is always subtracted for the intercept,
        // even though hasIntercept() is consulted further down — confirm this is intended.
        final double residualDF = frame.rows().count() - (regressors.size() + 1);
        final TDistribution distribution = new TDistribution(residualDF);
        // NOTE(review): the intercept row is populated from betaVector entry 0
        // regardless of hasIntercept() — confirm against callers.
        final double interceptParam = betaVector.getEntry(0);
        final double interceptStdError = intercept.data().getDouble(0, Field.STD_ERROR);
        final double interceptTStat = interceptParam / interceptStdError;
        final double interceptPValue = distribution.cumulativeProbability(-Math.abs(interceptTStat)) * 2d;
        // The critical value depends only on alpha and the distribution, so it is
        // computed once here and reused for every parameter below.
        final double criticalValue = distribution.inverseCumulativeProbability(1d - alpha / 2d);
        final double interceptCI = interceptStdError * criticalValue;
        this.intercept.data().setDouble(0, Field.PARAMETER, interceptParam);
        this.intercept.data().setDouble(0, Field.T_STAT, interceptTStat);
        this.intercept.data().setDouble(0, Field.P_VALUE, interceptPValue);
        this.intercept.data().setDouble(0, Field.CI_LOWER, interceptParam - interceptCI);
        this.intercept.data().setDouble(0, Field.CI_UPPER, interceptParam + interceptCI);
        // Regressor coefficients start after the intercept slot when one is present.
        final int offset = hasIntercept() ? 1 : 0;
        for (int i=0; i<regressors.size(); ++i) {
            final C regressor = regressors.get(i);
            final double betaParam = betaVector.getEntry(i + offset);
            final double betaStdError = betas.data().getDouble(regressor, Field.STD_ERROR);
            final double tStat = betaParam / betaStdError;
            final double pValue = distribution.cumulativeProbability(-Math.abs(tStat)) * 2d;
            final double betaCI = betaStdError * criticalValue;
            this.betas.data().setDouble(regressor, Field.PARAMETER, betaParam);
            this.betas.data().setDouble(regressor, Field.T_STAT, tStat);
            this.betas.data().setDouble(regressor, Field.P_VALUE, pValue);
            this.betas.data().setDouble(regressor, Field.CI_LOWER, betaParam - betaCI);
            this.betas.data().setDouble(regressor, Field.CI_UPPER, betaParam + betaCI);
        }
    } catch (Exception ex) {
        throw new DataFrameException("Failed to compute regression coefficient t-stats and p-values", ex);
    }
}

Source File: WeightedLeastSquaresRegression.java From Strata with Apache License 2.0

5 votes

/**
 * Assembles the regression result together with its summary statistics.
 * <p>Computes the residuals, the weighted total and error sums of squares
 * (using the diagonal of {@code w}), R-squared and adjusted R-squared, the
 * mean square error, and for each coefficient its standard error, t statistic
 * and p-value.</p>
 *
 * @param x  the independent data; only its length (number of observations) is read here
 * @param w  the weights; only the diagonal entries {@code w[i][i]} are read here
 * @param y  the observed dependent values
 * @param betas  the fitted coefficients
 * @param yModel  the fitted values, subtracted from {@code y} to form residuals
 * @param transpose  left factor of the product whose inverse supplies the coefficient variances
 * @param matrix  right factor of that product
 * @param useIntercept  passed through to the result unchanged
 * @return the result populated with the computed statistics
 */
private LeastSquaresRegressionResult getResultWithStatistics(
    double[][] x, double[][] w, double[] y, double[] betas, double[] yModel,
    DoubleMatrix transpose, DoubleMatrix matrix, boolean useIntercept) {

  // Plain (unweighted) mean of the observations.
  double ySum = 0.;
  for (int idx = 0; idx < y.length; idx++) {
    ySum += y[idx];
  }
  double yMean = ySum / y.length;

  int n = x.length;
  int k = betas.length;
  int degreesOfFreedom = n - k;

  double[] residuals = new double[n];
  double totalSumOfSquares = 0.;
  double errorSumOfSquares = 0.;
  for (int i = 0; i < n; i++) {
    double weight = w[i][i];
    double deviation = y[i] - yMean;
    totalSumOfSquares += weight * deviation * deviation;
    double residual = y[i] - yModel[i];
    residuals[i] = residual;
    errorSumOfSquares += weight * residual * residual;
  }
  double regressionSumOfSquares = totalSumOfSquares - errorSumOfSquares;

  DoubleMatrix product = ALGEBRA.multiply(transpose, matrix);
  double[][] covarianceBetas = convertArray(ALGEBRA.getInverse(product).toArray());

  double rSquared = regressionSumOfSquares / totalSumOfSquares;
  double adjustedRSquared = 1. - (1 - rSquared) * (n - 1) / degreesOfFreedom;
  double meanSquareError = errorSumOfSquares / degreesOfFreedom;

  TDistribution studentT = new TDistribution(degreesOfFreedom);
  double[] standardErrorsOfBeta = new double[k];
  double[] tStats = new double[k];
  double[] pValues = new double[k];
  for (int i = 0; i < k; i++) {
    double stdError = Math.sqrt(meanSquareError * covarianceBetas[i][i]);
    double tStat = betas[i] / stdError;
    standardErrorsOfBeta[i] = stdError;
    tStats[i] = tStat;
    // NOTE(review): this is the upper-tail probability beyond |t| only (not doubled);
    // confirm that a one-tailed p-value is what consumers of the result expect.
    pValues[i] = 1 - studentT.cumulativeProbability(Math.abs(tStat));
  }

  return new WeightedLeastSquaresRegressionResult(
      betas, residuals, meanSquareError, standardErrorsOfBeta, rSquared, adjustedRSquared, tStats, pValues, useIntercept);
}

Source File: RandomWalkSamplerTest.java From log-synth with Apache License 2.0

4 votes

/**
 * Draws one million samples from the schema, checks that each random-walk
 * value equals the running sum of its steps, and then compares the observed
 * distributions against reference distributions at a set of quantiles.
 *
 * @throws IOException if the schema resource cannot be read
 */
@Test
public void testBasics() throws IOException {
    // each sample is read for five fields (plus v1-bare, which must mirror v1's value)
    // g1 is gamma distributed with alpha = 0.2, beta = 0.2
    // g2 is compared below against a gamma reference with shape 1 and scale 1
    // v1 is unit normal
    // v2 is normal with mean = 0, sd = 2
    // v3 is gamma-normal with dof=2, mean = 0.
    SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema015.json"), Charsets.UTF_8).read());

    TDigest tdG1 = new AVLTreeDigest(500);
    TDigest tdG2 = new AVLTreeDigest(500);
    TDigest td1 = new AVLTreeDigest(500);
    TDigest td2 = new AVLTreeDigest(500);
    TDigest td3 = new AVLTreeDigest(500);

    // running sums of the steps seen so far for each walk
    double x1 = 0;
    double x2 = 0;
    double x3 = 0;

    for (int i = 0; i < 1000000; i++) {
        JsonNode r = s.sample();
        tdG1.add(r.get("g1").asDouble());
        tdG2.add(r.get("g2").asDouble());

        double step1 = r.get("v1").get("step").asDouble();
        td1.add(step1);
        x1 += step1;
        assertEquals(x1, r.get("v1").get("value").asDouble(), 0);
        assertEquals(x1, r.get("v1-bare").asDouble(), 0);

        double step2 = r.get("v2").get("step").asDouble();
        td2.add(step2);
        x2 += step2;
        assertEquals(x2, r.get("v2").get("value").asDouble(), 0);

        double step3 = r.get("v3").get("step").asDouble();
        td3.add(step3);
        x3 += step3;
        assertEquals(x3, r.get("v3").get("value").asDouble(), 0);
    }

    // now compare against reference distributions to test accuracy of the observed step distributions
    NormalDistribution normalDistribution = new NormalDistribution();
    GammaDistribution gd1 = new GammaDistribution(0.2, 5);
    GammaDistribution gd2 = new GammaDistribution(1, 1);
    TDistribution tDistribution = new TDistribution(2);
    // The list previously contained 0.99 twice, which only repeated identical checks.
    // NOTE(review): by symmetry with 0.001 the second entry may have been meant to be
    // 0.999 — confirm before adding it, since the tolerance there is very tight.
    for (double q : new double[]{0.001, 0.01, 0.1, 0.2, 0.5, 0.8, 0.9, 0.99}) {
        // allowed error shrinks towards both tails; 10e-2 is 0.1
        double tolerance = (1 - q) * q * 10e-2;

        double uG1 = gd1.cumulativeProbability(tdG1.quantile(q));
        assertEquals(q, uG1, tolerance);

        double uG2 = gd2.cumulativeProbability(tdG2.quantile(q));
        assertEquals(q, uG2, tolerance);

        double u1 = normalDistribution.cumulativeProbability(td1.quantile(q));
        assertEquals(q, u1, tolerance);

        // v2 steps are rescaled by their standard deviation of 2 before the unit-normal comparison
        double u2 = normalDistribution.cumulativeProbability(td2.quantile(q) / 2);
        assertEquals(q, u2, tolerance);

        double u3 = tDistribution.cumulativeProbability(td3.quantile(q));
        assertEquals(q, u3, tolerance);
    }
}

Source File: PearsonsCorrelation.java From astor with GNU General Public License v2.0

4 votes

/**
 * Produces the matrix of two-sided p-values for the null hypothesis that
 * each pairwise correlation coefficient is zero.
 *
 * <p>For <code>i != j</code>, <code>getCorrelationPValues().getEntry(i,j)</code>
 * is twice the lower-tail probability at <code>-t</code> of a t distribution
 * constructed with <code>nObs - 2</code> degrees of freedom, where <br>
 * <code>t = |r ((nObs - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup>|</code>
 * and <code>r</code> is the correlation matrix entry at <code>(i,j)</code>.
 * Entries on the diagonal are 0.</p>
 *
 * <p>The values in the matrix are sometimes referred to as the
 * <i>significance</i> of the corresponding correlation coefficients.</p>
 *
 * <p>To use this method, one of the constructors that supply an input
 * matrix must have been used to create this instance.</p>
 *
 * @return matrix of p-values
 * @throws org.apache.commons.math3.exception.MaxCountExceededException
 * if an error occurs estimating probabilities
 * @throws NullPointerException if this instance was created with no data
 */
public RealMatrix getCorrelationPValues() {
    TDistribution tDist = new TDistribution(nObs - 2);
    int variableCount = correlationMatrix.getColumnDimension();
    double[][] result = new double[variableCount][variableCount];
    for (int row = 0; row < variableCount; row++) {
        for (int col = 0; col < variableCount; col++) {
            double entry;
            if (row == col) {
                entry = 0d;
            } else {
                double r = correlationMatrix.getEntry(row, col);
                double root = FastMath.sqrt((nObs - 2) / (1 - r * r));
                double t = FastMath.abs(r * root);
                // Doubling the lower tail at -t gives the two-sided probability.
                entry = 2 * tDist.cumulativeProbability(-t);
            }
            result[row][col] = entry;
        }
    }
    return new BlockRealMatrix(result);
}