Java Code Examples for org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression#newSampleData()

The following examples show how to use org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression#newSampleData(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: StatsUtil.java    From MeteoInfo with GNU Lesser General Public License v3.0 7 votes vote down vote up
/**
 * Implements ordinary least squares (OLS) to estimate the parameters of a
 * multiple linear regression model.
 * <p>
 * The inputs are converted to Java arrays and cast to {@code double[]} and
 * {@code double[][]} respectively, so {@code y} must be one-dimensional and
 * {@code x} two-dimensional.
 *
 * @param y Y sample data - one dimension array
 * @param x X sample data - two dimension array
 * @param noIntercept if {@code true} the model is fitted without an intercept term
 * @return a two-element array: index 0 holds the estimated regression
 *         parameters, index 1 holds the residuals (one per observation)
 */
public static Array[] multipleLineRegress_OLS(Array y, Array x, boolean noIntercept) {
    OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
    regression.setNoIntercept(noIntercept);
    double[] yy = (double[]) ArrayUtil.copyToNDJavaArray_Double(y);
    double[][] xx = (double[][]) ArrayUtil.copyToNDJavaArray_Double(x);
    regression.newSampleData(yy, xx);
    double[] para = regression.estimateRegressionParameters();
    double[] residuals = regression.estimateResiduals();
    int k = para.length;
    int n = residuals.length;
    Array aPara = Array.factory(DataType.DOUBLE, new int[]{k});
    Array aResiduals = Array.factory(DataType.DOUBLE, new int[]{n});
    for (int i = 0; i < k; i++) {
        aPara.setDouble(i, para[i]);
    }
    // Copy every residual: the bound is the residual count n, not the
    // parameter count k (using k left all but the first k residuals unset).
    for (int i = 0; i < n; i++) {
        aResiduals.setDouble(i, residuals[i]);
    }

    return new Array[]{aPara, aResiduals};
}
 
Example 2
Source File: OLSTests.java    From morpheus-core with Apache License 2.0 6 votes vote down vote up
/**
 * Fits the same three-regressor model (Y on X1, X2, X3) with Commons Math and
 * with the DataFrame regression API, then checks that both agree.
 * Column X4 is generated but is not part of either regression.
 *
 * @param threshold value passed to the {@code OLSMultipleLinearRegression} constructor
 */
@Test(dataProvider = "thresholds")
public void testMLR(double threshold) {
    final String[] regressors = {"X1", "X2", "X3"};
    final Range<String> keys = Range.of(0, 5000).map(idx -> "R" + idx);
    final DataFrame<String,String> frame = DataFrame.of(keys, String.class, columns -> {
        // every column is the row ordinal plus one plus uniform noise of increasing width
        columns.add("Y", Double.class).applyDoubles(cell -> cell.rowOrdinal() + 1d + (Math.random() * 2));
        columns.add("X1", Double.class).applyDoubles(cell -> cell.rowOrdinal() + 1d + (Math.random() * 3));
        columns.add("X2", Double.class).applyDoubles(cell -> cell.rowOrdinal() + 1d + (Math.random() * 4));
        columns.add("X3", Double.class).applyDoubles(cell -> cell.rowOrdinal() + 1d + (Math.random() * 5));
        columns.add("X4", Double.class).applyDoubles(cell -> cell.rowOrdinal() + 1d + (Math.random() * 6));
    });

    // Reference fit using Commons Math directly.
    final double[] response = frame.col("Y").toDoubleStream().toArray();
    final double[][] design = new double[frame.rows().count()][regressors.length];
    frame.rows().forEach(row -> {
        final double[] target = design[row.ordinal()];
        for (int j = 0; j < regressors.length; j++) {
            target[j] = row.getDouble(regressors[j]);
        }
    });
    final OLSMultipleLinearRegression reference = new OLSMultipleLinearRegression(threshold);
    reference.newSampleData(response, design);

    // Fit under test, compared against the reference inside the callback.
    frame.regress().ols("Y", Arrays.asList(regressors), true, mlr -> {
        System.out.println(mlr);
        assertResultsMatch(mlr, reference);
        return Optional.empty();
    });
}
 
Example 3
Source File: Forecast.java    From xDrip with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Fits the trend model to the given samples and stores the coefficients in
 * {@code coef} and the estimated error variance in {@code last_error_rate}.
 *
 * @param y observed values; never modified (a transformed copy is used when {@code logY()} is true)
 * @param x predictor values, one per entry of {@code y}
 * @throws IllegalArgumentException if {@code x} and {@code y} differ in length
 */
@Override
public void setValues(double[] y, double[] x) {
    final int sampleCount = x.length;
    if (sampleCount != y.length) {
        throw new IllegalArgumentException(String.format("The numbers of y and x values must be equal (%d != %d)", y.length, x.length));
    }

    // xVector decides how a single x is expanded into a row of predictors
    final double[][] predictors = new double[sampleCount][];
    int row = 0;
    for (double xi : x) {
        predictors[row++] = xVector(xi);
    }

    // some models predict ln y; build the transformed values in a fresh array
    // so the caller's array is left untouched
    double[] response = y;
    if (logY()) {
        response = new double[y.length];
        for (int i = 0; i < sampleCount; i++) {
            response[i] = Math.log(y[i]);
        }
    }

    final OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
    regression.setNoIntercept(true); // a constant term, if wanted, comes from xVector
    regression.newSampleData(response, predictors);
    coef = MatrixUtils.createColumnRealMatrix(regression.estimateRegressionParameters());
    last_error_rate = regression.estimateErrorVariance();

    Log.d(TAG, new StringBuilder()
            .append(getClass().getSimpleName())
            .append(" Forecast Error rate: errorvar:")
            .append(JoH.qs(last_error_rate, 4))
            .append(" regssionvar:")
            .append(JoH.qs(regression.estimateRegressandVariance(), 4))
            .append("  stderror:")
            .append(JoH.qs(regression.estimateRegressionStandardError(), 4))
            .toString());
}
 
Example 4
Source File: Forecast.java    From xDrip with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Fits the trend model to the given samples and stores the coefficients in
 * {@code coef} and the estimated error variance in {@code last_error_rate}.
 *
 * @param y observed values; never modified (a transformed copy is used when {@code logY()} is true)
 * @param x predictor values, one per entry of {@code y}
 * @throws IllegalArgumentException if {@code x} and {@code y} differ in length
 */
@Override
public void setValues(double[] y, double[] x) {
    final int sampleCount = x.length;
    if (sampleCount != y.length) {
        throw new IllegalArgumentException(String.format("The numbers of y and x values must be equal (%d != %d)", y.length, x.length));
    }

    // xVector decides how a single x is expanded into a row of predictors
    final double[][] predictors = new double[sampleCount][];
    int row = 0;
    for (double xi : x) {
        predictors[row++] = xVector(xi);
    }

    // some models predict ln y; build the transformed values in a fresh array
    // so the caller's array is left untouched
    double[] response = y;
    if (logY()) {
        response = new double[y.length];
        for (int i = 0; i < sampleCount; i++) {
            response[i] = Math.log(y[i]);
        }
    }

    final OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
    regression.setNoIntercept(true); // a constant term, if wanted, comes from xVector
    regression.newSampleData(response, predictors);
    coef = MatrixUtils.createColumnRealMatrix(regression.estimateRegressionParameters());
    last_error_rate = regression.estimateErrorVariance();

    Log.d(TAG, new StringBuilder()
            .append(getClass().getSimpleName())
            .append(" Forecast Error rate: errorvar:")
            .append(JoH.qs(last_error_rate, 4))
            .append(" regssionvar:")
            .append(JoH.qs(regression.estimateRegressandVariance(), 4))
            .append("  stderror:")
            .append(JoH.qs(regression.estimateRegressionStandardError(), 4))
            .toString());
}
 
Example 5
Source File: Forecast.java    From xDrip-plus with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Fits the trend model to the given samples and stores the coefficients in
 * {@code coef} and the estimated error variance in {@code last_error_rate}.
 *
 * @param y observed values; never modified (a transformed copy is used when {@code logY()} is true)
 * @param x predictor values, one per entry of {@code y}
 * @throws IllegalArgumentException if {@code x} and {@code y} differ in length
 */
@Override
public void setValues(double[] y, double[] x) {
    final int sampleCount = x.length;
    if (sampleCount != y.length) {
        throw new IllegalArgumentException(String.format("The numbers of y and x values must be equal (%d != %d)", y.length, x.length));
    }

    // xVector decides how a single x is expanded into a row of predictors
    final double[][] predictors = new double[sampleCount][];
    int row = 0;
    for (double xi : x) {
        predictors[row++] = xVector(xi);
    }

    // some models predict ln y; build the transformed values in a fresh array
    // so the caller's array is left untouched
    double[] response = y;
    if (logY()) {
        response = new double[y.length];
        for (int i = 0; i < sampleCount; i++) {
            response[i] = Math.log(y[i]);
        }
    }

    final OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
    regression.setNoIntercept(true); // a constant term, if wanted, comes from xVector
    regression.newSampleData(response, predictors);
    coef = MatrixUtils.createColumnRealMatrix(regression.estimateRegressionParameters());
    last_error_rate = regression.estimateErrorVariance();

    Log.d(TAG, new StringBuilder()
            .append(getClass().getSimpleName())
            .append(" Forecast Error rate: errorvar:")
            .append(JoH.qs(last_error_rate, 4))
            .append(" regssionvar:")
            .append(JoH.qs(regression.estimateRegressandVariance(), 4))
            .append("  stderror:")
            .append(JoH.qs(regression.estimateRegressionStandardError(), 4))
            .toString());
}
 
Example 6
Source File: Forecast.java    From xDrip-plus with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Fits the trend model to the given samples and stores the coefficients in
 * {@code coef} and the estimated error variance in {@code last_error_rate}.
 *
 * @param y observed values; never modified (a transformed copy is used when {@code logY()} is true)
 * @param x predictor values, one per entry of {@code y}
 * @throws IllegalArgumentException if {@code x} and {@code y} differ in length
 */
@Override
public void setValues(double[] y, double[] x) {
    final int sampleCount = x.length;
    if (sampleCount != y.length) {
        throw new IllegalArgumentException(String.format("The numbers of y and x values must be equal (%d != %d)", y.length, x.length));
    }

    // xVector decides how a single x is expanded into a row of predictors
    final double[][] predictors = new double[sampleCount][];
    int row = 0;
    for (double xi : x) {
        predictors[row++] = xVector(xi);
    }

    // some models predict ln y; build the transformed values in a fresh array
    // so the caller's array is left untouched
    double[] response = y;
    if (logY()) {
        response = new double[y.length];
        for (int i = 0; i < sampleCount; i++) {
            response[i] = Math.log(y[i]);
        }
    }

    final OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
    regression.setNoIntercept(true); // a constant term, if wanted, comes from xVector
    regression.newSampleData(response, predictors);
    coef = MatrixUtils.createColumnRealMatrix(regression.estimateRegressionParameters());
    last_error_rate = regression.estimateErrorVariance();

    Log.d(TAG, new StringBuilder()
            .append(getClass().getSimpleName())
            .append(" Forecast Error rate: errorvar:")
            .append(JoH.qs(last_error_rate, 4))
            .append(" regssionvar:")
            .append(JoH.qs(regression.estimateRegressandVariance(), 4))
            .append("  stderror:")
            .append(JoH.qs(regression.estimateRegressionStandardError(), 4))
            .toString());
}
 
Example 7
Source File: OLSTrendLine.java    From hortonmachine with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Fits the trend line to the given samples, keeping the fitted model in
 * {@code ols} and its coefficients in {@code coef}.
 *
 * @param y observed values; never modified (a transformed copy is used when {@code logY()} is true)
 * @param x predictor values, one per entry of {@code y}
 * @throws IllegalArgumentException if {@code x} and {@code y} differ in length
 */
@Override
public void setValues( double[] y, double[] x ) {
    final int sampleCount = x.length;
    if (sampleCount != y.length) {
        throw new IllegalArgumentException(
                String.format("The numbers of y and x values must be equal (%d != %d)", y.length, x.length));
    }

    // xVector decides how a single x is expanded into a row of predictors
    final double[][] predictors = new double[sampleCount][];
    int row = 0;
    for( double xi : x ) {
        predictors[row++] = xVector(xi);
    }

    // some models predict ln y; the transformed values go into a fresh array
    // so the caller's array is left untouched
    double[] response = y;
    if (logY()) {
        response = new double[y.length];
        for( int i = 0; i < sampleCount; i++ ) {
            response[i] = Math.log(y[i]);
        }
    }

    ols = new OLSMultipleLinearRegression();
    ols.setNoIntercept(true); // a constant term, if wanted, comes from xVector
    ols.newSampleData(response, predictors);
    final double[] estimated = ols.estimateRegressionParameters();
    coef = MatrixUtils.createColumnRealMatrix(estimated);
}
 
Example 8
Source File: InteractionModel.java    From systemsgenetics with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Runs an ordinary least squares (OLS) fit of the expression values against
 * this model's observed values, without an intercept, and stores the residual
 * sum of squares, degrees of freedom, residuals and estimated parameters.
 *
 * @param expressionValues response values to regress on the observed values
 * @throws DimensionMismatchException rethrown after logging when the sample
 *         dimensions do not agree
 */
public void calculateSumOfSquaresOLS(double[] expressionValues) throws IOException, IllegalAccessException {
	final OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
	// the intercept is always switched off for this fit
	ols.setNoIntercept(true);
	try {
		ols.newSampleData(expressionValues, this.getObservedValues());
	} catch (DimensionMismatchException mismatch) {
		// log the two sizes before propagating the failure
		final String details = String.format(
				"Length of expression and genotype data not the same\nexpression length: %d\nobserved values length: %d\n",
				expressionValues.length, this.getNumberOfTerms());
		DeconvolutionLogger.log.info(details);
		throw mismatch;
	}

	this.setSumOfSquares(ols.calculateResidualSumOfSquares());
	this.setDegreesOfFreedom(expressionValues.length - (this.getNumberOfTerms() + 1));
	this.setResiduals(ols.estimateResiduals());
	this.setEstimatedRegressionParameters(ols.estimateRegressionParameters());
}
 
Example 9
Source File: OLSTrendLine.java    From MeteoInfo with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Fits the trend line to the given samples and stores the coefficients in
 * {@code coef} and the R-squared of the fit in {@code rs}.
 *
 * @param y observed values
 * @param x predictor values, same size as {@code y}
 * @throws IllegalArgumentException if {@code x} and {@code y} differ in size
 */
@Override
public void setValues(Array y, Array x) {
    if (x.getSize() != y.getSize()) {
        throw new IllegalArgumentException(
                String.format("The numbers of y and x values must be equal (%d != %d)", y.getSize(), x.getSize()));
    }

    y = y.copyIfView();
    x = x.copyIfView();

    // xVector decides how a single x is expanded into a row of predictors
    final double[][] predictors = new double[(int) x.getSize()][];
    for (int i = 0; i < x.getSize(); i++) {
        predictors[i] = xVector(x.getDouble(i));
    }

    // copy y into a plain array; when the model predicts ln y, entries that
    // have a matching x are replaced by their natural logarithm
    final double[] response = new double[(int) y.getSize()];
    final boolean fitLogOfY = logY();
    for (int i = 0; i < response.length; i++) {
        final double value = y.getDouble(i);
        response[i] = (fitLogOfY && i < x.getSize()) ? Math.log(value) : value;
    }

    final OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
    regression.setNoIntercept(true); // a constant term, if wanted, comes from xVector
    regression.newSampleData(response, predictors);
    coef = MatrixUtils.createColumnRealMatrix(regression.estimateRegressionParameters());
    rs = regression.calculateRSquared();
}
 
Example 10
Source File: LinearRegressionModelParameters.java    From cruise-control with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Trigger the calculation of the model parameters.
 * <p>
 * Nothing is computed while fewer than {@code MIN_CPU_UTIL_OBSERVATION_BUCKETS}
 * valid buckets are available. Otherwise a no-intercept OLS regression of the
 * aggregated CPU utilization samples on the aggregated bytes-rate samples is
 * run and the resulting coefficients are stored in {@code _coefficients}.
 * Any exception raised while doing so is logged and reported as {@code false}.
 *
 * @return {@code true} if the parameters were generated, otherwise {@code false}
 */
public synchronized boolean updateModelCoefficient() {
  if (validBuckets().size() < MIN_CPU_UTIL_OBSERVATION_BUCKETS) {
    return false;
  }
  try {
    OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
    regression.setNoIntercept(true);
    boolean ignoreLeaderBytesOut = !isLeaderBytesInAndOutRatioDiverseEnough();
    regression.newSampleData(aggregateSampleCpuUtilData(),
                             aggregateSampleBytesRateData(ignoreLeaderBytesOut));
    double[] parameters = regression.estimateRegressionParameters();
    // When leader bytes out is ignored, the follower-bytes-in coefficient
    // is read from index 1 instead of index 2.
    int leaderBytesInIndex = 0;
    int leaderBytesOutIndex = 1;
    int followerBytesInIndex = ignoreLeaderBytesOut ? 1 : 2;
    _coefficients.put(ModelCoefficient.LEADER_BYTES_IN, parameters[leaderBytesInIndex]);
    if (!ignoreLeaderBytesOut) {
      _coefficients.put(ModelCoefficient.LEADER_BYTES_OUT, parameters[leaderBytesOutIndex]);
    }
    _coefficients.put(ModelCoefficient.FOLLOWER_BYTES_IN, parameters[followerBytesInIndex]);

    LOG.info("Coefficient generated: leader_bytes_in: {}, leader_bytes_out: {}, follower_bytes_in: {}",
             _coefficients.get(ModelCoefficient.LEADER_BYTES_IN),
             _coefficients.get(ModelCoefficient.LEADER_BYTES_OUT),
             _coefficients.get(ModelCoefficient.FOLLOWER_BYTES_IN));
    return true;
  } catch (Exception e) {
    // Pass the exception as the throwable argument rather than as a "{}"
    // parameter, so the message carries no unfilled placeholder.
    LOG.warn("received exception", e);
  }
  return false;
}
 
Example 11
Source File: OLSRegressionTest.java    From Java-Data-Science-Cookbook with MIT License 5 votes vote down vote up
/**
 * Fits an OLS multiple linear regression of {@code y} on {@code x} and queries
 * the fitted model for its main estimates. The values are only held in local
 * variables; nothing is returned or stored.
 *
 * @param x predictor samples, one row per observation
 * @param y response samples
 */
public void calculateOlsRegression(double[][] x, double[] y){
	final OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
	ols.newSampleData(y, x);

	// coefficient estimates and residuals
	final double[] coefficients = ols.estimateRegressionParameters();
	final double[] residualValues = ols.estimateResiduals();
	// variance estimates
	final double[][] coefficientVariance = ols.estimateRegressionParametersVariance();
	final double responseVariance = ols.estimateRegressandVariance();
	// goodness of fit
	final double r2 = ols.calculateRSquared();
	final double standardError = ols.estimateRegressionStandardError();
}
 
Example 12
Source File: LinearRegressionModelParameters.java    From cruise-control with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Builds a snapshot of the linear regression model: per-bucket completeness,
 * coefficients re-estimated from the currently available samples, and counts
 * of the leader/follower and bytes-in/bytes-out ratios seen in those samples.
 *
 * @return Linear regression model state.
 */
public synchronized LinearRegressionModelState modelState() {
  // Completeness per bucket is the observed count over the target count, capped at 1.0.
  Map<Integer, Double> completenessByBucket = new HashMap<>();
  for (Map.Entry<Integer, AtomicInteger> bucket : INDICES.entrySet()) {
    double filledFraction = (double) bucket.getValue().get() / NUM_OBSERVATIONS_PER_UTIL_BUCKET;
    completenessByBucket.put(bucket.getKey(), Math.min(filledFraction, 1.0));
  }

  Map<Integer, Integer> leaderToFollowerCounts = new HashMap<>();
  Map<Integer, Integer> bytesInToBytesOutCounts = new HashMap<>();
  Map<ModelCoefficient, Double> coefficients = new HashMap<>(_coefficients);

  boolean skipLeaderBytesOut = !isLeaderBytesInAndOutRatioDiverseEnough();
  double[][] bytesRateSamples = aggregateSampleBytesRateData(skipLeaderBytesOut);

  // Column layout of each sample row; follower bytes in sits at index 1
  // when the leader-bytes-out column is skipped, otherwise at index 2.
  final int leaderInCol = 0;
  final int leaderOutCol = 1;
  final int followerInCol = skipLeaderBytesOut ? 1 : 2;
  // Ratio recorded when the denominator is exactly zero.
  final int zeroDenominatorRatio = 10000000;

  for (double[] sample : bytesRateSamples) {
    int leaderToFollower;
    if (sample[followerInCol] == 0.0) {
      leaderToFollower = zeroDenominatorRatio;
    } else {
      leaderToFollower = (int) ((sample[leaderInCol] / sample[followerInCol]) * 10);
    }
    leaderToFollowerCounts.merge(leaderToFollower, 1, Integer::sum);

    if (skipLeaderBytesOut) {
      continue;
    }
    int bytesInToBytesOut;
    if (sample[leaderOutCol] == 0.0) {
      bytesInToBytesOut = zeroDenominatorRatio;
    } else {
      bytesInToBytesOut = (int) ((sample[leaderInCol] / sample[leaderOutCol]) * 10);
    }
    bytesInToBytesOutCounts.merge(bytesInToBytesOut, 1, Integer::sum);
  }

  // Re-estimate the coefficients from the samples available right now.
  OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
  ols.setNoIntercept(true);
  ols.newSampleData(aggregateSampleCpuUtilData(), bytesRateSamples);
  double[] estimates = ols.estimateRegressionParameters();
  coefficients.put(ModelCoefficient.LEADER_BYTES_IN, estimates[leaderInCol]);
  if (!skipLeaderBytesOut) {
    coefficients.put(ModelCoefficient.LEADER_BYTES_OUT, estimates[leaderOutCol]);
  }
  coefficients.put(ModelCoefficient.FOLLOWER_BYTES_IN, estimates[followerInCol]);

  return new LinearRegressionModelState(completenessByBucket, coefficients,
                                        OBSERVED_LEADER_TO_FOLLOWER_BYTES_RATIO,
                                        OBSERVED_LEADER_BYTES_IN_TO_BYTES_OUT_RATIO,
                                        leaderToFollowerCounts, bytesInToBytesOutCounts,
                                        CPU_UTIL_ESTIMATION_ERROR_STATS);
}
 
Example 13
Source File: Example6.java    From Java-Data-Analysis with MIT License 4 votes vote down vote up
/**
 * Fits an OLS multiple linear regression to the {@code y} and {@code x}
 * samples and prints the estimated regression parameters.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
    regression.newSampleData(y, x);
    printResults(regression.estimateRegressionParameters());
}
 
Example 14
Source File: OLSRegressionEvaluator.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an OLS regression loaded with the given sample.
 *
 * @param observations predictor samples, one row per observation
 * @param outcomes response samples
 * @return the regression with the sample data set
 */
protected MultipleLinearRegression regress(double[][] observations, double[] outcomes) {
  final OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
  // newSampleData takes the response first, then the predictors
  ols.newSampleData(outcomes, observations);
  return ols;
}
 
Example 15
Source File: VIF.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Repeatedly removes collinear covariates. In each pass every column is
 * regressed (OLS) on all other columns; the first column whose R-squared
 * exceeds {@code threshold} is excluded and the pass restarts on the reduced
 * dataset. The loop ends after a pass in which no column exceeds the threshold.
 * Progress is printed to standard output.
 *
 * @param finalCovariates covariate matrix to prune
 * @param threshold R-squared value above which a covariate is dropped
 * @return the dataset with the collinear columns removed
 */
public DoubleMatrixDataset<String, String> vifCorrect(DoubleMatrixDataset<String, String> finalCovariates, double threshold) throws Exception {

	System.out.println("VIF: " + finalCovariates.rows() + " x " + finalCovariates.columns());

	// determine variance inflation factor
	System.out.println("Checking variance inflation factor...");
	int iter = 0;
	boolean inflated;
	do {
		HashSet<Integer> skipCol = new HashSet<>();
		final int nrRows = finalCovariates.rows();
		final int nrCols = finalCovariates.columns();

		for (int col = 0; col < nrCols; col++) {
			// response: the column under test; predictors: every other column
			double[] y = finalCovariates.getCol(col).toArray();
			double[][] others = new double[nrRows][nrCols - 1];
			for (int row = 0; row < nrRows; row++) {
				int target = 0;
				for (int other = 0; other < nrCols; other++) {
					if (other == col) {
						continue;
					}
					others[row][target++] = finalCovariates.getElementQuick(row, other);
				}
			}

			OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
			ols.newSampleData(y, others);

			double rsq = ols.calculateRSquared();
			double vif = 1 / (1 - rsq);
			boolean alias = rsq > threshold;
			if (alias) {
				skipCol.add(col);
			}
			System.out.println("Iteration: " + iter + "\tCovariate: " + finalCovariates.getColObjects().get(col) + "\tRSq: " + rsq + "\tVIF: " + vif + "\tAliased: " + alias);
			if (alias) {
				// drop one covariate at a time, then re-evaluate the rest
				break;
			}
		}

		inflated = !skipCol.isEmpty();
		if (inflated) {
			finalCovariates = excludeCols(finalCovariates, skipCol);
		} else {
			System.out.println("There are no more collinear covariates.");
		}
		iter++;
	} while (inflated);

	return finalCovariates;
}