Java Code Examples for org.apache.commons.math3.stat.ranking.NaturalRanking#rank()
The following examples show how to use
org.apache.commons.math3.stat.ranking.NaturalRanking#rank().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestEQTLDatasetForInteractions.java From systemsgenetics with GNU General Public License v3.0 | 6 votes |
private void forceNormalCovariates(ExpressionDataset datasetCovariates, ExpressionDataset datasetGenotypes) throws ArithmeticException { System.out.println("Enforcing normal distribution on covariates"); NaturalRanking ranker = new NaturalRanking(); for (int p = 0; p < datasetCovariates.nrProbes; p++) { //Rank order the expression values: double[] values = new double[datasetCovariates.nrSamples]; for (int s = 0; s < datasetGenotypes.nrSamples; s++) { values[s] = datasetCovariates.rawData[p][s]; } double[] rankedValues = ranker.rank(values); //Replace the original expression value with the standard distribution enforce: for (int s = 0; s < datasetGenotypes.nrSamples; s++) { //Convert the rank to a proportion, with range <0, 1> double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); //Convert the pValue to a Z-Score: double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); datasetCovariates.rawData[p][s] = zScore; //Replace original expression value with the Z-Score } } }
Example 2
Source File: TestEQTLDatasetForInteractions.java From systemsgenetics with GNU General Public License v3.0 | 6 votes |
private void forceNormalExpressionData(ExpressionDataset datasetExpression) throws ArithmeticException { System.out.println("Enforcing normal distribution on expression data:"); NaturalRanking ranker = new NaturalRanking(); for (int p = 0; p < datasetExpression.nrProbes; p++) { //Rank order the expression values: double[] values = new double[datasetExpression.nrSamples]; for (int s = 0; s < datasetExpression.nrSamples; s++) { values[s] = datasetExpression.rawData[p][s]; } double[] rankedValues = ranker.rank(values); //Replace the original expression value with the standard distribution enforce: for (int s = 0; s < datasetExpression.nrSamples; s++) { //Convert the rank to a proportion, with range <0, 1> double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); //Convert the pValue to a Z-Score: double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); datasetExpression.rawData[p][s] = zScore; //Replace original expression value with the Z-Score } } System.out.println("Expression data now force normal"); }
Example 3
Source File: XDataFrameRank.java From morpheus-core with Apache License 2.0 | 5 votes |
/**
 * Ranks the given values with a {@link NaturalRanking} configured from the current
 * {@code DataFrameOptions}.
 * <p>
 * The NaN and tie strategies reported by {@code DataFrameOptions} are looked up in {@code optionsMap};
 * both lookups are performed before either result is checked.
 *
 * @param values the values to rank
 * @return the ranks produced by {@link NaturalRanking#rank(double[])} for {@code values}
 * @throws DataFrameException if the lookup for the configured NaN strategy or tie strategy yields {@code null}
 */
static double[] rank(double[] values) {
    final NaNStrategy nanHandling = (NaNStrategy)optionsMap.get(NaNStrategy.class).get(DataFrameOptions.getNanStrategy());
    final TiesStrategy tieHandling = (TiesStrategy)optionsMap.get(TiesStrategy.class).get(DataFrameOptions.getTieStrategy());
    if (nanHandling == null) {
        throw new DataFrameException("Unsupported NaN strategy specified: " + DataFrameOptions.getNanStrategy());
    }
    if (tieHandling == null) {
        throw new DataFrameException("Unsupported tie strategy specified: " + DataFrameOptions.getTieStrategy());
    }
    return new NaturalRanking(nanHandling, tieHandling).rank(values);
}
Example 4
Source File: DenseVectors.java From cc-dbp with Apache License 2.0 | 4 votes |
/**
 * Ranks the entries of {@code x} using a default-constructed {@link NaturalRanking}.
 *
 * @param x the values to rank
 * @return the array returned by {@link NaturalRanking#rank(double[])} for {@code x}
 */
public static double[] toRanks(double[] x) {
    return new NaturalRanking().rank(x);
}
Example 5
Source File: TestEQTLDatasetForInteractions.java From systemsgenetics with GNU General Public License v3.0 | 4 votes |
private ExpressionDataset correctCovariateDataPCA(String[] covsToCorrect2, String[] covsToCorrect, ExpressionDataset datasetGenotypes, ExpressionDataset datasetCovariatesPCAForceNormal, int nrCompsToCorrectFor) throws Exception { System.out.println("Preparing data for testing eQTL effects of SNPs on covariate data:"); System.out.println("Correcting covariate data for cohort specific effects:"); ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + covsToCorrect.length + nrCompsToCorrectFor, datasetGenotypes.nrSamples); datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames; // add covariates from the first list HashMap hashCovsToCorrect = new HashMap(); // add covariates from the second list for (int i = 0; i < covsToCorrect2.length; ++i) { String cov = covsToCorrect2[i]; hashCovsToCorrect.put(cov, null); Integer c = datasetCovariatesPCAForceNormal.hashProbes.get(cov); if (c == null) { throw new Exception("Covariate not found: " + cov); } for (int s = 0; s < datasetGenotypes.nrSamples; s++) { datasetCovariatesToCorrectFor.rawData[i][s] = datasetCovariatesPCAForceNormal.rawData[c][s]; } } int[] covsToCorrectIndex = new int[covsToCorrect.length]; for (int c = 0; c < covsToCorrect.length; c++) { hashCovsToCorrect.put(covsToCorrect[c], null); covsToCorrectIndex[c] = ((Integer) datasetCovariatesPCAForceNormal.hashProbes.get(covsToCorrect[c])).intValue(); for (int s = 0; s < datasetGenotypes.nrSamples; s++) { datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + c][s] = datasetCovariatesPCAForceNormal.rawData[covsToCorrectIndex[c]][s]; } } // add PCs if (nrCompsToCorrectFor > 0) { for (int comp = 0; comp < nrCompsToCorrectFor; comp++) { for (int s = 0; s < datasetGenotypes.nrSamples; s++) { datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + covsToCorrect.length + comp][s] = datasetCovariatesPCAForceNormal.rawData[datasetCovariatesPCAForceNormal.nrProbes - 51 + comp][s]; } } } 
datasetCovariatesToCorrectFor.transposeDataset(); datasetCovariatesToCorrectFor.save(inputDir + "/CovariatesToCorrectFor.txt"); orthogonalizeDataset(inputDir + "/CovariatesToCorrectFor.txt"); datasetCovariatesToCorrectFor = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.PrincipalComponents.txt"); datasetCovariatesToCorrectFor.transposeDataset(); ExpressionDataset datasetCovariatesToCorrectForEigenvalues = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.Eigenvalues.txt"); for (int p = 0; p < datasetCovariatesPCAForceNormal.nrProbes; p++) { if (!hashCovsToCorrect.containsKey(datasetCovariatesPCAForceNormal.probeNames[p])) { for (int cov = 0; cov < datasetCovariatesToCorrectFor.nrProbes; cov++) { if (datasetCovariatesToCorrectForEigenvalues.rawData[cov][0] > 1E-5) { double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetCovariatesPCAForceNormal.rawData[p]); for (int s = 0; s < datasetGenotypes.nrSamples; s++) { datasetCovariatesPCAForceNormal.rawData[p][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s]; } } } /*double stdev = JSci.maths.ArrayMath.standardDeviation(datasetCovariates.rawData[p]); double mean = JSci.maths.ArrayMath.mean(datasetCovariates.rawData[p]); if (stdev < 1E-5) { for (int s = 0; s < datasetGenotypes.nrSamples; s++) { datasetCovariatesPCAForceNormal.rawData[p][s] = mean; } }*/ } } System.out.println("Enforcing normal distribution on covariates"); NaturalRanking ranker = new NaturalRanking(); for (int p = 0; p < datasetCovariatesPCAForceNormal.nrProbes; p++) { //Rank order the expression values: double[] values = new double[datasetCovariatesPCAForceNormal.nrSamples]; for (int s = 0; s < datasetGenotypes.nrSamples; s++) { values[s] = datasetCovariatesPCAForceNormal.rawData[p][s]; } double[] rankedValues = ranker.rank(values); //Replace the original expression value with the standard distribution enforce: for (int s = 0; s < datasetGenotypes.nrSamples; s++) { //Convert 
the rank to a proportion, with range <0, 1> double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length); //Convert the pValue to a Z-Score: double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue); datasetCovariatesPCAForceNormal.rawData[p][s] = zScore; //Replace original expression value with the Z-Score } } return datasetCovariatesPCAForceNormal; }
Example 6
Source File: DoubleMatrixDataset.java From systemsgenetics with GNU General Public License v3.0 | 3 votes |
/**
 * Creates a new dataset in which each row of this dataset's matrix is replaced by rank-based normal
 * scores, rescaled to that row's own mean and standard deviation.
 * <p>
 * Per row: the mean and standard deviation are computed with {@code JSci.maths.ArrayMath}, the values are
 * ranked with a {@link NaturalRanking} using {@code NaNStrategy.FAILED} and {@code TiesStrategy.AVERAGE},
 * and each element becomes {@code mean + normalInverse((0.5 + rank - 1) / ranks.length) * stdev}.
 *
 * @return a new dataset constructed from this dataset's {@code hashRows} and {@code hashCols} and filled
 *         with the transformed values
 */
public DoubleMatrixDataset<R, C> createRowForceNormalDuplicate() {
    final DoubleMatrixDataset<R, C> normalized = new DoubleMatrixDataset<>(hashRows, hashCols);
    final NaturalRanking ranker = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.AVERAGE);

    for (int rowIndex = 0; rowIndex < matrix.rows(); ++rowIndex) {
        final double[] rowValues = matrix.viewRow(rowIndex).toArray();

        // Capture the row's location and scale before ranking so the scores can be mapped back onto them.
        final double rowMean = JSci.maths.ArrayMath.mean(rowValues);
        final double rowStdev = JSci.maths.ArrayMath.standardDeviation(rowValues);

        final double[] ranks = ranker.rank(rowValues);

        for (int colIndex = 0; colIndex < matrix.columns(); colIndex++) {
            final double proportion = (0.5d + ranks[colIndex] - 1d) / (double) (ranks.length);
            final double normalScore = cern.jet.stat.Probability.normalInverse(proportion);
            normalized.setElementQuick(rowIndex, colIndex, rowMean + normalScore * rowStdev);
        }
    }

    return normalized;
}
Example 7
Source File: DoubleMatrixDataset.java From systemsgenetics with GNU General Public License v3.0 | 3 votes |
/**
 * Creates a new dataset in which each column of this dataset's matrix is replaced by rank-based normal
 * scores, rescaled to that column's own mean and standard deviation.
 * <p>
 * Per column: the mean and standard deviation are computed with {@code JSci.maths.ArrayMath}, the values
 * are ranked with a {@link NaturalRanking} using {@code NaNStrategy.FAILED} and
 * {@code TiesStrategy.AVERAGE}, and each element becomes
 * {@code mean + normalInverse((0.5 + rank - 1) / ranks.length) * stdev}.
 *
 * @return a new dataset constructed from this dataset's {@code hashRows} and {@code hashCols} and filled
 *         with the transformed values
 */
public DoubleMatrixDataset<R, C> createColumnForceNormalDuplicate() {
    final DoubleMatrixDataset<R, C> normalized = new DoubleMatrixDataset<>(hashRows, hashCols);
    final NaturalRanking ranker = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.AVERAGE);

    for (int colIndex = 0; colIndex < matrix.columns(); ++colIndex) {
        final double[] colValues = matrix.viewColumn(colIndex).toArray();

        // Capture the column's location and scale before ranking so the scores can be mapped back onto them.
        final double colMean = JSci.maths.ArrayMath.mean(colValues);
        final double colStdev = JSci.maths.ArrayMath.standardDeviation(colValues);

        final double[] ranks = ranker.rank(colValues);

        for (int rowIndex = 0; rowIndex < matrix.rows(); rowIndex++) {
            final double proportion = (0.5d + ranks[rowIndex] - 1d) / (double) (ranks.length);
            final double normalScore = cern.jet.stat.Probability.normalInverse(proportion);
            normalized.setElementQuick(rowIndex, colIndex, colMean + normalScore * colStdev);
        }
    }

    return normalized;
}