org.apache.commons.math3.stat.ranking.NaturalRanking#rank

Source File: TestEQTLDatasetForInteractions.java From systemsgenetics with GNU General Public License v3.0

6 votes

/**
 * Replaces every covariate row in place with rank-based normal scores.
 * <p>
 * For each probe the first {@code datasetGenotypes.nrSamples} values are ranked, each rank
 * {@code r} is mapped to the proportion {@code (r - 0.5) / n}, and that proportion is
 * converted to a Z-score with the inverse normal CDF. The result overwrites
 * {@code datasetCovariates.rawData}.
 *
 * @param datasetCovariates dataset whose {@code rawData} rows are transformed in place
 * @param datasetGenotypes  only its {@code nrSamples} is read; it decides how many samples
 *                          per probe are ranked and replaced
 * @throws ArithmeticException declared by the original signature; nothing in this method
 *                             throws it directly
 */
private void forceNormalCovariates(ExpressionDataset datasetCovariates, ExpressionDataset datasetGenotypes) throws ArithmeticException {
	System.out.println("Enforcing normal distribution on covariates");

	NaturalRanking ranker = new NaturalRanking();

	// Rank exactly the samples that get transformed. Sizing the buffer by
	// datasetCovariates.nrSamples while filling it up to datasetGenotypes.nrSamples
	// would leave trailing zeros that take part in the ranking and inflate the
	// denominator whenever the two counts differ.
	final int nrSamples = datasetGenotypes.nrSamples;

	for (int p = 0; p < datasetCovariates.nrProbes; p++) {
		// Rank order the covariate values:
		double[] values = new double[nrSamples];
		System.arraycopy(datasetCovariates.rawData[p], 0, values, 0, nrSamples);
		double[] rankedValues = ranker.rank(values);

		// Replace each original value with its normal score:
		for (int s = 0; s < nrSamples; s++) {
			// Convert the rank to a proportion strictly inside (0, 1)
			double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length);
			// Convert the proportion to a Z-score:
			double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue);
			datasetCovariates.rawData[p][s] = zScore;
		}
	}
}

Source File: TestEQTLDatasetForInteractions.java From systemsgenetics with GNU General Public License v3.0

6 votes

/**
 * Overwrites every probe row of the expression dataset with rank-based normal scores.
 * <p>
 * Each row is ranked, every rank {@code r} becomes the proportion {@code (r - 0.5) / n},
 * and the proportion is turned into a Z-score through the inverse normal CDF.
 *
 * @param datasetExpression dataset whose {@code rawData} is transformed in place
 * @throws ArithmeticException declared by the original signature; nothing in this method
 *                             throws it directly
 */
private void forceNormalExpressionData(ExpressionDataset datasetExpression) throws ArithmeticException {
	System.out.println("Enforcing normal distribution on expression data:");

	final NaturalRanking rankCalculator = new NaturalRanking();

	for (int probe = 0; probe < datasetExpression.nrProbes; probe++) {
		// Take a private copy of the row so ranking works on a snapshot of the raw values.
		final int sampleCount = datasetExpression.nrSamples;
		final double[] snapshot = new double[sampleCount];
		System.arraycopy(datasetExpression.rawData[probe], 0, snapshot, 0, sampleCount);

		final double[] ranks = rankCalculator.rank(snapshot);

		// Write the normal score of each sample back over its original value.
		for (int sample = 0; sample < datasetExpression.nrSamples; sample++) {
			// Rank -> proportion strictly between 0 and 1.
			final double centredRank = 0.5d + ranks[sample] - 1d;
			final double proportion = centredRank / (double) (ranks.length);
			// Proportion -> Z-score.
			datasetExpression.rawData[probe][sample] = cern.jet.stat.tdouble.Probability.normalInverse(proportion);
		}
	}

	System.out.println("Expression data now force normal");
}

Source File: XDataFrameRank.java From morpheus-core with Apache License 2.0

5 votes

/**
 * Ranks the given values with a {@code NaturalRanking} configured from the current
 * {@code DataFrameOptions} NaN and tie strategies.
 * @param values    the values to rank
 * @return          the rank of each entry of the input array
 * @throws DataFrameException   if the configured NaN or tie strategy has no mapping in {@code optionsMap}
 */
static double[] rank(double[] values) {
    // Resolve both configured options up front, before validating either of them.
    final NaNStrategy resolvedNanStrategy =
            (NaNStrategy)optionsMap.get(NaNStrategy.class).get(DataFrameOptions.getNanStrategy());
    final TiesStrategy resolvedTieStrategy =
            (TiesStrategy)optionsMap.get(TiesStrategy.class).get(DataFrameOptions.getTieStrategy());

    if (resolvedNanStrategy == null) {
        throw new DataFrameException("Unsupported NaN strategy specified: " + DataFrameOptions.getNanStrategy());
    }
    if (resolvedTieStrategy == null) {
        throw new DataFrameException("Unsupported tie strategy specified: " + DataFrameOptions.getTieStrategy());
    }

    return new NaturalRanking(resolvedNanStrategy, resolvedTieStrategy).rank(values);
}

Source File: DenseVectors.java From cc-dbp with Apache License 2.0

4 votes

/**
 * Ranks the entries of {@code x} using a default-configured {@code NaturalRanking}.
 *
 * @param x the values to rank
 * @return the array produced by {@code NaturalRanking#rank} for {@code x}
 */
public static double[] toRanks(double[] x) {
	return new NaturalRanking().rank(x);
}

Source File: TestEQTLDatasetForInteractions.java From systemsgenetics with GNU General Public License v3.0

4 votes

/**
 * Regresses a set of "correction" covariates out of all remaining covariates and then
 * forces every covariate row to a normal distribution. The input dataset is modified in place.
 * <p>
 * Steps:
 * <ol>
 *   <li>Collect the rows named in {@code covsToCorrect2} and {@code covsToCorrect}, plus
 *       {@code nrCompsToCorrectFor} component rows, into a temporary dataset.</li>
 *   <li>Transpose and save it to {@code inputDir + "/CovariatesToCorrectFor.txt"}, call
 *       {@code orthogonalizeDataset} on that file, and reload the
 *       {@code .PrincipalComponents.txt} and {@code .Eigenvalues.txt} files next to it.</li>
 *   <li>For every covariate that is not itself a correction covariate, subtract the fitted
 *       contribution of each reloaded component whose eigenvalue exceeds {@code 1E-5}.</li>
 *   <li>Replace every row with rank-based normal scores.</li>
 * </ol>
 *
 * @param covsToCorrect2                  names of covariates to correct for (stored first)
 * @param covsToCorrect                   further names of covariates to correct for (stored second)
 * @param datasetGenotypes                supplies the sample count and sample names
 * @param datasetCovariatesPCAForceNormal covariate dataset; corrected and normalised in place
 * @param nrCompsToCorrectFor             number of component rows to add to the correction set
 * @return the same {@code datasetCovariatesPCAForceNormal} instance after modification
 * @throws Exception if a name in either covariate list is not present in
 *                   {@code datasetCovariatesPCAForceNormal.hashProbes}, or if a called helper fails
 */
private ExpressionDataset correctCovariateDataPCA(String[] covsToCorrect2, String[] covsToCorrect, ExpressionDataset datasetGenotypes, ExpressionDataset datasetCovariatesPCAForceNormal, int nrCompsToCorrectFor) throws Exception {

	System.out.println("Preparing data for testing eQTL effects of SNPs on covariate data:");
	System.out.println("Correcting covariate data for cohort specific effects:");

	final int nrSamples = datasetGenotypes.nrSamples;

	ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + covsToCorrect.length + nrCompsToCorrectFor, nrSamples);
	datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames;

	// Names of all covariates that are corrected FOR; these rows are left untouched by the regression below.
	HashMap<String, Object> hashCovsToCorrect = new HashMap<String, Object>();

	// Add covariates from covsToCorrect2 (rows 0 .. covsToCorrect2.length - 1)
	for (int i = 0; i < covsToCorrect2.length; ++i) {
		String cov = covsToCorrect2[i];
		hashCovsToCorrect.put(cov, null);
		Integer c = datasetCovariatesPCAForceNormal.hashProbes.get(cov);
		if (c == null) {
			throw new Exception("Covariate not found: " + cov);
		}
		for (int s = 0; s < nrSamples; s++) {
			datasetCovariatesToCorrectFor.rawData[i][s] = datasetCovariatesPCAForceNormal.rawData[c][s];
		}
	}

	// Add covariates from covsToCorrect (rows following the covsToCorrect2 block)
	for (int c = 0; c < covsToCorrect.length; c++) {
		String cov = covsToCorrect[c];
		hashCovsToCorrect.put(cov, null);
		Integer probeIndex = datasetCovariatesPCAForceNormal.hashProbes.get(cov);
		if (probeIndex == null) {
			// Same explicit failure as for covsToCorrect2, instead of a NullPointerException on unboxing.
			throw new Exception("Covariate not found: " + cov);
		}
		for (int s = 0; s < nrSamples; s++) {
			datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + c][s] = datasetCovariatesPCAForceNormal.rawData[probeIndex][s];
		}
	}

	// Add PCs.
	// NOTE(review): the component rows are read starting 51 rows before the end of the
	// covariate dataset; presumably the principal components are stored as its trailing
	// rows - confirm against the code that builds datasetCovariatesPCAForceNormal.
	final int firstCompOffsetFromEnd = 51;
	for (int comp = 0; comp < nrCompsToCorrectFor; comp++) {
		for (int s = 0; s < nrSamples; s++) {
			datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + covsToCorrect.length + comp][s] = datasetCovariatesPCAForceNormal.rawData[datasetCovariatesPCAForceNormal.nrProbes - firstCompOffsetFromEnd + comp][s];
		}
	}

	datasetCovariatesToCorrectFor.transposeDataset();

	// Round-trip through disk: save, orthogonalize, then reload the components and eigenvalues.
	datasetCovariatesToCorrectFor.save(inputDir + "/CovariatesToCorrectFor.txt");
	orthogonalizeDataset(inputDir + "/CovariatesToCorrectFor.txt");
	datasetCovariatesToCorrectFor = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.PrincipalComponents.txt");
	datasetCovariatesToCorrectFor.transposeDataset();
	ExpressionDataset datasetCovariatesToCorrectForEigenvalues = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.Eigenvalues.txt");

	for (int p = 0; p < datasetCovariatesPCAForceNormal.nrProbes; p++) {
		if (!hashCovsToCorrect.containsKey(datasetCovariatesPCAForceNormal.probeNames[p])) {
			for (int cov = 0; cov < datasetCovariatesToCorrectFor.nrProbes; cov++) {
				// Skip components with a (near-)zero eigenvalue.
				if (datasetCovariatesToCorrectForEigenvalues.rawData[cov][0] > 1E-5) {
					// rc[0] is used as the multiplier of the component - presumably the regression slope; verify in getLinearRegressionCoefficients.
					double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetCovariatesPCAForceNormal.rawData[p]);
					for (int s = 0; s < nrSamples; s++) {
						datasetCovariatesPCAForceNormal.rawData[p][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s];
					}
				}
			}
		}
	}

	System.out.println("Enforcing normal distribution on covariates");

	NaturalRanking ranker = new NaturalRanking();

	for (int p = 0; p < datasetCovariatesPCAForceNormal.nrProbes; p++) {
		// Rank exactly the samples that are transformed; a buffer sized by the covariate
		// dataset's own nrSamples would add trailing zeros to the ranking if the counts differ.
		double[] values = new double[nrSamples];
		System.arraycopy(datasetCovariatesPCAForceNormal.rawData[p], 0, values, 0, nrSamples);
		double[] rankedValues = ranker.rank(values);

		// Replace each value with its normal score:
		for (int s = 0; s < nrSamples; s++) {
			// Convert the rank to a proportion strictly inside (0, 1)
			double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length);
			// Convert the proportion to a Z-score:
			double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue);
			datasetCovariatesPCAForceNormal.rawData[p][s] = zScore;
		}
	}
	return datasetCovariatesPCAForceNormal;
}

Source File: DoubleMatrixDataset.java From systemsgenetics with GNU General Public License v3.0

3 votes

/**
 * Builds a new dataset in which every row is forced to a normal distribution while
 * keeping that row's own mean and standard deviation.
 * <p>
 * Each row is ranked (ranker configured with {@code NaNStrategy.FAILED} and
 * {@code TiesStrategy.AVERAGE}), each rank {@code r} is mapped to {@code (r - 0.5) / n},
 * and the matching normal quantile is rescaled with the row's mean and standard deviation.
 * This dataset's matrix is only read.
 *
 * @return a new dataset created from the same {@code hashRows} and {@code hashCols}
 */
public DoubleMatrixDataset<R, C> createRowForceNormalDuplicate() {

	final DoubleMatrixDataset<R, C> normalized = new DoubleMatrixDataset<>(hashRows, hashCols);
	final NaturalRanking ranker = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.AVERAGE);

	for (int rowIdx = 0; rowIdx < matrix.rows(); ++rowIdx) {

		final double[] rowValues = matrix.viewRow(rowIdx).toArray();

		// Location and scale of the original row, restored after the transform.
		final double rowMean = JSci.maths.ArrayMath.mean(rowValues);
		final double rowStdev = JSci.maths.ArrayMath.standardDeviation(rowValues);

		final double[] ranks = ranker.rank(rowValues);

		for (int colIdx = 0; colIdx < matrix.columns(); colIdx++) {
			// Rank -> proportion strictly between 0 and 1.
			final double proportion = (0.5d + ranks[colIdx] - 1d) / (double) (ranks.length);
			// Proportion -> standard normal quantile.
			final double quantile = cern.jet.stat.Probability.normalInverse(proportion);

			normalized.setElementQuick(rowIdx, colIdx, rowMean + quantile * rowStdev);
		}
	}

	return normalized;
}

Source File: DoubleMatrixDataset.java From systemsgenetics with GNU General Public License v3.0

3 votes

/**
 * Builds a new dataset in which every column is forced to a normal distribution while
 * keeping that column's own mean and standard deviation.
 * <p>
 * Each column is ranked (ranker configured with {@code NaNStrategy.FAILED} and
 * {@code TiesStrategy.AVERAGE}), each rank {@code r} is mapped to {@code (r - 0.5) / n},
 * and the matching normal quantile is rescaled with the column's mean and standard deviation.
 * This dataset's matrix is only read.
 *
 * @return a new dataset created from the same {@code hashRows} and {@code hashCols}
 */
public DoubleMatrixDataset<R, C> createColumnForceNormalDuplicate() {

	final DoubleMatrixDataset<R, C> normalized = new DoubleMatrixDataset<>(hashRows, hashCols);
	final NaturalRanking ranker = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.AVERAGE);

	for (int colIdx = 0; colIdx < matrix.columns(); ++colIdx) {

		final double[] colValues = matrix.viewColumn(colIdx).toArray();

		// Location and scale of the original column, restored after the transform.
		final double colMean = JSci.maths.ArrayMath.mean(colValues);
		final double colStdev = JSci.maths.ArrayMath.standardDeviation(colValues);

		final double[] ranks = ranker.rank(colValues);

		for (int rowIdx = 0; rowIdx < matrix.rows(); rowIdx++) {
			// Rank -> proportion strictly between 0 and 1.
			final double proportion = (0.5d + ranks[rowIdx] - 1d) / (double) (ranks.length);
			// Proportion -> standard normal quantile.
			final double quantile = cern.jet.stat.Probability.normalInverse(proportion);

			normalized.setElementQuick(rowIdx, colIdx, colMean + quantile * colStdev);
		}
	}

	return normalized;
}

Java Code Examples for org.apache.commons.math3.stat.ranking.NaturalRanking#rank()