org.apache.commons.math3.stat.descriptive.rank.Percentile Java Examples
The following examples show how to use
org.apache.commons.math3.stat.descriptive.rank.Percentile.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DescriptiveStatistics.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Returns an estimate for the pth percentile of the stored values. * <p> * The implementation provided here follows the first estimation procedure presented * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> * </p><p> * <strong>Preconditions</strong>:<ul> * <li><code>0 < p ≤ 100</code> (otherwise an * <code>IllegalArgumentException</code> is thrown)</li> * <li>at least one value must be stored (returns <code>Double.NaN * </code> otherwise)</li> * </ul></p> * * @param p the requested percentile (scaled from 0 - 100) * @return An estimate for the pth percentile of the stored data * @throws IllegalStateException if percentile implementation has been * overridden and the supplied implementation does not support setQuantile */ public double getPercentile(double p) { if (percentileImpl instanceof Percentile) { ((Percentile) percentileImpl).setQuantile(p); } else { try { percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME, new Class[] {Double.TYPE}).invoke(percentileImpl, new Object[] {Double.valueOf(p)}); } catch (NoSuchMethodException e1) { // Setter guard should prevent throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD, percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME); } catch (IllegalAccessException e2) { throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD, SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName()); } catch (InvocationTargetException e3) { throw new IllegalStateException(e3.getCause()); } } return apply(percentileImpl); }
Example #2
Source File: PercentileClassifier.java From macrobase with Apache License 2.0 | 6 votes |
/**
 * Computes the low and high percentile cutoffs of the configured metric column and writes
 * a copy of the input with an extra column marking rows outside those cutoffs.
 * <p>
 * A row is marked {@code 1.0} when its value is above the high cutoff (and {@code includeHigh}
 * is set) or below the low cutoff (and {@code includeLow} is set); all other rows stay
 * {@code 0.0}.
 * </p>
 *
 * @param input the data frame containing the metric column named by {@code columnName}
 */
@Override
public void process(DataFrame input) {
    final double[] values = input.getDoubleColumnByName(columnName);

    lowCutoff = new Percentile().evaluate(values, percentile);
    highCutoff = new Percentile().evaluate(values, 100.0 - percentile);
    output = input.copy();

    final double[] flags = new double[values.length];
    for (int row = 0; row < values.length; row++) {
        final double value = values[row];
        final boolean aboveHigh = includeHigh && value > highCutoff;
        final boolean belowLow = includeLow && value < lowCutoff;
        if (aboveHigh || belowLow) {
            flags[row] = 1.0;
        }
    }
    output.addColumn(outputColumnName, flags);
}
Example #3
Source File: HDF5PCACoveragePoNCreationUtilsUnitTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Verifies that subsetReadCountsToUsableTargets keeps exactly the targets whose row median
 * is at or above the requested percentile of all row medians, in their original order,
 * with matching count rows and target factors.
 */
@Test(dataProvider="readCountAndPercentileData")
public void testSubsetTargetToUsableOnes(final ReadCountCollection readCount, final double percentile) {
    final Median median = new Median();
    final RealMatrix counts = readCount.counts();
    final int targetCount = counts.getRowDimension();

    // Expected per-target medians and the percentile threshold derived from them.
    final double[] targetMedians = new double[targetCount];
    for (int row = 0; row < targetCount; row++) {
        targetMedians[row] = median.evaluate(counts.getRow(row));
    }
    final double threshold = new Percentile(percentile).evaluate(targetMedians);

    final boolean[] kept = new boolean[targetCount];
    int keptCount = 0;
    for (int row = 0; row < targetCount; row++) {
        kept[row] = targetMedians[row] >= threshold;
        if (kept[row]) {
            keptCount++;
        }
    }

    final Pair<ReadCountCollection, double[]> result =
            HDF5PCACoveragePoNCreationUtils.subsetReadCountsToUsableTargets(readCount, percentile, NULL_LOGGER);
    Assert.assertEquals(result.getLeft().targets().size(), keptCount);
    Assert.assertEquals(result.getRight().length, keptCount);

    int nextIndex = 0;
    for (int row = 0; row < targetCount; row++) {
        final int index = result.getLeft().targets().indexOf(readCount.targets().get(row));
        if (!kept[row]) {
            // Discarded targets must be absent from the result.
            Assert.assertEquals(index, -1);
            continue;
        }
        Assert.assertEquals(index, nextIndex++);
        Assert.assertEquals(counts.getRow(row), result.getLeft().counts().getRow(index));
        Assert.assertEquals(result.getRight()[index], targetMedians[row]);
    }
}
Example #4
Source File: ReadCountCollectionUtilsUnitTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Verifies that removeColumnsWithExtremeMedianCounts keeps exactly the columns whose median
 * lies between the {@code percentile} and {@code 100 - percentile} percentiles of all column
 * medians (inclusive), in their original order and with unchanged column data.
 */
@Test(dataProvider="readCountAndPercentileData")
public void testExtremeMedianColumnsData(final ReadCountCollection readCount, final double percentile) {
    final Median median = new Median();
    final RealMatrix counts = readCount.counts();
    final int columnCount = counts.getColumnDimension();

    final double[] columnMedians = new double[columnCount];
    for (int col = 0; col < columnCount; col++) {
        columnMedians[col] = median.evaluate(counts.getColumn(col));
    }
    final double top = new Percentile(100 - percentile).evaluate(columnMedians);
    final double bottom = new Percentile(percentile).evaluate(columnMedians);

    final boolean[] kept = new boolean[columnCount];
    int keptCount = 0;
    for (int col = 0; col < columnCount; col++) {
        kept[col] = columnMedians[col] <= top && columnMedians[col] >= bottom;
        if (kept[col]) {
            keptCount++;
        }
    }

    final ReadCountCollection result =
            ReadCountCollectionUtils.removeColumnsWithExtremeMedianCounts(readCount, percentile, NULL_LOGGER);
    Assert.assertEquals(result.columnNames().size(), keptCount);

    int nextIndex = 0;
    for (int col = 0; col < columnCount; col++) {
        final int index = result.columnNames().indexOf(readCount.columnNames().get(col));
        if (!kept[col]) {
            // Removed columns must not appear in the result.
            Assert.assertEquals(index, -1);
            continue;
        }
        Assert.assertEquals(index, nextIndex++);
        Assert.assertEquals(counts.getColumn(col), result.counts().getColumn(index));
    }
}
Example #5
Source File: ReadCountCollectionUtilsUnitTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Verifies that truncateExtremeCounts clamps every count to the
 * [{@code percentile}, {@code 100 - percentile}] percentile band of all counts and leaves
 * the matrix dimensions unchanged.
 */
@Test(dataProvider="readCountAndPercentileData")
public void testTruncateExtremeCounts(final ReadCountCollection readCount, final double percentile) {
    final RealMatrix counts = readCount.counts();

    // Record the dimensions before truncation so the post-condition is compared with an
    // independent value (the previous assertions compared newCounts with itself).
    final int expectedRows = counts.getRowDimension();
    final int expectedColumns = counts.getColumnDimension();

    final double[] allCounts = Stream.of(counts.getData())
            .flatMapToDouble(row -> DoubleStream.of(row))
            .toArray();
    final double bottom = new Percentile(percentile).evaluate(allCounts);
    final double top = new Percentile(100 - percentile).evaluate(allCounts);

    // Expected values are computed up front, before the collection is modified.
    final double[][] expected = new double[expectedRows][];
    for (int i = 0; i < expected.length; i++) {
        expected[i] = DoubleStream.of(counts.getRow(i))
                .map(d -> d < bottom ? bottom : (d > top) ? top : d)
                .toArray();
    }

    ReadCountCollectionUtils.truncateExtremeCounts(readCount, percentile, NULL_LOGGER);

    final RealMatrix newCounts = readCount.counts();
    Assert.assertEquals(newCounts.getRowDimension(), expectedRows);
    Assert.assertEquals(newCounts.getColumnDimension(), expectedColumns);
    for (int i = 0; i < expected.length; i++) {
        for (int j = 0; j < expected[i].length; j++) {
            Assert.assertEquals(newCounts.getEntry(i, j), expected[i][j]);
        }
    }
}
Example #6
Source File: HDF5PCACoveragePoNCreationUtils.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Restricts the input read counts to the targets whose factor is at or above the requested
 * percentile of all target factors.
 *
 * <p>
 * The returned pair holds the (possibly subset) read counts on the left and the matching
 * target factors on the right. When no target is discarded the input collection and the
 * full factor array are returned as they are.
 * </p>
 *
 * @param readCounts the input read-counts.
 * @param targetFactorPercentileThreshold the minimum median count percentile under which targets are not considered useful.
 * @param logger receives an info message reporting how many targets were kept or discarded.
 * @return never {@code null}.
 */
@VisibleForTesting
static Pair<ReadCountCollection, double[]> subsetReadCountsToUsableTargets(final ReadCountCollection readCounts,
                                                                           final double targetFactorPercentileThreshold,
                                                                           final Logger logger) {
    final double[] targetFactors = calculateTargetFactors(readCounts);
    final double threshold = new Percentile(targetFactorPercentileThreshold).evaluate(targetFactors);
    final List<Target> targetByIndex = readCounts.targets();

    // Collect the usable targets, preserving their original order.
    final Set<Target> usableTargets = new LinkedHashSet<>();
    for (int i = 0; i < targetFactors.length; i++) {
        if (targetFactors[i] >= threshold) {
            usableTargets.add(targetByIndex.get(i));
        }
    }

    if (usableTargets.size() == targetByIndex.size()) {
        logger.info(String.format("All %d targets are kept", targetByIndex.size()));
        return new ImmutablePair<>(readCounts, targetFactors);
    }

    final int discardedCount = targetFactors.length - usableTargets.size();
    logger.info(String.format("Discarded %d target(s) out of %d with factors below %.2g (%.2f percentile)",
            discardedCount, targetFactors.length, threshold, targetFactorPercentileThreshold ));
    final double[] targetFactorSubset = DoubleStream.of(targetFactors)
            .filter(factor -> factor >= threshold)
            .toArray();
    return new ImmutablePair<>(readCounts.subsetTargets(usableTargets), targetFactorSubset);
}
Example #7
Source File: DescriptiveStatistics.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Returns an estimate for the pth percentile of the stored values. * <p> * The implementation provided here follows the first estimation procedure presented * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> * </p><p> * <strong>Preconditions</strong>:<ul> * <li><code>0 < p ≤ 100</code> (otherwise an * <code>MathIllegalArgumentException</code> is thrown)</li> * <li>at least one value must be stored (returns <code>Double.NaN * </code> otherwise)</li> * </ul></p> * * @param p the requested percentile (scaled from 0 - 100) * @return An estimate for the pth percentile of the stored data * @throws MathIllegalStateException if percentile implementation has been * overridden and the supplied implementation does not support setQuantile * @throws MathIllegalArgumentException if p is not a valid quantile */ public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException { if (percentileImpl instanceof Percentile) { ((Percentile) percentileImpl).setQuantile(p); } else { try { percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME, new Class[] {Double.TYPE}).invoke(percentileImpl, new Object[] {Double.valueOf(p)}); } catch (NoSuchMethodException e1) { // Setter guard should prevent throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD, percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME); } catch (IllegalAccessException e2) { throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD, SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName()); } catch (InvocationTargetException e3) { throw new IllegalStateException(e3.getCause()); } } return apply(percentileImpl); }
Example #8
Source File: DescriptiveStatistics.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Returns an estimate for the pth percentile of the stored values. * <p> * The implementation provided here follows the first estimation procedure presented * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> * </p><p> * <strong>Preconditions</strong>:<ul> * <li><code>0 < p ≤ 100</code> (otherwise an * <code>MathIllegalArgumentException</code> is thrown)</li> * <li>at least one value must be stored (returns <code>Double.NaN * </code> otherwise)</li> * </ul></p> * * @param p the requested percentile (scaled from 0 - 100) * @return An estimate for the pth percentile of the stored data * @throws MathIllegalStateException if percentile implementation has been * overridden and the supplied implementation does not support setQuantile * @throws MathIllegalArgumentException if p is not a valid quantile */ public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException { if (percentileImpl instanceof Percentile) { ((Percentile) percentileImpl).setQuantile(p); } else { try { percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME, new Class[] {Double.TYPE}).invoke(percentileImpl, new Object[] {Double.valueOf(p)}); } catch (NoSuchMethodException e1) { // Setter guard should prevent throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD, percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME); } catch (IllegalAccessException e2) { throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD, SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName()); } catch (InvocationTargetException e3) { throw new IllegalStateException(e3.getCause()); } } return apply(percentileImpl); }
Example #9
Source File: DescriptiveStatistics.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Returns an estimate for the pth percentile of the stored values. * <p> * The implementation provided here follows the first estimation procedure presented * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> * </p><p> * <strong>Preconditions</strong>:<ul> * <li><code>0 < p ≤ 100</code> (otherwise an * <code>MathIllegalArgumentException</code> is thrown)</li> * <li>at least one value must be stored (returns <code>Double.NaN * </code> otherwise)</li> * </ul></p> * * @param p the requested percentile (scaled from 0 - 100) * @return An estimate for the pth percentile of the stored data * @throws MathIllegalStateException if percentile implementation has been * overridden and the supplied implementation does not support setQuantile * @throws MathIllegalArgumentException if p is not a valid quantile */ public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException { if (percentileImpl instanceof Percentile) { ((Percentile) percentileImpl).setQuantile(p); } else { try { percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME, new Class[] {Double.TYPE}).invoke(percentileImpl, new Object[] {Double.valueOf(p)}); } catch (NoSuchMethodException e1) { // Setter guard should prevent throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD, percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME); } catch (IllegalAccessException e2) { throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD, SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName()); } catch (InvocationTargetException e3) { throw new IllegalStateException(e3.getCause()); } } return apply(percentileImpl); }
Example #10
Source File: DescriptiveStatistics.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Returns an estimate for the pth percentile of the stored values. * <p> * The implementation provided here follows the first estimation procedure presented * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> * </p><p> * <strong>Preconditions</strong>:<ul> * <li><code>0 < p ≤ 100</code> (otherwise an * <code>IllegalArgumentException</code> is thrown)</li> * <li>at least one value must be stored (returns <code>Double.NaN * </code> otherwise)</li> * </ul></p> * * @param p the requested percentile (scaled from 0 - 100) * @return An estimate for the pth percentile of the stored data * @throws IllegalStateException if percentile implementation has been * overridden and the supplied implementation does not support setQuantile */ public double getPercentile(double p) { if (percentileImpl instanceof Percentile) { ((Percentile) percentileImpl).setQuantile(p); } else { try { percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME, new Class[] {Double.TYPE}).invoke(percentileImpl, new Object[] {Double.valueOf(p)}); } catch (NoSuchMethodException e1) { // Setter guard should prevent throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD, percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME); } catch (IllegalAccessException e2) { throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD, SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName()); } catch (InvocationTargetException e3) { throw new IllegalStateException(e3.getCause()); } } return apply(percentileImpl); }
Example #11
Source File: DescriptiveStatistics.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Returns an estimate for the pth percentile of the stored values. * <p> * The implementation provided here follows the first estimation procedure presented * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> * </p><p> * <strong>Preconditions</strong>:<ul> * <li><code>0 < p ≤ 100</code> (otherwise an * <code>MathIllegalArgumentException</code> is thrown)</li> * <li>at least one value must be stored (returns <code>Double.NaN * </code> otherwise)</li> * </ul></p> * * @param p the requested percentile (scaled from 0 - 100) * @return An estimate for the pth percentile of the stored data * @throws MathIllegalStateException if percentile implementation has been * overridden and the supplied implementation does not support setQuantile * @throws MathIllegalArgumentException if p is not a valid quantile */ public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException { if (percentileImpl instanceof Percentile) { ((Percentile) percentileImpl).setQuantile(p); } else { try { percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME, new Class[] {Double.TYPE}).invoke(percentileImpl, new Object[] {Double.valueOf(p)}); } catch (NoSuchMethodException e1) { // Setter guard should prevent throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD, percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME); } catch (IllegalAccessException e2) { throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD, SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName()); } catch (InvocationTargetException e3) { throw new IllegalStateException(e3.getCause()); } } return apply(percentileImpl); }
Example #12
Source File: DescriptiveStatistics.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Returns an estimate for the pth percentile of the stored values. * <p> * The implementation provided here follows the first estimation procedure presented * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> * </p><p> * <strong>Preconditions</strong>:<ul> * <li><code>0 < p ≤ 100</code> (otherwise an * <code>MathIllegalArgumentException</code> is thrown)</li> * <li>at least one value must be stored (returns <code>Double.NaN * </code> otherwise)</li> * </ul></p> * * @param p the requested percentile (scaled from 0 - 100) * @return An estimate for the pth percentile of the stored data * @throws MathIllegalStateException if percentile implementation has been * overridden and the supplied implementation does not support setQuantile * @throws MathIllegalArgumentException if p is not a valid quantile */ public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException { if (percentileImpl instanceof Percentile) { ((Percentile) percentileImpl).setQuantile(p); } else { try { percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME, new Class[] {Double.TYPE}).invoke(percentileImpl, new Object[] {Double.valueOf(p)}); } catch (NoSuchMethodException e1) { // Setter guard should prevent throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD, percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME); } catch (IllegalAccessException e2) { throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD, SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName()); } catch (InvocationTargetException e3) { throw new IllegalStateException(e3.getCause()); } } return apply(percentileImpl); }
Example #13
Source File: DescriptiveStatistics.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Returns an estimate for the pth percentile of the stored values. * <p> * The implementation provided here follows the first estimation procedure presented * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> * </p><p> * <strong>Preconditions</strong>:<ul> * <li><code>0 < p ≤ 100</code> (otherwise an * <code>MathIllegalArgumentException</code> is thrown)</li> * <li>at least one value must be stored (returns <code>Double.NaN * </code> otherwise)</li> * </ul></p> * * @param p the requested percentile (scaled from 0 - 100) * @return An estimate for the pth percentile of the stored data * @throws MathIllegalStateException if percentile implementation has been * overridden and the supplied implementation does not support setQuantile * @throws MathIllegalArgumentException if p is not a valid quantile */ public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException { if (percentileImpl instanceof Percentile) { ((Percentile) percentileImpl).setQuantile(p); } else { try { percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME, new Class[] {Double.TYPE}).invoke(percentileImpl, new Object[] {Double.valueOf(p)}); } catch (NoSuchMethodException e1) { // Setter guard should prevent throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD, percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME); } catch (IllegalAccessException e2) { throw new MathIllegalStateException( LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD, SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName()); } catch (InvocationTargetException e3) { throw new IllegalStateException(e3.getCause()); } } return apply(percentileImpl); }
Example #14
Source File: MomentSolverTest.java From momentsketch with Apache License 2.0 | 6 votes |
/**
 * Builds a moment sketch from the raw values 0..999 and checks that the solver's quantile
 * estimates agree with the exact Percentile result to within 1.0.
 */
@Test
public void testFromRaw() {
    final int n = 1000;
    final double[] xVals = new double[n];
    for (int i = 0; i < n; i++) {
        xVals[i] = i;
    }

    final MomentStruct moments = new MomentStruct(10);
    moments.add(xVals);

    final MomentSolver solver = new MomentSolver(moments);
    solver.setGridSize(1024);
    solver.solve();
    final double estimatedP90 = solver.getQuantile(.9);

    // Reference value computed directly from the raw data.
    final Percentile exact = new Percentile();
    exact.setData(xVals);
    final double truep90 = exact.evaluate(90.0);
    assertEquals(truep90, estimatedP90, 1.0);

    final double[] fractions = {0, .1, .5, .9, 1.0};
    final double[] estimates = solver.getQuantiles(fractions);
    assertEquals(0.0, estimates[0], 1.0);
    assertEquals(truep90, estimates[3], 1.0);
}
Example #15
Source File: QuantileClassifierTest.java From macrobase with Apache License 2.0 | 5 votes |
/**
 * Runs the classifier with its default settings and checks the output shape, the low/high
 * cutoffs against the exact 1st/99th percentiles of the raw data, and the per-group outlier
 * counts (each within a tolerance of 5.0).
 */
@Test
public void testClassify() throws Exception {
    assertEquals(length, df.getNumRows());

    final QuantileClassifier classifier = new QuantileClassifier(
            "count",
            quantileColumnsMap
    );
    classifier.process(df);
    final DataFrame output = classifier.getResults();

    assertEquals(df.getNumRows(), output.getNumRows());
    assertEquals(7, df.getSchema().getNumColumns());
    assertEquals(8, output.getSchema().getNumColumns());

    // Exact cutoffs computed from the raw, un-sketched data.
    final Percentile exact = new Percentile();
    exact.setData(rawData);
    final double trueLowCutoff = exact.evaluate(1);
    final double trueHighCutoff = exact.evaluate(99);
    assertEquals(trueLowCutoff, classifier.getLowCutoff(), 5.0);
    assertEquals(trueHighCutoff, classifier.getHighCutoff(), 5.0);

    final double[] outliers = output.getDoubleColumnByName("_OUTLIER");
    for (int group = 0; group < outliers.length; group++) {
        int trueNumOutliers = 0;
        for (final double value : rawGroups.get(group)) {
            if (value < trueLowCutoff || value > trueHighCutoff) {
                trueNumOutliers++;
            }
        }
        assertEquals(trueNumOutliers, outliers[group], 5.0);
    }
}
Example #16
Source File: QuantileClassifierTest.java From macrobase with Apache License 2.0 | 5 votes |
/**
 * Configures the classifier through its setters (low side only, 5th percentile, custom
 * output column) and checks the low cutoff and the per-group outlier counts against values
 * computed from the raw data (each within a tolerance of 5.0).
 */
@Test
public void testConfigure() throws Exception {
    final QuantileClassifier classifier = new QuantileClassifier(
            "col1",
            new LinkedHashMap<>()
    );
    classifier.setCountColumnName("count");
    classifier.setQuantileColumnNames(quantileColumnNames);
    classifier.setQuantiles(quantiles);
    classifier.setIncludeHigh(false);
    classifier.setIncludeLow(true);
    classifier.setOutputColumnName("_OUT");
    classifier.setPercentile(5.0);

    classifier.process(df);
    final DataFrame output = classifier.getResults();
    assertEquals(df.getNumRows(), output.getNumRows());

    // Exact low cutoff computed from the raw, un-sketched data.
    final Percentile exact = new Percentile();
    exact.setData(rawData);
    final double trueLowCutoff = exact.evaluate(5);
    assertEquals(trueLowCutoff, classifier.getLowCutoff(), 5.0);

    final double[] outliers = output.getDoubleColumnByName("_OUT");
    for (int group = 0; group < outliers.length; group++) {
        int trueNumOutliers = 0;
        for (final double value : rawGroups.get(group)) {
            if (value < trueLowCutoff) {
                trueNumOutliers++;
            }
        }
        assertEquals(trueNumOutliers, outliers[group], 5.0);
    }
}
Example #17
Source File: DecileCollection.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Builds a DecileCollection by evaluating the 10th through 90th percentiles of the samples
 * with Apache Commons {@link Percentile}.
 * @param samples   non-null, non-empty list of samples (caution should be used if this contains NaN or infinite values)
 */
public DecileCollection(final List<Double> samples) {
    Utils.nonNull(samples);
    Utils.validateArg(!samples.isEmpty(), "Cannot construct deciles for empty list of samples.");

    final Percentile percentile = new Percentile();
    percentile.setData(Doubles.toArray(samples));

    // The k-th key (0-based) receives the 10 * (k + 1) percentile.
    final Decile[] decileKeys = Decile.values();
    for (int k = 0; k < 9; k++) {
        deciles.put(decileKeys[k], percentile.evaluate(10 * (k + 1)));
    }
}
Example #18
Source File: CNLOHCaller.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
private double calculateSegmentMeanBiasInCRSpace(final List<ACNVModeledSegment> segments) { Utils.nonNull(segments); final double neutralCRApprox = 1; // Only consider values "close enough" to copy neutral (CR == 1). final double[] neutralSegmentMeans = segments.stream().mapToDouble(ACNVModeledSegment::getSegmentMeanInCRSpace) .filter(x -> Math.abs(x - neutralCRApprox) < CLOSE_ENOUGH_TO_COPY_NEUTRAL_IN_CR) .toArray(); return new Percentile().evaluate(neutralSegmentMeans) - 1; }
Example #19
Source File: QuantileAggregator.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates an aggregator for the given quantile.
 * <p>
 * The argument is a fraction in [0, 1], whereas Commons Math {@link Percentile} expects its
 * quantile on the (0, 100] percent scale. The fraction is therefore multiplied by 100 before
 * being handed to {@link Percentile}; passing it unscaled would select (for example) the
 * 0.15th percentile instead of the 15th.
 * </p>
 * <p>
 * NOTE(review): {@link Percentile} rejects a quantile of exactly 0, so the boundary values
 * 0 and 1 are accepted by the range check below but still fail inside the
 * {@link Percentile} constructor, as they did before the scaling fix.
 * </p>
 *
 * @param quantile the quantile as a fraction in [0, 1]
 * @throws IllegalArgumentException if {@code quantile} lies outside [0, 1]
 */
public QuantileAggregator(final double quantile) {
    if (quantile < 0 || quantile > 1) {
        throw new IllegalArgumentException("Quantile values have to be in [0, 1]");
    }
    // Convert the [0, 1] fraction to the percent scale used by Percentile.
    this.maxQuantile = new Percentile(100 * (1 - quantile));
    this.minQuantile = new Percentile(100 * quantile);
}
Example #20
Source File: MovingMedianEvaluator.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Computes a moving median over a list of numbers.
 * <p>
 * Values are fed one by one into a sliding window of the requested size; once the window
 * holds at least {@code window} values, the 50th percentile of its contents is appended to
 * the result after every added value.
 * </p>
 *
 * @param first  the values, expected to be a {@code List} of {@code Number}
 * @param second the window size, expected to be a {@code Number}
 * @return the list of moving medians
 * @throws IOException if either argument is {@code null}, {@code first} is not a
 *         {@code List}, or {@code second} is not a {@code Number}
 */
@Override
public Object doWork(Object first, Object second) throws IOException {
    if (null == first) {
        throw new IOException(String.format(Locale.ROOT,
            "Invalid expression %s - null found for the first value",
            toExpression(constructingFactory)));
    }
    if (null == second) {
        throw new IOException(String.format(Locale.ROOT,
            "Invalid expression %s - null found for the second value",
            toExpression(constructingFactory)));
    }
    if (!(first instanceof List<?>)) {
        throw new IOException(String.format(Locale.ROOT,
            "Invalid expression %s - found type %s for the first value, expecting a List",
            toExpression(constructingFactory), first.getClass().getSimpleName()));
    }
    if (!(second instanceof Number)) {
        // Report the type of the offending (second) argument, not the first.
        throw new IOException(String.format(Locale.ROOT,
            "Invalid expression %s - found type %s for the second value, expecting a Number",
            toExpression(constructingFactory), second.getClass().getSimpleName()));
    }

    List<?> values = (List<?>) first;
    int window = ((Number) second).intValue();

    List<Number> moving = new ArrayList<>();
    DescriptiveStatistics slider = new DescriptiveStatistics(window);
    Percentile percentile = new Percentile();
    for (Object value : values) {
        slider.addValue(((Number) value).doubleValue());
        // Emit a median only once the window has been filled.
        if (slider.getN() >= window) {
            double median = percentile.evaluate(slider.getValues(), 50);
            moving.add(median);
        }
    }
    return moving;
}
Example #21
Source File: DecileCollection.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Builds a DecileCollection by evaluating the 10th through 90th percentiles of the samples
 * with Apache Commons {@link Percentile}.
 * @param samples   non-null, non-empty list of samples (caution should be used if this contains NaN or infinite values)
 */
public DecileCollection(final List<Double> samples) {
    Utils.nonNull(samples);
    Utils.validateArg(!samples.isEmpty(), "Cannot construct deciles for empty list of samples.");

    final Percentile percentile = new Percentile();
    percentile.setData(Doubles.toArray(samples));

    // The k-th key (0-based) receives the 10 * (k + 1) percentile.
    final Decile[] decileKeys = Decile.values();
    for (int k = 0; k < 9; k++) {
        deciles.put(decileKeys[k], percentile.evaluate(10 * (k + 1)));
    }
}
Example #22
Source File: Winsorizer.java From macrobase with Apache License 2.0 | 5 votes |
/**
 * Clamps every dimension of the input rows to that dimension's
 * [{@code trimPct}, {@code 100 - trimPct}] percentile range.
 * <p>
 * The per-dimension lower and upper limits are stored in {@code bounds[dim][0]} and
 * {@code bounds[dim][1]}. The input rows are not modified; a new list of new arrays is
 * returned.
 * </p>
 *
 * @param metrics the rows to process; the dimension count is taken from the first row
 * @return new rows with each value clamped to its dimension's limits
 */
public List<double[]> process(List<double[]> metrics) {
    final int rowCount = metrics.size();
    final int dimCount = metrics.get(0).length;
    Percentile p = new Percentile();
    bounds = new double[dimCount][2];

    List<double[]> clamped = new ArrayList<>(rowCount);
    for (int row = 0; row < rowCount; row++) {
        clamped.add(new double[dimCount]);
    }

    // Scratch buffer holding one dimension's values across all rows.
    double[] column = new double[rowCount];
    for (int dim = 0; dim < dimCount; dim++) {
        for (int row = 0; row < rowCount; row++) {
            column[row] = metrics.get(row)[dim];
        }
        p.setData(column);
        final double lower = p.evaluate(trimPct);
        final double upper = p.evaluate(100 - trimPct);
        bounds[dim][0] = lower;
        bounds[dim][1] = upper;

        for (int row = 0; row < rowCount; row++) {
            final double value = column[row];
            clamped.get(row)[dim] = value > upper ? upper : (value < lower ? lower : value);
        }
    }
    return clamped;
}
Example #23
Source File: PercentileAggregator.java From rapidminer-studio with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Evaluates the configured percentile over the collected elements.
 *
 * @return the result of {@link Percentile#evaluate(double)} for {@code percentile} on the
 *         current contents of {@code elements}
 */
@Override
protected double getValue() {
    // Unbox the collected values into the primitive array Percentile works on.
    final Double[] boxed = elements.toArray(new Double[0]);
    final double[] data = ArrayUtils.toPrimitive(boxed);

    final Percentile percentileCalc = new Percentile();
    percentileCalc.setData(data);
    return percentileCalc.evaluate(percentile);
}
Example #24
Source File: ComplexDoubleVector.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Evaluates the requested percentile over the first {@code size} stored values.
 * The stored values are copied and the copy is sorted, so the vector itself is untouched.
 *
 * @param percentile the percentile to evaluate
 * @return the percentile value computed by {@link Percentile}
 * @throws IllegalStateException if the vector is empty
 */
@Override
public double doublePercentile(int percentile){
    final int count = this.size;
    if(count == 0){
        throw new IllegalStateException();
    }

    // Work on a sorted private copy of the live portion of the backing array.
    final double[] sorted = new double[count];
    System.arraycopy(this.values, 0, sorted, 0, sorted.length);
    Arrays.sort(sorted);

    final Percentile statistic = new Percentile();
    statistic.setData(sorted);
    return statistic.evaluate(percentile);
}
Example #25
Source File: ContaminationModel.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
private Pair<Double, Double> calculateContamination(final Strategy strategy, final List<PileupSummary> tumorSites, final double minMaf) { final boolean useHomAlt = strategy == Strategy.HOM_ALT; final List<PileupSummary> genotypingHoms; if (strategy == Strategy.HOM_ALT) { genotypingHoms = homAlts(minMaf); } else if (strategy == Strategy.HOM_REF) { genotypingHoms = homRefs(minMaf); } else { final List<PileupSummary> candidateHomRefs = tumorSites.stream() .filter(site -> site.getAltFraction() < UNSCRUPULOUS_HOM_REF_ALLELE_FRACTION) .collect(Collectors.toList()); final double altFractionThreshold = Math.max(MINIMUM_UNSCRUPULOUS_HOM_REF_ALT_FRACTION_THRESHOLD, new Percentile(UNSCRUPULOUS_HOM_REF_PERCENTILE).evaluate(candidateHomRefs.stream().mapToDouble(PileupSummary::getAltFraction).toArray())); genotypingHoms = candidateHomRefs.stream().filter(site -> site.getAltFraction() <= altFractionThreshold).collect(Collectors.toList()); } final List<PileupSummary> homs = subsetSites(tumorSites, genotypingHoms); final double tumorErrorRate = calculateErrorRate(tumorSites); // depth of ref in hom alt or alt in hom ref final ToIntFunction<PileupSummary> oppositeCount = useHomAlt ? PileupSummary::getRefCount : PileupSummary::getAltCount; final ToDoubleFunction<PileupSummary> oppositeAlleleFrequency = useHomAlt ? 
PileupSummary::getRefFrequency : PileupSummary::getAlleleFrequency; final long totalDepth = homs.stream().mapToLong(PileupSummary::getTotalCount).sum(); // total reaad count of ref in hom alt or alt in hom ref, as the case may be final long oppositeDepth = homs.stream().mapToLong(oppositeCount::applyAsInt).sum(); final long errorDepth = Math.round(totalDepth * tumorErrorRate / 3); final long contaminationOppositeDepth = Math.max(oppositeDepth - errorDepth, 0); final double totalDepthWeightedByOppositeFrequency = homs.stream() .mapToDouble(ps -> ps.getTotalCount() * oppositeAlleleFrequency.applyAsDouble(ps)) .sum(); final double contamination = contaminationOppositeDepth / totalDepthWeightedByOppositeFrequency; final double stdError = homs.isEmpty() ? 1 : Math.sqrt(homs.stream().mapToDouble(ps -> { final double d = ps.getTotalCount(); final double f = 1 - oppositeAlleleFrequency.applyAsDouble(ps); return (1 - f) * d * contamination * ((1 - contamination) + f * d * contamination); }).sum()) / totalDepthWeightedByOppositeFrequency; return Pair.of(Math.min(contamination, 1.0), stdError); }
Example #26
Source File: SlowBrokerFinder.java From cruise-control with BSD 2-Clause "Simplified" License | 4 votes |
/**
 * Creates a finder with empty slowness-score and detected-slow-broker maps and a fresh
 * {@link Percentile} instance.
 */
public SlowBrokerFinder() {
  this._brokerSlownessScore = new HashMap<>();
  this._detectedSlowBrokers = new HashMap<>();
  this._percentile = new Percentile();
}
Example #27
Source File: DescriptiveStatisticsTest.java From astor with GNU General Public License v2.0 | 4 votes |
/**
 * Returns a newly constructed {@code subPercentile}; no state is transferred from this
 * instance.
 */
@Override
public Percentile copy() {
    return new subPercentile();
}
Example #28
Source File: PercentileMetricAnomalyFinder.java From cruise-control with BSD 2-Clause "Simplified" License | 4 votes |
/**
 * Creates a finder backed by a fresh, default-constructed {@link Percentile}.
 */
public PercentileMetricAnomalyFinder() {
  this._percentile = new Percentile();
}
Example #29
Source File: DescriptiveStatisticsTest.java From astor with GNU General Public License v2.0 | 4 votes |
/**
 * Returns a newly constructed {@code subPercentile}; no state is transferred from this
 * instance.
 */
@Override
public Percentile copy() {
    return new subPercentile();
}
Example #30
Source File: DescriptiveStatisticsTest.java From astor with GNU General Public License v2.0 | 4 votes |
/**
 * Returns a newly constructed {@code subPercentile}; no state is transferred from this
 * instance.
 */
@Override
public Percentile copy() {
    return new subPercentile();
}