Python numpy.ma.fix_invalid() Examples
The following are 19 code examples of numpy.ma.fix_invalid(), drawn from open-source projects; the original project and source file are noted above each example. The snippets are excerpts, so they rely on module-level imports from their source files (typically numpy as np, numpy.ma as ma, and, for the test files, scipy.stats.mstats as mstats). You may also want to check out all available functions/classes of the numpy.ma module.
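Before the project examples, here is a minimal self-contained sketch (not taken from any of the projects below) of what numpy.ma.fix_invalid() does: it returns a masked array in which NaN and infinite entries are masked and replaced by a fill value, so downstream statistics ignore them.

import numpy as np
import numpy.ma as ma

# fix_invalid masks NaN/inf entries and overwrites them with a fill value.
x = np.array([1.0, np.nan, 3.0, np.inf])
fixed = ma.fix_invalid(x)

print(fixed)         # [1.0 -- 3.0 --]
print(fixed.mask)    # [False  True False  True]
print(fixed.mean())  # 2.0 -- masked entries are ignored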
Example #1
Source File: mstats_basic.py From Computable with MIT License | 6 votes |
def pointbiserialr(x, y):
    x = ma.fix_invalid(x, copy=True).astype(bool)
    y = ma.fix_invalid(y, copy=True).astype(float)
    # Get rid of the missing data ..........
    m = ma.mask_or(ma.getmask(x), ma.getmask(y))
    if m is not nomask:
        unmask = np.logical_not(m)
        x = x[unmask]
        y = y[unmask]
    #
    n = len(x)
    # phat is the fraction of x values that are True
    phat = x.sum() / float(n)
    y0 = y[~x]  # y-values where x is False
    y1 = y[x]   # y-values where x is True
    y0m = y0.mean()
    y1m = y1.mean()
    #
    rpb = (y1m - y0m)*np.sqrt(phat * (1-phat)) / y.std()
    #
    df = n-2
    t = rpb*ma.sqrt(df/(1.0-rpb**2))
    prob = betai(0.5*df, 0.5, df/(df+t*t))
    return rpb, prob
Example #2
Source File: test_mstats_basic.py From Computable with MIT License | 6 votes |
def test_spearmanr(self): "Tests some computations of Spearman's rho" (x, y) = ([5.05,6.75,3.21,2.66],[1.65,2.64,2.64,6.95]) assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555) (x, y) = ([5.05,6.75,3.21,2.66,np.nan],[1.65,2.64,2.64,6.95,np.nan]) (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y)) assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555) # x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7] y = [22.6, 08.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4] assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299) x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7, np.nan] y = [22.6, 08.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4, np.nan] (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y)) assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
Example #3
Source File: test_mstats_basic.py From Computable with MIT License | 6 votes |
def test_kendalltau(self): "Tests some computations of Kendall's tau" x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66,np.nan]) y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan]) z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan]) assert_almost_equal(np.asarray(mstats.kendalltau(x,y)), [+0.3333333,0.4969059]) assert_almost_equal(np.asarray(mstats.kendalltau(x,z)), [-0.5477226,0.2785987]) # x = ma.fix_invalid([0, 0, 0, 0,20,20, 0,60, 0,20, 10,10, 0,40, 0,20, 0, 0, 0, 0, 0, np.nan]) y = ma.fix_invalid([0,80,80,80,10,33,60, 0,67,27, 25,80,80,80,80,80,80, 0,10,45, np.nan, 0]) result = mstats.kendalltau(x,y) assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])
Example #4
Source File: test_mstats_basic.py From Computable with MIT License | 6 votes |
def test_friedmanchisq(self): "Tests the Friedman Chi-square test" # No missing values args = ([9.0,9.5,5.0,7.5,9.5,7.5,8.0,7.0,8.5,6.0], [7.0,6.5,7.0,7.5,5.0,8.0,6.0,6.5,7.0,7.0], [6.0,8.0,4.0,6.0,7.0,6.5,6.0,4.0,6.5,3.0]) result = mstats.friedmanchisquare(*args) assert_almost_equal(result[0], 10.4737, 4) assert_almost_equal(result[1], 0.005317, 6) # Missing values x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1], [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3], [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan], [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]] x = ma.fix_invalid(x) result = mstats.friedmanchisquare(*x) assert_almost_equal(result[0], 2.0156, 4) assert_almost_equal(result[1], 0.5692, 4)
Example #5
Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License | 6 votes |
def test_friedmanchisq(self):
    # No missing values
    args = ([9.0,9.5,5.0,7.5,9.5,7.5,8.0,7.0,8.5,6.0],
            [7.0,6.5,7.0,7.5,5.0,8.0,6.0,6.5,7.0,7.0],
            [6.0,8.0,4.0,6.0,7.0,6.5,6.0,4.0,6.5,3.0])
    result = mstats.friedmanchisquare(*args)
    assert_almost_equal(result[0], 10.4737, 4)
    assert_almost_equal(result[1], 0.005317, 6)

    # Missing values
    x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
         [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
         [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
         [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
    x = ma.fix_invalid(x)
    result = mstats.friedmanchisquare(*x)
    assert_almost_equal(result[0], 2.0156, 4)
    assert_almost_equal(result[1], 0.5692, 4)

    # test for namedtuple attributes
    attributes = ('statistic', 'pvalue')
    check_named_results(result, attributes, ma=True)
Example #6
Source File: test_mstats_basic.py From Computable with MIT License | 5 votes |
def test_kendalltau_seasonal(self): "Tests the seasonal Kendall tau." x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1], [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3], [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan], [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]] x = ma.fix_invalid(x).T output = mstats.kendalltau_seasonal(x) assert_almost_equal(output['global p-value (indep)'], 0.008, 3) assert_almost_equal(output['seasonal p-value'].round(2), [0.18,0.53,0.20,0.04])
Example #7
Source File: test_mstats_basic.py From Computable with MIT License | 5 votes |
def test_kstwosamp(self): "Tests the Kolmogorov-Smirnov 2 samples test" x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1], [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3], [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan], [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]] x = ma.fix_invalid(x).T (winter,spring,summer,fall) = x.T assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring),4), (0.1818,0.9892)) assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'g'),4), (0.1469,0.7734)) assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'l'),4), (0.1818,0.6744))
Example #8
Source File: create_phylowgs_inputs.py From phylowgs with GNU General Public License v3.0 | 5 votes |
def impute_missing_total_reads(total_reads, missing_variant_confidence):
    # Change NaNs to masked values via SciPy.
    masked_total_reads = ma.fix_invalid(total_reads)

    # Going forward, suppose you have v variants and s samples in a v*s matrix of
    # read counts. Missing values are masked.

    # Calculate geometric mean of variant read depth in each sample. Result: s*1
    sample_means = gmean(masked_total_reads, axis=0)
    assert np.sum(sample_means <= 0) == np.sum(np.isnan(sample_means)) == 0

    # Divide every variant's read count by its mean sample read depth to get read
    # depth enrichment relative to other variants in sample. Result: v*s
    normalized_to_sample = np.dot(masked_total_reads, np.diag(1./sample_means))
    # For each variant, calculate geometric mean of its read depth enrichment
    # across samples. Result: v*1
    variant_mean_reads = gmean(normalized_to_sample, axis=1)
    assert np.sum(variant_mean_reads <= 0) == np.sum(np.isnan(variant_mean_reads)) == 0

    # Convert 1D arrays to vectors to permit matrix multiplication.
    imputed_counts = np.dot(variant_mean_reads.reshape((-1, 1)),
                            sample_means.reshape((1, -1)))
    nan_coords = np.where(np.isnan(total_reads))
    total_reads[nan_coords] = imputed_counts[nan_coords]
    assert np.sum(total_reads <= 0) == np.sum(np.isnan(total_reads)) == 0

    total_reads[nan_coords] *= missing_variant_confidence
    return np.floor(total_reads).astype(np.int)
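The imputation above works because the NaNs are first converted to masked cells with ma.fix_invalid(), and the geometric mean then skips those cells. A small hedged illustration of that behavior, assuming gmean here is the masked-aware scipy.stats.mstats.gmean (the depth values below are made up, not from phylowgs):

import numpy as np
import numpy.ma as ma
from scipy.stats.mstats import gmean

# Two variants, three samples; missing read depths become masked cells.
depths = ma.fix_invalid([[100.0, np.nan, 90.0],
                         [50.0, 60.0, np.nan]])

# The geometric mean per variant skips the masked (formerly NaN) cells.
print(gmean(depths, axis=1))  # roughly [94.87 54.77]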
Example #9
Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_spearmanr(self):
    # Tests some computations of Spearman's rho
    (x, y) = ([5.05,6.75,3.21,2.66],[1.65,2.64,2.64,6.95])
    assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
    (x, y) = ([5.05,6.75,3.21,2.66,np.nan],[1.65,2.64,2.64,6.95,np.nan])
    (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
    assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)

    x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4,
         7.9, 0.3, 3.9, 0.3, 6.7]
    y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6,
         6.7, 3.8, 1.0, 1.2, 1.4]
    assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
    x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4,
         7.9, 0.3, 3.9, 0.3, 6.7, np.nan]
    y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6,
         6.7, 3.8, 1.0, 1.2, 1.4, np.nan]
    (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
    assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)

    # Next test is to make sure calculation uses sufficient precision.
    # The denominator's value is ~n^3 and used to be represented as an
    # int. 2000**3 > 2**32 so these arrays would cause overflow on
    # some machines.
    x = list(range(2000))
    y = list(range(2000))
    y[0], y[9] = y[9], y[0]
    y[10], y[434] = y[434], y[10]
    y[435], y[1509] = y[1509], y[435]
    # rho = 1 - 6 * (2 * (9^2 + 424^2 + 1074^2))/(2000 * (2000^2 - 1))
    #     = 1 - (1 / 500)
    #     = 0.998
    assert_almost_equal(mstats.spearmanr(x,y)[0], 0.998)

    # test for namedtuple attributes
    res = mstats.spearmanr(x, y)
    attributes = ('correlation', 'pvalue')
    check_named_results(res, attributes, ma=True)
Example #10
Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_kendalltau(self):
    # Tests some computations of Kendall's tau
    x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan])
    y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
    z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
    assert_almost_equal(np.asarray(mstats.kendalltau(x,y)),
                        [+0.3333333,0.4969059])
    assert_almost_equal(np.asarray(mstats.kendalltau(x,z)),
                        [-0.5477226,0.2785987])
    #
    x = ma.fix_invalid([0, 0, 0, 0,20,20, 0,60, 0,20,
                        10,10, 0,40, 0,20, 0, 0, 0, 0, 0, np.nan])
    y = ma.fix_invalid([0,80,80,80,10,33,60, 0,67,27,
                        25,80,80,80,80,80,80, 0,10,45, np.nan, 0])
    result = mstats.kendalltau(x,y)
    assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])

    # make sure internal variable use correct precision with
    # larger arrays
    x = np.arange(2000, dtype=float)
    x = ma.masked_greater(x, 1995)
    y = np.arange(2000, dtype=float)
    y = np.concatenate((y[1000:], y[:1000]))
    assert_(np.isfinite(mstats.kendalltau(x,y)[1]))

    # test for namedtuple attributes
    res = mstats.kendalltau(x, y)
    attributes = ('correlation', 'pvalue')
    check_named_results(res, attributes, ma=True)
Example #11
Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_kendalltau_seasonal(self):
    # Tests the seasonal Kendall tau.
    x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
         [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
         [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
         [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
    x = ma.fix_invalid(x).T
    output = mstats.kendalltau_seasonal(x)
    assert_almost_equal(output['global p-value (indep)'], 0.008, 3)
    assert_almost_equal(output['seasonal p-value'].round(2),
                        [0.18,0.53,0.20,0.04])
Example #12
Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_kstwosamp(self):
    x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
         [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
         [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
         [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
    x = ma.fix_invalid(x).T
    (winter,spring,summer,fall) = x.T
    assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring),4),
                        (0.1818,0.9892))
    assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'g'),4),
                        (0.1469,0.7734))
    assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'l'),4),
                        (0.1818,0.6744))
Example #13
Source File: mstats_basic.py From GraphicDesignPatternByPython with MIT License | 4 votes |
def pointbiserialr(x, y): """Calculates a point biserial correlation coefficient and its p-value. Parameters ---------- x : array_like of bools Input array. y : array_like Input array. Returns ------- correlation : float R value pvalue : float 2-tailed p-value Notes ----- Missing values are considered pair-wise: if a value is missing in x, the corresponding value in y is masked. For more details on `pointbiserialr`, see `stats.pointbiserialr`. """ x = ma.fix_invalid(x, copy=True).astype(bool) y = ma.fix_invalid(y, copy=True).astype(float) # Get rid of the missing data m = ma.mask_or(ma.getmask(x), ma.getmask(y)) if m is not nomask: unmask = np.logical_not(m) x = x[unmask] y = y[unmask] n = len(x) # phat is the fraction of x values that are True phat = x.sum() / float(n) y0 = y[~x] # y-values where x is False y1 = y[x] # y-values where x is True y0m = y0.mean() y1m = y1.mean() rpb = (y1m - y0m)*np.sqrt(phat * (1-phat)) / y.std() df = n-2 t = rpb*ma.sqrt(df/(1.0-rpb**2)) prob = _betai(0.5*df, 0.5, df/(df+t*t)) return PointbiserialrResult(rpb, prob)
Example #14
Source File: mstats_extras.py From Splunking-Crime with GNU Affero General Public License v3.0 | 4 votes |
def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
    """
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    """
    def _hdsd_1D(data,prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)
        #.........
        hdsd = np.empty(len(prob), float_)
        if n < 2:
            hdsd.flat = np.nan

        vv = np.arange(n) / float(n-1)
        betacdf = beta.cdf

        for (i,p) in enumerate(prob):
            _w = betacdf(vv, (n+1)*p, (n+1)*(1-p))
            w = _w[1:] - _w[:-1]
            mx_ = np.fromiter([np.dot(w,xsorted[np.r_[list(range(0,k)),
                                                      list(range(k+1,n))].astype(int_)])
                               for k in range(n)], dtype=float_)
            mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / float(n-1)
            hdsd[i] = float(n-1) * np.sqrt(np.diag(mx_var).diagonal() / float(n))
        return hdsd

    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float_)
    p = np.array(prob, copy=False, ndmin=1)

    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)

    return ma.fix_invalid(result, copy=False).ravel()
Example #15
Source File: mstats_basic.py From Splunking-Crime with GNU Affero General Public License v3.0 | 4 votes |
def pointbiserialr(x, y): """Calculates a point biserial correlation coefficient and its p-value. Parameters ---------- x : array_like of bools Input array. y : array_like Input array. Returns ------- correlation : float R value pvalue : float 2-tailed p-value Notes ----- Missing values are considered pair-wise: if a value is missing in x, the corresponding value in y is masked. For more details on `pointbiserialr`, see `stats.pointbiserialr`. """ x = ma.fix_invalid(x, copy=True).astype(bool) y = ma.fix_invalid(y, copy=True).astype(float) # Get rid of the missing data m = ma.mask_or(ma.getmask(x), ma.getmask(y)) if m is not nomask: unmask = np.logical_not(m) x = x[unmask] y = y[unmask] n = len(x) # phat is the fraction of x values that are True phat = x.sum() / float(n) y0 = y[~x] # y-values where x is False y1 = y[x] # y-values where x is True y0m = y0.mean() y1m = y1.mean() rpb = (y1m - y0m)*np.sqrt(phat * (1-phat)) / y.std() df = n-2 t = rpb*ma.sqrt(df/(1.0-rpb**2)) prob = _betai(0.5*df, 0.5, df/(df+t*t)) return PointbiserialrResult(rpb, prob)
Example #16
Source File: mstats_extras.py From GraphicDesignPatternByPython with MIT License | 4 votes |
def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
    """
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    See Also
    --------
    hdquantiles

    """
    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)

        hdsd = np.empty(len(prob), float_)
        if n < 2:
            hdsd.flat = np.nan

        vv = np.arange(n) / float(n-1)
        betacdf = beta.cdf

        for (i,p) in enumerate(prob):
            _w = betacdf(vv, (n+1)*p, (n+1)*(1-p))
            w = _w[1:] - _w[:-1]
            mx_ = np.fromiter([np.dot(w,xsorted[np.r_[list(range(0,k)),
                                                      list(range(k+1,n))].astype(int_)])
                               for k in range(n)], dtype=float_)
            mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / float(n-1)
            hdsd[i] = float(n-1) * np.sqrt(np.diag(mx_var).diagonal() / float(n))
        return hdsd

    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float_)
    p = np.array(prob, copy=False, ndmin=1)

    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)

    return ma.fix_invalid(result, copy=False).ravel()
Example #17
Source File: mstats_basic.py From lambda-packs with MIT License | 4 votes |
def pointbiserialr(x, y): """Calculates a point biserial correlation coefficient and its p-value. Parameters ---------- x : array_like of bools Input array. y : array_like Input array. Returns ------- correlation : float R value pvalue : float 2-tailed p-value Notes ----- Missing values are considered pair-wise: if a value is missing in x, the corresponding value in y is masked. For more details on `pointbiserialr`, see `stats.pointbiserialr`. """ x = ma.fix_invalid(x, copy=True).astype(bool) y = ma.fix_invalid(y, copy=True).astype(float) # Get rid of the missing data m = ma.mask_or(ma.getmask(x), ma.getmask(y)) if m is not nomask: unmask = np.logical_not(m) x = x[unmask] y = y[unmask] n = len(x) # phat is the fraction of x values that are True phat = x.sum() / float(n) y0 = y[~x] # y-values where x is False y1 = y[x] # y-values where x is True y0m = y0.mean() y1m = y1.mean() rpb = (y1m - y0m)*np.sqrt(phat * (1-phat)) / y.std() df = n-2 t = rpb*ma.sqrt(df/(1.0-rpb**2)) prob = _betai(0.5*df, 0.5, df/(df+t*t)) return PointbiserialrResult(rpb, prob)
Example #18
Source File: mstats_extras.py From Computable with MIT License | 4 votes |
def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
    """
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    """
    def _hdsd_1D(data,prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)
        #.........
        hdsd = np.empty(len(prob), float_)
        if n < 2:
            hdsd.flat = np.nan
        #.........
        vv = np.arange(n) / float(n-1)
        betacdf = beta.cdf
        #
        for (i,p) in enumerate(prob):
            _w = betacdf(vv, (n+1)*p, (n+1)*(1-p))
            w = _w[1:] - _w[:-1]
            mx_ = np.fromiter([np.dot(w,xsorted[np.r_[list(range(0,k)),
                                                      list(range(k+1,n))].astype(int_)])
                               for k in range(n)], dtype=float_)
            mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / float(n-1)
            hdsd[i] = float(n-1) * np.sqrt(np.diag(mx_var).diagonal() / float(n))
        return hdsd

    # Initialization & checks ---------
    data = ma.array(data, copy=False, dtype=float_)
    p = np.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)
    #
    return ma.fix_invalid(result, copy=False).ravel()

#####--------------------------------------------------------------------------
#---- --- Confidence intervals ---
#####--------------------------------------------------------------------------
Example #19
Source File: mstats_extras.py From lambda-packs with MIT License | 4 votes |
def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
    """
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    See Also
    --------
    hdquantiles

    """
    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)

        hdsd = np.empty(len(prob), float_)
        if n < 2:
            hdsd.flat = np.nan

        vv = np.arange(n) / float(n-1)
        betacdf = beta.cdf

        for (i,p) in enumerate(prob):
            _w = betacdf(vv, (n+1)*p, (n+1)*(1-p))
            w = _w[1:] - _w[:-1]
            mx_ = np.fromiter([np.dot(w,xsorted[np.r_[list(range(0,k)),
                                                      list(range(k+1,n))].astype(int_)])
                               for k in range(n)], dtype=float_)
            mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / float(n-1)
            hdsd[i] = float(n-1) * np.sqrt(np.diag(mx_var).diagonal() / float(n))
        return hdsd

    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float_)
    p = np.array(prob, copy=False, ndmin=1)

    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)

    return ma.fix_invalid(result, copy=False).ravel()
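Finally, a brief usage sketch for the hdquantiles_sd excerpts above, calling the public scipy.stats.mstats API directly (the data are synthetic, chosen only for illustration):

import numpy as np
from scipy.stats.mstats import hdquantiles, hdquantiles_sd

data = np.random.default_rng(42).normal(size=200)

# Harrell-Davis quantile estimates and their jackknife standard errors;
# fix_invalid() inside hdquantiles_sd masks any non-finite results.
print(hdquantiles(data, prob=[0.25, 0.5, 0.75]))
print(hdquantiles_sd(data, prob=[0.25, 0.5, 0.75]))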