Python scipy.stats.mstats.mquantiles() Examples
The following are 18 code examples of scipy.stats.mstats.mquantiles(), drawn from open source projects. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions and classes of the module scipy.stats.mstats.
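For readers new to the function: mquantiles(a, prob, alphap, betap, axis, limit) computes empirical quantiles of array-like (possibly masked) data, with the plotting positions controlled by alphap and betap. A minimal sketch of a direct call; the array and probabilities below are made up purely for illustration:

import numpy as np
from scipy.stats.mstats import mquantiles

data = np.arange(1., 11.)                        # 1.0 .. 10.0, illustrative only
# Default plotting positions (alphap=0.4, betap=0.4)
print(mquantiles(data, prob=[0.25, 0.5, 0.75]))  # roughly [2.95, 5.5, 8.05]
# Quantiles down the columns of a 2-D array
table = data.reshape(5, 2)
print(mquantiles(table, prob=[0.1, 0.9], axis=0))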
Example #1
Source File: test_mstats_basic.py (from Computable, MIT License)

def test_mquantiles_limit_keyword(self):
    """Ticket #867"""
    data = np.array([[6., 7., 1.],
                     [47., 15., 2.],
                     [49., 36., 3.],
                     [15., 39., 4.],
                     [42., 40., -999.],
                     [41., 41., -999.],
                     [7., -999., -999.],
                     [39., -999., -999.],
                     [43., -999., -999.],
                     [40., -999., -999.],
                     [36., -999., -999.]])
    desired = [[19.2, 14.6, 1.45],
               [40.0, 37.5, 2.5],
               [42.8, 40.05, 3.55]]
    quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
    assert_almost_equal(quants, desired)
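The limit keyword used in this test makes mquantiles ignore values outside the given open interval, which is how the -999 sentinel entries are kept from distorting the column quantiles. A minimal, hypothetical 1-D illustration of the same idea:

import numpy as np
from scipy.stats.mstats import mquantiles

x = np.array([7., 36., 39., 40., 43., -999., -999.])         # -999 marks missing data
print(mquantiles(x, prob=[0.25, 0.5, 0.75]))                  # sentinels drag the quantiles down
print(mquantiles(x, prob=[0.25, 0.5, 0.75], limit=(0, 50)))   # sentinels ignored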
Example #2
Source File: test_mstats_basic.py (from GraphicDesignPatternByPython, MIT License)

def test_mquantiles_limit_keyword(self):
    # Regression test for Trac ticket #867
    data = np.array([[6., 7., 1.],
                     [47., 15., 2.],
                     [49., 36., 3.],
                     [15., 39., 4.],
                     [42., 40., -999.],
                     [41., 41., -999.],
                     [7., -999., -999.],
                     [39., -999., -999.],
                     [43., -999., -999.],
                     [40., -999., -999.],
                     [36., -999., -999.]])
    desired = [[19.2, 14.6, 1.45],
               [40.0, 37.5, 2.5],
               [42.8, 40.05, 3.55]]
    quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
    assert_almost_equal(quants, desired)
Example #3
Source File: kernel_extras.py (from vnpy_crypto, MIT License)

def _compute_sig(self):
    Y = self.endog
    X = self.exog
    b = self.estimator(Y, X)
    m = self.fform(X, b)
    n = np.shape(X)[0]
    resid = Y - m
    resid = resid - np.mean(resid)  # center residuals
    self.test_stat = self._compute_test_stat(resid)
    sqrt5 = np.sqrt(5.)
    fct1 = (1 - sqrt5) / 2.
    fct2 = (1 + sqrt5) / 2.
    u1 = fct1 * resid
    u2 = fct2 * resid
    r = fct2 / sqrt5
    I_dist = np.empty((self.nboot, 1))
    for j in range(self.nboot):
        u_boot = u2.copy()
        prob = np.random.uniform(0, 1, size=(n,))
        ind = prob < r
        u_boot[ind] = u1[ind]
        Y_boot = m + u_boot
        b_hat = self.estimator(Y_boot, X)
        m_hat = self.fform(X, b_hat)
        u_boot_hat = Y_boot - m_hat
        I_dist[j] = self._compute_test_stat(u_boot_hat)
    self.boots_results = I_dist
    sig = "Not Significant"
    if self.test_stat > mquantiles(I_dist, 0.9):
        sig = "*"
    if self.test_stat > mquantiles(I_dist, 0.95):
        sig = "**"
    if self.test_stat > mquantiles(I_dist, 0.99):
        sig = "***"
    return sig
Example #4
Source File: hmm.py (from sima, GNU General Public License v2.0)

def _threshold_gradient(im):
    """Indicate pixel locations with gradient below the bottom 10th percentile

    Parameters
    ----------
    im : array
        The mean intensity images for each channel.
        Size: (num_channels, num_rows, num_columns).

    Returns
    -------
    array
        Binary values indicating whether the magnitude of the gradient
        is below the 10th percentile. Same size as im.

    """
    if im.shape[0] > 1:
        # Calculate directional relative derivatives
        _, g_x, g_y = np.gradient(np.log(im))
    else:
        # Calculate directional relative derivatives
        g_x, g_y = np.gradient(np.log(im[0]))
        g_x = g_x.reshape([1, g_x.shape[0], g_x.shape[1]])
        g_y = g_y.reshape([1, g_y.shape[0], g_y.shape[1]])
    gradient_magnitudes = np.sqrt((g_x ** 2) + (g_y ** 2))
    below_threshold = []
    for chan in gradient_magnitudes:
        threshold = mquantiles(chan[np.isfinite(chan)].flatten(), [0.1])[0]
        below_threshold.append(chan < threshold)
    return np.array(below_threshold)
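The same percentile-as-threshold pattern works on any array: take the 10th percentile of the finite values with mquantiles and build a boolean mask from it. A small sketch with synthetic data (the array and names are illustrative, not from sima):

import numpy as np
from scipy.stats.mstats import mquantiles

grad = np.random.rand(64, 64)
grad[0, 0] = np.inf                                  # pretend one pixel is degenerate
finite = grad[np.isfinite(grad)]
threshold = mquantiles(finite.flatten(), [0.1])[0]   # bottom decile of finite values
mask = grad < threshold                              # True where the gradient is weak
print(mask.mean())                                   # roughly 0.1 of the pixels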
Example #5
Source File: _kernel_base.py (from Splunking-Crime, GNU Affero General Public License v3.0)

def _compute_min_std_IQR(data):
    """Compute minimum of std and IQR for each variable."""
    s1 = np.std(data, axis=0)
    q75 = mquantiles(data, 0.75, axis=0).data[0]
    q25 = mquantiles(data, 0.25, axis=0).data[0]
    s2 = (q75 - q25) / 1.349  # IQR
    dispersion = np.minimum(s1, s2)
    return dispersion
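This is a robust scale estimate often used for normal-reference bandwidths: the interquartile range divided by 1.349 approximates the standard deviation of a normal sample, and taking the minimum with the plain standard deviation guards against heavy tails. A plain-NumPy cross-check of the same quantity; note that np.percentile's default interpolation differs slightly from mquantiles' default plotting positions, so small deviations are expected:

import numpy as np

data = np.random.randn(500, 3)
s1 = np.std(data, axis=0)
iqr = np.percentile(data, 75, axis=0) - np.percentile(data, 25, axis=0)
s2 = iqr / 1.349                 # IQR-based estimate of the standard deviation
dispersion = np.minimum(s1, s2)
print(dispersion)                # close to 1 for standard normal columns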
Example #6
Source File: kernel_regression.py (from Splunking-Crime, GNU Affero General Public License v3.0)

def _compute_sig(self):
    """Calculates the significance level of the variable tested"""
    m = self._est_cond_mean()
    Y = self.endog
    X = self.exog
    n = np.shape(X)[0]
    u = Y - m
    u = u - np.mean(u)  # center
    fct1 = (1 - 5**0.5) / 2.
    fct2 = (1 + 5**0.5) / 2.
    u1 = fct1 * u
    u2 = fct2 * u
    r = fct2 / (5 ** 0.5)
    I_dist = np.empty((self.nboot, 1))
    for j in range(self.nboot):
        u_boot = copy.deepcopy(u2)
        prob = np.random.uniform(0, 1, size=(n, 1))
        ind = prob < r
        u_boot[ind] = u1[ind]
        Y_boot = m + u_boot
        I_dist[j] = self._compute_test_stat(Y_boot, X)
    sig = "Not Significant"
    if self.test_stat > mquantiles(I_dist, 0.9):
        sig = "*"
    if self.test_stat > mquantiles(I_dist, 0.95):
        sig = "**"
    if self.test_stat > mquantiles(I_dist, 0.99):
        sig = "***"
    return sig
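The two constants here are the golden-ratio pair of Mammen's two-point wild-bootstrap distribution: each centered residual is multiplied by (1 - sqrt(5))/2 with probability (sqrt(5) + 1)/(2*sqrt(5)) and by (1 + sqrt(5))/2 otherwise, which keeps the perturbed residuals mean-zero with unchanged variance. A quick, purely illustrative empirical check of those two moments:

import numpy as np

sqrt5 = np.sqrt(5.)
fct1, fct2 = (1 - sqrt5) / 2., (1 + sqrt5) / 2.
r = fct2 / sqrt5                          # probability of drawing fct1

w = np.where(np.random.uniform(size=100000) < r, fct1, fct2)
print(w.mean(), (w**2).mean())            # approximately 0 and 1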
Example #7
Source File: kernel_regression.py (from Splunking-Crime, GNU Affero General Public License v3.0)

def _compute_sig(self):
    """
    Computes the significance value for the variable(s) tested.

    The empirical distribution of the test statistic is obtained through
    bootstrapping the sample. The null hypothesis is rejected if the test
    statistic is larger than the 90, 95, 99 percentiles.
    """
    t_dist = np.empty(shape=(self.nboot,))
    Y = self.endog
    X = copy.deepcopy(self.exog)
    n = np.shape(Y)[0]
    X[:, self.test_vars] = np.mean(X[:, self.test_vars], axis=0)
    # Calculate the restricted mean. See p. 372 in [8]
    M = KernelReg(Y, X, self.var_type, self.model.reg_type, self.bw,
                  defaults=EstimatorSettings(efficient=False)).fit()[0]
    M = np.reshape(M, (n, 1))
    e = Y - M
    e = e - np.mean(e)  # recenter residuals
    for i in range(self.nboot):
        ind = np.random.random_integers(0, n - 1, size=(n, 1))
        e_boot = e[ind, 0]
        Y_boot = M + e_boot
        t_dist[i] = self._compute_test_stat(Y_boot, self.exog)
    self.t_dist = t_dist
    sig = "Not Significant"
    if self.test_stat > mquantiles(t_dist, 0.9):
        sig = "*"
    if self.test_stat > mquantiles(t_dist, 0.95):
        sig = "**"
    if self.test_stat > mquantiles(t_dist, 0.99):
        sig = "***"
    return sig
Example #8
Source File: kernel_extras.py (from Splunking-Crime, GNU Affero General Public License v3.0)

def _compute_sig(self):
    Y = self.endog
    X = self.exog
    b = self.estimator(Y, X)
    m = self.fform(X, b)
    n = np.shape(X)[0]
    resid = Y - m
    resid = resid - np.mean(resid)  # center residuals
    self.test_stat = self._compute_test_stat(resid)
    sqrt5 = np.sqrt(5.)
    fct1 = (1 - sqrt5) / 2.
    fct2 = (1 + sqrt5) / 2.
    u1 = fct1 * resid
    u2 = fct2 * resid
    r = fct2 / sqrt5
    I_dist = np.empty((self.nboot, 1))
    for j in range(self.nboot):
        u_boot = u2.copy()
        prob = np.random.uniform(0, 1, size=(n,))
        ind = prob < r
        u_boot[ind] = u1[ind]
        Y_boot = m + u_boot
        b_hat = self.estimator(Y_boot, X)
        m_hat = self.fform(X, b_hat)
        u_boot_hat = Y_boot - m_hat
        I_dist[j] = self._compute_test_stat(u_boot_hat)
    self.boots_results = I_dist
    sig = "Not Significant"
    if self.test_stat > mquantiles(I_dist, 0.9):
        sig = "*"
    if self.test_stat > mquantiles(I_dist, 0.95):
        sig = "**"
    if self.test_stat > mquantiles(I_dist, 0.99):
        sig = "***"
    return sig
Example #9
Source File: stat_qq_line.py (from plotnine, GNU General Public License v2.0)

def compute_group(cls, data, scales, **params):
    line_p = params['line_p']
    dparams = params['dparams']

    # Compute theoretical values
    df = stat_qq.compute_group(data, scales, **params)
    sample = df['sample'].values
    theoretical = df['theoretical'].values

    # Compute slope & intercept of the line through the quantiles
    cdist = get_continuous_distribution(params['distribution'])
    x_coords = cdist.ppf(line_p, *dparams)
    y_coords = mquantiles(sample, line_p)
    slope = (np.diff(y_coords) / np.diff(x_coords))[0]
    intercept = y_coords[0] - slope * x_coords[0]

    # Get x,y points that describe the line
    if params['fullrange'] and scales.x:
        x = scales.x.dimension()
    else:
        x = theoretical.min(), theoretical.max()

    x = np.asarray(x)
    y = slope * x + intercept
    data = pd.DataFrame({'x': x, 'y': y})
    return data
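Outside plotnine, the same slope/intercept construction needs only mquantiles for the sample quantiles and a distribution's ppf for the theoretical ones. A hedged sketch of a normal Q-Q line through the quartiles; the sample data here is synthetic:

import numpy as np
from scipy.stats import norm
from scipy.stats.mstats import mquantiles

sample = np.random.randn(200) * 2.0 + 5.0
line_p = [0.25, 0.75]                               # quartiles
x_coords = norm.ppf(line_p)                         # theoretical quantiles
y_coords = mquantiles(sample, line_p)               # sample quantiles
slope = (np.diff(y_coords) / np.diff(x_coords))[0]
intercept = y_coords[0] - slope * x_coords[0]
print(slope, intercept)                             # near 2.0 and 5.0 for this sample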
Example #10
Source File: stats_utils.py (from arviz, Apache License 2.0)

def quantile(ary, q, axis=None, limit=None):
    """Use same quantile function as R (Type 7)."""
    if limit is None:
        limit = tuple()
    return mquantiles(ary, q, alphap=1, betap=1, axis=axis, limit=limit)
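Setting alphap=1 and betap=1 selects the linear-interpolation plotting positions, i.e. R's default type 7 quantile, which is also what NumPy's percentile computes by default. A small, illustrative equivalence check:

import numpy as np
from scipy.stats.mstats import mquantiles

ary = np.random.rand(101)
q = [0.05, 0.5, 0.95]
a = np.asarray(mquantiles(ary, q, alphap=1, betap=1))
b = np.percentile(ary, [100 * p for p in q])   # NumPy default == R type 7
print(np.allclose(a, b))                       # True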
Example #11
Source File: _kernel_base.py (from vnpy_crypto, MIT License)

def _compute_min_std_IQR(data):
    """Compute minimum of std and IQR for each variable."""
    s1 = np.std(data, axis=0)
    q75 = mquantiles(data, 0.75, axis=0).data[0]
    q25 = mquantiles(data, 0.25, axis=0).data[0]
    s2 = (q75 - q25) / 1.349  # IQR
    dispersion = np.minimum(s1, s2)
    return dispersion
Example #12
Source File: kernel_regression.py (from vnpy_crypto, MIT License)

def _compute_sig(self):
    """Calculates the significance level of the variable tested"""
    m = self._est_cond_mean()
    Y = self.endog
    X = self.exog
    n = np.shape(X)[0]
    u = Y - m
    u = u - np.mean(u)  # center
    fct1 = (1 - 5**0.5) / 2.
    fct2 = (1 + 5**0.5) / 2.
    u1 = fct1 * u
    u2 = fct2 * u
    r = fct2 / (5 ** 0.5)
    I_dist = np.empty((self.nboot, 1))
    for j in range(self.nboot):
        u_boot = copy.deepcopy(u2)
        prob = np.random.uniform(0, 1, size=(n, 1))
        ind = prob < r
        u_boot[ind] = u1[ind]
        Y_boot = m + u_boot
        I_dist[j] = self._compute_test_stat(Y_boot, X)
    sig = "Not Significant"
    if self.test_stat > mquantiles(I_dist, 0.9):
        sig = "*"
    if self.test_stat > mquantiles(I_dist, 0.95):
        sig = "**"
    if self.test_stat > mquantiles(I_dist, 0.99):
        sig = "***"
    return sig
Example #13
Source File: kernel_regression.py (from vnpy_crypto, MIT License)

def _compute_sig(self):
    """
    Computes the significance value for the variable(s) tested.

    The empirical distribution of the test statistic is obtained through
    bootstrapping the sample. The null hypothesis is rejected if the test
    statistic is larger than the 90, 95, 99 percentiles.
    """
    t_dist = np.empty(shape=(self.nboot,))
    Y = self.endog
    X = copy.deepcopy(self.exog)
    n = np.shape(Y)[0]
    X[:, self.test_vars] = np.mean(X[:, self.test_vars], axis=0)
    # Calculate the restricted mean. See p. 372 in [8]
    M = KernelReg(Y, X, self.var_type, self.model.reg_type, self.bw,
                  defaults=EstimatorSettings(efficient=False)).fit()[0]
    M = np.reshape(M, (n, 1))
    e = Y - M
    e = e - np.mean(e)  # recenter residuals
    for i in range(self.nboot):
        ind = np.random.random_integers(0, n - 1, size=(n, 1))
        e_boot = e[ind, 0]
        Y_boot = M + e_boot
        t_dist[i] = self._compute_test_stat(Y_boot, self.exog)
    self.t_dist = t_dist
    sig = "Not Significant"
    if self.test_stat > mquantiles(t_dist, 0.9):
        sig = "*"
    if self.test_stat > mquantiles(t_dist, 0.95):
        sig = "**"
    if self.test_stat > mquantiles(t_dist, 0.99):
        sig = "***"
    return sig
Example #14
Source File: partial_dependence.py (from Splunking-Crime, GNU Affero General Public License v3.0)

def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles`` of ``X``.

    The grid is generated by placing ``grid_resolution`` equally spaced
    points between the ``percentiles`` of each column of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme values of
        the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes
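The mquantiles call is what pins the grid to the bulk of the data: with prob=(0.05, 0.95) and axis=0 it returns the per-column 5th and 95th percentiles, and np.linspace then fills grid_resolution points between them. A standalone sketch of just that step on random data (the array and names are illustrative):

import numpy as np
from scipy.stats.mstats import mquantiles

X = np.random.randn(200, 3)
emp_percentiles = mquantiles(X, prob=(0.05, 0.95), axis=0)
print(emp_percentiles.shape)        # (2, 3): one (low, high) pair per column
axis0 = np.linspace(emp_percentiles[0, 0], emp_percentiles[1, 0], num=100)
print(axis0[0], axis0[-1])          # grid endpoints for the first feature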
Example #15
Source File: partial_dependence.py (from Mastering-Elasticsearch-7.0, MIT License)

def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles`` of ``X``.

    The grid is generated by placing ``grid_resolution`` equally spaced
    points between the ``percentiles`` of each column of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme values of
        the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes
Example #16
Source File: fixed_run.py (from opt-mmd, BSD 3-Clause "New" or "Revised" License)

def get_estimates(gen, sigmas=None, n_reps=100, n_null_samps=1000,
                  cache_size=64, rep_states=False, name=None,
                  save_samps=False, thresh_levels=(.2, .1, .05, .01)):
    if sigmas is None:
        sigmas = np.logspace(-1.7, 1.7, num=30)
    sigmas = np.asarray(sigmas)

    mmd = sg.QuadraticTimeMMD()
    mmd.set_num_null_samples(n_null_samps)
    mmd_mk = mmd.multikernel()
    for s in sigmas:
        mmd_mk.add_kernel(sg.GaussianKernel(cache_size, 2 * s**2))

    info = OrderedDict()
    for k in 'sigma rep mmd_est var_est p'.split():
        info[k] = []
    thresh_names = []
    for l in thresh_levels:
        s = 'thresh_{}'.format(l)
        thresh_names.append(s)
        info[s] = []
    if save_samps:
        info['samps'] = []
    thresh_prob = 1 - np.asarray(thresh_levels)

    bar = pb.ProgressBar()
    if name is not None:
        bar.start()
        bar.widgets.insert(0, '{} '.format(name))
    for rep in bar(xrange(n_reps)):
        if rep_states:
            rep = np.random.randint(0, 2**32)
            X, Y = gen(rs=rep)
        else:
            X, Y = gen()
        n = X.shape[0]
        assert Y.shape[0] == n

        mmd.set_p(sg.RealFeatures(X.T))
        mmd.set_q(sg.RealFeatures(Y.T))

        info['sigma'].extend(sigmas)
        info['rep'].extend([rep] * len(sigmas))

        stat = mmd_mk.compute_statistic()
        info['mmd_est'].extend(stat / (n / 2))

        samps = mmd_mk.sample_null()
        info['p'].extend(np.mean(samps >= stat, axis=0))
        if save_samps:
            info['samps'].extend(samps.T)

        info['var_est'].extend(mmd_mk.compute_variance_h1())

        threshes = np.asarray(mquantiles(samps, prob=thresh_prob, axis=0))
        for s, t in zip(thresh_names, threshes):
            info[s].extend(t)

    info = pd.DataFrame(info)
    info.set_index(['sigma', 'rep'], inplace=True)
    return info
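Here mquantiles turns the sampled null distribution into per-kernel rejection thresholds: samps has one column per kernel, and prob = 1 - thresh_levels picks the critical value for each significance level along axis 0. A simplified sketch with a fake Gaussian null; the numbers are illustrative, not produced by the MMD code above:

import numpy as np
from scipy.stats.mstats import mquantiles

thresh_levels = (.2, .1, .05, .01)
thresh_prob = 1 - np.asarray(thresh_levels)
samps = np.random.randn(1000, 30)              # null samples x kernels, made up
threshes = np.asarray(mquantiles(samps, prob=thresh_prob, axis=0))
print(threshes.shape)                          # (4, 30): one row per level
# A test statistic above threshes[2, k] would be significant at the 5% level
# for kernel k under this simulated null.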
Example #17
Source File: partial_dependence.py (from twitter-stock-recommendation, MIT License)

def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles`` of ``X``.

    The grid is generated by placing ``grid_resolution`` equally spaced
    points between the ``percentiles`` of each column of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme values of
        the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes
Example #18
Source File: fithic.py (from fithic, MIT License)

def read_biases(infilename):
    global biasLowerBound
    global biasUpperBound
    startt = time.time()

    biasDic = {}
    rawBiases = []
    with gzip.open(infilename, 'rt') as infile:
        for line in infile:
            words = line.rstrip().split()
            chrom = words[0]; midPoint = int(words[1]); bias = float(words[2])
            if bias != 1.0:
                rawBiases.append(bias)

    botQ, med, topQ = mquantiles(rawBiases, prob=[0.05, 0.5, 0.95])
    with open(logfile, 'a') as log:
        log.write("5th quantile of biases: " + str(botQ) + "\n")
        log.write("50th quantile of biases: " + str(med) + "\n")
        log.write("95th quantile of biases: " + str(topQ) + "\n")

    totalC = 0
    discardC = 0
    with gzip.open(infilename, 'rt') as infile:
        for line in infile:
            words = line.rstrip().split()
            chrom = words[0]; midPoint = int(words[1]); bias = float(words[2])
            if bias < biasLowerBound or math.isnan(bias):
                bias = -1  # botQ
                discardC += 1
            elif bias > biasUpperBound:
                bias = -1  # topQ
                discardC += 1
            totalC += 1
            if chrom not in biasDic:
                biasDic[chrom] = {}
            if midPoint not in biasDic[chrom]:
                biasDic[chrom][midPoint] = bias

    with open(logfile, 'a') as log:
        log.write("Out of " + str(totalC) + " loci " + str(discardC) +
                  " were discarded with biases not in range [0.5 2]\n\n")

    endt = time.time()
    print("Bias file read. Time took %s" % (endt - startt))
    return biasDic  # from read_biases


#==================================
# function to compute the contact probabilities
# applied for intra-chromosomal interactions
#==================================
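The 5th/50th/95th percentiles are only logged here (the fixed bounds do the actual filtering), but the botQ/topQ comments suggest the quantiles could also serve as data-driven bounds. A compact sketch of using them that way; the data and the threshold policy are assumptions, not part of fithic:

import numpy as np
from scipy.stats.mstats import mquantiles

raw_biases = np.random.lognormal(mean=0.0, sigma=0.4, size=1000).tolist()
bot_q, med, top_q = mquantiles(raw_biases, prob=[0.05, 0.5, 0.95])
flagged = [b for b in raw_biases if b < bot_q or b > top_q]
print(len(flagged))                 # about 10% of the entries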