Python scipy.stats.percentileofscore() Examples

The following are 14 code examples of scipy.stats.percentileofscore(), taken from open-source projects. The source file and project for each example are listed above it.
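percentileofscore(a, score, kind='rank') returns the percentile rank of a score relative to a list of scores, as a value between 0 and 100. A minimal usage sketch (the results below follow the behaviour documented for SciPy's kind options):

from scipy.stats import percentileofscore

percentileofscore([1, 2, 3, 4], 3)                    # 75.0 (default kind='rank')
percentileofscore([1, 2, 3, 3, 4], 3, kind='weak')    # 80.0 -- fraction of scores <= 3
percentileofscore([1, 2, 3, 3, 4], 3, kind='strict')  # 40.0 -- fraction of scores < 3
percentileofscore([1, 2, 3, 3, 4], 3, kind='mean')    # 60.0 -- average of 'weak' and 'strict'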
Example #1
Source File: test_aggregator.py    From pywr with GNU General Public License v3.0
def agg_func(request):
    agg_func_name = request.param

    if agg_func_name == "custom":
        # When using custom you assign the function rather than a string.
        agg_func_name = npy_func = custom_test_func
    elif agg_func_name == "percentile":
        agg_func_name = {
            "func": "percentile",
            "args": [95],
            "kwargs": {}
        }
        npy_func = partial(np.percentile, q=95)
    elif agg_func_name == "percentileofscore":
        agg_func_name = {
            "func": "percentileofscore",
            "kwargs": {
                "score": 0.5,
                "kind": "rank"
            }
        }
        npy_func = partial(percentileofscore_with_axis, score=0.5, kind="rank")
    else:
        npy_func = npy_funcs[agg_func_name]
    return agg_func_name, npy_func 
Example #2
Source File: predict.py    From PIDGINv3 with GNU General Public License v3.0
def doPercentileCalculation(model_name):
	global rdkit_mols
	#expensive to unzip training file - so only done if smiles requested
	if options.ad_smiles:
		smiles = get_training_smiles(model_name)
	ad_data = getAdData(model_name)
	def calcPercentile(rdkit_mol):
		sims = DataStructs.BulkTanimotoSimilarity(rdkit_mol,ad_data[:,0])
		bias = ad_data[:,2].astype(float)
		std_dev = ad_data[:,3].astype(float)
		scores = ad_data[:,5].astype(float)
		weights = sims / (bias * std_dev)
		critical_weight = weights.max()
		percentile = percentileofscore(scores,critical_weight)
		if options.ad_smiles:
			critical_smiles = smiles[np.argmax(weights)]
			result = percentile, critical_smiles
		else:
			result = percentile, None
		return result
	ret = [calcPercentile(x) for x in rdkit_mols]
	return model_name, ret

#prediction runner for percentile calculation 
Example #3
Source File: ABuPtPosition.py    From abu with GNU General Public License v3.0
def fit_position(self, factor_object):
        """
        Position management for mean-reversion style strategies:
        the position size is decided by where the current buy price ranks within the prices of a past window of the financial series.
        fit_position returns how many units to buy (shares, lots, tons, contracts).
        :param factor_object: instance of an ABuFactorBuyBases subclass
        :return: how many units to buy (shares, lots, tons, contracts)
        """

        # self.kl_pd_buy is the data for the buy day; fetch the preceding past_day_cnt days of data
        last_kl = factor_object.past_today_kl(self.kl_pd_buy, self.past_day_cnt)
        if last_kl is None or last_kl.empty:
            precent_pos = self.pos_base
        else:
            # Use percentileofscore to compute where the buy price ranks within the past past_day_cnt days of prices
            precent_pos = stats.percentileofscore(last_kl.close, self.bp)
            precent_pos = (1 + (self.mid_precent - precent_pos) / 100) * self.pos_base
        # Cap the position: it remains subject to the upper-level maximum position limit, e.g. a computed full position is still reduced to 75%; change the maximum position value if this needs adjusting
        precent_pos = self.pos_max if precent_pos > self.pos_max else precent_pos
        # The result is how many units to buy (shares, lots, tons, contracts)
        return self.read_cash * precent_pos / self.bp * self.deposit_rate
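The core of the sizing rule above can be seen in a small standalone sketch (hypothetical numbers, not part of the abu code): a buy price that ranks low in the recent window yields a percentile below mid_precent and therefore a position above pos_base.

import numpy as np
from scipy import stats

recent_close = np.array([10.0, 10.5, 9.8, 10.2, 9.9, 10.1])  # hypothetical recent closes
buy_price = 9.85
pos_base, mid_precent = 0.1, 50.0

rank = stats.percentileofscore(recent_close, buy_price)   # ~16.7: the buy price is cheap relative to the window
position = (1 + (mid_precent - rank) / 100) * pos_base    # ~0.133, i.e. larger than pos_base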
Example #4
Source File: drought.py    From RHEAS with MIT License
def calcSeverity(model, varname="soil_moist"):
    """Calculate drought severity from *climatology* table stored in database."""
    db = dbio.connect(model.dbname)
    cur = db.cursor()
    if varname == "soil_moist":
        sql = "select fdate,(ST_DumpValues(st_union(rast,'sum'))).valarray from {0}.soil_moist group by fdate order by fdate".format(model.name)
    else:
        sql = "select fdate,(ST_DumpValues(rast)).valarray from {0}.runoff order by fdate".format(model.name)
    cur.execute(sql)
    results = cur.fetchall()
    data = np.array([np.array(r[1]).ravel() for r in results])
    i = np.where(np.not_equal(data[0, :], None))[0]
    p = pandas.DataFrame(data[:, i], index=np.array([r[0] for r in results], dtype='datetime64'), columns=range(len(i)))
    p = p.rolling('10D').mean()  # calculate percentiles with dekad rolling mean
    st = "{0}-{1}-{2}".format(model.startyear, model.startmonth, model.startday)
    et = "{0}-{1}-{2}".format(model.endyear, model.endmonth, model.endday)
    s = np.array([[stats.percentileofscore(p[pi].values, v) for v in p[pi][st:et]] for pi in p.columns]).T
    s = 100.0 - s
    cur.close()
    db.close()
    return s 
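The severity calculation reduces to ranking each current value against its own climatology and inverting the rank, so unusually dry conditions map to high severity. A standalone sketch with hypothetical numbers:

import numpy as np
from scipy import stats

history = np.array([0.32, 0.28, 0.35, 0.30, 0.25, 0.27])  # hypothetical soil moisture climatology for one cell
current = 0.26
severity = 100.0 - stats.percentileofscore(history, current)  # ~83.3: high severity for an unusually low value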
Example #5
Source File: test_aggregator.py    From pywr with GNU General Public License v3.0
def percentileofscore_with_axis(values, *args, axis=0, **kwargs):
    if values.ndim == 1:
        # For 1D data we just calculate the percentile
        out = percentileofscore(values, *args, **kwargs)
    elif axis == 0:
        # 2D data by axis 0
        out = [percentileofscore(values[:, i], *args, **kwargs) for i in range(values.shape[1])]
    elif axis == 1:
        # 2D data by axis 1
        out = [percentileofscore(values[i, :], *args, **kwargs) for i in range(values.shape[0])]
    else:
        raise ValueError('Axis "{}" not supported'.format(axis))
    return out 
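A quick usage sketch for the helper above, with hypothetical data and assuming the helper (and its percentileofscore import) is in scope; the positional score argument is forwarded to percentileofscore for each 1D slice.

import numpy as np

data = np.array([[0.1, 0.9],
                 [0.4, 0.6],
                 [0.7, 0.3]])

percentileofscore_with_axis(data[:, 0], 0.5)    # scalar percentile for a 1D array
percentileofscore_with_axis(data, 0.5, axis=0)  # one percentile per column
percentileofscore_with_axis(data, 0.5, axis=1)  # one percentile per row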
Example #6
Source File: eia860_test.py    From pudl with MIT License
def test_own_eia860(pudl_out_eia, live_pudl_db):
    """Sanity checks for EIA 860 generator ownership data."""
    if not live_pudl_db:
        raise AssertionError("Data validation only works with a live PUDL DB.")
    logger.info('Reading EIA 860 generator ownership data...')
    own_out = pudl_out_eia.own_eia860()

    if (own_out.fraction_owned > 1.0).any():
        raise AssertionError(
            "Generators with ownership fractions > 1.0 found. Bad data?"
        )

    if (own_out.fraction_owned < 0.0).any():
        raise AssertionError(
            "Generators with ownership fractions < 0.0 found. Bad data?"
        )

    # Verify that the reported ownership fractions add up to something very
    # close to 1.0 (i.e. that the full ownership of each generator is
    # specified by the EIA860 data)
    own_gb = own_out.groupby(['report_date', 'plant_id_eia', 'generator_id'])
    own_sum = own_gb['fraction_owned'].agg(helpers.sum_na).reset_index()
    logger.info(
        f"{len(own_sum[own_sum.fraction_owned.isnull()])} generator-years have no ownership data.")

    own_sum = own_sum.dropna()
    pct_missing = stats.percentileofscore(own_sum.fraction_owned, 0.98)
    logger.info(
        f"{len(own_sum[own_sum.fraction_owned < 0.98])} ({pct_missing}%) "
        f"generator-years have incomplete ownership data.")
    if not max(own_sum['fraction_owned'] < 1.02):
        raise ValueError("Plants with more than 100% ownership found...")
    # There might be a few generators with incomplete ownership but virtually
    # everything should be pretty fully described. If not, let us know. The
    # 0.5 threshold means 0.5% -- i.e. less than 1 in 200 is partial.
    if pct_missing >= 0.5:
        raise ValueError(
            f"{pct_missing}% of generators lack complete ownership data."
        ) 
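The pct_missing line above uses percentileofscore in reverse: instead of ranking an observed value, it asks what share of the data falls below a fixed threshold (0.98). A minimal sketch with hypothetical values:

import numpy as np
from scipy import stats

fraction_owned = np.array([1.0, 1.0, 0.75, 1.0, 0.5, 1.0])      # hypothetical ownership fractions
pct_incomplete = stats.percentileofscore(fraction_owned, 0.98)  # ~33.3: share of values below the 0.98 threshold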
Example #7
Source File: anomaly_detectors.py    From datastream.io with Apache License 2.0
def score_anomaly(self, x):
        x = pd.Series(x)
        scores = pd.Series([0.01*percentileofscore(self.sample_, z) for z in x])
        return scores 
Example #8
Source File: detector.py    From datastream.io with Apache License 2.0
def score(self, x):
        from scipy.stats import percentileofscore
        return [0.01*percentileofscore(x, z) for z in x] 
Example #9
Source File: perf.py    From tia with BSD 3-Clause "New" or "Revised" License
def rolling_percentileofscore(series, window, min_periods=None):
    """Computue the score percentile for the specified window."""
    import scipy.stats as stats

    def _percentile(arr):
        score = arr[-1]
        vals = arr[:-1]
        return stats.percentileofscore(vals, score)

    notnull = series.dropna()
    min_periods = min_periods or window
    if notnull.empty:
        return pd.Series(np.nan, index=series.index)
    else:
        return pd.rolling_apply(notnull, window, _percentile, min_periods=min_periods).reindex(series.index) 
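Note that this snippet relies on pd.rolling_apply, which was removed in pandas 0.23. A rough modern-pandas equivalent (an assumption, not part of the tia source) uses Series.rolling().apply() with the same _percentile logic:

import numpy as np
import pandas as pd
import scipy.stats as stats

def rolling_percentileofscore_modern(series, window, min_periods=None):
    def _percentile(arr):
        # score the newest value against the values that precede it in the window
        return stats.percentileofscore(arr[:-1], arr[-1])

    notnull = series.dropna()
    if notnull.empty:
        return pd.Series(np.nan, index=series.index)
    return (notnull.rolling(window, min_periods=min_periods or window)
                   .apply(_percentile, raw=True)
                   .reindex(series.index))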
Example #10
Source File: perf.py    From tia with BSD 3-Clause "New" or "Revised" License
def expanding_percentileofscore(series, min_periods=None):
    import scipy.stats as stats

    def _percentile(arr):
        score = arr[-1]
        vals = arr[:-1]
        return stats.percentileofscore(vals, score)

    notnull = series.dropna()
    if notnull.empty:
        return pd.Series(np.nan, index=series.index)
    else:
        return pd.expanding_apply(notnull, _percentile, min_periods=min_periods).reindex(series.index) 
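Likewise, pd.expanding_apply is gone from modern pandas; an assumed equivalent for the line above, reusing notnull and _percentile from this example, is:

notnull.expanding(min_periods=min_periods or 1).apply(_percentile, raw=True).reindex(series.index)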
Example #11
Source File: stats.py    From NiMARE with MIT License
def null_to_p(test_value, null_array, tail='two'):
    """Return p-value for test value against null array.

    Parameters
    ----------
    test_value : :obj:`float`
        Value for which to determine p-value.
    null_array : 1D :class:`numpy.ndarray`
        Null distribution against which test_value is compared.
    tail : {'two', 'upper', 'lower'}, optional
        Whether to compare value against null distribution in a two-sided
        ('two') or one-sided ('upper' or 'lower') manner.
        If 'upper', then higher values for the test_value are more significant.
        If 'lower', then lower values for the test_value are more significant.
        Default is 'two'.

    Returns
    -------
    p_value : :obj:`float`
        P-value associated with the test value when compared against the null
        distribution.
    """
    if tail == 'two':
        p_value = (50 - np.abs(stats.percentileofscore(
            null_array, test_value) - 50.)) * 2. / 100.
    elif tail == 'upper':
        p_value = 1 - (stats.percentileofscore(null_array, test_value) / 100.)
    elif tail == 'lower':
        p_value = stats.percentileofscore(null_array, test_value) / 100.
    else:
        raise ValueError('Argument "tail" must be one of ["two", "upper", '
                         '"lower"]')
    return p_value 
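A short usage sketch for null_to_p (hypothetical values; assumes the function above with scipy.stats imported as stats): with a simulated standard-normal null, an observed value of 1.96 lands near the familiar tail probabilities.

import numpy as np

null = np.random.normal(size=100000)  # simulated null distribution
obs = 1.96                            # hypothetical observed test value

null_to_p(obs, null, tail='two')    # close to 0.05
null_to_p(obs, null, tail='upper')  # close to 0.025
null_to_p(obs, null, tail='lower')  # close to 0.975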
Example #12
Source File: replay.py    From nelpy with MIT License
def score_hmm_events_no_xval(bst, training=None, validation=None, num_states=30, n_shuffles=5000, shuffle='row-wise', verbose=False):
    """same as score_hmm_events, but train on training set, and only score validation set..."""
    if shuffle == 'row-wise':
        rowwise = True
    elif shuffle == 'col-wise':
        rowwise = False
    else:
        shuffle = 'timeswap'

    scores_hmm = np.zeros(len(validation))
    scores_hmm_shuffled = np.zeros((len(validation), n_shuffles))

    PBEs_train = bst[training]
    PBEs_test = bst[validation]

    # train HMM on all training PBEs
    hmm = hmmutils.PoissonHMM(n_components=num_states, random_state=0, verbose=False)
    hmm.fit(PBEs_train)

    # reorder states according to transmat ordering
    transmat_order = hmm.get_state_order('transmat')
    hmm.reorder_states(transmat_order)

    # compute scores_hmm (log likelihoods) of validation set:
    scores_hmm[:] = hmm.score(PBEs_test)

    if shuffle == 'timeswap':
        _, scores_tswap_hmm = score_hmm_timeswap_shuffle(bst=PBEs_test,
                                                        hmm=hmm,
                                                        n_shuffles=n_shuffles)

        scores_hmm_shuffled[:,:] = scores_tswap_hmm.T
    else:
        hmm_shuffled = copy.deepcopy(hmm)
        for nn in range(n_shuffles):
            # shuffle transition matrix:
            if rowwise:
                hmm_shuffled.transmat_ = shuffle_transmat(hmm_shuffled.transmat)
            else:
                hmm_shuffled.transmat_ = shuffle_transmat_Kourosh_breaks_stochasticity(hmm_shuffled.transmat)
                hmm_shuffled.transmat_ = hmm_shuffled.transmat / np.tile(hmm_shuffled.transmat.sum(axis=1), (hmm_shuffled.n_components, 1)).T

            # score validation set with shuffled HMM
            scores_hmm_shuffled[:, nn] = hmm_shuffled.score(PBEs_test)

    n_scores = len(scores_hmm)
    scores_hmm_percentile = np.array([stats.percentileofscore(scores_hmm_shuffled[idx], scores_hmm[idx], kind='mean') for idx in range(n_scores)])

    return scores_hmm, scores_hmm_shuffled, scores_hmm_percentile 
Example #13
Source File: statistics.py    From gs-quant with Apache License 2.0
def percentiles(x: pd.Series, y: pd.Series = None, w: Union[Window, int] = Window(None, 0)) -> pd.Series:
    """
    Rolling percentiles over given window

    :param x: value series
    :param y: distribution series
    :param w: Window or int: size of window and ramp up to use. e.g. Window(22, 10) where 22 is the window size
              and 10 the ramp up value. Window size defaults to length of series.
    :return: timeseries of percentiles

    **Usage**

    Calculate `percentile rank <https://en.wikipedia.org/wiki/Percentile_rank>`_ of :math:`y` in the sample distribution
    of :math:`x` over a rolling window of length :math:`w`:

    :math:`R_t = \\frac{\sum_{i=t-N+1}^{t}{[X_i<{Y_t}]}+0.5\sum_{i=t-N+1}^{t}{[X_i={Y_t}]}}{N}\\times100\%`

    Where :math:`N` is the number of observations in a rolling window. If :math:`y` is not provided, calculates
    percentiles of :math:`x` over its historical values. If window length :math:`w` is not provided, uses an
    ever-growing history of values. If :math:`w` is greater than the available data size, returns empty.

    **Examples**

    Compute percentile ranks of a series in the sample distribution of a second series over :math:`22` observations

    >>> a = generate_series(100)
    >>> b = generate_series(100)
    >>> percentiles(a, b, 22)

    **See also**

    :func:`zscores`

    """
    w = normalize_window(x, w)
    if x.empty:
        return x

    if y is None:
        y = x.copy()

    if isinstance(w.r, int) and w.r > len(y):
        raise ValueError('Ramp value must be less than the length of the series y.')

    if isinstance(w.w, int) and w.w > len(x):
        return pd.Series()

    res = pd.Series(dtype=np.dtype(float))
    for idx, val in y.iteritems():
        sample = x.loc[(x.index > idx - w.w) & (x.index <= idx)] if isinstance(w.w, pd.DateOffset) else x[:idx][-w.w:]
        res.loc[idx] = percentileofscore(sample, val, kind='mean')

    if isinstance(w.r, pd.DateOffset):
        return res.loc[res.index[0] + w.r:]
    else:
        return res[w.r:] 
Example #14
Source File: dataset.py    From tropycal with MIT License
def season_composite(self,seasons,climo_bounds=None):
        
        r"""
        Create composite statistics for a list of seasons.
        
        Parameters
        ----------
        seasons : list
            List of seasons to create a composite of. For Southern Hemisphere, season is the start of the two-year period.
        climo_bounds : list or tuple
            List or tuple of start and end years of climatology bounds. If none, defaults to (1981,2010).
        
        Returns
        -------
        dict
            Dictionary containing the composite of the requested seasons.
        """
        
        #Error check
        if isinstance(seasons,list) == False:
            raise TypeError("'seasons' must be of type list.")
        
        #Create climo bounds
        if climo_bounds is None:
            climo_bounds = (1981,2010)
        
        #Get Season object for the composite
        summary = self.get_season(seasons).summary()
        
        #Get basin climatology
        climatology = self.climatology(climo_bounds[0],climo_bounds[1])
        full_climo = self.to_dataframe()
        subset_climo = full_climo.loc[climo_bounds[0]:climo_bounds[1]+1]
        
        #Create composite dictionary
        map_keys = {'all_storms':'season_storms',
                    'named_storms':'season_named',
                    'hurricanes':'season_hurricane',
                    'major_hurricanes':'season_major',
                    'ace':'season_ace',
                   }
        composite = {}
        for key in map_keys.keys():
            
            #Get list from seasons
            season_list = summary[map_keys.get(key)]
            
            #Get climatology
            season_climo = climatology[key]
            
            #Get individual years in climatology
            season_fullclimo = subset_climo[key].values
            
            #Create dictionary of relevant calculations for this entry
            composite[key] = {'list':season_list,
                              'average':np.round(np.average(season_list),1),
                              'composite_anomaly':np.round(np.average(season_list)-season_climo,1),
                              'percentile_ranks':[np.round(stats.percentileofscore(season_fullclimo,i),1) for i in season_list],
                             }
        
        return composite