Python scipy.stats.percentileofscore() Examples
The following are 14 code examples of scipy.stats.percentileofscore().
Each example links to the original project and source file it was taken from.
You may also want to check out all available functions/classes of the module scipy.stats.
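Before the project examples, a minimal standalone sketch of how percentileofscore() behaves (the sample values below are made up for illustration): it returns the percentage of values in a sample that fall at or below a given score, with tie handling controlled by the kind argument ('rank', 'weak', 'strict', or 'mean').

    from scipy import stats

    data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    print(stats.percentileofscore(data, 7))                  # 70.0 with the default kind='rank'
    print(stats.percentileofscore(data, 7, kind='strict'))   # 60.0: percentage of values strictly below the score
    print(stats.percentileofscore(data, 7, kind='mean'))     # 65.0: average of the 'strict' and 'weak' definitions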
Example #1
Source File: test_aggregator.py From pywr with GNU General Public License v3.0 | 6 votes |
def agg_func(request):
    agg_func_name = request.param

    if agg_func_name == "custom":
        # When using custom you assign the function rather than a string.
        agg_func_name = npy_func = custom_test_func
    elif agg_func_name == "percentile":
        agg_func_name = {
            "func": "percentile",
            "args": [95],
            "kwargs": {}
        }
        npy_func = partial(np.percentile, q=95)
    elif agg_func_name == "percentileofscore":
        agg_func_name = {
            "func": "percentileofscore",
            "kwargs": {
                "score": 0.5,
                "kind": "rank"
            }
        }
        npy_func = partial(percentileofscore_with_axis, score=0.5, kind="rank")
    else:
        npy_func = npy_funcs[agg_func_name]
    return agg_func_name, npy_func
Example #2
Source File: predict.py From PIDGINv3 with GNU General Public License v3.0 | 6 votes |
def doPercentileCalculation(model_name):
    global rdkit_mols
    # expensive to unzip training file - so only done if smiles requested
    if options.ad_smiles:
        smiles = get_training_smiles(model_name)
    ad_data = getAdData(model_name)

    def calcPercentile(rdkit_mol):
        sims = DataStructs.BulkTanimotoSimilarity(rdkit_mol, ad_data[:, 0])
        bias = ad_data[:, 2].astype(float)
        std_dev = ad_data[:, 3].astype(float)
        scores = ad_data[:, 5].astype(float)
        weights = sims / (bias * std_dev)
        critical_weight = weights.max()
        percentile = percentileofscore(scores, critical_weight)
        if options.ad_smiles:
            critical_smiles = smiles[np.argmax(weights)]
            result = percentile, critical_smiles
        else:
            result = percentile, None
        return result

    ret = [calcPercentile(x) for x in rdkit_mols]
    return model_name, ret

# prediction runner for percentile calculation
Example #3
Source File: ABuPtPosition.py From abu with GNU General Public License v3.0 | 6 votes |
def fit_position(self, factor_object):
    """
    Position management for mean-reversion strategies:
    decide the position size from the rank of the current buy price
    within a recent window of the financial series.
    fit_position returns how many units to buy (shares, lots, tons, contracts).
    :param factor_object: instance of an ABuFactorBuyBases subclass
    :return: how many units to buy (shares, lots, tons, contracts)
    """
    # self.kl_pd_buy is the data for the buy day; fetch the preceding past_day_cnt days of data
    last_kl = factor_object.past_today_kl(self.kl_pd_buy, self.past_day_cnt)
    if last_kl is None or last_kl.empty:
        precent_pos = self.pos_base
    else:
        # Use percentileofscore to find where the buy price ranks among the prices of the past past_day_cnt days
        precent_pos = stats.percentileofscore(last_kl.close, self.bp)
        precent_pos = (1 + (self.mid_precent - precent_pos) / 100) * self.pos_base
    # Cap at the maximum position; still constrained by the upper-level maximum position limit,
    # e.g. a full-position result is reduced to 75%; change the maximum position value to adjust this
    precent_pos = self.pos_max if precent_pos > self.pos_max else precent_pos
    # The result is how many units to buy (shares, lots, tons, contracts)
    return self.read_cash * precent_pos / self.bp * self.deposit_rate
Example #4
Source File: drought.py From RHEAS with MIT License | 6 votes |
def calcSeverity(model, varname="soil_moist"):
    """Calculate drought severity from *climatology* table stored in database."""
    db = dbio.connect(model.dbname)
    cur = db.cursor()
    if varname == "soil_moist":
        sql = "select fdate,(ST_DumpValues(st_union(rast,'sum'))).valarray from {0}.soil_moist group by fdate order by fdate".format(model.name)
    else:
        sql = "select fdate,(ST_DumpValues(rast)).valarray from {0}.runoff order by fdate".format(model.name)
    cur.execute(sql)
    results = cur.fetchall()
    data = np.array([np.array(r[1]).ravel() for r in results])
    i = np.where(np.not_equal(data[0, :], None))[0]
    p = pandas.DataFrame(data[:, i], index=np.array([r[0] for r in results], dtype='datetime64'), columns=range(len(i)))
    p = p.rolling('10D').mean()  # calculate percentiles with dekad rolling mean
    st = "{0}-{1}-{2}".format(model.startyear, model.startmonth, model.startday)
    et = "{0}-{1}-{2}".format(model.endyear, model.endmonth, model.endday)
    s = np.array([[stats.percentileofscore(p[pi].values, v) for v in p[pi][st:et]] for pi in p.columns]).T
    s = 100.0 - s
    cur.close()
    db.close()
    return s
Example #5
Source File: test_aggregator.py From pywr with GNU General Public License v3.0 | 5 votes |
def percentileofscore_with_axis(values, *args, axis=0, **kwargs):
    if values.ndim == 1:
        # For 1D data we just calculate the percentile
        out = percentileofscore(values, *args, **kwargs)
    elif axis == 0:
        # 2D data by axis 0
        out = [percentileofscore(values[:, i], *args, **kwargs) for i in range(values.shape[1])]
    elif axis == 1:
        # 2D data by axis 1
        out = [percentileofscore(values[i, :], *args, **kwargs) for i in range(values.shape[0])]
    else:
        raise ValueError('Axis "{}" not supported'.format(axis))
    return out
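As a quick illustration of the helper above, it applies percentileofscore down each column when axis=0 and across each row when axis=1. A hypothetical call (array contents invented for this sketch, and percentileofscore_with_axis assumed to be in scope as defined above):

    import numpy as np
    from scipy.stats import percentileofscore

    values = np.array([[1.0, 10.0],
                       [2.0, 20.0],
                       [3.0, 30.0]])

    # Percentile rank of 2.5 within each column: it exceeds two of the three
    # values in the first column and none in the second -> roughly [66.7, 0.0]
    print(percentileofscore_with_axis(values, 2.5, axis=0))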
Example #6
Source File: eia860_test.py From pudl with MIT License | 5 votes |
def test_own_eia860(pudl_out_eia, live_pudl_db):
    """Sanity checks for EIA 860 generator ownership data."""
    if not live_pudl_db:
        raise AssertionError("Data validation only works with a live PUDL DB.")
    logger.info('Reading EIA 860 generator ownership data...')
    own_out = pudl_out_eia.own_eia860()

    if (own_out.fraction_owned > 1.0).any():
        raise AssertionError(
            "Generators with ownership fractions > 1.0 found. Bad data?"
        )

    if (own_out.fraction_owned < 0.0).any():
        raise AssertionError(
            "Generators with ownership fractions < 0.0 found. Bad data?"
        )

    # Verify that the reported ownership fractions add up to something very
    # close to 1.0 (i.e. that the full ownership of each generator is
    # specified by the EIA860 data)
    own_gb = own_out.groupby(['report_date', 'plant_id_eia', 'generator_id'])
    own_sum = own_gb['fraction_owned'].agg(helpers.sum_na).reset_index()
    logger.info(
        f"{len(own_sum[own_sum.fraction_owned.isnull()])} generator-years have no ownership data.")
    own_sum = own_sum.dropna()
    pct_missing = stats.percentileofscore(own_sum.fraction_owned, 0.98)
    logger.info(
        f"{len(own_sum[own_sum.fraction_owned < 0.98])} ({pct_missing}%) "
        f"generator-years have incomplete ownership data.")
    if not max(own_sum['fraction_owned'] < 1.02):
        raise ValueError("Plants with more than 100% ownership found...")
    # There might be a few generators with incomplete ownership but virtually
    # everything should be pretty fully described. If not, let us know. The
    # 0.5 threshold means 0.5% -- i.e. less than 1 in 200 is partial.
    if pct_missing >= 0.5:
        raise ValueError(
            f"{pct_missing}% of generators lack complete ownership data."
        )
Example #7
Source File: anomaly_detectors.py From datastream.io with Apache License 2.0 | 5 votes |
def score_anomaly(self, x):
    x = pd.Series(x)
    scores = pd.Series([0.01*percentileofscore(self.sample_, z) for z in x])
    return scores
Example #8
Source File: detector.py From datastream.io with Apache License 2.0 | 5 votes |
def score(self, x):
    from scipy.stats import percentileofscore
    return [0.01*percentileofscore(x, z) for z in x]
Example #9
Source File: perf.py From tia with BSD 3-Clause "New" or "Revised" License | 5 votes |
def rolling_percentileofscore(series, window, min_periods=None):
    """Compute the score percentile for the specified window."""
    import scipy.stats as stats

    def _percentile(arr):
        score = arr[-1]
        vals = arr[:-1]
        return stats.percentileofscore(vals, score)

    notnull = series.dropna()
    min_periods = min_periods or window
    if notnull.empty:
        return pd.Series(np.nan, index=series.index)
    else:
        return pd.rolling_apply(notnull, window, _percentile, min_periods=min_periods).reindex(series.index)
Example #10
Source File: perf.py From tia with BSD 3-Clause "New" or "Revised" License | 5 votes |
def expanding_percentileofscore(series, min_periods=None):
    import scipy.stats as stats

    def _percentile(arr):
        score = arr[-1]
        vals = arr[:-1]
        return stats.percentileofscore(vals, score)

    notnull = series.dropna()
    if notnull.empty:
        return pd.Series(np.nan, index=series.index)
    else:
        return pd.expanding_apply(notnull, _percentile, min_periods=min_periods).reindex(series.index)
Example #11
Source File: stats.py From NiMARE with MIT License | 5 votes |
def null_to_p(test_value, null_array, tail='two'):
    """Return p-value for test value against null array.

    Parameters
    ----------
    test_value : :obj:`float`
        Value for which to determine p-value.
    null_array : 1D :class:`numpy.ndarray`
        Null distribution against which test_value is compared.
    tail : {'two', 'upper', 'lower'}, optional
        Whether to compare value against null distribution in a two-sided
        ('two') or one-sided ('upper' or 'lower') manner. If 'upper', then
        higher values for the test_value are more significant. If 'lower',
        then lower values for the test_value are more significant.
        Default is 'two'.

    Returns
    -------
    p_value : :obj:`float`
        P-value associated with the test value when compared against the null
        distribution.
    """
    if tail == 'two':
        p_value = (50 - np.abs(stats.percentileofscore(
            null_array, test_value) - 50.)) * 2. / 100.
    elif tail == 'upper':
        p_value = 1 - (stats.percentileofscore(null_array, test_value) / 100.)
    elif tail == 'lower':
        p_value = stats.percentileofscore(null_array, test_value) / 100.
    else:
        raise ValueError('Argument "tail" must be one of ["two", "upper", '
                         '"lower"]')
    return p_value
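The two-tailed branch above folds the percentile rank around the median: a test value at the 97.5th percentile of the null distribution lies 47.5 points from 50, giving p = (50 - 47.5) * 2 / 100 = 0.05. A hedged sketch of how percentileofscore feeds that calculation, using a synthetic null distribution generated purely for illustration:

    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(0)
    null_array = rng.normal(size=10_000)   # synthetic standard-normal null distribution
    test_value = 1.96

    perc = stats.percentileofscore(null_array, test_value)   # roughly 97.5 for a standard normal
    p_two = (50 - np.abs(perc - 50.)) * 2. / 100.             # same formula as the 'two' branch above
    p_upper = 1 - perc / 100.                                  # same formula as the 'upper' branch
    print(p_two, p_upper)                                      # roughly 0.05 and 0.025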
Example #12
Source File: replay.py From nelpy with MIT License | 4 votes |
def score_hmm_events_no_xval(bst, training=None, validation=None, num_states=30, n_shuffles=5000, shuffle='row-wise', verbose=False):
    """same as score_hmm_events, but train on training set, and only score validation set..."""
    if shuffle == 'row-wise':
        rowwise = True
    elif shuffle == 'col-wise':
        rowwise = False
    else:
        shuffle = 'timeswap'

    scores_hmm = np.zeros(len(validation))
    scores_hmm_shuffled = np.zeros((len(validation), n_shuffles))

    PBEs_train = bst[training]
    PBEs_test = bst[validation]

    # train HMM on all training PBEs
    hmm = hmmutils.PoissonHMM(n_components=num_states, random_state=0, verbose=False)
    hmm.fit(PBEs_train)

    # reorder states according to transmat ordering
    transmat_order = hmm.get_state_order('transmat')
    hmm.reorder_states(transmat_order)

    # compute scores_hmm (log likelihoods) of validation set:
    scores_hmm[:] = hmm.score(PBEs_test)

    if shuffle == 'timeswap':
        _, scores_tswap_hmm = score_hmm_timeswap_shuffle(bst=PBEs_test, hmm=hmm, n_shuffles=n_shuffles)
        scores_hmm_shuffled[:, :] = scores_tswap_hmm.T
    else:
        hmm_shuffled = copy.deepcopy(hmm)
        for nn in range(n_shuffles):
            # shuffle transition matrix:
            if rowwise:
                hmm_shuffled.transmat_ = shuffle_transmat(hmm_shuffled.transmat)
            else:
                hmm_shuffled.transmat_ = shuffle_transmat_Kourosh_breaks_stochasticity(hmm_shuffled.transmat)
                hmm_shuffled.transmat_ = hmm_shuffled.transmat / np.tile(hmm_shuffled.transmat.sum(axis=1), (hmm_shuffled.n_components, 1)).T

            # score validation set with shuffled HMM
            scores_hmm_shuffled[:, nn] = hmm_shuffled.score(PBEs_test)

    n_scores = len(scores_hmm)
    scores_hmm_percentile = np.array([stats.percentileofscore(scores_hmm_shuffled[idx], scores_hmm[idx], kind='mean') for idx in range(n_scores)])

    return scores_hmm, scores_hmm_shuffled, scores_hmm_percentile
Example #13
Source File: statistics.py From gs-quant with Apache License 2.0 | 4 votes |
def percentiles(x: pd.Series, y: pd.Series = None, w: Union[Window, int] = Window(None, 0)) -> pd.Series:
    """
    Rolling percentiles over given window

    :param x: value series
    :param y: distribution series
    :param w: Window or int: size of window and ramp up to use. e.g. Window(22, 10) where 22 is the window size
              and 10 the ramp up value. Window size defaults to length of series.
    :return: timeseries of percentiles

    **Usage**

    Calculate `percentile rank <https://en.wikipedia.org/wiki/Percentile_rank>`_ of :math:`y` in the sample
    distribution of :math:`x` over a rolling window of length :math:`w`:

    :math:`R_t = \\frac{\sum_{i=t-N+1}^{t}{[X_i<{Y_t}]}+0.5\sum_{i=t-N+1}^{t}{[X_i={Y_t}]}}{N}\\times100\%`

    Where :math:`N` is the number of observations in a rolling window. If :math:`y` is not provided, calculates
    percentiles of :math:`x` over its historical values. If window length :math:`w` is not provided, uses an
    ever-growing history of values. If :math:`w` is greater than the available data size, returns empty.

    **Examples**

    Compute percentile ranks of a series in the sample distribution of a second series over :math:`22` observations

    >>> a = generate_series(100)
    >>> b = generate_series(100)
    >>> percentiles(a, b, 22)

    **See also**

    :func:`zscores`
    """
    w = normalize_window(x, w)
    if x.empty:
        return x

    if y is None:
        y = x.copy()

    if isinstance(w.r, int) and w.r > len(y):
        raise ValueError('Ramp value must be less than the length of the series y.')

    if isinstance(w.w, int) and w.w > len(x):
        return pd.Series()

    res = pd.Series(dtype=np.dtype(float))
    for idx, val in y.iteritems():
        sample = x.loc[(x.index > idx - w.w) & (x.index <= idx)] if isinstance(w.w, pd.DateOffset) else x[:idx][-w.w:]
        res.loc[idx] = percentileofscore(sample, val, kind='mean')

    if isinstance(w.r, pd.DateOffset):
        return res.loc[res.index[0] + w.r:]
    else:
        return res[w.r:]
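The percentile-rank formula in the docstring above is what kind='mean' computes: observations strictly below the score count fully and ties count half. A small hand check, with sample values chosen only for illustration:

    from scipy.stats import percentileofscore

    sample = [1, 2, 2, 3, 4]   # N = 5
    score = 2

    # (count below + 0.5 * count equal) / N * 100 = (1 + 0.5 * 2) / 5 * 100 = 40.0
    print(percentileofscore(sample, score, kind='mean'))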
Example #14
Source File: dataset.py From tropycal with MIT License | 4 votes |
def season_composite(self, seasons, climo_bounds=None):
    r"""
    Create composite statistics for a list of seasons.

    Parameters
    ----------
    seasons : list
        List of seasons to create a composite of. For Southern Hemisphere, season is the start of the two-year period.
    climo_bounds : list or tuple
        List or tuple of start and end years of climatology bounds. If none, defaults to (1981,2010).

    Returns
    -------
    dict
        Dictionary containing the composite of the requested seasons.
    """

    # Error check
    if isinstance(seasons, list) == False:
        raise TypeError("'seasons' must be of type list.")

    # Create climo bounds
    if climo_bounds is None:
        climo_bounds = (1981, 2010)

    # Get Season object for the composite
    summary = self.get_season(seasons).summary()

    # Get basin climatology
    climatology = self.climatology(climo_bounds[0], climo_bounds[1])
    full_climo = self.to_dataframe()
    subset_climo = full_climo.loc[climo_bounds[0]:climo_bounds[1]+1]

    # Create composite dictionary
    map_keys = {'all_storms': 'season_storms',
                'named_storms': 'season_named',
                'hurricanes': 'season_hurricane',
                'major_hurricanes': 'season_major',
                'ace': 'season_ace',
                }
    composite = {}
    for key in map_keys.keys():

        # Get list from seasons
        season_list = summary[map_keys.get(key)]

        # Get climatology
        season_climo = climatology[key]

        # Get individual years in climatology
        season_fullclimo = subset_climo[key].values

        # Create dictionary of relevant calculations for this entry
        composite[key] = {'list': season_list,
                          'average': np.round(np.average(season_list), 1),
                          'composite_anomaly': np.round(np.average(season_list)-season_climo, 1),
                          'percentile_ranks': [np.round(stats.percentileofscore(season_fullclimo, i), 1) for i in season_list],
                          }

    return composite