Python scipy.stats.f_oneway() Examples
The following are 21 code examples of scipy.stats.f_oneway().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
Example #1
Source File: TargetAnalysisCategorical.py From exploripy with MIT License | 8 votes |
def Anova(self):
    """
    Run a one-way ANOVA of every continuous feature against the target.

    For each name in ``self.ContinuousFeatures`` the rows of ``self.df`` that
    have a missing value in that column or in ``self.target`` are dropped, the
    remaining values are grouped by target level and the groups are passed to
    ``stats.f_oneway``.  A feature whose test raises is skipped.

    Output --> DataFrame with columns ``Continuous`` and ``PValue`` holding the
    features whose p-value is <= 0.05, sorted by ascending p-value.  The frame
    is empty when no test could be computed.
    """
    target = self.target
    AnovaList = []
    for ContinuousVar in self.ContinuousFeatures:
        temp_df = self.df[[ContinuousVar, target]].dropna()
        try:
            _, p = stats.f_oneway(*[
                list(temp_df[temp_df[target] == name][ContinuousVar])
                for name in set(temp_df[target])
            ])
        except Exception:
            # Best effort: skip this feature, but let KeyboardInterrupt and
            # SystemExit propagate (a bare ``except`` would swallow them).
            continue
        AnovaList.append(dict(Continuous=ContinuousVar, PValue=p))

    Anova_df = pd.DataFrame(AnovaList)
    if Anova_df.shape[0] > 0:
        # Sort a new frame instead of sorting the filtered slice in place,
        # which triggers pandas' SettingWithCopyWarning.
        Anova_df = Anova_df[Anova_df['PValue'] <= 0.05].sort_values(['PValue'], ascending=True)
    return Anova_df
Example #2
Source File: TargetAnalysisContinuous.py From exploripy with MIT License | 6 votes |
def Anova(self):
    """
    Run a one-way ANOVA of the target against every categorical feature.

    For each name in ``self.CategoricalFeatures`` the rows of ``self.df`` that
    have a missing value in that column or in ``self.target`` are dropped, the
    target values are grouped by the feature's levels and the groups are
    passed to ``stats.f_oneway``.  A feature whose test raises is skipped.

    Output --> DataFrame with columns ``Categorical`` and ``PValue`` holding
    the features whose p-value is <= 0.05, sorted by ascending p-value.  The
    frame is empty when no test could be computed.
    """
    target = self.target
    AnovaList = []
    print('Performing ANOVA...')
    for CategoricalVar in tqdm(self.CategoricalFeatures):
        temp_df = self.df[[CategoricalVar, target]].dropna()
        try:
            _, p = stats.f_oneway(*[
                list(temp_df[temp_df[CategoricalVar] == name][target])
                for name in set(temp_df[CategoricalVar])
            ])
        except Exception:
            # Best effort: skip this feature, but let KeyboardInterrupt and
            # SystemExit propagate (a bare ``except`` would swallow them).
            continue
        AnovaList.append(dict(Categorical=CategoricalVar, PValue=p))

    Anova_df = pd.DataFrame(AnovaList)
    if Anova_df.shape[0] > 0:
        # Sort a new frame instead of sorting the filtered slice in place,
        # which triggers pandas' SettingWithCopyWarning.
        Anova_df = Anova_df[Anova_df['PValue'] <= 0.05].sort_values(['PValue'], ascending=True)
    return Anova_df
Example #3
Source File: test_correct.py From abagen with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test__batch():
    """Exercise ``correct._batch_correct`` on three batches with a large offset in the first."""
    rs = np.random.RandomState(1234)

    # p-values for ANOVA should all be ~0 (large group differences) before
    # batch correction
    y = []
    for shift in [5, 0, 0]:
        y.append(rs.normal(size=(100, 1000)) + shift)
    before = sstats.f_oneway(*y)
    assert np.allclose(before[1], 0)

    # F-values for ANOVA should all be ~0 (no group differences) after batch
    # correction; p-values returned here are sometimes NaN so not a good test
    out = correct._batch_correct(y)
    after = sstats.f_oneway(*out)
    assert np.allclose(after[0], 0)

    # mean expressions after correction should be ~equal
    batch_means = [o.mean() for o in out]
    assert np.allclose(batch_means, 1.24871965683026)

    # a single batch is expected to be rejected
    with pytest.raises(ValueError):
        correct._batch_correct([y[0]])
Example #4
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_result_attributes(self):
    """Pass the ``f_oneway`` result and the names ``statistic``/``pvalue`` to ``check_named_results``."""
    first = np.array([655, 788], dtype=np.uint16)
    second = np.array([789, 772], dtype=np.uint16)
    expected_names = ('statistic', 'pvalue')
    check_named_results(stats.f_oneway(first, second), expected_names)
Example #5
Source File: EDA.py From exploripy with MIT License | 5 votes |
def Anova(self):
    """
    One-way ANOVA (F-score) of every continuous feature against every
    categorical feature.

    Rows of ``self.df`` containing any null are dropped first.  For a
    significant result (p < 0.05) on a categorical feature that is not listed
    in ``self.BinaryCategoricalFeatures``, ``self.Tukey`` is called as well.

    Returns a tuple ``(AnovaList, summary)``: ``AnovaList`` is a list of dicts
    with keys ``Categorical``, ``Continuous``, ``f``, ``p``, ``Binary``,
    ``Insight`` and ``TukeyResult``; ``summary`` is a DataFrame built from the
    ``Categorical`` / ``Continuous`` / ``PValue`` values of those entries.
    """
    # Drop records with Null values
    temp_df = self.df.dropna()
    start = time.time()

    influenced_msg = "With Confidence interval of 0.05, the variable - \"{0}\" is influenced by the categorical variable - \"{1}\". "
    binary_msg = "As the Categorical variable - \"{0}\" is binary, Tukey's HSD test is not necessary. "
    not_influenced_msg = "As the p-Value is higher than the Confidence Interval 0.05, the variable - \"{0}\" is not influenced by the categorical variable - \"{1}\". "

    AnovaList = []
    for CategoricalVar in self.CategoricalFeatures:
        is_binary = CategoricalVar in self.BinaryCategoricalFeatures
        Binary = 'Yes' if is_binary else 'No'
        for ContinuousVar in self.ContinuousFeatures:
            samples = [
                list(temp_df[temp_df[CategoricalVar] == level][ContinuousVar])
                for level in set(temp_df[CategoricalVar])
            ]
            f, p = stats.f_oneway(*samples)

            TukeyResult = None
            if p < 0.05:
                if is_binary:
                    Insight = (influenced_msg.format(ContinuousVar, CategoricalVar)
                               + binary_msg.format(CategoricalVar))
                else:
                    # Post-hoc test only when there are more than two levels.
                    TukeyResult = self.Tukey(CategoricalVar, ContinuousVar)
                    Insight = influenced_msg.format(ContinuousVar, CategoricalVar)
            else:
                Insight = not_influenced_msg.format(ContinuousVar, CategoricalVar)

            AnovaList.append(dict(Categorical=CategoricalVar,
                                  Continuous=ContinuousVar,
                                  f=f,
                                  p=p,
                                  Binary=Binary,
                                  Insight=Insight,
                                  TukeyResult=TukeyResult))

    SummaryAnovaList = [
        dict(Categorical=entry['Categorical'],
             Continuous=entry['Continuous'],
             PValue=entry['p'])
        for entry in AnovaList
    ]

    end = time.time()
    if self.debug == 'YES':
        print('Anova', end - start)
    return AnovaList, pd.DataFrame(SummaryAnovaList)
Example #6
Source File: test_feature_select.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_f_oneway_ints():
    """Smoke test: ``f_oneway`` on integer input matches the result on float input."""
    # Integer input must not raise casting errors with recent numpys.
    rng = np.random.RandomState(0)
    X = rng.randint(10, size=(10, 10))
    y = np.arange(10)
    fint, pint = f_oneway(X, y)

    # Test that it gives the same result as with float.  The builtin ``float``
    # replaces the ``np.float`` alias, which NumPy deprecated in 1.20 and
    # removed in 1.24.
    f, p = f_oneway(X.astype(float), y)
    assert_array_almost_equal(f, fint, decimal=4)
    assert_array_almost_equal(p, pint, decimal=4)
Example #7
Source File: test_feature_select.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_f_oneway_vs_scipy_stats():
    """Test that our ``f_oneway`` gives the same result as ``scipy.stats.f_oneway``."""
    rng = np.random.RandomState(0)
    X1 = rng.randn(10, 3)
    X2 = 1 + rng.randn(10, 3)
    f, pv = stats.f_oneway(X1, X2)
    f2, pv2 = f_oneway(X1, X2)
    # Plain ``assert`` instead of the nose-style ``assert_true`` helper: same
    # check, no dependency on a deprecated testing utility.
    assert np.allclose(f, f2)
    assert np.allclose(pv, pv2)
Example #8
Source File: misc.py From audit-ai with MIT License | 5 votes |
def anova(labels, results, subset_labels=None):
    """
    Compute the one-way ANOVA f-statistic and p-value for numeric results
    grouped by categorical labels.

    Parameters
    ------------
    labels : array_like
        categorical values, e.g. ['M', 'F']; paired element-wise with
        `results`
    results : array_like
        real numbers
    subset_labels : list of strings, optional
        when given, only rows whose label is in this list are used

    Returns
    ----------
    F_onewayResult : scipy.stats object (essentially a 2-tuple)
        one-way f-statistic and p-value, indicating whether the groups'
        scores have the same sample mean
    """
    check_consistent_length(labels, results)

    pairs = list(zip(labels, results))
    df = pd.DataFrame(pairs, columns=['label', 'result'])
    if subset_labels is not None:
        keep = df['label'].isin(subset_labels)
        df = df.loc[keep]

    # One vector of results per distinct non-null label, in order of first
    # appearance.
    score_vectors = []
    for lab in df['label'].dropna().unique():
        score_vectors.append(df.loc[df['label'] == lab, 'result'])
    return f_oneway(*score_vectors)
Example #9
Source File: plot.py From SCALE with MIT License | 5 votes |
def feature_specifity(feature, ref, classes, figsize=(6,6), save=None):
    """
    Calculate and plot the feature specificity.

    For every (feature column, cluster) pair a one-way ANOVA compares the
    column's values inside the cluster with the values outside it; the
    p-values are drawn as a ``-log10`` heatmap (features x clusters).

    Input:
        feature: latent feature; accessed through ``.shape[1]`` and ``.iloc``
            (presumably a pandas DataFrame, samples x features -- confirm
            against callers)
        ref: cluster assignments, compared element-wise with each class
        classes: cluster classes, one heatmap column each
        figsize: size handed to ``plt.figure``
        save: when truthy, the path the figure is saved to as PDF; otherwise
            the figure is shown
    """
    from scipy.stats import f_oneway

    n_cluster = len(classes)
    dim = feature.shape[1]  # feature dimension
    pvalue_mat = np.zeros((dim, n_cluster))
    for i, cluster in enumerate(classes):
        for feat in range(dim):
            a = feature.iloc[:, feat][ref == cluster]
            b = feature.iloc[:, feat][ref != cluster]
            pvalue_mat[feat, i] = f_oneway(a, b)[1]

    # One tick label per feature row; the previous hard-coded ``np.arange(10)``
    # only matched the matrix when there were exactly 10 features.
    feature_labels = np.arange(dim) + 1

    plt.figure(figsize=figsize)
    grid = sns.heatmap(-np.log10(pvalue_mat), cmap='RdBu_r',
                       vmax=20,
                       yticklabels=feature_labels,
                       xticklabels=classes[:n_cluster],
                       )
    grid.set_ylabel('Feature', fontsize=18)
    grid.set_xticklabels(labels=classes[:n_cluster], rotation=45, fontsize=18)
    grid.set_yticklabels(labels=feature_labels, fontsize=16)
    cbar = grid.collections[0].colorbar
    cbar.set_label('-log10 (Pvalue)', fontsize=18)

    if save:
        plt.savefig(save, format='pdf', bbox_inches='tight')
    else:
        plt.show()
Example #10
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_nist(self):
    """Compare the F statistic of ``stats.f_oneway`` with the value read from each NIST ANOVA file."""
    # These are the nist ANOVA files. They can be found at:
    # http://www.itl.nist.gov/div898/strd/anova/anova.html
    filenames = ['SiRstv.dat', 'SmLs01.dat', 'SmLs02.dat', 'SmLs03.dat',
                 'AtmWtAg.dat', 'SmLs04.dat', 'SmLs05.dat', 'SmLs06.dat',
                 'SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat']
    # With the hard test cases we relax the tolerance a bit.
    hard_tc = ('SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat')
    data_dir = os.path.join(os.path.dirname(__file__), 'data/nist_anova')

    for test_case in filenames:
        fname = os.path.abspath(os.path.join(data_dir, test_case))

        # Lines 40-47 of the file are split into fields; the last field of
        # the first non-blank one is used as the expected F value.
        with open(fname, 'r') as handle:
            content = handle.read().split('\n')
        certified = [line.split() for line in content[40:48] if line.strip()]
        expected_f = float(certified[0][-1])

        # The numeric table starts after 60 header lines: group id, value.
        dataf = np.loadtxt(fname, skiprows=60)
        y, x = dataf.T
        y = y.astype(int)
        xlist = [x[y == i] for i in np.unique(y)]

        res = stats.f_oneway(*xlist)

        rtol = 1e-4 if test_case in hard_tc else 1e-7
        assert_allclose(res[0], expected_f, rtol=rtol,
                        err_msg='Failing testcase: %s' % test_case)
Example #11
Source File: pancreas_tests.py From scanorama with MIT License | 5 votes |
def print_oneway(X, genes, ds_labels):
    """
    Print one tab-separated line per gene: gene name, F statistic, p-value.

    For each gene the column ``X[:, gene_idx]`` is split by dataset label and
    the groups are compared with ``f_oneway``.

    X: 2-D array indexed as ``X[mask, gene_idx]`` (presumably cells x genes)
    genes: gene names, one per column of ``X``
    ds_labels: per-row dataset labels; must support element-wise ``==``
        (e.g. a numpy array)
    """
    # The dataset names and their row masks do not depend on the gene, so
    # build them once instead of once per gene.
    ds_names = sorted(set(ds_labels))
    masks = [ds_labels == ds for ds in ds_names]

    for gene_idx, gene in enumerate(genes):
        dist = [X[mask, gene_idx] for mask in masks]
        sys.stdout.write('{}\t'.format(gene))
        print('{}\t{}'.format(*f_oneway(*dist)))
Example #12
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_large_integer_array(self): a = np.array([655, 788], dtype=np.uint16) b = np.array([789, 772], dtype=np.uint16) F, p = stats.f_oneway(a, b) assert_almost_equal(F, 0.77450216931805538)
Example #13
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_basic(self): # Despite being a floating point calculation, this data should # result in F being exactly 2.0. F, p = stats.f_oneway([0,2], [2,4]) assert_equal(F, 2.0)
Example #14
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_trivial(self): # A trivial test of stats.f_oneway, with F=0. F, p = stats.f_oneway([0,2], [0,2]) assert_equal(F, 0.0)
Example #15
Source File: eda.py From xam with MIT License | 5 votes |
def feature_importance_regression(features, target, n_neighbors=3, random_state=None):
    """
    Score continuous and discrete features against a regression target.

    Floating-point columns of ``features`` get the Pearson correlation
    (``pearson_r``, ``pearson_r_p_value``) and ``mutual_information``.
    Integer and boolean columns get a one-way ANOVA of the target grouped by
    the column's values (``f_statistic``, ``f_p_value``) and
    ``mutual_information``.

    Parameters
    ----------
    features : pandas.DataFrame
    target : indexed with the row labels of each group (``target[idxs]``);
        presumably a pandas Series aligned with ``features`` -- confirm
        against callers
    n_neighbors, random_state : forwarded to
        ``feature_selection.mutual_info_regression``

    Returns
    -------
    (cont_imp, disc_imp) : tuple of pandas.DataFrame
        Indexed by the continuous / discrete column names; a frame has no
        columns when there are no features of its kind.
    """
    cont = features.select_dtypes(include=[np.floating])
    # ``np.bool_`` instead of the ``np.bool`` alias, which NumPy deprecated in
    # 1.20 and removed in 1.24.
    disc = features.select_dtypes(include=[np.integer, np.bool_])

    cont_imp = pd.DataFrame(index=cont.columns)
    disc_imp = pd.DataFrame(index=disc.columns)

    # Continuous features
    if cont_imp.index.size > 0:

        # Pearson correlation (``items`` replaces ``iteritems``, which was
        # removed in pandas 2.0)
        pearson = np.array([stats.pearsonr(feature, target) for _, feature in cont.items()])
        cont_imp['pearson_r'] = pearson[:, 0]
        cont_imp['pearson_r_p_value'] = pearson[:, 1]

        # Mutual information
        mut_inf = feature_selection.mutual_info_regression(cont, target, discrete_features=False,
                                                           n_neighbors=n_neighbors,
                                                           random_state=random_state)
        cont_imp['mutual_information'] = mut_inf

    # Discrete features
    if disc_imp.index.size > 0:

        # F-test
        f_tests = defaultdict(dict)

        for feature in disc.columns:
            groups = [target[idxs] for idxs in disc.groupby(feature).groups.values()]
            statistic, p_value = stats.f_oneway(*groups)
            f_tests[feature]['f_statistic'] = statistic
            f_tests[feature]['f_p_value'] = p_value

        f_tests_df = pd.DataFrame.from_dict(f_tests, orient='index')
        disc_imp['f_statistic'] = f_tests_df['f_statistic']
        disc_imp['f_p_value'] = f_tests_df['f_p_value']

        # Mutual information
        mut_inf = feature_selection.mutual_info_regression(disc, target, discrete_features=True,
                                                           n_neighbors=n_neighbors,
                                                           random_state=random_state)
        disc_imp['mutual_information'] = mut_inf

    return cont_imp, disc_imp
Example #16
Source File: test_feature_select.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_f_oneway_ints():
    """Smoke test: ``f_oneway`` on integer input matches the result on float input."""
    # Integer input must not raise casting errors with recent numpys.
    rng = np.random.RandomState(0)
    X = rng.randint(10, size=(10, 10))
    y = np.arange(10)
    fint, pint = f_oneway(X, y)

    # Test that it gives the same result as with float.  The builtin ``float``
    # replaces the ``np.float`` alias, which NumPy deprecated in 1.20 and
    # removed in 1.24.
    f, p = f_oneway(X.astype(float), y)
    assert_array_almost_equal(f, fint, decimal=4)
    assert_array_almost_equal(p, pint, decimal=4)
Example #17
Source File: test_feature_select.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_f_oneway_vs_scipy_stats():
    """Our ``f_oneway`` must return the same statistic and p-value as ``scipy.stats``."""
    rng = np.random.RandomState(0)
    X1 = rng.randn(10, 3)
    X2 = 1 + rng.randn(10, 3)

    reference = stats.f_oneway(X1, X2)
    f, pv = reference
    ours = f_oneway(X1, X2)
    f2, pv2 = ours

    statistics_match = np.allclose(f, f2)
    assert statistics_match
    pvalues_match = np.allclose(pv, pv2)
    assert pvalues_match
Example #18
Source File: test_stats.py From Computable with MIT License | 5 votes |
def test_basic(self): # A test of stats.f_oneway, with F=2. F, p = stats.f_oneway([0,2], [2,4]) # Despite being a floating point calculation, this data should # result in F being exactly 2.0. assert_equal(F, 2.0)
Example #19
Source File: test_stats.py From Computable with MIT License | 5 votes |
def test_trivial(self): # A trivial test of stats.f_oneway, with F=0. F, p = stats.f_oneway([0,2], [0,2]) assert_equal(F, 0.0)
Example #20
Source File: ANOVA.py From TabPy with MIT License | 5 votes |
def anova(_arg1, _arg2, *_argN):
    """
    ANOVA is a statistical hypothesis test that is used to compare
    two or more group means for equality. For more information on
    the function and how to use it please refer to tabpy-tools.md

    Each argument is one group of numeric values (``int`` or ``float``).

    Returns the p-value of ``stats.f_oneway`` over all groups.

    Raises ValueError if a group is empty or contains a value that is not
    an ``int`` or ``float``.
    """
    cols = [_arg1, _arg2] + list(_argN)
    for col in cols:
        if len(col) == 0:
            # Previously surfaced as an IndexError from ``col[0]``.
            raise ValueError("each group must contain at least one value")
        # Validate every value, not only the first one, so mixed columns are
        # rejected here rather than failing inside scipy.
        if not all(isinstance(value, (int, float)) for value in col):
            print("values must be numeric")
            raise ValueError("values must be numeric")
    _, p_value = stats.f_oneway(_arg1, _arg2, *_argN)
    return p_value
Example #21
Source File: numerical_comparison.py From DIVE-backend with GNU General Public License v3.0 | 4 votes |
def get_valid_tests(equal_var, independent, normal, num_samples):
    '''
    Get valid tests given number of samples and statistical characterization of
    samples:

    Equal variance
    Independence
    Normality

    Returns a dict mapping a test name to the ``scipy.stats`` function that
    implements it.  The dict is empty when ``num_samples`` is less than 1.
    '''
    valid_tests = {}

    if num_samples == 1:
        valid_tests = {
            'chisquare': stats.chisquare,
            'power_divergence': stats.power_divergence,
            'kstest': stats.kstest
        }
        if normal:
            # Stored at the top level like every other test; the former
            # ``valid_tests['input'][...]`` raised KeyError.
            valid_tests['one_sample_ttest'] = stats.ttest_1samp

    elif num_samples == 2:
        if independent:
            valid_tests = {
                'mannwhitneyu': stats.mannwhitneyu,
                'kruskal': stats.kruskal,
                'ks_2samp': stats.ks_2samp
            }
            if normal:
                valid_tests['two_sample_ttest'] = stats.ttest_ind
                # f_oneway needs normality and equal variances, as in the
                # >= 3 samples branch below.
                if equal_var:
                    valid_tests['f_oneway'] = stats.f_oneway
        else:
            valid_tests = {
                'two_sample_ks': stats.ks_2samp,
                'wilcoxon': stats.wilcoxon
            }
            if normal:
                valid_tests['two_sample_related_ttest'] = stats.ttest_rel

    elif num_samples >= 3:
        if independent:
            valid_tests = {
                'kruskal': stats.kruskal
            }
            if normal and equal_var:
                valid_tests['f_oneway'] = stats.f_oneway
        else:
            # Previously assigned into a dict that was never created in this
            # branch, which raised UnboundLocalError.
            valid_tests = {
                'friedmanchisquare': stats.friedmanchisquare
            }

    return valid_tests