Python statsmodels.formula.api.ols() Examples
The following are 30 code examples of statsmodels.formula.api.ols().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module statsmodels.formula.api, or try the search function.
![](https://www.programcreek.com/common/static/images/search.png)
Example #1
Source File: test_formula.py From vnpy_crypto with MIT License | 9 votes |
def test_formula_predict_series():
    """Check the index of the Series returned by ``predict`` for several inputs.

    A model ``y ~ x`` is fitted on a frame indexed ``[5, 3, 1]``; predictions
    are then requested from a DataFrame, a Series, a differently indexed
    Series and a plain dict, and the result is compared with a Series that
    carries the expected index.
    """
    import pandas as pd
    # ``pandas.util.testing`` is deprecated and was removed in pandas 2.0;
    # ``pandas.testing`` is the public home of ``assert_series_equal``.
    import pandas.testing as tm

    data = pd.DataFrame({"y": [1, 2, 3], "x": [1, 2, 3]}, index=[5, 3, 1])
    results = ols('y ~ x', data).fit()

    # DataFrame input: the prediction keeps the frame's index.
    result = results.predict(data)
    expected = pd.Series([1., 2., 3.], index=[5, 3, 1])
    tm.assert_series_equal(result, expected)

    # A single column (Series) of the same frame gives the same result.
    result = results.predict(data.x)
    tm.assert_series_equal(result, expected)

    # A new Series with its own index: the prediction follows that index.
    result = results.predict(pd.Series([1, 2, 3], index=[1, 2, 3], name='x'))
    expected = pd.Series([1., 2., 3.], index=[1, 2, 3])
    tm.assert_series_equal(result, expected)

    # A plain dict has no index: the result is expected to be indexed 0..2.
    result = results.predict({"x": [1, 2, 3]})
    expected = pd.Series([1., 2., 3.], index=[0, 1, 2])
    tm.assert_series_equal(result, expected)
Example #2
Source File: test_anova.py From vnpy_crypto with MIT License | 8 votes |
def test_results(self):
    """Check the Type III ANOVA table computed with ``robust="hc1"``."""
    subset = self.data.drop([0, 1, 2])
    fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                 subset).fit()

    # Expected table columns. The sum_sq comparison below is disabled, so
    # ``expected_sum_sq`` is kept for reference only.
    expected_sum_sq = np.array([151.4065, 2.904723, 13.45718, 0.1905093,
                                27.60181])
    expected_df = np.array([1, 1, 2, 2, 51])
    expected_f = np.array([266.9361, 5.12115, 12.3122, 0.1529943, np.nan])
    expected_pr_f = np.array([6.54355e-22, 0.02792296, 4.336712e-05,
                              0.858527, np.nan])

    table = anova_lm(fitted, typ="III", robust="hc1")

    np.testing.assert_equal(table['df'].values, expected_df)
    # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
    np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
    np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #3
Source File: test_anova.py From vnpy_crypto with MIT License | 7 votes |
def test_results(self):
    """Check the Type II ANOVA table computed with ``robust="hc0"``."""
    subset = self.data.drop([0, 1, 2])
    fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                 subset).fit()

    # Expected table columns. The sum_sq comparison below is disabled, so
    # ``expected_sum_sq`` is kept for reference only.
    expected_sum_sq = np.array([151.4065, 2.904723, 13.45718, 0.1905093,
                                27.60181])
    expected_df = np.array([1, 2, 2, 51])
    expected_f = np.array([6.972744, 13.7804, 0.1709936, np.nan])
    expected_pr_f = np.array([0.01095599, 1.641682e-05, 0.8433081, np.nan])

    table = anova_lm(fitted, typ="II", robust="hc0")

    np.testing.assert_equal(table['df'].values, expected_df)
    # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
    np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
    np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #4
Source File: test_anova.py From vnpy_crypto with MIT License | 7 votes |
def test_results(self):
    """Check the Type II ANOVA table computed with ``robust="hc2"``."""
    subset = self.data.drop([0, 1, 2])
    fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                 subset).fit()

    # Expected table columns. The sum_sq comparison below is disabled, so
    # ``expected_sum_sq`` is kept for reference only.
    expected_sum_sq = np.array([151.4065, 2.904723, 13.45718, 0.1905093,
                                27.60181])
    expected_df = np.array([1, 2, 2, 51])
    expected_f = np.array([6.267499, 12.25354, 0.1501224, np.nan])
    expected_pr_f = np.array([0.01554009, 4.511826e-05, 0.8609815, np.nan])

    table = anova_lm(fitted, typ="II", robust="hc2")

    np.testing.assert_equal(table['df'].values, expected_df)
    # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
    np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
    np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #5
Source File: test_formula.py From vnpy_crypto with MIT License | 7 votes |
def test_patsy_lazy_dict():
    """Fit and predict from a dict subclass that resolves keys lazily.

    ``LazyDict`` stores the real data on ``self.data`` and leaves the dict
    itself empty, so every lookup goes through ``__missing__``. The test
    checks that ``ols`` and ``predict`` work with such an object, first on
    complete data and then with one missing ``INCOME`` value.
    """
    class LazyDict(dict):
        def __init__(self, data):
            # Deliberately does not populate the dict itself, so that all
            # key lookups are routed to __missing__.
            self.data = data

        def __missing__(self, key):
            return np.array(self.data[key])

    data = cpunish.load_pandas().data
    data = LazyDict(data)
    res = ols('EXECUTIONS ~ SOUTH + INCOME', data=data).fit()
    res2 = res.predict(data)
    npt.assert_allclose(res.fittedvalues, res2)

    data = cpunish.load_pandas().data
    # Single-step .loc assignment: the chained form
    # ``data['INCOME'].loc[0] = None`` may write to a temporary copy and
    # leave ``data`` unchanged.
    data.loc[0, 'INCOME'] = None

    data = LazyDict(data)
    data.index = cpunish.load_pandas().data.index
    res = ols('EXECUTIONS ~ SOUTH + INCOME', data=data).fit()
    res2 = res.predict(data)
    assert_equal(res.fittedvalues, res2)
    # Should lose a record
    assert_equal(len(res2) + 1, len(cpunish.load_pandas().data))
Example #6
Source File: test_regression.py From vnpy_crypto with MIT License | 6 votes |
def test_formula_missing_cat():
    """Regression test for gh-805: a missing value in the ``firm`` column.

    Fitting on the frame with the NaN row must give the same parameters as
    fitting on the frame with that row dropped explicitly, and
    ``missing='raise'`` must raise ``PatsyError``.
    """
    import statsmodels.api as sm
    from statsmodels.formula.api import ols
    from patsy import PatsyError

    formula = 'value ~ invest + capital + firm + year'

    frame = sm.datasets.grunfeld.load_pandas().data
    first_label = frame.index[0]
    frame.loc[first_label, 'firm'] = np.nan

    # Reference fit: the incomplete row is removed by hand.
    fit_dropped = ols(formula=formula, data=frame.dropna()).fit()
    # Fit under test: the frame still contains the NaN.
    fit_with_nan = ols(formula=formula, data=frame).fit()

    assert_almost_equal(fit_dropped.params.values,
                        fit_with_nan.params.values)

    assert_raises(PatsyError, ols, formula, data=frame, missing='raise')
Example #7
Source File: test_anova.py From vnpy_crypto with MIT License | 6 votes |
def test_results(self):
    """Check the Type II ANOVA table computed with ``robust="hc1"``."""
    subset = self.data.drop([0, 1, 2])
    fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                 subset).fit()

    # Expected table columns. The sum_sq comparison below is disabled, so
    # ``expected_sum_sq`` is kept for reference only.
    expected_sum_sq = np.array([151.4065, 2.904723, 13.45718, 0.1905093,
                                27.60181])
    expected_df = np.array([1, 2, 2, 51])
    expected_f = np.array([6.238771, 12.32983, 0.1529943, np.nan])
    expected_pr_f = np.array([0.01576555, 4.285456e-05, 0.858527, np.nan])

    table = anova_lm(fitted, typ="II", robust="hc1")

    np.testing.assert_equal(table['df'].values, expected_df)
    # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
    np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
    np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #8
Source File: test_anova.py From vnpy_crypto with MIT License | 6 votes |
def test_results(self):
    """Check the Type II ANOVA table computed with ``robust="hc3"``."""
    subset = self.data.drop([0, 1, 2])
    fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                 subset).fit()

    # Expected table columns. The sum_sq comparison below is disabled, so
    # ``expected_sum_sq`` is kept for reference only.
    expected_sum_sq = np.array([151.4065, 2.904723, 13.45718, 0.1905093,
                                27.60181])
    expected_df = np.array([1, 2, 2, 51])
    expected_f = np.array([5.633786, 10.89842, 0.1317223, np.nan])
    expected_pr_f = np.array([0.02142223, 0.0001145965, 0.8768817, np.nan])

    table = anova_lm(fitted, typ="II", robust="hc3")

    np.testing.assert_equal(table['df'].values, expected_df)
    # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
    np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
    np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #9
Source File: test_anova.py From vnpy_crypto with MIT License | 6 votes |
def test_results(self):
    """Check the Type III ANOVA table computed with ``robust="hc0"``."""
    subset = self.data.drop([0, 1, 2])
    fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                 subset).fit()

    # Expected table columns. The sum_sq comparison below is disabled, so
    # ``expected_sum_sq`` is kept for reference only.
    expected_sum_sq = np.array([151.4065, 2.904723, 13.45718, 0.1905093,
                                27.60181])
    expected_df = np.array([1, 1, 2, 2, 51])
    expected_f = np.array([298.3404, 5.723638, 13.76069, 0.1709936, np.nan])
    expected_pr_f = np.array([5.876255e-23, 0.02046031, 1.662826e-05,
                              0.8433081, np.nan])

    table = anova_lm(fitted, typ="III", robust="hc0")

    np.testing.assert_equal(table['df'].values, expected_df)
    # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
    np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
    np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #10
Source File: test_anova.py From vnpy_crypto with MIT License | 6 votes |
def test_results(self):
    """Check the Type III ANOVA table computed with ``robust="hc2"``."""
    subset = self.data.drop([0, 1, 2])
    fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                 subset).fit()

    # Expected table columns. The sum_sq comparison below is disabled, so
    # ``expected_sum_sq`` is kept for reference only.
    expected_sum_sq = np.array([151.4065, 2.904723, 13.45718, 0.1905093,
                                27.60181])
    expected_df = np.array([1, 1, 2, 2, 51])
    expected_f = np.array([264.5137, 5.074677, 12.19158, 0.1501224, np.nan])
    expected_pr_f = np.array([7.958286e-22, 0.02860926, 4.704831e-05,
                              0.8609815, np.nan])

    table = anova_lm(fitted, typ="III", robust="hc2")

    np.testing.assert_equal(table['df'].values, expected_df)
    # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
    np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
    np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #11
Source File: test_anova.py From vnpy_crypto with MIT License | 6 votes |
def test_results(self):
    """Check the Type III ANOVA table computed with ``robust="hc3"``."""
    subset = self.data.drop([0, 1, 2])
    fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                 subset).fit()

    # Expected table columns. The sum_sq comparison below is disabled, so
    # ``expected_sum_sq`` is kept for reference only.
    expected_sum_sq = np.array([151.4065, 2.904723, 13.45718, 0.1905093,
                                27.60181])
    expected_df = np.array([1, 1, 2, 2, 51])
    expected_f = np.array([234.4026, 4.496996, 10.79903, 0.1317223, np.nan])
    expected_pr_f = np.array([1.037224e-20, 0.03883841, 0.0001228716,
                              0.8768817, np.nan])

    table = anova_lm(fitted, typ="III", robust="hc3")

    np.testing.assert_equal(table['df'].values, expected_df)
    # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
    np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
    np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #12
Source File: questionnaire.py From reportgen with MIT License | 6 votes |
def anova(data, formula):
    '''Analysis of variance.

    Input
    --data: a DataFrame containing numeric and categorical variables
    --formula: the relationship between the variables, e.g.
      numeric_var ~ C(categorical_var1)[+ C(categorical_var1)[+ C(categorical_var1):(categorical_var1)]

    Returns [the ANOVA table]
    [The total variance comes from within-group and between-group variance;
    the difference between groups is inferred from the ratio of the
    between-group variance to the within-group variance.]
    --df: degrees of freedom
    --sum_sq: sum of squared errors
    --mean_sq: sum of squared errors / the corresponding degrees of freedom
    --F: ratio of the mean_sq values
    --PR(>F): p-value; e.g. < 0.05 indicates a significant difference
    '''
    import statsmodels.api as sm
    from statsmodels.formula.api import ols

    # Categorical variables are marked with C(...) inside ``formula``.
    fitted_model = ols(formula, data=data).fit()
    return sm.stats.anova_lm(fitted_model)
Example #13
Source File: test_generic_methods.py From vnpy_crypto with MIT License | 5 votes |
def initialize(cls):
    """Fit the OLS model on ``cls.data`` and store the results on ``cls.res``.

    The model is fitted with ``use_t=False``. Only ``ols`` is imported:
    the ``glm``, ``poisson`` and ``Poisson`` imports previously made here
    were never used in this function.
    """
    from statsmodels.formula.api import ols

    mod = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)", cls.data)
    cls.res = mod.fit(use_t=False)
Example #14
Source File: test_downstream.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_statsmodels():
    """Smoke test: fit a statsmodels formula OLS on the Guerry dataset.

    ``import_module`` is a helper defined outside this function; its return
    value is not used afterwards.
    """
    statsmodels = import_module('statsmodels')  # noqa
    import statsmodels.api as sm
    import statsmodels.formula.api as smf

    guerry = sm.datasets.get_rdataset("Guerry", "HistData").data
    model = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=guerry)
    model.fit()
Example #15
Source File: smoothers.py From plotnine with GNU General Public License v2.0 | 5 votes |
def predictdf(data, xseq, **params):
    """Dispatch to the smoothing function selected by ``params['method']``.

    Parameters
    ----------
    data, xseq
        Passed through unchanged to the selected smoothing function.
    **params
        Must contain ``'method'``: either one of the names in the lookup
        table below or a callable with the signature
        ``func(data, xseq, **params)``. All params are forwarded.

    Returns
    -------
    Whatever the selected smoothing function returns.

    Raises
    ------
    PlotnineError
        If ``method`` is an unknown name, or is neither a string nor a
        callable.
    """
    methods = {
        'lm': lm,
        'ols': lm,
        'wls': lm,
        'rlm': rlm,
        'glm': glm,
        'gls': gls,
        'lowess': lowess,
        'loess': loess,
        'mavg': mavg,
        'gpr': gpr,
    }

    method = params['method']

    if isinstance(method, str):
        try:
            method = methods[method]
        except KeyError as err:
            msg = "Method should be one of {}"
            raise PlotnineError(msg.format(list(methods.keys()))) from err

    if not callable(method):
        # The message used to be built but never passed to the exception,
        # and the two string pieces were joined without a separating space.
        msg = ("'method' should either be a string or a function "
               "with the signature `func(data, xseq, **params)`")
        raise PlotnineError(msg)

    return method(data, xseq, **params)
Example #16
Source File: test_eval.py From tea-lang with Apache License 2.0 | 5 votes |
def f_test(x_name, y_name, df):
    """F-test / factorial ANOVA of ``y_name`` on the categorical ``x_name``.

    Fits ``y_name ~ C(x_name)`` by OLS on ``df`` and returns the table
    produced by ``sm.stats.anova_lm`` with ``typ=2``.
    """
    model = ols(f"{y_name} ~ C({x_name})", data=df).fit()
    # The keyword is ``typ``; the previous ``type=2`` was silently ignored
    # by anova_lm, which then fell back to its default.
    return sm.stats.anova_lm(model, typ=2)
Example #17
Source File: test_eval.py From tea-lang with Apache License 2.0 | 5 votes |
def factorial(xs, y, df):
    """Fit a factorial ANOVA of ``y`` on the categorical factors ``xs``.

    The formula starts with the main effects ``C(x1) + C(x2) + ...`` and is
    extended with ``*`` interaction terms that ``_is_interaction_unique``
    (defined outside this function) accepts. The model is fitted by OLS on
    ``df`` and the ``anova_lm`` table with ``typ=2`` is returned.
    """
    # assert(len(y) == 0)
    formula = f"{y} ~ " + " + ".join(f"C({x})" for x in xs)

    # Add the interactions
    # NOTE(review): the loop nesting below was reconstructed from a
    # whitespace-collapsed source; confirm against the original file that
    # the append/uniqueness check belongs inside the ``i != j`` branch.
    interactions = []
    for i, x_i in enumerate(xs):
        inter = f"C({x_i})"
        for j, x_j in enumerate(xs):
            if i != j:
                inter += " * " + f"C({x_j})"
                interactions.append(inter)

                if _is_interaction_unique(interactions, inter):
                    formula += " + " + inter

    model = ols(formula, data=df).fit()
    # The keyword is ``typ``; the previous ``type=2`` was silently ignored
    # by anova_lm, which then fell back to its default.
    return sm.stats.anova_lm(model, typ=2)
Example #18
Source File: markers.py From dynamo-release with BSD 3-Clause "New" or "Revised" License | 5 votes |
def diff_test_helper(data,
                     fullModelFormulaStr="~cr(time, df=3)",
                     reducedModelFormulaStr="~1",
                     ):
    """Compare a full and a reduced negative-binomial GLM of ``data['expression']``.

    Steps, as performed below:

    1. Build design matrices for both formulas with ``dmatrix``.
    2. Fit a Poisson GLM on the full design matrix.
    3. Regress ``((expression - mu)**2 - expression) / mu`` on ``mu``
       without intercept; the single coefficient is used as ``alpha`` of
       the negative-binomial family.
    4. Fit the full and reduced negative-binomial GLMs and pass both to
       ``lrt`` (defined outside this function; presumably a
       likelihood-ratio test -- confirm).

    Returns
    -------
    tuple
        ``('ok', 'NB2', pval)`` on success, or ``('fail', 'NB2', 1)`` if
        fitting either negative-binomial model raises.
    """
    # Build the design matrices of the full and the reduced model.
    transformed_x = dmatrix(fullModelFormulaStr, data, return_type='dataframe')
    transformed_x_null = dmatrix(reducedModelFormulaStr, data,
                                 return_type='dataframe')

    expression = data['expression']

    poisson_training_results = sm.GLM(expression, transformed_x,
                                      family=sm.families.Poisson()).fit()
    poisson_df = pd.DataFrame({'mu': poisson_training_results.mu,
                               'expression': expression})

    # Column-wise arithmetic: same values as the former row-wise ``apply``
    # without a Python-level call per row.
    residual = poisson_df['expression'] - poisson_df['mu']
    poisson_df['AUX_OLS_DEP'] = (
        (residual ** 2 - poisson_df['expression']) / poisson_df['mu']
    )

    ols_expr = """AUX_OLS_DEP ~ mu - 1"""
    aux_olsr_results = smf.ols(ols_expr, poisson_df).fit()

    # ``params`` is label-indexed; take the only coefficient by position
    # explicitly instead of relying on integer-key fallback.
    nb2_family = sm.families.NegativeBinomial(
        alpha=aux_olsr_results.params.iloc[0])

    try:
        nb2_full = sm.GLM(expression, transformed_x, family=nb2_family).fit()
        nb2_null = sm.GLM(expression, transformed_x_null,
                          family=nb2_family).fit()
    except Exception:
        # Best effort: any fitting failure is reported as a non-significant
        # result. ``Exception`` (not a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        return ('fail', 'NB2', 1)

    pval = lrt(nb2_full, nb2_null)
    return ('ok', 'NB2', pval)
Example #19
Source File: test_downstream.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_statsmodels():
    """Smoke test: fit a statsmodels formula OLS on the Guerry dataset.

    ``import_module`` is a helper defined outside this function; its return
    value is not used afterwards.
    """
    statsmodels = import_module('statsmodels')  # noqa
    import statsmodels.api as sm
    import statsmodels.formula.api as smf

    guerry = sm.datasets.get_rdataset("Guerry", "HistData").data
    model = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=guerry)
    model.fit()
Example #20
Source File: test_downstream.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_statsmodels():
    """Smoke test: fit a statsmodels formula OLS on the Guerry dataset.

    ``import_module`` is a helper defined outside this function; its return
    value is not used afterwards.
    """
    statsmodels = import_module('statsmodels')  # noqa
    import statsmodels.api as sm
    import statsmodels.formula.api as smf

    guerry = sm.datasets.get_rdataset("Guerry", "HistData").data
    model = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=guerry)
    model.fit()
Example #21
Source File: TargetAnalysisContinuous.py From exploripy with MIT License | 5 votes |
def TwoWayAnova(self, categorical1, categorical2, continuous):
    """Two-way ANOVA of ``continuous`` on two categorical columns.

    Rows of ``self.df`` with a missing value in any of the three columns
    are dropped, the model ``continuous ~ C(categorical1)*C(categorical2)``
    is fitted by OLS, and the ``typ=3`` ANOVA table is computed.

    Returns
    -------
    bool
        Whether ``PR(>F)`` in the third row of the ANOVA table is below
        0.05.
        NOTE(review): confirm that row index 2 is the term intended here.
    """
    df = self.df[[categorical1, categorical2, continuous]].dropna()

    function = '{} ~ C({})*C({})'.format(continuous, categorical1,
                                         categorical2)
    print(function)

    # OLS.fit() only supports the "pinv" and "qr" solvers; the previous
    # ``method='powell'`` is not a valid option, so the default is used.
    lm = ols(function, data=df).fit()
    table = sm.stats.anova_lm(lm, typ=3)
    return table.iloc[2]['PR(>F)'] < 0.05
Example #22
Source File: test_anova.py From vnpy_crypto with MIT License | 5 votes |
def test_results(self):
    """Check ``anova_lm`` when comparing two fitted models.

    An additive, no-intercept model is fitted here and compared with
    ``self.kidney_lm``; every column of the comparison table is checked
    against its expected values.
    """
    additive_fit = ols("np.log(Days+1) ~ C(Duration) + C(Weight) - 1",
                       self.data).fit()
    table = anova_lm(additive_fit, self.kidney_lm)

    expected_res_df = np.array([56, 54])
    expected_rss = np.array([29.62486, 28.9892])

    np.testing.assert_equal(table["df_resid"].values, expected_res_df)
    np.testing.assert_almost_equal(table["ssr"].values, expected_rss, 4)

    # Remaining columns are compared with the default precision.
    expected_by_column = [
        ("df_diff", np.array([0, 2])),
        ("ss_diff", np.array([np.nan, 0.6356584])),
        ("F", np.array([np.nan, 0.5920404])),
        ("Pr(>F)", np.array([np.nan, 0.5567479])),
    ]
    for column, expected in expected_by_column:
        np.testing.assert_almost_equal(table[column].values, expected)
Example #23
Source File: test_downstream.py From recruit with Apache License 2.0 | 5 votes |
def test_statsmodels():
    """Smoke test: fit a statsmodels formula OLS on the Guerry dataset.

    ``import_module`` is a helper defined outside this function; its return
    value is not used afterwards.
    """
    statsmodels = import_module('statsmodels')  # noqa
    import statsmodels.api as sm
    import statsmodels.formula.api as smf

    guerry = sm.datasets.get_rdataset("Guerry", "HistData").data
    model = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=guerry)
    model.fit()
Example #24
Source File: test_generic_methods.py From vnpy_crypto with MIT License | 5 votes |
def initialize(cls):
    """Fit the OLS model on ``cls.data`` and store the results on ``cls.res``.

    The model is fitted with the default ``fit()`` arguments. Only ``ols``
    is imported: the ``glm``, ``poisson`` and ``Poisson`` imports
    previously made here were never used in this function.
    """
    from statsmodels.formula.api import ols

    mod = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)", cls.data)
    cls.res = mod.fit()  # default use_t=True
Example #25
Source File: test_generic_methods.py From vnpy_crypto with MIT License | 5 votes |
def initialize(cls):
    """Fit the GLM on ``cls.data`` and store the results on ``cls.res``.

    The model is fitted with ``use_t=False``. Only ``glm`` is imported:
    the ``ols``, ``poisson`` and ``Poisson`` imports previously made here
    were never used in this function.
    """
    from statsmodels.formula.api import glm

    mod = glm("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)", cls.data)
    cls.res = mod.fit(use_t=False)
Example #26
Source File: test_generic_methods.py From vnpy_crypto with MIT License | 5 votes |
def initialize(cls):
    """Fit the OLS model on ``cls.data`` and store the results on ``cls.res``.

    The model is fitted with the default ``fit()`` arguments. Only ``ols``
    is imported: the ``glm``, ``poisson`` and ``Poisson`` imports
    previously made here were never used in this function.
    """
    from statsmodels.formula.api import ols

    mod = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)", cls.data)
    cls.res = mod.fit()  # default use_t=True
Example #27
Source File: test_generic_methods.py From vnpy_crypto with MIT License | 5 votes |
def setup_class(cls):
    """Fit ``C(Duration) + C(Weight)`` and record the term under test.

    The data come from ``TestAnova3`` with rows 0, 1 and 2 dropped. The
    fitted results, the term name and the constraint strings are stored on
    the class.
    """
    from statsmodels.formula.api import ols
    import statsmodels.stats.tests.test_anova as ttmod

    anova_fixture = ttmod.TestAnova3()
    anova_fixture.setup_class()
    cls.data = anova_fixture.data.drop([0, 1, 2])

    cls.res = ols("np.log(Days+1) ~ C(Duration) + C(Weight)",
                  cls.data).fit()

    cls.term_name = "C(Weight)"
    cls.constraints = [
        'C(Weight)[T.2]',
        'C(Weight)[T.3]',
        'C(Weight)[T.3] - C(Weight)[T.2]',
    ]
Example #28
Source File: test_generic_methods.py From vnpy_crypto with MIT License | 5 votes |
def setup_class(cls):
    """Fit the no-intercept model ``C(Weight) + C(Duration) - 1``.

    The data come from ``TestAnova3`` with rows 0, 1 and 2 dropped. The
    fitted results, the term name and the constraint strings are stored on
    the class.
    """
    from statsmodels.formula.api import ols
    import statsmodels.stats.tests.test_anova as ttmod

    anova_fixture = ttmod.TestAnova3()
    anova_fixture.setup_class()
    cls.data = anova_fixture.data.drop([0, 1, 2])

    cls.res = ols("np.log(Days+1) ~ C(Weight) + C(Duration) - 1",
                  cls.data).fit()

    cls.term_name = "C(Weight)"
    cls.constraints = [
        'C(Weight)[2] - C(Weight)[1]',
        'C(Weight)[3] - C(Weight)[1]',
        'C(Weight)[3] - C(Weight)[2]',
    ]
Example #29
Source File: test_generic_methods.py From vnpy_crypto with MIT License | 5 votes |
def setup_class(cls):
    """Fit ``C(Weight, Treatment(2)) + C(Duration)`` and record the term.

    The data come from ``TestAnova3`` with rows 0, 1 and 2 dropped. The
    fitted results, the term name and the constraint strings are stored on
    the class.
    """
    from statsmodels.formula.api import ols
    import statsmodels.stats.tests.test_anova as ttmod

    anova_fixture = ttmod.TestAnova3()
    anova_fixture.setup_class()
    cls.data = anova_fixture.data.drop([0, 1, 2])

    cls.res = ols("np.log(Days+1) ~ C(Weight, Treatment(2)) + C(Duration)",
                  cls.data).fit()

    cls.term_name = "C(Weight, Treatment(2))"
    cls.constraints = [
        '-C(Weight, Treatment(2))[T.1]',
        'C(Weight, Treatment(2))[T.3] - C(Weight, Treatment(2))[T.1]',
        'C(Weight, Treatment(2))[T.3]',
    ]
Example #30
Source File: test_regressionplots.py From vnpy_crypto with MIT License | 5 votes |
def test_one_column_exog(self):
    """Plot regression diagnostics for ``var1`` without and with intercept.

    For each formula the model is fitted on ``self.data``, passed to
    ``plot_regress_exog`` and the resulting figure is closed.
    """
    from statsmodels.formula.api import ols

    for formula in ("y~var1-1", "y~var1"):
        fitted = ols(formula, data=self.data).fit()
        figure = plot_regress_exog(fitted, "var1")
        plt.close(figure)