Python statsmodels.formula.api.ols() Examples

The following are 30 code examples of statsmodels.formula.api.ols(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module statsmodels.formula.api, or try the search function.
Example #1
Source File: test_formula.py    From vnpy_crypto with MIT License 9 votes vote down vote up
def test_formula_predict_series():
    """Check that ``predict`` on a formula-fitted OLS model returns a Series
    whose index follows the data passed in.

    The model is fitted on ``y ~ x`` with ``y == x``, and the test expects
    the predictions to equal the supplied ``x`` values.  Four input kinds
    are exercised: the training DataFrame, a single column of it, a new
    named Series with a different index, and a plain dict (for which a
    default 0..n-1 index is expected).
    """
    import pandas as pd
    # ``pandas.util.testing`` was deprecated in pandas 1.0 and removed in
    # 2.0; ``pandas.testing`` is the public home of the assert helpers.
    import pandas.testing as tm
    data = pd.DataFrame({"y": [1, 2, 3], "x": [1, 2, 3]}, index=[5, 3, 1])
    results = ols('y ~ x', data).fit()

    # DataFrame input: the prediction keeps the frame's index.
    result = results.predict(data)
    expected = pd.Series([1., 2., 3.], index=[5, 3, 1])
    tm.assert_series_equal(result, expected)

    # A single column (Series) of the training frame.
    result = results.predict(data.x)
    tm.assert_series_equal(result, expected)

    # A fresh Series named after the regressor, with its own index.
    result = results.predict(pd.Series([1, 2, 3], index=[1, 2, 3], name='x'))
    expected = pd.Series([1., 2., 3.], index=[1, 2, 3])
    tm.assert_series_equal(result, expected)

    # A dict carries no index, so a default integer index is expected.
    result = results.predict({"x": [1, 2, 3]})
    expected = pd.Series([1., 2., 3.], index=[0, 1, 2])
    tm.assert_series_equal(result, expected)
Example #2
Source File: test_anova.py    From vnpy_crypto with MIT License 8 votes vote down vote up
def test_results(self):
        """Check the Type III ANOVA table with ``robust="hc1"`` against fixed values.

        The model is fitted on ``self.data`` with rows 0, 1 and 2 dropped.
        """
        subset = self.data.drop([0, 1, 2])
        fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                     subset).fit()

        # Reference sums of squares; only referenced by the disabled check below.
        expected_sum_sq = np.array(
            [151.4065, 2.904723, 13.45718, 0.1905093, 27.60181])
        expected_df = np.array([1, 1, 2, 2, 51])
        expected_f = np.array(
            [266.9361, 5.12115, 12.3122, 0.1529943, np.nan])
        expected_pr_f = np.array(
            [6.54355e-22, 0.02792296, 4.336712e-05, 0.858527, np.nan])

        table = anova_lm(fitted, typ="III", robust="hc1")
        np.testing.assert_equal(table['df'].values, expected_df)
        # Disabled in the original:
        # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
        np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
        np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #3
Source File: test_anova.py    From vnpy_crypto with MIT License 7 votes vote down vote up
def test_results(self):
        """Check the Type II ANOVA table with ``robust="hc0"`` against fixed values.

        The model is fitted on ``self.data`` with rows 0, 1 and 2 dropped.
        """
        subset = self.data.drop([0, 1, 2])
        fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                     subset).fit()

        # Reference sums of squares; only referenced by the disabled check below.
        expected_sum_sq = np.array(
            [151.4065, 2.904723, 13.45718, 0.1905093, 27.60181])
        expected_df = np.array([1, 2, 2, 51])
        expected_f = np.array([6.972744, 13.7804, 0.1709936, np.nan])
        expected_pr_f = np.array(
            [0.01095599, 1.641682e-05, 0.8433081, np.nan])

        table = anova_lm(fitted, typ="II", robust="hc0")
        np.testing.assert_equal(table['df'].values, expected_df)
        # Disabled in the original:
        # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
        np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
        np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #4
Source File: test_anova.py    From vnpy_crypto with MIT License 7 votes vote down vote up
def test_results(self):
        """Check the Type II ANOVA table with ``robust="hc2"`` against fixed values.

        The model is fitted on ``self.data`` with rows 0, 1 and 2 dropped.
        """
        subset = self.data.drop([0, 1, 2])
        fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                     subset).fit()

        # Reference sums of squares; only referenced by the disabled check below.
        expected_sum_sq = np.array(
            [151.4065, 2.904723, 13.45718, 0.1905093, 27.60181])
        expected_df = np.array([1, 2, 2, 51])
        expected_f = np.array([6.267499, 12.25354, 0.1501224, np.nan])
        expected_pr_f = np.array(
            [0.01554009, 4.511826e-05, 0.8609815, np.nan])

        table = anova_lm(fitted, typ="II", robust="hc2")
        np.testing.assert_equal(table['df'].values, expected_df)
        # Disabled in the original:
        # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
        np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
        np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #5
Source File: test_formula.py    From vnpy_crypto with MIT License 7 votes vote down vote up
def test_patsy_lazy_dict():
    """Check that ``ols`` and ``predict`` accept a dict-like that resolves
    its keys lazily, both with complete data and with one missing value.
    """
    class LazyDict(dict):
        # ``__init__`` only stores the source and does not populate the
        # underlying dict, so every key lookup goes through ``__missing__``.
        def __init__(self, data):
            self.data = data

        def __missing__(self, key):
            return np.array(self.data[key])

    data = cpunish.load_pandas().data
    data = LazyDict(data)
    res = ols('EXECUTIONS ~ SOUTH + INCOME', data=data).fit()

    res2 = res.predict(data)
    npt.assert_allclose(res.fittedvalues, res2)

    data = cpunish.load_pandas().data
    # Single .loc call instead of the chained ``data['INCOME'].loc[0] = ...``,
    # which may write to a temporary copy and leave ``data`` unchanged.
    data.loc[0, 'INCOME'] = None

    data = LazyDict(data)
    data.index = cpunish.load_pandas().data.index
    res = ols('EXECUTIONS ~ SOUTH + INCOME', data=data).fit()

    res2 = res.predict(data)
    assert_equal(res.fittedvalues, res2)  # Should lose a record
    assert_equal(len(res2) + 1, len(cpunish.load_pandas().data))
Example #6
Source File: test_regression.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_formula_missing_cat():
    """Regression test for gh-805.

    One ``firm`` value is set to NaN.  The test expects the same parameters
    whether that row is dropped beforehand or left in the data, and expects
    ``missing='raise'`` to raise ``PatsyError``.
    """
    # gh-805
    import statsmodels.api as sm
    from statsmodels.formula.api import ols
    from patsy import PatsyError

    formula = 'value ~ invest + capital + firm + year'

    grunfeld = sm.datasets.grunfeld.load_pandas().data
    grunfeld.loc[grunfeld.index[0], 'firm'] = np.nan

    # Fit once on explicitly cleaned data, once on the raw data.
    fit_dropped = ols(formula=formula, data=grunfeld.dropna()).fit()
    fit_default = ols(formula=formula, data=grunfeld).fit()

    assert_almost_equal(fit_dropped.params.values, fit_default.params.values)

    assert_raises(PatsyError, ols, formula, data=grunfeld, missing='raise')
Example #7
Source File: test_anova.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_results(self):
        """Check the Type II ANOVA table with ``robust="hc1"`` against fixed values.

        The model is fitted on ``self.data`` with rows 0, 1 and 2 dropped.
        """
        subset = self.data.drop([0, 1, 2])
        fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                     subset).fit()

        # Reference sums of squares; only referenced by the disabled check below.
        expected_sum_sq = np.array(
            [151.4065, 2.904723, 13.45718, 0.1905093, 27.60181])
        expected_df = np.array([1, 2, 2, 51])
        expected_f = np.array([6.238771, 12.32983, 0.1529943, np.nan])
        expected_pr_f = np.array(
            [0.01576555, 4.285456e-05, 0.858527, np.nan])

        table = anova_lm(fitted, typ="II", robust="hc1")
        np.testing.assert_equal(table['df'].values, expected_df)
        # Disabled in the original:
        # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
        np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
        np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #8
Source File: test_anova.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_results(self):
        """Check the Type II ANOVA table with ``robust="hc3"`` against fixed values.

        The model is fitted on ``self.data`` with rows 0, 1 and 2 dropped.
        """
        subset = self.data.drop([0, 1, 2])
        fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                     subset).fit()

        # Reference sums of squares; only referenced by the disabled check below.
        expected_sum_sq = np.array(
            [151.4065, 2.904723, 13.45718, 0.1905093, 27.60181])
        expected_df = np.array([1, 2, 2, 51])
        expected_f = np.array([5.633786, 10.89842, 0.1317223, np.nan])
        expected_pr_f = np.array(
            [0.02142223, 0.0001145965, 0.8768817, np.nan])

        table = anova_lm(fitted, typ="II", robust="hc3")
        np.testing.assert_equal(table['df'].values, expected_df)
        # Disabled in the original:
        # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
        np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
        np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #9
Source File: test_anova.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_results(self):
        """Check the Type III ANOVA table with ``robust="hc0"`` against fixed values.

        The model is fitted on ``self.data`` with rows 0, 1 and 2 dropped.
        """
        subset = self.data.drop([0, 1, 2])
        fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                     subset).fit()

        # Reference sums of squares; only referenced by the disabled check below.
        expected_sum_sq = np.array(
            [151.4065, 2.904723, 13.45718, 0.1905093, 27.60181])
        expected_df = np.array([1, 1, 2, 2, 51])
        expected_f = np.array(
            [298.3404, 5.723638, 13.76069, 0.1709936, np.nan])
        expected_pr_f = np.array(
            [5.876255e-23, 0.02046031, 1.662826e-05, 0.8433081, np.nan])

        table = anova_lm(fitted, typ="III", robust="hc0")
        np.testing.assert_equal(table['df'].values, expected_df)
        # Disabled in the original:
        # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
        np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
        np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #10
Source File: test_anova.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_results(self):
        """Check the Type III ANOVA table with ``robust="hc2"`` against fixed values.

        The model is fitted on ``self.data`` with rows 0, 1 and 2 dropped.
        """
        subset = self.data.drop([0, 1, 2])
        fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                     subset).fit()

        # Reference sums of squares; only referenced by the disabled check below.
        expected_sum_sq = np.array(
            [151.4065, 2.904723, 13.45718, 0.1905093, 27.60181])
        expected_df = np.array([1, 1, 2, 2, 51])
        expected_f = np.array(
            [264.5137, 5.074677, 12.19158, 0.1501224, np.nan])
        expected_pr_f = np.array(
            [7.958286e-22, 0.02860926, 4.704831e-05, 0.8609815, np.nan])

        table = anova_lm(fitted, typ="III", robust="hc2")
        np.testing.assert_equal(table['df'].values, expected_df)
        # Disabled in the original:
        # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
        np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
        np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #11
Source File: test_anova.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_results(self):
        """Check the Type III ANOVA table with ``robust="hc3"`` against fixed values.

        The model is fitted on ``self.data`` with rows 0, 1 and 2 dropped.
        """
        subset = self.data.drop([0, 1, 2])
        fitted = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                     subset).fit()

        # Reference sums of squares; only referenced by the disabled check below.
        expected_sum_sq = np.array(
            [151.4065, 2.904723, 13.45718, 0.1905093, 27.60181])
        expected_df = np.array([1, 1, 2, 2, 51])
        expected_f = np.array(
            [234.4026, 4.496996, 10.79903, 0.1317223, np.nan])
        expected_pr_f = np.array(
            [1.037224e-20, 0.03883841, 0.0001228716, 0.8768817, np.nan])

        table = anova_lm(fitted, typ="III", robust="hc3")
        np.testing.assert_equal(table['df'].values, expected_df)
        # Disabled in the original:
        # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
        np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
        np.testing.assert_almost_equal(table['PR(>F)'].values, expected_pr_f)
Example #12
Source File: questionnaire.py    From reportgen with MIT License 6 votes vote down vote up
def anova(data, formula):
    '''Analysis of variance.

    Inputs
    --data: a DataFrame containing numeric and categorical variables
    --formula: the relationship between the variables, e.g.
      numeric_var ~ C(cat_var1)[+C(cat_var1)[+C(cat_var1):(cat_var1)]

    Returns [the ANOVA table]
    [Total variance comes from within-group and between-group variance;
    the difference between groups is inferred from the ratio of the
    between-group variance to the within-group variance]
    --df: degrees of freedom
    --sum_sq: sum of squared errors
    --mean_sq: sum_sq divided by the corresponding degrees of freedom
    --F: ratio of the mean_sq values
    --PR(>F): p-value; e.g. a value < 0.05 indicates a significant difference
    '''
    import statsmodels.api as sm
    from statsmodels.formula.api import ols

    # Categorical variables are marked with C(...) inside the formula.
    fitted_model = ols(formula, data=data).fit()
    return sm.stats.anova_lm(fitted_model)
Example #13
Source File: test_generic_methods.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def initialize(cls):
        """Fit the OLS model on ``cls.data`` with ``use_t=False`` and store it as ``cls.res``."""
        # glm, poisson and Poisson are imported but not used in this method.
        from statsmodels.formula.api import ols, glm, poisson
        from statsmodels.discrete.discrete_model import Poisson

        formula = "np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)"
        cls.res = ols(formula, cls.data).fit(use_t=False)
Example #14
Source File: test_downstream.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_statsmodels():
    """Smoke test: fit a statsmodels formula OLS model on the Guerry dataset.

    Nothing is asserted; the test passes when the fit completes.
    """
    # import_module is defined outside this block; presumably it handles a
    # missing statsmodels install -- confirm against its definition.
    statsmodels = import_module('statsmodels')  # noqa
    import statsmodels.api as sm
    import statsmodels.formula.api as smf

    guerry = sm.datasets.get_rdataset("Guerry", "HistData").data
    model = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=guerry)
    model.fit()


# Cython import warning 
Example #15
Source File: smoothers.py    From plotnine with GNU General Public License v2.0 5 votes vote down vote up
def predictdf(data, xseq, **params):
    """Dispatch to the smoothing function selected by ``params['method']``.

    Parameters
    ----------
    data
        Passed through unchanged to the selected smoothing function.
    xseq
        Passed through unchanged to the selected smoothing function.
    **params
        Must contain ``'method'``: either one of the names in the lookup
        table below, or a callable with the signature
        ``func(data, xseq, **params)``.  All of ``params`` (including
        ``'method'``) is forwarded to the selected function.

    Returns
    -------
    Whatever the selected smoothing function returns.

    Raises
    ------
    PlotnineError
        If ``method`` is a string that is not a known name, or is neither
        a string nor a callable.
    KeyError
        If ``params`` has no ``'method'`` entry.
    """
    methods = {
        'lm': lm,
        'ols': lm,
        'wls': lm,
        'rlm': rlm,
        'glm': glm,
        'gls': gls,
        'lowess': lowess,
        'loess': loess,
        'mavg': mavg,
        'gpr': gpr,
    }

    method = params['method']

    if isinstance(method, str):
        try:
            method = methods[method]
        except KeyError:
            msg = "Method should be one of {}"
            raise PlotnineError(msg.format(list(methods)))

    if not callable(method):
        # The original built this message but raised without it, and the
        # two string fragments were joined without a separating space.
        msg = ("'method' should either be a string or a function "
               "with the signature `func(data, xseq, **params)`")
        raise PlotnineError(msg)

    return method(data, xseq, **params)
Example #16
Source File: test_eval.py    From tea-lang with Apache License 2.0 5 votes vote down vote up
def f_test(x_name, y_name, df):
    """Run an F-test (ANOVA) of ``y_name`` on the categorical ``x_name``.

    Fits ``y_name ~ C(x_name)`` by OLS on ``df`` and returns the Type II
    ANOVA table produced by ``sm.stats.anova_lm``.

    The original passed ``type=2``; ``anova_lm`` reads the keyword ``typ``,
    so the request was ignored and the default type was used.  The keyword
    is corrected here, which means the returned table is now the Type II
    table.
    """
    # F-test, Factorial ANOVA
    model = ols(f"{y_name} ~ C({x_name})", data=df).fit()
    return sm.stats.anova_lm(model, typ=2)
Example #17
Source File: test_eval.py    From tea-lang with Apache License 2.0 5 votes vote down vote up
def factorial(xs, y, df):
    """Fit a factorial OLS model of ``y`` on the categorical factors ``xs``
    and return its Type II ANOVA table.

    The formula starts with one ``C(x)`` main-effect term per entry of
    ``xs``.  Interaction terms are then built per factor by cumulatively
    appending `` * C(other)`` for every other factor; each intermediate
    term is added to the formula when ``_is_interaction_unique`` accepts it.

    The original passed ``type=2`` to ``anova_lm``; the keyword it reads is
    ``typ``, so the request was ignored.  It is corrected here.
    """
    formula = f"{y} ~ " + " + ".join(f"C({x})" for x in xs)

    # Add the interactions
    interactions = []
    for i, x_i in enumerate(xs):
        inter = f"C({x_i})"
        for j, x_j in enumerate(xs):
            if i == j:
                continue
            # ``inter`` keeps growing across j, so later terms are
            # higher-order products of the earlier ones.
            inter += f" * C({x_j})"
            # Recorded before the uniqueness check, as in the original.
            interactions.append(inter)

            if _is_interaction_unique(interactions, inter):
                formula += " + " + inter

    model = ols(formula, data=df).fit()
    return sm.stats.anova_lm(model, typ=2)
Example #18
Source File: markers.py    From dynamo-release with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def diff_test_helper(data,
                     fullModelFormulaStr="~cr(time, df=3)",
                     reducedModelFormulaStr="~1",
                     ):
    """Compare a full and a reduced negative-binomial GLM of ``data['expression']``.

    Steps:

    1. Build design matrices for the full and reduced formulas.
    2. Fit a Poisson GLM on the full design to obtain fitted means ``mu``.
    3. Regress ``((y - mu)**2 - y) / mu`` on ``mu`` without an intercept;
       the single coefficient is used as ``alpha`` of the negative-binomial
       family.
    4. Fit negative-binomial GLMs on both designs and pass them to ``lrt``.

    Returns
    -------
    tuple
        ``('ok', 'NB2', pval)`` on success, where ``pval`` is whatever
        ``lrt`` returns, or ``('fail', 'NB2', 1)`` when either
        negative-binomial fit raises.
    """
    # Design matrices for the full and the reduced model.
    transformed_x = dmatrix(fullModelFormulaStr, data, return_type='dataframe')
    transformed_x_null = dmatrix(reducedModelFormulaStr, data, return_type='dataframe')

    expression = data['expression']
    poisson_training_results = sm.GLM(expression, transformed_x, family=sm.families.Poisson()).fit()
    poisson_df = pd.DataFrame({'mu': poisson_training_results.mu, 'expression': expression})
    # Column-wise arithmetic; same values as the former row-by-row apply.
    poisson_df['AUX_OLS_DEP'] = (
        (poisson_df['expression'] - poisson_df['mu']) ** 2 - poisson_df['expression']
    ) / poisson_df['mu']
    ols_expr = """AUX_OLS_DEP ~ mu - 1"""
    aux_olsr_results = smf.ols(ols_expr, poisson_df).fit()

    # ``params`` is indexed by term name, so take the first (only)
    # coefficient by position explicitly.
    nb2_family = sm.families.NegativeBinomial(alpha=aux_olsr_results.params.iloc[0])

    try:
        nb2_full = sm.GLM(expression, transformed_x, family=nb2_family).fit()
        nb2_null = sm.GLM(expression, transformed_x_null, family=nb2_family).fit()
    except Exception:
        # Best-effort: report a failed fit instead of propagating, but do
        # not swallow KeyboardInterrupt/SystemExit as a bare except would.
        return ('fail', 'NB2', 1)

    pval = lrt(nb2_full, nb2_null)
    return ('ok', 'NB2', pval)
Example #19
Source File: test_downstream.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_statsmodels():
    """Smoke test: fit a statsmodels formula OLS model on the Guerry dataset.

    Nothing is asserted; the test passes when the fit completes.
    """
    # import_module is defined outside this block; presumably it handles a
    # missing statsmodels install -- confirm against its definition.
    statsmodels = import_module('statsmodels')  # noqa
    import statsmodels.api as sm
    import statsmodels.formula.api as smf

    guerry = sm.datasets.get_rdataset("Guerry", "HistData").data
    model = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=guerry)
    model.fit()
Example #20
Source File: test_downstream.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_statsmodels():
    """Smoke test: fit a statsmodels formula OLS model on the Guerry dataset.

    Nothing is asserted; the test passes when the fit completes.
    """
    # import_module is defined outside this block; presumably it handles a
    # missing statsmodels install -- confirm against its definition.
    statsmodels = import_module('statsmodels')  # noqa
    import statsmodels.api as sm
    import statsmodels.formula.api as smf

    guerry = sm.datasets.get_rdataset("Guerry", "HistData").data
    model = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=guerry)
    model.fit()
Example #21
Source File: TargetAnalysisContinuous.py    From exploripy with MIT License 5 votes vote down vote up
def TwoWayAnova(self,categorical1, categorical2, continuous):
	"""Run a two-way ANOVA of ``continuous`` on two categorical columns.

	Rows with a missing value in any of the three columns are dropped, the
	model ``continuous ~ C(categorical1)*C(categorical2)`` is fitted by
	OLS, and a Type III ANOVA table is computed.

	Returns whether the ``PR(>F)`` value in the table row at position 2
	is below 0.05.

	NOTE(review): if the table starts with an Intercept row, position 2 is
	the second factor's main effect rather than the interaction term --
	confirm which row was intended.
	"""
	df = self.df[[categorical1,categorical2,continuous]].dropna()

	function = continuous + ' ~ C(' + categorical1 + ')*C('+ categorical2 + ')'
	print(function)
	# OLS.fit only documents method="pinv" (default) and "qr"; the former
	# method='powell' is not a valid choice, so the default is used.
	lm = ols(function, data=df).fit()
	table = sm.stats.anova_lm(lm, typ=3)
	return table.iloc[2]['PR(>F)']<0.05
Example #22
Source File: test_anova.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_results(self):
        """Check the two-model ``anova_lm`` comparison against fixed values.

        A model without an interaction or an intercept is fitted on
        ``self.data`` and compared with ``self.kidney_lm``.
        """
        reduced_fit = ols("np.log(Days+1) ~ C(Duration) + C(Weight) - 1",
                          self.data).fit()
        table = anova_lm(reduced_fit, self.kidney_lm)

        expected_df_resid = np.array([56, 54])
        expected_ssr = np.array([29.62486, 28.9892])
        expected_df_diff = np.array([0, 2])
        expected_ss_diff = np.array([np.nan, 0.6356584])
        expected_f = np.array([np.nan, 0.5920404])
        expected_pr_f = np.array([np.nan, 0.5567479])

        np.testing.assert_equal(table["df_resid"].values, expected_df_resid)
        np.testing.assert_almost_equal(table["ssr"].values, expected_ssr, 4)
        np.testing.assert_almost_equal(table["df_diff"].values, expected_df_diff)
        np.testing.assert_almost_equal(table["ss_diff"].values, expected_ss_diff)
        np.testing.assert_almost_equal(table["F"].values, expected_f)
        np.testing.assert_almost_equal(table["Pr(>F)"].values, expected_pr_f)
Example #23
Source File: test_downstream.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_statsmodels():
    """Smoke test: fit a statsmodels formula OLS model on the Guerry dataset.

    Nothing is asserted; the test passes when the fit completes.
    """
    # import_module is defined outside this block; presumably it handles a
    # missing statsmodels install -- confirm against its definition.
    statsmodels = import_module('statsmodels')  # noqa
    import statsmodels.api as sm
    import statsmodels.formula.api as smf

    guerry = sm.datasets.get_rdataset("Guerry", "HistData").data
    model = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=guerry)
    model.fit()


# Cython import warning 
Example #24
Source File: test_generic_methods.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def initialize(cls):
        """Fit the OLS model on ``cls.data`` with default options and store it as ``cls.res``."""
        # glm, poisson and Poisson are imported but not used in this method.
        from statsmodels.formula.api import ols, glm, poisson
        from statsmodels.discrete.discrete_model import Poisson

        formula = "np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)"
        cls.res = ols(formula, cls.data).fit()  # default use_t=True
Example #25
Source File: test_generic_methods.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def initialize(cls):
        """Fit the GLM on ``cls.data`` with ``use_t=False`` and store it as ``cls.res``."""
        # ols, poisson and Poisson are imported but not used in this method.
        from statsmodels.formula.api import ols, glm, poisson
        from statsmodels.discrete.discrete_model import Poisson

        formula = "np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)"
        cls.res = glm(formula, cls.data).fit(use_t=False)
Example #26
Source File: test_generic_methods.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def initialize(cls):
        """Fit the OLS model on ``cls.data`` with default options and store it as ``cls.res``."""
        # glm, poisson and Poisson are imported but not used in this method.
        from statsmodels.formula.api import ols, glm, poisson
        from statsmodels.discrete.discrete_model import Poisson

        formula = "np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)"
        cls.res = ols(formula, cls.data).fit()  # default use_t=True
Example #27
Source File: test_generic_methods.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def setup_class(cls):
        """Prepare the data, fitted model, term name and constraints.

        Data comes from the ``TestAnova3`` fixture with rows 0, 1 and 2
        dropped; the model uses the default coding with an intercept.
        """
        from statsmodels.formula.api import ols
        import statsmodels.stats.tests.test_anova as ttmod

        anova_fixture = ttmod.TestAnova3()
        anova_fixture.setup_class()
        cls.data = anova_fixture.data.drop([0, 1, 2])

        cls.res = ols("np.log(Days+1) ~ C(Duration) + C(Weight)",
                      cls.data).fit()
        term = "C(Weight)"
        cls.term_name = term
        cls.constraints = [
            term + '[T.2]',
            term + '[T.3]',
            term + '[T.3] - ' + term + '[T.2]',
        ]
Example #28
Source File: test_generic_methods.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def setup_class(cls):
        """Prepare the data, fitted model, term name and constraints.

        Data comes from the ``TestAnova3`` fixture with rows 0, 1 and 2
        dropped; the model formula removes the intercept (``- 1``).
        """
        from statsmodels.formula.api import ols
        import statsmodels.stats.tests.test_anova as ttmod

        anova_fixture = ttmod.TestAnova3()
        anova_fixture.setup_class()
        cls.data = anova_fixture.data.drop([0, 1, 2])

        cls.res = ols("np.log(Days+1) ~ C(Weight) + C(Duration) - 1",
                      cls.data).fit()
        term = "C(Weight)"
        cls.term_name = term
        cls.constraints = [
            term + '[2] - ' + term + '[1]',
            term + '[3] - ' + term + '[1]',
            term + '[3] - ' + term + '[2]',
        ]
Example #29
Source File: test_generic_methods.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def setup_class(cls):
        """Prepare the data, fitted model, term name and constraints.

        Data comes from the ``TestAnova3`` fixture with rows 0, 1 and 2
        dropped; ``Weight`` is coded as ``Treatment(2)`` in the formula.
        """
        from statsmodels.formula.api import ols
        import statsmodels.stats.tests.test_anova as ttmod

        anova_fixture = ttmod.TestAnova3()
        anova_fixture.setup_class()
        cls.data = anova_fixture.data.drop([0, 1, 2])

        cls.res = ols("np.log(Days+1) ~ C(Weight, Treatment(2)) + C(Duration)",
                      cls.data).fit()
        term = "C(Weight, Treatment(2))"
        cls.term_name = term
        cls.constraints = [
            '-' + term + '[T.1]',
            term + '[T.3] - ' + term + '[T.1]',
            term + '[T.3]',
        ]
Example #30
Source File: test_regressionplots.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_one_column_exog(self):
        """Smoke test ``plot_regress_exog`` on single-regressor models.

        Runs once without and once with an intercept; each figure is
        closed right after it is created.
        """
        from statsmodels.formula.api import ols

        for formula in ("y~var1-1", "y~var1"):
            fitted = ols(formula, data=self.data).fit()
            figure = plot_regress_exog(fitted, "var1")
            plt.close(figure)