Python statsmodels.tsa.stattools.adfuller() Examples

The following are 22 code examples of statsmodels.tsa.stattools.adfuller(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module statsmodels.tsa.stattools, or try the search function.
Example #1
Source File: Stationarity.py    From METU-BA4318-Spring2019 with GNU General Public License v3.0 7 votes vote down vote up
def test_stationarity(timeseries):
    """Plot rolling statistics for *timeseries* and print an ADF test summary.

    A 12-observation rolling mean and rolling standard deviation are drawn
    together with the original series (non-blocking ``plt.show``).  The
    Augmented Dickey-Fuller test is then run with ``autolag='AIC'`` on a
    float array built from the input, with NaN/inf replaced by
    ``np.nan_to_num``.  The first four result fields and every critical
    value are printed as one pandas Series.

    The input is never modified: the NaN replacement works on a copy.
    """
    # Rolling statistics over a 12-observation window.
    rolling = pd.Series(timeseries).rolling(window=12)
    rolmean = rolling.mean()
    rolstd = rolling.std()

    # Plot rolling statistics.
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)

    # Perform Dickey-Fuller test.
    print("Results of Dickey-Fuller Test:")
    # np.asarray may return a view of the caller's data; let nan_to_num
    # copy (its default) so the caller's NaNs are not overwritten in place.
    array = np.nan_to_num(np.asarray(timeseries, dtype='float'))
    dftest = adfuller(array, autolag='AIC')
    dfoutput = pd.Series(
        dftest[0:4],
        index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'],
    )
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)


# Load data 
Example #2
Source File: transformation_function.py    From arauto with Apache License 2.0 7 votes vote down vote up
def test_custom_difference(self, custom_transformation_size):
        '''
        Run the Adfuller test on the original data with a caller-supplied difference size

        Args:
            custom_transformation_size (sequence): element 0 is stored as ``d`` and element 1 as ``D``.
                The series is differenced with ``diff(d)`` and then ``diff(seasonality * D)``.

        Returns:
            dftest (tuple): the result of adfuller on the transformed series.
            transformed_time_series (pandas Series): the differenced series with NaN rows dropped.
            label (str): 'Custom Difference' if the test statistic is below the 1% critical value,
                otherwise None.
            d (int): the first element of custom_transformation_size.
            D (int): the second element of custom_transformation_size.
            transformation_function (func): an identity lambda, since no value transformation is applied.
            test_stationarity_code (str): code snippet reproducing this transformation.
            seasonality (int): the value of self.seasonality
        '''
        self.d = custom_transformation_size[0]
        self.D = custom_transformation_size[1]
        seasonal_periods = self.seasonality * self.D

        self.transformed_time_series = self.original_timeseries.diff(self.d).diff(seasonal_periods).dropna()
        self.dftest = adfuller(self.transformed_time_series, autolag='AIC')
        self.transformation_function = lambda x: x

        # The generated snippet must use the same periods as the transformation
        # above (seasonality * D for the second diff), not the bare D.
        self.test_stationarity_code = '''
                # Applying Augmented Dickey-Fuller test
                dftest = adfuller(df.diff({}).diff({}).dropna(), autolag='AIC')
                '''.format(self.d, seasonal_periods)

        self.label = 'Custom Difference' if self.dftest[0] < self.dftest[4]['1%'] else None

        return (self.dftest, self.transformed_time_series, self.label, self.d, self.D,
                self.transformation_function, self.test_stationarity_code, self.seasonality)
Example #3
Source File: Stationarity-Patients.py    From METU-BA4318-Spring2019 with GNU General Public License v3.0 6 votes vote down vote up
def test_stationarity(timeseries):
    """Show rolling mean/std of *timeseries* and print Dickey-Fuller results.

    Plots the series with its 12-observation rolling mean and rolling
    standard deviation, then runs ``adfuller(..., autolag='AIC')`` on a
    float copy of the data in which NaN/inf have been replaced by
    ``np.nan_to_num``.  The printed Series holds the test statistic,
    p-value, lags used, number of observations and each critical value.

    The caller's data is left untouched.
    """
    # Determine rolling statistics.
    as_series = pd.Series(timeseries)
    rolmean = as_series.rolling(window=12).mean()
    rolstd = as_series.rolling(window=12).std()

    # Plot rolling statistics.
    for data, colour, caption in (
        (timeseries, 'blue', 'Original'),
        (rolmean, 'red', 'Rolling Mean'),
        (rolstd, 'black', 'Rolling Std'),
    ):
        plt.plot(data, color=colour, label=caption)
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)

    # Perform Dickey-Fuller test.
    print("Results of Dickey-Fuller Test:")
    # copy=True: np.asarray can alias the input, and replacing NaNs in place
    # would silently change the caller's series.
    array = np.nan_to_num(np.asarray(timeseries, dtype='float'), copy=True)
    dftest = adfuller(array, autolag='AIC')
    headline_labels = ['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
    dfoutput = pd.Series(dftest[0:4], index=headline_labels)
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)


# Load data 
Example #4
Source File: transformation_function.py    From arauto with Apache License 2.0 5 votes vote down vote up
def test_seasonal_log_difference(self):
        '''
        Run the Adfuller test on log1p(original data) after a first difference and a seasonal difference

        Returns:
            dftest (tuple): the result of the Augmented Dickey-Fuller test. Please refer to
                (https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html) for
                further information
            transformed_time_series (pandas Series): log1p of the original series, differenced once and
                then differenced by self.seasonality, with NaN rows dropped.
            label (str): 'Log Difference + Seasonal Difference' if the test statistic is below the 1%
                critical value, otherwise None.
            d (int): always 1 for this transformation.
            D (int): always 1 for this transformation.
            transformation_function (func): numpy.log1p
            test_stationarity_code (str): code snippet reproducing this transformation.
            seasonality (int): the value of self.seasonality
        '''

        logged = np.log1p(self.original_timeseries)
        self.transformed_time_series = logged.diff().diff(self.seasonality).dropna()
        self.dftest = adfuller(self.transformed_time_series, autolag='AIC')
        self.transformation_function = np.log1p

        self.test_stationarity_code = '''
                # Applying Augmented Dickey-Fuller test
                df = np.log1p(df)
                dftest = adfuller(df.diff().diff({}).dropna(), autolag='AIC')
                '''.format(self.seasonality)

        # Label the transformation only when the statistic beats the 1% critical value.
        adf_statistic = self.dftest[0]
        critical_1pct = self.dftest[4]['1%']
        if adf_statistic < critical_1pct:
            self.label = 'Log Difference + Seasonal Difference'
        else:
            self.label = None
        self.d = 1
        self.D = 1

        return (
            self.dftest,
            self.transformed_time_series,
            self.label,
            self.d,
            self.D,
            self.transformation_function,
            self.test_stationarity_code,
            self.seasonality,
        )
Example #5
Source File: DyStockDataML.py    From DevilYuan with MIT License 5 votes vote down vote up
def adfTest(s):
        """
            ADF Test: return the p-value of adfuller(s, 1).
            Larger p-value: random walk, possibly a trend.
            Smaller p-value: mean reversion.
        """
        # Index 1 of the adfuller result is the p-value.
        p_value = stattools.adfuller(s, 1)[1]
        return p_value
Example #6
Source File: DySS_LimitUpAnalysis.py    From DevilYuan with MIT License 5 votes vote down vote up
def _adfTest(self, df):
        """
            ADF Test on the 'close' column of df[-21:-1]; returns the p-value.
            Larger p-value: random walk, possibly a trend.
            Smaller p-value: mean reversion.
        """
        # The slice keeps up to 20 entries and leaves out the last one.
        window = df[-21:-1]
        closes = window['close']

        return stattools.adfuller(closes, 1)[1]
Example #7
Source File: DySS_Pairs.py    From DevilYuan with MIT License 5 votes vote down vote up
def _adfTest(self, s):
        """
            ADF Test: return the p-value of adfuller(s, 1).
            Larger p-value: random walk, possibly a trend.
            Smaller p-value: mean reversion.
        """
        adf_result = stattools.adfuller(s, 1)
        p_value = adf_result[1]

        return p_value
Example #8
Source File: process_raw_prices.py    From pairstrade-fyp-2019 with MIT License 5 votes vote down vote up
def get_filename_without_ext(path):
    """Return the last component of *path* with its final extension stripped."""
    stem, _extension = os.path.splitext(os.path.basename(path))
    return stem


# def compute_intercept_and_pvalue(p1, p2):
#     # log Y = intercept + log X + c
#     Y, X = pd.Series(np.log(p1)), pd.Series(np.log(p2))

#     # returns (intercept, pvalue)
#     return (np.mean(Y - X), smts.adfuller(Y - X)[1]) 
Example #9
Source File: diagnostic.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def unitroot_adf(x, maxlag=None, trendorder=0, autolag='AIC', store=False):
    """Run ``adfuller`` on *x* and return its result.

    *trendorder* is forwarded as adfuller's ``regression`` argument;
    *maxlag*, *autolag* and *store* are forwarded under their own names.
    ``regresults`` is always passed as ``False``.
    """
    options = dict(
        maxlag=maxlag,
        regression=trendorder,
        autolag=autolag,
        store=store,
        regresults=False,
    )
    return adfuller(x, **options)


#TODO: I like the bunch pattern for this too. 
Example #10
Source File: seasonality.py    From Learn-Algorithmic-Trading---Fundamentals-of-Algorithmic-Trading with MIT License 5 votes vote down vote up
def test_stationarity(timeseries):
    """Print the headline ADF results for *timeseries*, skipping its first element.

    The test is run with ``autolag='AIC'`` on ``timeseries[1:]``; only the
    first four result fields are printed.
    """
    print('Results of Dickey-Fuller Test:')
    adf_result = adfuller(timeseries[1:], autolag='AIC')
    headline_labels = ['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
    summary = pd.Series(adf_result[0:4], index=headline_labels)
    print(summary)
Example #11
Source File: test_adfuller_lag.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_adf_autolag():
    """Check adfuller's automatic lag selection on log real GDP from macrodata.

    For each regression spec in ('nc', 'c', 'ct', 'ctt') the test runs
    adfuller with ``autolag='aic'`` and stored regression results, checks
    the number of candidate fits and the design matrices of the first five,
    and confirms that rerunning with ``maxlag=2, autolag=None`` reproduces
    the first two result fields.  It then checks that an explicit ``maxlag``
    of 5 and of 0 yields ``maxlag + 1`` candidate fits.
    """
    #see issue #246
    #this is mostly a unit test
    d2 = macrodata.load().data

    # k_trend is the position of the spec in the list; it is used below as a
    # column offset into exog.
    for k_trend, tr in enumerate(['nc', 'c', 'ct', 'ctt']):
        #[None:'nc', 0:'c', 1:'ct', 2:'ctt']
        x = np.log(d2['realgdp'])
        xd = np.diff(x)

        #check exog
        adf3 = tsast.adfuller(x, maxlag=None, autolag='aic',
                              regression=tr, store=True, regresults=True)
        # With store/regresults enabled, the last element is the results store.
        st2 = adf3[-1]

        assert_equal(len(st2.autolag_results), 15 + 1)  #+1 for lagged level
        # Only the first five entries (in sorted key order) are inspected.
        for l, res in sorted(iteritems(st2.autolag_results))[:5]:
            lag = l-k_trend
            #assert correct design matrices in _autolag
            assert_equal(res.model.exog[-10:,k_trend], x[-11:-1])
            assert_equal(res.model.exog[-1,k_trend+1:], xd[-lag:-1][::-1])
            #min-ic lag of dfgls in Stata is also 2, or 9 for maic with notrend
            assert_equal(st2.usedlag, 2)

        #same result with lag fixed at usedlag of autolag
        adf2 = tsast.adfuller(x, maxlag=2, autolag=None, regression=tr)
        assert_almost_equal(adf3[:2], adf2[:2], decimal=12)


    # NOTE: x below is the value left over from the last loop iteration.
    tr = 'c'
    #check maxlag with autolag
    adf3 = tsast.adfuller(x, maxlag=5, autolag='aic',
                          regression=tr, store=True, regresults=True)
    assert_equal(len(adf3[-1].autolag_results), 5 + 1)
    adf3 = tsast.adfuller(x, maxlag=0, autolag='aic',
                          regression=tr, store=True, regresults=True)
    assert_equal(len(adf3[-1].autolag_results), 0 + 1) 
Example #12
Source File: transformation_function.py    From arauto with Apache License 2.0 5 votes vote down vote up
def test_seasonal_difference(self):
        '''
        Run the Adfuller test on the original data after a seasonal difference

        Returns:
            dftest (tuple): the result of the Augmented Dickey-Fuller test. Please refer to
                (https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html) for
                further information
            transformed_time_series (pandas Series): the original series differenced by self.seasonality,
                with NaN rows dropped.
            label (str): 'Seasonality Difference' if the test statistic is below the 1% critical value,
                otherwise None.
            d (int): always 0 for this transformation.
            D (int): always 1 for this transformation.
            transformation_function (func): an identity lambda, since no value transformation is applied.
            test_stationarity_code (str): code snippet reproducing this transformation.
            seasonality (int): the value of self.seasonality
        '''

        seasonal_diff = self.original_timeseries.diff(self.seasonality)
        self.transformed_time_series = seasonal_diff.dropna()
        self.dftest = adfuller(self.transformed_time_series, autolag='AIC')
        self.transformation_function = lambda x: x

        self.test_stationarity_code = '''
                # Applying Augmented Dickey-Fuller test
                dftest = adfuller(df.diff({}).dropna(), autolag='AIC')
                    '''.format(self.seasonality)

        # Label the transformation only when the statistic beats the 1% critical value.
        if self.dftest[0] < self.dftest[4]['1%']:
            self.label = 'Seasonality Difference'
        else:
            self.label = None
        self.d = 0
        self.D = 1

        return (
            self.dftest,
            self.transformed_time_series,
            self.label,
            self.d,
            self.D,
            self.transformation_function,
            self.test_stationarity_code,
            self.seasonality,
        )
Example #13
Source File: transformation_function.py    From arauto with Apache License 2.0 5 votes vote down vote up
def test_log_transformation(self):
        '''
        Run the Adfuller test on the original data with log1p transformation

        Returns:
            dftest (tuple): the result of the Augmented Dickey-Fuller test. Please refer to
                (https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html) for
                further information
            transformed_time_series (pandas Series): numpy.log1p applied to the original series.
            label (str): 'Log transformation' if the test statistic is below the 1% critical value,
                otherwise None.
            d (int): always 0 for this transformation.
            D (int): always 0 for this transformation.
            transformation_function (func): numpy.log1p
            test_stationarity_code (str): code snippet reproducing this transformation.
            seasonality (int): the value of self.seasonality
        '''

        self.transformed_time_series = np.log1p(self.original_timeseries)
        self.dftest = adfuller(self.transformed_time_series, autolag='AIC')
        self.transformation_function = np.log1p

        # The snippet reassigns df to its log1p, so the test must run on df
        # itself; wrapping it in np.log1p again would apply the log twice and
        # no longer match the test performed above.
        self.test_stationarity_code = '''
                # Applying Augmented Dickey-Fuller test
                df = np.log1p(df) 
                dftest = adfuller(df, autolag='AIC')
                    '''
        self.label = 'Log transformation' if self.dftest[0] < self.dftest[4]['1%'] else None
        self.d = 0
        self.D = 0

        return (self.dftest, self.transformed_time_series, self.label, self.d, self.D,
                self.transformation_function, self.test_stationarity_code, self.seasonality)
Example #14
Source File: transformation_function.py    From arauto with Apache License 2.0 5 votes vote down vote up
def test_first_difference(self):
        '''
        Run the Adfuller test on the original data with first difference

        Returns:
            dftest (tuple): the result of the Augmented Dickey-Fuller test. Please refer to
                (https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html) for
                further information
            transformed_time_series (pandas Series): the original series differenced once, with NaN
                rows dropped.
            label (str): 'Difference' if the test statistic is below the 1% critical value,
                otherwise None.
            d (int): always 1 for this transformation.
            D (int): always 0 for this transformation.
            transformation_function (func): an identity lambda, since no value transformation is applied.
            test_stationarity_code (str): code snippet reproducing this transformation.
            seasonality (int): the value of self.seasonality
        '''

        self.transformed_time_series = self.original_timeseries.diff().dropna()
        self.dftest = adfuller(self.transformed_time_series, autolag='AIC')
        # Set explicitly, as the other difference-only transformations do;
        # otherwise a function left by an earlier call on this instance
        # (e.g. np.log1p) would be returned for a series that was never logged.
        self.transformation_function = lambda x: x

        self.test_stationarity_code = '''
                # Applying Augmented Dickey-Fuller test
                dftest = adfuller(df.diff().dropna(), autolag='AIC')
                    '''
        self.label = 'Difference' if self.dftest[0] < self.dftest[4]['1%'] else None
        self.d = 1
        self.D = 0

        return (self.dftest, self.transformed_time_series, self.label, self.d, self.D,
                self.transformation_function, self.test_stationarity_code, self.seasonality)
Example #15
Source File: transformation_function.py    From arauto with Apache License 2.0 5 votes vote down vote up
def test_absolute_data(self):
        '''
        Run the Adfuller test on the original data, without transformation

        Returns:
            dftest (tuple): the result of the Augmented Dickey-Fuller test. Please refer to
                (https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html) for
                further information
            transformed_time_series (pandas Series): the original time series, since no transformation
                is applied.
            label (str): 'Absolute' if the test statistic is below the 1% critical value,
                otherwise None.
            d (int): always 0, since no differencing is applied.
            D (int): always 0, since no seasonal differencing is applied.
            transformation_function (func): an identity lambda, since no value transformation is applied.
            test_stationarity_code (str): code snippet reproducing this test.
            seasonality (int): the value of self.seasonality
        '''

        # Reset the per-transformation state so the returned values describe
        # the untransformed series even when another transformation was run
        # on this instance beforehand.
        self.transformed_time_series = self.original_timeseries
        self.transformation_function = lambda x: x
        self.d = 0
        self.D = 0

        self.dftest = adfuller(self.original_timeseries, autolag='AIC')

        self.test_stationarity_code = '''
                # Applying Augmented Dickey-Fuller test
                dftest = adfuller(df, autolag='AIC')
                    '''
        self.label = 'Absolute' if self.dftest[0] < self.dftest[4]['1%'] else None

        return (self.dftest, self.transformed_time_series, self.label, self.d, self.D,
                self.transformation_function, self.test_stationarity_code, self.seasonality)
Example #16
Source File: DyStockDataML.py    From DevilYuan with MIT License 5 votes vote down vote up
def adfTest(s):
        """
            ADF Test: run adfuller(s, 1) and return its p-value.
            Larger p-value: random walk, possibly a trend.
            Smaller p-value: mean reversion.
        """
        adf_result = stattools.adfuller(s, 1)
        # Position 1 of the result holds the p-value.
        return adf_result[1]
Example #17
Source File: DySS_LimitUpAnalysis.py    From DevilYuan with MIT License 5 votes vote down vote up
def _adfTest(self, df):
        """
            ADF Test on df[-21:-1]['close']; returns the p-value.
            Larger p-value: random walk, possibly a trend.
            Smaller p-value: mean reversion.
        """
        # Restrict to the slice [-21:-1], which leaves out the last entry.
        recent_closes = df[-21:-1]['close']

        p_value = stattools.adfuller(recent_closes, 1)[1]
        return p_value
Example #18
Source File: DySS_Pairs.py    From DevilYuan with MIT License 5 votes vote down vote up
def _adfTest(self, s):
        """
            ADF Test: return the p-value of adfuller(s, 1).
            Larger p-value: random walk, possibly a trend.
            Smaller p-value: mean reversion.
        """
        return stattools.adfuller(s, 1)[1]
Example #19
Source File: diff_nonstationary.py    From HN_SO_analysis with MIT License 5 votes vote down vote up
def diff_nonstationary(x, alpha):
    """Returns number of differentiations required to transform
    a non-stationary time series into a stationary one. If 0 (zero) is
    returned, there's no need to differentiate.

    PARAMETERS:
    1) x - input time series (``.diff()`` and ``.dropna()`` are called on it)
    2) alpha - significance level

    The first ADF test uses regression 'ct' when a linear regression of x
    on 1..len(x) has a p-value below alpha, and 'c' otherwise.  Every later
    test, run after each additional ``diff()``, uses regression 'c'.
    """
    # Choose the regression spec for the initial test from a linear trend fit.
    time_index = pd.Series(range(1, len(x)+1))
    trend_is_significant = stats.linregress(time_index, x).pvalue < alpha
    initial_regression = 'ct' if trend_is_significant else 'c'
    pvalue = adfuller(x, regression=initial_regression)[1]

    n_diffs = 0  # no need to differentiate
    while pvalue > alpha:
        x = x.diff()
        pvalue = adfuller(x.dropna(), regression='c')[1]
        n_diffs += 1
    return int(n_diffs)
    
### End of code 
Example #20
Source File: diagnostic.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def unitroot_adf(x, maxlag=None, trendorder=0, autolag='AIC', store=False):
    """Delegate to ``adfuller`` with *trendorder* passed as ``regression``.

    ``regresults`` is fixed to ``False``; the adfuller result is returned
    unchanged.
    """
    result = adfuller(
        x,
        maxlag=maxlag,
        regression=trendorder,
        autolag=autolag,
        store=store,
        regresults=False,
    )
    return result


#TODO: I like the bunch pattern for this too. 
Example #21
Source File: fraction.py    From finance_ml with MIT License 4 votes vote down vote up
def get_opt_d(series, ds=None, lag=1, thres=1e-5, max_size=10000,
              p_thres=1e-2, autolag=None, verbose=1, **kwargs):
    """Find the smallest candidate degree whose differenced series passes ADF

    Params
    ------
    series: pd.Series
    ds: array-like, default np.linspace(0, 1, 100)
        Search space of degree. Sorted ascending before the search.
    lag: int, default 1
        Part of the signature; not read by this function.
    thres: float, default 1e-5
        Forwarded to frac_diff_FFD.
    max_size: int, default 10000
        Forwarded to frac_diff_FFD.
    p_thres: float, default 1e-2
        The search stops at the first degree whose ADF p-value is below this.
    autolag: str, optional
        Forwarded to adfuller.
    verbose: int, default 1
        1 wraps the candidates in tqdm, 2 in tqdm_notebook; any other value
        iterates without a progress bar.
    kwargs: extra keyword arguments forwarded to adfuller

    Returns
    -------
    The first element of the sorted ``ds`` whose p-value is below p_thres,
    or the largest element of ``ds`` if none qualifies.
    """
    candidates = np.array(np.linspace(0, 1, 100) if ds is None else ds)
    # Sort to ascending order
    candidates = candidates[np.argsort(candidates)]

    if verbose == 2:
        progress = tqdm_notebook(candidates)
    elif verbose == 1:
        progress = tqdm(candidates)
    else:
        progress = candidates

    # Fall back to the largest degree when no candidate passes the test.
    fallback = candidates[-1]
    for degree in progress:
        differenced = frac_diff_FFD(series, d=degree, thres=thres, max_size=max_size)
        p_value = adfuller(differenced.dropna().values, autolag=autolag, **kwargs)[1]
        if p_value < p_thres:
            return degree
    return fallback
Example #22
Source File: pair_selector.py    From pairstrade-fyp-2019 with MIT License 4 votes vote down vote up
def coint(df, intercept = True, sig_level = 0.01):
    """
    Find pairs (of 2 time series) that passes the cointegration test.

    Parameters
    ----------
    df: pandas dataframe
        each column is the time series of a certain stock

    intercept: boolean
        if True, OLS (with a constant added to the regressor) and the ADF
        test on the residuals are done manually, and a pair is kept only
        when the fitted slope is positive.
        if False, the coint() function from statsmodels.tsa.stattools is
        called with trend='c'; no slope is estimated, so only the p-value
        is checked.

    sig_level: if p_value of cointegration test is below this level, then we
        can reject the NULL hypothesis, which says that the two series are not
        cointegrated

    Return
    ------
    A list of tuples of the form (name of stock 1, name of stock 2, p_value of
    cointegration test).

    """
    cointegrated_pairs = []

    stock_names = df.columns.values.tolist()

    for stock_1, stock_2 in itertools.combinations(stock_names, 2):
        series_1 = df[stock_1].values.astype(float)
        series_2 = df[stock_2].values.astype(float)

        if not intercept:
            p_value = smts.coint(series_1, series_2, trend='c')[1]
            # No regression is fitted on this path, so there is no slope to
            # check; the pair is judged on the p-value alone.
            slope_ok = True
        else:
            results = sm.OLS(series_1, sm.add_constant(series_2)).fit()
            # Unpack into a fresh name: reusing `intercept` here would replace
            # the boolean flag with a float for all later pairs.
            _const, slope = results.params
            p_value = smts.adfuller(results.resid)[1]
            slope_ok = slope > 0

        if p_value < sig_level and slope_ok:
            cointegrated_pairs.append((stock_1, stock_2, p_value))

    return cointegrated_pairs