Python Examples of statsmodels.tsa.stattools.adfuller

Source File: Stationarity.py From METU-BA4318-Spring2019 with GNU General Public License v3.0

7 votes

def test_stationarity(timeseries):
    """Plot rolling statistics for a series and print its Dickey-Fuller test.

    Draws the data together with its 12-observation rolling mean and rolling
    standard deviation, then runs ``adfuller`` (lag length selected by AIC)
    and prints the test statistic, p-value, number of lags used, number of
    observations used and the critical values.

    Args:
        timeseries: one-dimensional sequence of numeric observations; it must
            be accepted by ``pd.Series``, ``plt.plot`` and ``np.array``.

    Returns:
        None. The function only plots and prints.
    """
    # Rolling mean and standard deviation over a 12-observation window.
    rolling_window = pd.Series(timeseries).rolling(window=12)
    rolmean = rolling_window.mean()
    rolstd = rolling_window.std()

    # Plot the data together with its rolling statistics.
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)

    # Perform Dickey-Fuller test.
    print("Results of Dickey-Fuller Test:")
    # np.array always copies, so the in-place NaN replacement below cannot
    # write through to the caller's data (np.asarray may hand back the
    # caller's own buffer when the input is already a float array).
    array = np.array(timeseries, dtype='float')
    # NOTE(review): NaN is replaced by 0 before testing, which feeds made-up
    # observations to the test -- confirm this is the intended handling.
    np.nan_to_num(array, copy=False)
    dftest = adfuller(array, autolag='AIC')
    dfoutput = pd.Series(
        dftest[0:4],
        index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'],
    )
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)


# Load data

Source File: transformation_function.py From arauto with Apache License 2.0

7 votes

def test_custom_difference(self, custom_transformation_size):
        '''
        Run the Adfuller test on the original data differenced with caller-supplied sizes

        Args:
            custom_transformation_size (sequence): item 0 is stored as d and used as the lag of the first diff();
                item 1 is stored as D and multiplied by self.seasonality to get the lag of the second diff().

        Returns:
            dftest (tuple): the value returned by adfuller for the transformed series. Please refer to
                (https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html) for
                further information
            transformed_time_series: the original series after both diff() calls and dropna().
            label (str): 'Custom Difference' when the test statistic is below the 1% critical value, otherwise None.
            d: the first item of custom_transformation_size.
            D: the second item of custom_transformation_size.
            transformation_function (func): an identity lambda, since no value transformation is applied.
            test_stationarity_code (str): code snippet that reproduces this transformation.
            seasonality: self.seasonality, unchanged.
        '''
        self.d = custom_transformation_size[0]
        self.D = custom_transformation_size[1]
        # NOTE(review): a value of 0 for d or D makes diff(0) subtract the series from itself,
        # which yields an all-zero series -- confirm callers never pass 0 here.
        seasonal_lag = self.seasonality * self.D

        self.transformed_time_series = self.original_timeseries.diff(self.d).diff(seasonal_lag).dropna()
        self.dftest = adfuller(self.transformed_time_series, autolag='AIC')
        self.transformation_function = lambda x: x

        # The generated snippet must use the same lags as the computation above; the seasonal
        # lag is seasonality * D, not D alone.
        self.test_stationarity_code = '''
                # Applying Augmented Dickey-Fuller test
                dftest = adfuller(df.diff({}).diff({}).dropna(), autolag='AIC')
                '''.format(self.d, seasonal_lag)

        self.label = 'Custom Difference' if self.dftest[0] < self.dftest[4]['1%'] else None

        return self.dftest, self.transformed_time_series, self.label, self.d, self.D, self.transformation_function, self.test_stationarity_code, self.seasonality

Source File: Stationarity-Patients.py From METU-BA4318-Spring2019 with GNU General Public License v3.0

6 votes

def test_stationarity(timeseries):
    """Plot rolling statistics for a series and print its Dickey-Fuller test.

    Draws the data together with its 12-observation rolling mean and rolling
    standard deviation, then runs ``adfuller`` (lag length selected by AIC)
    and prints the test statistic, p-value, number of lags used, number of
    observations used and the critical values.

    Args:
        timeseries: one-dimensional sequence of numeric observations; it must
            be accepted by ``pd.Series``, ``plt.plot`` and ``np.array``.

    Returns:
        None. The function only plots and prints.
    """
    # Rolling mean and standard deviation over a 12-observation window.
    rolling_window = pd.Series(timeseries).rolling(window=12)
    rolmean = rolling_window.mean()
    rolstd = rolling_window.std()

    # Plot the data together with its rolling statistics.
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)

    # Perform Dickey-Fuller test.
    print("Results of Dickey-Fuller Test:")
    # np.array always copies, so the in-place NaN replacement below cannot
    # write through to the caller's data (np.asarray may hand back the
    # caller's own buffer when the input is already a float array).
    array = np.array(timeseries, dtype='float')
    # NOTE(review): NaN is replaced by 0 before testing, which feeds made-up
    # observations to the test -- confirm this is the intended handling.
    np.nan_to_num(array, copy=False)
    dftest = adfuller(array, autolag='AIC')
    dfoutput = pd.Series(
        dftest[0:4],
        index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'],
    )
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)


# Load data

Source File: transformation_function.py From arauto with Apache License 2.0

5 votes

def test_seasonal_log_difference(self):
        '''
        Apply log1p, a first difference and a seasonal difference to the original data, then run the Adfuller test

        Returns:
            dftest (tuple): the value returned by adfuller for the transformed series. This method reads the
                test statistic at index 0 and the critical values at index 4. Please refer to
                (https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html) for
                further information
            transformed_time_series: the series after numpy.log1p, diff(), diff(seasonality) and dropna().
            label (str): 'Log Difference + Seasonal Difference' when the test statistic is below the 1%
                critical value, otherwise None.
            d (int): always 1 for this transformation.
            D (int): always 1 for this transformation.
            transformation_function (func): numpy.log1p, the function applied before differencing.
            test_stationarity_code (str): code snippet that reproduces this transformation.
            seasonality: self.seasonality, the lag used for the seasonal difference.
        '''
        seasonal_lag = self.seasonality
        logged_series = np.log1p(self.original_timeseries)

        self.transformed_time_series = logged_series.diff().diff(seasonal_lag).dropna()
        self.dftest = adfuller(self.transformed_time_series, autolag='AIC')
        self.transformation_function = np.log1p

        self.test_stationarity_code = '''
                # Applying Augmented Dickey-Fuller test
                df = np.log1p(df)
                dftest = adfuller(df.diff().diff({}).dropna(), autolag='AIC')
                '''.format(seasonal_lag)

        # Only label the transformation when the statistic beats the 1% critical value.
        adf_statistic = self.dftest[0]
        critical_value_1pct = self.dftest[4]['1%']
        if adf_statistic < critical_value_1pct:
            self.label = 'Log Difference + Seasonal Difference'
        else:
            self.label = None
        self.d, self.D = 1, 1

        return (
            self.dftest,
            self.transformed_time_series,
            self.label,
            self.d,
            self.D,
            self.transformation_function,
            self.test_stationarity_code,
            self.seasonality,
        )

Source File: DyStockDataML.py From DevilYuan with MIT License

5 votes

def adfTest(s):
        """
            Run an ADF test on `s` with maxlag 1 and return the p-value.

            Larger p-value: random walk, possibly trending.
            Smaller p-value: mean reverting.
        """
        adf_output = stattools.adfuller(s, maxlag=1)
        p_value = adf_output[1]

        return p_value

Source File: DySS_LimitUpAnalysis.py From DevilYuan with MIT License

5 votes

def _adfTest(self, df):
        """
            Run an ADF test (maxlag 1) on the 'close' values of a recent window
            of `df` and return the p-value.

            Larger p-value: random walk, possibly trending.
            Smaller p-value: mean reverting.
        """
        # The window is the slice [-21:-1]: up to 20 entries, ending just
        # before the last one.
        recent_window = df[-21:-1]
        closes = recent_window['close']

        adf_output = stattools.adfuller(closes, maxlag=1)
        p_value = adf_output[1]

        return p_value

Source File: DySS_Pairs.py From DevilYuan with MIT License

5 votes

def _adfTest(self, s):
        """
            Run an ADF test on `s` with maxlag 1 and return the p-value.

            Larger p-value: random walk, possibly trending.
            Smaller p-value: mean reverting.
        """
        adf_output = stattools.adfuller(s, maxlag=1)
        p_value = adf_output[1]

        return p_value

Source File: process_raw_prices.py From pairstrade-fyp-2019 with MIT License

5 votes

def get_filename_without_ext(path):
    """Return the final component of `path` with its last extension removed."""
    stem, _extension = os.path.splitext(os.path.basename(path))
    return stem


# def compute_intercept_and_pvalue(p1, p2):
#     # log Y = intercept + log X + c
#     Y, X = pd.Series(np.log(p1)), pd.Series(np.log(p2))

#     # returns (intercept, pvalue)
#     return (np.mean(Y - X), smts.adfuller(Y - X)[1])

Source File: diagnostic.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def unitroot_adf(x, maxlag=None, trendorder=0, autolag='AIC', store=False):
    """Call ``adfuller`` on `x`, passing `trendorder` as its ``regression`` argument.

    `maxlag`, `autolag` and `store` are forwarded unchanged and
    ``regresults`` is always ``False``. Returns whatever ``adfuller`` returns.
    """
    # NOTE(review): the default trendorder is the integer 0 -- confirm the
    # adfuller in scope accepts integer regression codes.
    adf_options = dict(
        maxlag=maxlag,
        regression=trendorder,
        autolag=autolag,
        store=store,
        regresults=False,
    )
    return adfuller(x, **adf_options)


#TODO: I like the bunch pattern for this too.

Source File: seasonality.py From Learn-Algorithmic-Trading---Fundamentals-of-Algorithmic-Trading with MIT License

5 votes

def test_stationarity(timeseries):
    """Print the main Dickey-Fuller results for `timeseries[1:]`.

    The first element of `timeseries` is left out of the test. Only the first
    four entries of the ``adfuller`` result are printed.
    """
    result_labels = [
        'Test Statistic',
        'p-value',
        '#Lags Used',
        'Number of Observations Used',
    ]
    print('Results of Dickey-Fuller Test:')
    adf_result = adfuller(timeseries[1:], autolag='AIC')
    summary = pd.Series(adf_result[0:4], index=result_labels)
    print(summary)

Source File: test_adfuller_lag.py From vnpy_crypto with MIT License

5 votes

def test_adf_autolag():
    """Check adfuller's automatic lag selection on the log of 'realgdp' from macrodata.

    For each regression type in ['nc', 'c', 'ct', 'ctt'] the test:
      * runs adfuller with autolag='aic' and stored regression results,
      * expects 16 stored autolag results,
      * checks columns of the design matrix of the first five stored results
        against the lagged level and the reversed lagged differences,
      * expects the selected lag (usedlag) to be 2,
      * expects the same first two result entries when the lag is fixed at 2
        with autolag disabled.

    Afterwards, with regression 'c', it checks that maxlag=5 and maxlag=0
    yield 6 and 1 stored autolag results respectively.
    """
    #see issue #246
    #this is mostly a unit test
    d2 = macrodata.load().data

    # k_trend is the position of tr in the list and is used below as a column offset
    for k_trend, tr in enumerate(['nc', 'c', 'ct', 'ctt']):
        #[None:'nc', 0:'c', 1:'ct', 2:'ctt']
        x = np.log(d2['realgdp'])
        xd = np.diff(x)

        #check exog
        adf3 = tsast.adfuller(x, maxlag=None, autolag='aic',
                              regression=tr, store=True, regresults=True)
        # with store=True the last element of the result carries the stored results
        st2 = adf3[-1]

        assert_equal(len(st2.autolag_results), 15 + 1)  #+1 for lagged level
        # only the five smallest keys of autolag_results are inspected
        for l, res in sorted(iteritems(st2.autolag_results))[:5]:
            lag = l-k_trend
            #assert correct design matrices in _autolag
            # column k_trend must equal the level series shifted back by one observation
            assert_equal(res.model.exog[-10:,k_trend], x[-11:-1])
            # the remaining columns of the last row must equal the preceding differences, newest first
            assert_equal(res.model.exog[-1,k_trend+1:], xd[-lag:-1][::-1])
            #min-ic lag of dfgls in Stata is also 2, or 9 for maic with notrend
            assert_equal(st2.usedlag, 2)

        #same result with lag fixed at usedlag of autolag
        adf2 = tsast.adfuller(x, maxlag=2, autolag=None, regression=tr)
        assert_almost_equal(adf3[:2], adf2[:2], decimal=12)


    # x below is the series left over from the last loop iteration
    tr = 'c'
    #check maxlag with autolag
    adf3 = tsast.adfuller(x, maxlag=5, autolag='aic',
                          regression=tr, store=True, regresults=True)
    assert_equal(len(adf3[-1].autolag_results), 5 + 1)
    adf3 = tsast.adfuller(x, maxlag=0, autolag='aic',
                          regression=tr, store=True, regresults=True)
    assert_equal(len(adf3[-1].autolag_results), 0 + 1)

Source File: transformation_function.py From arauto with Apache License 2.0

5 votes

def test_seasonal_difference(self):
        '''
        Apply a seasonal difference to the original data, then run the Adfuller test

        Returns:
            dftest (tuple): the value returned by adfuller for the transformed series. This method reads the
                test statistic at index 0 and the critical values at index 4. Please refer to
                (https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html) for
                further information
            transformed_time_series: the original series after diff(seasonality) and dropna().
            label (str): 'Seasonality Difference' when the test statistic is below the 1% critical value,
                otherwise None.
            d (int): always 0 for this transformation.
            D (int): always 1 for this transformation.
            transformation_function (func): an identity lambda, since no value transformation is applied.
            test_stationarity_code (str): code snippet that reproduces this transformation.
            seasonality: self.seasonality, the lag used for the seasonal difference.
        '''
        seasonal_lag = self.seasonality
        seasonally_differenced = self.original_timeseries.diff(seasonal_lag)

        self.transformed_time_series = seasonally_differenced.dropna()
        self.dftest = adfuller(self.transformed_time_series, autolag='AIC')
        self.transformation_function = lambda x: x

        self.test_stationarity_code = '''
                # Applying Augmented Dickey-Fuller test
                dftest = adfuller(df.diff({}).dropna(), autolag='AIC')
                    '''.format(seasonal_lag)

        # Only label the transformation when the statistic beats the 1% critical value.
        adf_statistic = self.dftest[0]
        critical_value_1pct = self.dftest[4]['1%']
        if adf_statistic < critical_value_1pct:
            self.label = 'Seasonality Difference'
        else:
            self.label = None
        self.d, self.D = 0, 1

        return (
            self.dftest,
            self.transformed_time_series,
            self.label,
            self.d,
            self.D,
            self.transformation_function,
            self.test_stationarity_code,
            self.seasonality,
        )

Source File: transformation_function.py From arauto with Apache License 2.0

5 votes

def test_log_transformation(self):
        '''
        Run the Adfuller test on the original data after a log1p transformation

        Returns:
            dftest (tuple): the value returned by adfuller for the transformed series. This method reads the
                test statistic at index 0 and the critical values at index 4. Please refer to
                (https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html) for
                further information
            transformed_time_series: numpy.log1p applied to the original series.
            label (str): 'Log transformation' when the test statistic is below the 1% critical value,
                otherwise None.
            d (int): always 0 for this transformation.
            D (int): always 0 for this transformation.
            transformation_function (func): numpy.log1p, the function that was applied.
            test_stationarity_code (str): code snippet that reproduces this transformation.
            seasonality: self.seasonality, unchanged.
        '''

        self.transformed_time_series = np.log1p(self.original_timeseries)
        self.dftest = adfuller(self.transformed_time_series, autolag='AIC')
        self.transformation_function = np.log1p

        # The snippet logs the data exactly once, like the computation above. Testing
        # np.log1p(df) after df was already reassigned would apply the log twice.
        self.test_stationarity_code = '''
                # Applying Augmented Dickey-Fuller test
                df = np.log1p(df)
                dftest = adfuller(df, autolag='AIC')
                    '''
        self.label = 'Log transformation' if self.dftest[0] < self.dftest[4]['1%'] else None
        self.d = 0
        self.D = 0

        return self.dftest, self.transformed_time_series, self.label, self.d, self.D, self.transformation_function, self.test_stationarity_code, self.seasonality

Source File: transformation_function.py From arauto with Apache License 2.0

5 votes

def test_first_difference(self):
        '''
        Run the Adfuller test on the original data with first difference

        Returns:
            dftest (tuple): the value returned by adfuller for the transformed series. This method reads the
                test statistic at index 0 and the critical values at index 4. Please refer to
                (https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html) for
                further information
            transformed_time_series: the original series after diff() and dropna().
            label (str): 'Difference' when the test statistic is below the 1% critical value, otherwise None.
            d (int): always 1 for this transformation.
            D (int): always 0 for this transformation.
            transformation_function (func): an identity lambda, since no value transformation is applied.
            test_stationarity_code (str): code snippet that reproduces this transformation.
            seasonality: self.seasonality, unchanged.
        '''

        self.transformed_time_series = self.original_timeseries.diff().dropna()
        self.dftest = adfuller(self.transformed_time_series, autolag='AIC')
        # Set the identity transformation explicitly, as the other non-log transformations do.
        # Otherwise a function left on the instance by an earlier call (for example np.log1p)
        # would be returned alongside data that was never log-transformed.
        self.transformation_function = lambda x: x

        self.test_stationarity_code = '''
                # Applying Augmented Dickey-Fuller test
                dftest = adfuller(df.diff().dropna(), autolag='AIC')
                    '''
        self.label = 'Difference' if self.dftest[0] < self.dftest[4]['1%'] else None
        self.d = 1
        self.D = 0

        return self.dftest, self.transformed_time_series, self.label, self.d, self.D, self.transformation_function, self.test_stationarity_code, self.seasonality

Source File: transformation_function.py From arauto with Apache License 2.0

5 votes

def test_absolute_data(self):
        '''
        Run the Adfuller test on the original data, without transformation

        Returns:
            dftest (tuple): the value returned by adfuller for the original series. This method reads the
                test statistic at index 0 and the critical values at index 4. Please refer to
                (https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html) for
                further information
            transformed_time_series: the original time series, since no transformation is applied.
            label (str): 'Absolute' when the test statistic is below the 1% critical value, otherwise None.
            d (int): always 0, since no differencing is applied.
            D (int): always 0, since no seasonal differencing is applied.
            transformation_function (func): an identity lambda, since no value transformation is applied.
            test_stationarity_code (str): code snippet that reproduces this test.
            seasonality: self.seasonality, unchanged.
        '''

        # Reset every returned attribute explicitly so the result describes the untransformed
        # data even when another transformation was run on this instance beforehand.
        self.transformed_time_series = self.original_timeseries
        self.transformation_function = lambda x: x
        self.d = 0
        self.D = 0

        self.dftest = adfuller(self.original_timeseries, autolag='AIC')

        self.test_stationarity_code = '''
                # Applying Augmented Dickey-Fuller test
                dftest = adfuller(df, autolag='AIC')
                    '''
        self.label = 'Absolute' if self.dftest[0] < self.dftest[4]['1%'] else None

        return self.dftest, self.transformed_time_series, self.label, self.d, self.D, self.transformation_function, self.test_stationarity_code, self.seasonality

Source File: DyStockDataML.py From DevilYuan with MIT License

5 votes

def adfTest(s):
        """
            Run an ADF test on `s` with maxlag 1 and return the p-value.

            Larger p-value: random walk, possibly trending.
            Smaller p-value: mean reverting.
        """
        adf_output = stattools.adfuller(s, maxlag=1)
        p_value = adf_output[1]

        return p_value

Source File: DySS_LimitUpAnalysis.py From DevilYuan with MIT License

5 votes

def _adfTest(self, df):
        """
            Run an ADF test (maxlag 1) on the 'close' values of a recent window
            of `df` and return the p-value.

            Larger p-value: random walk, possibly trending.
            Smaller p-value: mean reverting.
        """
        # The window is the slice [-21:-1]: up to 20 entries, ending just
        # before the last one.
        recent_window = df[-21:-1]
        closes = recent_window['close']

        adf_output = stattools.adfuller(closes, maxlag=1)
        p_value = adf_output[1]

        return p_value

Source File: DySS_Pairs.py From DevilYuan with MIT License

5 votes

def _adfTest(self, s):
        """
            Run an ADF test on `s` with maxlag 1 and return the p-value.

            Larger p-value: random walk, possibly trending.
            Smaller p-value: mean reverting.
        """
        adf_output = stattools.adfuller(s, maxlag=1)
        p_value = adf_output[1]

        return p_value

Source File: diff_nonstationary.py From HN_SO_analysis with MIT License

5 votes

def diff_nonstationary(x, alpha):
    """Return how many times `x` was differenced before the ADF p-value fell to `alpha` or below.

    A return value of 0 means the undifferenced series already passed.

    PARAMETERS:
    1) x - input time series; it must support ``.diff()`` and ``.dropna()``
    2) alpha - significance level
    """
    # The first test includes a linear trend term only when a regression of x
    # on its position index (1..len(x)) is significant at level alpha.
    positions = pd.Series(range(1, len(x) + 1))
    trend_pvalue = stats.linregress(positions, x).pvalue
    if trend_pvalue < alpha:
        first_regression = 'ct'
    else:
        first_regression = 'c'
    pvalue = adfuller(x, regression=first_regression)[1]

    n_diffs = 0  # no differencing needed so far
    while pvalue > alpha:
        # Difference once more and re-test with a constant-only regression.
        x = x.diff()
        pvalue = adfuller(x.dropna(), regression='c')[1]
        n_diffs += 1
    return int(n_diffs)
    
### End of code

Source File: diagnostic.py From vnpy_crypto with MIT License

5 votes

def unitroot_adf(x, maxlag=None, trendorder=0, autolag='AIC', store=False):
    """Call ``adfuller`` on `x`, passing `trendorder` as its ``regression`` argument.

    `maxlag`, `autolag` and `store` are forwarded unchanged and
    ``regresults`` is always ``False``. Returns whatever ``adfuller`` returns.
    """
    # NOTE(review): the default trendorder is the integer 0 -- confirm the
    # adfuller in scope accepts integer regression codes.
    adf_options = dict(
        maxlag=maxlag,
        regression=trendorder,
        autolag=autolag,
        store=store,
        regresults=False,
    )
    return adfuller(x, **adf_options)


#TODO: I like the bunch pattern for this too.

Source File: fraction.py From finance_ml with MIT License

4 votes

def get_opt_d(series, ds=None, lag=1, thres=1e-5, max_size=10000,
              p_thres=1e-2, autolag=None, verbose=1, **kwargs):
    """Find the smallest candidate degree whose differenced series passes the ADF test.

    Candidates are tried in ascending order. The first one whose ADF p-value
    is below `p_thres` is returned; if none qualifies, the largest candidate
    is returned.

    Params
    ------
    series: pd.Series
    ds: array-like, default np.linspace(0, 1, 100)
        Search space of degree.
    lag: int, default 1
        Accepted but not used by this function.
    thres: float, default 1e-5
        Passed to frac_diff_FFD as `thres`.
    max_size: int, default 10000
        Passed to frac_diff_FFD as `max_size`.
    p_thres: float, default 1e-2
        A candidate is accepted when its ADF p-value is below this value.
    autolag: str, optional
        Passed to adfuller as `autolag`.
    verbose: int, default 1
        1 wraps the candidates in tqdm, 2 in tqdm_notebook; any other value
        iterates without a progress bar.
    kwargs: extra keyword arguments passed to adfuller

    Returns
    -------
    The selected element of the sorted `ds` array
    """
    if ds is None:
        ds = np.linspace(0, 1, 100)
    # Work on an ascending copy of the candidates.
    ds = np.array(ds)
    ds = ds[np.argsort(ds)]

    # Optionally wrap the candidates in a progress bar.
    iter_ds = ds
    if verbose == 2:
        iter_ds = tqdm_notebook(ds)
    elif verbose == 1:
        iter_ds = tqdm(ds)

    # Fall back to the largest candidate when no degree passes the test.
    fallback_d = ds[-1]
    for d in iter_ds:
        differenced = frac_diff_FFD(series, d=d, thres=thres, max_size=max_size)
        adf_result = adfuller(differenced.dropna().values, autolag=autolag, **kwargs)
        if adf_result[1] < p_thres:
            return d
    return fallback_d

Source File: pair_selector.py From pairstrade-fyp-2019 with MIT License

4 votes

def coint(df, intercept = True, sig_level = 0.01):
    """
    Find pairs (of 2 time series) that pass the cointegration test.

    Parameters
    ----------
    df: pandas dataframe
        each column is the time series of a certain stock

    intercept: boolean
        if True, the first series is regressed on the second (with a constant
        added) by OLS, the residuals are tested with adfuller, and the pair
        is kept only when the estimated slope is positive.
        if False, the coint() function from statsmodels.tsa.stattools is
        called with trend='c'; no slope is estimated in this mode, so only
        the p-value decides.

    sig_level: if p_value of cointegration test is below this level, then we
        can reject the NULL hypothesis, which says that the two series are not
        cointegrated

    Return
    ------
    A list of tuples of the form (name of stock 1, name of stock 2, p_value of
    cointegration test). Empty when df has fewer than two columns.

    """
    cointegrated_pairs = []

    stock_names = df.columns.values.tolist()

    for stock_1, stock_2 in itertools.combinations(stock_names, 2):
        series_1 = df[stock_1].values.astype(float)
        series_2 = df[stock_2].values.astype(float)

        if not intercept:
            p_value = smts.coint(series_1, series_2, trend='c')[1]
            # No slope is available here; referencing one would raise a
            # NameError as soon as a pair passed the p-value check.
            slope_is_positive = True
        else:
            results = sm.OLS(series_1, sm.add_constant(series_2)).fit()
            # Unpack into a throwaway name so the `intercept` flag is not
            # overwritten for the remaining pairs.
            _estimated_intercept, slope = results.params

            p_value = smts.adfuller(results.resid)[1]
            slope_is_positive = slope > 0

        if p_value < sig_level and slope_is_positive:
            cointegrated_pairs.append((stock_1, stock_2, p_value))

    return cointegrated_pairs

Python statsmodels.tsa.stattools.adfuller() Examples