Python Examples of statsmodels.tsa.arima

Source File: stock_modeler.py From stock-analysis with MIT License

6 votes

def arima(df, *, ar, i, ma, fit=True):
        """
        Create an ARIMA object for modeling time series.

        The `close` column is converted to business-day frequency and
        missing values are forward-filled before the model is built.

        Parameters:
            - df: The dataframe containing the stock closing price as `close`
                  and with a time index.
            - ar: The autoregressive order (p).
            - i: The differenced order (d).
            - ma: The moving average order (q).
            - fit: Whether or not to return the fitted model,
                   defaults to True.

        Returns:
            The result of calling `fit()` on the statsmodels ARIMA object
            when `fit` is True, otherwise the unfitted ARIMA object.
        """
        # `Series.ffill()` is the supported spelling of the deprecated
        # `fillna(method='ffill')`.
        business_day_close = df.close.asfreq('B').ffill()
        arima_model = ARIMA(business_day_close, order=(ar, i, ma))
        return arima_model.fit() if fit else arima_model

Source File: testScoreWithAdapaStatsmodels.py From nyoka with Apache License 2.0

6 votes

def test_01(self):
        """
        Export a fitted ARIMA(2, 0, 1) model to PMML, score it remotely and
        compare the 5-step forecasts and 95% confidence bounds with the
        values statsmodels produces locally.
        """
        ts_data = self.getData()
        f_name = 'arima201_c_car_sold.pmml'
        model = ARIMA(ts_data, order=(2, 0, 1))
        result = model.fit(trend='c', method='css')
        StatsmodelsToPmml(result, f_name, conf_int=[95])

        model_name = self.adapa_utility.upload_to_zserver(f_name)
        z_pred = self.adapa_utility.score_in_zserver(model_name, {'h': 5}, 'TS')

        # Output fields of the scored model are keyed by prefix + series name.
        series_name = ts_data.squeeze().name
        z_output = z_pred['outputs'][0]

        def scored(prefix):
            return np.array(list(z_output[prefix + series_name].values()))

        # Forecast once and reuse it: the first element is compared with the
        # predictions, the last element with the confidence bounds
        # (column 0 as lower, last column as upper).
        forecast = result.forecast(5)
        model_forecasts = forecast[0]
        model_conf_int = forecast[-1]

        self.assertTrue(np.allclose(scored('predicted_'), model_forecasts))
        self.assertTrue(np.allclose(scored('conf_int_95_upper_'), model_conf_int[:, -1]))
        self.assertTrue(np.allclose(scored('conf_int_95_lower_'), model_conf_int[:, 0]))

Source File: timeseries_arima.py From ad_examples with MIT License

6 votes

def rolling_forecast_ARIMA(train, test, order, nsteps=1):
    """Forecast `nsteps` values one at a time, refitting before every step.

    Before each step the model is refit on the training values plus every
    test value consumed so far; the observed test value is then appended to
    that history.

    :param train: initial observations
    :param test: indexable sequence of observed values, one per step
    :param order: tuple
        Passed to fit_ARIMA; a 3-element order is treated as ARIMA,
        anything else as SARIMA when reading the forecast
    :param nsteps: number of one-step forecasts to produce
    :return: array of forecasts, array of (observed - forecast) errors
    """
    history = list(train)
    predictions, forecast_errors = [], []
    # Synthetic time index covering the training data and every step.
    synthetic_dates = pd.to_datetime(np.arange(1, len(train) + nsteps + 1))
    for step in range(nsteps):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            # hack the time index, else ARIMA will not run
            fitted, _ = fit_ARIMA(history, dates=synthetic_dates[:len(history)], order=order)
            if len(order) == 3:
                # ARIMA forecast: first element of the result, then its first value
                prediction = fitted.forecast()[0][0]
            else:
                # SARIMA forecast: first value of the result
                prediction = fitted.forecast()[0]
            predictions.append(prediction)
            observed = test[step]
            forecast_errors.append(observed - prediction)
            history.append(observed)
    return np.array(predictions, dtype=float), np.array(forecast_errors, dtype=float)

Source File: econometrics.py From gs-quant with Apache License 2.0

6 votes

def _evaluate_arima_model(X: Union[pd.Series, pd.DataFrame], arima_order: Tuple[int, int, int],
                              train_size: Union[float, int, None], freq: str) -> Tuple[float, dict]:
        """
        Fit one ARIMA order on the head of X and score its forecast on the tail.
        :param X: time series to split into train and test parts
        :param arima_order: order tuple passed to ARIMA
        :param train_size: None for a 75% split; a float is taken as a
        proportion of len(X); any other value is used directly as the split
        position
        :param freq: frequency passed to ARIMA
        :return: the mse between the test part and the forecast, and the
        fitted model
        """
        if train_size is None:
            split_at = int(len(X) * 0.75)
        elif isinstance(train_size, float):
            split_at = int(len(X) * train_size)
        else:
            split_at = train_size

        train = X[:split_at].astype(float)
        test = X[split_at:].astype(float)

        fitted = ARIMA(train, order=arima_order, freq=freq).fit(disp=False, method='css', trend='nc')

        # calculate test error
        predicted = fitted.forecast(len(test))[0]
        return mse(test, predicted), fitted

Source File: statsmodels.py From MLPrimitives with MIT License

6 votes

def __init__(self, p, d, q, steps):
        """Store the ARIMA order and the forecast horizon on the instance.

        Args:
            p (int):
                Order of the autoregressive model.
            d (int):
                Degree of differencing.
            q (int):
                Order of the moving-average model.
            steps (int):
                Number of time steps to predict ahead.
        """
        self.p, self.d, self.q = p, d, q
        self.steps = steps

Source File: stock_modeler.py From stock-analysis with MIT License

5 votes

def arima_predictions(df, arima_model_fitted, start, end, plot=True, **kwargs):
        """
        Get ARIMA predictions as pandas Series or plot.

        The model output is treated as a series of changes: it is
        cumulatively summed and offset by a baseline closing price taken
        from the final day of `df`.

        Parameters:
            - df: The dataframe for the stock, with a `close` column and
                  a time index.
            - arima_model_fitted: The fitted ARIMA model.
            - start: The start date for the predictions.
            - end: The end date for the predictions.
            - plot: Whether or not to plot the result, default is
                    True meaning the plot is returned instead of the
                    pandas Series containing the predictions.
            - kwargs: Additional keyword arguments to pass to the pandas
                      `plot()` method.

        Returns:
            A matplotlib Axes object or predictions as a Series
            depending on the value of the `plot` argument.
        """
        predicted_changes = arima_model_fitted.predict(start=start, end=end)

        # Baseline: the first `close` whose timestamp lies within one day of
        # the final index entry. This is the value `df.last('1D')` selected;
        # it is spelled out because `DataFrame.last` is deprecated and
        # absent from newer pandas releases.
        cutoff = df.index[-1] - pd.Timedelta(days=1)
        baseline = df.close[df.index > cutoff].iat[0]

        predictions = pd.Series(predicted_changes, name='close').cumsum() + baseline

        if not plot:
            return predictions

        ax = df.close.plot(**kwargs)
        predictions.plot(ax=ax, style='r:', label='arima predictions')
        ax.legend()
        return ax

Source File: test_sarimax.py From vnpy_crypto with MIT License

5 votes

def setup_class(cls):
        """
        Fit the same (1, 1, 1) order with `arima.ARIMA` and with
        `sarimax.SARIMAX`, storing both models and both results on the class.
        """
        cls.true = results_sarimax.wpi1_stationary
        series = cls.true['data']
        order = (1, 1, 1)

        # Model A: arima.ARIMA
        cls.model_a = arima.ARIMA(series, order=order)
        cls.result_a = cls.model_a.fit(disp=-1)

        # Model B: SARIMAX with a constant trend, simple differencing and
        # the Hamilton representation switched on
        sarimax_options = dict(
            order=order,
            trend='c',
            simple_differencing=True,
            hamilton_representation=True,
        )
        cls.model_b = sarimax.SARIMAX(series, **sarimax_options)
        cls.result_b = cls.model_b.fit(disp=-1)

Source File: test_sarimax.py From vnpy_crypto with MIT License

5 votes

def test_mle(self):
        """
        Compare the parameters of result_b (all but the last) with those of
        result_a after rescaling result_a's first parameter.
        """
        # ARIMA estimates the mean of the process, whereas SARIMAX estimates
        # the intercept, so the mean is converted: intercept = (1 - ar) * mean
        arima_params = self.result_a.params.copy()
        mean, ar_coef = arima_params[0], arima_params[1]
        arima_params[0] = (1 - ar_coef) * mean

        sarimax_params = self.result_b.params[:-1]
        assert_allclose(sarimax_params, arima_params, atol=5e-5)

Source File: timeseries_arima.py From ad_examples with MIT License

5 votes

def fit_ARIMA(series, dates=None, order=(0, 0, 1)):
    """Fit an ARIMA model, or a SARIMA model when `order` has more than 3 elements

    :param series: observations to fit
    :param dates: time index handed to the model; when None a synthetic
        index of len(series) entries is generated
    :param order: tuple
        With up to 3 elements it is passed to ARIMA unchanged
        With more, the first three form the ARIMA order and the fourth is
        the seasonal factor of a SARIMA model
    :return: fitted model, its residuals
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # hack the time index, else ARIMA will not run
        if dates is None:
            dates = pd.to_datetime(np.arange(1, len(series) + 1))

        if len(order) > 3:
            model = SARIMAX(
                series,
                dates=dates,
                order=(order[0], order[1], order[2]),
                seasonal_order=(0, 0, 0, order[3]),
            )
        else:
            model = ARIMA(series, dates=dates, order=order)

        # Both model kinds are fit and read the same way.
        fitted = model.fit(disp=0)
        return fitted, fitted.resid

Source File: arima.py From pyFTS with GNU General Public License v3.0

5 votes

def train(self, data, **kwargs):
        """
        Fit the ARIMA model on `data`.

        An `order` keyword, when given, is removed from kwargs and handed
        to `_decompose_order` first. When an indexer is set, `data` is
        passed through its `get_data`. Any exception raised while building
        or fitting the model is printed and `model_fit` is set to None.
        """
        if 'order' in kwargs:
            self._decompose_order(kwargs.pop('order'))

        indexer = self.indexer
        if indexer is not None:
            data = indexer.get_data(data)

        try:
            self.model = stats_arima(data, order=(self.p, self.d, self.q))
            self.model_fit = self.model.fit(disp=0)
        except Exception as ex:
            # Best effort: report the failure and mark the model as unfitted.
            print(ex)
            self.model_fit = None

Source File: econometrics.py From gs-quant with Apache License 2.0

5 votes

def transform(self, X: Union[pd.Series, pd.DataFrame]) -> pd.DataFrame:
        """
        Transform a series based on the best ARIMA found from fit().
        Does not support transformation using MA components.
        :param X: time series to be operated on; required parameter. Only
        its column labels are used, to look up the stored best parameters.
        :return: DataFrame with one transformed column per column of X
        """
        frame = X
        if isinstance(X, pd.Series):
            frame = X.to_frame()

        transformed = {}
        for s_id in frame.columns:
            transformed[s_id] = self._arima_transform_series(self.best_params[s_id])
        return pd.DataFrame(transformed)

Source File: statsmodels.py From MLPrimitives with MIT License

5 votes

def predict(self, X):
        """Predict values using the initialized object.

        One ARIMA model of order (p, d, q) is fit per input sequence and
        used to forecast `steps` values ahead.

        Args:
            X (ndarray):
                1D array holding a single sequence, or 2D array holding one
                sequence per row. A 2D array with a single column is also
                treated as one sequence.

        Returns:
            ndarray:
                Array containing the predictions for each input sequence.
                For 1D input the leading sequence axis is dropped.

        Raises:
            ValueError:
                If X is not a 1D or 2D array.
        """
        dimensions = len(X.shape)

        # 0-d input is rejected here too; it would otherwise fail below with
        # an unrelated IndexError on X.shape[1].
        if dimensions not in (1, 2):
            raise ValueError("Only 1D or 2D arrays are supported")

        if dimensions == 1 or X.shape[1] == 1:
            # Wrap a lone sequence so the loop below sees a batch of one.
            X = np.expand_dims(X, axis=0)

        arima_results = []
        for sequence in range(len(X)):
            arima = arima_model.ARIMA(X[sequence], order=(self.p, self.d, self.q))
            arima_fit = arima.fit(disp=0)
            # The first element of the forecast result holds the predictions.
            arima_results.append(arima_fit.forecast(self.steps)[0])

        arima_results = np.asarray(arima_results)

        if dimensions == 1:
            arima_results = arima_results[0]

        return arima_results

Source File: test_sarimax.py From vnpy_crypto with MIT License

4 votes

def test_arima000():
    """Smooth several SARIMAX models with no AR or MA terms and check states.

    Covers order (0, 0, 0), order (0, 1, 0), a seasonal variant that is only
    run (no assertion), and three regression variants with a constant
    exogenous variable. The regression variants are skipped when
    `compatibility_mode` is set.
    """
    from statsmodels.tsa.statespace.tools import compatibility_mode

    # Test an ARIMA(0,0,0) with measurement error model (i.e. just estimating
    # a variance term)
    # NOTE(review): the model below is built with measurement_error=False,
    # which does not match the wording above - confirm the intended setting.
    # The seed fixes the random draws; the later `error` draw depends on
    # this call order.
    np.random.seed(328423)
    nobs = 50
    endog = pd.DataFrame(np.random.normal(size=nobs))
    mod = sarimax.SARIMAX(endog, order=(0, 0, 0), measurement_error=False)
    res = mod.smooth(mod.start_params)
    # The smoothed state is expected to reproduce the data itself.
    assert_allclose(res.smoothed_state, endog.T)

    # ARIMA(0, 1, 0)
    mod = sarimax.SARIMAX(endog, order=(0, 1, 0), measurement_error=False)
    res = mod.smooth(mod.start_params)
    # Skipping the first row and first column, the state is compared with
    # the first difference of the data.
    assert_allclose(res.smoothed_state[1:, 1:], endog.diff()[1:].T)

    # SARIMA(0, 1, 0)x(0, 1, 0, 1)
    # Only checks that smoothing runs; the result is not asserted on.
    mod = sarimax.SARIMAX(endog, order=(0, 1, 0), measurement_error=True,
                          seasonal_order=(0, 1, 0, 1))
    res = mod.smooth(mod.start_params)

    # Exogenous variables
    # endog is a constant 10 plus noise; exog is a column of ones.
    error = np.random.normal(size=nobs)
    endog = np.ones(nobs) * 10 + error
    exog = np.ones(nobs)

    # We need univariate filtering here, to guarantee we won't hit singular
    # forecast error covariance matrices.
    if compatibility_mode:
        return

    # OLS
    mod = sarimax.SARIMAX(endog, order=(0, 0, 0), exog=exog)
    mod.ssm.filter_univariate = True
    # presumably the parameters are [exog coefficient, variance] - TODO confirm
    res = mod.smooth([10., 1.])
    assert_allclose(res.smoothed_state[0], error, atol=1e-10)

    # RLS
    mod = sarimax.SARIMAX(endog, order=(0, 0, 0), exog=exog,
                          mle_regression=False)
    mod.ssm.filter_univariate = True
    mod.initialize_known([0., 10.], np.diag([1., 0.]))
    res = mod.smooth([1.])
    # State 0 is expected to equal the noise, state 1 the constant 10.
    assert_allclose(res.smoothed_state[0], error, atol=1e-10)
    assert_allclose(res.smoothed_state[1], 10, atol=1e-10)

    # RLS + TVP
    mod = sarimax.SARIMAX(endog, order=(0, 0, 0), exog=exog,
                          mle_regression=False, time_varying_regression=True)
    mod.ssm.filter_univariate = True
    mod.initialize_known([10.], np.diag([0.]))
    res = mod.smooth([0., 1.])
    assert_allclose(res.smoothed_state[0], 10, atol=1e-10)

Source File: econometrics.py From gs-quant with Apache License 2.0

4 votes

def fit(self, X: Union[pd.Series, pd.DataFrame], train_size: Union[float, int, None] = None,
            p_vals: list = (0, 1, 2), d_vals: list = (0, 1, 2), q_vals: list = (0, 1, 2), freq: str = None) -> 'arima':
        """
        Train a combination of ARIMA models. If pandas DataFrame, finds the
        best arima model parameters for each column. If pandas Series, finds
        the best arima model parameters for the series.
        :param X: time series to be operated on; required parameter
        :param train_size: if float, should be between 0.0 and 1.0 and
        represent the proportion of the dataset to include in the train split.
        If int, represents the absolute number of train samples. If None,
        the value is automatically set 0.75
        :p_vals: number of autoregressive terms to search; default is [0,1,2]
        :d_vals: number of differences to search; default is [0,1,2]
        :q_vals: number of lagged forecast to search; default is [0,1,2]
        :freq: frequency of time series, default is None
        :return: self
        :raises ValueError: if, for some series, no candidate order could be
        evaluated with an error below infinity
        """
        if isinstance(X, pd.Series):
            X = X.to_frame()

        for series_id in X.columns:
            series = X[series_id]
            best_score = float('inf')
            best_order = None
            best_const = None
            best_ar_coef = None
            best_ma_coef = None
            best_resid = None
            # Grid search over every (p, d, q) combination.
            for order in itertools.product(p_vals, d_vals, q_vals):
                try:
                    error, model_fit = self._evaluate_arima_model(series, order, train_size, freq)
                    if error < best_score:
                        best_score = error
                        best_order = order
                        best_const = model_fit.params.to_dict().get('const', 0)
                        best_ar_coef = model_fit.arparams
                        best_ma_coef = model_fit.maparams
                        best_resid = model_fit.resid
                except Exception as e:
                    # A failing candidate is reported and skipped so the
                    # remaining orders still get evaluated.
                    print('   {}'.format(e))
                    continue

            # Without this guard the unpacking below fails with an opaque
            # "cannot unpack non-iterable NoneType" error.
            if best_order is None:
                raise ValueError(
                    'No candidate ARIMA order could be fitted for series {!r}'.format(series_id)
                )

            p, d, q = best_order
            self.best_params[series_id] = ARIMABestParams(freq, p, d, q, best_const, best_ar_coef, best_ma_coef,
                                                          best_resid, series)
        return self

Source File: ARIMA.py From Splunking-Crime with GNU Affero General Public License v3.0

4 votes

def _fit(self, X):
        """
        Check the input frame, fit the ARIMA estimator on the time-series
        column and attach time metadata to the fitted estimator.

        X is first passed through the df_util field/row checks and has its
        unused fields dropped; the time-series column is then cast to float
        in place.

        Raises:
            ValueError: if the time-series column has object dtype, or if
                fitting raises a ValueError (stationarity and invertibility
                failures get a dedicated message).
            RuntimeError: if fitting raises MissingDataError or any other
                exception.
        """
        for variable in self.feature_variables:
            df_util.assert_field_present(X, variable)
        df_util.drop_unused_fields(X, self.feature_variables)
        df_util.assert_any_fields(X)
        df_util.assert_any_rows(X)

        if X[self.time_series].dtype == object:
            raise ValueError('%s contains non-numeric data. ARIMA only accepts numeric data.' % self.time_series)
        X[self.time_series] = X[self.time_series].astype(float)

        try:
            self.estimator = _ARIMA(X[self.time_series].values,
                                    order=self.out_params['model_params']['order'],
                                    missing=self.out_params['model_params']['missing']).fit(disp=False)
        except ValueError as e:
            # str(e) instead of e.message: exceptions have no `message`
            # attribute on Python 3, so the old lookup itself raised
            # AttributeError and hid the real error.
            message = str(e)
            if 'stationary' in message:
                raise ValueError("The computed initial AR coefficients are not "
                                 "stationary. You should induce stationarity by choosing a different model order.")
            elif 'invertible' in message:
                raise ValueError("The computed initial MA coefficients are not invertible. "
                                 "You should induce invertibility by choosing a different model order.")
            else:
                cexc.log_traceback()
                raise ValueError(e)
        except MissingDataError:
            raise RuntimeError('Empty or null values are not supported in %s. '
                               'If using timechart, try using a larger span.'
                               % self.time_series)
        except Exception as e:
            cexc.log_traceback()
            raise RuntimeError(e)

        # Saving the _time but not as a part of the ARIMA structure but as new attribute for ARIMA.
        # NOTE(review): the two branches use different key names
        # (first_timestamp/last_timestamp vs first_time/last_time) - confirm
        # which spelling the readers of datetime_information expect.
        if '_time' in self.feature_variables:
            freq = self._find_freq(X['_time'].values, self.freq_threshold)
            self.estimator.datetime_information = dict(ver=0,
                                                       _time=X['_time'].values,
                                                       freq=freq,
                                                       # in seconds (unix epoch)
                                                       first_timestamp=X['_time'].values[0],
                                                       last_timestamp=X['_time'].values[-1],
                                                       length=len(X))
        else:
            self.estimator.datetime_information = dict(ver=0,
                                                       _time=None,
                                                       freq=None,
                                                       first_time=None,
                                                       last_time=None,
                                                       length=len(X))

Python statsmodels.tsa.arima_model.ARIMA Examples