Python Examples of scipy.special.boxcox

Source File: data.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def _yeo_johnson_optimize(self, x):
        """Estimate the Yeo-Johnson lambda for ``x`` by maximum likelihood.

        NaN entries of ``x`` are discarded, then the negative
        log-likelihood is minimised over lambda with ``optimize.brent``
        started from the bracket ``(-2, 2)`` (the same bracket used for
        Box-Cox).

        Returns the minimiser reported by ``optimize.brent``.
        """
        # NaNs would contaminate both the variance and the sum below, so
        # the likelihood is evaluated on the observed values only.
        x = x[~np.isnan(x)]

        def _objective(lmbda):
            """Negative log-likelihood of the NaN-free data at ``lmbda``."""
            transformed = self._yeo_johnson_transform(x, lmbda)
            variance_term = -x.shape[0] / 2 * np.log(transformed.var())
            signed_log_sum = (np.sign(x) * np.log1p(np.abs(x))).sum()
            return -(variance_term + (lmbda - 1) * signed_log_sum)

        return optimize.brent(_objective, brack=(-2, 2))

Source File: test_boxcox.py From GraphicDesignPatternByPython with MIT License

6 votes

def test_boxcox_basic():
    """Check ``boxcox`` against closed-form values for a few lambdas."""
    samples = np.array([0.5, 1, 2, 4])

    # (lambda, expected) pairs: log(x), x - 1 and 0.5*(x**2 - 1).
    closed_forms = [
        (0, np.log(samples)),
        (1, samples - 1),
        (2, 0.5*(samples**2 - 1)),
    ]
    for lmbda, expected in closed_forms:
        assert_almost_equal(boxcox(samples, lmbda), expected)

    # At x = 0 with lambda > 0 the result is -1 / lambda.
    positive_lambdas = np.array([0.5, 1, 2])
    assert_almost_equal(boxcox(0, positive_lambdas), -1.0 / positive_lambdas)

Source File: test_mpmath.py From GraphicDesignPatternByPython with MIT License

6 votes

def test_boxcox(self):
        """Compare ``sc.boxcox`` with an mpmath reference implementation."""

        def mp_boxcox(x, lmbda):
            # Reference Box-Cox in mpmath arithmetic: log(x) when lambda
            # is zero, otherwise powm1(x, lambda) / lambda.
            mp_x = mpmath.mp.mpf(x)
            mp_lmbda = mpmath.mp.mpf(lmbda)
            if mp_lmbda == 0:
                return mpmath.mp.log(mp_x)
            return mpmath.mp.powm1(mp_x, mp_lmbda) / mp_lmbda

        # First argument is restricted by Arg(a=0, inclusive_a=False);
        # the second uses the default Arg().
        argspec = [Arg(a=0, inclusive_a=False), Arg()]
        assert_mpmath_equal(sc.boxcox,
                            exception_to_nan(mp_boxcox),
                            argspec,
                            n=200,
                            dps=60,
                            rtol=1e-13)

Source File: test_boxcox.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_boxcox_underflow():
    """For a tiny lambda, ``boxcox`` should agree with ``log(x)``."""
    x = 1 + 1e-15
    tiny_lmbda = 1e-306
    assert_allclose(boxcox(x, tiny_lmbda), np.log(x), rtol=1e-14)

Source File: test_boxcox.py From XenonPy with BSD 3-Clause "New" or "Revised" License

5 votes

def test_transform_4x1_2(data):
    """Check ``BoxCox`` fit/transform/inverse on ``data[0] - 2``.

    The transformed output must equal ``scipy.special.boxcox`` applied to
    the input shifted so that its minimum equals ``shift``, and
    ``inverse_transform`` must recover the input.
    """
    from scipy.special import boxcox as bc_

    shift = 1e-5
    raw = data[0] - 2.
    transformer = BoxCox(shift=shift)
    transformed = transformer.fit_transform(raw)

    # Reference value computed directly with scipy and the fitted lambda.
    expected = bc_(raw + (shift - raw.min()), transformer.lambda_[0])
    assert np.all(transformed == expected)

    recovered = transformer.inverse_transform(transformed)
    assert np.allclose(recovered, raw)

Source File: transform.py From XenonPy with BSD 3-Clause "New" or "Revised" License

5 votes

def transform(self, x):
        """
        Apply the Box-Cox transform to ``x``, one column at a time.

        Each column is offset by its entry in ``self._shift`` and then
        transformed with its entry in ``self._lmd``.

        Parameters
        ----------
        x
            Input data. It is first passed through ``self._check_type``
            and then iterated via ``x.T`` (assumes the checked value is a
            2-D array whose columns are the series -- TODO confirm).

        Returns
        -------
        numpy.ndarray
            Box-Cox transformed data. Flattened with ``ravel`` when
            ``self._shape`` has length 1, otherwise reshaped to
            ``(-1, self._shape[1])``.
        """
        x = self._check_type(x)
        xs = []
        for i, col in enumerate(x.T):
            # Comparisons against NaN are False, so a column containing
            # NaN always takes the ``else`` branch.
            if np.all(col > 0):
                self._shift[i] = 0.
            else:
                # NOTE(review): this updates the stored shift in place, so
                # the value changes on every call -- confirm that repeated
                # transform() calls are not expected to reuse the old shift.
                self._shift[i] -= col[~np.isnan(col)].min()

            _lmd = self._lmd[i]
            _shift = self._shift[i]
            # ``Switch`` is a project helper; presumably ``case(v)`` matches
            # when ``_lmd`` equals ``v`` and ``case()`` is the default
            # branch -- TODO confirm against its definition.
            for case in Switch(_lmd):
                if case(np.inf):
                    # Column is passed through untransformed. The name
                    # ``x`` is reused for the per-column result; the
                    # ``x.T`` iterator was created before the loop, so the
                    # iteration itself is unaffected by the rebinding.
                    x = col
                    break
                if case(np.nan):
                    # Column is replaced by an all-NaN array of equal shape.
                    x = np.full(col.shape, np.nan)
                    break
                if case():
                    x = boxcox(col + _shift, _lmd)
            # Collect each result as a column vector for concatenation.
            xs.append(x.reshape(-1, 1))
        xs = np.concatenate(xs, axis=1)

        if len(self._shape) == 1:
            return xs.ravel()
        return xs.reshape(-1, self._shape[1])

Source File: transform.py From XenonPy with BSD 3-Clause "New" or "Revised" License

5 votes

def __init__(self, *, lmd=None, shift=1e-9, tolerance=(-2, 2), on_err=None):
        """
        Store the Box-Cox configuration on the instance.

        Parameters
        ----------
        lmd: list or 1-dim ndarray
            Lambda to use for each input series, if you want to assign
            them yourself. Leave as None (default) to use an inferred
            value. See `boxcox`_ for details.
        shift: float
            Small positive offset that keeps the data positive, since the
            Box-Cox transform requires positive input. Series are shifted
            as ``x = x - x.min + shift``.

        tolerance: tuple
            Accepted range for lmd. Set None to accept any value.
            Default is **(-2, 2)**
        on_err: None or str
            How to handle a failure while inferring lambda. Can be None,
            or one of the strings **log**, **nan** or **raise**.
            **log** returns the logarithmic transform of xs after shifting
            their min to 1.
            **nan** returns an ``ndarray`` of shape xs.shape filled with
            ``np.nan``.
            **raise** raises a FloatingPointError that the caller may
            catch.
            Default (None) returns the input series without any scale
            transform.


        .. _boxcox:
            https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.boxcox.html
        """
        # User-supplied configuration.
        self._lmd = lmd
        self._tolerance = tolerance
        self._on_err = on_err
        # The shift is kept in a one-element list.
        self._shift = [shift]
        # No input shape is known at construction time.
        self._shape = None

Source File: boxcox.py From sktime with BSD 3-Clause "New" or "Revised" License

5 votes

def transform(self, y, **transform_params):
        """Apply the Box-Cox transform with the fitted ``lambda_`` to ``y``.

        The fitted state is checked first and ``y`` is passed through
        ``check_y``. The result is a ``pd.Series`` that keeps the index
        of ``y``. ``transform_params`` is accepted but not used here.
        """
        self.check_is_fitted()
        check_y(y)
        return pd.Series(boxcox(y.values, self.lambda_), index=y.index)

Source File: _continuous_distns.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def _ppf(self, q, lam):
        """Return ``boxcox(q, lam) - boxcox1p(-q, lam)``."""
        lower_part = sc.boxcox(q, lam)
        upper_part = sc.boxcox1p(-q, lam)
        return lower_part - upper_part

Source File: _continuous_distns.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def _isf(self, q, c):
        """Return the negated Box-Cox transform of ``q`` with lambda ``-c``."""
        negated_c = -c
        return -sc.boxcox(q, negated_c)

Source File: test_boxcox.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_boxcox_nonfinite():
    """``boxcox`` gives nan for x < 0 and -inf for x = 0 with lambda <= 0."""
    # Negative x yields nan for each of the lambdas tried.
    negative_x = np.array([-1, -1, -0.5])
    result = boxcox(negative_x, [0.5, 2.0, -1.5])
    assert_equal(result, np.full(3, np.nan))

    # x = 0 with a non-positive lambda yields -inf.
    result = boxcox(0, [-2.5, 0])
    assert_equal(result, np.full(2, -np.inf))

Source File: _continuous_distns.py From lambda-packs with MIT License

5 votes

def _isf(self, q, c):
        """Compute ``-boxcox(q, -c)`` using ``scipy.special.boxcox``."""
        transformed = sc.boxcox(q, -c)
        return -transformed

Source File: _continuous_distns.py From GraphicDesignPatternByPython with MIT License

5 votes

def _ppf(self, q, lam):
        """Difference of ``boxcox(q, lam)`` and ``boxcox1p(-q, lam)``."""
        boxcox_of_q = sc.boxcox(q, lam)
        boxcox1p_of_neg_q = sc.boxcox1p(-q, lam)
        return boxcox_of_q - boxcox1p_of_neg_q

Source File: _continuous_distns.py From GraphicDesignPatternByPython with MIT License

5 votes

def _isf(self, q, c):
        """Evaluate ``boxcox`` at ``q`` with lambda ``-c`` and flip the sign."""
        flipped_lambda = -c
        value = sc.boxcox(q, flipped_lambda)
        return -value

Source File: data.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def _box_cox_optimize(self, x):
        """Estimate the Box-Cox lambda for ``x`` by maximum likelihood.

        The fit is delegated to ``stats.boxcox`` with ``lmbda=None`` and
        only the fitted lambda is returned; the transformed data that
        ``stats.boxcox`` also produces is discarded.
        """
        # NaNs would distort the estimate, so fit on observed values only.
        observed = x[~np.isnan(x)]
        _transformed, fitted_lmbda = stats.boxcox(observed, lmbda=None)
        return fitted_lmbda

Source File: data.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def transform(self, X):
        """Power-transform every column of ``X`` with its fitted lambda.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The data to be transformed using a power transformation.

        Returns
        -------
        X_trans : array-like, shape (n_samples, n_features)
            The transformed data. It is additionally passed through
            ``self._scaler.transform`` when ``self.standardize`` is set.
        """
        check_is_fitted(self, 'lambdas_')
        X = self._check_input(X, check_positive=True, check_shape=True)

        # Select the per-column transform matching the configured method.
        available = {'box-cox': boxcox,
                     'yeo-johnson': self._yeo_johnson_transform}
        apply_power = available[self.method]

        # Suppress "invalid value" warnings (e.g. from NaNs in the data).
        with np.errstate(invalid='ignore'):
            for col_idx, col_lambda in enumerate(self.lambdas_):
                X[:, col_idx] = apply_power(X[:, col_idx], col_lambda)

        return self._scaler.transform(X) if self.standardize else X

Source File: data.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def _fit(self, X, y=None, force_transform=False):
        """Fit one lambda per column of ``X`` and optionally transform it.

        Lambdas are estimated column by column with the optimizer matching
        ``self.method`` and stored in ``self.lambdas_``. The columns are
        power-transformed when ``self.standardize`` or ``force_transform``
        is set. With ``self.standardize`` a ``StandardScaler`` is fitted on
        the transformed data, and also applied when ``force_transform`` is
        set. ``y`` is not used.

        Returns ``X`` after whichever of those steps were applied.
        """
        X = self._check_input(X, check_positive=True, check_method=True)

        if not (self.copy or force_transform):
            # Reached from fit(): work on a copy so the caller's X is not
            # modified in place.
            X = X.copy()

        optimizers = {'box-cox': self._box_cox_optimize,
                      'yeo-johnson': self._yeo_johnson_optimize}
        estimate_lambda = optimizers[self.method]
        with np.errstate(invalid='ignore'):  # hide NaN warnings
            self.lambdas_ = np.array([estimate_lambda(col) for col in X.T])

        if self.standardize or force_transform:
            transforms = {'box-cox': boxcox,
                          'yeo-johnson': self._yeo_johnson_transform}
            apply_power = transforms[self.method]
            with np.errstate(invalid='ignore'):  # hide NaN warnings
                for col_idx, col_lambda in enumerate(self.lambdas_):
                    X[:, col_idx] = apply_power(X[:, col_idx], col_lambda)

        if not self.standardize:
            return X

        self._scaler = StandardScaler(copy=False)
        if force_transform:
            return self._scaler.fit_transform(X)
        self._scaler.fit(X)
        return X

Source File: _continuous_distns.py From lambda-packs with MIT License

5 votes

def _ppf(self, q, lam):
        """Subtract ``boxcox1p(-q, lam)`` from ``boxcox(q, lam)``."""
        first_term = sc.boxcox(q, lam)
        second_term = sc.boxcox1p(-q, lam)
        return first_term - second_term

Source File: boxcox.py From sktime with BSD 3-Clause "New" or "Revised" License

4 votes

def boxcox_normmax(x, bounds=None, brack=(-2.0, 2.0), method='pearsonr'):
    """Compute the optimal Box-Cox lambda for the data ``x``.

    Parameters
    ----------
    x : array-like
        Input data handed to the selected objective.
    bounds : tuple of length 2 or None, default=None
        If None, lambda is searched with ``optimize.brent`` starting from
        ``brack``. Otherwise ``optimize.fminbound`` is used between
        ``bounds[0]`` and ``bounds[1]`` and ``brack`` is ignored.
    brack : tuple, default=(-2.0, 2.0)
        Starting bracket for ``optimize.brent``.
    method : {'pearsonr', 'mle', 'all'}, default='pearsonr'
        'pearsonr' minimises ``1 - r`` where ``r`` is the correlation
        between the sorted transformed data and normal order-statistic
        medians; 'mle' minimises the negative ``boxcox_llf``; 'all'
        returns both.

    Returns
    -------
    float or numpy.ndarray
        The optimiser's result for 'pearsonr' or 'mle'; for 'all' a
        length-2 float array ``[pearsonr_result, mle_result]``.

    Raises
    ------
    ValueError
        If ``bounds`` is not None and not a tuple of length 2, or if
        ``method`` is not one of the recognised names.
    """
    # bounds is None, use simple Brent optimisation
    if bounds is None:
        def optimizer(func, args):
            return optimize.brent(func, brack=brack, args=args)

    # otherwise use bounded Brent optimisation
    else:
        # input checks on bounds
        if not isinstance(bounds, tuple) or len(bounds) != 2:
            raise ValueError(
                f"`bounds` must be a tuple of length 2, but found: {bounds}")

        def optimizer(func, args):
            return optimize.fminbound(func, bounds[0], bounds[1], args=args)

    def _pearsonr(x):
        osm_uniform = _calc_uniform_order_statistic_medians(len(x))
        xvals = distributions.norm.ppf(osm_uniform)

        def _eval_pearsonr(lmbda, xvals, samps):
            # Correlate the sorted transformed sample with the theoretical
            # quantiles and return ``1 - r`` so that minimising the
            # objective maximises the correlation.
            y = boxcox(samps, lmbda)
            yvals = np.sort(y)
            r, _ = stats.pearsonr(xvals, yvals)
            return 1 - r

        return optimizer(_eval_pearsonr, args=(xvals, x))

    def _mle(x):
        def _eval_mle(lmb, data):
            # function to minimize
            return -boxcox_llf(lmb, data)

        return optimizer(_eval_mle, args=(x,))

    def _all(x):
        maxlog = np.zeros(2, dtype=float)
        maxlog[0] = _pearsonr(x)
        maxlog[1] = _mle(x)
        return maxlog

    methods = {'pearsonr': _pearsonr,
               'mle': _mle,
               'all': _all}
    if method not in methods:
        # An f-string is used instead of ``%`` formatting: with ``%`` a
        # tuple-valued ``method`` would be unpacked as format arguments
        # and raise TypeError instead of the intended ValueError.
        raise ValueError(f"Method {method} not recognized.")

    optimfunc = methods[method]
    return optimfunc(x)

Python scipy.special.boxcox() Examples