Python Examples of patsy.PatsyError

Source File: test_regression.py From vnpy_crypto with MIT License

6 votes

def test_formula_missing_cat():
    """Regression test for gh-805: a missing value in a formula column.

    One entry of the ``firm`` column of the Grunfeld data is set to NaN.
    Fitting the formula on the raw frame is expected to give the same
    parameters as fitting it on the frame with NaN rows dropped by hand, and
    building the model with ``missing='raise'`` is expected to raise
    ``PatsyError``.
    """
    import statsmodels.api as sm
    from statsmodels.formula.api import ols
    from patsy import PatsyError

    formula = 'value ~ invest + capital + firm + year'

    grunfeld = sm.datasets.grunfeld.load_pandas().data
    first_label = grunfeld.index[0]
    grunfeld.loc[first_label, 'firm'] = np.nan

    # Reference fit: the incomplete row is removed before building the model.
    fit_dropped = ols(formula=formula, data=grunfeld.dropna()).fit()

    # Fit on the frame that still contains the NaN entry.
    fit_raw = ols(formula=formula, data=grunfeld).fit()

    assert_almost_equal(fit_dropped.params.values, fit_raw.params.values)

    assert_raises(PatsyError, ols, formula, data=grunfeld, missing='raise')

Source File: contrasts.py From vnpy_crypto with MIT License

6 votes

def test_ContrastMatrix():
    """Exercise ContrastMatrix: construction, repr, and shape validation.

    Also runs ``assert_no_pickling`` on the constructed instance.
    """
    cm = ContrastMatrix([[1, 0], [0, 1]], ["a", "b"])
    assert np.array_equal(cm.matrix, np.eye(2))
    assert cm.column_suffixes == ["a", "b"]
    # smoke test
    repr(cm)

    # A matrix with one column cannot be paired with two column suffixes.
    # Checked with a plain try/except/else so that the test does not depend
    # on the `nose` package (which is in maintenance mode).
    try:
        ContrastMatrix([[1], [0]], ["a", "b"])
    except PatsyError:
        pass
    else:
        raise AssertionError("PatsyError not raised")

    assert_no_pickling(cm)

# This always produces an object of the type that Python calls 'str' (whether
# that be a Python 2 string-of-bytes or a Python 3 string-of-unicode). It does
# *not* make any particular guarantees about being reversible or having other
# such useful programmatic properties -- it just produces something that will
# be nice for users to look at.

Source File: data.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def __setstate__(self, d):
        if "restore_design_info" in d:
            # NOTE: there may be a more performant way to do this
            from patsy import dmatrices, PatsyError
            exc = []
            try:
                data = d['frame']
            except KeyError:
                data = d['orig_endog'].join(d['orig_exog'])

            for depth in [2, 3, 1, 0, 4]:  # sequence is a guess where to likely find it
                try:
                    _, design = dmatrices(d['formula'], data, eval_env=depth,
                                          return_type='dataframe')
                    break
                except (NameError, PatsyError) as e:
                    print('not in depth %d' % depth)
                    exc.append(e)   # why do I need a reference from outside except block
                    pass
            else:
                raise exc[-1]

            self.design_info = design.design_info
            del d["restore_design_info"]
        self.__dict__.update(d)

Source File: compat.py From vnpy_crypto with MIT License

6 votes

def call_and_wrap_exc(msg, origin, f, *args, **kwargs):
    """Call ``f(*args, **kwargs)`` and return its result.

    If the call raises an ``Exception``:

    * on Python 3, a ``PatsyError`` is raised instead, chained from the
      original exception, with the message ``"<msg>: <class name>: <exc>"``
      and the given ``origin``;
    * on Python 2, the original exception is re-raised unchanged, except
      that a ``PatsyError`` first gets ``set_origin(origin)`` called on it.
    """
    try:
        result = f(*args, **kwargs)
    except Exception as e:
        if sys.version_info[0] < 3:
            # In python 2, we just let the original exception escape --
            # better than destroying the traceback. But if it's a PatsyError,
            # we can at least set the origin properly.
            if isinstance(e, PatsyError):
                e.set_origin(origin)
            raise
        description = "%s: %s: %s" % (msg, e.__class__.__name__, e)
        new_exc = PatsyError(description, origin)
        # Use 'exec' to hide this syntax from the Python 2 parser. The local
        # names 'new_exc' and 'e' must match the names used in the string.
        exec("raise new_exc from e")
    else:
        return result

Source File: eval.py From vnpy_crypto with MIT License

6 votes

def ast_names(code):
    """Generate the identifier of every ``ast.Name`` node found in ``code``.

    :arg code: A string containing Python source (normally an expression).
    :raises PatsyError: when the code contains a lambda, a comprehension or
      a generator expression. Because this is a generator, the error is
      raised while iterating, not when the function is called.
    """
    # Constructs that can bind new names are hard to analyse correctly, so
    # they are rejected outright.
    forbidden = [ast.Lambda, ast.ListComp, ast.GeneratorExp]
    if sys.version_info >= (2, 7):
        forbidden.extend([ast.DictComp, ast.SetComp])
    forbidden = tuple(forbidden)

    for node in ast.walk(ast.parse(code)):
        if isinstance(node, forbidden):
            raise PatsyError("Lambda, list/dict/set comprehension, generator "
                             "expression in patsy formula not currently supported.")
        elif isinstance(node, ast.Name):
            yield node.id

Source File: desc.py From vnpy_crypto with MIT License

6 votes

def eval(self, tree, require_evalexpr=True):
        """Evaluate the parse node ``tree`` with its registered evaluator.

        The evaluator is looked up in ``self._evaluators`` under the key
        ``(tree.type, len(tree.args))`` and called as
        ``evaluator(self, tree)``.

        :arg tree: A ``ParseNode``.
        :arg require_evalexpr: When true, the result must be an
          ``IntermediateExpr``; any other result raises ``PatsyError``.
        :raises PatsyError: if no evaluator is registered for the key, or if
          the result has the wrong type while ``require_evalexpr`` is true.
        """
        assert isinstance(tree, ParseNode)
        key = (tree.type, len(tree.args))
        if key not in self._evaluators:
            raise PatsyError("I don't know how to evaluate this "
                             "'%s' operator" % (tree.type,),
                             tree.token)

        outcome = self._evaluators[key](self, tree)
        if not require_evalexpr or isinstance(outcome, IntermediateExpr):
            return outcome

        # Anything else is an error; a ModelDesc gets a dedicated message.
        if isinstance(outcome, ModelDesc):
            message = "~ can only be used once, and only at the top level"
        else:
            message = ("custom operator returned an object that I don't "
                       "know how to handle")
        raise PatsyError(message, tree)

#############

Source File: desc.py From vnpy_crypto with MIT License

6 votes

def _eval_binary_power(evaluator, tree):
    """Evaluate a binary ``**`` node: ``tree.args[0] ** tree.args[1]``.

    The right operand must be a node of type "ONE" or "NUMBER" whose token
    text converts to an integer >= 1; otherwise ``PatsyError`` is raised.
    The result collects the terms of the base expression together with the
    terms of its repeated interactions with itself.
    """
    base_expr = evaluator.eval(tree.args[0])
    _check_interactable(base_expr)

    exponent_node = tree.args[1]
    exponent = -1
    if exponent_node.type in ("ONE", "NUMBER"):
        text = exponent_node.token.extra
        try:
            exponent = int(text)
        except ValueError:
            # Not an integer literal; rejected by the check just below.
            pass
    if exponent < 1:
        raise PatsyError("'**' requires a positive integer", tree.args[1])

    # Small optimization: (a + b)**100 is just the same as (a + b)**2.
    exponent = min(len(base_expr.terms), exponent)

    collected_terms = base_expr.terms
    product = base_expr
    for _ in range(exponent - 1):
        product = _interaction(base_expr, product)
        collected_terms = collected_terms + product.terms
    return IntermediateExpr(False, None, False, collected_terms)

Source File: user_util.py From vnpy_crypto with MIT License

6 votes

def test_demo_data():
    """Check the contents and sizes of demo_data output, and its errors."""
    d1 = demo_data("a", "b", "x")
    assert sorted(d1.keys()) == ["a", "b", "x"]
    assert d1["a"] == ["a1", "a1", "a2", "a2", "a1", "a1", "a2", "a2"]
    assert d1["b"] == ["b1", "b2", "b1", "b2", "b1", "b2", "b1", "b2"]
    assert d1["x"].dtype == np.dtype(float)
    assert d1["x"].shape == (8,)

    d2 = demo_data("x", "y")
    assert sorted(d2.keys()) == ["x", "y"]
    assert len(d2["x"]) == len(d2["y"]) == 5

    assert len(demo_data("x", min_rows=10)["x"]) == 10
    assert len(demo_data("a", "b", "x", min_rows=10)["x"]) == 12
    assert len(demo_data("a", "b", "x", min_rows=10, nlevels=3)["x"]) == 18

    # The error cases are checked with plain try/except/else blocks so that
    # the test does not depend on the `nose` package (which is in
    # maintenance mode).
    try:
        demo_data("a", "b", "__123")
    except PatsyError:
        pass
    else:
        raise AssertionError("PatsyError not raised")

    try:
        demo_data("a", "b", asdfasdf=123)
    except TypeError:
        pass
    else:
        raise AssertionError("TypeError not raised")

Source File: build.py From vnpy_crypto with MIT License

6 votes

def _build_subterm(subterm, factor_infos, factor_values, out):
    """Fill ``out`` in place with the columns of ``subterm``.

    ``out`` is first set to 1 everywhere. Then, for each column combination
    produced by ``_subterm_column_combinations``, the matching output column
    is multiplied by one contribution per factor: a contrast-matrix lookup
    for "categorical" factors, or the selected value column for "numerical"
    factors.

    :raises PatsyError: if a categorical factor has any negative value
      (reported as missing values).
    """
    assert subterm.num_columns == out.shape[1]
    out[...] = 1
    combinations = _subterm_column_combinations(factor_infos, subterm)
    for out_col, picked in enumerate(combinations):
        for factor, picked_col in zip(subterm.factors, picked):
            if factor_infos[factor].type != "categorical":
                assert factor_infos[factor].type == "numerical"
                values = factor_values[factor]
                assert values.shape[1] == factor_infos[factor].num_columns
                out[:, out_col] *= values[:, picked_col]
                continue

            contrast = subterm.contrast_matrices[factor]
            codes = factor_values[factor]
            if np.any(codes < 0):
                raise PatsyError("can't build a design matrix "
                                 "containing missing values", factor)
            out[:, out_col] *= contrast.matrix[codes, picked_col]

Source File: constraint.py From vnpy_crypto with MIT License

6 votes

def eval(self, tree, constraint=False):
        """Evaluate ``tree`` through the dispatch table ``self._dispatch``.

        The handler is looked up under ``(tree.type, len(tree.args))``.

        :arg constraint: When true, the result is forced to be a
          ``LinearConstraint``: a non-constraint value of size
          ``self._N + 1`` is converted, using its first ``self._N`` entries
          and the negated last entry. When false, the result must *not* be
          a ``LinearConstraint``.
        :raises PatsyError: if a constraint is requested but the first
          ``self._N`` entries are all zero, or if a constraint object shows
          up where none is allowed.
        """
        key = (tree.type, len(tree.args))
        assert key in self._dispatch
        val = self._dispatch[key](tree)
        got_constraint = isinstance(val, LinearConstraint)

        if not constraint:
            # Force it to *not* be a constraint
            if got_constraint:
                raise PatsyError("unexpected constraint object", tree)
            return val

        # Force it to be a constraint
        if got_constraint:
            return val
        assert val.size == self._N + 1
        if np.all(val[:self._N] == 0):
            raise PatsyError("term is constant, with no variables",
                             tree)
        return LinearConstraint(self._variable_names,
                                val[:self._N],
                                -val[-1])

Source File: constraint.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def _eval_binary_eq(self, tree):
        """Evaluate an ``=`` node into a combined ``LinearConstraint``.

        Chained equalities such as "a1 = a2 = a3" are parsed as
        "(a1 = a2) = a3". Each nested "=" operand is evaluated as its own
        constraint and replaced by its adjacent operand before the outer
        equality is evaluated.

        :raises PatsyError: if the left and right sides have identical
          coefficients in their first ``self._N`` entries.
        """
        operands = list(tree.args)
        collected = []
        for side, operand in enumerate(operands):
            if operand.type != "=":
                continue
            collected.append(self.eval(operand, constraint=True))
            # make our left argument be their right argument, or
            # vice-versa
            operands[side] = operand.args[1 - side]

        lhs = self.eval(operands[0])
        rhs = self.eval(operands[1])
        coefs = lhs[:self._N] - rhs[:self._N]
        if np.all(coefs == 0):
            raise PatsyError("no variables appear in constraint", tree)
        constant = -lhs[-1] + rhs[-1]
        collected.append(
            LinearConstraint(self._variable_names, coefs, constant))
        return LinearConstraint.combine(collected)

Source File: parse_formula.py From vnpy_crypto with MIT License

6 votes

def _parsing_error_test(parse_fn, error_descs): # pragma: no cover
    for error_desc in error_descs:
        letters = []
        start = None
        end = None
        for letter in error_desc:
            if letter == "<":
                start = len(letters)
            elif letter == ">":
                end = len(letters)
            else:
                letters.append(letter)
        bad_code = "".join(letters)
        assert start is not None and end is not None
        print(error_desc)
        print(repr(bad_code), start, end)
        try:
            parse_fn(bad_code)
        except PatsyError as e:
            print(e)
            assert e.origin.code == bad_code
            assert e.origin.start == start
            assert e.origin.end == end
        else:
            assert False, "parser failed to report an error!"

Source File: user_util.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def test_demo_data():
    """Check the contents and sizes of demo_data output, and its errors."""
    d1 = demo_data("a", "b", "x")
    assert sorted(d1.keys()) == ["a", "b", "x"]
    assert d1["a"] == ["a1", "a1", "a2", "a2", "a1", "a1", "a2", "a2"]
    assert d1["b"] == ["b1", "b2", "b1", "b2", "b1", "b2", "b1", "b2"]
    assert d1["x"].dtype == np.dtype(float)
    assert d1["x"].shape == (8,)

    d2 = demo_data("x", "y")
    assert sorted(d2.keys()) == ["x", "y"]
    assert len(d2["x"]) == len(d2["y"]) == 5

    assert len(demo_data("x", min_rows=10)["x"]) == 10
    assert len(demo_data("a", "b", "x", min_rows=10)["x"]) == 12
    assert len(demo_data("a", "b", "x", min_rows=10, nlevels=3)["x"]) == 18

    # The error cases are checked with plain try/except/else blocks so that
    # the test does not depend on the `nose` package (which is in
    # maintenance mode).
    try:
        demo_data("a", "b", "__123")
    except PatsyError:
        pass
    else:
        raise AssertionError("PatsyError not raised")

    try:
        demo_data("a", "b", asdfasdf=123)
    except TypeError:
        pass
    else:
        raise AssertionError("TypeError not raised")

Source File: desc.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def _eval_binary_power(evaluator, tree):
    """Evaluate a binary ``**`` node: ``tree.args[0] ** tree.args[1]``.

    The right operand must be a node of type "ONE" or "NUMBER" whose token
    text converts to an integer >= 1; otherwise ``PatsyError`` is raised.
    The result collects the terms of the base expression together with the
    terms of its repeated interactions with itself.
    """
    base_expr = evaluator.eval(tree.args[0])
    _check_interactable(base_expr)

    exponent_node = tree.args[1]
    exponent = -1
    if exponent_node.type in ("ONE", "NUMBER"):
        text = exponent_node.token.extra
        try:
            exponent = int(text)
        except ValueError:
            # Not an integer literal; rejected by the check just below.
            pass
    if exponent < 1:
        raise PatsyError("'**' requires a positive integer", tree.args[1])

    # Small optimization: (a + b)**100 is just the same as (a + b)**2.
    exponent = min(len(base_expr.terms), exponent)

    collected_terms = base_expr.terms
    product = base_expr
    for _ in range(exponent - 1):
        product = _interaction(base_expr, product)
        collected_terms = collected_terms + product.terms
    return IntermediateExpr(False, None, False, collected_terms)

Source File: highlevel.py From vnpy_crypto with MIT License

6 votes

def dmatrices(formula_like, data={}, eval_env=0,
              NA_action="drop", return_type="matrix"):
    """Construct two design matrices given a formula_like and data.

    This function is identical to :func:`dmatrix`, except that it requires
    (and returns) two matrices instead of one. By convention, the first matrix
    is the "outcome" or "y" data, and the second is the "predictor" or "x"
    data.

    See :func:`dmatrix` for details.

    :returns: A tuple ``(lhs, rhs)`` as produced by
      ``_do_highlevel_design``.
    :raises PatsyError: If the left-hand-side matrix has zero columns.
    """
    # NOTE(review): capture() is called with reference=1 directly in this
    # function; presumably that makes an integer eval_env count stack frames
    # relative to our caller. Confirm against EvalEnvironment.capture before
    # moving this call into a helper.
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    # NOTE(review): `data` has a mutable default ({}); it is only passed
    # through here -- confirm that _do_highlevel_design never mutates it.
    (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
                                      NA_action, return_type)
    # A zero-column left-hand side means no outcome was specified.
    if lhs.shape[1] == 0:
        raise PatsyError("model is missing required outcome variables")
    return (lhs, rhs)

Source File: desc.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def eval(self, tree, require_evalexpr=True):
        """Evaluate the parse node ``tree`` with its registered evaluator.

        The evaluator is looked up in ``self._evaluators`` under the key
        ``(tree.type, len(tree.args))`` and called as
        ``evaluator(self, tree)``.

        :arg tree: A ``ParseNode``.
        :arg require_evalexpr: When true, the result must be an
          ``IntermediateExpr``; any other result raises ``PatsyError``.
        :raises PatsyError: if no evaluator is registered for the key, or if
          the result has the wrong type while ``require_evalexpr`` is true.
        """
        assert isinstance(tree, ParseNode)
        key = (tree.type, len(tree.args))
        if key not in self._evaluators:
            raise PatsyError("I don't know how to evaluate this "
                             "'%s' operator" % (tree.type,),
                             tree.token)

        outcome = self._evaluators[key](self, tree)
        if not require_evalexpr or isinstance(outcome, IntermediateExpr):
            return outcome

        # Anything else is an error; a ModelDesc gets a dedicated message.
        if isinstance(outcome, ModelDesc):
            message = "~ can only be used once, and only at the top level"
        else:
            message = ("custom operator returned an object that I don't "
                       "know how to handle")
        raise PatsyError(message, tree)

#############

Source File: eval.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def ast_names(code):
    """Generate the identifier of every ``ast.Name`` node found in ``code``.

    :arg code: A string containing Python source (normally an expression).
    :raises PatsyError: when the code contains a lambda, a comprehension or
      a generator expression. Because this is a generator, the error is
      raised while iterating, not when the function is called.
    """
    # Constructs that can bind new names are hard to analyse correctly, so
    # they are rejected outright.
    forbidden = [ast.Lambda, ast.ListComp, ast.GeneratorExp]
    if sys.version_info >= (2, 7):
        forbidden.extend([ast.DictComp, ast.SetComp])
    forbidden = tuple(forbidden)

    for node in ast.walk(ast.parse(code)):
        if isinstance(node, forbidden):
            raise PatsyError("Lambda, list/dict/set comprehension, generator "
                             "expression in patsy formula not currently supported.")
        elif isinstance(node, ast.Name):
            yield node.id

Source File: data.py From vnpy_crypto with MIT License

6 votes

def __setstate__(self, d):
        if "restore_design_info" in d:
            # NOTE: there may be a more performant way to do this
            from patsy import dmatrices, PatsyError
            exc = []
            try:
                data = d['frame']
            except KeyError:
                data = d['orig_endog'].join(d['orig_exog'])

            for depth in [2, 3, 1, 0, 4]:  # sequence is a guess where to likely find it
                try:
                    _, design = dmatrices(d['formula'], data, eval_env=depth,
                                          return_type='dataframe')
                    break
                except (NameError, PatsyError) as e:
                    print('not in depth %d' % depth)
                    exc.append(e)   # why do I need a reference from outside except block
                    pass
            else:
                raise exc[-1]

            self.design_info = design.design_info
            del d["restore_design_info"]
        self.__dict__.update(d)

Source File: compat.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def call_and_wrap_exc(msg, origin, f, *args, **kwargs):
    """Call ``f(*args, **kwargs)`` and return its result.

    If the call raises an ``Exception``:

    * on Python 3, a ``PatsyError`` is raised instead, chained from the
      original exception, with the message ``"<msg>: <class name>: <exc>"``
      and the given ``origin``;
    * on Python 2, the original exception is re-raised unchanged, except
      that a ``PatsyError`` first gets ``set_origin(origin)`` called on it.
    """
    try:
        result = f(*args, **kwargs)
    except Exception as e:
        if sys.version_info[0] < 3:
            # In python 2, we just let the original exception escape --
            # better than destroying the traceback. But if it's a PatsyError,
            # we can at least set the origin properly.
            if isinstance(e, PatsyError):
                e.set_origin(origin)
            raise
        description = "%s: %s: %s" % (msg, e.__class__.__name__, e)
        new_exc = PatsyError(description, origin)
        # Use 'exec' to hide this syntax from the Python 2 parser. The local
        # names 'new_exc' and 'e' must match the names used in the string.
        exec("raise new_exc from e")
    else:
        return result

Source File: infix_parser.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def _read_noun_context(token, c):
    """Consume ``token`` at a position where a noun is expected.

    An open paren or a unary operator is pushed onto ``c.op_stack`` and
    True is returned. A token whose type is in ``c.atomic_types`` is pushed
    onto ``c.noun_stack`` as a ``ParseNode`` and False is returned. Trace
    messages are printed when ``c.trace`` is set.

    :raises PatsyError: for any other token type.
    """
    kind = token.type

    if kind == Token.LPAREN:
        if c.trace:
            print("Pushing open-paren")
        c.op_stack.append(_StackOperator(_open_paren, token))
        return True

    if kind in c.unary_ops:
        if c.trace:
            print("Pushing unary op %r" % (kind,))
        c.op_stack.append(_StackOperator(c.unary_ops[kind], token))
        return True

    if kind in c.atomic_types:
        if c.trace:
            print("Pushing noun %r (%r)" % (kind, token.extra))
        noun = ParseNode(kind, token, [], token.origin)
        c.noun_stack.append(noun)
        return False

    raise PatsyError("expected a noun, not '%s'"
                     % (token.origin.relevant_code(),),
                     token)

Source File: desc.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def _check_interactable(expr):
    if expr.intercept:
        raise PatsyError("intercept term cannot interact with "
                            "anything else", expr.intercept_origin)

Source File: highlevel.py From vnpy_crypto with MIT License

5 votes

def incr_dbuilders(formula_like, data_iter_maker, eval_env=0,
                   NA_action="drop"):
    """Construct two design matrix builders incrementally from a large data
    set.

    :func:`incr_dbuilders` is to :func:`incr_dbuilder` as :func:`dmatrices` is
    to :func:`dmatrix`. See :func:`incr_dbuilder` for details.

    :returns: The value returned by ``_try_incr_builders``.
    :raises PatsyError: If ``_try_incr_builders`` returns None, or if the
      first returned design info has no column names.
    """
    # NOTE(review): capture() is called with reference=1 directly in this
    # function; presumably that makes an integer eval_env count stack frames
    # relative to our caller. Confirm against EvalEnvironment.capture before
    # moving this call into a helper.
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
                                      NA_action)
    # None signals that formula_like could not be handled.
    if design_infos is None:
        raise PatsyError("bad formula-like object")
    # The first entry is checked for columns; an empty one means the model
    # has no outcome variables.
    if len(design_infos[0].column_names) == 0:
        raise PatsyError("model is missing required outcome variables")
    return design_infos

# This always returns a length-two tuple,
#   response, predictors
# where
#   response is a DesignMatrix (possibly with 0 columns)
#   predictors is a DesignMatrix
# The input 'formula_like' could be like:
#   (np.ndarray, np.ndarray)
#   (DesignMatrix, DesignMatrix)
#   (None, DesignMatrix)
#   np.ndarray  # for predictor-only models
#   DesignMatrix
#   (None, np.ndarray)
#   "y ~ x"
#   ModelDesc(...)
#   DesignInfo
#   (DesignInfo, DesignInfo)
#   any object with a special method __patsy_get_model_desc__

Source File: missing.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def _handle_NA_raise(self, values, is_NAs, origins):
        for is_NA, origin in zip(is_NAs, origins):
            if np.any(is_NA):
                raise PatsyError("factor contains missing values", origin)
        return values

Source File: missing.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def handle_NA(self, values, is_NAs, origins):
        """Apply this object's NA policy (``self.on_NA``) to factor values.

        :arg values: A list of `ndarray` objects holding the data. They may
          be 1- or 2-dimensional and of varying dtype, but all have the same
          number of rows (or entries, for 1-d arrays).
        :arg is_NAs: A list as long as `values`, holding boolean `ndarray`
          objects that flag the rows containing NAs in the corresponding
          entry of `values`.
        :arg origins: A list as long as `values`, describing where each
          value came from. If a problem is found with a particular value,
          its entry in `origins` is used as the origin argument of the
          :class:`PatsyError` that is raised.
        :returns: A list of new values (which may have a differing number of
          rows.) An empty `values` list is returned as is.
        """
        assert len(values) == len(is_NAs) == len(origins)
        if len(values) == 0:
            return values

        policy = self.on_NA
        if policy == "drop":
            return self._handle_NA_drop(values, is_NAs, origins)
        if policy == "raise":
            return self._handle_NA_raise(values, is_NAs, origins)
        assert False  # pragma: no cover

Source File: formulation.py From pyblp with MIT License

5 votes

def parse_term_expression(term: patsy.desc.Term) -> sp.Expr:
    """Return the product of the SymPy expressions parsed from the term's factors.

    Starts from ``sp.Integer(1)``, so a term without factors yields 1. Any
    exception raised while parsing or multiplying a factor is re-raised as a
    ``patsy.PatsyError`` that points at the factor's origin.
    """
    product = sp.Integer(1)
    for factor in term.factors:
        try:
            product *= parse_expression(factor.name())
        except Exception as error:
            raise patsy.PatsyError("Failed to parse a term.", factor.origin) from error

    return product

Source File: contrasts.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def __init__(self, matrix, column_suffixes):
        self.matrix = np.asarray(matrix)
        self.column_suffixes = column_suffixes
        if self.matrix.shape[1] != len(column_suffixes):
            raise PatsyError("matrix and column_suffixes don't conform")

Source File: formulation.py From pyblp with MIT License

5 votes

def parse_terms(formula: str) -> List[patsy.desc.Term]:
    """Parse a formula string and return its right-hand side patsy terms.

    A formula with a left-hand side is rejected with a ``patsy.PatsyError``
    whose origin spans the formula up to and including the first '~' (or the
    whole formula if it contains no '~').
    """
    description = patsy.highlevel.ModelDesc.from_formula(formula)
    if not description.lhs_termlist:
        return description.rhs_termlist

    tilde = formula.find('~')
    end = len(formula) if tilde < 0 else tilde + 1
    raise patsy.PatsyError("Formulas should not have left-hand sides.", patsy.origin.Origin(formula, 0, end))

Source File: formulation.py From pyblp with MIT License

5 votes

def _build_ids(self, data: Mapping) -> Array:
        """Build the matrix of IDs to absorb from a name-to-array mapping.

        Every absorbed name is loaded from ``data`` and flattened; then one
        ID column is built per absorbed term by interacting the data columns
        of its factors. The columns are stacked with ``np.column_stack``.

        Raises ``patsy.PatsyError`` if a name cannot be loaded, or if a
        factor of an absorbed term is not categorical.
        """

        # load and flatten each absorbed variable
        flattened: Data = {}
        for name in self._absorbed_names:
            try:
                flattened[name] = np.asarray(data[name]).flatten()
            except Exception as error:
                assert self._absorb is not None
                origin = patsy.origin.Origin(self._absorb, 0, len(self._absorb))
                raise patsy.PatsyError(f"Failed to load data for '{name}'.", origin) from error

        # one column of IDs per absorbed term
        stacked: List[Array] = []
        for term in self._absorbed_terms:
            per_factor: List[Array] = []
            term_design = design_matrix([term], flattened)
            for factor, info in term_design.factor_infos.items():
                if info.type != 'categorical':
                    raise patsy.PatsyError("Only categorical variables can be absorbed.", factor.origin)
                symbol = parse_expression(factor.name())
                per_factor.append(flattened[symbol.name])
            stacked.append(interact_ids(*per_factor))

        return np.column_stack(stacked)

Source File: categorical.py From vnpy_crypto with MIT License

5 votes

def sniff(self, data):
        """Examine one chunk of categorical data and record the levels seen.

        Returns True when the levels are considered fully known (explicit
        levels on a ``_CategoricalBox``, a pandas categorical, boolean-dtype
        data, or only boolean values seen so far) and False when more data
        still has to be examined.

        Raises ``PatsyError`` if an item cannot be added to the level set
        because it is not hashable.
        """
        # Remember a contrast attached to the data, if there is one.
        if hasattr(data, "contrast"):
            self._contrast = data.contrast
        # returns a bool: are we confident that we found all the levels?
        if isinstance(data, _CategoricalBox):
            if data.levels is not None:
                # Explicit levels were supplied; nothing left to discover.
                self._levels = tuple(data.levels)
                return True
            else:
                # unbox and fall through
                data = data.data
        if safe_is_pandas_categorical(data):
            # pandas.Categorical has its own NA detection, so don't try to
            # second-guess it.
            self._levels = tuple(pandas_Categorical_categories(data))
            return True
        # fastpath to avoid doing an item-by-item iteration over boolean
        # arrays, as requested by #44
        if hasattr(data, "dtype") and safe_issubdtype(data.dtype, np.bool_):
            self._level_set = set([True, False])
            return True

        # Rejects >1-dimensional data and wraps scalars/strings in a list.
        data = _categorical_shape_fix(data)

        for value in data:
            # NA entries do not contribute a level.
            if self._NA_action.is_categorical_NA(value):
                continue
            # Seeing either boolean registers both boolean levels.
            if value is True or value is False:
                self._level_set.update([True, False])
            else:
                try:
                    self._level_set.add(value)
                except TypeError:
                    # set.add raises TypeError for unhashable items.
                    raise PatsyError("Error interpreting categorical data: "
                                     "all items must be hashable",
                                     self._origin)
        # If everything we've seen is boolean, assume that everything else
        # would be too. Otherwise we need to keep looking.
        return self._level_set == set([True, False])

Source File: categorical.py From vnpy_crypto with MIT License

5 votes

def _categorical_shape_fix(data):
    """Normalise the shape of raw categorical data.

    ``data`` is expected to be a plain iterable of values (not a
    _CategoricalBox or pandas Categorical) that may have the wrong shape.
    Data with more than one dimension is rejected with ``PatsyError``;
    non-iterable values and text/byte strings are wrapped in a one-element
    list; everything else is returned unchanged.
    """
    too_many_dims = hasattr(data, "ndim") and data.ndim > 1
    if too_many_dims:
        raise PatsyError("categorical data cannot be >1-dimensional")

    # coerce scalars into 1d, which is consistent with what we do for numeric
    # factors. (See statsmodels/statsmodels#1881)
    is_scalar = (not iterable(data)
                 or isinstance(data, (six.text_type, six.binary_type)))
    return [data] if is_scalar else data

Python patsy.PatsyError() Examples