Python pandas.Categorical() Examples

The following are 30 code examples of pandas.Categorical(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: test_constructors.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_constructor_with_generator(self):
        # This was raising an Error in isna(single_val).any() because isna
        # returned a scalar for a generator
        xrange = range

        exp = Categorical([0, 1, 2])
        cat = Categorical((x for x in [0, 1, 2]))
        tm.assert_categorical_equal(cat, exp)
        cat = Categorical(xrange(3))
        tm.assert_categorical_equal(cat, exp)

        # This uses xrange internally
        from pandas.core.index import MultiIndex
        MultiIndex.from_product([range(5), ['a', 'b', 'c']])

        # check that categories accept generators and sequences
        cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2]))
        tm.assert_categorical_equal(cat, exp)
        cat = Categorical([0, 1, 2], categories=xrange(3))
        tm.assert_categorical_equal(cat, exp) 
Example #2
Source File: test_missing.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_nan_handling(self):

        # Nans are represented as -1 in codes
        c = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0],
                                                       dtype=np.int8))
        c[1] = np.nan
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0],
                                                       dtype=np.int8))

        # Adding nan to categories should make assigned nan point to the
        # category!
        c = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0],
                                                       dtype=np.int8)) 
Example #3
Source File: nameddict.py    From pyGSTi with Apache License 2.0 6 votes vote down vote up
def as_dataframe(self):
        """Build a pandas DataFrame from the columns gathered by `_add_to_columns`.

        A ``'value'`` column (series type ``"unknown"``) is seeded first and
        `self._add_to_columns` is then asked to fill in the column lists and
        their series types. Each column is converted according to its type:
        ``'float'`` and ``'int'`` become NumPy arrays, ``'category'`` becomes
        a ``pandas.Categorical`` and anything else is passed through as-is.

        Returns
        -------
        pandas.DataFrame
        """
        import pandas as _pandas

        def _coerce(kind, values):
            # Pick the container handed to the DataFrame for one column.
            if kind == 'category':
                return _pandas.Categorical(values)
            if kind == 'int':
                return _np.array(values, dtype=int)  # or pd.Series w/dtype?
            if kind == 'float':
                return _np.array(values, dtype='d')
            return values  # will infer an object array?

        columns = {'value': []}
        seriestypes = {'value': "unknown"}
        self._add_to_columns(columns, seriestypes, {})

        converted = {}
        for name, values in columns.items():
            converted[name] = _coerce(seriestypes[name], values)

        return _pandas.DataFrame(converted)
Example #4
Source File: test_indexing.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_setitem(self):

        # int/positional
        c = self.factor.copy()
        c[0] = 'b'
        assert c[0] == 'b'
        c[-1] = 'a'
        assert c[-1] == 'a'

        # boolean
        c = self.factor.copy()
        indexer = np.zeros(len(c), dtype='bool')
        indexer[0] = True
        indexer[-1] = True
        c[indexer] = 'c'
        expected = Categorical(['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
                               ordered=True)

        tm.assert_categorical_equal(c, expected) 
Example #5
Source File: test_indexing.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_categories_assigments(self):
        """Check that categories can only be relabelled with same-length input.

        Relabelling goes through ``rename_categories`` instead of assigning
        to the ``categories`` attribute: the attribute setter is deprecated
        and not available in newer pandas releases, while
        ``rename_categories`` performs the same length validation.
        """
        s = Categorical(["a", "b", "c", "a"])
        exp = np.array([1, 2, 3, 1], dtype=np.int64)
        s = s.rename_categories([1, 2, 3])
        tm.assert_numpy_array_equal(s.__array__(), exp)
        tm.assert_index_equal(s.categories, Index([1, 2, 3]))

        # lengthen
        with pytest.raises(ValueError):
            s.rename_categories([1, 2, 3, 4])

        # shorten
        with pytest.raises(ValueError):
            s.rename_categories([1, 2])

    # Combinations of sorted/unique: 
Example #6
Source File: test_indexing.py    From recruit with Apache License 2.0 6 votes vote down vote up
def non_coercible_categorical(monkeypatch):
    """
    Patch Categorical.__array__ with a function that always raises.

    The patch is installed inside ``monkeypatch.context()`` and the
    generator yields once while it is active, so implicit conversion of a
    Categorical to an array is not possible during that time.

    Raises
    ------
    ValueError
        When Categorical.__array__ is called.
    """
    # TODO(Categorical): identify other places where this may be
    # useful and move to a conftest.py
    def array(self, dtype=None):
        raise ValueError("I cannot be converted.")

    with monkeypatch.context() as patcher:
        patcher.setattr(Categorical, "__array__", array)
        yield
Example #7
Source File: test_dtypes.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_codes_dtypes(self):

        # GH 8453
        result = Categorical(['foo', 'bar', 'baz'])
        assert result.codes.dtype == 'int8'

        result = Categorical(['foo%05d' % i for i in range(400)])
        assert result.codes.dtype == 'int16'

        result = Categorical(['foo%05d' % i for i in range(40000)])
        assert result.codes.dtype == 'int32'

        # adding cats
        result = Categorical(['foo', 'bar', 'baz'])
        assert result.codes.dtype == 'int8'
        result = result.add_categories(['foo%05d' % i for i in range(400)])
        assert result.codes.dtype == 'int16'

        # removing cats
        result = result.remove_categories(['foo%05d' % i for i in range(300)])
        assert result.codes.dtype == 'int8' 
Example #8
Source File: test_api.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_ordered_api(self):
        # GH 9347
        cat1 = Categorical(list('acb'), ordered=False)
        tm.assert_index_equal(cat1.categories, Index(['a', 'b', 'c']))
        assert not cat1.ordered

        cat2 = Categorical(list('acb'), categories=list('bca'), ordered=False)
        tm.assert_index_equal(cat2.categories, Index(['b', 'c', 'a']))
        assert not cat2.ordered

        cat3 = Categorical(list('acb'), ordered=True)
        tm.assert_index_equal(cat3.categories, Index(['a', 'b', 'c']))
        assert cat3.ordered

        cat4 = Categorical(list('acb'), categories=list('bca'), ordered=True)
        tm.assert_index_equal(cat4.categories, Index(['b', 'c', 'a']))
        assert cat4.ordered 
Example #9
Source File: test_api.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_set_ordered(self):
        """Exercise ``as_ordered``, ``as_unordered`` and ``set_ordered``, and
        check that assigning to ``ordered`` raises AttributeError."""
        original = Categorical(["a", "b", "c", "a"], ordered=True)

        # called without ``inplace``: the result is returned
        other = original.as_unordered()
        assert not other.ordered
        other = original.as_ordered()
        assert other.ordered

        # called with ``inplace=True``: ``other`` itself is changed
        other.as_unordered(inplace=True)
        assert not other.ordered
        other.as_ordered(inplace=True)
        assert other.ordered

        assert other.set_ordered(True).ordered
        assert not other.set_ordered(False).ordered
        other.set_ordered(True, inplace=True)
        assert other.ordered
        other.set_ordered(False, inplace=True)
        assert not other.ordered

        # removed in 0.19.0
        msg = "can\'t set attribute"
        for flag in (True, False):
            with pytest.raises(AttributeError, match=msg):
                original.ordered = flag
Example #10
Source File: test_api.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_codes_immutable(self):
        """Check that ``codes`` is read only while the Categorical itself
        (and its private ``_codes``) can still be written to."""
        # Codes should be read only
        cat = Categorical(["a", "b", "c", "a", np.nan])
        tm.assert_numpy_array_equal(cat.codes,
                                    np.array([0, 1, 2, 0, -1], dtype='int8'))

        # Assignments to codes should raise
        replacement = np.array([0, 1, 2, 0, 1], dtype='int8')
        with pytest.raises(ValueError):
            cat.codes = replacement

        # changes in the codes array should raise
        exposed = cat.codes
        with pytest.raises(ValueError):
            exposed[4] = 1

        # But even after getting the codes, the original array should still be
        # writeable!
        cat[4] = "a"
        tm.assert_numpy_array_equal(cat.codes,
                                    np.array([0, 1, 2, 0, 0], dtype='int8'))
        cat._codes[4] = 2
        tm.assert_numpy_array_equal(cat.codes,
                                    np.array([0, 1, 2, 0, 2], dtype='int8'))
Example #11
Source File: test_dtypes.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_astype(self, ordered):
        """Check ``Categorical.astype`` for string and numeric values.

        Parameters
        ----------
        ordered
            Passed through as the ``ordered`` argument of every Categorical
            built here (presumably supplied by a fixture; confirm against
            the test module).
        """
        # string
        cat = Categorical(list('abbaaccc'), ordered=ordered)
        result = cat.astype(object)
        expected = np.array(cat)
        tm.assert_numpy_array_equal(result, expected)

        msg = 'could not convert string to float'
        with pytest.raises(ValueError, match=msg):
            cat.astype(float)

        # numeric
        cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
        result = cat.astype(object)
        expected = np.array(cat, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # The builtin ``int`` / ``float`` are used as dtypes: ``np.int`` and
        # ``np.float`` were plain aliases of them and have been removed from
        # NumPy.
        result = cat.astype(int)
        expected = np.array(cat, dtype=int)
        tm.assert_numpy_array_equal(result, expected)

        result = cat.astype(float)
        expected = np.array(cat, dtype=float)
        tm.assert_numpy_array_equal(result, expected)
Example #12
Source File: test_api.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_remove_categories(self):
        """Check ``remove_categories`` with and without ``inplace``, and that
        removing a category that is not present raises ValueError."""
        cat = Categorical(["a", "b", "c", "a"], ordered=True)
        snapshot = cat.copy()
        without_c = Categorical(["a", "b", np.nan, "a"],
                                categories=["a", "b"], ordered=True)

        # first inplace == False: scalar and list argument behave alike and
        # leave ``cat`` as it was
        for removals in ("c", ["c"]):
            res = cat.remove_categories(removals)
            tm.assert_categorical_equal(cat, snapshot)
            tm.assert_categorical_equal(res, without_c)

        # inplace == True
        outcome = cat.remove_categories("c", inplace=True)
        tm.assert_categorical_equal(cat, without_c)
        assert outcome is None

        # removal is not in categories
        with pytest.raises(ValueError):
            cat.remove_categories(["c"])
Example #13
Source File: test_operators.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_compare_frame(self):
        """Compare a Categorical with a DataFrame built from it, both
        directly (expected to raise) and against the transposed frame."""
        # GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame
        cat = Categorical(["a", "b", 2, "a"])
        df = DataFrame(cat)

        comparisons = (operator.eq, operator.ne, operator.ge,
                       operator.gt, operator.le, operator.lt)
        for compare in comparisons:
            # alignment raises unless we transpose
            with pytest.raises(ValueError):
                compare(cat, df)

        same = cat == df.T
        tm.assert_frame_equal(same, DataFrame([[True, True, True, True]]))

        differs = cat[::-1] != df.T
        tm.assert_frame_equal(differs,
                              DataFrame([[False, True, True, False]]))
Example #14
Source File: test_sorting.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_numpy_argsort(self):
        """Check ``np.argsort`` on a Categorical, including the rejection of
        the ``axis`` and ``order`` arguments."""
        cat = Categorical([5, 3, 1, 4, 2], ordered=True)
        expected = np.array([2, 4, 1, 3, 0])

        # default sort kind, then an explicit one
        for kwargs in ({}, {'kind': 'mergesort'}):
            tm.assert_numpy_array_equal(np.argsort(cat, **kwargs), expected,
                                        check_dtype=False)

        rejected = (
            ({'axis': 0}, "the 'axis' parameter is not supported"),
            ({'order': 'C'}, "the 'order' parameter is not supported"),
        )
        for kwargs, msg in rejected:
            with pytest.raises(ValueError, match=msg):
                np.argsort(cat, **kwargs)
Example #15
Source File: test_constructors.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_constructor_tuples_datetimes(self):
        # numpy will auto reshape when all of the tuples are the
        # same len, so add an extra one with 2 items and slice it off
        values = np.array([(Timestamp('2010-01-01'),),
                           (Timestamp('2010-01-02'),),
                           (Timestamp('2010-01-01'),),
                           (Timestamp('2010-01-02'),),
                           ('a', 'b')], dtype=object)[:-1]
        result = Categorical(values)
        expected = Index([(Timestamp('2010-01-01'),),
                          (Timestamp('2010-01-02'),)], tupleize_cols=False)
        tm.assert_index_equal(result.categories, expected) 
Example #16
Source File: test_operators.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_unordered_different_categories_raises(self):
        """Check that ``==`` between unordered Categoricals with different
        categories raises TypeError."""
        left = Categorical(['a', 'b'], categories=['a', 'b'], ordered=False)
        right = Categorical(['a', 'c'], categories=['c', 'a'], ordered=False)

        msg = "Categoricals can only be compared"
        with pytest.raises(TypeError, match=msg):
            left == right
Example #17
Source File: test_operators.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_reflected_comparison_with_scalars(self):
        # GH8658
        cat = Categorical([1, 2, 3], ordered=True)
        tm.assert_numpy_array_equal(cat > cat[0],
                                    np.array([False, True, True]))
        tm.assert_numpy_array_equal(cat[0] < cat,
                                    np.array([False, True, True])) 
Example #18
Source File: test_operators.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_datetime_categorical_comparison(self):
        dt_cat = Categorical(date_range('2014-01-01', periods=3), ordered=True)
        tm.assert_numpy_array_equal(dt_cat > dt_cat[0],
                                    np.array([False, True, True]))
        tm.assert_numpy_array_equal(dt_cat[0] < dt_cat,
                                    np.array([False, True, True])) 
Example #19
Source File: test_operators.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_categories_none_comparisons(self):
        factor = Categorical(['a', 'b', 'b', 'a',
                              'a', 'c', 'c', 'c'], ordered=True)
        tm.assert_categorical_equal(factor, self.factor) 
Example #20
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_take_fill_value_new_raises(self):
        """Check that ``take`` raises TypeError for a ``fill_value`` that is
        not one of the categories."""
        # https://github.com/pandas-dev/pandas/issues/23296
        xpr = r"'fill_value' \('d'\) is not in this Categorical's categories."
        letters = pd.Categorical(['a', 'b', 'c'])
        with pytest.raises(TypeError, match=xpr):
            letters.take([0, 1, -1], fill_value='d', allow_fill=True)
Example #21
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_take_fill_with_negative_one(self):
        # -1 was a category
        cat = pd.Categorical([-1, 0, 1])
        result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1)
        expected = pd.Categorical([-1, -1, 0], categories=[-1, 0, 1])
        tm.assert_categorical_equal(result, expected) 
Example #22
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_take_allow_fill(self):
        # https://github.com/pandas-dev/pandas/issues/23296
        cat = pd.Categorical(['a', 'a', 'b'])
        result = cat.take([0, -1, -1], allow_fill=True)
        expected = pd.Categorical(['a', np.nan, np.nan],
                                  categories=['a', 'b'])
        tm.assert_categorical_equal(result, expected) 
Example #23
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_positional_take_unobserved(self, ordered):
        cat = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'],
                             ordered=ordered)
        result = cat.take([1, 0], allow_fill=False)
        expected = pd.Categorical(['b', 'a'], categories=cat.categories,
                                  ordered=ordered)
        tm.assert_categorical_equal(result, expected) 
Example #24
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_positional_take(self, ordered):
        cat = pd.Categorical(['a', 'a', 'b', 'b'], categories=['b', 'a'],
                             ordered=ordered)
        result = cat.take([0, 1, 2], allow_fill=False)
        expected = pd.Categorical(['a', 'a', 'b'], categories=cat.categories,
                                  ordered=ordered)
        tm.assert_categorical_equal(result, expected) 
Example #25
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_take_empty(self, allow_fill):
        """Check that ``take`` on an empty Categorical raises IndexError.

        ``allow_fill`` is forwarded unchanged to ``take``.
        """
        # https://github.com/pandas-dev/pandas/issues/20664
        no_values = pd.Categorical([], categories=['a', 'b'])
        with pytest.raises(IndexError):
            no_values.take([0], allow_fill=allow_fill)
Example #26
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_take_positive_no_warning(self):
        cat = pd.Categorical(['a', 'b'])
        with tm.assert_produces_warning(None):
            cat.take([0, 0]) 
Example #27
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_take_warns(self):
        """Check that ``take`` with a negative index and default arguments
        emits a FutureWarning."""
        pair = pd.Categorical(['a', 'b'])
        with tm.assert_produces_warning(FutureWarning):
            pair.take([0, -1])
Example #28
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_isin_empty(empty):
    s = pd.Categorical(["a", "b"])
    expected = np.array([False, False], dtype=bool)

    result = s.isin(empty)
    tm.assert_numpy_array_equal(expected, result) 
Example #29
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_isin_cats():
    """Check ``Categorical.isin`` with NaN among the candidates and with a
    candidate that is not a category."""
    # GH2003
    cat = pd.Categorical(["a", "b", np.nan])

    cases = (
        (["a", np.nan], [True, False, True]),
        (["a", "c"], [True, False, False]),
    )
    for candidates, flags in cases:
        result = cat.isin(candidates)
        expected = np.array(flags, dtype=bool)
        tm.assert_numpy_array_equal(expected, result)
Example #30
Source File: test_operators.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_compare_unordered_different_order(self):
        # https://github.com/pandas-dev/pandas/issues/16603#issuecomment-
        # 349290078
        a = pd.Categorical(['a'], categories=['a', 'b'])
        b = pd.Categorical(['b'], categories=['b', 'a'])
        assert not a.equals(b)