Python Examples of pandas.CategoricalIndex

Source File: test_reshape.py From recruit with Apache License 2.0

6 votes

def test_dataframe_dummies_preserve_categorical_dtype(self, dtype):
        """Check that get_dummies on a Categorical yields categorical columns.

        Runs once for an unordered and once for an ordered categorical
        (regression test for GH13854).
        """
        for is_ordered in (False, True):
            cat = pd.Categorical(list("xy"), categories=list("xyz"),
                                 ordered=is_ordered)
            result = get_dummies(cat, dtype=dtype)

            # one indicator row per input value; category 'z' never occurs
            indicators = np.array([[1, 0, 0], [0, 1, 0]],
                                  dtype=self.effective_dtype(dtype))
            expected_columns = pd.CategoricalIndex(
                cat.categories,
                categories=cat.categories,
                ordered=is_ordered)
            expected = DataFrame(indicators, columns=expected_columns,
                                 dtype=self.effective_dtype(dtype))

            tm.assert_frame_equal(result, expected)

Source File: test_construction.py From recruit with Apache License 2.0

6 votes

def test_categorical_preserves_tz(self):
        """Check the timezone survives DatetimeIndex -> categorical -> DatetimeIndex.

        The round trip is checked through a CategoricalIndex, a Categorical
        and a Series wrapping the CategoricalIndex.
        """
        # GH#18664 retain tz when going DTI-->Categorical-->DTI
        # TODO: parametrize over DatetimeIndex/DatetimeArray
        #  once CategoricalIndex(DTA) works
        stamps = [pd.NaT, '2015-01-01', '1999-04-06 15:14:13', '2015-01-01']
        dti = pd.DatetimeIndex(stamps, tz='US/Eastern')

        ci = pd.CategoricalIndex(dti)
        containers = (ci, pd.Categorical(dti), pd.Series(ci))

        for obj in containers:
            tm.assert_index_equal(pd.DatetimeIndex(obj), dti)

Source File: test_category.py From recruit with Apache License 2.0

6 votes

def test_isin(self):
        """Check CategoricalIndex.isin against lists and other categoricals."""
        values = list('aabca') + [np.nan]
        ci = CategoricalIndex(values, categories=['c', 'a', 'b'])

        only_c = np.array([False, False, False, True, False, False])
        tm.assert_numpy_array_equal(ci.isin(['c']), only_c)

        # the trailing NaN only matches when NaN is among the candidates
        all_but_nan = np.array([True] * 5 + [False])
        tm.assert_numpy_array_equal(ci.isin(['c', 'a', 'b']), all_but_nan)

        everything = np.array([True] * 6)
        tm.assert_numpy_array_equal(
            ci.isin(['c', 'a', 'b', np.nan]), everything)

        # mismatched categorical -> coerced to ndarray so doesn't matter
        wider = ci.set_categories(list('abcdefghi'))
        tm.assert_numpy_array_equal(ci.isin(wider), np.array([True] * 6))

        disjoint = ci.set_categories(list('defghi'))
        tm.assert_numpy_array_equal(
            ci.isin(disjoint), np.array([False] * 5 + [True]))

Source File: test_category.py From recruit with Apache License 2.0

6 votes

def test_repr_roundtrip(self):
        """Check a small CategoricalIndex round-trips through repr/eval and
        that string formatting runs for short and long indexes.
        """
        def to_text(index):
            # pick the text conversion for the running Python version
            if PY3:
                return str(index)
            return compat.text_type(index)

        ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
        str(ci)
        tm.assert_index_equal(eval(repr(ci)), ci, exact=True)

        # formatting
        to_text(ci)

        # long format
        # this is not reprable
        ci = CategoricalIndex(np.random.randint(0, 5, size=100))
        to_text(ci)

Source File: common.py From recruit with Apache License 2.0

6 votes

def test_get_indexer_consistency(self):
        """Check get_indexer / get_indexer_non_unique return intp ndarrays.

        IntervalIndex entries are skipped. Indexes that are neither unique
        nor a CategoricalIndex are expected to raise InvalidIndexError from
        get_indexer.
        """
        # See GH 16819
        for _name, index in self.indices.items():
            if isinstance(index, IntervalIndex):
                continue

            if not (index.is_unique or isinstance(index, CategoricalIndex)):
                e = "Reindexing only valid with uniquely valued Index objects"
                with pytest.raises(InvalidIndexError, match=e):
                    index.get_indexer(index[0:2])
            else:
                positions = index.get_indexer(index[0:2])
                assert isinstance(positions, np.ndarray)
                assert positions.dtype == np.intp

            # the non-unique variant is exercised for every remaining index
            positions, _ = index.get_indexer_non_unique(index[0:2])
            assert isinstance(positions, np.ndarray)
            assert positions.dtype == np.intp

Source File: common.py From recruit with Apache License 2.0

6 votes

def test_numpy_argsort(self):
        """Check ``np.argsort`` on each index in ``self.indices`` gives the
        same result as the index's own ``argsort`` method.
        """
        for k, ind in self.indices.items():
            result = np.argsort(ind)
            expected = ind.argsort()
            tm.assert_numpy_array_equal(result, expected)

            # these are the only two types that perform
            # pandas compatibility input validation - the
            # rest already perform separate (or no) such
            # validation via their 'values' attribute as
            # defined in pandas.core.indexes/base.py - they
            # cannot be changed at the moment due to
            # backwards compatibility concerns
            #
            # NOTE(review): ``type(ind)`` is the index's class rather than
            # the index itself, so this isinstance check looks like it can
            # never be true and the assertions below appear to be dead code.
            # Testing ``ind`` directly would switch them on - confirm first
            # that every message still matches for both index types.
            if isinstance(type(ind), (CategoricalIndex, RangeIndex)):
                msg = "the 'axis' parameter is not supported"
                with pytest.raises(ValueError, match=msg):
                    np.argsort(ind, axis=1)

                msg = "the 'kind' parameter is not supported"
                with pytest.raises(ValueError, match=msg):
                    np.argsort(ind, kind='mergesort')

                msg = "the 'order' parameter is not supported"
                with pytest.raises(ValueError, match=msg):
                    np.argsort(ind, order=('a', 'b'))

Source File: test_analytics.py From recruit with Apache License 2.0

6 votes

def test_value_counts(self):
        """Check Series.value_counts for categorical and object data.

        For categorical data the result index is expected to be a
        CategoricalIndex that also lists the unused category with count 0.
        """
        # GH 12835
        cats = Categorical(list('abcccb'), categories=list('cabd'))
        ser = Series(cats, name='xxx')

        # (sort flag, expected index order, expected counts)
        cases = (
            (False, 'cabd', [3, 1, 2, 0]),
            (True, 'cbad', [3, 2, 1, 0]),
        )
        for sort, order, counts in cases:
            res = ser.value_counts(sort=sort)
            exp_index = CategoricalIndex(list(order),
                                         categories=cats.categories)
            exp = Series(counts, name='xxx', index=exp_index)
            tm.assert_series_equal(res, exp)

        # check object dtype handles the Series.name as the same
        # (tested in test_base.py)
        plain = Series(["a", "b", "c", "c", "c", "b"], name='xxx')
        exp = Series([3, 2, 1], name='xxx', index=["c", "b", "a"])
        tm.assert_series_equal(plain.value_counts(), exp)

Source File: test_analytics.py From recruit with Apache License 2.0

6 votes

def test_value_counts_categorical_ordered(self):
        """Check value_counts on ordered categorical data, raw and normalized.

        A Series and a CategoricalIndex built from the same values are
        expected to give the same result.
        """
        # most dtypes are tested in test_base.py
        values = pd.Categorical([1, 2, 3, 1, 1, 3], ordered=True)
        exp_idx = pd.CategoricalIndex([1, 3, 2], categories=[1, 2, 3],
                                      ordered=True)

        s = pd.Series(values, name='xxx')
        # check CategoricalIndex outputs the same result
        idx = pd.CategoricalIndex(values, name='xxx')

        counts = pd.Series([3, 2, 1], index=exp_idx, name='xxx')
        for obj in (s, idx):
            tm.assert_series_equal(obj.value_counts(), counts)

        # normalize
        shares = pd.Series(np.array([3., 2., 1]) / 6.,
                           index=exp_idx, name='xxx')
        for obj in (s, idx):
            tm.assert_series_equal(obj.value_counts(normalize=True), shares)

Source File: test_categorical.py From recruit with Apache License 2.0

6 votes

def test_unstack_categorical():
    """Check unstacking a groupby count over a categorical key.

    The unstacked columns are expected to stay a CategoricalIndex through
    ``describe``, and column arithmetic on the result is expected to work.
    """
    # GH11558 (example is taken from the original issue)
    frame = pd.DataFrame({'a': range(10),
                          'medium': ['A', 'B'] * 5,
                          'artist': list('XYXXY') * 2})
    frame['medium'] = frame['medium'].astype('category')

    grouped = frame.groupby(['artist', 'medium'], observed=False)
    gcat = grouped['a'].count().unstack()
    summary = gcat.describe()

    exp_columns = pd.CategoricalIndex(['A', 'B'], ordered=False,
                                      name='medium')
    tm.assert_index_equal(summary.columns, exp_columns)
    tm.assert_categorical_equal(summary.columns.values, exp_columns.values)

    totals = gcat['A'] + gcat['B']
    expected = pd.Series([6, 4], index=pd.Index(['X', 'Y'], name='artist'))
    tm.assert_series_equal(totals, expected)

Source File: test_categorical.py From recruit with Apache License 2.0

6 votes

def test_map_with_dict_or_series(self):
        """Check CategoricalIndex.map with a Series mapper and a dict mapper."""
        orig_values = ['a', 'B', 1, 'a']
        new_values = ['one', 2, 3.0, 'one']
        cur_index = pd.CategoricalIndex(orig_values, name='XXX')
        expected = pd.CategoricalIndex(
            new_values, name='XXX', categories=[3.0, 2, 'one'])

        # the last entry repeats the first, so leave it out of the mappers
        keys, targets = orig_values[:-1], new_values[:-1]
        mappers = (pd.Series(targets, index=keys), dict(zip(keys, targets)))

        for mapper in mappers:
            output = cur_index.map(mapper)
            # Order of categories in output can be different
            tm.assert_index_equal(expected, output)

Source File: test_category.py From recruit with Apache License 2.0

6 votes

def test_construction_with_dtype(self):
        """Check building categorical indexes via ``dtype='category'`` and
        from another Index.
        """
        # specify dtype
        ci = self.create_index(categories=list('abc'))

        for raw in (np.array(ci), np.array(ci).tolist()):
            built = Index(raw, dtype='category')
            tm.assert_index_equal(built, ci, exact=True)

        # these are generally only equal when the categories are reordered
        ci = self.create_index()

        inferred = Index(np.array(ci), dtype='category')
        realigned = inferred.reorder_categories(ci.categories)
        tm.assert_index_equal(realigned, ci, exact=True)

        # make sure indexes are handled
        idx = Index(range(3))
        from_index = CategoricalIndex(idx, categories=idx, ordered=True)
        expected = CategoricalIndex([0, 1, 2], categories=[0, 1, 2],
                                    ordered=True)
        tm.assert_index_equal(from_index, expected, exact=True)

Source File: test_analytics.py From recruit with Apache License 2.0

6 votes

def test_map_dictlike(idx, mapper):
    """Check Index.map with a dict-like mapper built by ``mapper``.

    Covers an identity mapping and a mapping whose values are all NaN.
    CategoricalIndex and IntervalIndex inputs are skipped.
    """
    skipped_types = (pd.CategoricalIndex, pd.IntervalIndex)
    if isinstance(idx, skipped_types):
        pytest.skip("skipping tests for {}".format(type(idx)))

    identity = mapper(idx.values, idx)

    # we don't infer to UInt64 for a dict
    widen = isinstance(idx, pd.UInt64Index) and isinstance(identity, dict)
    expected = idx.astype('int64') if widen else idx

    tm.assert_index_equal(idx.map(identity), expected)

    # empty mappable
    expected = pd.Index([np.nan] * len(idx))
    mapped = idx.map(mapper(expected, idx))
    tm.assert_index_equal(mapped, expected)

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_bar_categorical(self):
        """Check bar plots look the same for a plain and a categorical index.

        For both frames, the grouped and the stacked bar plot are checked
        for tick locations, x-limits and the left edge of the first and last
        bar.
        """
        # GH 13019
        df1 = pd.DataFrame(np.random.randn(6, 5),
                           index=pd.Index(list('ABCDEF')),
                           columns=pd.Index(list('abcde')))
        # categorical index must behave the same
        df2 = pd.DataFrame(np.random.randn(6, 5),
                           index=pd.CategoricalIndex(list('ABCDEF')),
                           columns=pd.CategoricalIndex(list('abcde')))

        for df in [df1, df2]:
            ax = df.plot.bar()
            ticks = ax.xaxis.get_ticklocs()
            tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5]))
            assert ax.get_xlim() == (-0.5, 5.5)
            # check left-edge of bars
            assert ax.patches[0].get_x() == -0.25
            assert ax.patches[-1].get_x() == 5.15

            ax = df.plot.bar(stacked=True)
            # re-read the ticks from the stacked axes; otherwise the
            # assertion below would only re-check the first plot's ticks
            ticks = ax.xaxis.get_ticklocs()
            tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5]))
            assert ax.get_xlim() == (-0.5, 5.5)
            assert ax.patches[0].get_x() == -0.25
            assert ax.patches[-1].get_x() == 4.75

Source File: test_sorting.py From recruit with Apache License 2.0

6 votes

def test_numpy_argsort(idx):
    """Check ``np.argsort(idx)`` gives the same result as ``idx.argsort()``."""
    result = np.argsort(idx)
    expected = idx.argsort()
    tm.assert_numpy_array_equal(result, expected)

    # these are the only two types that perform
    # pandas compatibility input validation - the
    # rest already perform separate (or no) such
    # validation via their 'values' attribute as
    # defined in pandas.core.indexes/base.py - they
    # cannot be changed at the moment due to
    # backwards compatibility concerns
    #
    # NOTE(review): ``type(idx)`` is the index's class rather than the
    # index itself, so this isinstance check looks like it can never be
    # true and the assertions below appear to be dead code. Testing ``idx``
    # directly would switch them on - confirm first that every message
    # still matches for both index types.
    if isinstance(type(idx), (CategoricalIndex, RangeIndex)):
        msg = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, axis=1)

        msg = "the 'kind' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, kind='mergesort')

        msg = "the 'order' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, order=('a', 'b'))

Source File: test_category.py From recruit with Apache License 2.0

6 votes

def test_contains(self):
        """Check ``in`` on a CategoricalIndex for values, codes and NaN."""
        ci = self.create_index(categories=list('cabdef'))

        assert 'a' in ci
        for missing in ('z', 'e', np.nan):
            assert missing not in ci

        # assert codes NOT in index
        for code in (0, 1):
            assert code not in ci

        values_with_nan = list('aabbca') + [np.nan]
        ci = CategoricalIndex(values_with_nan, categories=list('cabdef'))
        assert np.nan in ci

Source File: test_union_categoricals.py From recruit with Apache License 2.0

6 votes

def test_union_categorical_unwrap(self):
        """Check union_categoricals accepts Categorical, Series and
        CategoricalIndex inputs, and raises TypeError for a plain list.
        """
        # GH 14173
        expected = Categorical(['a', 'b', 'b', 'c'])

        c1 = Categorical(['a', 'b'])
        c2 = pd.Series(['b', 'c'], dtype='category')
        tm.assert_categorical_equal(union_categoricals([c1, c2]), expected)

        c2 = CategoricalIndex(c2)
        tm.assert_categorical_equal(union_categoricals([c1, c2]), expected)

        c1 = Series(c1)
        tm.assert_categorical_equal(union_categoricals([c1, c2]), expected)

        with pytest.raises(TypeError):
            union_categoricals([c1, ['a', 'b', 'c']])

Source File: test_pivot.py From recruit with Apache License 2.0

6 votes

def test_crosstab_with_categorial_columns(self):
        """Check crosstab keeps the category order of a categorical column."""
        # GH 8860
        makes = ['Honda', 'Acura', 'Tesla', 'Honda', 'Honda', 'Acura']
        models = ['Sedan', 'Sedan', 'Electric', 'Pickup', 'Sedan', 'Sedan']
        df = pd.DataFrame({'MAKE': makes, 'MODEL': models})

        categories = ['Sedan', 'Electric', 'Pickup']
        as_category = df['MODEL'].astype('category')
        df['MODEL'] = as_category.cat.set_categories(categories)
        result = pd.crosstab(df['MAKE'], df['MODEL'])

        expected = pd.DataFrame(
            [[2, 0, 0], [2, 0, 1], [0, 1, 0]],
            index=pd.Index(['Acura', 'Honda', 'Tesla'], name='MAKE'),
            columns=pd.CategoricalIndex(categories,
                                        categories=categories,
                                        ordered=False,
                                        name='MODEL'))
        tm.assert_frame_equal(result, expected)

Source File: test_constructor.py From recruit with Apache License 2.0

6 votes

def test_from_arrays_index_series_categorical():
    """Check MultiIndex.from_arrays keeps categorical levels intact.

    The same pair of categorical indexes is passed in directly, wrapped in
    Series, and as their ``.values``.
    """
    # GH13743
    idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
                               ordered=False)
    idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
                               ordered=True)

    input_forms = (
        [idx1, idx2],
        [pd.Series(idx1), pd.Series(idx2)],
        [idx1.values, idx2.values],
    )
    for arrays in input_forms:
        built = pd.MultiIndex.from_arrays(arrays)
        tm.assert_index_equal(built.get_level_values(0), idx1)
        tm.assert_index_equal(built.get_level_values(1), idx2)

Source File: test_pivot.py From recruit with Apache License 2.0

6 votes

def test_categorical_aggfunc(self, observed):
        """Check pivot_table with a categorical index column and a count
        aggregation.
        """
        # GH 9534
        df = pd.DataFrame({"C1": ["A", "B", "C", "C"],
                           "C2": ["a", "a", "b", "b"],
                           "V": [1, 2, 3, 4]})
        df["C1"] = df["C1"].astype("category")
        result = df.pivot_table("V", index="C1", columns="C2",
                                dropna=observed, aggfunc="count")

        expected = pd.DataFrame(
            np.array([[1., np.nan],
                      [1., np.nan],
                      [np.nan, 2.]]),
            index=pd.CategoricalIndex(['A', 'B', 'C'],
                                      categories=['A', 'B', 'C'],
                                      ordered=False,
                                      name='C1'),
            columns=pd.Index(['a', 'b'], name='C2'))
        tm.assert_frame_equal(result, expected)

Source File: test_reshape.py From recruit with Apache License 2.0

6 votes

def test_preserve_categorical_dtype(self):
        """Check make_axis_dummies returns categorical columns, with and
        without an identity ``transform``.
        """
        # GH13854
        from pandas.core.reshape.reshape import make_axis_dummies

        for ordered in (False, True):
            cidx = pd.CategoricalIndex(list("xyz"), ordered=ordered)
            midx = pd.MultiIndex(levels=[['a'], cidx],
                                 codes=[[0, 0], [0, 1]])
            df = DataFrame([[10, 11]], index=midx)

            expected = DataFrame([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]],
                                 index=midx, columns=cidx)

            plain = make_axis_dummies(df)
            tm.assert_frame_equal(plain, expected)

            transformed = make_axis_dummies(df, transform=lambda x: x)
            tm.assert_frame_equal(transformed, expected)

Source File: test_category.py From recruit with Apache License 2.0

6 votes

def test_delete(self):
        """Check CategoricalIndex.delete at the first, last and an
        out-of-range position.
        """
        ci = self.create_index()
        categories = ci.categories

        # (position to delete, values expected to remain)
        for position, remaining in ((0, 'abbca'), (-1, 'aabbc')):
            result = ci.delete(position)
            expected = CategoricalIndex(list(remaining),
                                        categories=categories)
            tm.assert_index_equal(result, expected, exact=True)

        with pytest.raises((IndexError, ValueError)):
            # Either depending on NumPy version
            ci.delete(10)

Source File: test_category.py From recruit with Apache License 2.0

6 votes

def test_astype(self):
        """Check CategoricalIndex.astype to object and to interval."""
        ci = self.create_index()
        as_object = ci.astype(object)
        tm.assert_index_equal(as_object, Index(np.array(ci)))

        # this IS equal, but not the same class
        assert as_object.equals(ci)
        assert isinstance(as_object, Index)
        assert not isinstance(as_object, CategoricalIndex)

        # interval
        ii = IntervalIndex.from_arrays(left=[-0.001, 2.0],
                                       right=[2, 4],
                                       closed='right')

        interval_cat = Categorical.from_codes(
            [0, 1, -1], categories=ii, ordered=True)
        ci = CategoricalIndex(interval_cat)

        as_interval = ci.astype('interval')
        expected = ii.take([0, 1, -1])
        tm.assert_index_equal(as_interval, expected)

        rebuilt = IntervalIndex(as_interval.values)
        tm.assert_index_equal(rebuilt, expected)

Source File: common.py From recruit with Apache License 2.0

6 votes

def test_astype_category(self, copy, name, ordered):
        """Check Index.astype to a categorical dtype.

        Covers a CategoricalDtype without categories, one with an explicit
        category list, and (for ``ordered is False`` only) the 'category'
        string alias.
        """
        # GH 18630
        index = self.create_index()
        if name:
            index = index.rename(name)

        # standard categories
        dtype = CategoricalDtype(ordered=ordered)
        converted = index.astype(dtype, copy=copy)
        expected = CategoricalIndex(index.values, name=name, ordered=ordered)
        tm.assert_index_equal(converted, expected)

        # non-standard categories
        all_but_last = index.unique().tolist()[:-1]
        dtype = CategoricalDtype(all_but_last, ordered)
        converted = index.astype(dtype, copy=copy)
        expected = CategoricalIndex(index.values, name=name, dtype=dtype)
        tm.assert_index_equal(converted, expected)

        if ordered is not False:
            return

        # dtype='category' defaults to ordered=False, so only test once
        converted = index.astype('category', copy=copy)
        expected = CategoricalIndex(index.values, name=name)
        tm.assert_index_equal(converted, expected)

Source File: test_astype.py From recruit with Apache License 2.0

5 votes

def test_astype_category(self):
        """Check astype('category') on a period range and on its ``_data``."""
        obj = pd.period_range("2000", periods=2)

        days = [pd.Period(day, freq="D")
                for day in ('2000-01-01', '2000-01-02')]
        expected_index = pd.CategoricalIndex(days)
        tm.assert_index_equal(obj.astype('category'), expected_index)

        # the backing array converts to the matching Categorical
        as_categorical = obj._data.astype('category')
        tm.assert_categorical_equal(as_categorical, expected_index.values)

Source File: test_astype.py From recruit with Apache License 2.0

5 votes

def test_astype_category(self, tz):
        """Check astype('category') on a tz-parametrized date range and on
        its ``_data``.
        """
        obj = pd.date_range("2000", periods=2, tz=tz)

        stamps = [pd.Timestamp(day, tz=tz)
                  for day in ('2000-01-01', '2000-01-02')]
        expected_index = pd.CategoricalIndex(stamps)
        tm.assert_index_equal(obj.astype('category'), expected_index)

        # the backing array converts to the matching Categorical
        as_categorical = obj._data.astype('category')
        tm.assert_categorical_equal(as_categorical, expected_index.values)

Source File: test_category.py From recruit with Apache License 2.0

5 votes

def test_fillna_categorical(self):
        """Check CategoricalIndex.fillna with a value inside and outside the
        categories.
        """
        # GH 11343
        idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name='x')

        # fill by value in categories
        filled = idx.fillna(1.0)
        expected = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name='x')
        tm.assert_index_equal(filled, expected)

        # fill by value not in categories raises ValueError
        with pytest.raises(ValueError,
                           match='fill value must be in categories'):
            idx.fillna(2.0)

Source File: test_get_set.py From recruit with Apache License 2.0

5 votes

def test_set_levels_categorical(ordered):
    """Check MultiIndex.set_levels accepts a CategoricalIndex as a level."""
    # GH13854
    index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
    cidx = CategoricalIndex(list("bac"), ordered=ordered)

    replaced = index.set_levels(cidx, 0)
    expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]],
                          codes=index.codes)
    tm.assert_index_equal(replaced, expected)

    # the level values should carry the categories and ordering of cidx
    expected_lvl = CategoricalIndex(list("bacb"),
                                    categories=cidx.categories,
                                    ordered=cidx.ordered)
    tm.assert_index_equal(replaced.get_level_values(0), expected_lvl)

Source File: test_category.py From recruit with Apache License 2.0

5 votes

def test_engine_type(self, dtype, engine_type):
        """Check the codes dtype and the engine class of a CategoricalIndex
        for the given ``dtype`` / ``engine_type`` pair.
        """
        if dtype == np.int64:
            # having 2**32 - 2**31 categories would be very memory-intensive,
            # so we cheat a bit with the dtype
            ci = pd.CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
            ci.values._codes = ci.values._codes.astype('int64')
        else:
            # num. of uniques required to push CategoricalIndex.codes to a
            # dtype (128 categories required for .codes dtype to be int16 etc.)
            uniques_for_dtype = {np.int8: 1, np.int16: 128, np.int32: 32768}
            ci = pd.CategoricalIndex(range(uniques_for_dtype[dtype]))

        assert np.issubdtype(ci.codes.dtype, dtype)
        assert isinstance(ci._engine, engine_type)

Source File: test_category.py From recruit with Apache License 2.0

5 votes

def test_take_invalid_kwargs(self):
        """Check CategoricalIndex.take rejects unknown and unsupported
        keyword arguments.
        """
        idx = pd.CategoricalIndex([1, 2, 3], name='foo')
        indices = [1, 0, -1]

        # (expected exception, message pattern, offending keyword arguments)
        cases = (
            (TypeError,
             r"take\(\) got an unexpected keyword argument 'foo'",
             {'foo': 2}),
            (ValueError,
             "the 'out' parameter is not supported",
             {'out': indices}),
            (ValueError,
             "the 'mode' parameter is not supported",
             {'mode': 'clip'}),
        )
        for exc_type, msg, bad_kwargs in cases:
            with pytest.raises(exc_type, match=msg):
                idx.take(indices, **bad_kwargs)

Source File: test_indexing.py From recruit with Apache License 2.0

5 votes

def test_mask_with_boolean(index):
    """Check a boolean Categorical (or CategoricalIndex when ``index`` is
    truthy) selects like its object-dtype equivalent.
    """
    s = Series(range(3))
    mask = Categorical([True, False, True])
    idx = CategoricalIndex(mask) if index else mask

    assert com.is_bool_indexer(idx)
    selected = s[idx]
    expected = s[idx.astype('object')]
    tm.assert_series_equal(selected, expected)

Python pandas.CategoricalIndex() Examples