Python pandas.CategoricalIndex() Examples
The following are 30 code examples of pandas.CategoricalIndex().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: test_reshape.py From recruit with Apache License 2.0 | 6 votes |
def test_dataframe_dummies_preserve_categorical_dtype(self, dtype):
    """Dummy columns built from a Categorical are a CategoricalIndex.

    For an unordered and an ordered categorical, the frame returned by
    ``get_dummies`` is compared with a frame whose columns are a
    ``CategoricalIndex`` over the same categories and orderedness.
    """
    # GH13854
    for is_ordered in (False, True):
        source = pd.Categorical(
            list("xy"), categories=list("xyz"), ordered=is_ordered
        )
        result = get_dummies(source, dtype=dtype)

        values = np.array(
            [[1, 0, 0], [0, 1, 0]], dtype=self.effective_dtype(dtype)
        )
        columns = pd.CategoricalIndex(
            source.categories,
            categories=source.categories,
            ordered=is_ordered,
        )
        expected = DataFrame(
            values, columns=columns, dtype=self.effective_dtype(dtype)
        )

        tm.assert_frame_equal(result, expected)
Example #2
Source File: test_construction.py From recruit with Apache License 2.0 | 6 votes |
def test_categorical_preserves_tz(self):
    """A tz-aware DatetimeIndex is recovered after wrapping in categoricals.

    The index is wrapped as a CategoricalIndex, a Categorical and a Series
    of the CategoricalIndex; rebuilding a DatetimeIndex from each wrapper
    must give back the original index.
    """
    # GH#18664 retain tz when going DTI-->Categorical-->DTI
    # TODO: parametrize over DatetimeIndex/DatetimeArray
    #  once CategoricalIndex(DTA) works
    stamps = [pd.NaT, '2015-01-01', '1999-04-06 15:14:13', '2015-01-01']
    dti = pd.DatetimeIndex(stamps, tz='US/Eastern')

    cat_index = pd.CategoricalIndex(dti)
    cat_array = pd.Categorical(dti)
    cat_series = pd.Series(cat_index)

    for wrapped in (cat_index, cat_array, cat_series):
        roundtripped = pd.DatetimeIndex(wrapped)
        tm.assert_index_equal(roundtripped, dti)
Example #3
Source File: test_category.py From recruit with Apache License 2.0 | 6 votes |
def test_isin(self):
    """Check CategoricalIndex.isin against lists and other categoricals."""
    ci = CategoricalIndex(
        list('aabca') + [np.nan], categories=['c', 'a', 'b'])

    # Membership against plain lists of values.
    list_cases = [
        (['c'], np.array([False, False, False, True, False, False])),
        (['c', 'a', 'b'], np.array([True] * 5 + [False])),
        (['c', 'a', 'b', np.nan], np.array([True] * 6)),
    ]
    for candidates, mask in list_cases:
        tm.assert_numpy_array_equal(ci.isin(candidates), mask)

    # mismatched categorical -> coerced to ndarray so doesn't matter
    recoded_cases = [
        (list('abcdefghi'), np.array([True] * 6)),
        (list('defghi'), np.array([False] * 5 + [True])),
    ]
    for new_categories, expected in recoded_cases:
        result = ci.isin(ci.set_categories(new_categories))
        tm.assert_numpy_array_equal(result, expected)
Example #4
Source File: test_category.py From recruit with Apache License 2.0 | 6 votes |
def test_repr_roundtrip(self):
    """repr() of a small CategoricalIndex evaluates back to an equal index.

    Also converts a short and a 100-element index to text to make sure the
    conversion itself does not raise.
    """

    def render(index):
        # Convert to text with str on Python 3, compat.text_type otherwise.
        if PY3:
            str(index)
        else:
            compat.text_type(index)

    ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
    str(ci)
    rebuilt = eval(repr(ci))
    tm.assert_index_equal(rebuilt, ci, exact=True)

    # formatting
    render(ci)

    # long format
    # this is not reprable
    ci = CategoricalIndex(np.random.randint(0, 5, size=100))
    render(ci)
Example #5
Source File: common.py From recruit with Apache License 2.0 | 6 votes |
def test_get_indexer_consistency(self):
    """get_indexer / get_indexer_non_unique return intp ndarrays.

    IntervalIndex fixtures are skipped. Unique indexes and CategoricalIndex
    must answer ``get_indexer``; every other index must raise
    ``InvalidIndexError`` from it.
    """
    # See GH 16819
    for _, index in self.indices.items():
        if isinstance(index, IntervalIndex):
            continue

        can_get_indexer = (
            index.is_unique or isinstance(index, CategoricalIndex)
        )
        if not can_get_indexer:
            e = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=e):
                index.get_indexer(index[0:2])
        else:
            positions = index.get_indexer(index[0:2])
            assert isinstance(positions, np.ndarray)
            assert positions.dtype == np.intp

        positions, _ = index.get_indexer_non_unique(index[0:2])
        assert isinstance(positions, np.ndarray)
        assert positions.dtype == np.intp
Example #6
Source File: common.py From recruit with Apache License 2.0 | 6 votes |
def test_numpy_argsort(self):
    """np.argsort on each fixture index matches the index's own argsort.

    For ``CategoricalIndex`` and ``RangeIndex`` the numpy-only arguments
    ``axis`` and ``order`` must additionally be rejected with ``ValueError``.
    """
    for _, ind in self.indices.items():
        result = np.argsort(ind)
        expected = ind.argsort()
        tm.assert_numpy_array_equal(result, expected)

        # these are the only two types that perform
        # pandas compatibility input validation - the
        # rest already perform separate (or no) such
        # validation via their 'values' attribute as
        # defined in pandas.core.indexes/base.py - they
        # cannot be changed at the moment due to
        # backwards compatibility concerns
        #
        # The check is made on the instance. The previous
        # ``isinstance(type(ind), ...)`` tested the class object itself,
        # which is never an instance of an index class, so the validation
        # below was never executed.
        if isinstance(ind, (CategoricalIndex, RangeIndex)):
            msg = "the 'axis' parameter is not supported"
            with pytest.raises(ValueError, match=msg):
                np.argsort(ind, axis=1)

            # NOTE(review): the former "'kind' parameter is not supported"
            # check is not carried over. ``kind`` is a documented argsort
            # argument for categorical data, so expecting it to be rejected
            # looks version-dependent - confirm against the pandas in use.

            msg = "the 'order' parameter is not supported"
            with pytest.raises(ValueError, match=msg):
                np.argsort(ind, order=('a', 'b'))
Example #7
Source File: test_analytics.py From recruit with Apache License 2.0 | 6 votes |
def test_value_counts(self):
    """value_counts on a categorical Series keeps a CategoricalIndex.

    Covers ``sort=False`` and ``sort=True``, then checks that an object
    Series carries its name through to the result.
    """
    # GH 12835
    cats = Categorical(list('abcccb'), categories=list('cabd'))
    ser = Series(cats, name='xxx')

    # (sort flag, expected labels, expected counts)
    scenarios = [
        (False, list('cabd'), [3, 1, 2, 0]),
        (True, list('cbad'), [3, 2, 1, 0]),
    ]
    for sort_flag, labels, counts in scenarios:
        res = ser.value_counts(sort=sort_flag)
        exp_index = CategoricalIndex(labels, categories=cats.categories)
        exp = Series(counts, name='xxx', index=exp_index)
        tm.assert_series_equal(res, exp)

    # check object dtype handles the Series.name as the same
    # (tested in test_base.py)
    ser = Series(["a", "b", "c", "c", "c", "b"], name='xxx')
    res = ser.value_counts()
    exp = Series([3, 2, 1], name='xxx', index=["c", "b", "a"])
    tm.assert_series_equal(res, exp)
Example #8
Source File: test_analytics.py From recruit with Apache License 2.0 | 6 votes |
def test_value_counts_categorical_ordered(self):
    """value_counts agrees for an ordered categorical Series and Index.

    Both raw and normalized counts are compared against a Series indexed by
    an ordered ``CategoricalIndex``.
    """
    # most dtypes are tested in test_base.py
    values = pd.Categorical([1, 2, 3, 1, 1, 3], ordered=True)

    exp_idx = pd.CategoricalIndex(
        [1, 3, 2], categories=[1, 2, 3], ordered=True)
    exp_counts = pd.Series([3, 2, 1], index=exp_idx, name='xxx')

    ser = pd.Series(values, name='xxx')
    tm.assert_series_equal(ser.value_counts(), exp_counts)

    # check CategoricalIndex outputs the same result
    idx = pd.CategoricalIndex(values, name='xxx')
    tm.assert_series_equal(idx.value_counts(), exp_counts)

    # normalize
    exp_share = pd.Series(
        np.array([3., 2., 1]) / 6., index=exp_idx, name='xxx')
    for container in (ser, idx):
        tm.assert_series_equal(
            container.value_counts(normalize=True), exp_share)
Example #9
Source File: test_categorical.py From recruit with Apache License 2.0 | 6 votes |
def test_unstack_categorical():
    """Unstacking a groupby count over a categorical key gives categorical columns.

    The described result must have a ``CategoricalIndex`` named ``medium``
    as columns, and the two unstacked columns must add up per artist.
    """
    # GH11558 (example is taken from the original issue)
    frame = pd.DataFrame({
        'a': range(10),
        'medium': ['A', 'B'] * 5,
        'artist': list('XYXXY') * 2,
    })
    frame['medium'] = frame['medium'].astype('category')

    grouped = frame.groupby(['artist', 'medium'], observed=False)
    gcat = grouped['a'].count().unstack()

    described = gcat.describe()
    exp_columns = pd.CategoricalIndex(
        ['A', 'B'], ordered=False, name='medium')
    tm.assert_index_equal(described.columns, exp_columns)
    tm.assert_categorical_equal(
        described.columns.values, exp_columns.values)

    total = gcat['A'] + gcat['B']
    expected = pd.Series(
        [6, 4], index=pd.Index(['X', 'Y'], name='artist'))
    tm.assert_series_equal(total, expected)
Example #10
Source File: test_categorical.py From recruit with Apache License 2.0 | 6 votes |
def test_map_with_dict_or_series(self):
    """CategoricalIndex.map accepts both a Series and a dict as mapper."""
    orig_values = ['a', 'B', 1, 'a']
    new_values = ['one', 2, 3.0, 'one']
    cur_index = pd.CategoricalIndex(orig_values, name='XXX')
    expected = pd.CategoricalIndex(
        new_values, name='XXX', categories=[3.0, 2, 'one'])

    # The last original value repeats the first, so it is left out of
    # both mappers.
    keys = orig_values[:-1]
    targets = new_values[:-1]

    series_mapper = pd.Series(targets, index=keys)
    output = cur_index.map(series_mapper)
    # Order of categories in output can be different
    tm.assert_index_equal(expected, output)

    dict_mapper = dict(zip(keys, targets))
    output = cur_index.map(dict_mapper)
    # Order of categories in output can be different
    tm.assert_index_equal(expected, output)
Example #11
Source File: test_category.py From recruit with Apache License 2.0 | 6 votes |
def test_construction_with_dtype(self):
    """Index(..., dtype='category') reproduces the fixture CategoricalIndex."""
    # specify dtype
    ci = self.create_index(categories=list('abc'))

    from_array = Index(np.array(ci), dtype='category')
    tm.assert_index_equal(from_array, ci, exact=True)

    from_list = Index(np.array(ci).tolist(), dtype='category')
    tm.assert_index_equal(from_list, ci, exact=True)

    # these are generally only equal when the categories are reordered
    ci = self.create_index()

    inferred = Index(np.array(ci), dtype='category')
    reordered = inferred.reorder_categories(ci.categories)
    tm.assert_index_equal(reordered, ci, exact=True)

    # make sure indexes are handled
    idx = Index(range(3))
    expected = CategoricalIndex(
        [0, 1, 2], categories=[0, 1, 2], ordered=True)
    from_index = CategoricalIndex(idx, categories=idx, ordered=True)
    tm.assert_index_equal(from_index, expected, exact=True)
Example #12
Source File: test_analytics.py From recruit with Apache License 2.0 | 6 votes |
def test_map_dictlike(idx, mapper):
    """Index.map with a dict-like built by ``mapper``.

    Categorical and interval indexes are skipped. An identity mapping must
    return the index itself (cast to int64 when a UInt64Index is mapped
    through a dict), and a mapping onto NaN must give an all-NaN Index.
    """
    skipped_types = (pd.CategoricalIndex, pd.IntervalIndex)
    if isinstance(idx, skipped_types):
        pytest.skip("skipping tests for {}".format(type(idx)))

    identity = mapper(idx.values, idx)

    # we don't infer to UInt64 for a dict
    uint_via_dict = (
        isinstance(idx, pd.UInt64Index) and isinstance(identity, dict)
    )
    expected = idx.astype('int64') if uint_via_dict else idx

    tm.assert_index_equal(idx.map(identity), expected)

    # empty mappable
    expected = pd.Index([np.nan] * len(idx))
    to_nan = mapper(expected, idx)
    tm.assert_index_equal(idx.map(to_nan), expected)
Example #13
Source File: test_frame.py From recruit with Apache License 2.0 | 6 votes |
def test_bar_categorical(self):
    """Bar plots behave the same for a plain and a categorical index.

    For both frames the x tick locations, x limits and the left edge of the
    first and last bar are checked, for a grouped and for a stacked plot.
    """
    # GH 13019
    df1 = pd.DataFrame(np.random.randn(6, 5),
                       index=pd.Index(list('ABCDEF')),
                       columns=pd.Index(list('abcde')))
    # categorical index must behave the same
    df2 = pd.DataFrame(np.random.randn(6, 5),
                       index=pd.CategoricalIndex(list('ABCDEF')),
                       columns=pd.CategoricalIndex(list('abcde')))

    for df in [df1, df2]:
        ax = df.plot.bar()
        ticks = ax.xaxis.get_ticklocs()
        tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5]))
        assert ax.get_xlim() == (-0.5, 5.5)
        # check left-edge of bars
        assert ax.patches[0].get_x() == -0.25
        assert ax.patches[-1].get_x() == 5.15

        ax = df.plot.bar(stacked=True)
        # Re-read the tick locations from the stacked axes. Previously the
        # ``ticks`` from the grouped plot were asserted a second time, so
        # the stacked plot's ticks were never actually checked.
        ticks = ax.xaxis.get_ticklocs()
        tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5]))
        assert ax.get_xlim() == (-0.5, 5.5)
        assert ax.patches[0].get_x() == -0.25
        assert ax.patches[-1].get_x() == 4.75
Example #14
Source File: test_sorting.py From recruit with Apache License 2.0 | 6 votes |
def test_numpy_argsort(idx):
    """np.argsort on ``idx`` matches ``idx.argsort()``.

    For ``CategoricalIndex`` and ``RangeIndex`` the numpy-only arguments
    ``axis`` and ``order`` must additionally be rejected with ``ValueError``.
    """
    result = np.argsort(idx)
    expected = idx.argsort()
    tm.assert_numpy_array_equal(result, expected)

    # these are the only two types that perform
    # pandas compatibility input validation - the
    # rest already perform separate (or no) such
    # validation via their 'values' attribute as
    # defined in pandas.core.indexes/base.py - they
    # cannot be changed at the moment due to
    # backwards compatibility concerns
    #
    # The check is made on the instance. The previous
    # ``isinstance(type(idx), ...)`` tested the class object itself, which
    # is never an instance of an index class, so the validation below was
    # never executed.
    if isinstance(idx, (CategoricalIndex, RangeIndex)):
        msg = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, axis=1)

        # NOTE(review): the former "'kind' parameter is not supported"
        # check is not carried over. ``kind`` is a documented argsort
        # argument for categorical data, so expecting it to be rejected
        # looks version-dependent - confirm against the pandas in use.

        msg = "the 'order' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, order=('a', 'b'))
Example #15
Source File: test_category.py From recruit with Apache License 2.0 | 6 votes |
def test_contains(self):
    """Membership tests look at values, not categories or codes."""
    categories = list('cabdef')
    ci = self.create_index(categories=categories)

    assert 'a' in ci
    assert 'z' not in ci
    assert 'e' not in ci
    assert np.nan not in ci

    # assert codes NOT in index
    for code in (0, 1):
        assert code not in ci

    with_nan = CategoricalIndex(
        list('aabbca') + [np.nan], categories=categories)
    assert np.nan in with_nan
Example #16
Source File: test_union_categoricals.py From recruit with Apache License 2.0 | 6 votes |
def test_union_categorical_unwrap(self):
    """union_categoricals accepts Categorical, Series and CategoricalIndex.

    A plain list among the inputs must raise ``TypeError``.
    """
    # GH 14173
    expected = Categorical(['a', 'b', 'b', 'c'])

    left = Categorical(['a', 'b'])
    right = pd.Series(['b', 'c'], dtype='category')
    tm.assert_categorical_equal(
        union_categoricals([left, right]), expected)

    right = CategoricalIndex(right)
    tm.assert_categorical_equal(
        union_categoricals([left, right]), expected)

    left = Series(left)
    tm.assert_categorical_equal(
        union_categoricals([left, right]), expected)

    with pytest.raises(TypeError):
        union_categoricals([left, ['a', 'b', 'c']])
Example #17
Source File: test_pivot.py From recruit with Apache License 2.0 | 6 votes |
def test_crosstab_with_categorial_columns(self):
    """crosstab over a categorical column keeps its category order."""
    # GH 8860
    makes = ['Honda', 'Acura', 'Tesla', 'Honda', 'Honda', 'Acura']
    models = ['Sedan', 'Sedan', 'Electric', 'Pickup', 'Sedan', 'Sedan']
    df = pd.DataFrame({'MAKE': makes, 'MODEL': models})

    categories = ['Sedan', 'Electric', 'Pickup']
    as_category = df['MODEL'].astype('category')
    df['MODEL'] = as_category.cat.set_categories(categories)

    result = pd.crosstab(df['MAKE'], df['MODEL'])

    expected_index = pd.Index(['Acura', 'Honda', 'Tesla'], name='MAKE')
    expected_columns = pd.CategoricalIndex(
        categories, categories=categories, ordered=False, name='MODEL')
    expected_data = [[2, 0, 0], [2, 0, 1], [0, 1, 0]]
    expected = pd.DataFrame(
        expected_data, index=expected_index, columns=expected_columns)

    tm.assert_frame_equal(result, expected)
Example #18
Source File: test_constructor.py From recruit with Apache License 2.0 | 6 votes |
def test_from_arrays_index_series_categorical():
    """MultiIndex.from_arrays keeps categorical levels.

    The same pair of categorical indexes is passed as indexes, as Series
    and as their ``.values``; each resulting level must equal the input.
    """
    # GH13743
    labels = list("abcaab")
    categories = list("bac")
    idx1 = pd.CategoricalIndex(labels, categories=categories, ordered=False)
    idx2 = pd.CategoricalIndex(labels, categories=categories, ordered=True)

    def check_levels(midx):
        # Level 0 must match the unordered input, level 1 the ordered one.
        tm.assert_index_equal(midx.get_level_values(0), idx1)
        tm.assert_index_equal(midx.get_level_values(1), idx2)

    check_levels(pd.MultiIndex.from_arrays([idx1, idx2]))
    check_levels(
        pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]))
    check_levels(pd.MultiIndex.from_arrays([idx1.values, idx2.values]))
Example #19
Source File: test_pivot.py From recruit with Apache License 2.0 | 6 votes |
def test_categorical_aggfunc(self, observed):
    """pivot_table with a categorical index column and aggfunc='count'."""
    # GH 9534
    df = pd.DataFrame({
        "C1": ["A", "B", "C", "C"],
        "C2": ["a", "a", "b", "b"],
        "V": [1, 2, 3, 4],
    })
    df["C1"] = df["C1"].astype("category")

    result = df.pivot_table(
        "V", index="C1", columns="C2", dropna=observed, aggfunc="count")

    row_labels = ['A', 'B', 'C']
    expected_index = pd.CategoricalIndex(
        row_labels, categories=['A', 'B', 'C'], ordered=False, name='C1')
    expected_columns = pd.Index(['a', 'b'], name='C2')
    expected_data = np.array([[1., np.nan],
                              [1., np.nan],
                              [np.nan, 2.]])
    expected = pd.DataFrame(
        expected_data, index=expected_index, columns=expected_columns)

    tm.assert_frame_equal(result, expected)
Example #20
Source File: test_reshape.py From recruit with Apache License 2.0 | 6 votes |
def test_preserve_categorical_dtype(self):
    """make_axis_dummies returns categorical columns from a categorical level.

    Checked for an unordered and an ordered level, with and without an
    identity ``transform``.
    """
    from pandas.core.reshape.reshape import make_axis_dummies

    # GH13854
    for is_ordered in (False, True):
        cidx = pd.CategoricalIndex(list("xyz"), ordered=is_ordered)
        midx = pd.MultiIndex(levels=[['a'], cidx],
                             codes=[[0, 0], [0, 1]])
        df = DataFrame([[10, 11]], index=midx)

        expected = DataFrame(
            [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]],
            index=midx, columns=cidx)

        plain = make_axis_dummies(df)
        tm.assert_frame_equal(plain, expected)

        transformed = make_axis_dummies(df, transform=lambda x: x)
        tm.assert_frame_equal(transformed, expected)
Example #21
Source File: test_category.py From recruit with Apache License 2.0 | 6 votes |
def test_delete(self):
    """delete() drops one position and keeps the categories.

    An out-of-range position must raise.
    """
    ci = self.create_index()
    categories = ci.categories

    # (position to delete, labels expected to remain)
    for position, remaining in ((0, 'abbca'), (-1, 'aabbc')):
        result = ci.delete(position)
        expected = CategoricalIndex(list(remaining), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

    # Either depending on NumPy version
    with pytest.raises((IndexError, ValueError)):
        ci.delete(10)
Example #22
Source File: test_category.py From recruit with Apache License 2.0 | 6 votes |
def test_astype(self):
    """astype(object) gives a plain Index; astype('interval') an IntervalIndex."""
    ci = self.create_index()

    as_object = ci.astype(object)
    tm.assert_index_equal(as_object, Index(np.array(ci)))

    # this IS equal, but not the same class
    assert as_object.equals(ci)
    assert isinstance(as_object, Index)
    assert not isinstance(as_object, CategoricalIndex)

    # interval
    ii = IntervalIndex.from_arrays(
        left=[-0.001, 2.0], right=[2, 4], closed='right')
    codes = [0, 1, -1]
    interval_cat = Categorical.from_codes(
        codes, categories=ii, ordered=True)
    ci = CategoricalIndex(interval_cat)

    expected = ii.take([0, 1, -1])

    as_interval = ci.astype('interval')
    tm.assert_index_equal(as_interval, expected)

    rebuilt = IntervalIndex(as_interval.values)
    tm.assert_index_equal(rebuilt, expected)
Example #23
Source File: common.py From recruit with Apache License 2.0 | 6 votes |
def test_astype_category(self, copy, name, ordered):
    """astype to a categorical dtype matches building a CategoricalIndex.

    Covers a dtype with inferred categories, a dtype whose categories omit
    the last unique value, and the ``'category'`` string alias.
    """
    # GH 18630
    index = self.create_index()
    if name:
        index = index.rename(name)

    # standard categories
    inferred_dtype = CategoricalDtype(ordered=ordered)
    result = index.astype(inferred_dtype, copy=copy)
    expected = CategoricalIndex(index.values, name=name, ordered=ordered)
    tm.assert_index_equal(result, expected)

    # non-standard categories
    partial_categories = index.unique().tolist()[:-1]
    partial_dtype = CategoricalDtype(partial_categories, ordered)
    result = index.astype(partial_dtype, copy=copy)
    expected = CategoricalIndex(
        index.values, name=name, dtype=partial_dtype)
    tm.assert_index_equal(result, expected)

    if ordered is not False:
        return

    # dtype='category' defaults to ordered=False, so only test once
    result = index.astype('category', copy=copy)
    expected = CategoricalIndex(index.values, name=name)
    tm.assert_index_equal(result, expected)
Example #24
Source File: test_astype.py From recruit with Apache License 2.0 | 5 votes |
def test_astype_category(self):
    """A period range cast to 'category' gives the matching categoricals.

    The index cast is compared with a CategoricalIndex of the two periods,
    and the cast of the underlying ``_data`` with that index's values.
    """
    obj = pd.period_range("2000", periods=2)

    periods = [pd.Period('2000-01-01', freq="D"),
               pd.Period('2000-01-02', freq="D")]
    expected_index = pd.CategoricalIndex(periods)

    index_result = obj.astype('category')
    tm.assert_index_equal(index_result, expected_index)

    array_result = obj._data.astype('category')
    tm.assert_categorical_equal(array_result, expected_index.values)
Example #25
Source File: test_astype.py From recruit with Apache License 2.0 | 5 votes |
def test_astype_category(self, tz):
    """A tz-aware date range cast to 'category' gives the matching categoricals.

    The index cast is compared with a CategoricalIndex of the two
    timestamps, and the cast of the underlying ``_data`` with that index's
    values.
    """
    obj = pd.date_range("2000", periods=2, tz=tz)

    stamps = [pd.Timestamp('2000-01-01', tz=tz),
              pd.Timestamp('2000-01-02', tz=tz)]
    expected_index = pd.CategoricalIndex(stamps)

    index_result = obj.astype('category')
    tm.assert_index_equal(index_result, expected_index)

    array_result = obj._data.astype('category')
    tm.assert_categorical_equal(array_result, expected_index.values)
Example #26
Source File: test_category.py From recruit with Apache License 2.0 | 5 votes |
def test_fillna_categorical(self):
    """fillna accepts an existing category and rejects any other value."""
    # GH 11343
    idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name='x')

    # fill by value in categories
    filled = idx.fillna(1.0)
    exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name='x')
    tm.assert_index_equal(filled, exp)

    # fill by value not in categories raises ValueError
    msg = 'fill value must be in categories'
    with pytest.raises(ValueError, match=msg):
        idx.fillna(2.0)
Example #27
Source File: test_get_set.py From recruit with Apache License 2.0 | 5 votes |
def test_set_levels_categorical(ordered):
    """set_levels with a CategoricalIndex keeps the level categorical."""
    # GH13854
    index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
    cidx = CategoricalIndex(list("bac"), ordered=ordered)

    replaced = index.set_levels(cidx, 0)
    expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], codes=index.codes)
    tm.assert_index_equal(replaced, expected)

    level_values = replaced.get_level_values(0)
    expected_values = CategoricalIndex(
        list("bacb"),
        categories=cidx.categories,
        ordered=cidx.ordered,
    )
    tm.assert_index_equal(level_values, expected_values)
Example #28
Source File: test_category.py From recruit with Apache License 2.0 | 5 votes |
def test_engine_type(self, dtype, engine_type):
    """The index engine class follows the dtype of the categorical codes."""
    if dtype == np.int64:
        # having 2**32 - 2**31 categories would be very memory-intensive,
        # so we cheat a bit with the dtype
        ci = pd.CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
        ci.values._codes = ci.values._codes.astype('int64')
    else:
        # num. of uniques required to push CategoricalIndex.codes to a
        # dtype (128 categories required for .codes dtype to be int16 etc.)
        uniques_for_dtype = {np.int8: 1, np.int16: 128, np.int32: 32768}
        num_uniques = uniques_for_dtype[dtype]
        ci = pd.CategoricalIndex(range(num_uniques))

    assert np.issubdtype(ci.codes.dtype, dtype)
    assert isinstance(ci._engine, engine_type)
Example #29
Source File: test_category.py From recruit with Apache License 2.0 | 5 votes |
def test_take_invalid_kwargs(self):
    """take() rejects unknown keywords and numpy's ``out`` / ``mode``."""
    idx = pd.CategoricalIndex([1, 2, 3], name='foo')
    indices = [1, 0, -1]

    # (extra keyword arguments, expected exception, expected message)
    bad_calls = [
        ({'foo': 2}, TypeError,
         r"take\(\) got an unexpected keyword argument 'foo'"),
        ({'out': indices}, ValueError,
         "the 'out' parameter is not supported"),
        ({'mode': 'clip'}, ValueError,
         "the 'mode' parameter is not supported"),
    ]
    for extra, exc_type, msg in bad_calls:
        with pytest.raises(exc_type, match=msg):
            idx.take(indices, **extra)
Example #30
Source File: test_indexing.py From recruit with Apache License 2.0 | 5 votes |
def test_mask_with_boolean(index):
    """A boolean Categorical (or CategoricalIndex) works as a Series mask.

    When ``index`` is truthy the mask is wrapped in a CategoricalIndex.
    Selecting with it must match selecting with its object-dtype cast.
    """
    ser = Series(range(3))

    mask = Categorical([True, False, True])
    if index:
        mask = CategoricalIndex(mask)

    assert com.is_bool_indexer(mask)

    selected = ser[mask]
    expected = ser[mask.astype('object')]
    tm.assert_series_equal(selected, expected)