Python pandas.Categorical() Examples
The following are 30 code examples of pandas.Categorical().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: test_constructors.py From recruit with Apache License 2.0 | 6 votes |
def test_constructor_with_generator(self): # This was raising an Error in isna(single_val).any() because isna # returned a scalar for a generator xrange = range exp = Categorical([0, 1, 2]) cat = Categorical((x for x in [0, 1, 2])) tm.assert_categorical_equal(cat, exp) cat = Categorical(xrange(3)) tm.assert_categorical_equal(cat, exp) # This uses xrange internally from pandas.core.index import MultiIndex MultiIndex.from_product([range(5), ['a', 'b', 'c']]) # check that categories accept generators and sequences cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2])) tm.assert_categorical_equal(cat, exp) cat = Categorical([0, 1, 2], categories=xrange(3)) tm.assert_categorical_equal(cat, exp)
Example #2
Source File: test_missing.py From recruit with Apache License 2.0 | 6 votes |
def test_nan_handling(self): # Nans are represented as -1 in codes c = Categorical(["a", "b", np.nan, "a"]) tm.assert_index_equal(c.categories, Index(["a", "b"])) tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8)) c[1] = np.nan tm.assert_index_equal(c.categories, Index(["a", "b"])) tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8)) # Adding nan to categories should make assigned nan point to the # category! c = Categorical(["a", "b", np.nan, "a"]) tm.assert_index_equal(c.categories, Index(["a", "b"])) tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))
Example #3
Source File: nameddict.py From pyGSTi with Apache License 2.0 | 6 votes |
def as_dataframe(self):
    """Build a pandas ``DataFrame`` from the columns this object reports.

    ``self._add_to_columns`` is asked to fill a ``columns`` dict (seeded
    with an empty ``'value'`` list) and a parallel ``seriestypes`` dict
    (seeded with ``'value': "unknown"``).  Each column is then converted
    according to its recorded series type before the frame is assembled.

    Returns
    -------
    pandas.DataFrame
    """
    import pandas as _pandas

    def _convert(kind, values):
        # Pick the container for one column based on its series type.
        if kind == 'float':
            return _np.array(values, dtype='d')
        if kind == 'int':
            return _np.array(values, dtype=int)  # or pd.Series w/dtype?
        if kind == 'category':
            return _pandas.Categorical(values)
        return values  # will infer an object array?

    columns = {'value': []}
    seriestypes = {'value': "unknown"}
    self._add_to_columns(columns, seriestypes, {})

    converted = {}
    for name, values in columns.items():
        converted[name] = _convert(seriestypes[name], values)

    return _pandas.DataFrame(converted)
Example #4
Source File: test_indexing.py From recruit with Apache License 2.0 | 6 votes |
def test_setitem(self): # int/positional c = self.factor.copy() c[0] = 'b' assert c[0] == 'b' c[-1] = 'a' assert c[-1] == 'a' # boolean c = self.factor.copy() indexer = np.zeros(len(c), dtype='bool') indexer[0] = True indexer[-1] = True c[indexer] = 'c' expected = Categorical(['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], ordered=True) tm.assert_categorical_equal(c, expected)
Example #5
Source File: test_indexing.py From recruit with Apache License 2.0 | 6 votes |
def test_categories_assigments(self):
    """Assigning ``categories`` relabels values; a length change raises."""
    s = Categorical(["a", "b", "c", "a"])
    s.categories = [1, 2, 3]

    exp = np.array([1, 2, 3, 1], dtype=np.int64)
    tm.assert_numpy_array_equal(s.__array__(), exp)
    tm.assert_index_equal(s.categories, Index([1, 2, 3]))

    # lengthen
    with pytest.raises(ValueError):
        s.categories = [1, 2, 3, 4]

    # shorten
    with pytest.raises(ValueError):
        s.categories = [1, 2]

# Combinations of sorted/unique:
Example #6
Source File: test_indexing.py From recruit with Apache License 2.0 | 6 votes |
def non_coercible_categorical(monkeypatch):
    """
    Temporarily replace Categorical.__array__ with a function that raises,
    so any implicit conversion to an array fails loudly.

    The patch is active while the generator is suspended at ``yield``.

    Raises
    ------
    ValueError
        When Categorical.__array__ is called.
    """
    # TODO(Categorical): identify other places where this may be
    # useful and move to a conftest.py
    def array(self, dtype=None):
        raise ValueError("I cannot be converted.")

    with monkeypatch.context() as patcher:
        patcher.setattr(Categorical, "__array__", array)
        yield
Example #7
Source File: test_dtypes.py From recruit with Apache License 2.0 | 6 votes |
def test_codes_dtypes(self): # GH 8453 result = Categorical(['foo', 'bar', 'baz']) assert result.codes.dtype == 'int8' result = Categorical(['foo%05d' % i for i in range(400)]) assert result.codes.dtype == 'int16' result = Categorical(['foo%05d' % i for i in range(40000)]) assert result.codes.dtype == 'int32' # adding cats result = Categorical(['foo', 'bar', 'baz']) assert result.codes.dtype == 'int8' result = result.add_categories(['foo%05d' % i for i in range(400)]) assert result.codes.dtype == 'int16' # removing cats result = result.remove_categories(['foo%05d' % i for i in range(300)]) assert result.codes.dtype == 'int8'
Example #8
Source File: test_api.py From recruit with Apache License 2.0 | 6 votes |
def test_ordered_api(self): # GH 9347 cat1 = Categorical(list('acb'), ordered=False) tm.assert_index_equal(cat1.categories, Index(['a', 'b', 'c'])) assert not cat1.ordered cat2 = Categorical(list('acb'), categories=list('bca'), ordered=False) tm.assert_index_equal(cat2.categories, Index(['b', 'c', 'a'])) assert not cat2.ordered cat3 = Categorical(list('acb'), ordered=True) tm.assert_index_equal(cat3.categories, Index(['a', 'b', 'c'])) assert cat3.ordered cat4 = Categorical(list('acb'), categories=list('bca'), ordered=True) tm.assert_index_equal(cat4.categories, Index(['b', 'c', 'a'])) assert cat4.ordered
Example #9
Source File: test_api.py From recruit with Apache License 2.0 | 6 votes |
def test_set_ordered(self):
    """Toggle ``ordered`` through the methods; direct assignment raises."""
    cat = Categorical(["a", "b", "c", "a"], ordered=True)

    # copies returned by as_unordered / as_ordered
    cat2 = cat.as_unordered()
    assert not cat2.ordered
    cat2 = cat.as_ordered()
    assert cat2.ordered

    # in-place variants
    cat2.as_unordered(inplace=True)
    assert not cat2.ordered
    cat2.as_ordered(inplace=True)
    assert cat2.ordered

    # set_ordered, returning a new object
    assert cat2.set_ordered(True).ordered
    assert not cat2.set_ordered(False).ordered

    # set_ordered, in place
    cat2.set_ordered(True, inplace=True)
    assert cat2.ordered
    cat2.set_ordered(False, inplace=True)
    assert not cat2.ordered

    # removed in 0.19.0
    msg = "can\'t set attribute"
    with pytest.raises(AttributeError, match=msg):
        cat.ordered = True
    with pytest.raises(AttributeError, match=msg):
        cat.ordered = False
Example #10
Source File: test_api.py From recruit with Apache License 2.0 | 6 votes |
def test_codes_immutable(self):
    """``codes`` is read-only, while the backing ``_codes`` stays writable."""
    # Codes should be read only
    c = Categorical(["a", "b", "c", "a", np.nan])
    tm.assert_numpy_array_equal(
        c.codes, np.array([0, 1, 2, 0, -1], dtype='int8'))

    # Assignments to codes should raise
    with pytest.raises(ValueError):
        c.codes = np.array([0, 1, 2, 0, 1], dtype='int8')

    # changes in the codes array should raise
    codes = c.codes
    with pytest.raises(ValueError):
        codes[4] = 1

    # But even after getting the codes, the original array should still be
    # writeable!
    c[4] = "a"
    tm.assert_numpy_array_equal(
        c.codes, np.array([0, 1, 2, 0, 0], dtype='int8'))

    c._codes[4] = 2
    tm.assert_numpy_array_equal(
        c.codes, np.array([0, 1, 2, 0, 2], dtype='int8'))
Example #11
Source File: test_dtypes.py From recruit with Apache License 2.0 | 6 votes |
def test_astype(self, ordered):
    """Check ``Categorical.astype`` for object, int and float targets.

    Parameters
    ----------
    ordered : bool
        Passed through to the ``Categorical`` constructor (supplied by
        the test parametrization, which is outside this block).
    """
    # string
    cat = Categorical(list('abbaaccc'), ordered=ordered)
    result = cat.astype(object)
    expected = np.array(cat)
    tm.assert_numpy_array_equal(result, expected)

    msg = 'could not convert string to float'
    with pytest.raises(ValueError, match=msg):
        cat.astype(float)

    # numeric
    cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
    result = cat.astype(object)
    expected = np.array(cat, dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    # The builtins are used instead of the ``np.int`` / ``np.float``
    # aliases: those were plain aliases of the builtins and have been
    # deprecated and later removed from NumPy.
    result = cat.astype(int)
    expected = np.array(cat, dtype=int)
    tm.assert_numpy_array_equal(result, expected)

    result = cat.astype(float)
    expected = np.array(cat, dtype=float)
    tm.assert_numpy_array_equal(result, expected)
Example #12
Source File: test_api.py From recruit with Apache License 2.0 | 6 votes |
def test_remove_categories(self):
    """Removing a category turns its values into NaN.

    Covers the copying form (scalar and list argument), the in-place
    form, and the error when the category is not present.
    """
    cat = Categorical(["a", "b", "c", "a"], ordered=True)
    old = cat.copy()
    new = Categorical(["a", "b", np.nan, "a"],
                      categories=["a", "b"], ordered=True)

    # first inplace == False
    for removal in ("c", ["c"]):
        res = cat.remove_categories(removal)
        tm.assert_categorical_equal(cat, old)
        tm.assert_categorical_equal(res, new)

    # inplace == True
    res = cat.remove_categories("c", inplace=True)
    tm.assert_categorical_equal(cat, new)
    assert res is None

    # removal is not in categories
    with pytest.raises(ValueError):
        cat.remove_categories(["c"])
Example #13
Source File: test_operators.py From recruit with Apache License 2.0 | 6 votes |
def test_compare_frame(self):
    """Comparing a Categorical with a DataFrame yields a DataFrame."""
    # GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame
    data = ["a", "b", 2, "a"]
    cat = Categorical(data)
    df = DataFrame(cat)

    comparisons = (operator.eq, operator.ne, operator.ge,
                   operator.gt, operator.le, operator.lt)
    for compare in comparisons:
        with pytest.raises(ValueError):
            # alignment raises unless we transpose
            compare(cat, df)

    row = df.T

    result = cat == row
    expected = DataFrame([[True, True, True, True]])
    tm.assert_frame_equal(result, expected)

    result = cat[::-1] != row
    expected = DataFrame([[False, True, True, False]])
    tm.assert_frame_equal(result, expected)
Example #14
Source File: test_sorting.py From recruit with Apache License 2.0 | 6 votes |
def test_numpy_argsort(self):
    """``np.argsort`` works on a Categorical; ``axis``/``order`` raise."""
    c = Categorical([5, 3, 1, 4, 2], ordered=True)
    expected = np.array([2, 4, 1, 3, 0])

    result = np.argsort(c)
    tm.assert_numpy_array_equal(result, expected, check_dtype=False)

    result = np.argsort(c, kind='mergesort')
    tm.assert_numpy_array_equal(result, expected, check_dtype=False)

    # keyword arguments that are rejected with a ValueError
    msg = "the 'axis' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.argsort(c, axis=0)

    msg = "the 'order' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.argsort(c, order='C')
Example #15
Source File: test_constructors.py From recruit with Apache License 2.0 | 5 votes |
def test_constructor_tuples_datetimes(self): # numpy will auto reshape when all of the tuples are the # same len, so add an extra one with 2 items and slice it off values = np.array([(Timestamp('2010-01-01'),), (Timestamp('2010-01-02'),), (Timestamp('2010-01-01'),), (Timestamp('2010-01-02'),), ('a', 'b')], dtype=object)[:-1] result = Categorical(values) expected = Index([(Timestamp('2010-01-01'),), (Timestamp('2010-01-02'),)], tupleize_cols=False) tm.assert_index_equal(result.categories, expected)
Example #16
Source File: test_operators.py From recruit with Apache License 2.0 | 5 votes |
def test_unordered_different_categories_raises(self):
    """``==`` between unordered Categoricals with different categories
    raises ``TypeError``."""
    c1 = Categorical(['a', 'b'], categories=['a', 'b'], ordered=False)
    c2 = Categorical(['a', 'c'], categories=['c', 'a'], ordered=False)

    msg = "Categoricals can only be compared"
    with pytest.raises(TypeError, match=msg):
        c1 == c2
Example #17
Source File: test_operators.py From recruit with Apache License 2.0 | 5 votes |
def test_reflected_comparison_with_scalars(self): # GH8658 cat = Categorical([1, 2, 3], ordered=True) tm.assert_numpy_array_equal(cat > cat[0], np.array([False, True, True])) tm.assert_numpy_array_equal(cat[0] < cat, np.array([False, True, True]))
Example #18
Source File: test_operators.py From recruit with Apache License 2.0 | 5 votes |
def test_datetime_categorical_comparison(self): dt_cat = Categorical(date_range('2014-01-01', periods=3), ordered=True) tm.assert_numpy_array_equal(dt_cat > dt_cat[0], np.array([False, True, True])) tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, np.array([False, True, True]))
Example #19
Source File: test_operators.py From recruit with Apache License 2.0 | 5 votes |
def test_categories_none_comparisons(self): factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], ordered=True) tm.assert_categorical_equal(factor, self.factor)
Example #20
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_take_fill_value_new_raises(self):
    """``take`` raises ``TypeError`` for a ``fill_value`` that is not one
    of the categories."""
    # https://github.com/pandas-dev/pandas/issues/23296
    cat = pd.Categorical(['a', 'b', 'c'])
    xpr = r"'fill_value' \('d'\) is not in this Categorical's categories."

    with pytest.raises(TypeError, match=xpr):
        cat.take([0, 1, -1], fill_value='d', allow_fill=True)
Example #21
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_take_fill_with_negative_one(self): # -1 was a category cat = pd.Categorical([-1, 0, 1]) result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1) expected = pd.Categorical([-1, -1, 0], categories=[-1, 0, 1]) tm.assert_categorical_equal(result, expected)
Example #22
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_take_allow_fill(self): # https://github.com/pandas-dev/pandas/issues/23296 cat = pd.Categorical(['a', 'a', 'b']) result = cat.take([0, -1, -1], allow_fill=True) expected = pd.Categorical(['a', np.nan, np.nan], categories=['a', 'b']) tm.assert_categorical_equal(result, expected)
Example #23
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_positional_take_unobserved(self, ordered): cat = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'], ordered=ordered) result = cat.take([1, 0], allow_fill=False) expected = pd.Categorical(['b', 'a'], categories=cat.categories, ordered=ordered) tm.assert_categorical_equal(result, expected)
Example #24
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_positional_take(self, ordered): cat = pd.Categorical(['a', 'a', 'b', 'b'], categories=['b', 'a'], ordered=ordered) result = cat.take([0, 1, 2], allow_fill=False) expected = pd.Categorical(['a', 'a', 'b'], categories=cat.categories, ordered=ordered) tm.assert_categorical_equal(result, expected)
Example #25
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_take_empty(self, allow_fill):
    """``take`` on an empty Categorical raises ``IndexError``."""
    # https://github.com/pandas-dev/pandas/issues/20664
    empty_cat = pd.Categorical([], categories=['a', 'b'])

    with pytest.raises(IndexError):
        empty_cat.take([0], allow_fill=allow_fill)
Example #26
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_take_positive_no_warning(self): cat = pd.Categorical(['a', 'b']) with tm.assert_produces_warning(None): cat.take([0, 0])
Example #27
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_take_warns(self):
    """``take`` with a negative index and no ``allow_fill`` argument is
    expected to emit a ``FutureWarning``."""
    cat = pd.Categorical(['a', 'b'])
    indices = [0, -1]

    with tm.assert_produces_warning(FutureWarning):
        cat.take(indices)
Example #28
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_isin_empty(empty): s = pd.Categorical(["a", "b"]) expected = np.array([False, False], dtype=bool) result = s.isin(empty) tm.assert_numpy_array_equal(expected, result)
Example #29
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_isin_cats():
    """``isin`` matches category values, and NaN when NaN is listed."""
    # GH2003
    cat = pd.Categorical(["a", "b", np.nan])

    # NaN listed in the lookup values: the missing entry matches
    result = cat.isin(["a", np.nan])
    expected = np.array([True, False, True], dtype=bool)
    tm.assert_numpy_array_equal(expected, result)

    # "c" is not a category: only "a" matches
    result = cat.isin(["a", "c"])
    expected = np.array([True, False, False], dtype=bool)
    tm.assert_numpy_array_equal(expected, result)
Example #30
Source File: test_operators.py From recruit with Apache License 2.0 | 5 votes |
def test_compare_unordered_different_order(self): # https://github.com/pandas-dev/pandas/issues/16603#issuecomment- # 349290078 a = pd.Categorical(['a'], categories=['a', 'b']) b = pd.Categorical(['b'], categories=['b', 'a']) assert not a.equals(b)