Python pandas.CategoricalDtype() Examples
The following are 30 code examples of pandas.CategoricalDtype().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: dataserializer.py From mars with Apache License 2.0 | 6 votes |
def mars_serialize_context():
    """Return the module-level serialization context, creating it on first call.

    On first use a pyarrow default serialization context is created, custom
    (de)serializers are registered for the sparse array, groupby wrapper and
    pandas interval/categorical types, the pyarrow patch helper is applied,
    and, when ``vineyard`` is available, the context is also registered with
    vineyard. The result is cached in ``_serialize_context`` and returned
    unchanged on later calls.
    """
    global _serialize_context
    if _serialize_context is not None:
        return _serialize_context

    context = pyarrow.default_serialization_context()
    # (type, registered type id, serializer, deserializer)
    registrations = (
        (SparseNDArray, 'mars.SparseNDArray',
         _serialize_sparse_nd_array, _deserialize_sparse_nd_array),
        (GroupByWrapper, 'pandas.GroupByWrapper',
         _serialize_groupby_wrapper, _deserialize_groupby_wrapper),
        (pd.Interval, 'pandas.Interval',
         _serialize_pandas_interval, _deserialize_pandas_interval),
        (pd.Categorical, 'pandas.Categorical',
         _serialze_pandas_categorical, _deserialize_pandas_categorical),
        (pd.CategoricalDtype, 'pandas.CategoricalDtype',
         _serialize_pandas_categorical_dtype, _deserialize_pandas_categorical_dtype),
    )
    for type_, type_id, dump, load in registrations:
        context.register_type(type_, type_id,
                              custom_serializer=dump,
                              custom_deserializer=load)

    _apply_pyarrow_serialization_patch(context)
    if vineyard is not None:  # pragma: no cover
        vineyard.register_vineyard_serialize_context(context)

    _serialize_context = context
    return _serialize_context
Example #2
Source File: series.py From modin with Apache License 2.0 | 6 votes |
def ravel(self, order="C"):
    """
    Return the underlying data flattened into a one-dimensional array.

    Parameters
    ----------
    order : {'C', 'F', 'A', 'K'}, optional
        Forwarded to ``flatten`` on the array produced by the query compiler.

    Returns
    -------
    numpy.ndarray or ndarray-like
        The flattened data. When the Series has a categorical dtype the
        values are wrapped in a ``pandas.Categorical`` of that dtype.
    """
    flat = self._query_compiler.to_numpy().flatten(order=order)
    if not isinstance(self.dtype, pandas.CategoricalDtype):
        return flat
    return pandas.Categorical(flat, dtype=self.dtype)
Example #3
Source File: utils.py From mars with Apache License 2.0 | 6 votes |
def build_series(series_obj, fill_value=1, size=1):
    """Build a pandas Series holding generated placeholder values.

    An empty series with ``series_obj``'s dtype and an empty slice of its
    pandas index is created, one generated record is inserted under a
    generated index label, and the one-row series is repeated ``size`` times.

    :param series_obj: object exposing ``dtype`` and ``index_value.to_pandas()``
    :param fill_value: seed passed to the value generator
    :param size: number of times the single generated row is repeated
    :return: the resulting pandas Series cast to ``series_obj.dtype``
    """
    result = build_empty_series(series_obj.dtype,
                                index=series_obj.index_value.to_pandas()[:0])
    sample = _generate_value(series_obj.dtype, fill_value)

    idx = result.index
    if isinstance(idx, pd.MultiIndex):
        # one generated label per level, addressed as a single tuple key
        key = tuple(_generate_value(level.dtype, fill_value)
                    for level in idx.levels)
        result.loc[(key,)] = sample
    elif isinstance(idx.dtype, pd.CategoricalDtype):
        result.loc[None] = sample
    else:
        result.loc[_generate_value(idx.dtype, fill_value)] = sample

    repeated = pd.concat([result] * size)
    # make sure dtype correct for MultiIndex
    return repeated.astype(series_obj.dtype, copy=False)
Example #4
Source File: _pandas_loaders.py From pymapd with Apache License 2.0 | 6 votes |
def get_mapd_type_from_known(dtype):
    """Map a dtype to a type name, for cases where the pandas type system matches.

    Booleans map to ``'BOOL'``; integers map by ``itemsize`` to
    ``'TINYINT'``/``'SMALLINT'``/``'INT'``/``'BIGINT'``; floats map by
    ``itemsize`` to ``'FLOAT'``/``'DOUBLE'``; any datetime64 maps to
    ``'TIMESTAMP'``; categoricals map to ``'STR'``.

    Raises
    ------
    TypeError
        If ``dtype`` matches none of the handled kinds.
    """
    if is_bool_dtype(dtype):
        return 'BOOL'

    if is_integer_dtype(dtype):
        width = dtype.itemsize
        if width <= 1:
            return 'TINYINT'
        # 2 and 4 byte integers have dedicated names; everything else is BIGINT
        return {2: 'SMALLINT', 4: 'INT'}.get(width, 'BIGINT')

    if is_float_dtype(dtype):
        return 'FLOAT' if dtype.itemsize <= 4 else 'DOUBLE'

    if is_datetime64_any_dtype(dtype):
        return 'TIMESTAMP'

    if isinstance(dtype, pd.CategoricalDtype):
        return 'STR'

    raise TypeError("Unhandled type {}".format(dtype))
Example #5
Source File: filtering_fe_autotype.py From dash-docs with MIT License | 6 votes |
def table_type(df_column):
    """Return the table column type name for a pandas column.

    The result is one of the strings ``'datetime'``, ``'text'``,
    ``'numeric'`` or ``'any'``, chosen from the column's extension dtype.

    :param df_column: object exposing a ``dtype`` attribute (e.g. a Series)
    :return: the type name as a plain string
    """
    # Note - this only works with Pandas >= 1.0.0

    if sys.version_info < (3, 0):  # Pandas 1.0.0 does not support Python 2
        return 'any'

    dtype = df_column.dtype

    if isinstance(dtype, pd.DatetimeTZDtype):
        # Must be a plain string: a trailing comma here would return a tuple.
        return 'datetime'

    text_dtypes = (
        pd.StringDtype,
        pd.BooleanDtype,
        pd.CategoricalDtype,
        pd.PeriodDtype,
    )
    if isinstance(dtype, text_dtypes):
        return 'text'

    numeric_dtypes = (
        pd.SparseDtype,
        pd.IntervalDtype,
        pd.Int8Dtype,
        pd.Int16Dtype,
        pd.Int32Dtype,
        pd.Int64Dtype,
    )
    if isinstance(dtype, numeric_dtypes):
        return 'numeric'

    return 'any'
Example #6
Source File: rewrites.py From sdc with BSD 2-Clause "Simplified" License | 6 votes |
def check_dtype_is_categorical(self, expr, func_ir, block, typemap, calltypes):
    """Tell whether the ``dtype`` keyword of ``expr`` denotes a categorical dtype.

    Returns ``True`` when the definition of the ``dtype`` keyword argument is
    either the constant ``'category'`` or a call whose callee is inferred to
    be ``pd.CategoricalDtype``; ``False`` otherwise, including when no
    ``dtype`` keyword is present.
    """
    dtype_var = None
    for kw_name, kw_value in expr.kws:
        if kw_name == 'dtype':
            # no early exit: the last 'dtype' entry wins
            dtype_var = kw_value

    if not dtype_var:
        return False

    definition = guard(get_definition, func_ir, dtype_var)

    # dtype='category'
    if isinstance(definition, ir.Const) and definition.value == 'category':
        return True

    # dtype=pd.CategoricalDtype(...)
    return bool(hasattr(definition, 'func')
                and func_ir.infer_constant(definition.func) == pd.CategoricalDtype)
Example #7
Source File: pandas_support.py From sdc with BSD 2-Clause "Simplified" License | 6 votes |
def from_dtype(pdtype):
    """
    Return a Numba Type instance corresponding to the given Pandas *dtype*.

    Only ``pd.CategoricalDtype`` is handled; its categories (as a list, or
    ``None`` when unset) and ``ordered`` flag are forwarded to
    ``CategoricalDtypeType``.
    NotImplementedError is raised for any other Pandas dtype.
    """
    # TODO: use issubclass
    if not isinstance(pdtype, pd.CategoricalDtype):
        raise NotImplementedError("%r cannot be represented as a Numba type" % (pdtype,))

    categories = None if pdtype.categories is None else list(pdtype.categories)
    return CategoricalDtypeType(categories=categories, ordered=pdtype.ordered)
Example #8
Source File: file_reader.py From modin with Apache License 2.0 | 6 votes |
def read(cls, *args, **kwargs):
    """Read via ``cls._read`` and re-apply categorical dtypes on the result.

    All arguments are forwarded to ``cls._read``. If the resulting query
    compiler exposes ``dtypes`` and any of them is categorical, the compiler
    is cast (``astype``) to those categorical dtypes for the affected columns
    and the cast result is returned; otherwise the compiler is returned as is.

    Raises
    ------
    NotImplementedError
        If the configured partition format is not ``"pandas"``.
    """
    query_compiler = cls._read(*args, **kwargs)
    # TODO (devin-petersohn): Make this section more general for non-pandas kernel
    # implementations.
    if partition_format.get().lower() != "pandas":
        raise NotImplementedError("FIXME")
    import pandas

    def _is_categorical(dtype):
        return isinstance(dtype, pandas.CategoricalDtype)

    if not hasattr(query_compiler, "dtypes") or not any(
        _is_categorical(t) for t in query_compiler.dtypes
    ):
        return query_compiler

    dtypes = query_compiler.dtypes
    categorical_columns = {
        col: dtypes[col] for col in dtypes.index if _is_categorical(dtypes[col])
    }
    return query_compiler.astype(categorical_columns)
Example #9
Source File: csv_ext.py From sdc with BSD 2-Clause "Simplified" License | 6 votes |
def _get_dtype_str(t):
    """Return the string form of type ``t`` used for its dtype.

    Categorical types yield ``str(t)`` with ``ordered=None`` rewritten to
    ``ordered=False``; the string array type yields ``'string_array_type'``;
    anything else yields ``'<dtype>[::1]'`` built from ``t.dtype``.
    """
    elem_dtype = t.dtype

    if isinstance(t, Categorical):
        # return categorical representation
        # for some reason pandas and pyarrow read_csv() return CategoricalDtype with
        # ordered=False in case when dtype is with ordered=None
        return str(t).replace('ordered=None', 'ordered=False')

    if elem_dtype == types.NPDatetime('ns'):
        elem_dtype = 'NPDatetime("ns")'

    if t == string_array_type:
        # HACK: add string_array_type to numba.types
        # FIXME: fix after Numba #3372 is resolved
        types.string_array_type = string_array_type
        return 'string_array_type'

    return '{}[::1]'.format(elem_dtype)
Example #10
Source File: test_categoricaldtype.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def test_constructor_no_order(self):
    """Constructing the dtype in jitted code without ``ordered`` matches ordered=False."""
    @nb.njit
    def build():
        return pd.CategoricalDtype(categories=('b', 'a'))

    boxed = build()
    expected = self._pd_dtype(ordered=False)
    assert boxed == expected
Example #11
Source File: test_categoricaldtype.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def _pd_dtype(self, ordered=True): return pd.CategoricalDtype(categories=['b', 'a'], ordered=ordered)
Example #12
Source File: test_categoricaldtype.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def test_constructor_no_categories(self):
    """A dtype built in jitted code with no arguments equals the pandas default one."""
    @nb.njit
    def build():
        return pd.CategoricalDtype()

    boxed = build()
    expected = pd.CategoricalDtype(ordered=None)

    assert boxed == expected
    assert boxed.categories == expected.categories
    assert boxed.ordered == expected.ordered
Example #13
Source File: test_validate.py From cjworkbench with GNU Affero General Public License v3.0 | 5 votes |
def test_unused_categories(self):
    """Validation rejects a categorical column with a category no value uses."""
    with self.assertRaisesRegex(ValueError, "unused category 'b'"):
        dtype = pd.CategoricalDtype(["a", "b"])
        frame = pd.DataFrame({"foo": ["a", "a"]}, dtype=dtype)
        validate_dataframe(frame)
Example #14
Source File: test_validate.py From cjworkbench with GNU Affero General Public License v3.0 | 5 votes |
def test_null_is_not_a_category(self):
    """A null value must not count as a use of any category."""
    # pd.CategoricalDtype means storing nulls as -1. Don't consider -1 when
    # counting the used categories.
    with self.assertRaisesRegex(ValueError, "unused category 'b'"):
        dtype = pd.CategoricalDtype(["a", "b"])
        frame = pd.DataFrame({"foo": ["a", None]}, dtype=dtype)
        validate_dataframe(frame)
Example #15
Source File: test_parquet.py From kartothek with MIT License | 5 votes |
def test_read_categorical(store):
    """Restoring yields object dtype by default and categorical on request."""
    original = pd.DataFrame({"col": ["a"]}).astype({"col": "category"})

    serialiser = ParquetSerializer()
    key = serialiser.store(store, "prefix", original)

    # without ``categories`` the column comes back as plain objects
    plain = serialiser.restore_dataframe(store, key)
    assert plain.dtypes["col"] == "O"

    # with ``categories`` the categorical dtype is restored
    restored = serialiser.restore_dataframe(store, key, categories=["col"])
    assert restored.dtypes["col"] == pd.CategoricalDtype(["a"], ordered=False)
Example #16
Source File: test_parquet.py From kartothek with MIT License | 5 votes |
def test_read_categorical_empty(store):
    """An empty categorical column restores with an empty category list."""
    original = (
        pd.DataFrame({"col": ["a"]})
        .astype({"col": "category"})
        .iloc[:0]
    )

    serialiser = ParquetSerializer()
    key = serialiser.store(store, "prefix", original)

    # without ``categories`` the column comes back as plain objects
    plain = serialiser.restore_dataframe(store, key)
    assert plain.dtypes["col"] == "O"

    # with ``categories`` the dtype is categorical with no categories
    restored = serialiser.restore_dataframe(store, key, categories=["col"])
    assert restored.dtypes["col"] == pd.CategoricalDtype([], ordered=False)
Example #17
Source File: label.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit(self, y: Union[ArrayLike, SeriesType]) -> "LabelEncoder":
    """Fit the encoder on ``y``.

    Dask arrays and categorical inputs are handled here, setting
    ``classes_`` and ``dtype_`` directly; any other input sets ``dtype_`` to
    ``None`` and defers to the parent class's ``fit``.

    Returns ``self`` for the dask-array and categorical cases, otherwise
    whatever the parent ``fit`` returns.
    """
    y = self._check_array(y)

    if isinstance(y, da.Array):
        self.classes_ = _encode_dask_array(y).compute()
        self.dtype_: Optional[pd.CategoricalDtype] = None
        return self

    if _is_categorical(y):
        self.classes_ = _encode_categorical(y)
        self.dtype_ = y.dtype
        return self

    self.dtype_ = None
    return super(LabelEncoder, self).fit(y)
Example #18
Source File: parsers.py From modin with Apache License 2.0 | 5 votes |
def find_common_type_cat(types):
    """Find a common dtype for ``types``, with special handling for categoricals.

    * If not every entry is a ``pandas.CategoricalDtype``, the decision is
      delegated to ``find_common_type``.
    * If every entry is an ordered categorical, the result is an ordered
      categorical whose categories are the sorted union of all categories.
    * Otherwise the result is the dtype produced by ``union_categoricals``
      over empty categoricals of each input dtype.

    Parameters
    ----------
    types : list of dtypes
        Iterated more than once, so it must be a re-iterable collection.

    Returns
    -------
    dtype
        The common dtype.
    """
    if not all(isinstance(t, pandas.CategoricalDtype) for t in types):
        return find_common_type(types)

    if all(t.ordered for t in types):
        merged = [c for t in types for c in t.categories]
        # np.unique already returns the unique values in sorted order. The
        # previous code indexed its result with [0], keeping only the first
        # category, and then failed when sorting that scalar.
        return pandas.CategoricalDtype(np.unique(merged), ordered=True)

    # Not all inputs are ordered here, so categories are never sorted.
    return union_categoricals(
        [pandas.Categorical([], dtype=t) for t in types],
        sort_categories=False,
    ).dtype
Example #19
Source File: geometric_data.py From gempy with GNU Lesser General Public License v3.0 | 5 votes |
def map_data_from_series(self, series, attribute: str, idx=None):
    """
    Map columns from the :class:`Series` data frame to a :class:`GeometricData` data frame.

    The values of ``self.df['series']`` are looked up in
    ``series.df[attribute]`` and written to ``self.df[attribute]`` at the
    rows given by ``idx``. For the ``'id'`` and ``'order_series'`` attributes
    the mapped values are cast to ``int`` first. Afterwards, unused
    categories are dropped from ``self.df['order_series']`` when that column
    is categorical.

    Args:
        series (:class:`Series`): object whose ``df`` holds the column to map from.
        attribute (str): column to be mapped from the :class:`Series` to the
            :class:`GeometricData`.
        idx (Optional[int, list[int]): If passed, list of indices of the
            :class:`GeometricData` that will be mapped. Defaults to every row.

    Returns:
        :class:GeometricData
    """
    if idx is None:
        idx = self.df.index
    idx = np.atleast_1d(idx)

    mapped = self.df['series'].map(series.df[attribute])
    if attribute in ['id', 'order_series']:
        mapped = mapped.astype(int)
    self.df.loc[idx, attribute] = mapped

    if isinstance(self.df['order_series'].dtype, pn.CategoricalDtype):
        # Assign the result back rather than passing inplace=True: the
        # ``inplace`` argument of ``remove_unused_categories`` no longer
        # exists in pandas 2.0+, while assignment works on every version.
        self.df['order_series'] = self.df['order_series'].cat.remove_unused_categories()
    return self
Example #20
Source File: utils.py From mixed-anomaly with Apache License 2.0 | 5 votes |
def is_column_categorical(column: pd.Series) -> bool:
    """Report whether ``column`` has a categorical dtype or the object dtype."""
    dtype = column.dtype
    if isinstance(dtype, pd.CategoricalDtype):
        return True
    return dtype == np.object_
Example #21
Source File: test_categoricaldtype.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def test_constructor_categories_set(self):
    """Categories given as a set in jitted code produce the reference dtype."""
    @nb.njit
    def build():
        return pd.CategoricalDtype(categories={'b', 'a'}, ordered=True)

    boxed = build()
    expected = self._pd_dtype()
    assert boxed == expected
Example #22
Source File: test_categoricaldtype.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def test_constructor_categories_list(self):
    """Categories given as a list in jitted code produce the reference dtype."""
    @nb.njit
    def build():
        return pd.CategoricalDtype(categories=['b', 'a'], ordered=True)

    boxed = build()
    expected = self._pd_dtype()
    assert boxed == expected
Example #23
Source File: test_series_category.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def test_constructor_CategoricalDtype_list(self):
    """A Series built in jitted code from lists with a CategoricalDtype equals the reference."""
    @nb.njit
    def build():
        return pd.Series(data=[1, 2, 3, 2, 1], dtype=pd.CategoricalDtype(categories=[1, 2, 3]))

    boxed = build()
    expected = self._pd_value()
    assert boxed.equals(expected)
Example #24
Source File: test_series_category.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def test_constructor_CategoricalDtype(self):
    """A Series built in jitted code from tuples with a CategoricalDtype equals the reference."""
    @nb.njit
    def build():
        return pd.Series(data=(1, 2, 3, 2, 1), dtype=pd.CategoricalDtype(categories=(1, 2, 3)))

    boxed = build()
    expected = self._pd_value()
    assert boxed.equals(expected)
Example #25
Source File: test_io.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def pd_csv_cat2(self, use_pyarrow=False):
    """Return a zero-argument function that reads ``csv_data_cat1.csv``.

    The returned function reads three columns named ``C1``/``C2``/``C3``,
    with ``C2`` as a categorical over ``A``-``D``, and returns the frame.
    The reader and the integer type come from ``self._read_csv`` and
    ``self._int_type``.
    """
    read_csv = self._read_csv(use_pyarrow)
    int_type = self._int_type()

    # NOTE(review): the inline keyword form of the call below looks like the
    # shape under test, so it is deliberately left as is - confirm.
    def test_impl():
        ct_dtype = CategoricalDtype(['A', 'B', 'C', 'D'])
        df = read_csv(
            "csv_data_cat1.csv",
            names=['C1', 'C2', 'C3'],
            dtype={'C1': int_type, 'C2': ct_dtype, 'C3': str},
        )
        return df

    return test_impl
Example #26
Source File: test_io.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def pd_csv_cat1(self, use_pyarrow=False):
    """Return a zero-argument function that reads ``csv_data_cat1.csv``.

    The returned function reads three columns named ``C1``/``C2``/``C3``
    as ``int``, a categorical over ``A``-``C`` and ``str`` respectively,
    and returns the frame. The reader comes from ``self._read_csv``.
    """
    read_csv = self._read_csv(use_pyarrow)

    def test_impl():
        names = ['C1', 'C2', 'C3']
        ct_dtype = CategoricalDtype(['A', 'B', 'C'])
        # ``np.int`` was only an alias of the builtin ``int`` and no longer
        # exists in NumPy >= 1.24, so the builtin is used directly.
        dtypes = {'C1': int, 'C2': ct_dtype, 'C3': str}
        df = read_csv("csv_data_cat1.csv", names=names, dtype=dtypes)
        return df

    return test_impl
Example #27
Source File: pdimpl.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def _CategoricalDtype_intrinsic(typingctx, categories, ordered):
    """
    Creates CategoricalDtype object.

    Assertions:
        categories - Tuple of literal values or None
        ordered - literal Bool

    Returns the pair ``(signature, codegen)``: the signature maps
    ``(categories, ordered)`` to a ``CategoricalDtypeType`` built from the
    literal values, and ``codegen`` yields the context's dummy value.
    """
    # NOTE(review): when an argument matches neither branch below, the
    # corresponding local stays unbound and the constructor call fails with
    # UnboundLocalError - confirm the assertions above are always met.
    if isinstance(categories, types.Tuple):
        categories_list = [item.literal_value for item in categories]
    elif isinstance(categories, types.NoneType):
        categories_list = None

    if isinstance(ordered, types.Literal):
        ordered_value = ordered.literal_value
    elif isinstance(ordered, types.NoneType):
        ordered_value = None

    return_type = CategoricalDtypeType(categories_list, ordered_value)
    signature = return_type(categories, ordered)

    def codegen(context, builder, signature, args):
        # All CategoricalDtype objects are dummy values in LLVM.
        # They only exist in the type level.
        return context.get_dummy_value()

    return signature, codegen


# TODO: move to tools
Example #28
Source File: pdimpl.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def _CategoricalDtype(categories=None, ordered=None):
    """
    Implementation of constructor for pandas CategoricalDtype.

    Returns an ``impl`` function that forwards ``categories`` to the
    intrinsic together with ``ordered`` resolved ahead of time: the literal
    value when ``ordered`` is a literal type, otherwise ``ordered`` itself.
    """
    is_literal = isinstance(ordered, types.Literal)
    ordered_const = ordered.literal_value if is_literal else ordered

    def impl(categories=None, ordered=None):
        return _CategoricalDtype_intrinsic(categories, ordered_const)

    return impl
Example #29
Source File: pandas_support.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def as_dtype(nbtype):
    """
    Return a Pandas *dtype* instance corresponding to the given Numba type.

    The type is unliteral-ed first. Only ``CategoricalDtypeType`` is handled,
    mapping to ``pd.CategoricalDtype`` with the same categories and ordering.
    NotImplementedError is raised if no correspondence is known.
    """
    plain_type = types.unliteral(nbtype)
    if not isinstance(plain_type, CategoricalDtypeType):
        raise NotImplementedError("%r cannot be represented as a Pandas dtype" % (plain_type,))

    return pd.CategoricalDtype(categories=plain_type.categories,
                               ordered=plain_type.ordered)
Example #30
Source File: dataserializer.py From mars with Apache License 2.0 | 5 votes |
def _deserialize_pandas_categorical_dtype(data): return pd.CategoricalDtype(data[0], data[1])