Python pandas.core.algorithms.unique1d() Examples
The following are 24
code examples of pandas.core.algorithms.unique1d().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
pandas.core.algorithms
, or try the search function
.
Example #1
Source File: test_analytics.py From recruit with Apache License 2.0 | 6 votes |
def test_nunique(self, float_frame_with_na, float_frame, float_string_frame): f = lambda s: len(algorithms.unique1d(s.dropna())) assert_stat_op_calc('nunique', f, float_frame_with_na, has_skipna=False, check_dtype=False, check_dates=True) assert_stat_op_api('nunique', float_frame, float_string_frame) df = DataFrame({'A': [1, 1, 1], 'B': [1, 2, 3], 'C': [1, np.nan, 3]}) tm.assert_series_equal(df.nunique(), Series({'A': 1, 'B': 3, 'C': 2})) tm.assert_series_equal(df.nunique(dropna=False), Series({'A': 1, 'B': 3, 'C': 3})) tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2})) tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2}))
Example #2
Source File: test_analytics.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_nunique(self, float_frame_with_na, float_frame, float_string_frame): f = lambda s: len(algorithms.unique1d(s.dropna())) assert_stat_op_calc('nunique', f, float_frame_with_na, has_skipna=False, check_dtype=False, check_dates=True) assert_stat_op_api('nunique', float_frame, float_string_frame) df = DataFrame({'A': [1, 1, 1], 'B': [1, 2, 3], 'C': [1, np.nan, 3]}) tm.assert_series_equal(df.nunique(), Series({'A': 1, 'B': 3, 'C': 2})) tm.assert_series_equal(df.nunique(dropna=False), Series({'A': 1, 'B': 3, 'C': 3})) tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2})) tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2}))
Example #3
Source File: base.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def unique(self): values = self._values if hasattr(values, 'unique'): result = values.unique() else: from pandas.core.algorithms import unique1d result = unique1d(values) return result
Example #4
Source File: test_analytics.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_nunique(self): f = lambda s: len(algorithms.unique1d(s.dropna())) self._check_stat_op('nunique', f, has_skipna=False, check_dtype=False, check_dates=True) df = DataFrame({'A': [1, 1, 1], 'B': [1, 2, 3], 'C': [1, np.nan, 3]}) tm.assert_series_equal(df.nunique(), Series({'A': 1, 'B': 3, 'C': 2})) tm.assert_series_equal(df.nunique(dropna=False), Series({'A': 1, 'B': 3, 'C': 3})) tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2})) tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2}))
Example #5
Source File: base.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def unique(self): values = self._values if hasattr(values, 'unique'): result = values.unique() else: from pandas.core.algorithms import unique1d result = unique1d(values) return result
Example #6
Source File: test_analytics.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_nunique(self): f = lambda s: len(algorithms.unique1d(s.dropna())) self._check_stat_op('nunique', f, has_skipna=False, check_dtype=False, check_dates=True) df = DataFrame({'A': [1, 1, 1], 'B': [1, 2, 3], 'C': [1, np.nan, 3]}) tm.assert_series_equal(df.nunique(), Series({'A': 1, 'B': 3, 'C': 2})) tm.assert_series_equal(df.nunique(dropna=False), Series({'A': 1, 'B': 3, 'C': 3})) tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2})) tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2}))
Example #7
Source File: base.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def unique(self): values = self._values if hasattr(values, 'unique'): result = values.unique() else: from pandas.core.algorithms import unique1d result = unique1d(values) return result
Example #8
Source File: period.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def unique(self, level=None): # override the Index.unique method for performance GH#23083 if level is not None: # this should never occur, but is retained to make the signature # match Index.unique self._validate_index_level(level) values = self._ndarray_values result = unique1d(values) return self._shallow_copy(result)
Example #9
Source File: base.py From vnpy_crypto with MIT License | 5 votes |
def unique(self): values = self._values if hasattr(values, 'unique'): result = values.unique() else: from pandas.core.algorithms import unique1d result = unique1d(values) return result
Example #10
Source File: test_analytics.py From vnpy_crypto with MIT License | 5 votes |
def test_nunique(self): f = lambda s: len(algorithms.unique1d(s.dropna())) self._check_stat_op('nunique', f, has_skipna=False, check_dtype=False, check_dates=True) df = DataFrame({'A': [1, 1, 1], 'B': [1, 2, 3], 'C': [1, np.nan, 3]}) tm.assert_series_equal(df.nunique(), Series({'A': 1, 'B': 3, 'C': 2})) tm.assert_series_equal(df.nunique(dropna=False), Series({'A': 1, 'B': 3, 'C': 3})) tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2})) tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2}))
Example #11
Source File: base.py From recruit with Apache License 2.0 | 5 votes |
def unique(self): values = self._values if hasattr(values, 'unique'): result = values.unique() else: from pandas.core.algorithms import unique1d result = unique1d(values) return result
Example #12
Source File: period.py From recruit with Apache License 2.0 | 5 votes |
def unique(self, level=None): # override the Index.unique method for performance GH#23083 if level is not None: # this should never occur, but is retained to make the signature # match Index.unique self._validate_index_level(level) values = self._ndarray_values result = unique1d(values) return self._shallow_copy(result)
Example #13
Source File: groupby.py From vnpy_crypto with MIT License | 4 votes |
def _concat_objects(self, keys, values, not_indexed_same=False): from pandas.core.reshape.concat import concat def reset_identity(values): # reset the identities of the components # of the values to prevent aliasing for v in com._not_none(*values): ax = v._get_axis(self.axis) ax._reset_identity() return values if not not_indexed_same: result = concat(values, axis=self.axis) ax = self._selected_obj._get_axis(self.axis) if isinstance(result, Series): result = result.reindex(ax) else: # this is a very unfortunate situation # we have a multi-index that is NOT lexsorted # and we have a result which is duplicated # we can't reindex, so we resort to this # GH 14776 if isinstance(ax, MultiIndex) and not ax.is_unique: indexer = algorithms.unique1d( result.index.get_indexer_for(ax.values)) result = result.take(indexer, axis=self.axis) else: result = result.reindex(ax, axis=self.axis) elif self.group_keys: values = reset_identity(values) if self.as_index: # possible MI return case group_keys = keys group_levels = self.grouper.levels group_names = self.grouper.names result = concat(values, axis=self.axis, keys=group_keys, levels=group_levels, names=group_names, sort=False) else: # GH5610, returns a MI, with the first level being a # range index keys = list(range(len(values))) result = concat(values, axis=self.axis, keys=keys) else: values = reset_identity(values) result = concat(values, axis=self.axis) if (isinstance(result, Series) and getattr(self, '_selection_name', None) is not None): result.name = self._selection_name return result
Example #14
Source File: categorical.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 4 votes |
def unique(self): """ Return the ``Categorical`` which ``categories`` and ``codes`` are unique. Unused categories are NOT returned. - unordered category: values and categories are sorted by appearance order. - ordered category: values are sorted by appearance order, categories keeps existing order. Returns ------- unique values : ``Categorical`` Examples -------- An unordered Categorical will return categories in the order of appearance. >>> pd.Categorical(list('baabc')) [b, a, c] Categories (3, object): [b, a, c] >>> pd.Categorical(list('baabc'), categories=list('abc')) [b, a, c] Categories (3, object): [b, a, c] An ordered Categorical preserves the category ordering. >>> pd.Categorical(list('baabc'), ... categories=list('abc'), ... ordered=True) [b, a, c] Categories (3, object): [a < b < c] See Also -------- unique CategoricalIndex.unique Series.unique """ # unlike np.unique, unique1d does not sort unique_codes = unique1d(self.codes) cat = self.copy() # keep nan in codes cat._codes = unique_codes # exclude nan from indexer for categories take_codes = unique_codes[unique_codes != -1] if self.ordered: take_codes = np.sort(take_codes) return cat.set_categories(cat.categories.take(take_codes))
Example #15
Source File: categorical.py From vnpy_crypto with MIT License | 4 votes |
def unique(self): """ Return the ``Categorical`` which ``categories`` and ``codes`` are unique. Unused categories are NOT returned. - unordered category: values and categories are sorted by appearance order. - ordered category: values are sorted by appearance order, categories keeps existing order. Returns ------- unique values : ``Categorical`` Examples -------- An unordered Categorical will return categories in the order of appearance. >>> pd.Categorical(list('baabc')) [b, a, c] Categories (3, object): [b, a, c] >>> pd.Categorical(list('baabc'), categories=list('abc')) [b, a, c] Categories (3, object): [b, a, c] An ordered Categorical preserves the category ordering. >>> pd.Categorical(list('baabc'), ... categories=list('abc'), ... ordered=True) [b, a, c] Categories (3, object): [a < b < c] See Also -------- unique CategoricalIndex.unique Series.unique """ # unlike np.unique, unique1d does not sort unique_codes = unique1d(self.codes) cat = self.copy() # keep nan in codes cat._codes = unique_codes # exclude nan from indexer for categories take_codes = unique_codes[unique_codes != -1] if self.ordered: take_codes = np.sort(take_codes) return cat.set_categories(cat.categories.take(take_codes))
Example #16
Source File: groupby.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 4 votes |
def _concat_objects(self, keys, values, not_indexed_same=False): from pandas.core.reshape.concat import concat def reset_identity(values): # reset the identities of the components # of the values to prevent aliasing for v in com._not_none(*values): ax = v._get_axis(self.axis) ax._reset_identity() return values if not not_indexed_same: result = concat(values, axis=self.axis) ax = self._selected_obj._get_axis(self.axis) if isinstance(result, Series): result = result.reindex(ax) else: # this is a very unfortunate situation # we have a multi-index that is NOT lexsorted # and we have a result which is duplicated # we can't reindex, so we resort to this # GH 14776 if isinstance(ax, MultiIndex) and not ax.is_unique: indexer = algorithms.unique1d( result.index.get_indexer_for(ax.values)) result = result.take(indexer, axis=self.axis) else: result = result.reindex(ax, axis=self.axis) elif self.group_keys: values = reset_identity(values) if self.as_index: # possible MI return case group_keys = keys group_levels = self.grouper.levels group_names = self.grouper.names result = concat(values, axis=self.axis, keys=group_keys, levels=group_levels, names=group_names, sort=False) else: # GH5610, returns a MI, with the first level being a # range index keys = list(range(len(values))) result = concat(values, axis=self.axis, keys=keys) else: values = reset_identity(values) result = concat(values, axis=self.axis) if (isinstance(result, Series) and getattr(self, '_selection_name', None) is not None): result.name = self._selection_name return result
Example #17
Source File: base.py From Splunking-Crime with GNU Affero General Public License v3.0 | 4 votes |
def intersection(self, other): """ Form the intersection of two Index objects. This returns a new Index with elements common to the index and `other`, preserving the order of the calling index. Parameters ---------- other : Index or array-like Returns ------- intersection : Index Examples -------- >>> idx1 = pd.Index([1, 2, 3, 4]) >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.intersection(idx2) Int64Index([3, 4], dtype='int64') """ self._assert_can_do_setop(other) other = _ensure_index(other) if self.equals(other): return self._get_consensus_name(other) if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') return this.intersection(other) if self.is_monotonic and other.is_monotonic: try: result = self._inner_indexer(self._values, other._values)[0] return self._wrap_union_result(other, result) except TypeError: pass try: indexer = Index(other._values).get_indexer(self._values) indexer = indexer.take((indexer != -1).nonzero()[0]) except Exception: # duplicates indexer = algos.unique1d( Index(other._values).get_indexer_non_unique(self._values)[0]) indexer = indexer[indexer != -1] taken = other.take(indexer) if self.name != other.name: taken.name = None return taken
Example #18
Source File: groupby.py From recruit with Apache License 2.0 | 4 votes |
def _concat_objects(self, keys, values, not_indexed_same=False): from pandas.core.reshape.concat import concat def reset_identity(values): # reset the identities of the components # of the values to prevent aliasing for v in com._not_none(*values): ax = v._get_axis(self.axis) ax._reset_identity() return values if not not_indexed_same: result = concat(values, axis=self.axis) ax = self._selected_obj._get_axis(self.axis) if isinstance(result, Series): result = result.reindex(ax) else: # this is a very unfortunate situation # we have a multi-index that is NOT lexsorted # and we have a result which is duplicated # we can't reindex, so we resort to this # GH 14776 if isinstance(ax, MultiIndex) and not ax.is_unique: indexer = algorithms.unique1d( result.index.get_indexer_for(ax.values)) result = result.take(indexer, axis=self.axis) else: result = result.reindex(ax, axis=self.axis) elif self.group_keys: values = reset_identity(values) if self.as_index: # possible MI return case group_keys = keys group_levels = self.grouper.levels group_names = self.grouper.names result = concat(values, axis=self.axis, keys=group_keys, levels=group_levels, names=group_names, sort=False) else: # GH5610, returns a MI, with the first level being a # range index keys = list(range(len(values))) result = concat(values, axis=self.axis, keys=keys) else: values = reset_identity(values) result = concat(values, axis=self.axis) if (isinstance(result, Series) and getattr(self, '_selection_name', None) is not None): result.name = self._selection_name return result
Example #19
Source File: groupby.py From Splunking-Crime with GNU Affero General Public License v3.0 | 4 votes |
def _concat_objects(self, keys, values, not_indexed_same=False): from pandas.core.reshape.concat import concat def reset_identity(values): # reset the identities of the components # of the values to prevent aliasing for v in _not_none(*values): ax = v._get_axis(self.axis) ax._reset_identity() return values if not not_indexed_same: result = concat(values, axis=self.axis) ax = self._selected_obj._get_axis(self.axis) if isinstance(result, Series): result = result.reindex(ax) else: # this is a very unfortunate situation # we have a multi-index that is NOT lexsorted # and we have a result which is duplicated # we can't reindex, so we resort to this # GH 14776 if isinstance(ax, MultiIndex) and not ax.is_unique: indexer = algorithms.unique1d( result.index.get_indexer_for(ax.values)) result = result.take(indexer, axis=self.axis) else: result = result.reindex(ax, axis=self.axis) elif self.group_keys: values = reset_identity(values) if self.as_index: # possible MI return case group_keys = keys group_levels = self.grouper.levels group_names = self.grouper.names result = concat(values, axis=self.axis, keys=group_keys, levels=group_levels, names=group_names) else: # GH5610, returns a MI, with the first level being a # range index keys = list(range(len(values))) result = concat(values, axis=self.axis, keys=keys) else: values = reset_identity(values) result = concat(values, axis=self.axis) if (isinstance(result, Series) and getattr(self, '_selection_name', None) is not None): result.name = self._selection_name return result
Example #20
Source File: categorical.py From Splunking-Crime with GNU Affero General Public License v3.0 | 4 votes |
def unique(self): """ Return the ``Categorical`` which ``categories`` and ``codes`` are unique. Unused categories are NOT returned. - unordered category: values and categories are sorted by appearance order. - ordered category: values are sorted by appearance order, categories keeps existing order. Returns ------- unique values : ``Categorical`` Examples -------- An unordered Categorical will return categories in the order of appearance. >>> pd.Categorical(list('baabc')) [b, a, c] Categories (3, object): [b, a, c] >>> pd.Categorical(list('baabc'), categories=list('abc')) [b, a, c] Categories (3, object): [b, a, c] An ordered Categorical preserves the category ordering. >>> pd.Categorical(list('baabc'), ... categories=list('abc'), ... ordered=True) [b, a, c] Categories (3, object): [a < b < c] See Also -------- unique CategoricalIndex.unique Series.unique """ # unlike np.unique, unique1d does not sort unique_codes = unique1d(self.codes) cat = self.copy() # keep nan in codes cat._codes = unique_codes # exclude nan from indexer for categories take_codes = unique_codes[unique_codes != -1] if self.ordered: take_codes = sorted(take_codes) return cat.set_categories(cat.categories.take(take_codes))
Example #21
Source File: base.py From elasticintel with GNU General Public License v3.0 | 4 votes |
def intersection(self, other): """ Form the intersection of two Index objects. This returns a new Index with elements common to the index and `other`, preserving the order of the calling index. Parameters ---------- other : Index or array-like Returns ------- intersection : Index Examples -------- >>> idx1 = pd.Index([1, 2, 3, 4]) >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.intersection(idx2) Int64Index([3, 4], dtype='int64') """ self._assert_can_do_setop(other) other = _ensure_index(other) if self.equals(other): return self._get_consensus_name(other) if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') return this.intersection(other) if self.is_monotonic and other.is_monotonic: try: result = self._inner_indexer(self._values, other._values)[0] return self._wrap_union_result(other, result) except TypeError: pass try: indexer = Index(other._values).get_indexer(self._values) indexer = indexer.take((indexer != -1).nonzero()[0]) except: # duplicates indexer = algos.unique1d( Index(other._values).get_indexer_non_unique(self._values)[0]) indexer = indexer[indexer != -1] taken = other.take(indexer) if self.name != other.name: taken.name = None return taken
Example #22
Source File: groupby.py From elasticintel with GNU General Public License v3.0 | 4 votes |
def _concat_objects(self, keys, values, not_indexed_same=False): from pandas.core.reshape.concat import concat def reset_identity(values): # reset the identities of the components # of the values to prevent aliasing for v in _not_none(*values): ax = v._get_axis(self.axis) ax._reset_identity() return values if not not_indexed_same: result = concat(values, axis=self.axis) ax = self._selected_obj._get_axis(self.axis) if isinstance(result, Series): result = result.reindex(ax) else: # this is a very unfortunate situation # we have a multi-index that is NOT lexsorted # and we have a result which is duplicated # we can't reindex, so we resort to this # GH 14776 if isinstance(ax, MultiIndex) and not ax.is_unique: indexer = algorithms.unique1d( result.index.get_indexer_for(ax.values)) result = result.take(indexer, axis=self.axis) else: result = result.reindex(ax, axis=self.axis) elif self.group_keys: values = reset_identity(values) if self.as_index: # possible MI return case group_keys = keys group_levels = self.grouper.levels group_names = self.grouper.names result = concat(values, axis=self.axis, keys=group_keys, levels=group_levels, names=group_names) else: # GH5610, returns a MI, with the first level being a # range index keys = list(range(len(values))) result = concat(values, axis=self.axis, keys=keys) else: values = reset_identity(values) result = concat(values, axis=self.axis) if (isinstance(result, Series) and getattr(self, '_selection_name', None) is not None): result.name = self._selection_name return result
Example #23
Source File: categorical.py From elasticintel with GNU General Public License v3.0 | 4 votes |
def unique(self): """ Return the ``Categorical`` which ``categories`` and ``codes`` are unique. Unused categories are NOT returned. - unordered category: values and categories are sorted by appearance order. - ordered category: values are sorted by appearance order, categories keeps existing order. Returns ------- unique values : ``Categorical`` Examples -------- An unordered Categorical will return categories in the order of appearance. >>> pd.Categorical(list('baabc')) [b, a, c] Categories (3, object): [b, a, c] >>> pd.Categorical(list('baabc'), categories=list('abc')) [b, a, c] Categories (3, object): [b, a, c] An ordered Categorical preserves the category ordering. >>> pd.Categorical(list('baabc'), ... categories=list('abc'), ... ordered=True) [b, a, c] Categories (3, object): [a < b < c] See Also -------- unique CategoricalIndex.unique Series.unique """ # unlike np.unique, unique1d does not sort unique_codes = unique1d(self.codes) cat = self.copy() # keep nan in codes cat._codes = unique_codes # exclude nan from indexer for categories take_codes = unique_codes[unique_codes != -1] if self.ordered: take_codes = sorted(take_codes) return cat.set_categories(cat.categories.take(take_codes))
Example #24
Source File: categorical.py From recruit with Apache License 2.0 | 4 votes |
def unique(self): """ Return the ``Categorical`` which ``categories`` and ``codes`` are unique. Unused categories are NOT returned. - unordered category: values and categories are sorted by appearance order. - ordered category: values are sorted by appearance order, categories keeps existing order. Returns ------- unique values : ``Categorical`` Examples -------- An unordered Categorical will return categories in the order of appearance. >>> pd.Categorical(list('baabc')) [b, a, c] Categories (3, object): [b, a, c] >>> pd.Categorical(list('baabc'), categories=list('abc')) [b, a, c] Categories (3, object): [b, a, c] An ordered Categorical preserves the category ordering. >>> pd.Categorical(list('baabc'), ... categories=list('abc'), ... ordered=True) [b, a, c] Categories (3, object): [a < b < c] See Also -------- unique CategoricalIndex.unique Series.unique """ # unlike np.unique, unique1d does not sort unique_codes = unique1d(self.codes) cat = self.copy() # keep nan in codes cat._codes = unique_codes # exclude nan from indexer for categories take_codes = unique_codes[unique_codes != -1] if self.ordered: take_codes = np.sort(take_codes) return cat.set_categories(cat.categories.take(take_codes))