Python pandas.core.algorithms.unique() Examples
The following are 30 code examples of pandas.core.algorithms.unique().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas.core.algorithms, or try the search function.
Example #1
Source File: test_algos.py From vnpy_crypto with MIT License | 7 votes |
def test_datetime64_dtype_array_returned(self):
    """Check ``algos.unique`` on datetime64 data (GH 9431).

    The same duplicated timestamps are fed in as the ``pd.to_datetime``
    result, as a ``Series`` and as that Series' ``.values``; each call must
    equal the ``M8[ns]`` expected array, including its dtype.
    """
    # GH 9431
    first = '2015-01-03T00:00:00.000000000+0000'
    second = '2015-01-01T00:00:00.000000000+0000'
    expected = np_array_datetime64_compat([first, second], dtype='M8[ns]')

    def check(values):
        # one round of the value + dtype comparison
        result = algos.unique(values)
        tm.assert_numpy_array_equal(result, expected)
        assert result.dtype == expected.dtype

    dt_index = pd.to_datetime([first, second, second])
    check(dt_index)

    s = Series(dt_index)
    check(s)

    check(s.values)
Example #2
Source File: frequencies.py From recruit with Apache License 2.0 | 6 votes |
def _get_wom_rule(self): # wdiffs = unique(np.diff(self.index.week)) # We also need -47, -49, -48 to catch index spanning year boundary # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all(): # return None weekdays = unique(self.index.weekday) if len(weekdays) > 1: return None week_of_months = unique((self.index.day - 1) // 7) # Only attempt to infer up to WOM-4. See #9425 week_of_months = week_of_months[week_of_months < 4] if len(week_of_months) == 0 or len(week_of_months) > 1: return None # get which week week = week_of_months[0] + 1 wd = int_to_weekday[weekdays[0]] return 'WOM-{week}{weekday}'.format(week=week, weekday=wd)
Example #3
Source File: multi.py From recruit with Apache License 2.0 | 6 votes |
def _engine(self):
    """Pick and build the lookup engine for this index.

    Returns a ``MultiIndexPyIntEngine`` when the combined codes need more
    than 64 bits, otherwise a ``MultiIndexUIntEngine``; both are built from
    ``self.levels``, ``self.codes`` and the computed bit offsets.
    """
    # Bits needed per level: log2 of the level size, with one extra slot
    # reserved for NaN (code -1).
    level_sizes = [len(level) + 1 for level in self.levels]
    bits_per_level = np.ceil(np.log2(level_sizes))

    # Running bit totals, accumulated starting from the _right_ ...
    lev_bits = np.cumsum(bits_per_level[::-1])[::-1]

    # ... so that sorting the combination of shifted codes (one per level,
    # giving a unique integer) is equivalent to sorting the codes
    # lexicographically. Each level is shifted by the number of bits needed
    # to represent the levels to its right, hence the trailing 0.
    offsets = np.concatenate([lev_bits[1:], [0]]).astype('uint64')

    # lev_bits[0] is the total width of the representation; beyond 64 bits a
    # uint64 would overflow, so fall back to Python integers.
    if lev_bits[0] > 64:
        engine_cls = MultiIndexPyIntEngine
    else:
        engine_cls = MultiIndexUIntEngine
    return engine_cls(self.levels, self.codes, offsets)
Example #4
Source File: pytables.py From Computable with MIT License | 6 votes |
def _reindex_axis(obj, axis, labels, other=None): ax = obj._get_axis(axis) labels = _ensure_index(labels) # try not to reindex even if other is provided # if it equals our current index if other is not None: other = _ensure_index(other) if (other is None or labels.equals(other)) and labels.equals(ax): return obj labels = _ensure_index(labels.unique()) if other is not None: labels = labels & _ensure_index(other.unique()) if not labels.equals(ax): slicer = [slice(None, None)] * obj.ndim slicer[axis] = labels obj = obj.loc[tuple(slicer)] return obj
Example #5
Source File: frequencies.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def _get_wom_rule(self): # wdiffs = unique(np.diff(self.index.week)) # We also need -47, -49, -48 to catch index spanning year boundary # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all(): # return None weekdays = unique(self.index.weekday) if len(weekdays) > 1: return None week_of_months = unique((self.index.day - 1) // 7) # Only attempt to infer up to WOM-4. See #9425 week_of_months = week_of_months[week_of_months < 4] if len(week_of_months) == 0 or len(week_of_months) > 1: return None # get which week week = week_of_months[0] + 1 wd = int_to_weekday[weekdays[0]] return 'WOM-{week}{weekday}'.format(week=week, weekday=wd)
Example #6
Source File: frequencies.py From Computable with MIT License | 6 votes |
def _get_wom_rule(self): # wdiffs = unique(np.diff(self.index.week)) #We also need -47, -49, -48 to catch index spanning year boundary # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all(): # return None weekdays = unique(self.index.weekday) if len(weekdays) > 1: return None week_of_months = unique((self.index.day - 1) // 7) if len(week_of_months) > 1: return None # get which week week = week_of_months[0] + 1 wd = _weekday_rule_aliases[weekdays[0]] return 'WOM-%d%s' % (week, wd)
Example #7
Source File: pytables.py From vnpy_crypto with MIT License | 6 votes |
def _reindex_axis(obj, axis, labels, other=None): ax = obj._get_axis(axis) labels = _ensure_index(labels) # try not to reindex even if other is provided # if it equals our current index if other is not None: other = _ensure_index(other) if (other is None or labels.equals(other)) and labels.equals(ax): return obj labels = _ensure_index(labels.unique()) if other is not None: labels = _ensure_index(other.unique()) & labels if not labels.equals(ax): slicer = [slice(None, None)] * obj.ndim slicer[axis] = labels obj = obj.loc[tuple(slicer)] return obj
Example #8
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_datetime64_dtype_array_returned(self):
    """Check ``algos.unique`` on datetime64 data (GH 9431).

    The same duplicated timestamps are fed in as the ``pd.to_datetime``
    result, as a ``Series`` and as that Series' ``.values``; each call must
    equal the ``M8[ns]`` expected array, including its dtype.
    """
    # GH 9431
    expected = np_array_datetime64_compat(
        ['2015-01-03T00:00:00.000000000+0000',
         '2015-01-01T00:00:00.000000000+0000'],
        dtype='M8[ns]')

    def check(values):
        # one round of the value + dtype comparison
        result = algos.unique(values)
        tm.assert_numpy_array_equal(result, expected)
        assert result.dtype == expected.dtype

    later = '2015-01-03T00:00:00.000000000'
    earlier = '2015-01-01T00:00:00.000000000'
    dt_index = pd.to_datetime([later, earlier, earlier])
    check(dt_index)

    s = Series(dt_index)
    check(s)

    check(s.values)
Example #9
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_timedelta64_dtype_array_returned(self):
    """Check ``algos.unique`` on timedelta64 data (GH 9431).

    The same duplicated timedeltas are fed in as the ``pd.to_timedelta``
    result, as a ``Series`` and as that Series' ``.values``; each call must
    equal the ``m8[ns]`` expected array, including its dtype.
    """
    # GH 9431
    expected = np.array([31200, 45678, 10000], dtype='m8[ns]')

    def check(values):
        # one round of the value + dtype comparison
        result = algos.unique(values)
        tm.assert_numpy_array_equal(result, expected)
        assert result.dtype == expected.dtype

    td_index = pd.to_timedelta([31200, 45678, 31200, 10000, 45678])
    check(td_index)

    s = Series(td_index)
    check(s)

    check(s.values)
Example #10
Source File: pytables.py From vnpy_crypto with MIT License | 6 votes |
def write(self, obj, **kwargs):
    """Write ``obj``: its axes first, then one values/items pair per block.

    Delegates to the parent ``write`` first, then works on ``obj._data``,
    consolidating it when it is not already consolidated.

    Raises
    ------
    ValueError
        If the first axis of the data is not unique.
    """
    super(BlockManagerFixed, self).write(obj, **kwargs)
    data = obj._data
    if not data.is_consolidated():
        data = data.consolidate()

    self.attrs.ndim = data.ndim
    for i, ax in enumerate(data.axes):

        # only the first axis is checked for uniqueness
        if i == 0:
            if not ax.is_unique:
                raise ValueError(
                    "Columns index has to be unique for fixed format")
        self.write_index('axis%d' % i, ax)

    # Supporting mixed-type DataFrame objects...nontrivial
    self.attrs.nblocks = len(data.blocks)
    for i, blk in enumerate(data.blocks):
        # I have no idea why, but writing values before items fixed #2299
        # (keep this order: values first, then items)
        blk_items = data.items.take(blk.mgr_locs)
        self.write_array('block%d_values' % i, blk.values, items=blk_items)
        self.write_index('block%d_items' % i, blk_items)
Example #11
Source File: pytables.py From recruit with Apache License 2.0 | 6 votes |
def _reindex_axis(obj, axis, labels, other=None): ax = obj._get_axis(axis) labels = ensure_index(labels) # try not to reindex even if other is provided # if it equals our current index if other is not None: other = ensure_index(other) if (other is None or labels.equals(other)) and labels.equals(ax): return obj labels = ensure_index(labels.unique()) if other is not None: labels = ensure_index(other.unique()).intersection(labels, sort=False) if not labels.equals(ax): slicer = [slice(None, None)] * obj.ndim slicer[axis] = labels obj = obj.loc[tuple(slicer)] return obj
Example #12
Source File: test_algos.py From recruit with Apache License 2.0 | 6 votes |
def test_timedelta64_dtype_array_returned(self):
    """Check ``algos.unique`` on timedelta64 data (GH 9431).

    The same duplicated timedeltas are fed in as the ``pd.to_timedelta``
    result, as a ``Series`` and as that Series' ``.values``; each call must
    equal the ``m8[ns]`` expected array, including its dtype.
    """
    # GH 9431
    expected = np.array([31200, 45678, 10000], dtype='m8[ns]')

    def check(values):
        # one round of the value + dtype comparison
        result = algos.unique(values)
        tm.assert_numpy_array_equal(result, expected)
        assert result.dtype == expected.dtype

    td_index = pd.to_timedelta([31200, 45678, 31200, 10000, 45678])
    check(td_index)

    s = Series(td_index)
    check(s)

    check(s.values)
Example #13
Source File: test_algos.py From recruit with Apache License 2.0 | 6 votes |
def test_datetime64_dtype_array_returned(self):
    """Check ``algos.unique`` on datetime64 data (GH 9431).

    The same duplicated timestamps are fed in as the ``pd.to_datetime``
    result, as a ``Series`` and as that Series' ``.values``; each call must
    equal the ``M8[ns]`` expected array, including its dtype.
    """
    # GH 9431
    expected = np_array_datetime64_compat(
        ['2015-01-03T00:00:00.000000000+0000',
         '2015-01-01T00:00:00.000000000+0000'],
        dtype='M8[ns]')

    def check(values):
        # one round of the value + dtype comparison
        result = algos.unique(values)
        tm.assert_numpy_array_equal(result, expected)
        assert result.dtype == expected.dtype

    later = '2015-01-03T00:00:00.000000000'
    earlier = '2015-01-01T00:00:00.000000000'
    dt_index = pd.to_datetime([later, earlier, earlier])
    check(dt_index)

    s = Series(dt_index)
    check(s)

    check(s.values)
Example #14
Source File: test_algos.py From vnpy_crypto with MIT License | 5 votes |
def test_get_unique(self): s = Series([1, 2, 2**63, 2**63], dtype=np.uint64) exp = np.array([1, 2, 2**63], dtype=np.uint64) tm.assert_numpy_array_equal(s.unique(), exp)
Example #15
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_hashtable_unique(self, htable, tm_dtype, writable):
    """Compare ``htable().unique`` with ``drop_duplicates(keep='first')``.

    Runs once without and once with ``return_inverse=True``; in the second
    case the input is rebuilt from the uniques and the inverse.
    """
    # output of maker has guaranteed unique elements
    make_index = getattr(tm, 'make' + tm_dtype + 'Index')
    base = Series(make_index(1000))
    if htable == ht.Float64HashTable:
        # add NaN for float column
        base.loc[500] = np.nan
    elif htable == ht.PyObjectHashTable:
        # use different NaN types for object column
        base.loc[500:502] = [np.nan, None, pd.NaT]

    # create duplicated selection
    sampled = base.sample(frac=3, replace=True)
    s_duplicated = sampled.reset_index(drop=True)
    s_duplicated.values.setflags(write=writable)

    # drop_duplicates has own cython code (hash_table_func_helper.pxi)
    # and is tested separately; keeps first occurrence like ht.unique()
    expected_unique = s_duplicated.drop_duplicates(keep='first').values
    plain_unique = htable().unique(s_duplicated.values)
    tm.assert_numpy_array_equal(plain_unique, expected_unique)

    # test return_inverse=True
    # reconstruction can only succeed if the inverse is correct
    uniques_and_inverse = htable().unique(s_duplicated.values,
                                          return_inverse=True)
    result_unique, result_inverse = uniques_and_inverse
    tm.assert_numpy_array_equal(result_unique, expected_unique)

    rebuilt = result_unique[result_inverse]
    tm.assert_numpy_array_equal(rebuilt, s_duplicated.values)
Example #16
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_do_not_mangle_na_values(self, unique_nulls_fixture, unique_nulls_fixture2): # GH 22295 if unique_nulls_fixture is unique_nulls_fixture2: return # skip it, values not unique a = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=np.object) result = pd.unique(a) assert result.size == 2 assert a[0] is unique_nulls_fixture assert a[1] is unique_nulls_fixture2
Example #17
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_different_nans(self): # GH 21866 # create different nans from bit-patterns: NAN1 = struct.unpack("d", struct.pack("=Q", 0x7ff8000000000000))[0] NAN2 = struct.unpack("d", struct.pack("=Q", 0x7ff8000000000001))[0] assert NAN1 != NAN1 assert NAN2 != NAN2 a = np.array([NAN1, NAN2]) # NAN1 and NAN2 are equivalent result = pd.unique(a) expected = np.array([np.nan]) tm.assert_numpy_array_equal(result, expected)
Example #18
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_signed_zero(self): # GH 21866 a = np.array([-0.0, 0.0]) result = pd.unique(a) expected = np.array([-0.0]) # 0.0 and -0.0 are equivalent tm.assert_numpy_array_equal(result, expected)
Example #19
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_unique_tuples(self, arr, unique): # https://github.com/pandas-dev/pandas/issues/16519 expected = np.empty(len(unique), dtype=object) expected[:] = unique result = pd.unique(arr) tm.assert_numpy_array_equal(result, expected)
Example #20
Source File: test_algos.py From vnpy_crypto with MIT License | 5 votes |
def test_obj_none_preservation(self): # GH 20866 arr = np.array(['foo', None], dtype=object) result = pd.unique(arr) expected = np.array(['foo', None], dtype=object) tm.assert_numpy_array_equal(result, expected, strict_nan=True)
Example #21
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_hashtable_factorize(self, htable, tm_dtype, writable):
    """Compare ``htable().factorize`` with ``dropna().drop_duplicates()``.

    Also rebuilds the non-NaN part of the input from the uniques and the
    inverse to validate the inverse.
    """
    # output of maker has guaranteed unique elements
    make_index = getattr(tm, 'make' + tm_dtype + 'Index')
    base = Series(make_index(1000))
    if htable == ht.Float64HashTable:
        # add NaN for float column
        base.loc[500] = np.nan
    elif htable == ht.PyObjectHashTable:
        # use different NaN types for object column
        base.loc[500:502] = [np.nan, None, pd.NaT]

    # create duplicated selection
    sampled = base.sample(frac=3, replace=True)
    s_duplicated = sampled.reset_index(drop=True)
    s_duplicated.values.setflags(write=writable)
    na_mask = s_duplicated.isna().values

    factorized = htable().factorize(s_duplicated.values)
    result_unique, result_inverse = factorized

    # drop_duplicates has own cython code (hash_table_func_helper.pxi)
    # and is tested separately; keeps first occurrence like ht.factorize()
    # since factorize removes all NaNs, we do the same here
    without_na = s_duplicated.dropna()
    expected_unique = without_na.drop_duplicates().values
    tm.assert_numpy_array_equal(result_unique, expected_unique)

    # reconstruction can only succeed if the inverse is correct. Since
    # factorize removes the NaNs, those have to be excluded here as well
    kept_inverse = result_inverse[~na_mask]
    result_reconstruct = result_unique[kept_inverse]
    expected_reconstruct = without_na.values
    tm.assert_numpy_array_equal(result_reconstruct, expected_reconstruct)
Example #22
Source File: pytables.py From Computable with MIT License | 5 votes |
def unique(self, key, column, **kwargs):
    """Deprecated: return the unique values of ``column`` stored under ``key``.

    Emits a ``FutureWarning``, then reads the column through the storer for
    ``key`` (forwarding ``kwargs``) and returns its ``.unique()``.
    """
    message = ("unique(key,column) is deprecated\n"
               "use select_column(key,column).unique() instead")
    warnings.warn(message, FutureWarning)

    storer = self.get_storer(key)
    values = storer.read_column(column=column, **kwargs)
    return values.unique()
Example #23
Source File: test_algos.py From vnpy_crypto with MIT License | 5 votes |
def test_unique_tuples(self, arr, unique): # https://github.com/pandas-dev/pandas/issues/16519 expected = np.empty(len(unique), dtype=object) expected[:] = unique result = pd.unique(arr) tm.assert_numpy_array_equal(result, expected)
Example #24
Source File: multi.py From vnpy_crypto with MIT License | 5 votes |
def unique(self, level=None):
    """Return unique values of the whole index or of a single ``level``.

    With ``level=None`` the parent class implementation is used; otherwise
    ``level`` is resolved to its number and the unique values of that level
    are returned.
    """
    if level is not None:
        level_number = self._get_level_number(level)
        return self._get_level_values(level=level_number, unique=True)
    return super(MultiIndex, self).unique()
Example #25
Source File: frequencies.py From Computable with MIT License | 5 votes |
def _get_annual_rule(self): if len(self.ydiffs) > 1: return None if len(algos.unique(self.fields['M'])) > 1: return None pos_check = self.month_position_check() return {'cs': 'AS', 'bs': 'BAS', 'ce': 'A', 'be': 'BA'}.get(pos_check)
Example #26
Source File: test_algos.py From vnpy_crypto with MIT License | 5 votes |
def test_unique_label_indices():
    """Compare ``unique_label_indices`` with ``np.unique(return_index=True)``.

    Checked once on positive labels and once after ten randomly chosen
    positions are set to -1; in the second round the first index reported by
    ``np.unique`` is dropped from the expectation.
    """
    a = np.random.randint(1, 1 << 10, 1 << 15).astype('i8')

    left = unique_label_indices(a)
    _, right = np.unique(a, return_index=True)
    tm.assert_numpy_array_equal(left, right, check_dtype=False)

    # overwrite some labels with -1
    negative_positions = np.random.choice(len(a), 10)
    a[negative_positions] = -1

    left = unique_label_indices(a)
    _, first_positions = np.unique(a, return_index=True)
    right = first_positions[1:]
    tm.assert_numpy_array_equal(left, right, check_dtype=False)
Example #27
Source File: multi.py From vnpy_crypto with MIT License | 5 votes |
def _verify_integrity(self, labels=None, levels=None): """ Parameters ---------- labels : optional list Labels to check for validity. Defaults to current labels. levels : optional list Levels to check for validity. Defaults to current levels. Raises ------ ValueError If length of levels and labels don't match, if any label would exceed level bounds, or there are any duplicate levels. """ # NOTE: Currently does not check, among other things, that cached # nlevels matches nor that sortorder matches actually sortorder. labels = labels or self.labels levels = levels or self.levels if len(levels) != len(labels): raise ValueError("Length of levels and labels must match. NOTE:" " this index is in an inconsistent state.") label_length = len(self.labels[0]) for i, (level, label) in enumerate(zip(levels, labels)): if len(label) != label_length: raise ValueError("Unequal label lengths: %s" % ([len(lab) for lab in labels])) if len(label) and label.max() >= len(level): raise ValueError("On level %d, label max (%d) >= length of" " level (%d). NOTE: this index is in an" " inconsistent state" % (i, label.max(), len(level))) if not level.is_unique: raise ValueError("Level values must be unique: {values} on " "level {level}".format( values=[value for value in level], level=i))
Example #28
Source File: multi.py From vnpy_crypto with MIT License | 5 votes |
def _get_level_values(self, level, unique=False):
    """
    Return the values of one level, taken through that level's labels.

    **this is an internal method**

    Parameters
    ----------
    level : int level
    unique : bool, default False
        if True, drop duplicated values

    Returns
    -------
    values : a shallow copy of the level holding the taken values
    """
    level_index = self.levels[level]
    codes = self.labels[level]
    if unique:
        codes = algos.unique(codes)

    # positions without a value are filled with the level's NA value
    na_value = level_index._na_value
    taken = algos.take_1d(level_index._values, codes, fill_value=na_value)
    return level_index._shallow_copy(taken)
Example #29
Source File: datetimes.py From vnpy_crypto with MIT License | 5 votes |
def _convert_and_box_cache(arg, cache_array, box, errors, name=None): """ Convert array of dates with a cache and box the result Parameters ---------- arg : integer, float, string, datetime, list, tuple, 1-d array, Series cache_array : Series Cache of converted, unique dates box : boolean True boxes result as an Index-like, False returns an ndarray errors : string 'ignore' plus box=True will convert result to Index name : string, default None Name for a DatetimeIndex Returns ------- result : datetime of converted dates Returns: - Index-like if box=True - ndarray if box=False """ from pandas import Series, DatetimeIndex, Index result = Series(arg).map(cache_array) if box: if errors == 'ignore': return Index(result) else: return DatetimeIndex(result, name=name) return result.values
Example #30
Source File: datetimes.py From vnpy_crypto with MIT License | 5 votes |
def _maybe_cache(arg, format, cache, tz, convert_listlike): """ Create a cache of unique dates from an array of dates Parameters ---------- arg : integer, float, string, datetime, list, tuple, 1-d array, Series format : string Strftime format to parse time cache : boolean True attempts to create a cache of converted values tz : string Timezone of the dates convert_listlike : function Conversion function to apply on dates Returns ------- cache_array : Series Cache of converted, unique dates. Can be empty """ from pandas import Series cache_array = Series() if cache: # Perform a quicker unique check from pandas import Index if not Index(arg).is_unique: unique_dates = algorithms.unique(arg) cache_dates = convert_listlike(unique_dates, True, format, tz=tz) cache_array = Series(cache_dates, index=unique_dates) return cache_array