Python pandas.core.algorithms.duplicated() Examples
The following are 30
code examples of pandas.core.algorithms.duplicated().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
pandas.core.algorithms
, or try the search function
.
Example #1
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_unique_index(self): cases = [Index([1, 2, 3]), pd.RangeIndex(0, 3)] for case in cases: assert case.is_unique is True tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False]))
Example #2
Source File: test_algos.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_unique_index(self): cases = [Index([1, 2, 3]), pd.RangeIndex(0, 3)] for case in cases: assert case.is_unique tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False]))
Example #3
Source File: test_algos.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_numeric_object_likes(self, case): exp_first = np.array([False, False, True, False, False, True, False, True, True, False]) exp_last = np.array([True, True, True, True, False, False, False, False, False, False]) exp_false = exp_first | exp_last res_first = algos.duplicated(case, keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = algos.duplicated(case, keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = algos.duplicated(case, keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # index for idx in [Index(case), Index(case, dtype='category')]: res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = idx.duplicated(keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = idx.duplicated(keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # series for s in [Series(case), Series(case, dtype='category')]: res_first = s.duplicated(keep='first') tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) tm.assert_series_equal(res_false, Series(exp_false))
Example #4
Source File: test_algos.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_duplicated_with_nas(self): keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object) result = algos.duplicated(keys) expected = np.array([False, False, False, True, False, True]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='first') expected = np.array([False, False, False, True, False, True]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='last') expected = np.array([True, False, True, False, False, False]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep=False) expected = np.array([True, False, True, True, False, True]) tm.assert_numpy_array_equal(result, expected) keys = np.empty(8, dtype=object) for i, t in enumerate(zip([0, 0, np.nan, np.nan] * 2, [0, np.nan, 0, np.nan] * 2)): keys[i] = t result = algos.duplicated(keys) falses = [False] * 4 trues = [True] * 4 expected = np.array(falses + trues) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='last') expected = np.array(trues + falses) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep=False) expected = np.array(trues + trues) tm.assert_numpy_array_equal(result, expected)
Example #5
Source File: base.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def drop_duplicates(self, keep='first', inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') if isinstance(self, ABCIndexClass): if self.is_unique: return self._shallow_copy() duplicated = self.duplicated(keep=keep) result = self[np.logical_not(duplicated)] if inplace: return self._update_inplace(result) else: return result
Example #6
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_unique_index(self): cases = [pd.Index([1, 2, 3]), pd.RangeIndex(0, 3)] for case in cases: assert case.is_unique tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False]))
Example #7
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_numeric_object_likes(self, case): exp_first = np.array([False, False, True, False, False, True, False, True, True, False]) exp_last = np.array([True, True, True, True, False, False, False, False, False, False]) exp_false = exp_first | exp_last res_first = algos.duplicated(case, keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = algos.duplicated(case, keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = algos.duplicated(case, keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # index for idx in [pd.Index(case), pd.Index(case, dtype='category')]: res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = idx.duplicated(keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = idx.duplicated(keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # series for s in [Series(case), Series(case, dtype='category')]: res_first = s.duplicated(keep='first') tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) tm.assert_series_equal(res_false, Series(exp_false))
Example #8
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_duplicated_with_nas(self): keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object) result = algos.duplicated(keys) expected = np.array([False, False, False, True, False, True]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='first') expected = np.array([False, False, False, True, False, True]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='last') expected = np.array([True, False, True, False, False, False]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep=False) expected = np.array([True, False, True, True, False, True]) tm.assert_numpy_array_equal(result, expected) keys = np.empty(8, dtype=object) for i, t in enumerate(zip([0, 0, np.nan, np.nan] * 2, [0, np.nan, 0, np.nan] * 2)): keys[i] = t result = algos.duplicated(keys) falses = [False] * 4 trues = [True] * 4 expected = np.array(falses + trues) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='last') expected = np.array(trues + falses) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep=False) expected = np.array(trues + trues) tm.assert_numpy_array_equal(result, expected)
Example #9
Source File: base.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def duplicated(self, keep='first'): from pandas.core.algorithms import duplicated if isinstance(self, ABCIndexClass): if self.is_unique: return np.zeros(len(self), dtype=np.bool) return duplicated(self, keep=keep) else: return self._constructor(duplicated(self, keep=keep), index=self.index).__finalize__(self) # ---------------------------------------------------------------------- # abstracts
Example #10
Source File: base.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def drop_duplicates(self, keep='first', inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') if isinstance(self, ABCIndexClass): if self.is_unique: return self._shallow_copy() duplicated = self.duplicated(keep=keep) result = self[np.logical_not(duplicated)] if inplace: return self._update_inplace(result) else: return result
Example #11
Source File: base.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def duplicated(self, keep='first'): from pandas.core.algorithms import duplicated if isinstance(self, ABCIndexClass): if self.is_unique: return np.zeros(len(self), dtype=np.bool) return duplicated(self, keep=keep) else: return self._constructor(duplicated(self, keep=keep), index=self.index).__finalize__(self) # ---------------------------------------------------------------------- # abstracts
Example #12
Source File: base.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def drop_duplicates(self, keep='first', inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') if isinstance(self, ABCIndexClass): if self.is_unique: return self._shallow_copy() duplicated = self.duplicated(keep=keep) result = self[np.logical_not(duplicated)] if inplace: return self._update_inplace(result) else: return result
Example #13
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_hashtable_unique(self, htable, tm_dtype, writable): # output of maker has guaranteed unique elements maker = getattr(tm, 'make' + tm_dtype + 'Index') s = Series(maker(1000)) if htable == ht.Float64HashTable: # add NaN for float column s.loc[500] = np.nan elif htable == ht.PyObjectHashTable: # use different NaN types for object column s.loc[500:502] = [np.nan, None, pd.NaT] # create duplicated selection s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True) s_duplicated.values.setflags(write=writable) # drop_duplicates has own cython code (hash_table_func_helper.pxi) # and is tested separately; keeps first occurrence like ht.unique() expected_unique = s_duplicated.drop_duplicates(keep='first').values result_unique = htable().unique(s_duplicated.values) tm.assert_numpy_array_equal(result_unique, expected_unique) # test return_inverse=True # reconstruction can only succeed if the inverse is correct result_unique, result_inverse = htable().unique(s_duplicated.values, return_inverse=True) tm.assert_numpy_array_equal(result_unique, expected_unique) reconstr = result_unique[result_inverse] tm.assert_numpy_array_equal(reconstr, s_duplicated.values)
Example #14
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_duplicated_with_nas(self): keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object) result = algos.duplicated(keys) expected = np.array([False, False, False, True, False, True]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='first') expected = np.array([False, False, False, True, False, True]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='last') expected = np.array([True, False, True, False, False, False]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep=False) expected = np.array([True, False, True, True, False, True]) tm.assert_numpy_array_equal(result, expected) keys = np.empty(8, dtype=object) for i, t in enumerate(zip([0, 0, np.nan, np.nan] * 2, [0, np.nan, 0, np.nan] * 2)): keys[i] = t result = algos.duplicated(keys) falses = [False] * 4 trues = [True] * 4 expected = np.array(falses + trues) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='last') expected = np.array(trues + falses) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep=False) expected = np.array(trues + trues) tm.assert_numpy_array_equal(result, expected)
Example #15
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_numeric_object_likes(self, case): exp_first = np.array([False, False, True, False, False, True, False, True, True, False]) exp_last = np.array([True, True, True, True, False, False, False, False, False, False]) exp_false = exp_first | exp_last res_first = algos.duplicated(case, keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = algos.duplicated(case, keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = algos.duplicated(case, keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # index for idx in [Index(case), Index(case, dtype='category')]: res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = idx.duplicated(keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = idx.duplicated(keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # series for s in [Series(case), Series(case, dtype='category')]: res_first = s.duplicated(keep='first') tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) tm.assert_series_equal(res_false, Series(exp_false))
Example #16
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_duplicated_with_nas(self): keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object) result = algos.duplicated(keys) expected = np.array([False, False, False, True, False, True]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='first') expected = np.array([False, False, False, True, False, True]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='last') expected = np.array([True, False, True, False, False, False]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep=False) expected = np.array([True, False, True, True, False, True]) tm.assert_numpy_array_equal(result, expected) keys = np.empty(8, dtype=object) for i, t in enumerate(zip([0, 0, np.nan, np.nan] * 2, [0, np.nan, 0, np.nan] * 2)): keys[i] = t result = algos.duplicated(keys) falses = [False] * 4 trues = [True] * 4 expected = np.array(falses + trues) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='last') expected = np.array(trues + falses) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep=False) expected = np.array(trues + trues) tm.assert_numpy_array_equal(result, expected)
Example #17
Source File: base.py From vnpy_crypto with MIT License | 5 votes |
def drop_duplicates(self, keep='first', inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') if isinstance(self, ABCIndexClass): if self.is_unique: return self._shallow_copy() duplicated = self.duplicated(keep=keep) result = self[np.logical_not(duplicated)] if inplace: return self._update_inplace(result) else: return result
Example #18
Source File: test_algos.py From vnpy_crypto with MIT License | 5 votes |
def test_unique_index(self): cases = [Index([1, 2, 3]), pd.RangeIndex(0, 3)] for case in cases: assert case.is_unique tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False]))
Example #19
Source File: test_algos.py From vnpy_crypto with MIT License | 5 votes |
def test_numeric_object_likes(self, case): exp_first = np.array([False, False, True, False, False, True, False, True, True, False]) exp_last = np.array([True, True, True, True, False, False, False, False, False, False]) exp_false = exp_first | exp_last res_first = algos.duplicated(case, keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = algos.duplicated(case, keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = algos.duplicated(case, keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # index for idx in [Index(case), Index(case, dtype='category')]: res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = idx.duplicated(keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = idx.duplicated(keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # series for s in [Series(case), Series(case, dtype='category')]: res_first = s.duplicated(keep='first') tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) tm.assert_series_equal(res_false, Series(exp_false))
Example #20
Source File: test_algos.py From vnpy_crypto with MIT License | 5 votes |
def test_duplicated_with_nas(self): keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object) result = algos.duplicated(keys) expected = np.array([False, False, False, True, False, True]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='first') expected = np.array([False, False, False, True, False, True]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='last') expected = np.array([True, False, True, False, False, False]) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep=False) expected = np.array([True, False, True, True, False, True]) tm.assert_numpy_array_equal(result, expected) keys = np.empty(8, dtype=object) for i, t in enumerate(zip([0, 0, np.nan, np.nan] * 2, [0, np.nan, 0, np.nan] * 2)): keys[i] = t result = algos.duplicated(keys) falses = [False] * 4 trues = [True] * 4 expected = np.array(falses + trues) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep='last') expected = np.array(trues + falses) tm.assert_numpy_array_equal(result, expected) result = algos.duplicated(keys, keep=False) expected = np.array(trues + trues) tm.assert_numpy_array_equal(result, expected)
Example #21
Source File: base.py From recruit with Apache License 2.0 | 5 votes |
def duplicated(self, keep='first'): from pandas.core.algorithms import duplicated if isinstance(self, ABCIndexClass): if self.is_unique: return np.zeros(len(self), dtype=np.bool) return duplicated(self, keep=keep) else: return self._constructor(duplicated(self, keep=keep), index=self.index).__finalize__(self) # ---------------------------------------------------------------------- # abstracts
Example #22
Source File: base.py From recruit with Apache License 2.0 | 5 votes |
def drop_duplicates(self, keep='first', inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') if isinstance(self, ABCIndexClass): if self.is_unique: return self._shallow_copy() duplicated = self.duplicated(keep=keep) result = self[np.logical_not(duplicated)] if inplace: return self._update_inplace(result) else: return result
Example #23
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_hashtable_unique(self, htable, tm_dtype, writable): # output of maker has guaranteed unique elements maker = getattr(tm, 'make' + tm_dtype + 'Index') s = Series(maker(1000)) if htable == ht.Float64HashTable: # add NaN for float column s.loc[500] = np.nan elif htable == ht.PyObjectHashTable: # use different NaN types for object column s.loc[500:502] = [np.nan, None, pd.NaT] # create duplicated selection s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True) s_duplicated.values.setflags(write=writable) # drop_duplicates has own cython code (hash_table_func_helper.pxi) # and is tested separately; keeps first occurrence like ht.unique() expected_unique = s_duplicated.drop_duplicates(keep='first').values result_unique = htable().unique(s_duplicated.values) tm.assert_numpy_array_equal(result_unique, expected_unique) # test return_inverse=True # reconstruction can only succeed if the inverse is correct result_unique, result_inverse = htable().unique(s_duplicated.values, return_inverse=True) tm.assert_numpy_array_equal(result_unique, expected_unique) reconstr = result_unique[result_inverse] tm.assert_numpy_array_equal(reconstr, s_duplicated.values)
Example #24
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_unique_index(self): cases = [Index([1, 2, 3]), pd.RangeIndex(0, 3)] for case in cases: assert case.is_unique is True tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False]))
Example #25
Source File: test_algos.py From recruit with Apache License 2.0 | 5 votes |
def test_numeric_object_likes(self, case): exp_first = np.array([False, False, True, False, False, True, False, True, True, False]) exp_last = np.array([True, True, True, True, False, False, False, False, False, False]) exp_false = exp_first | exp_last res_first = algos.duplicated(case, keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = algos.duplicated(case, keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = algos.duplicated(case, keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # index for idx in [Index(case), Index(case, dtype='category')]: res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = idx.duplicated(keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = idx.duplicated(keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # series for s in [Series(case), Series(case, dtype='category')]: res_first = s.duplicated(keep='first') tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) tm.assert_series_equal(res_false, Series(exp_false))
Example #26
Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 4 votes |
def test_datetime_likes(self): dt = ['2011-01-01', '2011-01-02', '2011-01-01', 'NaT', '2011-01-03', '2011-01-02', '2011-01-04', '2011-01-01', 'NaT', '2011-01-06'] td = ['1 days', '2 days', '1 days', 'NaT', '3 days', '2 days', '4 days', '1 days', 'NaT', '6 days'] cases = [np.array([Timestamp(d) for d in dt]), np.array([Timestamp(d, tz='US/Eastern') for d in dt]), np.array([pd.Period(d, freq='D') for d in dt]), np.array([np.datetime64(d) for d in dt]), np.array([pd.Timedelta(d) for d in td])] exp_first = np.array([False, False, True, False, False, True, False, True, True, False]) exp_last = np.array([True, True, True, True, False, False, False, False, False, False]) exp_false = exp_first | exp_last for case in cases: res_first = algos.duplicated(case, keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = algos.duplicated(case, keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = algos.duplicated(case, keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # index for idx in [Index(case), Index(case, dtype='category'), Index(case, dtype=object)]: res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = idx.duplicated(keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = idx.duplicated(keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # series for s in [Series(case), Series(case, dtype='category'), Series(case, dtype=object)]: res_first = s.duplicated(keep='first') tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) tm.assert_series_equal(res_false, Series(exp_false))
Example #27
Source File: test_algos.py From vnpy_crypto with MIT License | 4 votes |
def test_datetime_likes(self): dt = ['2011-01-01', '2011-01-02', '2011-01-01', 'NaT', '2011-01-03', '2011-01-02', '2011-01-04', '2011-01-01', 'NaT', '2011-01-06'] td = ['1 days', '2 days', '1 days', 'NaT', '3 days', '2 days', '4 days', '1 days', 'NaT', '6 days'] cases = [np.array([Timestamp(d) for d in dt]), np.array([Timestamp(d, tz='US/Eastern') for d in dt]), np.array([pd.Period(d, freq='D') for d in dt]), np.array([np.datetime64(d) for d in dt]), np.array([pd.Timedelta(d) for d in td])] exp_first = np.array([False, False, True, False, False, True, False, True, True, False]) exp_last = np.array([True, True, True, True, False, False, False, False, False, False]) exp_false = exp_first | exp_last for case in cases: res_first = algos.duplicated(case, keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = algos.duplicated(case, keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = algos.duplicated(case, keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # index for idx in [Index(case), Index(case, dtype='category'), Index(case, dtype=object)]: res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = idx.duplicated(keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = idx.duplicated(keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # series for s in [Series(case), Series(case, dtype='category'), Series(case, dtype=object)]: res_first = s.duplicated(keep='first') tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) tm.assert_series_equal(res_false, Series(exp_false))
Example #28
Source File: test_algos.py From elasticintel with GNU General Public License v3.0 | 4 votes |
def test_datetime_likes(self): dt = ['2011-01-01', '2011-01-02', '2011-01-01', 'NaT', '2011-01-03', '2011-01-02', '2011-01-04', '2011-01-01', 'NaT', '2011-01-06'] td = ['1 days', '2 days', '1 days', 'NaT', '3 days', '2 days', '4 days', '1 days', 'NaT', '6 days'] cases = [np.array([Timestamp(d) for d in dt]), np.array([Timestamp(d, tz='US/Eastern') for d in dt]), np.array([pd.Period(d, freq='D') for d in dt]), np.array([np.datetime64(d) for d in dt]), np.array([pd.Timedelta(d) for d in td])] exp_first = np.array([False, False, True, False, False, True, False, True, True, False]) exp_last = np.array([True, True, True, True, False, False, False, False, False, False]) exp_false = exp_first | exp_last for case in cases: res_first = algos.duplicated(case, keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = algos.duplicated(case, keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = algos.duplicated(case, keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # index for idx in [pd.Index(case), pd.Index(case, dtype='category'), pd.Index(case, dtype=object)]: res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = idx.duplicated(keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = idx.duplicated(keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # series for s in [Series(case), Series(case, dtype='category'), Series(case, dtype=object)]: res_first = s.duplicated(keep='first') tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) tm.assert_series_equal(res_false, Series(exp_false))
Example #29
Source File: test_algos.py From recruit with Apache License 2.0 | 4 votes |
def test_datetime_likes(self): dt = ['2011-01-01', '2011-01-02', '2011-01-01', 'NaT', '2011-01-03', '2011-01-02', '2011-01-04', '2011-01-01', 'NaT', '2011-01-06'] td = ['1 days', '2 days', '1 days', 'NaT', '3 days', '2 days', '4 days', '1 days', 'NaT', '6 days'] cases = [np.array([Timestamp(d) for d in dt]), np.array([Timestamp(d, tz='US/Eastern') for d in dt]), np.array([pd.Period(d, freq='D') for d in dt]), np.array([np.datetime64(d) for d in dt]), np.array([pd.Timedelta(d) for d in td])] exp_first = np.array([False, False, True, False, False, True, False, True, True, False]) exp_last = np.array([True, True, True, True, False, False, False, False, False, False]) exp_false = exp_first | exp_last for case in cases: res_first = algos.duplicated(case, keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = algos.duplicated(case, keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = algos.duplicated(case, keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # index for idx in [Index(case), Index(case, dtype='category'), Index(case, dtype=object)]: res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = idx.duplicated(keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = idx.duplicated(keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # series for s in [Series(case), Series(case, dtype='category'), Series(case, dtype=object)]: res_first = s.duplicated(keep='first') tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) tm.assert_series_equal(res_false, Series(exp_false))
Example #30
Source File: test_algos.py From twitter-stock-recommendation with MIT License | 4 votes |
def test_datetime_likes(self): dt = ['2011-01-01', '2011-01-02', '2011-01-01', 'NaT', '2011-01-03', '2011-01-02', '2011-01-04', '2011-01-01', 'NaT', '2011-01-06'] td = ['1 days', '2 days', '1 days', 'NaT', '3 days', '2 days', '4 days', '1 days', 'NaT', '6 days'] cases = [np.array([Timestamp(d) for d in dt]), np.array([Timestamp(d, tz='US/Eastern') for d in dt]), np.array([pd.Period(d, freq='D') for d in dt]), np.array([np.datetime64(d) for d in dt]), np.array([pd.Timedelta(d) for d in td])] exp_first = np.array([False, False, True, False, False, True, False, True, True, False]) exp_last = np.array([True, True, True, True, False, False, False, False, False, False]) exp_false = exp_first | exp_last for case in cases: res_first = algos.duplicated(case, keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = algos.duplicated(case, keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = algos.duplicated(case, keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # index for idx in [Index(case), Index(case, dtype='category'), Index(case, dtype=object)]: res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) res_last = idx.duplicated(keep='last') tm.assert_numpy_array_equal(res_last, exp_last) res_false = idx.duplicated(keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # series for s in [Series(case), Series(case, dtype='category'), Series(case, dtype=object)]: res_first = s.duplicated(keep='first') tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) tm.assert_series_equal(res_false, Series(exp_false))