Python pandas.core.algorithms.duplicated() Examples

The following are 30 code examples of pandas.core.algorithms.duplicated(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.algorithms, or try the search function.
Example #1
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_unique_index(self):
        cases = [Index([1, 2, 3]), pd.RangeIndex(0, 3)]
        for case in cases:
            assert case.is_unique is True
            tm.assert_numpy_array_equal(case.duplicated(),
                                        np.array([False, False, False])) 
Example #2
Source File: test_algos.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_unique_index(self):
        cases = [Index([1, 2, 3]), pd.RangeIndex(0, 3)]
        for case in cases:
            assert case.is_unique
            tm.assert_numpy_array_equal(case.duplicated(),
                                        np.array([False, False, False])) 
Example #3
Source File: test_algos.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_numeric_object_likes(self, case):
        exp_first = np.array([False, False, True, False, False,
                              True, False, True, True, False])
        exp_last = np.array([True, True, True, True, False,
                             False, False, False, False, False])
        exp_false = exp_first | exp_last

        res_first = algos.duplicated(case, keep='first')
        tm.assert_numpy_array_equal(res_first, exp_first)

        res_last = algos.duplicated(case, keep='last')
        tm.assert_numpy_array_equal(res_last, exp_last)

        res_false = algos.duplicated(case, keep=False)
        tm.assert_numpy_array_equal(res_false, exp_false)

        # index
        for idx in [Index(case), Index(case, dtype='category')]:
            res_first = idx.duplicated(keep='first')
            tm.assert_numpy_array_equal(res_first, exp_first)

            res_last = idx.duplicated(keep='last')
            tm.assert_numpy_array_equal(res_last, exp_last)

            res_false = idx.duplicated(keep=False)
            tm.assert_numpy_array_equal(res_false, exp_false)

        # series
        for s in [Series(case), Series(case, dtype='category')]:
            res_first = s.duplicated(keep='first')
            tm.assert_series_equal(res_first, Series(exp_first))

            res_last = s.duplicated(keep='last')
            tm.assert_series_equal(res_last, Series(exp_last))

            res_false = s.duplicated(keep=False)
            tm.assert_series_equal(res_false, Series(exp_false)) 
Example #4
Source File: test_algos.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_duplicated_with_nas(self):
        keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)

        result = algos.duplicated(keys)
        expected = np.array([False, False, False, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='first')
        expected = np.array([False, False, False, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='last')
        expected = np.array([True, False, True, False, False, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep=False)
        expected = np.array([True, False, True, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        keys = np.empty(8, dtype=object)
        for i, t in enumerate(zip([0, 0, np.nan, np.nan] * 2,
                                  [0, np.nan, 0, np.nan] * 2)):
            keys[i] = t

        result = algos.duplicated(keys)
        falses = [False] * 4
        trues = [True] * 4
        expected = np.array(falses + trues)
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='last')
        expected = np.array(trues + falses)
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep=False)
        expected = np.array(trues + trues)
        tm.assert_numpy_array_equal(result, expected) 
Example #5
Source File: base.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def drop_duplicates(self, keep='first', inplace=False):
        """Return (or apply in place) the object without duplicated values.

        Parameters
        ----------
        keep : passed through to ``self.duplicated``; defaults to 'first'.
        inplace : checked with ``validate_bool_kwarg``; when true the
            filtered result is handed to ``self._update_inplace`` and that
            call's return value is returned.

        Returns
        -------
        A shallow copy for an already-unique ``ABCIndexClass`` instance
        (``inplace`` is not consulted on that path); otherwise ``self``
        filtered by the negated duplicate mask, or the result of
        ``self._update_inplace`` when ``inplace`` is true.
        """
        inplace = validate_bool_kwarg(inplace, 'inplace')

        # a unique index has nothing to drop
        if isinstance(self, ABCIndexClass) and self.is_unique:
            return self._shallow_copy()

        keep_mask = np.logical_not(self.duplicated(keep=keep))
        deduplicated = self[keep_mask]
        if not inplace:
            return deduplicated
        return self._update_inplace(deduplicated)
Example #6
Source File: test_algos.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_unique_index(self):
        cases = [pd.Index([1, 2, 3]), pd.RangeIndex(0, 3)]
        for case in cases:
            assert case.is_unique
            tm.assert_numpy_array_equal(case.duplicated(),
                                        np.array([False, False, False])) 
Example #7
Source File: test_algos.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_numeric_object_likes(self, case):
        exp_first = np.array([False, False, True, False, False,
                              True, False, True, True, False])
        exp_last = np.array([True, True, True, True, False,
                             False, False, False, False, False])
        exp_false = exp_first | exp_last

        res_first = algos.duplicated(case, keep='first')
        tm.assert_numpy_array_equal(res_first, exp_first)

        res_last = algos.duplicated(case, keep='last')
        tm.assert_numpy_array_equal(res_last, exp_last)

        res_false = algos.duplicated(case, keep=False)
        tm.assert_numpy_array_equal(res_false, exp_false)

        # index
        for idx in [pd.Index(case), pd.Index(case, dtype='category')]:
            res_first = idx.duplicated(keep='first')
            tm.assert_numpy_array_equal(res_first, exp_first)

            res_last = idx.duplicated(keep='last')
            tm.assert_numpy_array_equal(res_last, exp_last)

            res_false = idx.duplicated(keep=False)
            tm.assert_numpy_array_equal(res_false, exp_false)

        # series
        for s in [Series(case), Series(case, dtype='category')]:
            res_first = s.duplicated(keep='first')
            tm.assert_series_equal(res_first, Series(exp_first))

            res_last = s.duplicated(keep='last')
            tm.assert_series_equal(res_last, Series(exp_last))

            res_false = s.duplicated(keep=False)
            tm.assert_series_equal(res_false, Series(exp_false)) 
Example #8
Source File: test_algos.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_duplicated_with_nas(self):
        keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)

        result = algos.duplicated(keys)
        expected = np.array([False, False, False, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='first')
        expected = np.array([False, False, False, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='last')
        expected = np.array([True, False, True, False, False, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep=False)
        expected = np.array([True, False, True, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        keys = np.empty(8, dtype=object)
        for i, t in enumerate(zip([0, 0, np.nan, np.nan] * 2,
                                  [0, np.nan, 0, np.nan] * 2)):
            keys[i] = t

        result = algos.duplicated(keys)
        falses = [False] * 4
        trues = [True] * 4
        expected = np.array(falses + trues)
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='last')
        expected = np.array(trues + falses)
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep=False)
        expected = np.array(trues + trues)
        tm.assert_numpy_array_equal(result, expected) 
Example #9
Source File: base.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def duplicated(self, keep='first'):
        """Return a boolean mask marking duplicated values.

        Parameters
        ----------
        keep : forwarded unchanged to ``pandas.core.algorithms.duplicated``;
            defaults to 'first'.

        Returns
        -------
        For an ``ABCIndexClass`` instance, a boolean array (all False when
        the index is unique). For anything else, the mask wrapped with
        ``self._constructor`` on ``self.index`` and passed through
        ``__finalize__``.
        """
        # function-level import kept from the original
        # (presumably avoids a circular import -- confirm)
        from pandas.core.algorithms import duplicated
        if isinstance(self, ABCIndexClass):
            if self.is_unique:
                # ``np.bool`` was deprecated in NumPy 1.20 and removed in
                # 1.24 (NumPy 2.0 reintroduced the name as the NumPy scalar
                # type); the builtin ``bool`` gives the same dtype on every
                # NumPy version.
                return np.zeros(len(self), dtype=bool)
            return duplicated(self, keep=keep)
        else:
            return self._constructor(duplicated(self, keep=keep),
                                     index=self.index).__finalize__(self)

    # ----------------------------------------------------------------------
    # abstracts 
Example #10
Source File: base.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def drop_duplicates(self, keep='first', inplace=False):
        """Return (or apply in place) the object without duplicated values.

        Parameters
        ----------
        keep : passed through to ``self.duplicated``; defaults to 'first'.
        inplace : checked with ``validate_bool_kwarg``; when true the
            filtered result is handed to ``self._update_inplace`` and that
            call's return value is returned.

        Returns
        -------
        A shallow copy for an already-unique ``ABCIndexClass`` instance
        (``inplace`` is not consulted on that path); otherwise ``self``
        filtered by the negated duplicate mask, or the result of
        ``self._update_inplace`` when ``inplace`` is true.
        """
        inplace = validate_bool_kwarg(inplace, 'inplace')

        # a unique index has nothing to drop
        if isinstance(self, ABCIndexClass) and self.is_unique:
            return self._shallow_copy()

        keep_mask = np.logical_not(self.duplicated(keep=keep))
        deduplicated = self[keep_mask]
        if not inplace:
            return deduplicated
        return self._update_inplace(deduplicated)
Example #11
Source File: base.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def duplicated(self, keep='first'):
        """Return a boolean mask marking duplicated values.

        Parameters
        ----------
        keep : forwarded unchanged to ``pandas.core.algorithms.duplicated``;
            defaults to 'first'.

        Returns
        -------
        For an ``ABCIndexClass`` instance, a boolean array (all False when
        the index is unique). For anything else, the mask wrapped with
        ``self._constructor`` on ``self.index`` and passed through
        ``__finalize__``.
        """
        # function-level import kept from the original
        # (presumably avoids a circular import -- confirm)
        from pandas.core.algorithms import duplicated
        if isinstance(self, ABCIndexClass):
            if self.is_unique:
                # ``np.bool`` was deprecated in NumPy 1.20 and removed in
                # 1.24 (NumPy 2.0 reintroduced the name as the NumPy scalar
                # type); the builtin ``bool`` gives the same dtype on every
                # NumPy version.
                return np.zeros(len(self), dtype=bool)
            return duplicated(self, keep=keep)
        else:
            return self._constructor(duplicated(self, keep=keep),
                                     index=self.index).__finalize__(self)

    # ----------------------------------------------------------------------
    # abstracts 
Example #12
Source File: base.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def drop_duplicates(self, keep='first', inplace=False):
        """Return (or apply in place) the object without duplicated values.

        Parameters
        ----------
        keep : passed through to ``self.duplicated``; defaults to 'first'.
        inplace : checked with ``validate_bool_kwarg``; when true the
            filtered result is handed to ``self._update_inplace`` and that
            call's return value is returned.

        Returns
        -------
        A shallow copy for an already-unique ``ABCIndexClass`` instance
        (``inplace`` is not consulted on that path); otherwise ``self``
        filtered by the negated duplicate mask, or the result of
        ``self._update_inplace`` when ``inplace`` is true.
        """
        inplace = validate_bool_kwarg(inplace, 'inplace')

        # a unique index has nothing to drop
        if isinstance(self, ABCIndexClass) and self.is_unique:
            return self._shallow_copy()

        keep_mask = np.logical_not(self.duplicated(keep=keep))
        deduplicated = self[keep_mask]
        if not inplace:
            return deduplicated
        return self._update_inplace(deduplicated)
Example #13
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_hashtable_unique(self, htable, tm_dtype, writable):
        """Compare ``htable().unique`` with ``Series.drop_duplicates``.

        ``htable`` is a hashtable class from ``ht``, ``tm_dtype`` selects
        the ``tm.make<dtype>Index`` factory, and ``writable`` sets the
        write flag on the sampled values before they are hashed.
        """
        # output of maker has guaranteed unique elements
        make_index = getattr(tm, 'make' + tm_dtype + 'Index')
        base = Series(make_index(1000))
        if htable == ht.Float64HashTable:
            # add NaN for float column
            base.loc[500] = np.nan
        elif htable == ht.PyObjectHashTable:
            # use different NaN types for object column
            base.loc[500:502] = [np.nan, None, pd.NaT]

        # oversample with replacement so that values repeat
        sampled = base.sample(frac=3, replace=True).reset_index(drop=True)
        sampled.values.setflags(write=writable)

        # drop_duplicates has own cython code (hash_table_func_helper.pxi)
        # and is tested separately; keeps first occurrence like ht.unique()
        expected_unique = sampled.drop_duplicates(keep='first').values
        tm.assert_numpy_array_equal(htable().unique(sampled.values),
                                    expected_unique)

        # test return_inverse=True
        # reconstruction can only succeed if the inverse is correct
        uniques, inverse = htable().unique(sampled.values,
                                           return_inverse=True)
        tm.assert_numpy_array_equal(uniques, expected_unique)
        tm.assert_numpy_array_equal(uniques[inverse], sampled.values)
Example #14
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_duplicated_with_nas(self):
        keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)

        result = algos.duplicated(keys)
        expected = np.array([False, False, False, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='first')
        expected = np.array([False, False, False, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='last')
        expected = np.array([True, False, True, False, False, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep=False)
        expected = np.array([True, False, True, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        keys = np.empty(8, dtype=object)
        for i, t in enumerate(zip([0, 0, np.nan, np.nan] * 2,
                                  [0, np.nan, 0, np.nan] * 2)):
            keys[i] = t

        result = algos.duplicated(keys)
        falses = [False] * 4
        trues = [True] * 4
        expected = np.array(falses + trues)
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='last')
        expected = np.array(trues + falses)
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep=False)
        expected = np.array(trues + trues)
        tm.assert_numpy_array_equal(result, expected) 
Example #15
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_numeric_object_likes(self, case):
        exp_first = np.array([False, False, True, False, False,
                              True, False, True, True, False])
        exp_last = np.array([True, True, True, True, False,
                             False, False, False, False, False])
        exp_false = exp_first | exp_last

        res_first = algos.duplicated(case, keep='first')
        tm.assert_numpy_array_equal(res_first, exp_first)

        res_last = algos.duplicated(case, keep='last')
        tm.assert_numpy_array_equal(res_last, exp_last)

        res_false = algos.duplicated(case, keep=False)
        tm.assert_numpy_array_equal(res_false, exp_false)

        # index
        for idx in [Index(case), Index(case, dtype='category')]:
            res_first = idx.duplicated(keep='first')
            tm.assert_numpy_array_equal(res_first, exp_first)

            res_last = idx.duplicated(keep='last')
            tm.assert_numpy_array_equal(res_last, exp_last)

            res_false = idx.duplicated(keep=False)
            tm.assert_numpy_array_equal(res_false, exp_false)

        # series
        for s in [Series(case), Series(case, dtype='category')]:
            res_first = s.duplicated(keep='first')
            tm.assert_series_equal(res_first, Series(exp_first))

            res_last = s.duplicated(keep='last')
            tm.assert_series_equal(res_last, Series(exp_last))

            res_false = s.duplicated(keep=False)
            tm.assert_series_equal(res_false, Series(exp_false)) 
Example #16
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_duplicated_with_nas(self):
        keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)

        result = algos.duplicated(keys)
        expected = np.array([False, False, False, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='first')
        expected = np.array([False, False, False, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='last')
        expected = np.array([True, False, True, False, False, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep=False)
        expected = np.array([True, False, True, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        keys = np.empty(8, dtype=object)
        for i, t in enumerate(zip([0, 0, np.nan, np.nan] * 2,
                                  [0, np.nan, 0, np.nan] * 2)):
            keys[i] = t

        result = algos.duplicated(keys)
        falses = [False] * 4
        trues = [True] * 4
        expected = np.array(falses + trues)
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='last')
        expected = np.array(trues + falses)
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep=False)
        expected = np.array(trues + trues)
        tm.assert_numpy_array_equal(result, expected) 
Example #17
Source File: base.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def drop_duplicates(self, keep='first', inplace=False):
        """Return (or apply in place) the object without duplicated values.

        Parameters
        ----------
        keep : passed through to ``self.duplicated``; defaults to 'first'.
        inplace : checked with ``validate_bool_kwarg``; when true the
            filtered result is handed to ``self._update_inplace`` and that
            call's return value is returned.

        Returns
        -------
        A shallow copy for an already-unique ``ABCIndexClass`` instance
        (``inplace`` is not consulted on that path); otherwise ``self``
        filtered by the negated duplicate mask, or the result of
        ``self._update_inplace`` when ``inplace`` is true.
        """
        inplace = validate_bool_kwarg(inplace, 'inplace')

        # a unique index has nothing to drop
        if isinstance(self, ABCIndexClass) and self.is_unique:
            return self._shallow_copy()

        keep_mask = np.logical_not(self.duplicated(keep=keep))
        deduplicated = self[keep_mask]
        if not inplace:
            return deduplicated
        return self._update_inplace(deduplicated)
Example #18
Source File: test_algos.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_unique_index(self):
        cases = [Index([1, 2, 3]), pd.RangeIndex(0, 3)]
        for case in cases:
            assert case.is_unique
            tm.assert_numpy_array_equal(case.duplicated(),
                                        np.array([False, False, False])) 
Example #19
Source File: test_algos.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_numeric_object_likes(self, case):
        exp_first = np.array([False, False, True, False, False,
                              True, False, True, True, False])
        exp_last = np.array([True, True, True, True, False,
                             False, False, False, False, False])
        exp_false = exp_first | exp_last

        res_first = algos.duplicated(case, keep='first')
        tm.assert_numpy_array_equal(res_first, exp_first)

        res_last = algos.duplicated(case, keep='last')
        tm.assert_numpy_array_equal(res_last, exp_last)

        res_false = algos.duplicated(case, keep=False)
        tm.assert_numpy_array_equal(res_false, exp_false)

        # index
        for idx in [Index(case), Index(case, dtype='category')]:
            res_first = idx.duplicated(keep='first')
            tm.assert_numpy_array_equal(res_first, exp_first)

            res_last = idx.duplicated(keep='last')
            tm.assert_numpy_array_equal(res_last, exp_last)

            res_false = idx.duplicated(keep=False)
            tm.assert_numpy_array_equal(res_false, exp_false)

        # series
        for s in [Series(case), Series(case, dtype='category')]:
            res_first = s.duplicated(keep='first')
            tm.assert_series_equal(res_first, Series(exp_first))

            res_last = s.duplicated(keep='last')
            tm.assert_series_equal(res_last, Series(exp_last))

            res_false = s.duplicated(keep=False)
            tm.assert_series_equal(res_false, Series(exp_false)) 
Example #20
Source File: test_algos.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_duplicated_with_nas(self):
        keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)

        result = algos.duplicated(keys)
        expected = np.array([False, False, False, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='first')
        expected = np.array([False, False, False, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='last')
        expected = np.array([True, False, True, False, False, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep=False)
        expected = np.array([True, False, True, True, False, True])
        tm.assert_numpy_array_equal(result, expected)

        keys = np.empty(8, dtype=object)
        for i, t in enumerate(zip([0, 0, np.nan, np.nan] * 2,
                                  [0, np.nan, 0, np.nan] * 2)):
            keys[i] = t

        result = algos.duplicated(keys)
        falses = [False] * 4
        trues = [True] * 4
        expected = np.array(falses + trues)
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep='last')
        expected = np.array(trues + falses)
        tm.assert_numpy_array_equal(result, expected)

        result = algos.duplicated(keys, keep=False)
        expected = np.array(trues + trues)
        tm.assert_numpy_array_equal(result, expected) 
Example #21
Source File: base.py    From recruit with Apache License 2.0 5 votes vote down vote up
def duplicated(self, keep='first'):
        """Return a boolean mask marking duplicated values.

        Parameters
        ----------
        keep : forwarded unchanged to ``pandas.core.algorithms.duplicated``;
            defaults to 'first'.

        Returns
        -------
        For an ``ABCIndexClass`` instance, a boolean array (all False when
        the index is unique). For anything else, the mask wrapped with
        ``self._constructor`` on ``self.index`` and passed through
        ``__finalize__``.
        """
        # function-level import kept from the original
        # (presumably avoids a circular import -- confirm)
        from pandas.core.algorithms import duplicated
        if isinstance(self, ABCIndexClass):
            if self.is_unique:
                # ``np.bool`` was deprecated in NumPy 1.20 and removed in
                # 1.24 (NumPy 2.0 reintroduced the name as the NumPy scalar
                # type); the builtin ``bool`` gives the same dtype on every
                # NumPy version.
                return np.zeros(len(self), dtype=bool)
            return duplicated(self, keep=keep)
        else:
            return self._constructor(duplicated(self, keep=keep),
                                     index=self.index).__finalize__(self)

    # ----------------------------------------------------------------------
    # abstracts 
Example #22
Source File: base.py    From recruit with Apache License 2.0 5 votes vote down vote up
def drop_duplicates(self, keep='first', inplace=False):
        """Return (or apply in place) the object without duplicated values.

        Parameters
        ----------
        keep : passed through to ``self.duplicated``; defaults to 'first'.
        inplace : checked with ``validate_bool_kwarg``; when true the
            filtered result is handed to ``self._update_inplace`` and that
            call's return value is returned.

        Returns
        -------
        A shallow copy for an already-unique ``ABCIndexClass`` instance
        (``inplace`` is not consulted on that path); otherwise ``self``
        filtered by the negated duplicate mask, or the result of
        ``self._update_inplace`` when ``inplace`` is true.
        """
        inplace = validate_bool_kwarg(inplace, 'inplace')

        # a unique index has nothing to drop
        if isinstance(self, ABCIndexClass) and self.is_unique:
            return self._shallow_copy()

        keep_mask = np.logical_not(self.duplicated(keep=keep))
        deduplicated = self[keep_mask]
        if not inplace:
            return deduplicated
        return self._update_inplace(deduplicated)
Example #23
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_hashtable_unique(self, htable, tm_dtype, writable):
        """Compare ``htable().unique`` with ``Series.drop_duplicates``.

        ``htable`` is a hashtable class from ``ht``, ``tm_dtype`` selects
        the ``tm.make<dtype>Index`` factory, and ``writable`` sets the
        write flag on the sampled values before they are hashed.
        """
        # output of maker has guaranteed unique elements
        make_index = getattr(tm, 'make' + tm_dtype + 'Index')
        base = Series(make_index(1000))
        if htable == ht.Float64HashTable:
            # add NaN for float column
            base.loc[500] = np.nan
        elif htable == ht.PyObjectHashTable:
            # use different NaN types for object column
            base.loc[500:502] = [np.nan, None, pd.NaT]

        # oversample with replacement so that values repeat
        sampled = base.sample(frac=3, replace=True).reset_index(drop=True)
        sampled.values.setflags(write=writable)

        # drop_duplicates has own cython code (hash_table_func_helper.pxi)
        # and is tested separately; keeps first occurrence like ht.unique()
        expected_unique = sampled.drop_duplicates(keep='first').values
        tm.assert_numpy_array_equal(htable().unique(sampled.values),
                                    expected_unique)

        # test return_inverse=True
        # reconstruction can only succeed if the inverse is correct
        uniques, inverse = htable().unique(sampled.values,
                                           return_inverse=True)
        tm.assert_numpy_array_equal(uniques, expected_unique)
        tm.assert_numpy_array_equal(uniques[inverse], sampled.values)
Example #24
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_unique_index(self):
        cases = [Index([1, 2, 3]), pd.RangeIndex(0, 3)]
        for case in cases:
            assert case.is_unique is True
            tm.assert_numpy_array_equal(case.duplicated(),
                                        np.array([False, False, False])) 
Example #25
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_numeric_object_likes(self, case):
        exp_first = np.array([False, False, True, False, False,
                              True, False, True, True, False])
        exp_last = np.array([True, True, True, True, False,
                             False, False, False, False, False])
        exp_false = exp_first | exp_last

        res_first = algos.duplicated(case, keep='first')
        tm.assert_numpy_array_equal(res_first, exp_first)

        res_last = algos.duplicated(case, keep='last')
        tm.assert_numpy_array_equal(res_last, exp_last)

        res_false = algos.duplicated(case, keep=False)
        tm.assert_numpy_array_equal(res_false, exp_false)

        # index
        for idx in [Index(case), Index(case, dtype='category')]:
            res_first = idx.duplicated(keep='first')
            tm.assert_numpy_array_equal(res_first, exp_first)

            res_last = idx.duplicated(keep='last')
            tm.assert_numpy_array_equal(res_last, exp_last)

            res_false = idx.duplicated(keep=False)
            tm.assert_numpy_array_equal(res_false, exp_false)

        # series
        for s in [Series(case), Series(case, dtype='category')]:
            res_first = s.duplicated(keep='first')
            tm.assert_series_equal(res_first, Series(exp_first))

            res_last = s.duplicated(keep='last')
            tm.assert_series_equal(res_last, Series(exp_last))

            res_false = s.duplicated(keep=False)
            tm.assert_series_equal(res_false, Series(exp_false)) 
Example #26
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 4 votes vote down vote up
def test_datetime_likes(self):

        dt = ['2011-01-01', '2011-01-02', '2011-01-01', 'NaT', '2011-01-03',
              '2011-01-02', '2011-01-04', '2011-01-01', 'NaT', '2011-01-06']
        td = ['1 days', '2 days', '1 days', 'NaT', '3 days',
              '2 days', '4 days', '1 days', 'NaT', '6 days']

        cases = [np.array([Timestamp(d) for d in dt]),
                 np.array([Timestamp(d, tz='US/Eastern') for d in dt]),
                 np.array([pd.Period(d, freq='D') for d in dt]),
                 np.array([np.datetime64(d) for d in dt]),
                 np.array([pd.Timedelta(d) for d in td])]

        exp_first = np.array([False, False, True, False, False,
                              True, False, True, True, False])
        exp_last = np.array([True, True, True, True, False,
                             False, False, False, False, False])
        exp_false = exp_first | exp_last

        for case in cases:
            res_first = algos.duplicated(case, keep='first')
            tm.assert_numpy_array_equal(res_first, exp_first)

            res_last = algos.duplicated(case, keep='last')
            tm.assert_numpy_array_equal(res_last, exp_last)

            res_false = algos.duplicated(case, keep=False)
            tm.assert_numpy_array_equal(res_false, exp_false)

            # index
            for idx in [Index(case), Index(case, dtype='category'),
                        Index(case, dtype=object)]:
                res_first = idx.duplicated(keep='first')
                tm.assert_numpy_array_equal(res_first, exp_first)

                res_last = idx.duplicated(keep='last')
                tm.assert_numpy_array_equal(res_last, exp_last)

                res_false = idx.duplicated(keep=False)
                tm.assert_numpy_array_equal(res_false, exp_false)

            # series
            for s in [Series(case), Series(case, dtype='category'),
                      Series(case, dtype=object)]:
                res_first = s.duplicated(keep='first')
                tm.assert_series_equal(res_first, Series(exp_first))

                res_last = s.duplicated(keep='last')
                tm.assert_series_equal(res_last, Series(exp_last))

                res_false = s.duplicated(keep=False)
                tm.assert_series_equal(res_false, Series(exp_false)) 
Example #27
Source File: test_algos.py    From vnpy_crypto with MIT License 4 votes vote down vote up
def test_datetime_likes(self):

        dt = ['2011-01-01', '2011-01-02', '2011-01-01', 'NaT', '2011-01-03',
              '2011-01-02', '2011-01-04', '2011-01-01', 'NaT', '2011-01-06']
        td = ['1 days', '2 days', '1 days', 'NaT', '3 days',
              '2 days', '4 days', '1 days', 'NaT', '6 days']

        cases = [np.array([Timestamp(d) for d in dt]),
                 np.array([Timestamp(d, tz='US/Eastern') for d in dt]),
                 np.array([pd.Period(d, freq='D') for d in dt]),
                 np.array([np.datetime64(d) for d in dt]),
                 np.array([pd.Timedelta(d) for d in td])]

        exp_first = np.array([False, False, True, False, False,
                              True, False, True, True, False])
        exp_last = np.array([True, True, True, True, False,
                             False, False, False, False, False])
        exp_false = exp_first | exp_last

        for case in cases:
            res_first = algos.duplicated(case, keep='first')
            tm.assert_numpy_array_equal(res_first, exp_first)

            res_last = algos.duplicated(case, keep='last')
            tm.assert_numpy_array_equal(res_last, exp_last)

            res_false = algos.duplicated(case, keep=False)
            tm.assert_numpy_array_equal(res_false, exp_false)

            # index
            for idx in [Index(case), Index(case, dtype='category'),
                        Index(case, dtype=object)]:
                res_first = idx.duplicated(keep='first')
                tm.assert_numpy_array_equal(res_first, exp_first)

                res_last = idx.duplicated(keep='last')
                tm.assert_numpy_array_equal(res_last, exp_last)

                res_false = idx.duplicated(keep=False)
                tm.assert_numpy_array_equal(res_false, exp_false)

            # series
            for s in [Series(case), Series(case, dtype='category'),
                      Series(case, dtype=object)]:
                res_first = s.duplicated(keep='first')
                tm.assert_series_equal(res_first, Series(exp_first))

                res_last = s.duplicated(keep='last')
                tm.assert_series_equal(res_last, Series(exp_last))

                res_false = s.duplicated(keep=False)
                tm.assert_series_equal(res_false, Series(exp_false)) 
Example #28
Source File: test_algos.py    From elasticintel with GNU General Public License v3.0 4 votes vote down vote up
def test_datetime_likes(self):

        dt = ['2011-01-01', '2011-01-02', '2011-01-01', 'NaT', '2011-01-03',
              '2011-01-02', '2011-01-04', '2011-01-01', 'NaT', '2011-01-06']
        td = ['1 days', '2 days', '1 days', 'NaT', '3 days',
              '2 days', '4 days', '1 days', 'NaT', '6 days']

        cases = [np.array([Timestamp(d) for d in dt]),
                 np.array([Timestamp(d, tz='US/Eastern') for d in dt]),
                 np.array([pd.Period(d, freq='D') for d in dt]),
                 np.array([np.datetime64(d) for d in dt]),
                 np.array([pd.Timedelta(d) for d in td])]

        exp_first = np.array([False, False, True, False, False,
                              True, False, True, True, False])
        exp_last = np.array([True, True, True, True, False,
                             False, False, False, False, False])
        exp_false = exp_first | exp_last

        for case in cases:
            res_first = algos.duplicated(case, keep='first')
            tm.assert_numpy_array_equal(res_first, exp_first)

            res_last = algos.duplicated(case, keep='last')
            tm.assert_numpy_array_equal(res_last, exp_last)

            res_false = algos.duplicated(case, keep=False)
            tm.assert_numpy_array_equal(res_false, exp_false)

            # index
            for idx in [pd.Index(case), pd.Index(case, dtype='category'),
                        pd.Index(case, dtype=object)]:
                res_first = idx.duplicated(keep='first')
                tm.assert_numpy_array_equal(res_first, exp_first)

                res_last = idx.duplicated(keep='last')
                tm.assert_numpy_array_equal(res_last, exp_last)

                res_false = idx.duplicated(keep=False)
                tm.assert_numpy_array_equal(res_false, exp_false)

            # series
            for s in [Series(case), Series(case, dtype='category'),
                      Series(case, dtype=object)]:
                res_first = s.duplicated(keep='first')
                tm.assert_series_equal(res_first, Series(exp_first))

                res_last = s.duplicated(keep='last')
                tm.assert_series_equal(res_last, Series(exp_last))

                res_false = s.duplicated(keep=False)
                tm.assert_series_equal(res_false, Series(exp_false)) 
Example #29
Source File: test_algos.py, from the recruit project (Apache License 2.0); 4 votes
def test_datetime_likes(self):

        dt = ['2011-01-01', '2011-01-02', '2011-01-01', 'NaT', '2011-01-03',
              '2011-01-02', '2011-01-04', '2011-01-01', 'NaT', '2011-01-06']
        td = ['1 days', '2 days', '1 days', 'NaT', '3 days',
              '2 days', '4 days', '1 days', 'NaT', '6 days']

        cases = [np.array([Timestamp(d) for d in dt]),
                 np.array([Timestamp(d, tz='US/Eastern') for d in dt]),
                 np.array([pd.Period(d, freq='D') for d in dt]),
                 np.array([np.datetime64(d) for d in dt]),
                 np.array([pd.Timedelta(d) for d in td])]

        exp_first = np.array([False, False, True, False, False,
                              True, False, True, True, False])
        exp_last = np.array([True, True, True, True, False,
                             False, False, False, False, False])
        exp_false = exp_first | exp_last

        for case in cases:
            res_first = algos.duplicated(case, keep='first')
            tm.assert_numpy_array_equal(res_first, exp_first)

            res_last = algos.duplicated(case, keep='last')
            tm.assert_numpy_array_equal(res_last, exp_last)

            res_false = algos.duplicated(case, keep=False)
            tm.assert_numpy_array_equal(res_false, exp_false)

            # index
            for idx in [Index(case), Index(case, dtype='category'),
                        Index(case, dtype=object)]:
                res_first = idx.duplicated(keep='first')
                tm.assert_numpy_array_equal(res_first, exp_first)

                res_last = idx.duplicated(keep='last')
                tm.assert_numpy_array_equal(res_last, exp_last)

                res_false = idx.duplicated(keep=False)
                tm.assert_numpy_array_equal(res_false, exp_false)

            # series
            for s in [Series(case), Series(case, dtype='category'),
                      Series(case, dtype=object)]:
                res_first = s.duplicated(keep='first')
                tm.assert_series_equal(res_first, Series(exp_first))

                res_last = s.duplicated(keep='last')
                tm.assert_series_equal(res_last, Series(exp_last))

                res_false = s.duplicated(keep=False)
                tm.assert_series_equal(res_false, Series(exp_false)) 
Example #30
Source File: test_algos.py, from the twitter-stock-recommendation project (MIT License); 4 votes
def test_datetime_likes(self):

        dt = ['2011-01-01', '2011-01-02', '2011-01-01', 'NaT', '2011-01-03',
              '2011-01-02', '2011-01-04', '2011-01-01', 'NaT', '2011-01-06']
        td = ['1 days', '2 days', '1 days', 'NaT', '3 days',
              '2 days', '4 days', '1 days', 'NaT', '6 days']

        cases = [np.array([Timestamp(d) for d in dt]),
                 np.array([Timestamp(d, tz='US/Eastern') for d in dt]),
                 np.array([pd.Period(d, freq='D') for d in dt]),
                 np.array([np.datetime64(d) for d in dt]),
                 np.array([pd.Timedelta(d) for d in td])]

        exp_first = np.array([False, False, True, False, False,
                              True, False, True, True, False])
        exp_last = np.array([True, True, True, True, False,
                             False, False, False, False, False])
        exp_false = exp_first | exp_last

        for case in cases:
            res_first = algos.duplicated(case, keep='first')
            tm.assert_numpy_array_equal(res_first, exp_first)

            res_last = algos.duplicated(case, keep='last')
            tm.assert_numpy_array_equal(res_last, exp_last)

            res_false = algos.duplicated(case, keep=False)
            tm.assert_numpy_array_equal(res_false, exp_false)

            # index
            for idx in [Index(case), Index(case, dtype='category'),
                        Index(case, dtype=object)]:
                res_first = idx.duplicated(keep='first')
                tm.assert_numpy_array_equal(res_first, exp_first)

                res_last = idx.duplicated(keep='last')
                tm.assert_numpy_array_equal(res_last, exp_last)

                res_false = idx.duplicated(keep=False)
                tm.assert_numpy_array_equal(res_false, exp_false)

            # series
            for s in [Series(case), Series(case, dtype='category'),
                      Series(case, dtype=object)]:
                res_first = s.duplicated(keep='first')
                tm.assert_series_equal(res_first, Series(exp_first))

                res_last = s.duplicated(keep='last')
                tm.assert_series_equal(res_last, Series(exp_last))

                res_false = s.duplicated(keep=False)
                tm.assert_series_equal(res_false, Series(exp_false))