Python pandas.core.algorithms.factorize() Examples

The following are 30 code examples of pandas.core.algorithms.factorize(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.algorithms , or try the search function .
Example #1
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_factorize_nan(self):
        # nan should map to na_sentinel, not reverse_indexer[na_sentinel]
        # rizer.factorize should not raise an exception if na_sentinel indexes
        # outside of reverse_indexer
        key = np.array([1, 2, 1, np.nan], dtype='O')
        rizer = ht.Factorizer(len(key))
        for na_sentinel in (-1, 20):
            ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel)
            expected = np.array([0, 1, 0, na_sentinel], dtype='int32')
            assert len(set(key)) == len(set(expected))
            tm.assert_numpy_array_equal(pd.isna(key),
                                        expected == na_sentinel)

        # nan still maps to na_sentinel when sort=False
        key = np.array([0, np.nan, 1], dtype='O')
        na_sentinel = -1

        # TODO(wesm): unused?
        ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel)  # noqa

        expected = np.array([2, -1, 0], dtype='int32')
        assert len(set(key)) == len(set(expected))
        tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel) 
Example #2
Source File: test_algos.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_factorize_nan(self):
        # nan should map to na_sentinel, not reverse_indexer[na_sentinel]
        # rizer.factorize should not raise an exception if na_sentinel indexes
        # outside of reverse_indexer
        key = np.array([1, 2, 1, np.nan], dtype='O')
        rizer = ht.Factorizer(len(key))
        for na_sentinel in (-1, 20):
            ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel)
            expected = np.array([0, 1, 0, na_sentinel], dtype='int32')
            assert len(set(key)) == len(set(expected))
            tm.assert_numpy_array_equal(pd.isna(key),
                                        expected == na_sentinel)

        # nan still maps to na_sentinel when sort=False
        key = np.array([0, np.nan, 1], dtype='O')
        na_sentinel = -1

        # TODO(wesm): unused?
        ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel)  # noqa

        expected = np.array([2, -1, 0], dtype='int32')
        assert len(set(key)) == len(set(expected))
        tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel) 
Example #3
Source File: test_algos.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_uint64_factorize(self):
        data = np.array([2**63, 1, 2**63], dtype=np.uint64)
        exp_labels = np.array([0, 1, 0], dtype=np.intp)
        exp_uniques = np.array([2**63, 1], dtype=np.uint64)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques)

        data = np.array([2**63, -1, 2**63], dtype=object)
        exp_labels = np.array([0, 1, 0], dtype=np.intp)
        exp_uniques = np.array([2**63, -1], dtype=object)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques) 
Example #4
Source File: test_algos.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_factorize_nan(self):
        # nan should map to na_sentinel, not reverse_indexer[na_sentinel]
        # rizer.factorize should not raise an exception if na_sentinel indexes
        # outside of reverse_indexer
        key = np.array([1, 2, 1, np.nan], dtype='O')
        rizer = ht.Factorizer(len(key))
        for na_sentinel in (-1, 20):
            ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel)
            expected = np.array([0, 1, 0, na_sentinel], dtype='int32')
            assert len(set(key)) == len(set(expected))
            tm.assert_numpy_array_equal(pd.isna(key),
                                        expected == na_sentinel)

        # nan still maps to na_sentinel when sort=False
        key = np.array([0, np.nan, 1], dtype='O')
        na_sentinel = -1

        # TODO(wesm): unused?
        ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel)  # noqa

        expected = np.array([2, -1, 0], dtype='int32')
        assert len(set(key)) == len(set(expected))
        tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel) 
Example #5
Source File: test_algos.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_mixed(self):

        # doc example reshaping.rst
        x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf])
        labels, uniques = algos.factorize(x)

        exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index(['A', 'B', 3.14, np.inf])
        tm.assert_index_equal(uniques, exp)

        labels, uniques = algos.factorize(x, sort=True)
        exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index([3.14, np.inf, 'A', 'B'])
        tm.assert_index_equal(uniques, exp) 
Example #6
Source File: test_algos.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_mixed(self):

        # doc example reshaping.rst
        x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf])
        labels, uniques = algos.factorize(x)

        exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index(['A', 'B', 3.14, np.inf])
        tm.assert_index_equal(uniques, exp)

        labels, uniques = algos.factorize(x, sort=True)
        exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index([3.14, np.inf, 'A', 'B'])
        tm.assert_index_equal(uniques, exp) 
Example #7
Source File: base.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def factorize(self, sort=False, na_sentinel=-1):
        """
        Encode the object as an enumerated type or categorical variable

        Parameters
        ----------
        sort : boolean, default False
            Sort by values
        na_sentinel: int, default -1
            Value to mark "not found"

        Returns
        -------
        labels : the indexer to the original array
        uniques : the unique Index
        """
        from pandas.core.algorithms import factorize
        return factorize(self, sort=sort, na_sentinel=na_sentinel) 
Example #8
Source File: test_algos.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_factorize_nan(self):
        # nan should map to na_sentinel, not reverse_indexer[na_sentinel]
        # rizer.factorize should not raise an exception if na_sentinel indexes
        # outside of reverse_indexer
        key = np.array([1, 2, 1, np.nan], dtype='O')
        rizer = ht.Factorizer(len(key))
        for na_sentinel in (-1, 20):
            ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel)
            expected = np.array([0, 1, 0, na_sentinel], dtype='int32')
            assert len(set(key)) == len(set(expected))
            tm.assert_numpy_array_equal(pd.isna(key),
                                        expected == na_sentinel)

        # nan still maps to na_sentinel when sort=False
        key = np.array([0, np.nan, 1], dtype='O')
        na_sentinel = -1

        # TODO(wesm): unused?
        ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel)  # noqa

        expected = np.array([2, -1, 0], dtype='int32')
        assert len(set(key)) == len(set(expected))
        tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel) 
Example #9
Source File: base.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def factorize(self, sort=False, na_sentinel=-1):
        """
        Encode the object as an enumerated type or categorical variable

        Parameters
        ----------
        sort : boolean, default False
            Sort by values
        na_sentinel: int, default -1
            Value to mark "not found"

        Returns
        -------
        labels : the indexer to the original array
        uniques : the unique Index
        """
        from pandas.core.algorithms import factorize
        return factorize(self, sort=sort, na_sentinel=na_sentinel) 
Example #10
Source File: test_algos.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_mixed(self):

        # doc example reshaping.rst
        x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf])
        labels, uniques = algos.factorize(x)

        exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = pd.Index(['A', 'B', 3.14, np.inf])
        tm.assert_index_equal(uniques, exp)

        labels, uniques = algos.factorize(x, sort=True)
        exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = pd.Index([3.14, np.inf, 'A', 'B'])
        tm.assert_index_equal(uniques, exp) 
Example #11
Source File: test_algos.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_factorize_nan(self):
        # nan should map to na_sentinel, not reverse_indexer[na_sentinel]
        # rizer.factorize should not raise an exception if na_sentinel indexes
        # outside of reverse_indexer
        key = np.array([1, 2, 1, np.nan], dtype='O')
        rizer = ht.Factorizer(len(key))
        for na_sentinel in (-1, 20):
            ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel)
            expected = np.array([0, 1, 0, na_sentinel], dtype='int32')
            assert len(set(key)) == len(set(expected))
            tm.assert_numpy_array_equal(pd.isna(key),
                                        expected == na_sentinel)

        # nan still maps to na_sentinel when sort=False
        key = np.array([0, np.nan, 1], dtype='O')
        na_sentinel = -1

        # TODO(wesm): unused?
        ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel)  # noqa

        expected = np.array([2, -1, 0], dtype='int32')
        assert len(set(key)) == len(set(expected))
        tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel) 
Example #12
Source File: test_algos.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_uint64_factorize(self):
        data = np.array([2**63, 1, 2**63], dtype=np.uint64)
        exp_labels = np.array([0, 1, 0], dtype=np.intp)
        exp_uniques = np.array([2**63, 1], dtype=np.uint64)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques)

        data = np.array([2**63, -1, 2**63], dtype=object)
        exp_labels = np.array([0, 1, 0], dtype=np.intp)
        exp_uniques = np.array([2**63, -1], dtype=object)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques) 
Example #13
Source File: groupby.py    From Computable with MIT License 6 votes vote down vote up
def _get_compressed_labels(self):
        all_labels = [ping.labels for ping in self.groupings]
        if self._overflow_possible:
            tups = lib.fast_zip(all_labels)
            labs, uniques = algos.factorize(tups)

            if self.sort:
                uniques, labs = _reorder_by_uniques(uniques, labs)

            return labs, uniques
        else:
            if len(all_labels) > 1:
                group_index = get_group_index(all_labels, self.shape)
                comp_ids, obs_group_ids = _compress_group_index(group_index)
            else:
                ping = self.groupings[0]
                comp_ids = ping.labels
                obs_group_ids = np.arange(len(ping.group_index))
                self.compressed = False
                self._filter_empty_groups = False

            return comp_ids, obs_group_ids 
Example #14
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_mixed(self):

        # doc example reshaping.rst
        x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf])
        labels, uniques = algos.factorize(x)

        exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index(['A', 'B', 3.14, np.inf])
        tm.assert_index_equal(uniques, exp)

        labels, uniques = algos.factorize(x, sort=True)
        exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index([3.14, np.inf, 'A', 'B'])
        tm.assert_index_equal(uniques, exp) 
Example #15
Source File: test_algos.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_mixed(self):

        # doc example reshaping.rst
        x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf])
        labels, uniques = algos.factorize(x)

        exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index(['A', 'B', 3.14, np.inf])
        tm.assert_index_equal(uniques, exp)

        labels, uniques = algos.factorize(x, sort=True)
        exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index([3.14, np.inf, 'A', 'B'])
        tm.assert_index_equal(uniques, exp) 
Example #16
Source File: multi.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def _get_grouper_for_level(self, mapper, level):
        indexer = self.labels[level]
        level_index = self.levels[level]

        if mapper is not None:
            # Handle group mapping function and return
            level_values = self.levels[level].take(indexer)
            grouper = level_values.map(mapper)
            return grouper, None, None

        labels, uniques = algos.factorize(indexer, sort=True)

        if len(uniques) > 0 and uniques[0] == -1:
            # Handle NAs
            mask = indexer != -1
            ok_labels, uniques = algos.factorize(indexer[mask],
                                                 sort=True)

            labels = np.empty(len(indexer), dtype=indexer.dtype)
            labels[mask] = ok_labels
            labels[~mask] = -1

        if len(uniques) < len(level_index):
            # Remove unobserved levels from level_index
            level_index = level_index.take(uniques)

        grouper = level_index.take(labels)

        return grouper, labels, level_index 
Example #17
Source File: test_algos.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_parametrized_factorize_na_value_default(self, data):
        # arrays that include the NA default for that type, but isn't used.
        l, u = algos.factorize(data)
        expected_uniques = data[[0, 1]]
        expected_labels = np.array([0, 1, 0], dtype=np.intp)
        tm.assert_numpy_array_equal(l, expected_labels)
        tm.assert_numpy_array_equal(u, expected_uniques) 
Example #18
Source File: test_algos.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_basic(self):

        labels, uniques = algos.factorize(['a', 'b', 'b', 'a', 'a', 'c', 'c',
                                           'c'])
        tm.assert_numpy_array_equal(
            uniques, np.array(['a', 'b', 'c'], dtype=object))

        labels, uniques = algos.factorize(['a', 'b', 'b', 'a',
                                           'a', 'c', 'c', 'c'], sort=True)
        exp = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array(['a', 'b', 'c'], dtype=object)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(range(5))))
        exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([4, 3, 2, 1, 0], dtype=np.int64)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(range(5))), sort=True)

        exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([0, 1, 2, 3, 4], dtype=np.int64)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(np.arange(5.))))
        exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([4., 3., 2., 1., 0.], dtype=np.float64)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(np.arange(5.))),
                                          sort=True)
        exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([0., 1., 2., 3., 4.], dtype=np.float64)
        tm.assert_numpy_array_equal(uniques, exp) 
Example #19
Source File: groupby.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def _make_labels(self):
        if self._labels is None or self._group_index is None:
            # we have a list of groupers
            if isinstance(self.grouper, BaseGrouper):
                labels = self.grouper.label_info
                uniques = self.grouper.result_index
            else:
                labels, uniques = algorithms.factorize(
                    self.grouper, sort=self.sort)
                uniques = Index(uniques, name=self.name)
            self._labels = labels
            self._group_index = uniques 
Example #20
Source File: test_algos.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_complex_sorting(self):
        # gh 12666 - check no segfault
        # Test not valid numpy versions older than 1.11
        if pd._np_version_under1p11:
            pytest.skip("Test valid only for numpy 1.11+")

        x17 = np.array([complex(i) for i in range(17)], dtype=object)

        pytest.raises(TypeError, algos.factorize, x17[::-1], sort=True) 
Example #21
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_hashtable_factorize(self, htable, tm_dtype, writable):
        # output of maker has guaranteed unique elements
        maker = getattr(tm, 'make' + tm_dtype + 'Index')
        s = Series(maker(1000))
        if htable == ht.Float64HashTable:
            # add NaN for float column
            s.loc[500] = np.nan
        elif htable == ht.PyObjectHashTable:
            # use different NaN types for object column
            s.loc[500:502] = [np.nan, None, pd.NaT]

        # create duplicated selection
        s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True)
        s_duplicated.values.setflags(write=writable)
        na_mask = s_duplicated.isna().values

        result_unique, result_inverse = htable().factorize(s_duplicated.values)

        # drop_duplicates has own cython code (hash_table_func_helper.pxi)
        # and is tested separately; keeps first occurrence like ht.factorize()
        # since factorize removes all NaNs, we do the same here
        expected_unique = s_duplicated.dropna().drop_duplicates().values
        tm.assert_numpy_array_equal(result_unique, expected_unique)

        # reconstruction can only succeed if the inverse is correct. Since
        # factorize removes the NaNs, those have to be excluded here as well
        result_reconstruct = result_unique[result_inverse[~na_mask]]
        expected_reconstruct = s_duplicated.dropna().values
        tm.assert_numpy_array_equal(result_reconstruct, expected_reconstruct) 
Example #22
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_factorize_na_sentinel(self, sort, na_sentinel):
        data = np.array(['b', 'a', None, 'b'], dtype=object)
        labels, uniques = algos.factorize(data, sort=sort,
                                          na_sentinel=na_sentinel)
        if sort:
            expected_labels = np.array([1, 0, na_sentinel, 1], dtype=np.intp)
            expected_uniques = np.array(['a', 'b'], dtype=object)
        else:
            expected_labels = np.array([0, 1, na_sentinel, 0], dtype=np.intp)
            expected_uniques = np.array(['b', 'a'], dtype=object)
        tm.assert_numpy_array_equal(labels, expected_labels)
        tm.assert_numpy_array_equal(uniques, expected_uniques) 
Example #23
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_parametrized_factorize_na_value_default(self, data):
        # arrays that include the NA default for that type, but isn't used.
        l, u = algos.factorize(data)
        expected_uniques = data[[0, 1]]
        expected_labels = np.array([0, 1, 0], dtype=np.intp)
        tm.assert_numpy_array_equal(l, expected_labels)
        tm.assert_numpy_array_equal(u, expected_uniques) 
Example #24
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_object_factorize(self, writable):
        data = np.array(['a', 'c', None, np.nan, 'a', 'b', pd.NaT, 'c'],
                        dtype=object)
        data.setflags(write=writable)
        exp_labels = np.array([0, 1, -1, -1, 0, 2, -1, 1], dtype=np.intp)
        exp_uniques = np.array(['a', 'c', 'b'], dtype=object)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques) 
Example #25
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_string_factorize(self, writable):
        data = np.array(['a', 'c', 'a', 'b', 'c'],
                        dtype=object)
        data.setflags(write=writable)
        exp_labels = np.array([0, 1, 0, 2, 1], dtype=np.intp)
        exp_uniques = np.array(['a', 'c', 'b'], dtype=object)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques) 
Example #26
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_int64_factorize(self, writable):
        data = np.array([2**63 - 1, -2**63, 2**63 - 1], dtype=np.int64)
        data.setflags(write=writable)
        exp_labels = np.array([0, 1, 0], dtype=np.intp)
        exp_uniques = np.array([2**63 - 1, -2**63], dtype=np.int64)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques) 
Example #27
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_uint64_factorize(self, writable):
        data = np.array([2**64 - 1, 1, 2**64 - 1], dtype=np.uint64)
        data.setflags(write=writable)
        exp_labels = np.array([0, 1, 0], dtype=np.intp)
        exp_uniques = np.array([2**64 - 1, 1], dtype=np.uint64)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques) 
Example #28
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_basic(self):

        labels, uniques = algos.factorize(['a', 'b', 'b', 'a', 'a', 'c', 'c',
                                           'c'])
        tm.assert_numpy_array_equal(
            uniques, np.array(['a', 'b', 'c'], dtype=object))

        labels, uniques = algos.factorize(['a', 'b', 'b', 'a',
                                           'a', 'c', 'c', 'c'], sort=True)
        exp = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array(['a', 'b', 'c'], dtype=object)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(range(5))))
        exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([4, 3, 2, 1, 0], dtype=np.int64)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(range(5))), sort=True)

        exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([0, 1, 2, 3, 4], dtype=np.int64)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(np.arange(5.))))
        exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([4., 3., 2., 1., 0.], dtype=np.float64)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(np.arange(5.))),
                                          sort=True)
        exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([0., 1., 2., 3., 4.], dtype=np.float64)
        tm.assert_numpy_array_equal(uniques, exp) 
Example #29
Source File: test_algos.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_float64_factorize(self, writable):
        data = np.array([1.0, 1e8, 1.0, 1e-8, 1e8, 1.0], dtype=np.float64)
        data.setflags(write=writable)
        exp_labels = np.array([0, 1, 0, 2, 1, 0], dtype=np.intp)
        exp_uniques = np.array([1.0, 1e8, 1e-8], dtype=np.float64)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques) 
Example #30
Source File: grouper.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _make_labels(self):
        if self._labels is None or self._group_index is None:
            # we have a list of groupers
            if isinstance(self.grouper, BaseGrouper):
                labels = self.grouper.label_info
                uniques = self.grouper.result_index
            else:
                labels, uniques = algorithms.factorize(
                    self.grouper, sort=self.sort)
                uniques = Index(uniques, name=self.name)
            self._labels = labels
            self._group_index = uniques