Python Examples of pandas.core.algorithms.factorize

Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def test_factorize_nan(self):
        # nan should map to na_sentinel, not reverse_indexer[na_sentinel]
        # rizer.factorize should not raise an exception if na_sentinel indexes
        # outside of reverse_indexer
        key = np.array([1, 2, 1, np.nan], dtype='O')
        rizer = ht.Factorizer(len(key))
        for na_sentinel in (-1, 20):
            ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel)
            expected = np.array([0, 1, 0, na_sentinel], dtype='int32')
            assert len(set(key)) == len(set(expected))
            tm.assert_numpy_array_equal(pd.isna(key),
                                        expected == na_sentinel)

        # nan still maps to na_sentinel when sort=False
        key = np.array([0, np.nan, 1], dtype='O')
        na_sentinel = -1

        # TODO(wesm): unused?
        ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel)  # noqa

        expected = np.array([2, -1, 0], dtype='int32')
        assert len(set(key)) == len(set(expected))
        tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel)

Source File: test_algos.py From recruit with Apache License 2.0

6 votes

def test_factorize_nan(self):
        # nan should map to na_sentinel, not reverse_indexer[na_sentinel]
        # rizer.factorize should not raise an exception if na_sentinel indexes
        # outside of reverse_indexer
        key = np.array([1, 2, 1, np.nan], dtype='O')
        rizer = ht.Factorizer(len(key))
        for na_sentinel in (-1, 20):
            ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel)
            expected = np.array([0, 1, 0, na_sentinel], dtype='int32')
            assert len(set(key)) == len(set(expected))
            tm.assert_numpy_array_equal(pd.isna(key),
                                        expected == na_sentinel)

        # nan still maps to na_sentinel when sort=False
        key = np.array([0, np.nan, 1], dtype='O')
        na_sentinel = -1

        # TODO(wesm): unused?
        ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel)  # noqa

        expected = np.array([2, -1, 0], dtype='int32')
        assert len(set(key)) == len(set(expected))
        tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel)

Source File: test_algos.py From twitter-stock-recommendation with MIT License

6 votes

def test_uint64_factorize(self):
        data = np.array([2**63, 1, 2**63], dtype=np.uint64)
        exp_labels = np.array([0, 1, 0], dtype=np.intp)
        exp_uniques = np.array([2**63, 1], dtype=np.uint64)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques)

        data = np.array([2**63, -1, 2**63], dtype=object)
        exp_labels = np.array([0, 1, 0], dtype=np.intp)
        exp_uniques = np.array([2**63, -1], dtype=object)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques)

Source File: test_algos.py From twitter-stock-recommendation with MIT License

6 votes

def test_factorize_nan(self):
        # nan should map to na_sentinel, not reverse_indexer[na_sentinel]
        # rizer.factorize should not raise an exception if na_sentinel indexes
        # outside of reverse_indexer
        key = np.array([1, 2, 1, np.nan], dtype='O')
        rizer = ht.Factorizer(len(key))
        for na_sentinel in (-1, 20):
            ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel)
            expected = np.array([0, 1, 0, na_sentinel], dtype='int32')
            assert len(set(key)) == len(set(expected))
            tm.assert_numpy_array_equal(pd.isna(key),
                                        expected == na_sentinel)

        # nan still maps to na_sentinel when sort=False
        key = np.array([0, np.nan, 1], dtype='O')
        na_sentinel = -1

        # TODO(wesm): unused?
        ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel)  # noqa

        expected = np.array([2, -1, 0], dtype='int32')
        assert len(set(key)) == len(set(expected))
        tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel)

Source File: test_algos.py From twitter-stock-recommendation with MIT License

6 votes

def test_mixed(self):

        # doc example reshaping.rst
        x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf])
        labels, uniques = algos.factorize(x)

        exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index(['A', 'B', 3.14, np.inf])
        tm.assert_index_equal(uniques, exp)

        labels, uniques = algos.factorize(x, sort=True)
        exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index([3.14, np.inf, 'A', 'B'])
        tm.assert_index_equal(uniques, exp)

Source File: test_algos.py From recruit with Apache License 2.0

6 votes

def test_mixed(self):

        # doc example reshaping.rst
        x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf])
        labels, uniques = algos.factorize(x)

        exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index(['A', 'B', 3.14, np.inf])
        tm.assert_index_equal(uniques, exp)

        labels, uniques = algos.factorize(x, sort=True)
        exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index([3.14, np.inf, 'A', 'B'])
        tm.assert_index_equal(uniques, exp)

Source File: base.py From elasticintel with GNU General Public License v3.0

6 votes

def factorize(self, sort=False, na_sentinel=-1):
        """
        Encode the object as an enumerated type or categorical variable

        Parameters
        ----------
        sort : boolean, default False
            Sort by values
        na_sentinel: int, default -1
            Value to mark "not found"

        Returns
        -------
        labels : the indexer to the original array
        uniques : the unique Index
        """
        from pandas.core.algorithms import factorize
        return factorize(self, sort=sort, na_sentinel=na_sentinel)

Source File: test_algos.py From elasticintel with GNU General Public License v3.0

6 votes

def test_factorize_nan(self):
        # nan should map to na_sentinel, not reverse_indexer[na_sentinel]
        # rizer.factorize should not raise an exception if na_sentinel indexes
        # outside of reverse_indexer
        key = np.array([1, 2, 1, np.nan], dtype='O')
        rizer = ht.Factorizer(len(key))
        for na_sentinel in (-1, 20):
            ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel)
            expected = np.array([0, 1, 0, na_sentinel], dtype='int32')
            assert len(set(key)) == len(set(expected))
            tm.assert_numpy_array_equal(pd.isna(key),
                                        expected == na_sentinel)

        # nan still maps to na_sentinel when sort=False
        key = np.array([0, np.nan, 1], dtype='O')
        na_sentinel = -1

        # TODO(wesm): unused?
        ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel)  # noqa

        expected = np.array([2, -1, 0], dtype='int32')
        assert len(set(key)) == len(set(expected))
        tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel)

Source File: base.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def factorize(self, sort=False, na_sentinel=-1):
        """
        Encode the object as an enumerated type or categorical variable

        Parameters
        ----------
        sort : boolean, default False
            Sort by values
        na_sentinel: int, default -1
            Value to mark "not found"

        Returns
        -------
        labels : the indexer to the original array
        uniques : the unique Index
        """
        from pandas.core.algorithms import factorize
        return factorize(self, sort=sort, na_sentinel=na_sentinel)

Source File: test_algos.py From elasticintel with GNU General Public License v3.0

6 votes

def test_mixed(self):

        # doc example reshaping.rst
        x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf])
        labels, uniques = algos.factorize(x)

        exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = pd.Index(['A', 'B', 3.14, np.inf])
        tm.assert_index_equal(uniques, exp)

        labels, uniques = algos.factorize(x, sort=True)
        exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = pd.Index([3.14, np.inf, 'A', 'B'])
        tm.assert_index_equal(uniques, exp)

Source File: test_algos.py From vnpy_crypto with MIT License

6 votes

def test_factorize_nan(self):
        # nan should map to na_sentinel, not reverse_indexer[na_sentinel]
        # rizer.factorize should not raise an exception if na_sentinel indexes
        # outside of reverse_indexer
        key = np.array([1, 2, 1, np.nan], dtype='O')
        rizer = ht.Factorizer(len(key))
        for na_sentinel in (-1, 20):
            ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel)
            expected = np.array([0, 1, 0, na_sentinel], dtype='int32')
            assert len(set(key)) == len(set(expected))
            tm.assert_numpy_array_equal(pd.isna(key),
                                        expected == na_sentinel)

        # nan still maps to na_sentinel when sort=False
        key = np.array([0, np.nan, 1], dtype='O')
        na_sentinel = -1

        # TODO(wesm): unused?
        ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel)  # noqa

        expected = np.array([2, -1, 0], dtype='int32')
        assert len(set(key)) == len(set(expected))
        tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel)

Source File: test_algos.py From vnpy_crypto with MIT License

6 votes

def test_uint64_factorize(self):
        data = np.array([2**63, 1, 2**63], dtype=np.uint64)
        exp_labels = np.array([0, 1, 0], dtype=np.intp)
        exp_uniques = np.array([2**63, 1], dtype=np.uint64)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques)

        data = np.array([2**63, -1, 2**63], dtype=object)
        exp_labels = np.array([0, 1, 0], dtype=np.intp)
        exp_uniques = np.array([2**63, -1], dtype=object)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques)

Source File: groupby.py From Computable with MIT License

6 votes

def _get_compressed_labels(self):
        all_labels = [ping.labels for ping in self.groupings]
        if self._overflow_possible:
            tups = lib.fast_zip(all_labels)
            labs, uniques = algos.factorize(tups)

            if self.sort:
                uniques, labs = _reorder_by_uniques(uniques, labs)

            return labs, uniques
        else:
            if len(all_labels) > 1:
                group_index = get_group_index(all_labels, self.shape)
                comp_ids, obs_group_ids = _compress_group_index(group_index)
            else:
                ping = self.groupings[0]
                comp_ids = ping.labels
                obs_group_ids = np.arange(len(ping.group_index))
                self.compressed = False
                self._filter_empty_groups = False

            return comp_ids, obs_group_ids

Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def test_mixed(self):

        # doc example reshaping.rst
        x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf])
        labels, uniques = algos.factorize(x)

        exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index(['A', 'B', 3.14, np.inf])
        tm.assert_index_equal(uniques, exp)

        labels, uniques = algos.factorize(x, sort=True)
        exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index([3.14, np.inf, 'A', 'B'])
        tm.assert_index_equal(uniques, exp)

Source File: test_algos.py From vnpy_crypto with MIT License

6 votes

def test_mixed(self):

        # doc example reshaping.rst
        x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf])
        labels, uniques = algos.factorize(x)

        exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index(['A', 'B', 3.14, np.inf])
        tm.assert_index_equal(uniques, exp)

        labels, uniques = algos.factorize(x, sort=True)
        exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = Index([3.14, np.inf, 'A', 'B'])
        tm.assert_index_equal(uniques, exp)

Source File: multi.py From elasticintel with GNU General Public License v3.0

5 votes

def _get_grouper_for_level(self, mapper, level):
        indexer = self.labels[level]
        level_index = self.levels[level]

        if mapper is not None:
            # Handle group mapping function and return
            level_values = self.levels[level].take(indexer)
            grouper = level_values.map(mapper)
            return grouper, None, None

        labels, uniques = algos.factorize(indexer, sort=True)

        if len(uniques) > 0 and uniques[0] == -1:
            # Handle NAs
            mask = indexer != -1
            ok_labels, uniques = algos.factorize(indexer[mask],
                                                 sort=True)

            labels = np.empty(len(indexer), dtype=indexer.dtype)
            labels[mask] = ok_labels
            labels[~mask] = -1

        if len(uniques) < len(level_index):
            # Remove unobserved levels from level_index
            level_index = level_index.take(uniques)

        grouper = level_index.take(labels)

        return grouper, labels, level_index

Source File: test_algos.py From twitter-stock-recommendation with MIT License

5 votes

def test_parametrized_factorize_na_value_default(self, data):
        # arrays that include the NA default for that type, but isn't used.
        l, u = algos.factorize(data)
        expected_uniques = data[[0, 1]]
        expected_labels = np.array([0, 1, 0], dtype=np.intp)
        tm.assert_numpy_array_equal(l, expected_labels)
        tm.assert_numpy_array_equal(u, expected_uniques)

Source File: test_algos.py From elasticintel with GNU General Public License v3.0

5 votes

def test_basic(self):

        labels, uniques = algos.factorize(['a', 'b', 'b', 'a', 'a', 'c', 'c',
                                           'c'])
        tm.assert_numpy_array_equal(
            uniques, np.array(['a', 'b', 'c'], dtype=object))

        labels, uniques = algos.factorize(['a', 'b', 'b', 'a',
                                           'a', 'c', 'c', 'c'], sort=True)
        exp = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array(['a', 'b', 'c'], dtype=object)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(range(5))))
        exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([4, 3, 2, 1, 0], dtype=np.int64)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(range(5))), sort=True)

        exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([0, 1, 2, 3, 4], dtype=np.int64)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(np.arange(5.))))
        exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([4., 3., 2., 1., 0.], dtype=np.float64)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(np.arange(5.))),
                                          sort=True)
        exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([0., 1., 2., 3., 4.], dtype=np.float64)
        tm.assert_numpy_array_equal(uniques, exp)

Source File: groupby.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def _make_labels(self):
        if self._labels is None or self._group_index is None:
            # we have a list of groupers
            if isinstance(self.grouper, BaseGrouper):
                labels = self.grouper.label_info
                uniques = self.grouper.result_index
            else:
                labels, uniques = algorithms.factorize(
                    self.grouper, sort=self.sort)
                uniques = Index(uniques, name=self.name)
            self._labels = labels
            self._group_index = uniques

Source File: test_algos.py From elasticintel with GNU General Public License v3.0

5 votes

def test_complex_sorting(self):
        # gh 12666 - check no segfault
        # Test not valid numpy versions older than 1.11
        if pd._np_version_under1p11:
            pytest.skip("Test valid only for numpy 1.11+")

        x17 = np.array([complex(i) for i in range(17)], dtype=object)

        pytest.raises(TypeError, algos.factorize, x17[::-1], sort=True)

Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_hashtable_factorize(self, htable, tm_dtype, writable):
        # output of maker has guaranteed unique elements
        maker = getattr(tm, 'make' + tm_dtype + 'Index')
        s = Series(maker(1000))
        if htable == ht.Float64HashTable:
            # add NaN for float column
            s.loc[500] = np.nan
        elif htable == ht.PyObjectHashTable:
            # use different NaN types for object column
            s.loc[500:502] = [np.nan, None, pd.NaT]

        # create duplicated selection
        s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True)
        s_duplicated.values.setflags(write=writable)
        na_mask = s_duplicated.isna().values

        result_unique, result_inverse = htable().factorize(s_duplicated.values)

        # drop_duplicates has own cython code (hash_table_func_helper.pxi)
        # and is tested separately; keeps first occurrence like ht.factorize()
        # since factorize removes all NaNs, we do the same here
        expected_unique = s_duplicated.dropna().drop_duplicates().values
        tm.assert_numpy_array_equal(result_unique, expected_unique)

        # reconstruction can only succeed if the inverse is correct. Since
        # factorize removes the NaNs, those have to be excluded here as well
        result_reconstruct = result_unique[result_inverse[~na_mask]]
        expected_reconstruct = s_duplicated.dropna().values
        tm.assert_numpy_array_equal(result_reconstruct, expected_reconstruct)

Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_factorize_na_sentinel(self, sort, na_sentinel):
        data = np.array(['b', 'a', None, 'b'], dtype=object)
        labels, uniques = algos.factorize(data, sort=sort,
                                          na_sentinel=na_sentinel)
        if sort:
            expected_labels = np.array([1, 0, na_sentinel, 1], dtype=np.intp)
            expected_uniques = np.array(['a', 'b'], dtype=object)
        else:
            expected_labels = np.array([0, 1, na_sentinel, 0], dtype=np.intp)
            expected_uniques = np.array(['b', 'a'], dtype=object)
        tm.assert_numpy_array_equal(labels, expected_labels)
        tm.assert_numpy_array_equal(uniques, expected_uniques)

Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_parametrized_factorize_na_value_default(self, data):
        # arrays that include the NA default for that type, but isn't used.
        l, u = algos.factorize(data)
        expected_uniques = data[[0, 1]]
        expected_labels = np.array([0, 1, 0], dtype=np.intp)
        tm.assert_numpy_array_equal(l, expected_labels)
        tm.assert_numpy_array_equal(u, expected_uniques)

Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_object_factorize(self, writable):
        data = np.array(['a', 'c', None, np.nan, 'a', 'b', pd.NaT, 'c'],
                        dtype=object)
        data.setflags(write=writable)
        exp_labels = np.array([0, 1, -1, -1, 0, 2, -1, 1], dtype=np.intp)
        exp_uniques = np.array(['a', 'c', 'b'], dtype=object)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques)

Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_string_factorize(self, writable):
        data = np.array(['a', 'c', 'a', 'b', 'c'],
                        dtype=object)
        data.setflags(write=writable)
        exp_labels = np.array([0, 1, 0, 2, 1], dtype=np.intp)
        exp_uniques = np.array(['a', 'c', 'b'], dtype=object)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques)

Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_int64_factorize(self, writable):
        data = np.array([2**63 - 1, -2**63, 2**63 - 1], dtype=np.int64)
        data.setflags(write=writable)
        exp_labels = np.array([0, 1, 0], dtype=np.intp)
        exp_uniques = np.array([2**63 - 1, -2**63], dtype=np.int64)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques)

Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_uint64_factorize(self, writable):
        data = np.array([2**64 - 1, 1, 2**64 - 1], dtype=np.uint64)
        data.setflags(write=writable)
        exp_labels = np.array([0, 1, 0], dtype=np.intp)
        exp_uniques = np.array([2**64 - 1, 1], dtype=np.uint64)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques)

Source File: test_algos.py From recruit with Apache License 2.0

5 votes

def test_basic(self):

        labels, uniques = algos.factorize(['a', 'b', 'b', 'a', 'a', 'c', 'c',
                                           'c'])
        tm.assert_numpy_array_equal(
            uniques, np.array(['a', 'b', 'c'], dtype=object))

        labels, uniques = algos.factorize(['a', 'b', 'b', 'a',
                                           'a', 'c', 'c', 'c'], sort=True)
        exp = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array(['a', 'b', 'c'], dtype=object)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(range(5))))
        exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([4, 3, 2, 1, 0], dtype=np.int64)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(range(5))), sort=True)

        exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([0, 1, 2, 3, 4], dtype=np.int64)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(np.arange(5.))))
        exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([4., 3., 2., 1., 0.], dtype=np.float64)
        tm.assert_numpy_array_equal(uniques, exp)

        labels, uniques = algos.factorize(list(reversed(np.arange(5.))),
                                          sort=True)
        exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
        tm.assert_numpy_array_equal(labels, exp)
        exp = np.array([0., 1., 2., 3., 4.], dtype=np.float64)
        tm.assert_numpy_array_equal(uniques, exp)

Source File: test_algos.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_float64_factorize(self, writable):
        data = np.array([1.0, 1e8, 1.0, 1e-8, 1e8, 1.0], dtype=np.float64)
        data.setflags(write=writable)
        exp_labels = np.array([0, 1, 0, 2, 1, 0], dtype=np.intp)
        exp_uniques = np.array([1.0, 1e8, 1e-8], dtype=np.float64)

        labels, uniques = algos.factorize(data)
        tm.assert_numpy_array_equal(labels, exp_labels)
        tm.assert_numpy_array_equal(uniques, exp_uniques)

Source File: grouper.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def _make_labels(self):
        if self._labels is None or self._group_index is None:
            # we have a list of groupers
            if isinstance(self.grouper, BaseGrouper):
                labels = self.grouper.label_info
                uniques = self.grouper.result_index
            else:
                labels, uniques = algorithms.factorize(
                    self.grouper, sort=self.sort)
                uniques = Index(uniques, name=self.name)
            self._labels = labels
            self._group_index = uniques

Python pandas.core.algorithms.factorize() Examples