Python pandas.core.sorting.safe_sort() Examples

The following are 26 code examples of pandas.core.sorting.safe_sort(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.sorting, or try the search function.
Example #1
Source File: test_window.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_pairwise_with_self(self, f):
        """Check the pairwise output of ``f`` applied to each frame alone.

        DataFrame with itself, pairwise=True. Every frame in ``self.df1s``
        is run through ``f``; the output's index levels and columns are
        validated, and the outputs are then compared with the first one.
        """
        # note that we may construct the 1st level of the MI
        # in a non-monotonic way, so compare accordingly
        results = []
        for df in self.df1s:
            result = f(df)
            tm.assert_index_equal(result.index.levels[0],
                                  df.index,
                                  check_names=False)
            tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                        safe_sort(df.columns.unique()))
            tm.assert_index_equal(result.columns, df.columns)
            # Keep the computed output, not the input frame; otherwise the
            # comparison below would never look at what ``f`` produced.
            results.append(result)

        # Every later output is compared against the first one.
        for result in results[1:]:
            self.compare(result, results[0])
Example #2
Source File: test_window.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_pairwise_with_self(self, f):
        """Check the pairwise output of ``f`` applied to each frame alone.

        DataFrame with itself, pairwise=True. Every frame in ``self.df1s``
        is run through ``f``; the output's index levels and columns are
        validated, and the outputs are then compared with the first one.
        """
        # note that we may construct the 1st level of the MI
        # in a non-monotonic way, so compare accordingly
        results = []
        for df in self.df1s:
            result = f(df)
            tm.assert_index_equal(result.index.levels[0],
                                  df.index,
                                  check_names=False)
            tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                        safe_sort(df.columns.unique()))
            tm.assert_index_equal(result.columns, df.columns)
            # Keep the computed output, not the input frame; otherwise the
            # comparison below would never look at what ``f`` produced.
            results.append(result)

        # Every later output is compared against the first one.
        for result in results[1:]:
            self.compare(result, results[0])
Example #3
Source File: test_window.py    From coffeegrindsize with MIT License 6 votes vote down vote up
def test_pairwise_with_self(self, f):
        """Check the pairwise output of ``f`` applied to each frame alone.

        DataFrame with itself, pairwise=True. Every frame in ``self.df1s``
        is run through ``f``; the output's index levels and columns are
        validated, and the outputs are then compared with the first one.
        """
        # note that we may construct the 1st level of the MI
        # in a non-monotonic way, so compare accordingly
        results = []
        for df in self.df1s:
            result = f(df)
            tm.assert_index_equal(result.index.levels[0],
                                  df.index,
                                  check_names=False)
            tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                        safe_sort(df.columns.unique()))
            tm.assert_index_equal(result.columns, df.columns)
            # Keep the computed output, not the input frame; otherwise the
            # comparison below would never look at what ``f`` produced.
            results.append(result)

        # Every later output is compared against the first one.
        for result in results[1:]:
            self.compare(result, results[0])
Example #4
Source File: test_window.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_pairwise_with_self(self, f):
        """Check the pairwise output of ``f`` applied to each frame alone.

        DataFrame with itself, pairwise=True. Every frame in ``self.df1s``
        is run through ``f``; the output's index levels and columns are
        validated, and the outputs are then compared with the first one.
        """
        # note that we may construct the 1st level of the MI
        # in a non-monotonic way, so compare accordingly
        results = []
        for df in self.df1s:
            result = f(df)
            tm.assert_index_equal(result.index.levels[0],
                                  df.index,
                                  check_names=False)
            tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                        safe_sort(df.columns.unique()))
            tm.assert_index_equal(result.columns, df.columns)
            # Keep the computed output, not the input frame; otherwise the
            # comparison below would never look at what ``f`` produced.
            results.append(result)

        # Every later output is compared against the first one.
        for result in results[1:]:
            self.compare(result, results[0])
Example #5
Source File: test_window.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_pairwise_with_self(self, f):
        """Check the pairwise output of ``f`` applied to each frame alone.

        DataFrame with itself, pairwise=True. Every frame in ``self.df1s``
        is run through ``f``; the output's index levels and columns are
        validated, and the outputs are then compared with the first one.
        """
        # note that we may construct the 1st level of the MI
        # in a non-monotonic way, so compare accordingly
        results = []
        for df in self.df1s:
            result = f(df)
            tm.assert_index_equal(result.index.levels[0],
                                  df.index,
                                  check_names=False)
            tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                        safe_sort(df.columns.unique()))
            tm.assert_index_equal(result.columns, df.columns)
            # Keep the computed output, not the input frame; otherwise the
            # comparison below would never look at what ``f`` produced.
            results.append(result)

        # Every later output is compared against the first one.
        for result in results[1:]:
            self.compare(result, results[0])
Example #6
Source File: merge.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _sort_labels(uniques, left, right):
    """Remap ``left`` and ``right`` labels to follow the sorted ``uniques``.

    Both label arrays are concatenated, passed through
    ``sorting.safe_sort`` together with ``uniques`` (``na_sentinel=-1``),
    cast with ``ensure_int64`` and split back at the original boundary.

    Returns
    -------
    (new_left, new_right) : the remapped label arrays
    """
    # tuplesafe: anything that is not already an ndarray goes through Index
    if not isinstance(uniques, np.ndarray):
        uniques = Index(uniques).values

    boundary = len(left)
    combined = np.concatenate([left, right])

    # only the remapped labels are needed; the sorted uniques are discarded
    _sorted_uniques, remapped = sorting.safe_sort(uniques, combined,
                                                  na_sentinel=-1)
    remapped = ensure_int64(remapped)

    return remapped[:boundary], remapped[boundary:]
Example #7
Source File: test_window.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_pairwise_with_other(self, f):
        """Check the pairwise output of ``f`` for each frame and ``self.df2``.

        DataFrame with another DataFrame, pairwise=True.
        """
        # Compute every output first, then validate them one by one.
        outputs = [f(frame, self.df2) for frame in self.df1s]

        for frame, output in zip(self.df1s, outputs):
            tm.assert_index_equal(output.index.levels[0],
                                  frame.index,
                                  check_names=False)
            # The second level is compared after sorting both sides.
            actual_level = safe_sort(output.index.levels[1])
            expected_level = safe_sort(self.df2.columns.unique())
            tm.assert_numpy_array_equal(actual_level, expected_level)

        # Every later output is compared against the first one.
        for later in outputs[1:]:
            self.compare(later, outputs[0])
Example #8
Source File: test_base.py    From coffeegrindsize with MIT License 5 votes vote down vote up
def test_difference_base(self, sort):
        """Check ``difference`` of two overlapping slices of one index."""
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        idx = self.create_index()
        head, tail = idx[:4], idx[3:]

        result = head.difference(tail, sort)

        # With ``sort=None`` the expected values go through safe_sort;
        # otherwise they are used in the order written here.
        unsorted_expected = Index([0, 'a', 1])
        if sort is None:
            expected = Index(safe_sort(unsorted_expected))
        else:
            expected = unsorted_expected
        tm.assert_index_equal(result, expected)
Example #9
Source File: test_window.py    From coffeegrindsize with MIT License 5 votes vote down vote up
def test_pairwise_with_other(self, f):
        """Check the pairwise output of ``f`` for each frame and ``self.df2``.

        DataFrame with another DataFrame, pairwise=True.
        """
        # Compute every output first, then validate them one by one.
        outputs = [f(frame, self.df2) for frame in self.df1s]

        for frame, output in zip(self.df1s, outputs):
            tm.assert_index_equal(output.index.levels[0],
                                  frame.index,
                                  check_names=False)
            # The second level is compared after sorting both sides.
            actual_level = safe_sort(output.index.levels[1])
            expected_level = safe_sort(self.df2.columns.unique())
            tm.assert_numpy_array_equal(actual_level, expected_level)

        # Every later output is compared against the first one.
        for later in outputs[1:]:
            self.compare(later, outputs[0])
Example #10
Source File: merge.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def _sort_labels(uniques, left, right):
    """Remap ``left`` and ``right`` labels to follow the sorted ``uniques``.

    Both label arrays are concatenated, passed through
    ``sorting.safe_sort`` together with ``uniques`` (``na_sentinel=-1``),
    cast with ``_ensure_int64`` and split back at the original boundary.

    Returns
    -------
    (new_left, new_right) : the remapped label arrays
    """
    # tuplesafe: anything that is not already an ndarray goes through Index
    if not isinstance(uniques, np.ndarray):
        uniques = Index(uniques).values

    boundary = len(left)
    combined = np.concatenate([left, right])

    # only the remapped labels are needed; the sorted uniques are discarded
    _sorted_uniques, remapped = sorting.safe_sort(uniques, combined,
                                                  na_sentinel=-1)
    remapped = _ensure_int64(remapped)

    return remapped[:boundary], remapped[boundary:]
Example #11
Source File: merge.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def _sort_labels(uniques, left, right):
    """Remap ``left`` and ``right`` labels to follow the sorted ``uniques``.

    Both label arrays are concatenated, passed through
    ``sorting.safe_sort`` together with ``uniques`` (``na_sentinel=-1``),
    cast with ``_ensure_int64`` and split back at the original boundary.

    Returns
    -------
    (new_left, new_right) : the remapped label arrays
    """
    # tuplesafe: anything that is not already an ndarray goes through Index
    if not isinstance(uniques, np.ndarray):
        uniques = Index(uniques).values

    boundary = len(left)
    combined = np.concatenate([left, right])

    # only the remapped labels are needed; the sorted uniques are discarded
    _sorted_uniques, remapped = sorting.safe_sort(uniques, combined,
                                                  na_sentinel=-1)
    remapped = _ensure_int64(remapped)

    return remapped[:boundary], remapped[boundary:]
Example #12
Source File: test_base.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_difference_base(self, sort):
        """Check ``difference`` of two overlapping slices of one index."""
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        idx = self.create_index()
        head, tail = idx[:4], idx[3:]

        result = head.difference(tail, sort)

        # With ``sort=None`` the expected values go through safe_sort;
        # otherwise they are used in the order written here.
        unsorted_expected = Index([0, 'a', 1])
        if sort is None:
            expected = Index(safe_sort(unsorted_expected))
        else:
            expected = unsorted_expected
        tm.assert_index_equal(result, expected)
Example #13
Source File: test_window.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_pairwise_with_other(self, f):
        """Check the pairwise output of ``f`` for each frame and ``self.df2``.

        DataFrame with another DataFrame, pairwise=True.
        """
        # Compute every output first, then validate them one by one.
        outputs = [f(frame, self.df2) for frame in self.df1s]

        for frame, output in zip(self.df1s, outputs):
            tm.assert_index_equal(output.index.levels[0],
                                  frame.index,
                                  check_names=False)
            # The second level is compared after sorting both sides.
            actual_level = safe_sort(output.index.levels[1])
            expected_level = safe_sort(self.df2.columns.unique())
            tm.assert_numpy_array_equal(actual_level, expected_level)

        # Every later output is compared against the first one.
        for later in outputs[1:]:
            self.compare(later, outputs[0])
Example #14
Source File: merge.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def _sort_labels(uniques, left, right):
    """Remap ``left`` and ``right`` labels to follow the sorted ``uniques``.

    Both label arrays are concatenated, passed through
    ``sorting.safe_sort`` together with ``uniques`` (``na_sentinel=-1``),
    cast with ``_ensure_int64`` and split back at the original boundary.

    Returns
    -------
    (new_left, new_right) : the remapped label arrays
    """
    # tuplesafe: anything that is not already an ndarray goes through Index
    if not isinstance(uniques, np.ndarray):
        uniques = Index(uniques).values

    boundary = len(left)
    combined = np.concatenate([left, right])

    # only the remapped labels are needed; the sorted uniques are discarded
    _sorted_uniques, remapped = sorting.safe_sort(uniques, combined,
                                                  na_sentinel=-1)
    remapped = _ensure_int64(remapped)

    return remapped[:boundary], remapped[boundary:]
Example #15
Source File: test_window.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_pairwise_with_other(self, f):
        """Check the pairwise output of ``f`` for each frame and ``self.df2``.

        DataFrame with another DataFrame, pairwise=True.
        """
        # Compute every output first, then validate them one by one.
        outputs = [f(frame, self.df2) for frame in self.df1s]

        for frame, output in zip(self.df1s, outputs):
            tm.assert_index_equal(output.index.levels[0],
                                  frame.index,
                                  check_names=False)
            # The second level is compared after sorting both sides.
            actual_level = safe_sort(output.index.levels[1])
            expected_level = safe_sort(self.df2.columns.unique())
            tm.assert_numpy_array_equal(actual_level, expected_level)

        # Every later output is compared against the first one.
        for later in outputs[1:]:
            self.compare(later, outputs[0])
Example #16
Source File: merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def _sort_labels(uniques, left, right):
    """Remap ``left`` and ``right`` labels to follow the sorted ``uniques``.

    Both label arrays are concatenated, passed through
    ``sorting.safe_sort`` together with ``uniques`` (``na_sentinel=-1``),
    cast with ``ensure_int64`` and split back at the original boundary.

    Returns
    -------
    (new_left, new_right) : the remapped label arrays
    """
    # tuplesafe: anything that is not already an ndarray goes through Index
    if not isinstance(uniques, np.ndarray):
        uniques = Index(uniques).values

    boundary = len(left)
    combined = np.concatenate([left, right])

    # only the remapped labels are needed; the sorted uniques are discarded
    _sorted_uniques, remapped = sorting.safe_sort(uniques, combined,
                                                  na_sentinel=-1)
    remapped = ensure_int64(remapped)

    return remapped[:boundary], remapped[boundary:]
Example #17
Source File: test_base.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_difference_base(self, sort):
        """Check ``difference`` of two overlapping slices of one index."""
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        idx = self.create_index()
        head, tail = idx[:4], idx[3:]

        result = head.difference(tail, sort)

        # With ``sort=None`` the expected values go through safe_sort;
        # otherwise they are used in the order written here.
        unsorted_expected = Index([0, 'a', 1])
        if sort is None:
            expected = Index(safe_sort(unsorted_expected))
        else:
            expected = unsorted_expected
        tm.assert_index_equal(result, expected)
Example #18
Source File: test_window.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_pairwise_with_other(self, f):
        """Check the pairwise output of ``f`` for each frame and ``self.df2``.

        DataFrame with another DataFrame, pairwise=True.
        """
        # Compute every output first, then validate them one by one.
        outputs = [f(frame, self.df2) for frame in self.df1s]

        for frame, output in zip(self.df1s, outputs):
            tm.assert_index_equal(output.index.levels[0],
                                  frame.index,
                                  check_names=False)
            # The second level is compared after sorting both sides.
            actual_level = safe_sort(output.index.levels[1])
            expected_level = safe_sort(self.df2.columns.unique())
            tm.assert_numpy_array_equal(actual_level, expected_level)

        # Every later output is compared against the first one.
        for later in outputs[1:]:
            self.compare(later, outputs[0])
Example #19
Source File: algorithms.py    From vnpy_crypto with MIT License 4 votes vote down vote up
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    # Implementation notes: This method is responsible for 3 things
    # 1.) coercing data to array-like (ndarray, Index, extension array)
    # 2.) factorizing labels and uniques
    # 3.) Maybe boxing the output in an Index
    #
    # Step 2 is dispatched to extension types (like Categorical). They are
    # responsible only for factorization. All data coercion, sorting and boxing
    # should happen here.
    #
    # Returns a ``(labels, uniques)`` pair. The ``order`` argument is not
    # consulted; the name is only rebound as a local in the sort step below.

    values = _ensure_arraylike(values)
    original = values

    if is_extension_array_dtype(values):
        values = getattr(values, '_values', values)
        labels, uniques = values.factorize(na_sentinel=na_sentinel)
        dtype = original.dtype
    else:
        values, dtype, _ = _ensure_data(values)

        # datetime-, timedelta- and period-typed input passes a
        # dtype-specific NA value; everything else passes None
        if (is_datetime64_any_dtype(original) or
                is_timedelta64_dtype(original) or
                is_period_dtype(original)):
            na_value = na_value_for_dtype(original.dtype)
        else:
            na_value = None

        labels, uniques = _factorize_array(values,
                                           na_sentinel=na_sentinel,
                                           size_hint=size_hint,
                                           na_value=na_value)

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        if na_sentinel == -1:
            # GH-25409 take_1d only works for na_sentinels of -1, so the
            # argsort/take_1d remap is restricted to that case.
            try:
                order = uniques.argsort()
                order2 = order.argsort()
                labels = take_1d(order2, labels, fill_value=na_sentinel)
                uniques = uniques.take(order)
            except TypeError:
                # Mixed types, where uniques.argsort fails.
                uniques, labels = safe_sort(uniques, labels,
                                            na_sentinel=na_sentinel,
                                            assume_unique=True)
        else:
            # Any other sentinel value goes straight through safe_sort.
            uniques, labels = safe_sort(uniques, labels,
                                        na_sentinel=na_sentinel,
                                        assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, original)

    # return original tenor
    if isinstance(original, ABCIndexClass):
        uniques = original._shallow_copy(uniques, name=None)
    elif isinstance(original, ABCSeries):
        from pandas import Index
        uniques = Index(uniques)

    return labels, uniques
Example #20
Source File: algorithms.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 4 votes vote down vote up
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    # Overview: three stages are handled here, in this order
    #   1. coerce the input to array-like (ndarray, Index, extension array)
    #   2. compute the labels and the uniques
    #   3. maybe box the uniques in an Index
    #
    # Extension types (like Categorical) take over stage 2 only, through
    # their own ``factorize``; coercion, sorting and boxing stay in this
    # function. ``order`` is not consulted anywhere below.

    values = _ensure_arraylike(values)
    original = values

    if not is_extension_array_dtype(values):
        values, dtype, _ = _ensure_data(values)

        # datetime-, timedelta- and period-typed input passes a
        # dtype-specific NA value; everything else passes None
        datetimelike = (is_datetime64_any_dtype(original) or
                        is_timedelta64_dtype(original) or
                        is_period_dtype(original))
        na_value = (na_value_for_dtype(original.dtype)
                    if datetimelike else None)

        labels, uniques = _factorize_array(values,
                                           na_sentinel=na_sentinel,
                                           size_hint=size_hint,
                                           na_value=na_value)
    else:
        values = getattr(values, '_values', values)
        labels, uniques = values.factorize(na_sentinel=na_sentinel)
        dtype = original.dtype

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        if na_sentinel != -1:
            uniques, labels = safe_sort(uniques, labels,
                                        na_sentinel=na_sentinel,
                                        assume_unique=True)
        else:
            # GH-25409 take_1d only works for na_sentinels of -1
            try:
                sorter = uniques.argsort()
                inverse = sorter.argsort()
                labels = take_1d(inverse, labels, fill_value=na_sentinel)
                uniques = uniques.take(sorter)
            except TypeError:
                # Mixed types, where uniques.argsort fails.
                uniques, labels = safe_sort(uniques, labels,
                                            na_sentinel=na_sentinel,
                                            assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, original)

    # hand the uniques back in the same kind of container as the input
    if isinstance(original, ABCIndexClass):
        uniques = original._shallow_copy(uniques, name=None)
    elif isinstance(original, ABCSeries):
        from pandas import Index
        uniques = Index(uniques)

    return labels, uniques
Example #21
Source File: base.py    From Splunking-Crime with GNU Affero General Public License v3.0 4 votes vote down vote up
def difference(self, other):
        """
        Compute the set difference of this index and `other`.

        The result is a new Index holding the unique elements of this
        index that are not in `other`. It's sorted if sorting is possible.

        Parameters
        ----------
        other : Index or array-like

        Returns
        -------
        difference : Index

        Examples
        --------

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.difference(idx2)
        Int64Index([1, 2], dtype='int64')

        """
        self._assert_can_do_setop(other)

        # equal indexes leave nothing behind
        if self.equals(other):
            return Index([], name=self.name)

        other, result_name = self._convert_can_do_setop(other)
        this = self._get_unique_index()

        # positions in `this` of the elements of `other`; -1 entries
        # are filtered out on the next line
        positions = this.get_indexer(other)
        matched = positions.take((positions != -1).nonzero()[0])

        # every position of `this` that was not matched is kept
        every_position = np.arange(this.size)
        kept = np.setdiff1d(every_position, matched, assume_unique=True)
        the_diff = this.values.take(kept)

        try:
            sorted_diff = sorting.safe_sort(the_diff)
        except TypeError:
            # sorting is best-effort: fall back to the unsorted values
            sorted_diff = the_diff

        return this._shallow_copy(sorted_diff, name=result_name, freq=None)
Example #22
Source File: algorithms.py    From Splunking-Crime with GNU Affero General Public License v3.0 4 votes vote down vote up
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode the input as integer labels that point into an array of uniques.

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence
    sort : boolean, default False
        Sort by values
    order : not read by this implementation
    na_sentinel : int, default -1
        Value to mark "not found"
    size_hint : hint to the hashtable sizer
        ``len(values)`` is used instead when this is falsy

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        the unique values. Index is returned when passed values is Index or
        Series

    note: an array of Periods will ignore sort as it returns an always sorted
    PeriodIndex
    """

    values = _ensure_arraylike(values)
    original = values
    values, dtype, _ = _ensure_data(values)
    klasses, values = _get_data_algo(values, _hashtables)
    hash_klass, vec_klass = klasses

    initial_size = size_hint or len(values)
    table = hash_klass(initial_size)
    uniques = vec_klass()

    # null checking is requested for everything except integer input
    skip_null_check = is_integer_dtype(original)
    labels = table.get_labels(values, uniques, 0, na_sentinel,
                              not skip_null_check)
    labels = _ensure_platform_int(labels)
    uniques = uniques.to_array()

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        uniques, labels = safe_sort(uniques, labels,
                                    na_sentinel=na_sentinel,
                                    assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, original)

    # hand the uniques back in the same kind of container as the input
    if isinstance(original, ABCIndexClass):
        uniques = original._shallow_copy(uniques, name=None)
    elif isinstance(original, ABCSeries):
        from pandas import Index
        uniques = Index(uniques)

    return labels, uniques
Example #23
Source File: base.py    From elasticintel with GNU General Public License v3.0 4 votes vote down vote up
def difference(self, other):
        """
        Compute the set difference of this index and `other`.

        The result is a new Index holding the unique elements of this
        index that are not in `other`. It's sorted if sorting is possible.

        Parameters
        ----------
        other : Index or array-like

        Returns
        -------
        difference : Index

        Examples
        --------

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.difference(idx2)
        Int64Index([1, 2], dtype='int64')

        """
        self._assert_can_do_setop(other)

        # equal indexes leave nothing behind
        if self.equals(other):
            return Index([], name=self.name)

        other, result_name = self._convert_can_do_setop(other)
        this = self._get_unique_index()

        # positions in `this` of the elements of `other`; -1 entries
        # are filtered out on the next line
        positions = this.get_indexer(other)
        matched = positions.take((positions != -1).nonzero()[0])

        # every position of `this` that was not matched is kept
        every_position = np.arange(this.size)
        kept = np.setdiff1d(every_position, matched, assume_unique=True)
        the_diff = this.values.take(kept)

        try:
            sorted_diff = sorting.safe_sort(the_diff)
        except TypeError:
            # sorting is best-effort: fall back to the unsorted values
            sorted_diff = the_diff

        return this._shallow_copy(sorted_diff, name=result_name, freq=None)
Example #24
Source File: base.py    From vnpy_crypto with MIT License 4 votes vote down vote up
def difference(self, other):
        """
        Compute the set difference of this index and `other`.

        The result is a new Index holding the unique elements of this
        index that are not in `other`. It's sorted if sorting is possible.

        Parameters
        ----------
        other : Index or array-like

        Returns
        -------
        difference : Index

        Examples
        --------

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.difference(idx2)
        Int64Index([1, 2], dtype='int64')

        """
        self._assert_can_do_setop(other)

        # equal indexes leave nothing behind
        if self.equals(other):
            return self._shallow_copy([])

        other, result_name = self._convert_can_do_setop(other)
        this = self._get_unique_index()

        # positions in `this` of the elements of `other`; -1 entries
        # are filtered out on the next line
        positions = this.get_indexer(other)
        matched = positions.take((positions != -1).nonzero()[0])

        # every position of `this` that was not matched is kept
        every_position = np.arange(this.size)
        kept = np.setdiff1d(every_position, matched, assume_unique=True)
        the_diff = this.values.take(kept)

        try:
            sorted_diff = sorting.safe_sort(the_diff)
        except TypeError:
            # sorting is best-effort: fall back to the unsorted values
            sorted_diff = the_diff

        return this._shallow_copy(sorted_diff, name=result_name, freq=None)
Example #25
Source File: algorithms.py    From elasticintel with GNU General Public License v3.0 4 votes vote down vote up
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode the input as integer labels that point into an array of uniques.

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence
    sort : boolean, default False
        Sort by values
    order : not read by this implementation
    na_sentinel : int, default -1
        Value to mark "not found"
    size_hint : hint to the hashtable sizer
        ``len(values)`` is used instead when this is falsy

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        the unique values. Index is returned when passed values is Index or
        Series

    note: an array of Periods will ignore sort as it returns an always sorted
    PeriodIndex
    """

    values = _ensure_arraylike(values)
    original = values
    values, dtype, _ = _ensure_data(values)
    klasses, values = _get_data_algo(values, _hashtables)
    hash_klass, vec_klass = klasses

    initial_size = size_hint or len(values)
    table = hash_klass(initial_size)
    uniques = vec_klass()

    # null checking is requested for everything except integer input
    skip_null_check = is_integer_dtype(original)
    labels = table.get_labels(values, uniques, 0, na_sentinel,
                              not skip_null_check)
    labels = _ensure_platform_int(labels)
    uniques = uniques.to_array()

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        uniques, labels = safe_sort(uniques, labels,
                                    na_sentinel=na_sentinel,
                                    assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, original)

    # hand the uniques back in the same kind of container as the input
    if isinstance(original, ABCIndexClass):
        uniques = original._shallow_copy(uniques, name=None)
    elif isinstance(original, ABCSeries):
        from pandas import Index
        uniques = Index(uniques)

    return labels, uniques
Example #26
Source File: algorithms.py    From recruit with Apache License 2.0 4 votes vote down vote up
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    # Overview: three stages are handled here, in this order
    #   1. coerce the input to array-like (ndarray, Index, extension array)
    #   2. compute the labels and the uniques
    #   3. maybe box the uniques in an Index
    #
    # Extension types (like Categorical) take over stage 2 only, through
    # their own ``factorize``; coercion, sorting and boxing stay in this
    # function. ``order`` is not consulted anywhere below.

    values = _ensure_arraylike(values)
    original = values

    if not is_extension_array_dtype(values):
        values, dtype, _ = _ensure_data(values)

        # datetime-, timedelta- and period-typed input passes a
        # dtype-specific NA value; everything else passes None
        datetimelike = (is_datetime64_any_dtype(original) or
                        is_timedelta64_dtype(original) or
                        is_period_dtype(original))
        na_value = (na_value_for_dtype(original.dtype)
                    if datetimelike else None)

        labels, uniques = _factorize_array(values,
                                           na_sentinel=na_sentinel,
                                           size_hint=size_hint,
                                           na_value=na_value)
    else:
        values = getattr(values, '_values', values)
        labels, uniques = values.factorize(na_sentinel=na_sentinel)
        dtype = original.dtype

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        if na_sentinel != -1:
            uniques, labels = safe_sort(uniques, labels,
                                        na_sentinel=na_sentinel,
                                        assume_unique=True)
        else:
            # GH-25409 take_1d only works for na_sentinels of -1
            try:
                sorter = uniques.argsort()
                inverse = sorter.argsort()
                labels = take_1d(inverse, labels, fill_value=na_sentinel)
                uniques = uniques.take(sorter)
            except TypeError:
                # Mixed types, where uniques.argsort fails.
                uniques, labels = safe_sort(uniques, labels,
                                            na_sentinel=na_sentinel,
                                            assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, original)

    # hand the uniques back in the same kind of container as the input
    if isinstance(original, ABCIndexClass):
        uniques = original._shallow_copy(uniques, name=None)
    elif isinstance(original, ABCSeries):
        from pandas import Index
        uniques = Index(uniques)

    return labels, uniques