Python Examples of pandas.core.sorting.safe_sort

Source File: test_window.py From twitter-stock-recommendation with MIT License

6 votes

def test_pairwise_with_self(self, f):
        """Check ``f(df)`` for every frame in ``self.df1s``.

        Each result's index levels and columns are validated against the
        input frame, then every result is compared with the first result
        through ``self.compare``.
        """
        # DataFrame with itself, pairwise=True
        # note that we may construct the 1st level of the MI
        # in a non-monotonic way, so compare the sorted values
        results = []
        for df in self.df1s:
            result = f(df)
            tm.assert_index_equal(result.index.levels[0],
                                  df.index,
                                  check_names=False)
            tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                        safe_sort(df.columns.unique()))
            tm.assert_index_equal(result.columns, df.columns)
            # Collect the computed result, not the input frame: the
            # comparison below is meant to check the output of ``f``.
            results.append(result)

        # every result after the first must agree with the first one
        for result in results[1:]:
            self.compare(result, results[0])

Source File: test_window.py From vnpy_crypto with MIT License

6 votes

def test_pairwise_with_self(self, f):
        """Check ``f(df)`` for every frame in ``self.df1s``.

        Each result's index levels and columns are validated against the
        input frame, then every result is compared with the first result
        through ``self.compare``.
        """
        # DataFrame with itself, pairwise=True
        # note that we may construct the 1st level of the MI
        # in a non-monotonic way, so compare the sorted values
        results = []
        for df in self.df1s:
            result = f(df)
            tm.assert_index_equal(result.index.levels[0],
                                  df.index,
                                  check_names=False)
            tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                        safe_sort(df.columns.unique()))
            tm.assert_index_equal(result.columns, df.columns)
            # Collect the computed result, not the input frame: the
            # comparison below is meant to check the output of ``f``.
            results.append(result)

        # every result after the first must agree with the first one
        for result in results[1:]:
            self.compare(result, results[0])

Source File: test_window.py From coffeegrindsize with MIT License

6 votes

def test_pairwise_with_self(self, f):
        """Check ``f(df)`` for every frame in ``self.df1s``.

        Each result's index levels and columns are validated against the
        input frame, then every result is compared with the first result
        through ``self.compare``.
        """
        # DataFrame with itself, pairwise=True
        # note that we may construct the 1st level of the MI
        # in a non-monotonic way, so compare the sorted values
        results = []
        for df in self.df1s:
            result = f(df)
            tm.assert_index_equal(result.index.levels[0],
                                  df.index,
                                  check_names=False)
            tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                        safe_sort(df.columns.unique()))
            tm.assert_index_equal(result.columns, df.columns)
            # Collect the computed result, not the input frame: the
            # comparison below is meant to check the output of ``f``.
            results.append(result)

        # every result after the first must agree with the first one
        for result in results[1:]:
            self.compare(result, results[0])

Source File: test_window.py From recruit with Apache License 2.0

6 votes

def test_pairwise_with_self(self, f):
        """Check ``f(df)`` for every frame in ``self.df1s``.

        Each result's index levels and columns are validated against the
        input frame, then every result is compared with the first result
        through ``self.compare``.
        """
        # DataFrame with itself, pairwise=True
        # note that we may construct the 1st level of the MI
        # in a non-monotonic way, so compare the sorted values
        results = []
        for df in self.df1s:
            result = f(df)
            tm.assert_index_equal(result.index.levels[0],
                                  df.index,
                                  check_names=False)
            tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                        safe_sort(df.columns.unique()))
            tm.assert_index_equal(result.columns, df.columns)
            # Collect the computed result, not the input frame: the
            # comparison below is meant to check the output of ``f``.
            results.append(result)

        # every result after the first must agree with the first one
        for result in results[1:]:
            self.compare(result, results[0])

Source File: test_window.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def test_pairwise_with_self(self, f):
        """Check ``f(df)`` for every frame in ``self.df1s``.

        Each result's index levels and columns are validated against the
        input frame, then every result is compared with the first result
        through ``self.compare``.
        """
        # DataFrame with itself, pairwise=True
        # note that we may construct the 1st level of the MI
        # in a non-monotonic way, so compare the sorted values
        results = []
        for df in self.df1s:
            result = f(df)
            tm.assert_index_equal(result.index.levels[0],
                                  df.index,
                                  check_names=False)
            tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                        safe_sort(df.columns.unique()))
            tm.assert_index_equal(result.columns, df.columns)
            # Collect the computed result, not the input frame: the
            # comparison below is meant to check the output of ``f``.
            results.append(result)

        # every result after the first must agree with the first one
        for result in results[1:]:
            self.compare(result, results[0])

Source File: merge.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def _sort_labels(uniques, left, right):
    """Relabel ``left`` and ``right`` via ``sorting.safe_sort``.

    ``left`` and ``right`` are concatenated and handed, together with
    ``uniques``, to ``sorting.safe_sort`` (``na_sentinel=-1``). The labels
    it returns are cast with ``ensure_int64`` and split back at
    ``len(left)``.

    Returns
    -------
    tuple
        ``(new_left, new_right)``
    """
    if not isinstance(uniques, np.ndarray):
        # tuplesafe: go through Index to obtain the values
        uniques = Index(uniques).values

    n_left = len(left)
    combined = np.concatenate([left, right])

    _, relabeled = sorting.safe_sort(uniques, combined, na_sentinel=-1)
    relabeled = ensure_int64(relabeled)

    return relabeled[:n_left], relabeled[n_left:]

Source File: test_window.py From twitter-stock-recommendation with MIT License

5 votes

def test_pairwise_with_other(self, f):
        """Check ``f(df, self.df2)`` for every frame in ``self.df1s``."""
        # DataFrame with another DataFrame, pairwise=True
        results = []
        for frame in self.df1s:
            results.append(f(frame, self.df2))

        for frame, outcome in zip(self.df1s, results):
            levels = outcome.index.levels
            tm.assert_index_equal(levels[0], frame.index, check_names=False)
            got_columns = safe_sort(levels[1])
            want_columns = safe_sort(self.df2.columns.unique())
            tm.assert_numpy_array_equal(got_columns, want_columns)

        # every result after the first must agree with the first one
        for outcome in results[1:]:
            self.compare(outcome, results[0])

Source File: test_base.py From coffeegrindsize with MIT License

5 votes

def test_difference_base(self, sort):
        """Check ``difference`` on two overlapping slices of the index
        returned by ``self.create_index()``."""
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        idx = self.create_index()
        head, tail = idx[:4], idx[3:]

        actual = head.difference(tail, sort)

        wanted = Index([0, 'a', 1])
        if sort is None:
            # with sort=None the expected values are the sorted ones
            wanted = Index(safe_sort(wanted))
        tm.assert_index_equal(actual, wanted)

Source File: test_window.py From coffeegrindsize with MIT License

5 votes

def test_pairwise_with_other(self, f):
        """Check ``f(df, self.df2)`` for every frame in ``self.df1s``."""
        # DataFrame with another DataFrame, pairwise=True
        results = []
        for frame in self.df1s:
            results.append(f(frame, self.df2))

        for frame, outcome in zip(self.df1s, results):
            levels = outcome.index.levels
            tm.assert_index_equal(levels[0], frame.index, check_names=False)
            got_columns = safe_sort(levels[1])
            want_columns = safe_sort(self.df2.columns.unique())
            tm.assert_numpy_array_equal(got_columns, want_columns)

        # every result after the first must agree with the first one
        for outcome in results[1:]:
            self.compare(outcome, results[0])

Source File: merge.py From elasticintel with GNU General Public License v3.0

5 votes

def _sort_labels(uniques, left, right):
    """Relabel ``left`` and ``right`` via ``sorting.safe_sort``.

    ``left`` and ``right`` are concatenated and handed, together with
    ``uniques``, to ``sorting.safe_sort`` (``na_sentinel=-1``). The labels
    it returns are cast with ``_ensure_int64`` and split back at
    ``len(left)``.

    Returns
    -------
    tuple
        ``(new_left, new_right)``
    """
    if not isinstance(uniques, np.ndarray):
        # tuplesafe: go through Index to obtain the values
        uniques = Index(uniques).values

    n_left = len(left)
    combined = np.concatenate([left, right])

    _, relabeled = sorting.safe_sort(uniques, combined, na_sentinel=-1)
    relabeled = _ensure_int64(relabeled)

    return relabeled[:n_left], relabeled[n_left:]

Source File: merge.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def _sort_labels(uniques, left, right):
    """Relabel ``left`` and ``right`` via ``sorting.safe_sort``.

    ``left`` and ``right`` are concatenated and handed, together with
    ``uniques``, to ``sorting.safe_sort`` (``na_sentinel=-1``). The labels
    it returns are cast with ``_ensure_int64`` and split back at
    ``len(left)``.

    Returns
    -------
    tuple
        ``(new_left, new_right)``
    """
    if not isinstance(uniques, np.ndarray):
        # tuplesafe: go through Index to obtain the values
        uniques = Index(uniques).values

    n_left = len(left)
    combined = np.concatenate([left, right])

    _, relabeled = sorting.safe_sort(uniques, combined, na_sentinel=-1)
    relabeled = _ensure_int64(relabeled)

    return relabeled[:n_left], relabeled[n_left:]

Source File: test_base.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_difference_base(self, sort):
        """Check ``difference`` on two overlapping slices of the index
        returned by ``self.create_index()``."""
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        idx = self.create_index()
        head, tail = idx[:4], idx[3:]

        actual = head.difference(tail, sort)

        wanted = Index([0, 'a', 1])
        if sort is None:
            # with sort=None the expected values are the sorted ones
            wanted = Index(safe_sort(wanted))
        tm.assert_index_equal(actual, wanted)

Source File: test_window.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_pairwise_with_other(self, f):
        """Check ``f(df, self.df2)`` for every frame in ``self.df1s``."""
        # DataFrame with another DataFrame, pairwise=True
        results = []
        for frame in self.df1s:
            results.append(f(frame, self.df2))

        for frame, outcome in zip(self.df1s, results):
            levels = outcome.index.levels
            tm.assert_index_equal(levels[0], frame.index, check_names=False)
            got_columns = safe_sort(levels[1])
            want_columns = safe_sort(self.df2.columns.unique())
            tm.assert_numpy_array_equal(got_columns, want_columns)

        # every result after the first must agree with the first one
        for outcome in results[1:]:
            self.compare(outcome, results[0])

Source File: merge.py From vnpy_crypto with MIT License

5 votes

def _sort_labels(uniques, left, right):
    """Relabel ``left`` and ``right`` via ``sorting.safe_sort``.

    ``left`` and ``right`` are concatenated and handed, together with
    ``uniques``, to ``sorting.safe_sort`` (``na_sentinel=-1``). The labels
    it returns are cast with ``_ensure_int64`` and split back at
    ``len(left)``.

    Returns
    -------
    tuple
        ``(new_left, new_right)``
    """
    if not isinstance(uniques, np.ndarray):
        # tuplesafe: go through Index to obtain the values
        uniques = Index(uniques).values

    n_left = len(left)
    combined = np.concatenate([left, right])

    _, relabeled = sorting.safe_sort(uniques, combined, na_sentinel=-1)
    relabeled = _ensure_int64(relabeled)

    return relabeled[:n_left], relabeled[n_left:]

Source File: test_window.py From vnpy_crypto with MIT License

5 votes

def test_pairwise_with_other(self, f):
        """Check ``f(df, self.df2)`` for every frame in ``self.df1s``."""
        # DataFrame with another DataFrame, pairwise=True
        results = []
        for frame in self.df1s:
            results.append(f(frame, self.df2))

        for frame, outcome in zip(self.df1s, results):
            levels = outcome.index.levels
            tm.assert_index_equal(levels[0], frame.index, check_names=False)
            got_columns = safe_sort(levels[1])
            want_columns = safe_sort(self.df2.columns.unique())
            tm.assert_numpy_array_equal(got_columns, want_columns)

        # every result after the first must agree with the first one
        for outcome in results[1:]:
            self.compare(outcome, results[0])

Source File: merge.py From recruit with Apache License 2.0

5 votes

def _sort_labels(uniques, left, right):
    """Relabel ``left`` and ``right`` via ``sorting.safe_sort``.

    ``left`` and ``right`` are concatenated and handed, together with
    ``uniques``, to ``sorting.safe_sort`` (``na_sentinel=-1``). The labels
    it returns are cast with ``ensure_int64`` and split back at
    ``len(left)``.

    Returns
    -------
    tuple
        ``(new_left, new_right)``
    """
    if not isinstance(uniques, np.ndarray):
        # tuplesafe: go through Index to obtain the values
        uniques = Index(uniques).values

    n_left = len(left)
    combined = np.concatenate([left, right])

    _, relabeled = sorting.safe_sort(uniques, combined, na_sentinel=-1)
    relabeled = ensure_int64(relabeled)

    return relabeled[:n_left], relabeled[n_left:]

Source File: test_base.py From recruit with Apache License 2.0

5 votes

def test_difference_base(self, sort):
        """Check ``difference`` on two overlapping slices of the index
        returned by ``self.create_index()``."""
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        idx = self.create_index()
        head, tail = idx[:4], idx[3:]

        actual = head.difference(tail, sort)

        wanted = Index([0, 'a', 1])
        if sort is None:
            # with sort=None the expected values are the sorted ones
            wanted = Index(safe_sort(wanted))
        tm.assert_index_equal(actual, wanted)

Source File: test_window.py From recruit with Apache License 2.0

5 votes

def test_pairwise_with_other(self, f):
        """Check ``f(df, self.df2)`` for every frame in ``self.df1s``."""
        # DataFrame with another DataFrame, pairwise=True
        results = []
        for frame in self.df1s:
            results.append(f(frame, self.df2))

        for frame, outcome in zip(self.df1s, results):
            levels = outcome.index.levels
            tm.assert_index_equal(levels[0], frame.index, check_names=False)
            got_columns = safe_sort(levels[1])
            want_columns = safe_sort(self.df2.columns.unique())
            tm.assert_numpy_array_equal(got_columns, want_columns)

        # every result after the first must agree with the first one
        for outcome in results[1:]:
            self.compare(outcome, results[0])

Source File: algorithms.py From vnpy_crypto with MIT License

4 votes

def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    # Implementation notes: This method is responsible for 3 things
    # 1.) coercing data to array-like (ndarray, Index, extension array)
    # 2.) factorizing labels and uniques
    # 3.) Maybe boxing the output in an Index
    #
    # Step 2 is dispatched to extension types (like Categorical). They are
    # responsible only for factorization. All data coercion, sorting and boxing
    # should happen here.
    #
    # Returns ``(labels, uniques)``.

    values = _ensure_arraylike(values)
    original = values

    if is_extension_array_dtype(values):
        # extension arrays factorize themselves; unwrap a container first
        values = getattr(values, '_values', values)
        labels, uniques = values.factorize(na_sentinel=na_sentinel)
        dtype = original.dtype
    else:
        values, dtype, _ = _ensure_data(values)

        # datetime-like inputs get an explicit NA value for their dtype
        if (is_datetime64_any_dtype(original) or
                is_timedelta64_dtype(original) or
                is_period_dtype(original)):
            na_value = na_value_for_dtype(original.dtype)
        else:
            na_value = None

        labels, uniques = _factorize_array(values,
                                           na_sentinel=na_sentinel,
                                           size_hint=size_hint,
                                           na_value=na_value)

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        if na_sentinel == -1:
            # GH-25409: the take_1d based fast path only works for a
            # na_sentinel of -1, so it is restricted to that case.
            try:
                order = uniques.argsort()
                order2 = order.argsort()
                labels = take_1d(order2, labels, fill_value=na_sentinel)
                uniques = uniques.take(order)
            except TypeError:
                # Mixed types, where uniques.argsort fails.
                uniques, labels = safe_sort(uniques, labels,
                                            na_sentinel=na_sentinel,
                                            assume_unique=True)
        else:
            # any other sentinel goes through safe_sort, which is given
            # the sentinel explicitly
            uniques, labels = safe_sort(uniques, labels,
                                        na_sentinel=na_sentinel,
                                        assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, original)

    # box the uniques to match the kind of input that was passed in
    if isinstance(original, ABCIndexClass):
        uniques = original._shallow_copy(uniques, name=None)
    elif isinstance(original, ABCSeries):
        from pandas import Index
        uniques = Index(uniques)

    return labels, uniques

Source File: algorithms.py From predictive-maintenance-using-machine-learning with Apache License 2.0

4 votes

def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    # Implementation notes: three responsibilities live here
    #   1. coerce the input to an array-like (ndarray, Index, extension array)
    #   2. factorize it into labels and uniques
    #   3. possibly box the uniques in an Index
    #
    # Only step 2 is delegated to extension types (e.g. Categorical); data
    # coercion, sorting and boxing all stay in this function.

    original = values = _ensure_arraylike(values)

    if not is_extension_array_dtype(values):
        values, dtype, _ = _ensure_data(values)

        is_datetimelike = (is_datetime64_any_dtype(original) or
                           is_timedelta64_dtype(original) or
                           is_period_dtype(original))
        na_value = (na_value_for_dtype(original.dtype)
                    if is_datetimelike else None)

        labels, uniques = _factorize_array(values,
                                           na_sentinel=na_sentinel,
                                           size_hint=size_hint,
                                           na_value=na_value)
    else:
        values = getattr(values, '_values', values)
        labels, uniques = values.factorize(na_sentinel=na_sentinel)
        dtype = original.dtype

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        if na_sentinel != -1:
            uniques, labels = safe_sort(uniques, labels,
                                        na_sentinel=na_sentinel,
                                        assume_unique=True)
        else:
            # GH-25409 take_1d only works for na_sentinels of -1
            try:
                sorter = uniques.argsort()
                ranks = sorter.argsort()
                labels = take_1d(ranks, labels, fill_value=na_sentinel)
                uniques = uniques.take(sorter)
            except TypeError:
                # Mixed types, where uniques.argsort fails.
                uniques, labels = safe_sort(uniques, labels,
                                            na_sentinel=na_sentinel,
                                            assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, original)

    # box the uniques to match the kind of input that was passed in
    if isinstance(original, ABCIndexClass):
        return labels, original._shallow_copy(uniques, name=None)
    if isinstance(original, ABCSeries):
        from pandas import Index
        return labels, Index(uniques)
    return labels, uniques

Source File: base.py From Splunking-Crime with GNU Affero General Public License v3.0

4 votes

def difference(self, other):
        """
        Compute the set difference of this index and `other`.

        The result holds the elements of this index that do not occur in
        `other`; it is sorted whenever the values can be sorted.

        Parameters
        ----------
        other : Index or array-like

        Returns
        -------
        difference : Index

        Examples
        --------

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.difference(idx2)
        Int64Index([1, 2], dtype='int64')

        """
        self._assert_can_do_setop(other)

        if self.equals(other):
            return Index([], name=self.name)

        other, result_name = self._convert_can_do_setop(other)

        this = self._get_unique_index()

        # keep only the indexer entries that are not -1
        positions = this.get_indexer(other)
        matched = (positions != -1).nonzero()[0]
        positions = positions.take(matched)

        # positions of `this` that are absent from the indexer
        every_position = np.arange(this.size)
        kept = np.setdiff1d(every_position, positions, assume_unique=True)
        remaining = this.values.take(kept)

        try:
            ordered = sorting.safe_sort(remaining)
        except TypeError:
            # sorting failed: fall back to the unsorted values
            ordered = remaining

        return this._shallow_copy(ordered, name=result_name, freq=None)

Source File: algorithms.py From Splunking-Crime with GNU Affero General Public License v3.0

4 votes

def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode the input as integer labels plus the array of distinct values

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence to encode
    sort : boolean, default False
        Sort the uniques by value
    na_sentinel : int, default -1
        Label used to mark "not found"
    size_hint : hint to the hashtable sizer

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        the unique values; an Index is returned when the passed values are
        an Index or a Series

    note: an array of Periods will ignore sort as it returns an always sorted
    PeriodIndex
    """

    source = _ensure_arraylike(values)
    values, dtype, _ = _ensure_data(source)
    (hash_klass, vec_klass), values = _get_data_algo(values, _hashtables)

    hashtable = hash_klass(size_hint or len(values))
    collector = vec_klass()
    skip_null_check = is_integer_dtype(source)
    labels = _ensure_platform_int(
        hashtable.get_labels(values, collector, 0, na_sentinel,
                             not skip_null_check))
    uniques = collector.to_array()

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel,
                                    assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, source)

    # box the uniques to match the kind of input that was passed in
    if isinstance(source, ABCIndexClass):
        return labels, source._shallow_copy(uniques, name=None)
    if isinstance(source, ABCSeries):
        from pandas import Index
        return labels, Index(uniques)
    return labels, uniques

Source File: base.py From elasticintel with GNU General Public License v3.0

4 votes

def difference(self, other):
        """
        Compute the set difference of this index and `other`.

        The result holds the elements of this index that do not occur in
        `other`; it is sorted whenever the values can be sorted.

        Parameters
        ----------
        other : Index or array-like

        Returns
        -------
        difference : Index

        Examples
        --------

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.difference(idx2)
        Int64Index([1, 2], dtype='int64')

        """
        self._assert_can_do_setop(other)

        if self.equals(other):
            return Index([], name=self.name)

        other, result_name = self._convert_can_do_setop(other)

        this = self._get_unique_index()

        # keep only the indexer entries that are not -1
        positions = this.get_indexer(other)
        matched = (positions != -1).nonzero()[0]
        positions = positions.take(matched)

        # positions of `this` that are absent from the indexer
        every_position = np.arange(this.size)
        kept = np.setdiff1d(every_position, positions, assume_unique=True)
        remaining = this.values.take(kept)

        try:
            ordered = sorting.safe_sort(remaining)
        except TypeError:
            # sorting failed: fall back to the unsorted values
            ordered = remaining

        return this._shallow_copy(ordered, name=result_name, freq=None)

Source File: base.py From vnpy_crypto with MIT License

4 votes

def difference(self, other):
        """
        Compute the set difference of this index and `other`.

        The result holds the elements of this index that do not occur in
        `other`; it is sorted whenever the values can be sorted.

        Parameters
        ----------
        other : Index or array-like

        Returns
        -------
        difference : Index

        Examples
        --------

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.difference(idx2)
        Int64Index([1, 2], dtype='int64')

        """
        self._assert_can_do_setop(other)

        if self.equals(other):
            return self._shallow_copy([])

        other, result_name = self._convert_can_do_setop(other)

        this = self._get_unique_index()

        # keep only the indexer entries that are not -1
        positions = this.get_indexer(other)
        matched = (positions != -1).nonzero()[0]
        positions = positions.take(matched)

        # positions of `this` that are absent from the indexer
        every_position = np.arange(this.size)
        kept = np.setdiff1d(every_position, positions, assume_unique=True)
        remaining = this.values.take(kept)

        try:
            ordered = sorting.safe_sort(remaining)
        except TypeError:
            # sorting failed: fall back to the unsorted values
            ordered = remaining

        return this._shallow_copy(ordered, name=result_name, freq=None)

Source File: algorithms.py From elasticintel with GNU General Public License v3.0

4 votes

def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode the input as integer labels plus the array of distinct values

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence to encode
    sort : boolean, default False
        Sort the uniques by value
    na_sentinel : int, default -1
        Label used to mark "not found"
    size_hint : hint to the hashtable sizer

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        the unique values; an Index is returned when the passed values are
        an Index or a Series

    note: an array of Periods will ignore sort as it returns an always sorted
    PeriodIndex
    """

    source = _ensure_arraylike(values)
    values, dtype, _ = _ensure_data(source)
    (hash_klass, vec_klass), values = _get_data_algo(values, _hashtables)

    hashtable = hash_klass(size_hint or len(values))
    collector = vec_klass()
    skip_null_check = is_integer_dtype(source)
    labels = _ensure_platform_int(
        hashtable.get_labels(values, collector, 0, na_sentinel,
                             not skip_null_check))
    uniques = collector.to_array()

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel,
                                    assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, source)

    # box the uniques to match the kind of input that was passed in
    if isinstance(source, ABCIndexClass):
        return labels, source._shallow_copy(uniques, name=None)
    if isinstance(source, ABCSeries):
        from pandas import Index
        return labels, Index(uniques)
    return labels, uniques

Source File: algorithms.py From recruit with Apache License 2.0

4 votes

def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    # Implementation notes: three responsibilities live here
    #   1. coerce the input to an array-like (ndarray, Index, extension array)
    #   2. factorize it into labels and uniques
    #   3. possibly box the uniques in an Index
    #
    # Only step 2 is delegated to extension types (e.g. Categorical); data
    # coercion, sorting and boxing all stay in this function.

    original = values = _ensure_arraylike(values)

    if not is_extension_array_dtype(values):
        values, dtype, _ = _ensure_data(values)

        is_datetimelike = (is_datetime64_any_dtype(original) or
                           is_timedelta64_dtype(original) or
                           is_period_dtype(original))
        na_value = (na_value_for_dtype(original.dtype)
                    if is_datetimelike else None)

        labels, uniques = _factorize_array(values,
                                           na_sentinel=na_sentinel,
                                           size_hint=size_hint,
                                           na_value=na_value)
    else:
        values = getattr(values, '_values', values)
        labels, uniques = values.factorize(na_sentinel=na_sentinel)
        dtype = original.dtype

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        if na_sentinel != -1:
            uniques, labels = safe_sort(uniques, labels,
                                        na_sentinel=na_sentinel,
                                        assume_unique=True)
        else:
            # GH-25409 take_1d only works for na_sentinels of -1
            try:
                sorter = uniques.argsort()
                ranks = sorter.argsort()
                labels = take_1d(ranks, labels, fill_value=na_sentinel)
                uniques = uniques.take(sorter)
            except TypeError:
                # Mixed types, where uniques.argsort fails.
                uniques, labels = safe_sort(uniques, labels,
                                            na_sentinel=na_sentinel,
                                            assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, original)

    # box the uniques to match the kind of input that was passed in
    if isinstance(original, ABCIndexClass):
        return labels, original._shallow_copy(uniques, name=None)
    if isinstance(original, ABCSeries):
        from pandas import Index
        return labels, Index(uniques)
    return labels, uniques

Python pandas.core.sorting.safe_sort() Examples