Python pandas.core.sorting.safe_sort() Examples
The following are 26 code examples of pandas.core.sorting.safe_sort().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas.core.sorting, or try the search function.
Example #1
Source File: test_window.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_pairwise_with_self(self, f):
    # DataFrame with itself, pairwise=True
    # note that we may construct the 1st level of the MI
    # in a non-monotonic way, so compare accordingly
    #
    # ``f`` is called as ``f(df)`` for every frame in ``self.df1s``; each
    # result is checked against its input frame, and every result after
    # the first is then checked against the first via ``self.compare``.
    results = []
    for df in self.df1s:
        result = f(df)
        tm.assert_index_equal(result.index.levels[0],
                              df.index,
                              check_names=False)
        tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                    safe_sort(df.columns.unique()))
        tm.assert_index_equal(result.columns, df.columns)
        # Keep the computed result (not the input frame) so the
        # cross-check below actually exercises the outputs of ``f``.
        results.append(result)

    for result in results[1:]:
        self.compare(result, results[0])
Example #2
Source File: test_window.py From vnpy_crypto with MIT License | 6 votes |
def test_pairwise_with_self(self, f):
    # DataFrame with itself, pairwise=True
    # note that we may construct the 1st level of the MI
    # in a non-monotonic way, so compare accordingly
    #
    # ``f`` is called as ``f(df)`` for every frame in ``self.df1s``; each
    # result is checked against its input frame, and every result after
    # the first is then checked against the first via ``self.compare``.
    results = []
    for df in self.df1s:
        result = f(df)
        tm.assert_index_equal(result.index.levels[0],
                              df.index,
                              check_names=False)
        tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                    safe_sort(df.columns.unique()))
        tm.assert_index_equal(result.columns, df.columns)
        # Keep the computed result (not the input frame) so the
        # cross-check below actually exercises the outputs of ``f``.
        results.append(result)

    for result in results[1:]:
        self.compare(result, results[0])
Example #3
Source File: test_window.py From coffeegrindsize with MIT License | 6 votes |
def test_pairwise_with_self(self, f):
    # DataFrame with itself, pairwise=True
    # note that we may construct the 1st level of the MI
    # in a non-monotonic way, so compare accordingly
    #
    # ``f`` is called as ``f(df)`` for every frame in ``self.df1s``; each
    # result is checked against its input frame, and every result after
    # the first is then checked against the first via ``self.compare``.
    results = []
    for df in self.df1s:
        result = f(df)
        tm.assert_index_equal(result.index.levels[0],
                              df.index,
                              check_names=False)
        tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                    safe_sort(df.columns.unique()))
        tm.assert_index_equal(result.columns, df.columns)
        # Keep the computed result (not the input frame) so the
        # cross-check below actually exercises the outputs of ``f``.
        results.append(result)

    for result in results[1:]:
        self.compare(result, results[0])
Example #4
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_pairwise_with_self(self, f):
    # DataFrame with itself, pairwise=True
    # note that we may construct the 1st level of the MI
    # in a non-monotonic way, so compare accordingly
    #
    # ``f`` is called as ``f(df)`` for every frame in ``self.df1s``; each
    # result is checked against its input frame, and every result after
    # the first is then checked against the first via ``self.compare``.
    results = []
    for df in self.df1s:
        result = f(df)
        tm.assert_index_equal(result.index.levels[0],
                              df.index,
                              check_names=False)
        tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                    safe_sort(df.columns.unique()))
        tm.assert_index_equal(result.columns, df.columns)
        # Keep the computed result (not the input frame) so the
        # cross-check below actually exercises the outputs of ``f``.
        results.append(result)

    for result in results[1:]:
        self.compare(result, results[0])
Example #5
Source File: test_window.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_pairwise_with_self(self, f):
    # DataFrame with itself, pairwise=True
    # note that we may construct the 1st level of the MI
    # in a non-monotonic way, so compare accordingly
    #
    # ``f`` is called as ``f(df)`` for every frame in ``self.df1s``; each
    # result is checked against its input frame, and every result after
    # the first is then checked against the first via ``self.compare``.
    results = []
    for df in self.df1s:
        result = f(df)
        tm.assert_index_equal(result.index.levels[0],
                              df.index,
                              check_names=False)
        tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
                                    safe_sort(df.columns.unique()))
        tm.assert_index_equal(result.columns, df.columns)
        # Keep the computed result (not the input frame) so the
        # cross-check below actually exercises the outputs of ``f``.
        results.append(result)

    for result in results[1:]:
        self.compare(result, results[0])
Example #6
Source File: merge.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def _sort_labels(uniques, left, right):
    """
    Remap ``left`` and ``right`` labels through ``sorting.safe_sort``.

    Both label arrays are concatenated, handed to ``sorting.safe_sort``
    together with ``uniques`` (``na_sentinel=-1``), converted with
    ``ensure_int64`` and split back at ``len(left)``.

    Returns
    -------
    tuple
        ``(new_left, new_right)`` slices of the remapped labels.
    """
    # tuplesafe: anything that is not already an ndarray goes through Index
    if not isinstance(uniques, np.ndarray):
        uniques = Index(uniques).values

    split_at = len(left)
    combined = np.concatenate([left, right])

    _, remapped = sorting.safe_sort(uniques, combined, na_sentinel=-1)
    remapped = ensure_int64(remapped)

    return remapped[:split_at], remapped[split_at:]
Example #7
Source File: test_window.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_pairwise_with_other(self, f):
    # DataFrame with another DataFrame, pairwise=True:
    # every frame of ``self.df1s`` is run against ``self.df2`` up front.
    outputs = [f(frame, self.df2) for frame in self.df1s]

    for frame, output in zip(self.df1s, outputs):
        outer_level = output.index.levels[0]
        tm.assert_index_equal(outer_level, frame.index, check_names=False)

        got_inner = safe_sort(output.index.levels[1])
        want_inner = safe_sort(self.df2.columns.unique())
        tm.assert_numpy_array_equal(got_inner, want_inner)

    # every later output is compared with the first one
    for later in outputs[1:]:
        self.compare(later, outputs[0])
Example #8
Source File: test_base.py From coffeegrindsize with MIT License | 5 votes |
def test_difference_base(self, sort):
    # Difference of two overlapping slices of ``self.create_index()``,
    # taken with the given ``sort`` argument.
    # (same results for py2 and py3 but sortedness not tested elsewhere)
    idx = self.create_index()
    head = idx[:4]
    tail = idx[3:]
    actual = head.difference(tail, sort)

    unsorted = Index([0, 'a', 1])
    # only ``sort=None`` is expected to produce the safe_sort ordering
    wanted = Index(safe_sort(unsorted)) if sort is None else unsorted

    tm.assert_index_equal(actual, wanted)
Example #9
Source File: test_window.py From coffeegrindsize with MIT License | 5 votes |
def test_pairwise_with_other(self, f):
    # DataFrame with another DataFrame, pairwise=True:
    # every frame of ``self.df1s`` is run against ``self.df2`` up front.
    outputs = [f(frame, self.df2) for frame in self.df1s]

    for frame, output in zip(self.df1s, outputs):
        outer_level = output.index.levels[0]
        tm.assert_index_equal(outer_level, frame.index, check_names=False)

        got_inner = safe_sort(output.index.levels[1])
        want_inner = safe_sort(self.df2.columns.unique())
        tm.assert_numpy_array_equal(got_inner, want_inner)

    # every later output is compared with the first one
    for later in outputs[1:]:
        self.compare(later, outputs[0])
Example #10
Source File: merge.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def _sort_labels(uniques, left, right):
    """
    Remap ``left`` and ``right`` labels through ``sorting.safe_sort``.

    Both label arrays are concatenated, handed to ``sorting.safe_sort``
    together with ``uniques`` (``na_sentinel=-1``), converted with
    ``_ensure_int64`` and split back at ``len(left)``.

    Returns
    -------
    tuple
        ``(new_left, new_right)`` slices of the remapped labels.
    """
    # tuplesafe: anything that is not already an ndarray goes through Index
    if not isinstance(uniques, np.ndarray):
        uniques = Index(uniques).values

    split_at = len(left)
    combined = np.concatenate([left, right])

    _, remapped = sorting.safe_sort(uniques, combined, na_sentinel=-1)
    remapped = _ensure_int64(remapped)

    return remapped[:split_at], remapped[split_at:]
Example #11
Source File: merge.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def _sort_labels(uniques, left, right):
    """
    Remap ``left`` and ``right`` labels through ``sorting.safe_sort``.

    Both label arrays are concatenated, handed to ``sorting.safe_sort``
    together with ``uniques`` (``na_sentinel=-1``), converted with
    ``_ensure_int64`` and split back at ``len(left)``.

    Returns
    -------
    tuple
        ``(new_left, new_right)`` slices of the remapped labels.
    """
    # tuplesafe: anything that is not already an ndarray goes through Index
    if not isinstance(uniques, np.ndarray):
        uniques = Index(uniques).values

    split_at = len(left)
    combined = np.concatenate([left, right])

    _, remapped = sorting.safe_sort(uniques, combined, na_sentinel=-1)
    remapped = _ensure_int64(remapped)

    return remapped[:split_at], remapped[split_at:]
Example #12
Source File: test_base.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_difference_base(self, sort):
    # Difference of two overlapping slices of ``self.create_index()``,
    # taken with the given ``sort`` argument.
    # (same results for py2 and py3 but sortedness not tested elsewhere)
    idx = self.create_index()
    head = idx[:4]
    tail = idx[3:]
    actual = head.difference(tail, sort)

    unsorted = Index([0, 'a', 1])
    # only ``sort=None`` is expected to produce the safe_sort ordering
    wanted = Index(safe_sort(unsorted)) if sort is None else unsorted

    tm.assert_index_equal(actual, wanted)
Example #13
Source File: test_window.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_pairwise_with_other(self, f):
    # DataFrame with another DataFrame, pairwise=True:
    # every frame of ``self.df1s`` is run against ``self.df2`` up front.
    outputs = [f(frame, self.df2) for frame in self.df1s]

    for frame, output in zip(self.df1s, outputs):
        outer_level = output.index.levels[0]
        tm.assert_index_equal(outer_level, frame.index, check_names=False)

        got_inner = safe_sort(output.index.levels[1])
        want_inner = safe_sort(self.df2.columns.unique())
        tm.assert_numpy_array_equal(got_inner, want_inner)

    # every later output is compared with the first one
    for later in outputs[1:]:
        self.compare(later, outputs[0])
Example #14
Source File: merge.py From vnpy_crypto with MIT License | 5 votes |
def _sort_labels(uniques, left, right):
    """
    Remap ``left`` and ``right`` labels through ``sorting.safe_sort``.

    Both label arrays are concatenated, handed to ``sorting.safe_sort``
    together with ``uniques`` (``na_sentinel=-1``), converted with
    ``_ensure_int64`` and split back at ``len(left)``.

    Returns
    -------
    tuple
        ``(new_left, new_right)`` slices of the remapped labels.
    """
    # tuplesafe: anything that is not already an ndarray goes through Index
    if not isinstance(uniques, np.ndarray):
        uniques = Index(uniques).values

    split_at = len(left)
    combined = np.concatenate([left, right])

    _, remapped = sorting.safe_sort(uniques, combined, na_sentinel=-1)
    remapped = _ensure_int64(remapped)

    return remapped[:split_at], remapped[split_at:]
Example #15
Source File: test_window.py From vnpy_crypto with MIT License | 5 votes |
def test_pairwise_with_other(self, f):
    # DataFrame with another DataFrame, pairwise=True:
    # every frame of ``self.df1s`` is run against ``self.df2`` up front.
    outputs = [f(frame, self.df2) for frame in self.df1s]

    for frame, output in zip(self.df1s, outputs):
        outer_level = output.index.levels[0]
        tm.assert_index_equal(outer_level, frame.index, check_names=False)

        got_inner = safe_sort(output.index.levels[1])
        want_inner = safe_sort(self.df2.columns.unique())
        tm.assert_numpy_array_equal(got_inner, want_inner)

    # every later output is compared with the first one
    for later in outputs[1:]:
        self.compare(later, outputs[0])
Example #16
Source File: merge.py From recruit with Apache License 2.0 | 5 votes |
def _sort_labels(uniques, left, right):
    """
    Remap ``left`` and ``right`` labels through ``sorting.safe_sort``.

    Both label arrays are concatenated, handed to ``sorting.safe_sort``
    together with ``uniques`` (``na_sentinel=-1``), converted with
    ``ensure_int64`` and split back at ``len(left)``.

    Returns
    -------
    tuple
        ``(new_left, new_right)`` slices of the remapped labels.
    """
    # tuplesafe: anything that is not already an ndarray goes through Index
    if not isinstance(uniques, np.ndarray):
        uniques = Index(uniques).values

    split_at = len(left)
    combined = np.concatenate([left, right])

    _, remapped = sorting.safe_sort(uniques, combined, na_sentinel=-1)
    remapped = ensure_int64(remapped)

    return remapped[:split_at], remapped[split_at:]
Example #17
Source File: test_base.py From recruit with Apache License 2.0 | 5 votes |
def test_difference_base(self, sort):
    # Difference of two overlapping slices of ``self.create_index()``,
    # taken with the given ``sort`` argument.
    # (same results for py2 and py3 but sortedness not tested elsewhere)
    idx = self.create_index()
    head = idx[:4]
    tail = idx[3:]
    actual = head.difference(tail, sort)

    unsorted = Index([0, 'a', 1])
    # only ``sort=None`` is expected to produce the safe_sort ordering
    wanted = Index(safe_sort(unsorted)) if sort is None else unsorted

    tm.assert_index_equal(actual, wanted)
Example #18
Source File: test_window.py From recruit with Apache License 2.0 | 5 votes |
def test_pairwise_with_other(self, f):
    # DataFrame with another DataFrame, pairwise=True:
    # every frame of ``self.df1s`` is run against ``self.df2`` up front.
    outputs = [f(frame, self.df2) for frame in self.df1s]

    for frame, output in zip(self.df1s, outputs):
        outer_level = output.index.levels[0]
        tm.assert_index_equal(outer_level, frame.index, check_names=False)

        got_inner = safe_sort(output.index.levels[1])
        want_inner = safe_sort(self.df2.columns.unique())
        tm.assert_numpy_array_equal(got_inner, want_inner)

    # every later output is compared with the first one
    for later in outputs[1:]:
        self.compare(later, outputs[0])
Example #19
Source File: algorithms.py From vnpy_crypto with MIT License | 4 votes |
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    # Implementation notes: This method is responsible for 3 things
    # 1.) coercing data to array-like (ndarray, Index, extension array)
    # 2.) factorizing labels and uniques
    # 3.) Maybe boxing the output in an Index
    #
    # Step 2 is dispatched to extension types (like Categorical). They are
    # responsible only for factorization. All data coercion, sorting and boxing
    # should happen here.
    #
    # Arguments as used below:
    #   values      : input to factorize; coerced with _ensure_arraylike.
    #   sort        : when truthy and at least one unique exists, the
    #                 uniques are sorted and the labels remapped to match.
    #   order       : accepted but never read in this function body.
    #   na_sentinel : forwarded to the factorizers and to the sort step.
    #   size_hint   : forwarded to _factorize_array.
    # Returns the pair (labels, uniques).

    values = _ensure_arraylike(values)
    original = values

    if is_extension_array_dtype(values):
        values = getattr(values, '_values', values)
        labels, uniques = values.factorize(na_sentinel=na_sentinel)
        dtype = original.dtype
    else:
        values, dtype, _ = _ensure_data(values)

        if (is_datetime64_any_dtype(original) or
                is_timedelta64_dtype(original) or
                is_period_dtype(original)):
            na_value = na_value_for_dtype(original.dtype)
        else:
            na_value = None

        labels, uniques = _factorize_array(values,
                                           na_sentinel=na_sentinel,
                                           size_hint=size_hint,
                                           na_value=na_value)

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        if na_sentinel == -1:
            # take_1d only works for a missing-value marker of -1, so the
            # argsort fast path is restricted to that sentinel.
            try:
                order = uniques.argsort()
                order2 = order.argsort()
                labels = take_1d(order2, labels, fill_value=na_sentinel)
                uniques = uniques.take(order)
            except TypeError:
                # Mixed types, where uniques.argsort fails.
                uniques, labels = safe_sort(uniques, labels,
                                            na_sentinel=na_sentinel,
                                            assume_unique=True)
        else:
            # Any other sentinel goes straight to safe_sort.
            uniques, labels = safe_sort(uniques, labels,
                                        na_sentinel=na_sentinel,
                                        assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, original)

    # return original tenor
    if isinstance(original, ABCIndexClass):
        uniques = original._shallow_copy(uniques, name=None)
    elif isinstance(original, ABCSeries):
        from pandas import Index
        uniques = Index(uniques)

    return labels, uniques
Example #20
Source File: algorithms.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 4 votes |
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    # Implementation notes: three responsibilities live here
    #   1.) coerce the input to array-like (ndarray, Index, extension array)
    #   2.) factorize into labels and uniques
    #   3.) possibly box the uniques in an Index
    #
    # Step 2 is dispatched to extension types (like Categorical), which are
    # responsible only for factorization. Coercion, sorting and boxing are
    # all done in this function. ``order`` is accepted but never read.

    values = _ensure_arraylike(values)
    original = values

    if not is_extension_array_dtype(values):
        values, dtype, _ = _ensure_data(values)

        wants_na_value = (is_datetime64_any_dtype(original) or
                          is_timedelta64_dtype(original) or
                          is_period_dtype(original))
        na_value = (na_value_for_dtype(original.dtype)
                    if wants_na_value else None)

        labels, uniques = _factorize_array(values,
                                           na_sentinel=na_sentinel,
                                           size_hint=size_hint,
                                           na_value=na_value)
    else:
        values = getattr(values, '_values', values)
        labels, uniques = values.factorize(na_sentinel=na_sentinel)
        dtype = original.dtype

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        if na_sentinel != -1:
            uniques, labels = safe_sort(uniques, labels,
                                        na_sentinel=na_sentinel,
                                        assume_unique=True)
        else:
            # GH-25409 take_1d only works for na_sentinels of -1
            try:
                order = uniques.argsort()
                order2 = order.argsort()
                labels = take_1d(order2, labels, fill_value=na_sentinel)
                uniques = uniques.take(order)
            except TypeError:
                # Mixed types, where uniques.argsort fails.
                uniques, labels = safe_sort(uniques, labels,
                                            na_sentinel=na_sentinel,
                                            assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, original)

    # return original tenor
    if isinstance(original, ABCIndexClass):
        uniques = original._shallow_copy(uniques, name=None)
    elif isinstance(original, ABCSeries):
        from pandas import Index
        uniques = Index(uniques)

    return labels, uniques
Example #21
Source File: base.py From Splunking-Crime with GNU Affero General Public License v3.0 | 4 votes |
def difference(self, other):
    """
    Return a new Index with elements from the index that are not in
    `other`.

    This is the set difference of two Index objects.
    It's sorted if sorting is possible.

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    difference : Index

    Examples
    --------

    >>> idx1 = pd.Index([1, 2, 3, 4])
    >>> idx2 = pd.Index([3, 4, 5, 6])
    >>> idx1.difference(idx2)
    Int64Index([1, 2], dtype='int64')

    """
    self._assert_can_do_setop(other)

    if self.equals(other):
        # Build the empty result from ``self`` rather than a bare
        # ``Index([])`` so the result is derived from this index instead
        # of always being a base Index.
        return self._shallow_copy([])

    other, result_name = self._convert_can_do_setop(other)

    this = self._get_unique_index()

    # positions in ``this`` of the entries of ``other``; -1 entries
    # (filtered out here) are treated as "not present in this"
    indexer = this.get_indexer(other)
    indexer = indexer.take((indexer != -1).nonzero()[0])

    label_diff = np.setdiff1d(np.arange(this.size), indexer,
                              assume_unique=True)
    the_diff = this.values.take(label_diff)
    try:
        the_diff = sorting.safe_sort(the_diff)
    except TypeError:
        # deliberate best effort: unsortable values stay unsorted
        pass

    return this._shallow_copy(the_diff, name=result_name, freq=None)
Example #22
Source File: algorithms.py From Splunking-Crime with GNU Affero General Public License v3.0 | 4 votes |
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode ``values`` as integer labels plus the array of distinct values.

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence to encode.
    sort : boolean, default False
        When true (and at least one unique exists), uniques and labels
        are passed through ``safe_sort``.
    order : optional
        Accepted but not read by this implementation.
    na_sentinel : int, default -1
        Value to mark "not found".
    size_hint : optional
        Hint to the hashtable sizer; ``len(values)`` is used when falsy.

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        The unique values. An Index is returned when the passed values
        are an Index or a Series.

    Notes
    -----
    An array of Periods will ignore sort as it returns an always sorted
    PeriodIndex.
    """
    values = _ensure_arraylike(values)
    original = values
    values, dtype, _ = _ensure_data(values)
    (hash_klass, vec_klass), values = _get_data_algo(values, _hashtables)

    table = hash_klass(size_hint or len(values))
    collector = vec_klass()
    # nulls are only checked for when the original input is not integer-typed
    labels = _ensure_platform_int(
        table.get_labels(values, collector, 0, na_sentinel,
                         not is_integer_dtype(original)))
    uniques = collector.to_array()

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        uniques, labels = safe_sort(uniques, labels,
                                    na_sentinel=na_sentinel,
                                    assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, original)

    # return original tenor
    if isinstance(original, ABCIndexClass):
        uniques = original._shallow_copy(uniques, name=None)
    elif isinstance(original, ABCSeries):
        from pandas import Index
        uniques = Index(uniques)

    return labels, uniques
Example #23
Source File: base.py From elasticintel with GNU General Public License v3.0 | 4 votes |
def difference(self, other):
    """
    Return a new Index with elements from the index that are not in
    `other`.

    This is the set difference of two Index objects.
    It's sorted if sorting is possible.

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    difference : Index

    Examples
    --------

    >>> idx1 = pd.Index([1, 2, 3, 4])
    >>> idx2 = pd.Index([3, 4, 5, 6])
    >>> idx1.difference(idx2)
    Int64Index([1, 2], dtype='int64')

    """
    self._assert_can_do_setop(other)

    if self.equals(other):
        # Build the empty result from ``self`` rather than a bare
        # ``Index([])`` so the result is derived from this index instead
        # of always being a base Index.
        return self._shallow_copy([])

    other, result_name = self._convert_can_do_setop(other)

    this = self._get_unique_index()

    # positions in ``this`` of the entries of ``other``; -1 entries
    # (filtered out here) are treated as "not present in this"
    indexer = this.get_indexer(other)
    indexer = indexer.take((indexer != -1).nonzero()[0])

    label_diff = np.setdiff1d(np.arange(this.size), indexer,
                              assume_unique=True)
    the_diff = this.values.take(label_diff)
    try:
        the_diff = sorting.safe_sort(the_diff)
    except TypeError:
        # deliberate best effort: unsortable values stay unsorted
        pass

    return this._shallow_copy(the_diff, name=result_name, freq=None)
Example #24
Source File: base.py From vnpy_crypto with MIT License | 4 votes |
def difference(self, other):
    """
    Set difference: the entries of this index that do not occur in
    `other`, returned as a new Index.

    The result is sorted whenever the remaining values can be sorted.

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    difference : Index

    Examples
    --------

    >>> idx1 = pd.Index([1, 2, 3, 4])
    >>> idx2 = pd.Index([3, 4, 5, 6])
    >>> idx1.difference(idx2)
    Int64Index([1, 2], dtype='int64')

    """
    self._assert_can_do_setop(other)

    # identical indexes: nothing is left over
    if self.equals(other):
        return self._shallow_copy([])

    other, result_name = self._convert_can_do_setop(other)
    this = self._get_unique_index()

    # positions in ``this`` of the entries of ``other``; -1 entries are
    # dropped before taking the positional difference
    hits = this.get_indexer(other)
    hits = hits.take((hits != -1).nonzero()[0])

    all_positions = np.arange(this.size)
    kept_positions = np.setdiff1d(all_positions, hits, assume_unique=True)
    remaining = this.values.take(kept_positions)

    try:
        ordered = sorting.safe_sort(remaining)
    except TypeError:
        # unsortable values are returned in their existing order
        ordered = remaining

    return this._shallow_copy(ordered, name=result_name, freq=None)
Example #25
Source File: algorithms.py From elasticintel with GNU General Public License v3.0 | 4 votes |
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode ``values`` as integer labels plus the array of distinct values.

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence to encode.
    sort : boolean, default False
        When true (and at least one unique exists), uniques and labels
        are passed through ``safe_sort``.
    order : optional
        Accepted but not read by this implementation.
    na_sentinel : int, default -1
        Value to mark "not found".
    size_hint : optional
        Hint to the hashtable sizer; ``len(values)`` is used when falsy.

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        The unique values. An Index is returned when the passed values
        are an Index or a Series.

    Notes
    -----
    An array of Periods will ignore sort as it returns an always sorted
    PeriodIndex.
    """
    values = _ensure_arraylike(values)
    original = values
    values, dtype, _ = _ensure_data(values)
    (hash_klass, vec_klass), values = _get_data_algo(values, _hashtables)

    table = hash_klass(size_hint or len(values))
    collector = vec_klass()
    # nulls are only checked for when the original input is not integer-typed
    labels = _ensure_platform_int(
        table.get_labels(values, collector, 0, na_sentinel,
                         not is_integer_dtype(original)))
    uniques = collector.to_array()

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        uniques, labels = safe_sort(uniques, labels,
                                    na_sentinel=na_sentinel,
                                    assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, original)

    # return original tenor
    if isinstance(original, ABCIndexClass):
        uniques = original._shallow_copy(uniques, name=None)
    elif isinstance(original, ABCSeries):
        from pandas import Index
        uniques = Index(uniques)

    return labels, uniques
Example #26
Source File: algorithms.py From recruit with Apache License 2.0 | 4 votes |
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    # Implementation notes: three responsibilities live here
    #   1.) coerce the input to array-like (ndarray, Index, extension array)
    #   2.) factorize into labels and uniques
    #   3.) possibly box the uniques in an Index
    #
    # Step 2 is dispatched to extension types (like Categorical), which are
    # responsible only for factorization. Coercion, sorting and boxing are
    # all done in this function. ``order`` is accepted but never read.

    values = _ensure_arraylike(values)
    original = values

    if not is_extension_array_dtype(values):
        values, dtype, _ = _ensure_data(values)

        wants_na_value = (is_datetime64_any_dtype(original) or
                          is_timedelta64_dtype(original) or
                          is_period_dtype(original))
        na_value = (na_value_for_dtype(original.dtype)
                    if wants_na_value else None)

        labels, uniques = _factorize_array(values,
                                           na_sentinel=na_sentinel,
                                           size_hint=size_hint,
                                           na_value=na_value)
    else:
        values = getattr(values, '_values', values)
        labels, uniques = values.factorize(na_sentinel=na_sentinel)
        dtype = original.dtype

    if sort and len(uniques) > 0:
        from pandas.core.sorting import safe_sort
        if na_sentinel != -1:
            uniques, labels = safe_sort(uniques, labels,
                                        na_sentinel=na_sentinel,
                                        assume_unique=True)
        else:
            # GH-25409 take_1d only works for na_sentinels of -1
            try:
                order = uniques.argsort()
                order2 = order.argsort()
                labels = take_1d(order2, labels, fill_value=na_sentinel)
                uniques = uniques.take(order)
            except TypeError:
                # Mixed types, where uniques.argsort fails.
                uniques, labels = safe_sort(uniques, labels,
                                            na_sentinel=na_sentinel,
                                            assume_unique=True)

    uniques = _reconstruct_data(uniques, dtype, original)

    # return original tenor
    if isinstance(original, ABCIndexClass):
        uniques = original._shallow_copy(uniques, name=None)
    elif isinstance(original, ABCSeries):
        from pandas import Index
        uniques = Index(uniques)

    return labels, uniques