Python sklearn.utils.safe_indexing() Examples
The following are 13 code examples of sklearn.utils.safe_indexing().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.utils, or try the search function.
Example #1
Source File: test_utils.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_safe_indexing_pandas():
    """Check ``safe_indexing`` on a pandas DataFrame against the ndarray result.

    The test is skipped when pandas cannot be imported. It first compares
    row selection on a DataFrame with row selection on the ndarray the
    DataFrame was built from, then repeats the DataFrame selection for every
    combination of writable / read-only data and writable / read-only
    indices.
    """
    try:
        import pandas as pd
    except ImportError:
        raise SkipTest("Pandas not found")

    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    X_df = pd.DataFrame(X)
    inds = np.array([1, 2])
    X_df_indexed = safe_indexing(X_df, inds)
    # The reference must come from the plain ndarray; indexing X_df a second
    # time would only compare the DataFrame code path with itself.
    X_indexed = safe_indexing(X, inds)
    assert_array_equal(np.array(X_df_indexed), X_indexed)

    # fun with read-only data in dataframes
    # this happens in joblib memmapping
    X.setflags(write=False)
    X_df_readonly = pd.DataFrame(X)
    inds_readonly = inds.copy()
    inds_readonly.setflags(write=False)

    for this_df, this_inds in product([X_df, X_df_readonly],
                                      [inds, inds_readonly]):
        # Warnings raised while indexing are recorded and discarded.
        with warnings.catch_warnings(record=True):
            X_df_indexed = safe_indexing(this_df, this_inds)

        assert_array_equal(np.array(X_df_indexed), X_indexed)
Example #2
Source File: test_utils.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_safe_indexing_pandas():
    """Check ``safe_indexing`` on a pandas DataFrame against the ndarray result.

    The test is skipped when pandas cannot be imported. It first compares
    row selection on a DataFrame with row selection on the ndarray the
    DataFrame was built from, then repeats the DataFrame selection for every
    combination of writable / read-only data and writable / read-only
    indices.
    """
    try:
        import pandas as pd
    except ImportError:
        raise SkipTest("Pandas not found")

    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    X_df = pd.DataFrame(X)
    inds = np.array([1, 2])
    X_df_indexed = safe_indexing(X_df, inds)
    # The reference must come from the plain ndarray; indexing X_df a second
    # time would only compare the DataFrame code path with itself.
    X_indexed = safe_indexing(X, inds)
    assert_array_equal(np.array(X_df_indexed), X_indexed)

    # fun with read-only data in dataframes
    # this happens in joblib memmapping
    X.setflags(write=False)
    X_df_readonly = pd.DataFrame(X)
    inds_readonly = inds.copy()
    inds_readonly.setflags(write=False)

    for this_df, this_inds in product([X_df, X_df_readonly],
                                      [inds, inds_readonly]):
        # Warnings raised while indexing are recorded and discarded.
        with warnings.catch_warnings(record=True):
            X_df_indexed = safe_indexing(this_df, this_inds)

        assert_array_equal(np.array(X_df_indexed), X_indexed)
Example #3
Source File: test_utils.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_safe_indexing():
    """Check that ``safe_indexing`` agrees for a nested list and an ndarray.

    The rows picked from the list input must equal the rows picked from the
    equivalent ndarray, and both must equal plain NumPy fancy indexing.
    """
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    picked = np.array([1, 2])
    rows_as_array = np.array(rows)

    from_list = np.array(safe_indexing(rows, picked))
    from_array = safe_indexing(rows_as_array, picked)

    assert_array_equal(from_list, from_array)
    assert_array_equal(from_list, rows_as_array[picked])
Example #4
Source File: test_utils.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_safe_indexing_mock_pandas():
    """Check ``safe_indexing`` on a ``MockDataFrame`` against the ndarray result.

    Rows selected through the mock DataFrame wrapper must equal the rows
    selected from the ndarray the wrapper was built from.
    """
    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    X_df = MockDataFrame(X)
    inds = np.array([1, 2])
    X_df_indexed = safe_indexing(X_df, inds)
    # The reference must come from the plain ndarray; indexing X_df a second
    # time would only compare the mock DataFrame code path with itself.
    X_indexed = safe_indexing(X, inds)
    assert_array_equal(np.array(X_df_indexed), X_indexed)
Example #5
Source File: utils.py From skorch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _indexing_other(data, i): # sklearn's safe_indexing doesn't work with tuples since 0.22 if isinstance(i, (int, np.integer, slice, tuple)): return data[i] return safe_indexing(data, i)
Example #6
Source File: _validation.py From mriqc with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _shuffle(y, groups, random_state):
    """Return ``y`` reordered by a random permutation.

    Without ``groups`` the permutation covers all ``len(y)`` positions.
    With ``groups`` the positions are permuted only among entries that share
    the same group value. The permutation is drawn from ``random_state`` and
    applied with ``safe_indexing``.
    """
    if groups is not None:
        order = np.arange(len(groups))
        for label in np.unique(groups):
            members = groups == label
            # Permute the positions of this group among themselves only.
            order[members] = random_state.permutation(order[members])
    else:
        order = random_state.permutation(len(y))
    return safe_indexing(y, order)
Example #7
Source File: base.py From skoot with MIT License | 5 votes |
def _reorder(X, y, random_state, shuffle):
    """Return ``X`` and ``y`` in the same row order, permuted if requested.

    When ``shuffle`` is truthy the row order is a permutation drawn from
    ``random_state``; otherwise it is the identity order
    ``0 .. X.shape[0] - 1``. ``X`` is indexed through ``safe_indexing`` and
    ``y`` through ``y[order]``.
    """
    identity = np.arange(X.shape[0])
    # reorder if needed
    order = random_state.permutation(identity) if shuffle else identity
    X_reordered = safe_indexing(X, order)
    y_reordered = y[order]
    return X_reordered, y_reordered
Example #8
Source File: sklearn.py From optuna with MIT License | 5 votes |
def _safe_indexing( X, # type: Union[OneDimArrayLikeType, TwoDimArrayLikeType] indices, # type: OneDimArrayLikeType ): # type: (...) -> Union[OneDimArrayLikeType, TwoDimArrayLikeType] if X is None: return X return sklearn_safe_indexing(X, indices)
Example #9
Source File: features.py From SecuML with GNU General Public License v2.0 | 5 votes |
def get_from_ids(self, instance_ids):
    """Build a ``Features`` object restricted to the given instance ids.

    Each id in ``instance_ids.ids`` is translated to a position via
    ``self.instance_ids.get_index``; the matching entries of ``self.values``
    are selected with ``safe_indexing``.

    Raises ``StreamingUnsupported`` when ``self.streaming`` is truthy.
    """
    if self.streaming:
        raise StreamingUnsupported('get_from_ids is not supported for '
                                   'streaming features.')
    positions = list(map(self.instance_ids.get_index, instance_ids.ids))
    selected = safe_indexing(self.values, positions)
    return Features(selected, self.info, instance_ids)
Example #10
Source File: features.py From SecuML with GNU General Public License v2.0 | 5 votes |
def get_from_indices(self, instance_ids, indices):
    """Build a ``Features`` object from the rows at the given positions.

    ``indices`` selects entries of ``self.values`` through ``safe_indexing``.
    The returned ``Features`` is constructed with the selected values,
    ``self.info`` and ``instance_ids``.

    Raises ``StreamingUnsupported`` when ``self.streaming`` is truthy.
    """
    if self.streaming:
        # The message names this method; it previously said 'get_from_ids',
        # copied from the sibling method.
        raise StreamingUnsupported('get_from_indices is not supported for '
                                   'streaming features.')
    if len(indices) > 0:
        values = safe_indexing(self.values, indices)
    else:
        # Empty selection: a zero-row array that keeps the column count of
        # self.values, built without calling safe_indexing.
        values = np.empty((0, self.values.shape[1]))
    return Features(values, self.info, instance_ids)
Example #11
Source File: test_utils.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_safe_indexing():
    """Check that ``safe_indexing`` agrees for a nested list and an ndarray.

    The rows picked from the list input must equal the rows picked from the
    equivalent ndarray, and both must equal plain NumPy fancy indexing.
    """
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    picked = np.array([1, 2])
    rows_as_array = np.array(rows)

    from_list = np.array(safe_indexing(rows, picked))
    from_array = safe_indexing(rows_as_array, picked)

    assert_array_equal(from_list, from_array)
    assert_array_equal(from_list, rows_as_array[picked])
Example #12
Source File: test_utils.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_safe_indexing_mock_pandas():
    """Check ``safe_indexing`` on a ``MockDataFrame`` against the ndarray result.

    Rows selected through the mock DataFrame wrapper must equal the rows
    selected from the ndarray the wrapper was built from.
    """
    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    X_df = MockDataFrame(X)
    inds = np.array([1, 2])
    X_df_indexed = safe_indexing(X_df, inds)
    # The reference must come from the plain ndarray; indexing X_df a second
    # time would only compare the mock DataFrame code path with itself.
    X_indexed = safe_indexing(X, inds)
    assert_array_equal(np.array(X_df_indexed), X_indexed)
Example #13
Source File: KMeansClustering.py From driverlessai-recipes with Apache License 2.0 | 4 votes |
def my_davies_bouldin_score(X, labels):
    """Computes the Davies-Bouldin score.

    The score is defined as the ratio of within-cluster distances to
    between-cluster distances.

    Read more in the :ref:`User Guide <davies-bouldin_index>`.

    Parameters
    ----------
    X : array-like, shape (``n_samples``, ``n_features``)
        List of ``n_features``-dimensional data points. Each row corresponds
        to a single data point.

    labels : array-like, shape (``n_samples``,)
        Predicted labels for each sample.

    Returns
    -------
    score: float
        The resulting Davies-Bouldin score.

    References
    ----------
    .. [1] Davies, David L.; Bouldin, Donald W. (1979).
       `"A Cluster Separation Measure"
       <https://ieeexplore.ieee.org/document/4766909>`__.
       IEEE Transactions on Pattern Analysis and Machine Intelligence.
       PAMI-1 (2): 224-227
    """
    X, labels = check_X_y(X, labels)
    le = LabelEncoder()
    labels = le.fit_transform(labels)
    n_samples, n_features = X.shape
    n_labels = len(le.classes_)
    check_number_of_labels(n_labels, n_samples)

    intra_dists = np.zeros(n_labels)
    # Builtin ``float`` replaces the ``np.float`` alias, which NumPy removed.
    centroids = np.zeros((n_labels, n_features), dtype=float)
    for k in range(n_labels):
        cluster_k = safe_indexing(X, labels == k)
        centroid = cluster_k.mean(axis=0)
        centroids[k] = centroid
        # Mean distance from the members of cluster k to its centroid.
        intra_dists[k] = np.average(pairwise_distances(
            cluster_k, [centroid]))

    # centroid_distances will contain zeros in the diagonal
    centroid_distances = pairwise_distances(centroids)

    if np.allclose(intra_dists, 0) or np.allclose(centroid_distances, 0):
        return 0.0

    # Compute score avoiding division by zero by adding an epsilon
    # this leads to high values in the diagonal's result
    score = (intra_dists[:, None] + intra_dists) / (centroid_distances + 1e-15)

    # Simply put the diagonal to zero
    score[np.eye(centroid_distances.shape[0]) == 1] = 0

    # Here is the original code
    # score = (intra_dists[:, None] + intra_dists) / (centroid_distances)
    # score[score == np.inf] = np.nan
    return np.mean(np.nanmax(score, axis=1))