Python sklearn.metrics.euclidean_distances() Examples
The following are 21 code examples of sklearn.metrics.euclidean_distances(), collected from open-source projects. The original project and source file are noted above each example. You may also want to check out the other functions and classes available in the sklearn.metrics module.
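Before the project examples, here is a minimal, self-contained sketch of the function's basic behaviour (the toy arrays are illustrative; the call signatures follow the scikit-learn API):

import numpy as np
from sklearn.metrics import euclidean_distances

X = np.array([[0.0, 0.0], [3.0, 4.0]])  # 2 samples, 2 features
Y = np.array([[0.0, 0.0]])              # 1 sample

# Pairwise distances between rows of X and rows of Y: shape (2, 1)
print(euclidean_distances(X, Y))                 # [[0.], [5.]]

# With a single argument, distances of X against itself: shape (2, 2)
print(euclidean_distances(X))                    # [[0., 5.], [5., 0.]]

# squared=True returns squared distances, skipping the final sqrt
print(euclidean_distances(X, Y, squared=True))   # [[0.], [25.]]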
Example #1
Source File: _template.py From project-template with BSD 3-Clause "New" or "Revised" License
def predict(self, X):
    """ A reference implementation of a prediction for a classifier.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    y : ndarray, shape (n_samples,)
        The label for each sample is the label of the closest sample
        seen during fit.
    """
    # Check if fit had been called
    check_is_fitted(self, ['X_', 'y_'])

    # Input validation
    X = check_array(X)

    closest = np.argmin(euclidean_distances(X, self.X_), axis=1)
    return self.y_[closest]
Example #2
Source File: test_affinity_propagation.py From Mastering-Elasticsearch-7.0 with MIT License
def test_equal_similarities_and_preferences():
    # Unequal distances
    X = np.array([[0, 0], [1, 1], [-2, -2]])
    S = -euclidean_distances(X, squared=True)

    assert not _equal_similarities_and_preferences(S, np.array(0))
    assert not _equal_similarities_and_preferences(S, np.array([0, 0]))
    assert not _equal_similarities_and_preferences(S, np.array([0, 1]))

    # Equal distances
    X = np.array([[0, 0], [1, 1]])
    S = -euclidean_distances(X, squared=True)

    # Different preferences
    assert not _equal_similarities_and_preferences(S, np.array([0, 1]))

    # Same preferences
    assert _equal_similarities_and_preferences(S, np.array([0, 0]))
    assert _equal_similarities_and_preferences(S, np.array(0))
Example #3
Source File: test_estimation.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License
def test_shuffle_equal(verbose):
    # for this data set there shouldn't be any equal distances,
    # and shuffle should make no difference
    X, _ = make_classification(random_state=12354)
    dist = euclidean_distances(X)
    skew_shuffle, skew_no_shuffle = \
        [Hubness(metric='precomputed', shuffle_equal=v, verbose=verbose)
         .fit(dist).score()
         for v in [True, False]]
    assert skew_no_shuffle == skew_shuffle
Example #4
Source File: test_affinity_propagation.py From twitter-stock-recommendation with MIT License
def test_affinity_propagation():
    # Affinity Propagation algorithm
    # Compute similarities
    S = -euclidean_distances(X, squared=True)
    preference = np.median(S) * 10
    # Compute Affinity Propagation
    cluster_centers_indices, labels = affinity_propagation(
        S, preference=preference)

    n_clusters_ = len(cluster_centers_indices)

    assert_equal(n_clusters, n_clusters_)

    af = AffinityPropagation(preference=preference, affinity="precomputed")
    labels_precomputed = af.fit(S).labels_

    af = AffinityPropagation(preference=preference, verbose=True)
    labels = af.fit(X).labels_

    assert_array_equal(labels, labels_precomputed)

    cluster_centers_indices = af.cluster_centers_indices_

    n_clusters_ = len(cluster_centers_indices)
    assert_equal(np.unique(labels).size, n_clusters_)
    assert_equal(n_clusters, n_clusters_)

    # Test also with no copy
    _, labels_no_copy = affinity_propagation(S, preference=preference,
                                             copy=False)
    assert_array_equal(labels, labels_no_copy)

    # Test input validation
    assert_raises(ValueError, affinity_propagation, S[:, :-1])
    assert_raises(ValueError, affinity_propagation, S, damping=0)
    af = AffinityPropagation(affinity="unknown")
    assert_raises(ValueError, af.fit, X)
Example #5
Source File: test_random_projection.py From twitter-stock-recommendation with MIT License
def test_random_projection_embedding_quality():
    data, _ = make_sparse_random_data(8, 5000, 15000)
    eps = 0.2

    original_distances = euclidean_distances(data, squared=True)
    original_distances = original_distances.ravel()
    non_identical = original_distances != 0.0

    # remove 0 distances to avoid division by 0
    original_distances = original_distances[non_identical]

    for RandomProjection in all_RandomProjection:
        rp = RandomProjection(n_components='auto', eps=eps, random_state=0)
        projected = rp.fit_transform(data)

        projected_distances = euclidean_distances(projected, squared=True)
        projected_distances = projected_distances.ravel()

        # remove 0 distances to avoid division by 0
        projected_distances = projected_distances[non_identical]

        distances_ratio = projected_distances / original_distances

        # check that the automatically tuned values for the density respect
        # the contract for eps: pairwise distances are preserved according
        # to the Johnson-Lindenstrauss lemma
        assert_less(distances_ratio.max(), 1 + eps)
        assert_less(1 - eps, distances_ratio.min())
Example #6
Source File: test_mvmds.py From mvlearn with Apache License 2.0
def test_dissimilarity_precomputed_euclidean(data):
    test_views = []

    for i in data['samp_views']:
        test_views.append(euclidean_distances(i))

    mvmds1 = MVMDS(dissimilarity='euclidean')
    mvmds2 = MVMDS(dissimilarity='precomputed')

    fit1 = mvmds1.fit_transform(data['samp_views'])
    fit2 = mvmds2.fit_transform(test_views)

    np.testing.assert_almost_equal(np.abs(fit2), np.abs(fit1))
Example #7
Source File: RnaseqqcReport.py From CGATPipelines with MIT License
def __call__(self, track, slice=None):
    # remove WHERE when table cleaned up to remove header rows
    statement = (
        "SELECT transcript_id, TPM, sample_id FROM sailfish_transcripts")

    # fetch data
    df = pd.DataFrame.from_dict(self.getAll(statement))
    df = df.pivot('transcript_id', 'sample_id')['TPM']

    # calculate dissimilarities
    similarities = euclidean_distances(df.transpose())

    # run MDS
    mds = manifold.MDS(n_components=2, max_iter=3000,
                       eps=1e-9, dissimilarity="precomputed", n_jobs=1)
    mds = mds.fit(similarities)
    pos = pd.DataFrame(mds.embedding_)

    pos.columns = ["MD1", "MD2"]
    pos['sample'] = df.columns

    factors_df = self.getDataFrame(
        "SELECT * FROM factors WHERE factor != 'genome'")

    merged_df = pd.merge(pos, factors_df, left_on="sample",
                         right_on="sample_id")

    return merged_df.reset_index().set_index("factor")
Example #8
Source File: _mdsw.py From scikit-multilearn with BSD 2-Clause "Simplified" License
def fit_transform(self, X, y=None, init=None):
    """
    Fit the data from X, and return the embedded coordinates

    Parameters
    ----------
    X : array, shape=[n_samples, n_features], or [n_samples, n_samples] \
            if dissimilarity='precomputed'
        Input data.

    init : {None or ndarray, shape (n_samples,)}, optional
        If None, randomly chooses the initial configuration
        if ndarray, initialize the SMACOF algorithm with this array.
    """
    X = check_array(X)
    if X.shape[0] == X.shape[1] and self.dissimilarity != "precomputed":
        warnings.warn("The MDS API has changed. ``fit`` now constructs a"
                      " dissimilarity matrix from data. To use a custom "
                      "dissimilarity matrix, set "
                      "``dissimilarity='precomputed'``.")

    if self.dissimilarity == "precomputed":
        self.dissimilarity_matrix_ = X
    elif self.dissimilarity == "euclidean":
        self.dissimilarity_matrix_ = euclidean_distances(X)
    else:
        raise ValueError("Proximity must be 'precomputed' or 'euclidean'."
                         " Got %s instead" % str(self.dissimilarity))

    self.embedding_, self.stress_, self.n_iter_ = _smacof_w(
        self.dissimilarity_matrix_, self.n_uq, self.uq_weight,
        metric=self.metric,
        n_components=self.n_components, init=init,
        n_init=self.n_init, n_jobs=self.n_jobs,
        max_iter=self.max_iter, verbose=self.verbose,
        eps=self.eps, random_state=self.random_state,
        return_n_iter=True)

    return self.embedding_
Example #9
Source File: test_estimation.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License
def test_hubness_against_distance(has_self_distances):
    """Test hubness class against distance-based methods."""
    np.random.seed(123)
    X = np.random.rand(100, 50)
    D = euclidean_distances(X)
    verbose = 1

    hub = Hubness(k=10, metric='precomputed',
                  store_k_occurrence=True,
                  store_k_neighbors=True,
                  )
    hub.fit(D)
    skew_d = hub.score(has_self_distances=has_self_distances)
    neigh_d = hub.k_neighbors
    occ_d = hub.k_occurrence

    hub = Hubness(k=10, metric='euclidean',
                  store_k_neighbors=True,
                  store_k_occurrence=True,
                  verbose=verbose)
    hub.fit(X)
    skew_v = hub.score(X if not has_self_distances else None)
    neigh_v = hub.k_neighbors
    occ_v = hub.k_occurrence

    np.testing.assert_allclose(skew_d, skew_v, atol=1e-7)
    np.testing.assert_array_equal(neigh_d, neigh_v)
    np.testing.assert_array_equal(occ_d, occ_v)
Example #10
Source File: test_estimation.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License
def test_sparse_equal_dense_if_variable_hits_per_row(shuffle_equal):
    X, _ = make_classification(random_state=123)
    dist = euclidean_distances(X)
    dist[0, 1:3] = 999
    dist[1:3, 0] = 999
    dist[1, 1:5] = 999
    dist[1:5, 1] = 999

    sparse = dist.copy()
    sparse[0, 1:3] = 0
    sparse[1:3, 0] = 0
    sparse[1, 1:5] = 0
    sparse[1:5, 1] = 0
    sparse = csr_matrix(sparse)

    hub = Hubness(metric='precomputed',
                  shuffle_equal=shuffle_equal,
                  random_state=123)
    hub.fit(dist)
    skew_dense = hub.score(has_self_distances=True)

    hub = Hubness(metric='precomputed',
                  shuffle_equal=shuffle_equal,
                  random_state=123)
    hub.fit(sparse)
    skew_sparse = hub.score(has_self_distances=True)

    np.testing.assert_almost_equal(skew_dense, skew_sparse, decimal=2)
Example #11
Source File: test_estimation.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License
def test_sparse_equal_dense(verbose, shuffle_equal):
    X, _ = make_classification()
    dist_dense = euclidean_distances(X)
    dist_sparse = csr_matrix(dist_dense)

    hub = Hubness(metric='precomputed',
                  shuffle_equal=shuffle_equal,
                  verbose=verbose)
    hub.fit(dist_dense)
    skew_dense = hub.score(has_self_distances=True)

    hub.fit(dist_sparse)
    skew_sparse = hub.score(has_self_distances=True)

    np.testing.assert_almost_equal(skew_dense, skew_sparse)
Example #12
Source File: mdsp.py From libact with BSD 2-Clause "Simplified" License
def fit_transform(self, X, y=None, init=None):
    """
    Fit the data from X, and return the embedded coordinates

    Parameters
    ----------
    X : array, shape=[n_samples, n_features], or [n_samples, n_samples] \
            if dissimilarity='precomputed'
        Input data.

    init : {None or ndarray, shape (n_samples,)}, optional
        If None, randomly chooses the initial configuration
        if ndarray, initialize the SMACOF algorithm with this array.
    """
    X = check_array(X)
    if X.shape[0] == X.shape[1] and self.dissimilarity != "precomputed":
        warnings.warn("The MDS API has changed. ``fit`` now constructs a"
                      " dissimilarity matrix from data. To use a custom "
                      "dissimilarity matrix, set "
                      "``dissimilarity='precomputed'``.")

    if self.dissimilarity == "precomputed":
        self.dissimilarity_matrix_ = X
    elif self.dissimilarity == "euclidean":
        self.dissimilarity_matrix_ = euclidean_distances(X)
    else:
        raise ValueError("Proximity must be 'precomputed' or 'euclidean'."
                         " Got %s instead" % str(self.dissimilarity))

    self.embedding_, self.stress_, self.n_iter_ = smacof_p(
        self.dissimilarity_matrix_, self.n_uq, metric=self.metric,
        n_components=self.n_components, init=init,
        n_init=self.n_init, n_jobs=self.n_jobs,
        max_iter=self.max_iter, verbose=self.verbose,
        eps=self.eps, random_state=self.random_state,
        return_n_iter=True)

    return self.embedding_
Example #13
Source File: word_mover_distance.py From coling2018_fake-news-challenge with Apache License 2.0
def weighted_wmdistance(sent1_embs, sent2_embs, idfs, mean):
    wmd = 0.0
    for token1, x in sent1_embs:
        min_dist = sys.float_info.max
        weight = idfs[token1] if token1 in idfs else mean
        for _, y in sent2_embs:
            # euclidean_distances expects 2-D arrays, so reshape the
            # embedding vectors before computing the distance
            x = x.reshape(1, -1)
            y = y.reshape(1, -1)
            score = weight * euclidean_distances(x, y)
            if score < min_dist:
                min_dist = score
        wmd += min_dist
    return - float(wmd) / (len(sent1_embs) + len(sent2_embs))
Example #14
Source File: word_mover_distance.py From coling2018_fake-news-challenge with Apache License 2.0
def wmdistance(sent1_embs, sent2_embs):
    wmd = 0.0
    for _, x in sent1_embs:
        min_dist = sys.float_info.max
        for _, y in sent2_embs:
            x = x.reshape(1, -1)
            y = y.reshape(1, -1)
            distance = euclidean_distances(x, y)
            if distance < min_dist:
                min_dist = distance
        wmd += min_dist
    return - float(wmd) / (len(sent1_embs) + len(sent2_embs))

# Note that this breaks the symmetry and is not a distance anymore.
# To overcome this, we compute the average of the score on both sides:
# (weightedWMD(a, b) + weightedWMD(b, a)) / 2
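As a sketch of the symmetrisation the note above describes (symmetric_wmdistance is a hypothetical helper, not part of the original file):

def symmetric_wmdistance(sent1_embs, sent2_embs):
    # Hypothetical helper: average the asymmetric scores computed in
    # both directions to restore symmetry, as the note above suggests.
    return (wmdistance(sent1_embs, sent2_embs)
            + wmdistance(sent2_embs, sent1_embs)) / 2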
Example #15
Source File: LFSBSS.py From fsfc with MIT License
def predict(self, x):
    """
    Predict clusters for one sample

    Parameters
    ----------
    x: ndarray
        Samples to predict

    Returns
    -------
    label: int
        Predicted cluster
    """
    # Find the closest cluster to the sample: project x onto the
    # appropriate subspace, take the distance to the cluster mean and
    # normalise it by the cluster variance
    min_score = None
    closest = None
    for i in range(self.clusters):
        projection = x[:, self.features_[i]]
        norm = euclidean_distances(projection, self.means_[i])
        score = norm / self.vars_[i]
        if min_score is None or score < min_score:
            min_score = score
            closest = i
    return closest
Example #16
Source File: test_affinity_propagation.py From Mastering-Elasticsearch-7.0 with MIT License
def test_affinity_propagation_equal_mutual_similarities():
    X = np.array([[-1, 1], [1, -1]])
    S = -euclidean_distances(X, squared=True)

    # setting preference > similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S, preference=0)

    # expect every sample to become an exemplar
    assert_array_equal([0, 1], cluster_center_indices)
    assert_array_equal([0, 1], labels)

    # setting preference < similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S, preference=-10)

    # expect one cluster, with arbitrary (first) sample as exemplar
    assert_array_equal([0], cluster_center_indices)
    assert_array_equal([0, 0], labels)

    # setting different preferences
    cluster_center_indices, labels = assert_no_warnings(
        affinity_propagation, S, preference=[-20, -10])

    # expect one cluster, with highest-preference sample as exemplar
    assert_array_equal([1], cluster_center_indices)
    assert_array_equal([0, 0], labels)
Example #17
Source File: test_affinity_propagation.py From Mastering-Elasticsearch-7.0 with MIT License
def test_affinity_propagation():
    # Affinity Propagation algorithm
    # Compute similarities
    S = -euclidean_distances(X, squared=True)
    preference = np.median(S) * 10
    # Compute Affinity Propagation
    cluster_centers_indices, labels = affinity_propagation(
        S, preference=preference)

    n_clusters_ = len(cluster_centers_indices)

    assert_equal(n_clusters, n_clusters_)

    af = AffinityPropagation(preference=preference, affinity="precomputed")
    labels_precomputed = af.fit(S).labels_

    af = AffinityPropagation(preference=preference, verbose=True)
    labels = af.fit(X).labels_

    assert_array_equal(labels, labels_precomputed)

    cluster_centers_indices = af.cluster_centers_indices_

    n_clusters_ = len(cluster_centers_indices)
    assert_equal(np.unique(labels).size, n_clusters_)
    assert_equal(n_clusters, n_clusters_)

    # Test also with no copy
    _, labels_no_copy = affinity_propagation(S, preference=preference,
                                             copy=False)
    assert_array_equal(labels, labels_no_copy)

    # Test input validation
    assert_raises(ValueError, affinity_propagation, S[:, :-1])
    assert_raises(ValueError, affinity_propagation, S, damping=0)
    af = AffinityPropagation(affinity="unknown")
    assert_raises(ValueError, af.fit, X)
Example #18
Source File: test_random_projection.py From Mastering-Elasticsearch-7.0 with MIT License
def test_random_projection_embedding_quality():
    data, _ = make_sparse_random_data(8, 5000, 15000)
    eps = 0.2

    original_distances = euclidean_distances(data, squared=True)
    original_distances = original_distances.ravel()
    non_identical = original_distances != 0.0

    # remove 0 distances to avoid division by 0
    original_distances = original_distances[non_identical]

    for RandomProjection in all_RandomProjection:
        rp = RandomProjection(n_components='auto', eps=eps, random_state=0)
        projected = rp.fit_transform(data)

        projected_distances = euclidean_distances(projected, squared=True)
        projected_distances = projected_distances.ravel()

        # remove 0 distances to avoid division by 0
        projected_distances = projected_distances[non_identical]

        distances_ratio = projected_distances / original_distances

        # check that the automatically tuned values for the density respect
        # the contract for eps: pairwise distances are preserved according
        # to the Johnson-Lindenstrauss lemma
        assert_less(distances_ratio.max(), 1 + eps)
        assert_less(1 - eps, distances_ratio.min())
Example #19
Source File: robust_soft_learning_vector_quantization.py From scikit-multiflow with BSD 3-Clause "New" or "Revised" License
def _optimize(self, X, y):
    nb_prototypes = self.c_w_.size
    n_data, n_dim = X.shape
    prototypes = self.w_.reshape(nb_prototypes, n_dim)

    for i in range(n_data):
        xi = X[i]
        c_xi = int(y[i])
        best_euclid_corr = np.inf
        best_euclid_incorr = np.inf

        # find nearest correct and nearest wrong prototype
        for j in range(prototypes.shape[0]):
            if self.c_w_[j] == c_xi:
                eucl_dis = euclidean_distances(
                    xi.reshape(1, xi.size),
                    prototypes[j].reshape(1, prototypes[j].size))
                if eucl_dis < best_euclid_corr:
                    best_euclid_corr = eucl_dis
                    corr_index = j
            else:
                eucl_dis = euclidean_distances(
                    xi.reshape(1, xi.size),
                    prototypes[j].reshape(1, prototypes[j].size))
                if eucl_dis < best_euclid_incorr:
                    best_euclid_incorr = eucl_dis
                    incorr_index = j

        # Update nearest wrong prototype and nearest correct prototype
        # if correct prototype isn't the nearest
        if best_euclid_incorr < best_euclid_corr:
            self._update_prototype(j=corr_index, c_xi=c_xi, xi=xi,
                                   prototypes=prototypes)
            self._update_prototype(j=incorr_index, c_xi=c_xi, xi=xi,
                                   prototypes=prototypes)
Example #20
Source File: mvmds.py From mvlearn with Apache License 2.0
def fit(self, Xs):
    """
    Calculates dimensionally reduced components by inputting the
    Euclidean distances of each view, double centering them, and using
    the _commonpcs function to find common components between views.
    Works similarly to traditional, single-view Multidimensional Scaling.

    Parameters
    ----------
    Xs : list of array-likes or numpy.ndarray
        - Xs length: n_views
        - Xs[i] shape: (n_samples, n_features_i)
    """
    if self.n_components > len(Xs[0]):
        self.n_components = len(Xs[0])
        warnings.warn('The number of components you have requested is '
                      'greater than the number of samples in the '
                      'dataset. ' + str(self.n_components) +
                      ' components were computed instead.')

    Xs = check_Xs(Xs, multiview=True)
    mat = np.ones(shape=(len(Xs), len(Xs[0]), len(Xs[0])))

    # Double centering each view as in single-view MDS
    if self.dissimilarity == 'euclidean':
        for i in np.arange(len(Xs)):
            view = euclidean_distances(Xs[i])
            view_squared = np.power(np.array(view), 2)

            J = np.eye(len(view)) - (1 / len(view)) * np.ones(view.shape)
            B = -(1 / 2) * J @ view_squared @ J
            mat[i] = B

    # If user wants to input special distance matrix
    elif self.dissimilarity == 'precomputed':
        for i in np.arange(len(Xs)):
            if Xs[i].shape[0] != Xs[i].shape[1]:
                raise ValueError('The input distance matrix must be '
                                 'a square matrix')
            else:
                view = Xs[i]
                view_squared = np.power(np.array(view), 2)

                J = np.eye(len(view)) - (1 / len(view)) * np.ones(view.shape)
                B = -(1 / 2) * J @ view_squared @ J
                mat[i] = B
    else:
        raise ValueError('The parameter `dissimilarity` must be one of '
                         '{`euclidean`, `precomputed`}')

    self.components_ = self._commonpcs(mat)

    return self
Example #21
Source File: test_euclidean_distances.py From mars with Apache License 2.0
def testEuclideanDistancesExecution(self):
    dense_raw_x = np.random.rand(30, 10)
    dense_raw_y = np.random.rand(40, 10)
    sparse_raw_x = SparseNDArray(sps.random(30, 10, density=0.5, format='csr'))
    sparse_raw_y = SparseNDArray(sps.random(40, 10, density=0.5, format='csr'))

    for raw_x, raw_y in [(dense_raw_x, dense_raw_y),
                         (sparse_raw_x, sparse_raw_y)]:
        x = mt.tensor(raw_x, chunk_size=9)
        y = mt.tensor(raw_y, chunk_size=7)

        distance = euclidean_distances(x, y)

        result = self.executor.execute_tensor(distance, concat=True)[0]
        expected = sk_euclidean_distances(raw_x, Y=raw_y)
        np.testing.assert_almost_equal(result, expected)

        x_norm = x.sum(axis=1)[..., np.newaxis]
        y_norm = y.sum(axis=1)[np.newaxis, ...]
        distance = euclidean_distances(x, y, X_norm_squared=x_norm,
                                       Y_norm_squared=y_norm)
        x_raw_norm = raw_x.sum(axis=1)[..., np.newaxis]
        y_raw_norm = raw_y.sum(axis=1)[np.newaxis, ...]

        result = self.executor.execute_tensor(distance, concat=True)[0]
        expected = sk_euclidean_distances(raw_x, raw_y,
                                          X_norm_squared=x_raw_norm,
                                          Y_norm_squared=y_raw_norm)
        np.testing.assert_almost_equal(result, expected)

        x_sq = (x ** 2).astype(np.float32)
        y_sq = (y ** 2).astype(np.float32)
        distance = euclidean_distances(x_sq, y_sq, squared=True)

        x_raw_sq = (raw_x ** 2).astype(np.float32)
        y_raw_sq = (raw_y ** 2).astype(np.float32)
        result = self.executor.execute_tensor(distance, concat=True)[0]
        expected = sk_euclidean_distances(x_raw_sq, y_raw_sq, squared=True)
        np.testing.assert_almost_equal(result, expected, decimal=6)

        # test x is y
        distance = euclidean_distances(x)
        result = self.executor.execute_tensor(distance, concat=True)[0]
        expected = sk_euclidean_distances(raw_x)
        np.testing.assert_almost_equal(result, expected)