Python sklearn.metrics.euclidean_distances() Examples

The following are 21 code examples of sklearn.metrics.euclidean_distances(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.metrics , or try the search function .
Example #1
Source File: _template.py    From project-template with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def predict(self, X):
        """ A reference implementation of a prediction for a classifier.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        y : ndarray, shape (n_samples,)
            The label for each sample is the label of the closest sample
            seen during fit.
        """
        # Check is fit had been called
        check_is_fitted(self, ['X_', 'y_'])

        # Input validation
        X = check_array(X)

        closest = np.argmin(euclidean_distances(X, self.X_), axis=1)
        return self.y_[closest] 
Example #2
Source File: test_affinity_propagation.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_equal_similarities_and_preferences():
    # Unequal distances
    X = np.array([[0, 0], [1, 1], [-2, -2]])
    S = -euclidean_distances(X, squared=True)

    assert not _equal_similarities_and_preferences(S, np.array(0))
    assert not _equal_similarities_and_preferences(S, np.array([0, 0]))
    assert not _equal_similarities_and_preferences(S, np.array([0, 1]))

    # Equal distances
    X = np.array([[0, 0], [1, 1]])
    S = -euclidean_distances(X, squared=True)

    # Different preferences
    assert not _equal_similarities_and_preferences(S, np.array([0, 1]))

    # Same preferences
    assert _equal_similarities_and_preferences(S, np.array([0, 0]))
    assert _equal_similarities_and_preferences(S, np.array(0)) 
Example #3
Source File: test_estimation.py    From scikit-hubness with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_shuffle_equal(verbose):
    # for this data set there shouldn't be any equal distances,
    # and shuffle should make no difference
    X, _ = make_classification(random_state=12354)
    dist = euclidean_distances(X)
    skew_shuffle, skew_no_shuffle = \
        [Hubness(metric='precomputed', shuffle_equal=v, verbose=verbose)
         .fit(dist).score() for v in [True, False]]
    assert skew_no_shuffle == skew_shuffle 
Example #4
Source File: test_affinity_propagation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_affinity_propagation():
    # Affinity Propagation algorithm
    # Compute similarities
    S = -euclidean_distances(X, squared=True)
    preference = np.median(S) * 10
    # Compute Affinity Propagation
    cluster_centers_indices, labels = affinity_propagation(
        S, preference=preference)

    n_clusters_ = len(cluster_centers_indices)

    assert_equal(n_clusters, n_clusters_)

    af = AffinityPropagation(preference=preference, affinity="precomputed")
    labels_precomputed = af.fit(S).labels_

    af = AffinityPropagation(preference=preference, verbose=True)
    labels = af.fit(X).labels_

    assert_array_equal(labels, labels_precomputed)

    cluster_centers_indices = af.cluster_centers_indices_

    n_clusters_ = len(cluster_centers_indices)
    assert_equal(np.unique(labels).size, n_clusters_)
    assert_equal(n_clusters, n_clusters_)

    # Test also with no copy
    _, labels_no_copy = affinity_propagation(S, preference=preference,
                                             copy=False)
    assert_array_equal(labels, labels_no_copy)

    # Test input validation
    assert_raises(ValueError, affinity_propagation, S[:, :-1])
    assert_raises(ValueError, affinity_propagation, S, damping=0)
    af = AffinityPropagation(affinity="unknown")
    assert_raises(ValueError, af.fit, X) 
Example #5
Source File: test_random_projection.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_random_projection_embedding_quality():
    data, _ = make_sparse_random_data(8, 5000, 15000)
    eps = 0.2

    original_distances = euclidean_distances(data, squared=True)
    original_distances = original_distances.ravel()
    non_identical = original_distances != 0.0

    # remove 0 distances to avoid division by 0
    original_distances = original_distances[non_identical]

    for RandomProjection in all_RandomProjection:
        rp = RandomProjection(n_components='auto', eps=eps, random_state=0)
        projected = rp.fit_transform(data)

        projected_distances = euclidean_distances(projected, squared=True)
        projected_distances = projected_distances.ravel()

        # remove 0 distances to avoid division by 0
        projected_distances = projected_distances[non_identical]

        distances_ratio = projected_distances / original_distances

        # check that the automatically tuned values for the density respect the
        # contract for eps: pairwise distances are preserved according to the
        # Johnson-Lindenstrauss lemma
        assert_less(distances_ratio.max(), 1 + eps)
        assert_less(1 - eps, distances_ratio.min()) 
Example #6
Source File: test_mvmds.py    From mvlearn with Apache License 2.0 5 votes vote down vote up
def test_dissimilarity_precomputed_euclidean(data):
    test_views = []
    for i in data['samp_views']:
        test_views.append(euclidean_distances(i))
    mvmds1 = MVMDS(dissimilarity='euclidean')
    mvmds2 = MVMDS(dissimilarity='precomputed')

    fit1 = mvmds1.fit_transform(data['samp_views'])
    fit2 = mvmds2.fit_transform(test_views)

    np.testing.assert_almost_equal(np.abs(fit2), np.abs(fit1)) 
Example #7
Source File: RnaseqqcReport.py    From CGATPipelines with MIT License 5 votes vote down vote up
def __call__(self, track,  slice=None):

        # remove WHERE when table cleaned up to remove header rows
        statement = (
            "SELECT transcript_id, TPM, sample_id FROM sailfish_transcripts")

        # fetch data
        df = pd.DataFrame.from_dict(self.getAll(statement))

        df = df.pivot('transcript_id', 'sample_id')['TPM']

        # calculate dissimilarities
        similarities = euclidean_distances(df.transpose())

        # run MDS
        mds = manifold.MDS(n_components=2, max_iter=3000,
                           eps=1e-9, dissimilarity="precomputed", n_jobs=1)
        mds = mds.fit(similarities)
        pos = pd.DataFrame(mds.embedding_)

        pos.columns = ["MD1", "MD2"]
        pos['sample'] = df.columns

        factors_df = self.getDataFrame(
            "SELECT * FROM factors WHERE factor != 'genome'")

        merged_df = pd.merge(pos, factors_df,
                             left_on="sample", right_on="sample_id")
        return merged_df.reset_index().set_index("factor") 
Example #8
Source File: _mdsw.py    From scikit-multilearn with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def fit_transform(self, X, y=None, init=None):
        """
        Fit the data from X, and returns the embedded coordinates

        Parameters
        ----------
        X : array, shape=[n_samples, n_features], or [n_samples, n_samples] \
                if dissimilarity='precomputed'
            Input data.

        init : {None or ndarray, shape (n_samples,)}, optional
            If None, randomly chooses the initial configuration
            if ndarray, initialize the SMACOF algorithm with this array.

        """
        X = check_array(X)
        if X.shape[0] == X.shape[1] and self.dissimilarity != "precomputed":
            warnings.warn("The MDS API has changed. ``fit`` now constructs an"
                          " dissimilarity matrix from data. To use a custom "
                          "dissimilarity matrix, set "
                          "``dissimilarity=precomputed``.")

        if self.dissimilarity == "precomputed":
            self.dissimilarity_matrix_ = X
        elif self.dissimilarity == "euclidean":
            self.dissimilarity_matrix_ = euclidean_distances(X)
        else:
            raise ValueError("Proximity must be 'precomputed' or 'euclidean'."
                             " Got %s instead" % str(self.dissimilarity))

        self.embedding_, self.stress_, self.n_iter_ = _smacof_w(
            self.dissimilarity_matrix_, self.n_uq, self.uq_weight, metric=self.metric,
            n_components=self.n_components, init=init, n_init=self.n_init,
            n_jobs=self.n_jobs, max_iter=self.max_iter, verbose=self.verbose,
            eps=self.eps, random_state=self.random_state,
            return_n_iter=True)

        return self.embedding_ 
Example #9
Source File: test_estimation.py    From scikit-hubness with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_hubness_against_distance(has_self_distances):
    """Test hubness class against distance-based methods."""

    np.random.seed(123)
    X = np.random.rand(100, 50)
    D = euclidean_distances(X)
    verbose = 1

    hub = Hubness(k=10, metric='precomputed',
                  store_k_occurrence=True,
                  store_k_neighbors=True,
                  )
    hub.fit(D)
    skew_d = hub.score(has_self_distances=has_self_distances)
    neigh_d = hub.k_neighbors
    occ_d = hub.k_occurrence

    hub = Hubness(k=10, metric='euclidean',
                  store_k_neighbors=True,
                  store_k_occurrence=True,
                  verbose=verbose)
    hub.fit(X)
    skew_v = hub.score(X if not has_self_distances else None)
    neigh_v = hub.k_neighbors
    occ_v = hub.k_occurrence

    np.testing.assert_allclose(skew_d, skew_v, atol=1e-7)
    np.testing.assert_array_equal(neigh_d, neigh_v)
    np.testing.assert_array_equal(occ_d, occ_v) 
Example #10
Source File: test_estimation.py    From scikit-hubness with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_sparse_equal_dense_if_variable_hits_per_row(shuffle_equal):
    X, _ = make_classification(random_state=123)
    dist = euclidean_distances(X)
    dist[0, 1:3] = 999
    dist[1:3, 0] = 999
    dist[1, 1:5] = 999
    dist[1:5, 1] = 999
    sparse = dist.copy()
    sparse[0, 1:3] = 0
    sparse[1:3, 0] = 0
    sparse[1, 1:5] = 0
    sparse[1:5, 1] = 0
    sparse = csr_matrix(sparse)

    hub = Hubness(metric='precomputed',
                  shuffle_equal=shuffle_equal,
                  random_state=123)
    hub.fit(dist)
    skew_dense = hub.score(has_self_distances=True)

    hub = Hubness(metric='precomputed',
                  shuffle_equal=shuffle_equal,
                  random_state=123)
    hub.fit(sparse)
    skew_sparse = hub.score(has_self_distances=True)

    np.testing.assert_almost_equal(skew_dense, skew_sparse, decimal=2) 
Example #11
Source File: test_estimation.py    From scikit-hubness with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_sparse_equal_dense(verbose, shuffle_equal):
    X, _ = make_classification()
    dist_dense = euclidean_distances(X)
    dist_sparse = csr_matrix(dist_dense)

    hub = Hubness(metric='precomputed',
                  shuffle_equal=shuffle_equal,
                  verbose=verbose)
    hub.fit(dist_dense)
    skew_dense = hub.score(has_self_distances=True)

    hub.fit(dist_sparse)
    skew_sparse = hub.score(has_self_distances=True)

    np.testing.assert_almost_equal(skew_dense, skew_sparse) 
Example #12
Source File: mdsp.py    From libact with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def fit_transform(self, X, y=None, init=None):
        """
        Fit the data from X, and returns the embedded coordinates

        Parameters
        ----------
        X : array, shape=[n_samples, n_features], or [n_samples, n_samples] \
                if dissimilarity='precomputed'
            Input data.

        init : {None or ndarray, shape (n_samples,)}, optional
            If None, randomly chooses the initial configuration
            if ndarray, initialize the SMACOF algorithm with this array.

        """
        X = check_array(X)
        if X.shape[0] == X.shape[1] and self.dissimilarity != "precomputed":
            warnings.warn("The MDS API has changed. ``fit`` now constructs an"
                          " dissimilarity matrix from data. To use a custom "
                          "dissimilarity matrix, set "
                          "``dissimilarity=precomputed``.")

        if self.dissimilarity == "precomputed":
            self.dissimilarity_matrix_ = X
        elif self.dissimilarity == "euclidean":
            self.dissimilarity_matrix_ = euclidean_distances(X)
        else:
            raise ValueError("Proximity must be 'precomputed' or 'euclidean'."
                             " Got %s instead" % str(self.dissimilarity))

        self.embedding_, self.stress_, self.n_iter_ = smacof_p(
            self.dissimilarity_matrix_, self.n_uq, metric=self.metric,
            n_components=self.n_components, init=init, n_init=self.n_init,
            n_jobs=self.n_jobs, max_iter=self.max_iter, verbose=self.verbose,
            eps=self.eps, random_state=self.random_state,
            return_n_iter=True)

        return self.embedding_ 
Example #13
Source File: word_mover_distance.py    From coling2018_fake-news-challenge with Apache License 2.0 5 votes vote down vote up
def weighted_wmdistance(sent1_embs, sent2_embs, idfs, mean):
    wmd = 0.0
    for token1, x in sent1_embs:
        min_dist = sys.float_info.max
        weight = idfs[token1] if token1 in idfs else mean
        for _, y in sent2_embs:
            print(x, x.shape())
            print(y, y.shape())
            score = weight * euclidean_distances(x,y) 
            exit(0)
            if score < min_dist:
                min_dist = score
        wmd += min_dist
    return - float(wmd) / (len(sent1_embs) + len(sent2_embs)) 
Example #14
Source File: word_mover_distance.py    From coling2018_fake-news-challenge with Apache License 2.0 5 votes vote down vote up
def wmdistance(sent1_embs, sent2_embs):
    wmd = 0.0
    for _,x in sent1_embs:
        min_dist = sys.float_info.max
        for _,y in sent2_embs:
            x = x.reshape(1, -1)
            y = y.reshape(1, -1)
            distance = euclidean_distances(x,y)
            if distance < min_dist:
                min_dist = distance
        wmd += min_dist
    return - float(wmd) / (len(sent1_embs) + len(sent2_embs))
    
# Note that this breaks the symmetry and is not a distance anymore:
# To overcome this, we compute the average of the score in both side: (weigthedWMD(a,b) + weightedWMD(b,a))/2 
Example #15
Source File: LFSBSS.py    From fsfc with MIT License 5 votes vote down vote up
def predict(self, x):
        """
        Predict clusters for one sample

        Parameters
        ----------
        x: ndarray
            Samples to predict

        Returns
        -------
        label: int
            Predicted cluster
        """

        # Find the closest cluster to samples
        # To do it, project x to appropriate subspace, find distance to mean value and norm by variance
        min_score = None
        closest = None
        for i in range(self.clusters):
            projection = x[:, self.features_[i]]
            norm = euclidean_distances(projection, self.means_[i])
            score = norm / self.vars_[i]
            if min_score is None or score < min_score:
                min_score = score
                closest = i
        return closest 
Example #16
Source File: test_affinity_propagation.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_affinity_propagation_equal_mutual_similarities():
    X = np.array([[-1, 1], [1, -1]])
    S = -euclidean_distances(X, squared=True)

    # setting preference > similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S, preference=0)

    # expect every sample to become an exemplar
    assert_array_equal([0, 1], cluster_center_indices)
    assert_array_equal([0, 1], labels)

    # setting preference < similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S, preference=-10)

    # expect one cluster, with arbitrary (first) sample as exemplar
    assert_array_equal([0], cluster_center_indices)
    assert_array_equal([0, 0], labels)

    # setting different preferences
    cluster_center_indices, labels = assert_no_warnings(
        affinity_propagation, S, preference=[-20, -10])

    # expect one cluster, with highest-preference sample as exemplar
    assert_array_equal([1], cluster_center_indices)
    assert_array_equal([0, 0], labels) 
Example #17
Source File: test_affinity_propagation.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_affinity_propagation():
    # Affinity Propagation algorithm
    # Compute similarities
    S = -euclidean_distances(X, squared=True)
    preference = np.median(S) * 10
    # Compute Affinity Propagation
    cluster_centers_indices, labels = affinity_propagation(
        S, preference=preference)

    n_clusters_ = len(cluster_centers_indices)

    assert_equal(n_clusters, n_clusters_)

    af = AffinityPropagation(preference=preference, affinity="precomputed")
    labels_precomputed = af.fit(S).labels_

    af = AffinityPropagation(preference=preference, verbose=True)
    labels = af.fit(X).labels_

    assert_array_equal(labels, labels_precomputed)

    cluster_centers_indices = af.cluster_centers_indices_

    n_clusters_ = len(cluster_centers_indices)
    assert_equal(np.unique(labels).size, n_clusters_)
    assert_equal(n_clusters, n_clusters_)

    # Test also with no copy
    _, labels_no_copy = affinity_propagation(S, preference=preference,
                                             copy=False)
    assert_array_equal(labels, labels_no_copy)

    # Test input validation
    assert_raises(ValueError, affinity_propagation, S[:, :-1])
    assert_raises(ValueError, affinity_propagation, S, damping=0)
    af = AffinityPropagation(affinity="unknown")
    assert_raises(ValueError, af.fit, X) 
Example #18
Source File: test_random_projection.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_random_projection_embedding_quality():
    data, _ = make_sparse_random_data(8, 5000, 15000)
    eps = 0.2

    original_distances = euclidean_distances(data, squared=True)
    original_distances = original_distances.ravel()
    non_identical = original_distances != 0.0

    # remove 0 distances to avoid division by 0
    original_distances = original_distances[non_identical]

    for RandomProjection in all_RandomProjection:
        rp = RandomProjection(n_components='auto', eps=eps, random_state=0)
        projected = rp.fit_transform(data)

        projected_distances = euclidean_distances(projected, squared=True)
        projected_distances = projected_distances.ravel()

        # remove 0 distances to avoid division by 0
        projected_distances = projected_distances[non_identical]

        distances_ratio = projected_distances / original_distances

        # check that the automatically tuned values for the density respect the
        # contract for eps: pairwise distances are preserved according to the
        # Johnson-Lindenstrauss lemma
        assert_less(distances_ratio.max(), 1 + eps)
        assert_less(1 - eps, distances_ratio.min()) 
Example #19
Source File: robust_soft_learning_vector_quantization.py    From scikit-multiflow with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _optimize(self, X, y):
        nb_prototypes = self.c_w_.size

        n_data, n_dim = X.shape
        prototypes = self.w_.reshape(nb_prototypes, n_dim)

        for i in range(n_data):
            xi = X[i]
            c_xi = int(y[i])
            best_euclid_corr = np.inf
            best_euclid_incorr = np.inf

            # find nearest correct and nearest wrong prototype
            for j in range(prototypes.shape[0]):
                if self.c_w_[j] == c_xi:
                    eucl_dis = euclidean_distances(xi.reshape(1, xi.size),
                                                   prototypes[j]
                                                   .reshape(1, prototypes[j]
                                                   .size))
                    if eucl_dis < best_euclid_corr:
                        best_euclid_corr = eucl_dis
                        corr_index = j
                else:
                    eucl_dis = euclidean_distances(xi.reshape(1, xi.size),
                                                   prototypes[j]
                                                   .reshape(1, prototypes[j]
                                                   .size))
                    if eucl_dis < best_euclid_incorr:
                        best_euclid_incorr = eucl_dis
                        incorr_index = j

            # Update nearest wrong prototype and nearest correct prototype
            # if correct prototype isn't the nearest
            if best_euclid_incorr < best_euclid_corr:
                self._update_prototype(j=corr_index, c_xi=c_xi, xi=xi,
                                       prototypes=prototypes)
                self._update_prototype(j=incorr_index, c_xi=c_xi, xi=xi,
                                       prototypes=prototypes) 
Example #20
Source File: mvmds.py    From mvlearn with Apache License 2.0 4 votes vote down vote up
def fit(self, Xs):
        """
        Calculates dimensionally reduced components by inputting the Euclidean
        distances of each view, double centering them, and using the _commonpcs
        function to find common components between views. Works similarly to
        traditional, single-view Multidimensional Scaling.

        Parameters
        ----------
        Xs: list of array-likes or numpy.ndarray
                - Xs length: n_views
                - Xs[i] shape: (n_samples, n_features_i)

        """

        if (self.n_components) > len(Xs[0]):
            self.n_components = len(Xs[0])
            warnings.warn('The number of components you have requested is '
                          + 'greater than the number of samples in the '
                          + 'dataset. ' + str(self.n_components)
                          + ' components were computed instead.')

        Xs = check_Xs(Xs, multiview=True)

        mat = np.ones(shape=(len(Xs), len(Xs[0]), len(Xs[0])))

        # Double centering each view as in single-view MDS

        if (self.dissimilarity == 'euclidean'):

            for i in np.arange(len(Xs)):
                view = euclidean_distances(Xs[i])
                view_squared = np.power(np.array(view), 2)

                J = np.eye(len(view)) - (1/len(view))*np.ones(view.shape)
                B = -(1/2) * J @ view_squared @ J
                mat[i] = B

        # If user wants to input special distance matrix

        elif (self.dissimilarity == 'precomputed'):
            for i in np.arange(len(Xs)):
                if (Xs[i].shape[0] != Xs[i].shape[1]):
                    raise ValueError('The input distance matrix must be '
                                     + 'a square matrix')
                else:
                    view = Xs[i]
                    view_squared = np.power(np.array(view), 2)
                    J = np.eye(len(view)) - (1/len(view))*np.ones(view.shape)
                    B = -(1/2) * J @ view_squared @ J
                    mat[i] = B
        else:
            raise ValueError('The parameter `dissimilarity` must be one of \
                {`euclidean`, `precomputed`}')

        self.components_ = self._commonpcs(mat)

        return self 
Example #21
Source File: test_euclidean_distances.py    From mars with Apache License 2.0 4 votes vote down vote up
def testEuclideanDistancesExecution(self):
        dense_raw_x = np.random.rand(30, 10)
        dense_raw_y = np.random.rand(40, 10)
        sparse_raw_x = SparseNDArray(sps.random(30, 10, density=0.5, format='csr'))
        sparse_raw_y = SparseNDArray(sps.random(40, 10, density=0.5, format='csr'))

        for raw_x, raw_y in [(dense_raw_x, dense_raw_y),
                             (sparse_raw_x, sparse_raw_y)]:
            x = mt.tensor(raw_x, chunk_size=9)
            y = mt.tensor(raw_y, chunk_size=7)

            distance = euclidean_distances(x, y)

            result = self.executor.execute_tensor(distance, concat=True)[0]
            expected = sk_euclidean_distances(raw_x, Y=raw_y)
            np.testing.assert_almost_equal(result, expected)

            x_norm = x.sum(axis=1)[..., np.newaxis]
            y_norm = y.sum(axis=1)[np.newaxis, ...]
            distance = euclidean_distances(x, y, X_norm_squared=x_norm,
                                           Y_norm_squared=y_norm)
            x_raw_norm = raw_x.sum(axis=1)[..., np.newaxis]
            y_raw_norm = raw_y.sum(axis=1)[np.newaxis, ...]

            result = self.executor.execute_tensor(distance, concat=True)[0]
            expected = sk_euclidean_distances(raw_x, raw_y, X_norm_squared=x_raw_norm,
                                              Y_norm_squared=y_raw_norm)
            np.testing.assert_almost_equal(result, expected)

            x_sq = (x ** 2).astype(np.float32)
            y_sq = (y ** 2).astype(np.float32)

            distance = euclidean_distances(x_sq, y_sq, squared=True)

            x_raw_sq = (raw_x ** 2).astype(np.float32)
            y_raw_sq = (raw_y ** 2).astype(np.float32)

            result = self.executor.execute_tensor(distance, concat=True)[0]
            expected = sk_euclidean_distances(x_raw_sq, y_raw_sq, squared=True)
            np.testing.assert_almost_equal(result, expected, decimal=6)

            # test x is y
            distance = euclidean_distances(x)

            result = self.executor.execute_tensor(distance, concat=True)[0]
            expected = sk_euclidean_distances(raw_x)

            np.testing.assert_almost_equal(result, expected)