Python scipy.spatial.distance.squareform() Examples
The following are 30 code examples of scipy.spatial.distance.squareform(). Each example comes from an open-source project; the source file, project, and license are named above the code. You may also want to check out the other available functions and classes of the scipy.spatial.distance module.
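Before the project examples, a minimal sketch (assuming only numpy and scipy) of what squareform() does: it converts a condensed distance vector, as returned by pdist(), into the redundant square distance matrix, and converts such a matrix back into the condensed vector.

import numpy as np
from scipy.spatial.distance import pdist, squareform

points = np.array([[0.0, 0.0], [3.0, 0.0], [0.0, 4.0]])

condensed = pdist(points)       # shape (3,): the upper triangle as a flat vector
matrix = squareform(condensed)  # shape (3, 3): symmetric, with a zero diagonal
roundtrip = squareform(matrix)  # back to the condensed vector

assert matrix[0, 1] == condensed[0]
assert np.allclose(roundtrip, condensed)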
Example #1
Source File: test_t_sne.py From Mastering-Elasticsearch-7.0 with MIT License
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
                     verbose=False, perplexity=0.1, skip_num_points=0):
    distances = pairwise_distances(pos_input).astype(np.float32)
    args = distances, perplexity, verbose
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64, copy=False)
    pij_input = _joint_probabilities(*args)
    pij_input = squareform(pij_input).astype(np.float32)
    grad_bh = np.zeros(pos_output.shape, dtype=np.float32)

    from scipy.sparse import csr_matrix
    P = csr_matrix(pij_input)
    neighbors = P.indices.astype(np.int64)
    indptr = P.indptr.astype(np.int64)

    _barnes_hut_tsne.gradient(P.data, pos_output, neighbors, indptr,
                              grad_bh, 0.5, 2, 1, skip_num_points=0)
    assert_array_almost_equal(grad_bh, grad_output, decimal=4)
Example #2
Source File: dataset.py From neural-combinatorial-optimization-rl-tensorflow with MIT License
def k_nearest_neighbor(self, sequence):
    # Calculate dist_matrix
    dist_array = pdist(sequence)
    dist_matrix = squareform(dist_array)
    # Construct tour
    new_sequence = [sequence[0]]
    current_city = 0
    visited_cities = [0]
    for i in range(1, len(sequence)):
        j = np.random.randint(0, min(len(sequence) - i, self.kNN))
        next_city = [index for index in dist_matrix[current_city].argsort()
                     if index not in visited_cities][j]
        visited_cities.append(next_city)
        new_sequence.append(sequence[next_city])
        current_city = next_city
    return np.asarray(new_sequence)

# Generate random TSP-TW instance
Example #3
Source File: coords2sort_order.py From pyscf with Apache License 2.0
def coords2sort_order(a2c):
    """ Delivers a list of atom indices which generates a near-diagonal
    overlap for a given set of atom coordinates """
    na = a2c.shape[0]
    aa2d = squareform(pdist(a2c))
    mxd = np.amax(aa2d) + 1.0
    a = 0
    lsa = []
    for ia in range(na):
        lsa.append(a)
        asrt = np.argsort(aa2d[a])
        for ja in range(1, na):
            b = asrt[ja]
            if b not in lsa:
                break
        aa2d[a, b] = aa2d[b, a] = mxd
        a = b
    return np.array(lsa)
Example #4
Source File: post_proc.py From HorizonNet with MIT License
def vote(vec, tol):
    vec = np.sort(vec)
    n = np.arange(len(vec))[::-1]
    n = n[:, None] - n[None, :] + 1.0
    l = squareform(pdist(vec[:, None], 'minkowski', p=1) + 1e-9)
    invalid = (n < len(vec) * 0.4) | (l > tol)
    if (~invalid).sum() == 0 or len(vec) < tol:
        best_fit = np.median(vec)
        p_score = 0
    else:
        l[invalid] = 1e5
        n[invalid] = -1
        score = n
        max_idx = score.argmax()
        max_row = max_idx // len(vec)
        max_col = max_idx % len(vec)
        assert max_col > max_row
        best_fit = vec[max_row:max_col + 1].mean()
        p_score = (max_col - max_row + 1) / len(vec)

    l1_score = np.abs(vec - best_fit).mean()

    return best_fit, p_score, l1_score
Example #5
Source File: decisionboundaryplot.py From highdimensional-decision-boundary-plot with MIT License
def _get_sorted_db_keypoint_distances(self, N=None):
    """Use a minimum spanning tree heuristic to find the N largest gaps in the
    line constituted by the current decision boundary keypoints.
    """
    if N is None:
        N = self.n_interpolated_keypoints
    edges = minimum_spanning_tree(
        squareform(pdist(self.decision_boundary_points_2d))
    )
    edged = np.array(
        [
            euclidean(
                self.decision_boundary_points_2d[u],
                self.decision_boundary_points_2d[v],
            )
            for u, v in edges
        ]
    )
    gap_edge_idx = np.argsort(edged)[::-1][: int(N)]
    edges = edges[gap_edge_idx]
    gap_distances = np.square(edged[gap_edge_idx])
    gap_probability_scores = gap_distances / np.sum(gap_distances)
    return edges, gap_distances, gap_probability_scores
Example #6
Source File: Variogram.py From scikit-gstat with MIT License
def __vdiff_indexer(self):
    """Pairwise indexer

    Returns an iterator over the values or coordinates in squareform
    coordinates. The iterable will be of type tuple.

    Returns
    -------
    iterable
    """
    l = len(self.values)

    for i in range(l):
        for j in range(l):
            if i < j:
                yield i, j
Example #7
Source File: test_nearest_neighbors.py From openTSNE with BSD 3-Clause "New" or "Revised" License
def test_cosine_distance(self):
    k = 15
    # Compute cosine distance nearest neighbors using ball tree
    knn_index = nearest_neighbors.BallTree("cosine")
    indices, distances = knn_index.build(self.x1, k=k)

    # Compute the exact nearest neighbors as a reference
    true_distances = squareform(pdist(self.x1, metric="cosine"))
    true_indices_ = np.argsort(true_distances, axis=1)[:, 1:k + 1]
    true_distances_ = np.vstack([d[i] for d, i in zip(true_distances, true_indices_)])

    np.testing.assert_array_equal(
        indices, true_indices_, err_msg="Nearest neighbors do not match"
    )
    np.testing.assert_array_equal(
        distances, true_distances_, err_msg="Distances do not match"
    )
Example #8
Source File: Variogram.py From scikit-gstat with MIT License
def value_matrix(self):
    """Value matrix

    Returns a matrix of pairwise differences in absolute values. The
    matrix will have the shape (m, m) with m = len(Variogram.values).
    Note that Variogram.values holds the values themselves, while the
    value_matrix consists of their pairwise differences.

    Returns
    -------
    values : numpy.matrix
        Matrix of pairwise absolute differences of the values.

    See Also
    --------
    Variogram._diff
    """
    return squareform(self._diff)
Example #9
Source File: squareform.py From mars with Apache License 2.0
def _execute_map(cls, ctx, op):
    inputs, device_id, xp = as_same_device(
        [ctx[inp.key] for inp in op.inputs], device=op.device, ret_extra=True)

    if len(inputs) == 2 and not inputs[1]:
        # check fail
        raise ValueError('Distance matrix X must be symmetric.')

    if xp is cp:  # pragma: no cover
        raise NotImplementedError('`squareform` does not support running on GPU yet')

    with device(device_id):
        x = inputs[0]
        if x.ndim == 1:
            cls._to_matrix(ctx, xp, x, op)
        else:
            cls._to_vector(ctx, xp, x, op)
Example #10
Source File: squareform.py From mars with Apache License 2.0
def execute(cls, ctx, op):
    if op.stage == OperandStage.map:
        cls._execute_map(ctx, op)
    elif op.stage == OperandStage.reduce:
        cls._execute_reduce(ctx, op)
    else:
        from scipy.spatial.distance import squareform

        (x,), device_id, xp = as_same_device(
            [ctx[inp.key] for inp in op.inputs], device=op.device, ret_extra=True)

        if xp is cp:  # pragma: no cover
            raise NotImplementedError('`squareform` does not support running on GPU yet')

        with device(device_id):
            ctx[op.outputs[0].key] = squareform(x, checks=op.checks)
Example #11
Source File: safe_io.py From safepy with GNU General Public License v3.0
def calculate_edge_lengths(G, verbose=True):
    # Calculate the lengths of the edges
    if verbose:
        print('Calculating edge lengths...')

    x = np.matrix(G.nodes.data('x'))[:, 1]
    y = np.matrix(G.nodes.data('y'))[:, 1]

    node_coordinates = np.concatenate([x, y], axis=1)
    node_distances = squareform(pdist(node_coordinates, 'euclidean'))

    adjacency_matrix = np.array(nx.adjacency_matrix(G).todense())
    adjacency_matrix = adjacency_matrix.astype('float')
    adjacency_matrix[adjacency_matrix == 0] = np.nan

    edge_lengths = np.multiply(node_distances, adjacency_matrix)

    edge_attr_dict = {index: v for index, v in np.ndenumerate(edge_lengths) if ~np.isnan(v)}
    nx.set_edge_attributes(G, edge_attr_dict, 'length')

    return G
Example #12
Source File: post_proc2.py From LayoutNetv2 with MIT License
def vote(vec, tol):
    vec = np.sort(vec)
    n = np.arange(len(vec))[::-1]
    n = n[:, None] - n[None, :] + 1.0
    l = squareform(pdist(vec[:, None], 'minkowski', p=1) + 1e-9)
    invalid = (n < len(vec) * 0.4) | (l > tol)
    if (~invalid).sum() == 0 or len(vec) < tol:
        best_fit = np.median(vec)
        p_score = 0
    else:
        l[invalid] = 1e5
        n[invalid] = -1
        score = n
        max_idx = score.argmax()
        max_row = max_idx // len(vec)
        max_col = max_idx % len(vec)
        assert max_col > max_row
        best_fit = vec[max_row:max_col + 1].mean()
        p_score = (max_col - max_row + 1) / len(vec)

    l1_score = np.abs(vec - best_fit).mean()

    return best_fit, p_score, l1_score
Example #13
Source File: test_isc.py From brainiak with Apache License 2.0
def correlated_timeseries(n_subjects, n_TRs, noise=0, random_state=None):
    prng = np.random.RandomState(random_state)
    signal = prng.randn(n_TRs)
    correlated = True
    while correlated:
        uncorrelated = np.random.randn(n_TRs, n_subjects)[:, np.newaxis, :]
        unc_max = np.amax(squareform(np.corrcoef(
            uncorrelated[:, 0, :].T), checks=False))
        unc_mean = np.mean(squareform(np.corrcoef(
            uncorrelated[:, 0, :].T), checks=False))
        if unc_max < .3 and np.abs(unc_mean) < .001:
            correlated = False
    data = np.repeat(np.column_stack((signal, signal))[..., np.newaxis],
                     20, axis=2)
    data = np.concatenate((data, uncorrelated), axis=1)
    data = data + np.random.randn(n_TRs, 3, n_subjects) * noise
    return data

# Compute ISCs using different input types
# List of subjects with one voxel/ROI
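A note on checks=False as used above: by default squareform() validates that a square input is symmetric with a zero diagonal, and a correlation matrix from np.corrcoef() has ones on the diagonal, so validation would raise a ValueError. Passing checks=False skips the validation and simply extracts the upper triangle. A minimal sketch, assuming only numpy and scipy:

import numpy as np
from scipy.spatial.distance import squareform

corr = np.corrcoef(np.random.randn(4, 10))  # 4x4, ones on the diagonal
pairwise = squareform(corr, checks=False)   # condensed vector of the 6 off-diagonal pairs
assert pairwise.shape == (6,)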
Example #14
Source File: bayesian_nn.py From Stein-Variational-Gradient-Descent with MIT License
def svgd_kernel(self, h=-1):
    sq_dist = pdist(self.theta)
    pairwise_dists = squareform(sq_dist) ** 2
    if h < 0:  # if h < 0, use the median trick
        h = np.median(pairwise_dists)
        h = np.sqrt(0.5 * h / np.log(self.theta.shape[0] + 1))

    # compute the RBF kernel
    Kxy = np.exp(-pairwise_dists / h ** 2 / 2)

    dxkxy = -np.matmul(Kxy, self.theta)
    sumkxy = np.sum(Kxy, axis=1)
    for i in range(self.theta.shape[1]):
        dxkxy[:, i] = dxkxy[:, i] + np.multiply(self.theta[:, i], sumkxy)
    dxkxy = dxkxy / (h ** 2)
    return (Kxy, dxkxy)
Example #15
Source File: svgd.py From Stein-Variational-Gradient-Descent with MIT License
def svgd_kernel(self, theta, h=-1):
    sq_dist = pdist(theta)
    pairwise_dists = squareform(sq_dist) ** 2
    if h < 0:  # if h < 0, use the median trick
        h = np.median(pairwise_dists)
        h = np.sqrt(0.5 * h / np.log(theta.shape[0] + 1))

    # compute the RBF kernel
    Kxy = np.exp(-pairwise_dists / h ** 2 / 2)

    dxkxy = -np.matmul(Kxy, theta)
    sumkxy = np.sum(Kxy, axis=1)
    for i in range(theta.shape[1]):
        dxkxy[:, i] = dxkxy[:, i] + np.multiply(theta[:, i], sumkxy)
    dxkxy = dxkxy / (h ** 2)
    return (Kxy, dxkxy)
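A note on the median trick in the two SVGD examples above: pairwise_dists holds squared distances, so the bandwidth is set to h² = med / (2 log(n+1)), where med is the median squared pairwise distance and n is the number of particles. With that choice, exp(-med / (2h²)) = exp(-log(n+1)) = 1/(n+1), so the kernel weights a typical particle receives from its n neighbors sum to roughly one; this is the bandwidth heuristic proposed in the original SVGD paper.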
Example #16
Source File: kernels.py From BrainSpace with BSD 3-Clause "New" or "Revised" License
def _build_kernel(x, kernel, gamma=None):
    if kernel in {'pearson', 'spearman'}:
        if kernel == 'spearman':
            x = np.apply_along_axis(rankdata, 1, x)
        return np.corrcoef(x)

    if kernel in {'cosine', 'normalized_angle'}:
        x = 1 - squareform(pdist(x, metric='cosine'))
        if kernel == 'normalized_angle':
            # np.arccos(x, x) computes the arccosine in place, using x as the output array
            x = 1 - np.arccos(x, x) / np.pi
        return x

    if kernel == 'gaussian':
        if gamma is None:
            gamma = 1 / x.shape[1]
        return rbf_kernel(x, gamma=gamma)

    if callable(kernel):
        return kernel(x)

    raise ValueError("Unknown kernel '{0}'.".format(kernel))
Example #17
Source File: test_dbscan.py From Mastering-Elasticsearch-7.0 with MIT License
def test_dbscan_similarity():
    # Tests the DBSCAN algorithm with a similarity array.
    # Parameters chosen specifically for this task.
    eps = 0.15
    min_samples = 10
    # Compute similarities
    D = distance.squareform(distance.pdist(X))
    D /= np.max(D)
    # Compute DBSCAN
    core_samples, labels = dbscan(D, metric="precomputed", eps=eps,
                                  min_samples=min_samples)
    # number of clusters, ignoring noise if present
    n_clusters_1 = len(set(labels)) - (1 if -1 in labels else 0)
    assert_equal(n_clusters_1, n_clusters)

    db = DBSCAN(metric="precomputed", eps=eps, min_samples=min_samples)
    labels = db.fit(D).labels_

    n_clusters_2 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_2, n_clusters)
Example #18
Source File: test_pairwise.py From Mastering-Elasticsearch-7.0 with MIT License
def test_euclidean_distances_sym(dtype, x_array_constr):
    # check that euclidean distances gives same result as scipy pdist
    # when only X is provided
    rng = np.random.RandomState(0)
    X = rng.random_sample((100, 10)).astype(dtype, copy=False)
    X[X < 0.8] = 0

    expected = squareform(pdist(X))

    X = x_array_constr(X)
    distances = euclidean_distances(X)

    # the default rtol=1e-7 is too close to the float32 precision
    # and fails due to rounding errors.
    assert_allclose(distances, expected, rtol=1e-6)
    assert distances.dtype == dtype
Example #19
Source File: kernel.py From m-phate with GNU General Public License v3.0
def square_pdist(X):
    return distance.squareform(distance.pdist(X))
Example #20
Source File: Variogram.py From scikit-gstat with MIT License
def scattergram(self, ax=None, show=True):
    # create a new plot or use the given
    if ax is None:
        fig, ax = plt.subplots(1, 1)
    else:
        fig = ax.get_figure()

    tail = np.empty(0)
    head = tail.copy()

    for h in np.unique(self.lag_groups()):
        # get the head and tail
        x, y = np.where(squareform(self.lag_groups()) == h)

        # concatenate
        tail = np.concatenate((tail, self.values[x]))
        head = np.concatenate((head, self.values[y]))

    # plot the mean on tail and head
    ax.vlines(np.mean(tail), np.min(tail), np.max(tail),
              linestyles='--', color='red', lw=2)
    ax.hlines(np.mean(head), np.min(head), np.max(head),
              linestyles='--', color='red', lw=2)

    # plot
    ax.scatter(tail, head, 10, marker='o', color='orange')

    # annotate
    ax.set_ylabel('head')
    ax.set_xlabel('tail')

    # show the figure
    if show:  # pragma: no cover
        fig.show()

    return fig
Example #21
Source File: segmenter.py From msaf with MIT License
def compute_ssm(X, metric="seuclidean"):
    """Computes the self-similarity matrix of X."""
    D = distance.pdist(X, metric=metric)
    D = distance.squareform(D)
    D /= float(D.max())
    return 1 - D
Example #22
Source File: _hierarchy.py From q2-qemistree with BSD 2-Clause "Simplified" License
def build_tree(relabeled_fingerprints: pd.DataFrame,
               metric: str = 'euclidean') -> TreeNode:
    '''
    This function makes a tree of relatedness between mass-spectrometry
    features using molecular substructure fingerprints.
    '''
    distmat = pairwise_distances(X=relabeled_fingerprints.values,
                                 Y=None, metric=metric)
    distsq = squareform(distmat, checks=False)
    linkage_matrix = linkage(distsq, method='average')
    tree = TreeNode.from_linkage_matrix(linkage_matrix,
                                        relabeled_fingerprints.index.tolist())
    return tree
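A note on the pattern above: scipy.cluster.hierarchy.linkage() expects a condensed distance vector, not a square matrix, so the square output of pairwise_distances() must first be collapsed with squareform(); checks=False tolerates the small floating-point asymmetries such matrices often carry. A minimal sketch of the same round trip, assuming only numpy and scipy:

import numpy as np
from scipy.cluster.hierarchy import linkage
from scipy.spatial.distance import pdist, squareform

X = np.random.rand(5, 3)
square = squareform(pdist(X))  # stands in for a pairwise_distances() result
Z = linkage(squareform(square, checks=False), method='average')
assert Z.shape == (4, 4)       # n - 1 merge rows, 4 columns each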
Example #23
Source File: segmenter.py From msaf with MIT License
def compute_ssm(X, metric="seuclidean"):
    """Computes the self-similarity matrix of X."""
    D = distance.pdist(X, metric=metric)
    D = distance.squareform(D)
    D /= D.max()
    return 1 - D
Example #24
Source File: atlas3.py From ssbio with MIT License
def remove_correlated_feats(df):
    tmp = df.T
    # Remove columns with no variation
    nunique = tmp.apply(pd.Series.nunique)
    cols_to_drop = nunique[nunique == 1].index
    tmp.drop(cols_to_drop, axis=1, inplace=True)

    perc_spearman = scipy.stats.spearmanr(tmp)
    abs_corr = np.subtract(np.ones(shape=perc_spearman.correlation.shape),
                           np.absolute(perc_spearman.correlation))
    np.fill_diagonal(abs_corr, 0)
    # some floating point mismatches, just make symmetric
    abs_corr_clean = np.maximum(abs_corr, abs_corr.transpose())

    clustering = linkage(squareform(abs_corr_clean), method='average')
    clusters = fcluster(clustering, .1, criterion='distance')
    names = tmp.columns.tolist()
    names_to_cluster = list(zip(names, clusters))
    indices_to_keep = []

    ### Extract models closest to cluster centroids
    for x in range(1, len(set(clusters)) + 1):
        # Create mask from the list of assignments for extracting submatrix of the cluster
        mask = np.array([1 if i == x else 0 for i in clusters], dtype=bool)
        # Take the index of the column with the smallest sum of distances from the submatrix
        idx = np.argmin(sum(abs_corr_clean[:, mask][mask, :]))
        # Extract names of cluster elements from names_to_cluster
        sublist = [name for (name, cluster) in names_to_cluster if cluster == x]
        # Element closest to centroid
        centroid = sublist[idx]
        indices_to_keep.append(centroid)

    return df.loc[df.index.isin(indices_to_keep)]
Example #25
Source File: candidates.py From luna16 with BSD 2-Clause "Simplified" License
def merge_candidates_scan(candidates, seriesuid, distance=5.):
    distances = pdist(candidates, metric='euclidean')
    adjacency_matrix = squareform(distances)

    # Determine nodes within distance, replace by 1 (=adjacency matrix)
    adjacency_matrix = np.where(adjacency_matrix <= distance, 1, 0)

    # Determine all connected components in the graph
    n, labels = connected_components(adjacency_matrix)
    new_candidates = np.zeros((n, 3))

    # Take the mean for these connected components
    for cluster_i in range(n):
        points = candidates[np.where(labels == cluster_i)]
        center = np.mean(points, axis=0)
        new_candidates[cluster_i, :] = center

    x = new_candidates[:, 0]
    y = new_candidates[:, 1]
    z = new_candidates[:, 2]
    labels = [seriesuid] * len(x)
    class_name = [0] * len(x)

    data = zip(labels, x, y, z, class_name)

    new_candidates = pd.DataFrame(data, columns=CANDIDATES_COLUMNS)

    return new_candidates
Example #26
Source File: test_t_sne.py From Mastering-Elasticsearch-7.0 with MIT License
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                              n_samples, n_components)

        k = n_samples - 1
        bt = BallTree(distances)
        distances_nn, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        distances_nn = np.array([distances[i, neighbors_nn[i]]
                                 for i in range(n_samples)])
        assert np.all(distances[0, neighbors_nn[0]] == distances_nn[0]), \
            abs(distances[0, neighbors_nn[0]] - distances_nn[0])

        P_bh = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                       perplexity, verbose=0)
        kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
                                           n_samples, n_components,
                                           angle=angle, skip_num_points=0,
                                           verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3)
Example #27
Source File: relieff.py From scikit-rebate with MIT License
def _distarray_no_missing(self, xc, xd):
    """Distance array calculation for data with no missing values.

    The pdist() function outputs a condensed distance array, and
    squareform() converts this vector-form distance vector to a
    square-form, redundant distance matrix.
    *This could be a target for saving memory in the future, by not
    needing to expand to the redundant square-form matrix.
    """
    from scipy.spatial.distance import pdist, squareform

    # ------------------------------------------ #
    def pre_normalize(x):
        """Normalizes continuous features so they are in the same range (0 to 1)"""
        idx = 0
        # goes through all named features (doesn't really need to);
        # this method is only applied to continuous features
        for i in sorted(self.attr.keys()):
            if self.attr[i][0] == 'discrete':
                continue
            cmin = self.attr[i][2]
            diff = self.attr[i][3]
            x[:, idx] -= cmin
            x[:, idx] /= diff
            idx += 1
        return x
    # ------------------------------------------ #

    if self.data_type == 'discrete':  # discrete features only
        return squareform(pdist(self._X, metric='hamming'))
    elif self.data_type == 'mixed':  # mix of discrete and continuous features
        d_dist = squareform(pdist(xd, metric='hamming'))
        # Cityblock is also known as Manhattan distance
        c_dist = squareform(pdist(pre_normalize(xc), metric='cityblock'))
        return np.add(d_dist, c_dist) / self._num_attributes
    else:  # continuous features only
        # xc = pre_normalize(xc)
        return squareform(pdist(pre_normalize(xc), metric='cityblock'))
Example #28
Source File: Utility.py From fuku-ml with MIT License
def kernel_matrix(svm_model, original_X):
    if (svm_model.svm_kernel == 'polynomial_kernel' or
            svm_model.svm_kernel == 'soft_polynomial_kernel'):
        K = (svm_model.zeta
             + svm_model.gamma * np.dot(original_X, original_X.T)) ** svm_model.Q
    elif (svm_model.svm_kernel == 'gaussian_kernel' or
          svm_model.svm_kernel == 'soft_gaussian_kernel'):
        pairwise_dists = squareform(pdist(original_X, 'euclidean'))
        K = np.exp(-svm_model.gamma * (pairwise_dists ** 2))

    # Equivalent element-wise construction, kept for reference:
    '''
    K = np.zeros((svm_model.data_num, svm_model.data_num))
    for i in range(svm_model.data_num):
        for j in range(svm_model.data_num):
            if (svm_model.svm_kernel == 'polynomial_kernel' or
                    svm_model.svm_kernel == 'soft_polynomial_kernel'):
                K[i, j] = Kernel.polynomial_kernel(svm_model, original_X[i], original_X[j])
            elif (svm_model.svm_kernel == 'gaussian_kernel' or
                  svm_model.svm_kernel == 'soft_gaussian_kernel'):
                K[i, j] = Kernel.gaussian_kernel(svm_model, original_X[i], original_X[j])
    '''

    return K
Example #29
Source File: test_t_sne.py From Mastering-Elasticsearch-7.0 with MIT License
def test_preserve_trustworthiness_approximately_with_precomputed_distances():
    # Nearest neighbors should be preserved approximately.
    random_state = check_random_state(0)
    for i in range(3):
        X = random_state.randn(100, 2)
        D = squareform(pdist(X, "sqeuclidean"))
        tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
                    early_exaggeration=2.0, metric="precomputed",
                    random_state=i, verbose=0)
        X_embedded = tsne.fit_transform(D)
        t = trustworthiness(D, X_embedded, n_neighbors=1,
                            metric="precomputed")
        assert t > .95
Example #30
Source File: kernels.py From CatLearn with GNU General Public License v3.0
def sqe_kernel(theta, log_scale, m1, m2=None, eval_gradients=False):
    """Generate the covariance between data with a Gaussian kernel.

    Parameters
    ----------
    theta : list
        A list of widths for each feature.
    log_scale : boolean
        Scaling hyperparameters in the kernel can be useful for
        optimization.
    m1 : list
        A list of the training fingerprint vectors.
    m2 : list
        A list of the training fingerprint vectors.

    Returns
    -------
    k : array
        The covariance matrix.
    """
    if eval_gradients:
        msg = 'Evaluation of the gradients for this kernel is not yet '
        msg += 'implemented'
        raise NotImplementedError(msg)

    kwidth = theta
    if log_scale:
        kwidth = np.exp(kwidth)

    if m2 is None:
        k = distance.pdist(m1, metric='seuclidean', V=kwidth)
        k = distance.squareform(np.exp(-.5 * k))
        np.fill_diagonal(k, 1)
    else:
        k = distance.cdist(m1, m2, metric='seuclidean', V=kwidth)
        k = np.exp(-.5 * k)

    return k