Python scipy.spatial.distance.squareform() Examples
The following are 30 code examples of scipy.spatial.distance.squareform(). Each example comes from an open-source project; the source file, project, and license are named above the code. You may also want to check out the other available functions and classes of the scipy.spatial.distance module.
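Before the project examples, a minimal sketch (assuming only numpy and scipy) of what squareform() does: it converts a condensed distance vector, as returned by pdist(), into the redundant square distance matrix, and converts such a matrix back into the condensed vector.

import numpy as np
from scipy.spatial.distance import pdist, squareform

points = np.array([[0.0, 0.0], [3.0, 0.0], [0.0, 4.0]])

condensed = pdist(points)       # shape (3,): the upper triangle as a flat vector
matrix = squareform(condensed)  # shape (3, 3): symmetric, with a zero diagonal
roundtrip = squareform(matrix)  # back to the condensed vector

assert matrix[0, 1] == condensed[0]
assert np.allclose(roundtrip, condensed)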
Example #1
Source File: test_t_sne.py From Mastering-Elasticsearch-7.0 with MIT License
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
                     verbose=False, perplexity=0.1, skip_num_points=0):
    distances = pairwise_distances(pos_input).astype(np.float32)
    args = distances, perplexity, verbose
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64, copy=False)
    pij_input = _joint_probabilities(*args)
    pij_input = squareform(pij_input).astype(np.float32)
    grad_bh = np.zeros(pos_output.shape, dtype=np.float32)

    from scipy.sparse import csr_matrix
    P = csr_matrix(pij_input)
    neighbors = P.indices.astype(np.int64)
    indptr = P.indptr.astype(np.int64)

    _barnes_hut_tsne.gradient(P.data, pos_output, neighbors, indptr,
                              grad_bh, 0.5, 2, 1, skip_num_points=0)
    assert_array_almost_equal(grad_bh, grad_output, decimal=4)
Example #2
Source File: dataset.py From neural-combinatorial-optimization-rl-tensorflow with MIT License
def k_nearest_neighbor(self, sequence):
    # Calculate dist_matrix
    dist_array = pdist(sequence)
    dist_matrix = squareform(dist_array)
    # Construct tour
    new_sequence = [sequence[0]]
    current_city = 0
    visited_cities = [0]
    for i in range(1, len(sequence)):
        j = np.random.randint(0, min(len(sequence) - i, self.kNN))
        next_city = [index for index in dist_matrix[current_city].argsort()
                     if index not in visited_cities][j]
        visited_cities.append(next_city)
        new_sequence.append(sequence[next_city])
        current_city = next_city
    return np.asarray(new_sequence)

# Generate random TSP-TW instance
Example #3
Source File: coords2sort_order.py From pyscf with Apache License 2.0
def coords2sort_order(a2c):
    """ Delivers a list of atom indices which generates a near-diagonal
    overlap for a given set of atom coordinates """
    na = a2c.shape[0]
    aa2d = squareform(pdist(a2c))
    mxd = np.amax(aa2d) + 1.0
    a = 0
    lsa = []
    for ia in range(na):
        lsa.append(a)
        asrt = np.argsort(aa2d[a])
        for ja in range(1, na):
            b = asrt[ja]
            if b not in lsa:
                break
        aa2d[a, b] = aa2d[b, a] = mxd
        a = b
    return np.array(lsa)
Example #4
Source File: post_proc.py From HorizonNet with MIT License
def vote(vec, tol):
    vec = np.sort(vec)
    n = np.arange(len(vec))[::-1]
    n = n[:, None] - n[None, :] + 1.0
    l = squareform(pdist(vec[:, None], 'minkowski', p=1) + 1e-9)
    invalid = (n < len(vec) * 0.4) | (l > tol)
    if (~invalid).sum() == 0 or len(vec) < tol:
        best_fit = np.median(vec)
        p_score = 0
    else:
        l[invalid] = 1e5
        n[invalid] = -1
        score = n
        max_idx = score.argmax()
        max_row = max_idx // len(vec)
        max_col = max_idx % len(vec)
        assert max_col > max_row
        best_fit = vec[max_row:max_col + 1].mean()
        p_score = (max_col - max_row + 1) / len(vec)

    l1_score = np.abs(vec - best_fit).mean()

    return best_fit, p_score, l1_score
Example #5
Source File: decisionboundaryplot.py From highdimensional-decision-boundary-plot with MIT License
def _get_sorted_db_keypoint_distances(self, N=None):
    """Use a minimum spanning tree heuristic to find the N largest gaps in the
    line constituted by the current decision boundary keypoints.
    """
    if N is None:
        N = self.n_interpolated_keypoints
    edges = minimum_spanning_tree(
        squareform(pdist(self.decision_boundary_points_2d))
    )
    edged = np.array(
        [
            euclidean(
                self.decision_boundary_points_2d[u],
                self.decision_boundary_points_2d[v],
            )
            for u, v in edges
        ]
    )
    gap_edge_idx = np.argsort(edged)[::-1][: int(N)]
    edges = edges[gap_edge_idx]
    gap_distances = np.square(edged[gap_edge_idx])
    gap_probability_scores = gap_distances / np.sum(gap_distances)
    return edges, gap_distances, gap_probability_scores
Example #6
Source File: Variogram.py From scikit-gstat with MIT License
def __vdiff_indexer(self):
    """Pairwise indexer

    Returns an iterator over the values or coordinates in squareform
    coordinates. The iterable will be of type tuple.

    Returns
    -------
    iterable
    """
    l = len(self.values)

    for i in range(l):
        for j in range(l):
            if i < j:
                yield i, j
Example #7
Source File: test_nearest_neighbors.py From openTSNE with BSD 3-Clause "New" or "Revised" License
def test_cosine_distance(self):
    k = 15
    # Compute cosine distance nearest neighbors using ball tree
    knn_index = nearest_neighbors.BallTree("cosine")
    indices, distances = knn_index.build(self.x1, k=k)

    # Compute the exact nearest neighbors as a reference
    true_distances = squareform(pdist(self.x1, metric="cosine"))
    true_indices_ = np.argsort(true_distances, axis=1)[:, 1:k + 1]
    true_distances_ = np.vstack([d[i] for d, i in zip(true_distances, true_indices_)])

    np.testing.assert_array_equal(
        indices, true_indices_, err_msg="Nearest neighbors do not match"
    )
    np.testing.assert_array_equal(
        distances, true_distances_, err_msg="Distances do not match"
    )
Example #8
Source File: Variogram.py From scikit-gstat with MIT License
def value_matrix(self):
    """Value matrix

    Returns a matrix of pairwise differences in absolute values. The
    matrix will have the shape (m, m) with m = len(Variogram.values).
    Note that Variogram.values holds the values themselves, while the
    value_matrix consists of their pairwise differences.

    Returns
    -------
    values : numpy.matrix
        Matrix of pairwise absolute differences of the values.

    See Also
    --------
    Variogram._diff
    """
    return squareform(self._diff)
Example #9
Source File: squareform.py From mars with Apache License 2.0
def _execute_map(cls, ctx, op):
    inputs, device_id, xp = as_same_device(
        [ctx[inp.key] for inp in op.inputs], device=op.device, ret_extra=True)

    if len(inputs) == 2 and not inputs[1]:
        # check fail
        raise ValueError('Distance matrix X must be symmetric.')

    if xp is cp:  # pragma: no cover
        raise NotImplementedError('`squareform` does not support running on GPU yet')

    with device(device_id):
        x = inputs[0]
        if x.ndim == 1:
            cls._to_matrix(ctx, xp, x, op)
        else:
            cls._to_vector(ctx, xp, x, op)
Example #10
Source File: squareform.py From mars with Apache License 2.0
def execute(cls, ctx, op):
    if op.stage == OperandStage.map:
        cls._execute_map(ctx, op)
    elif op.stage == OperandStage.reduce:
        cls._execute_reduce(ctx, op)
    else:
        from scipy.spatial.distance import squareform

        (x,), device_id, xp = as_same_device(
            [ctx[inp.key] for inp in op.inputs], device=op.device, ret_extra=True)

        if xp is cp:  # pragma: no cover
            raise NotImplementedError('`squareform` does not support running on GPU yet')

        with device(device_id):
            ctx[op.outputs[0].key] = squareform(x, checks=op.checks)
Example #11
Source File: safe_io.py From safepy with GNU General Public License v3.0
def calculate_edge_lengths(G, verbose=True):
    # Calculate the lengths of the edges
    if verbose:
        print('Calculating edge lengths...')

    x = np.matrix(G.nodes.data('x'))[:, 1]
    y = np.matrix(G.nodes.data('y'))[:, 1]

    node_coordinates = np.concatenate([x, y], axis=1)
    node_distances = squareform(pdist(node_coordinates, 'euclidean'))

    adjacency_matrix = np.array(nx.adjacency_matrix(G).todense())
    adjacency_matrix = adjacency_matrix.astype('float')
    adjacency_matrix[adjacency_matrix == 0] = np.nan

    edge_lengths = np.multiply(node_distances, adjacency_matrix)

    edge_attr_dict = {index: v for index, v in np.ndenumerate(edge_lengths) if ~np.isnan(v)}
    nx.set_edge_attributes(G, edge_attr_dict, 'length')

    return G
Example #12
Source File: post_proc2.py From LayoutNetv2 with MIT License
def vote(vec, tol):
    vec = np.sort(vec)
    n = np.arange(len(vec))[::-1]
    n = n[:, None] - n[None, :] + 1.0
    l = squareform(pdist(vec[:, None], 'minkowski', p=1) + 1e-9)
    invalid = (n < len(vec) * 0.4) | (l > tol)
    if (~invalid).sum() == 0 or len(vec) < tol:
        best_fit = np.median(vec)
        p_score = 0
    else:
        l[invalid] = 1e5
        n[invalid] = -1
        score = n
        max_idx = score.argmax()
        max_row = max_idx // len(vec)
        max_col = max_idx % len(vec)
        assert max_col > max_row
        best_fit = vec[max_row:max_col + 1].mean()
        p_score = (max_col - max_row + 1) / len(vec)

    l1_score = np.abs(vec - best_fit).mean()

    return best_fit, p_score, l1_score
Example #13
Source File: test_isc.py From brainiak with Apache License 2.0
def correlated_timeseries(n_subjects, n_TRs, noise=0, random_state=None):
    prng = np.random.RandomState(random_state)
    signal = prng.randn(n_TRs)
    correlated = True
    while correlated:
        uncorrelated = np.random.randn(n_TRs, n_subjects)[:, np.newaxis, :]
        unc_max = np.amax(squareform(np.corrcoef(
            uncorrelated[:, 0, :].T), checks=False))
        unc_mean = np.mean(squareform(np.corrcoef(
            uncorrelated[:, 0, :].T), checks=False))
        if unc_max < .3 and np.abs(unc_mean) < .001:
            correlated = False
    data = np.repeat(np.column_stack((signal, signal))[..., np.newaxis],
                     20, axis=2)
    data = np.concatenate((data, uncorrelated), axis=1)
    data = data + np.random.randn(n_TRs, 3, n_subjects) * noise
    return data

# Compute ISCs using different input types
# List of subjects with one voxel/ROI
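A note on checks=False as used above: by default squareform() validates that a square input is symmetric with a zero diagonal, and a correlation matrix from np.corrcoef() has ones on the diagonal, so validation would raise a ValueError. Passing checks=False skips the validation and simply extracts the upper triangle. A minimal sketch, assuming only numpy and scipy:

import numpy as np
from scipy.spatial.distance import squareform

corr = np.corrcoef(np.random.randn(4, 10))  # 4x4, ones on the diagonal
pairwise = squareform(corr, checks=False)   # condensed vector of the 6 off-diagonal pairs
assert pairwise.shape == (6,)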
Example #14
Source File: bayesian_nn.py From Stein-Variational-Gradient-Descent with MIT License
def svgd_kernel(self, h=-1):
    sq_dist = pdist(self.theta)
    pairwise_dists = squareform(sq_dist) ** 2
    if h < 0:  # if h < 0, use the median trick
        h = np.median(pairwise_dists)
        h = np.sqrt(0.5 * h / np.log(self.theta.shape[0] + 1))

    # compute the RBF kernel
    Kxy = np.exp(-pairwise_dists / h ** 2 / 2)

    dxkxy = -np.matmul(Kxy, self.theta)
    sumkxy = np.sum(Kxy, axis=1)
    for i in range(self.theta.shape[1]):
        dxkxy[:, i] = dxkxy[:, i] + np.multiply(self.theta[:, i], sumkxy)
    dxkxy = dxkxy / (h ** 2)
    return (Kxy, dxkxy)
Example #15
Source File: svgd.py From Stein-Variational-Gradient-Descent with MIT License
def svgd_kernel(self, theta, h=-1):
    sq_dist = pdist(theta)
    pairwise_dists = squareform(sq_dist) ** 2
    if h < 0:  # if h < 0, use the median trick
        h = np.median(pairwise_dists)
        h = np.sqrt(0.5 * h / np.log(theta.shape[0] + 1))

    # compute the RBF kernel
    Kxy = np.exp(-pairwise_dists / h ** 2 / 2)

    dxkxy = -np.matmul(Kxy, theta)
    sumkxy = np.sum(Kxy, axis=1)
    for i in range(theta.shape[1]):
        dxkxy[:, i] = dxkxy[:, i] + np.multiply(theta[:, i], sumkxy)
    dxkxy = dxkxy / (h ** 2)
    return (Kxy, dxkxy)
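A note on the median trick in the two SVGD examples above: pairwise_dists holds squared distances, so the bandwidth is set to h² = med / (2 log(n+1)), where med is the median squared pairwise distance and n is the number of particles. With that choice, exp(-med / (2h²)) = exp(-log(n+1)) = 1/(n+1), so the kernel weights a typical particle receives from its n neighbors sum to roughly one; this is the bandwidth heuristic proposed in the original SVGD paper.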
Example #16
Source File: kernels.py From BrainSpace with BSD 3-Clause "New" or "Revised" License
def _build_kernel(x, kernel, gamma=None):
    if kernel in {'pearson', 'spearman'}:
        if kernel == 'spearman':
            x = np.apply_along_axis(rankdata, 1, x)
        return np.corrcoef(x)

    if kernel in {'cosine', 'normalized_angle'}:
        x = 1 - squareform(pdist(x, metric='cosine'))
        if kernel == 'normalized_angle':
            # np.arccos(x, x) computes the arccosine in place, using x as the output array
            x = 1 - np.arccos(x, x) / np.pi
        return x

    if kernel == 'gaussian':
        if gamma is None:
            gamma = 1 / x.shape[1]
        return rbf_kernel(x, gamma=gamma)

    if callable(kernel):
        return kernel(x)

    raise ValueError("Unknown kernel '{0}'.".format(kernel))
Example #17
Source File: test_dbscan.py From Mastering-Elasticsearch-7.0 with MIT License
def test_dbscan_similarity():
    # Tests the DBSCAN algorithm with a similarity array.
    # Parameters chosen specifically for this task.
    eps = 0.15
    min_samples = 10
    # Compute similarities
    D = distance.squareform(distance.pdist(X))
    D /= np.max(D)
    # Compute DBSCAN
    core_samples, labels = dbscan(D, metric="precomputed", eps=eps,
                                  min_samples=min_samples)
    # number of clusters, ignoring noise if present
    n_clusters_1 = len(set(labels)) - (1 if -1 in labels else 0)
    assert_equal(n_clusters_1, n_clusters)

    db = DBSCAN(metric="precomputed", eps=eps, min_samples=min_samples)
    labels = db.fit(D).labels_

    n_clusters_2 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_2, n_clusters)
Example #18
Source File: test_pairwise.py From Mastering-Elasticsearch-7.0 with MIT License
def test_euclidean_distances_sym(dtype, x_array_constr):
    # check that euclidean distances gives same result as scipy pdist
    # when only X is provided
    rng = np.random.RandomState(0)
    X = rng.random_sample((100, 10)).astype(dtype, copy=False)
    X[X < 0.8] = 0

    expected = squareform(pdist(X))

    X = x_array_constr(X)
    distances = euclidean_distances(X)

    # the default rtol=1e-7 is too close to the float32 precision
    # and fails due to rounding errors.
    assert_allclose(distances, expected, rtol=1e-6)
    assert distances.dtype == dtype
Example #19
Source File: kernel.py From m-phate with GNU General Public License v3.0
def square_pdist(X):
    return distance.squareform(distance.pdist(X))
Example #20
Source File: Variogram.py From scikit-gstat with MIT License
def scattergram(self, ax=None, show=True):
    # create a new plot or use the given
    if ax is None:
        fig, ax = plt.subplots(1, 1)
    else:
        fig = ax.get_figure()

    tail = np.empty(0)
    head = tail.copy()

    for h in np.unique(self.lag_groups()):
        # get the head and tail
        x, y = np.where(squareform(self.lag_groups()) == h)

        # concatenate
        tail = np.concatenate((tail, self.values[x]))
        head = np.concatenate((head, self.values[y]))

    # plot the mean on tail and head
    ax.vlines(np.mean(tail), np.min(tail), np.max(tail),
              linestyles='--', color='red', lw=2)
    ax.hlines(np.mean(head), np.min(head), np.max(head),
              linestyles='--', color='red', lw=2)

    # plot
    ax.scatter(tail, head, 10, marker='o', color='orange')

    # annotate
    ax.set_ylabel('head')
    ax.set_xlabel('tail')

    # show the figure
    if show:  # pragma: no cover
        fig.show()

    return fig
Example #21
Source File: segmenter.py From msaf with MIT License
def compute_ssm(X, metric="seuclidean"):
    """Computes the self-similarity matrix of X."""
    D = distance.pdist(X, metric=metric)
    D = distance.squareform(D)
    D /= float(D.max())
    return 1 - D
Example #22
Source File: _hierarchy.py From q2-qemistree with BSD 2-Clause "Simplified" License
def build_tree(relabeled_fingerprints: pd.DataFrame,
               metric: str = 'euclidean') -> TreeNode:
    '''
    This function makes a tree of relatedness between mass-spectrometry
    features using molecular substructure fingerprints.
    '''
    distmat = pairwise_distances(X=relabeled_fingerprints.values,
                                 Y=None, metric=metric)
    distsq = squareform(distmat, checks=False)
    linkage_matrix = linkage(distsq, method='average')
    tree = TreeNode.from_linkage_matrix(linkage_matrix,
                                        relabeled_fingerprints.index.tolist())
    return tree
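A note on the pattern above: scipy.cluster.hierarchy.linkage() expects a condensed distance vector, not a square matrix, so the square output of pairwise_distances() must first be collapsed with squareform(); checks=False tolerates the small floating-point asymmetries such matrices often carry. A minimal sketch of the same round trip, assuming only numpy and scipy:

import numpy as np
from scipy.cluster.hierarchy import linkage
from scipy.spatial.distance import pdist, squareform

X = np.random.rand(5, 3)
square = squareform(pdist(X))  # stands in for a pairwise_distances() result
Z = linkage(squareform(square, checks=False), method='average')
assert Z.shape == (4, 4)       # n - 1 merge rows, 4 columns each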
Example #23
Source File: segmenter.py From msaf with MIT License
def compute_ssm(X, metric="seuclidean"):
    """Computes the self-similarity matrix of X."""
    D = distance.pdist(X, metric=metric)
    D = distance.squareform(D)
    D /= D.max()
    return 1 - D
Example #24
Source File: atlas3.py From ssbio with MIT License
def remove_correlated_feats(df):
    tmp = df.T
    # Remove columns with no variation
    nunique = tmp.apply(pd.Series.nunique)
    cols_to_drop = nunique[nunique == 1].index
    tmp.drop(cols_to_drop, axis=1, inplace=True)

    perc_spearman = scipy.stats.spearmanr(tmp)
    abs_corr = np.subtract(np.ones(shape=perc_spearman.correlation.shape),
                           np.absolute(perc_spearman.correlation))
    np.fill_diagonal(abs_corr, 0)
    # some floating point mismatches, just make symmetric
    abs_corr_clean = np.maximum(abs_corr, abs_corr.transpose())

    clustering = linkage(squareform(abs_corr_clean), method='average')
    clusters = fcluster(clustering, .1, criterion='distance')
    names = tmp.columns.tolist()
    names_to_cluster = list(zip(names, clusters))
    indices_to_keep = []

    ### Extract models closest to cluster centroids
    for x in range(1, len(set(clusters)) + 1):
        # Create mask from the list of assignments for extracting submatrix of the cluster
        mask = np.array([1 if i == x else 0 for i in clusters], dtype=bool)
        # Take the index of the column with the smallest sum of distances from the submatrix
        idx = np.argmin(sum(abs_corr_clean[:, mask][mask, :]))
        # Extract names of cluster elements from names_to_cluster
        sublist = [name for (name, cluster) in names_to_cluster if cluster == x]
        # Element closest to centroid
        centroid = sublist[idx]
        indices_to_keep.append(centroid)

    return df.loc[df.index.isin(indices_to_keep)]
Example #25
Source File: candidates.py From luna16 with BSD 2-Clause "Simplified" License
def merge_candidates_scan(candidates, seriesuid, distance=5.):
    distances = pdist(candidates, metric='euclidean')
    adjacency_matrix = squareform(distances)

    # Determine nodes within distance, replace by 1 (=adjacency matrix)
    adjacency_matrix = np.where(adjacency_matrix <= distance, 1, 0)

    # Determine all connected components in the graph
    n, labels = connected_components(adjacency_matrix)
    new_candidates = np.zeros((n, 3))

    # Take the mean for these connected components
    for cluster_i in range(n):
        points = candidates[np.where(labels == cluster_i)]
        center = np.mean(points, axis=0)
        new_candidates[cluster_i, :] = center

    x = new_candidates[:, 0]
    y = new_candidates[:, 1]
    z = new_candidates[:, 2]
    labels = [seriesuid] * len(x)
    class_name = [0] * len(x)

    data = zip(labels, x, y, z, class_name)

    new_candidates = pd.DataFrame(data, columns=CANDIDATES_COLUMNS)

    return new_candidates
Example #26
Source File: test_t_sne.py From Mastering-Elasticsearch-7.0 with MIT License
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                              n_samples, n_components)

        k = n_samples - 1
        bt = BallTree(distances)
        distances_nn, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        distances_nn = np.array([distances[i, neighbors_nn[i]]
                                 for i in range(n_samples)])
        assert np.all(distances[0, neighbors_nn[0]] == distances_nn[0]), \
            abs(distances[0, neighbors_nn[0]] - distances_nn[0])

        P_bh = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                       perplexity, verbose=0)
        kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
                                           n_samples, n_components,
                                           angle=angle, skip_num_points=0,
                                           verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3)
Example #27
Source File: relieff.py From scikit-rebate with MIT License
def _distarray_no_missing(self, xc, xd):
    """Distance array calculation for data with no missing values.

    The pdist() function outputs a condensed distance array, and
    squareform() converts this vector-form distance vector to a
    square-form, redundant distance matrix.
    *This could be a target for saving memory in the future, by not
    needing to expand to the redundant square-form matrix.
    """
    from scipy.spatial.distance import pdist, squareform

    # ------------------------------------------ #
    def pre_normalize(x):
        """Normalizes continuous features so they are in the same range (0 to 1)"""
        idx = 0
        # goes through all named features (doesn't really need to);
        # this method is only applied to continuous features
        for i in sorted(self.attr.keys()):
            if self.attr[i][0] == 'discrete':
                continue
            cmin = self.attr[i][2]
            diff = self.attr[i][3]
            x[:, idx] -= cmin
            x[:, idx] /= diff
            idx += 1
        return x
    # ------------------------------------------ #

    if self.data_type == 'discrete':  # discrete features only
        return squareform(pdist(self._X, metric='hamming'))
    elif self.data_type == 'mixed':  # mix of discrete and continuous features
        d_dist = squareform(pdist(xd, metric='hamming'))
        # Cityblock is also known as Manhattan distance
        c_dist = squareform(pdist(pre_normalize(xc), metric='cityblock'))
        return np.add(d_dist, c_dist) / self._num_attributes
    else:  # continuous features only
        # xc = pre_normalize(xc)
        return squareform(pdist(pre_normalize(xc), metric='cityblock'))
Example #28
Source File: Utility.py From fuku-ml with MIT License
def kernel_matrix(svm_model, original_X):
    if (svm_model.svm_kernel == 'polynomial_kernel' or
            svm_model.svm_kernel == 'soft_polynomial_kernel'):
        K = (svm_model.zeta
             + svm_model.gamma * np.dot(original_X, original_X.T)) ** svm_model.Q
    elif (svm_model.svm_kernel == 'gaussian_kernel' or
          svm_model.svm_kernel == 'soft_gaussian_kernel'):
        pairwise_dists = squareform(pdist(original_X, 'euclidean'))
        K = np.exp(-svm_model.gamma * (pairwise_dists ** 2))

    # Equivalent element-wise construction, kept for reference:
    '''
    K = np.zeros((svm_model.data_num, svm_model.data_num))
    for i in range(svm_model.data_num):
        for j in range(svm_model.data_num):
            if (svm_model.svm_kernel == 'polynomial_kernel' or
                    svm_model.svm_kernel == 'soft_polynomial_kernel'):
                K[i, j] = Kernel.polynomial_kernel(svm_model, original_X[i], original_X[j])
            elif (svm_model.svm_kernel == 'gaussian_kernel' or
                  svm_model.svm_kernel == 'soft_gaussian_kernel'):
                K[i, j] = Kernel.gaussian_kernel(svm_model, original_X[i], original_X[j])
    '''

    return K
Example #29
Source File: test_t_sne.py From Mastering-Elasticsearch-7.0 with MIT License
def test_preserve_trustworthiness_approximately_with_precomputed_distances():
    # Nearest neighbors should be preserved approximately.
    random_state = check_random_state(0)
    for i in range(3):
        X = random_state.randn(100, 2)
        D = squareform(pdist(X, "sqeuclidean"))
        tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
                    early_exaggeration=2.0, metric="precomputed",
                    random_state=i, verbose=0)
        X_embedded = tsne.fit_transform(D)
        t = trustworthiness(D, X_embedded, n_neighbors=1,
                            metric="precomputed")
        assert t > .95
Example #30
Source File: kernels.py From CatLearn with GNU General Public License v3.0
def sqe_kernel(theta, log_scale, m1, m2=None, eval_gradients=False):
    """Generate the covariance between data with a Gaussian kernel.

    Parameters
    ----------
    theta : list
        A list of widths for each feature.
    log_scale : boolean
        Scaling hyperparameters in the kernel can be useful for
        optimization.
    m1 : list
        A list of the training fingerprint vectors.
    m2 : list
        A list of the training fingerprint vectors.

    Returns
    -------
    k : array
        The covariance matrix.
    """
    if eval_gradients:
        msg = 'Evaluation of the gradients for this kernel is not yet '
        msg += 'implemented'
        raise NotImplementedError(msg)

    kwidth = theta
    if log_scale:
        kwidth = np.exp(kwidth)

    if m2 is None:
        k = distance.pdist(m1, metric='seuclidean', V=kwidth)
        k = distance.squareform(np.exp(-.5 * k))
        np.fill_diagonal(k, 1)
    else:
        k = distance.cdist(m1, m2, metric='seuclidean', V=kwidth)
        k = np.exp(-.5 * k)

    return k