Example #1
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_spectral_clustering(eigen_solver, assign_labels):
    """Check SpectralClustering on a small two-group precomputed affinity.

    Samples 0-2 form one dense group and samples 3-6 the other; sample 3 is
    weakly (0.2) tied to the first group. The same affinity is fed as a dense
    array and as a CSR matrix, and the fitted estimator must survive a pickle
    round trip.

    Parameters
    ----------
    eigen_solver : str
        Forwarded to ``SpectralClustering(eigen_solver=...)``.
    assign_labels : str
        Forwarded to ``SpectralClustering(assign_labels=...)``.
    """
    affinity = np.zeros((7, 7))
    affinity[:3, :3] = 1.0
    affinity[3:, 3:] = 1.0
    affinity[:3, 3] = 0.2
    affinity[3, :3] = 0.2

    expected = [1, 1, 1, 0, 0, 0, 0]
    for matrix in (affinity, sparse.csr_matrix(affinity)):
        estimator = SpectralClustering(random_state=0, n_clusters=2,
                                       affinity='precomputed',
                                       eigen_solver=eigen_solver,
                                       assign_labels=assign_labels)
        estimator.fit(matrix)

        found = estimator.labels_
        # Cluster ids are arbitrary: flip them so sample 0 carries label 1.
        if found[0] == 0:
            found = 1 - found
        assert adjusted_rand_score(found, expected) == 1

        # The fitted estimator must round-trip through pickle unchanged.
        restored = pickle.loads(pickle.dumps(estimator))
        assert restored.n_clusters == estimator.n_clusters
        assert restored.eigen_solver == estimator.eigen_solver
        assert_array_equal(restored.labels_, estimator.labels_)
Example #2
Source File: From MvDSCN with MIT License | 6 votes |
def post_proC(C, K, d, alpha):
    """Post-process a coefficient matrix and cluster it spectrally.

    Parameters
    ----------
    C : array
        Square coefficient matrix.
    K : int
        Number of clusters.
    d : int
        Dimension of each subspace.
    alpha : float
        Exponent applied element-wise to the affinity before normalisation.

    Returns
    -------
    grp : array
        Cluster label of every sample, shifted by one (labels start at 1).
    L : array
        The symmetric affinity matrix that was clustered.
    """
    # Symmetrise the coefficients before the decomposition.
    C = 0.5 * (C + C.T)
    # Rank of the truncated SVD, capped at one less than the matrix size.
    r = min(d * K + 1, C.shape[0] - 1)
    U, S, _ = svds(C, r, v0=np.ones(C.shape[0]))
    # Reverse the component order of both the vectors and the values.
    U = U[:, ::-1]
    S = np.diag(np.sqrt(S[::-1]))
    # Scale each singular vector by the square root of its singular value;
    # without this step S is computed but never used.
    U = U.dot(S)
    U = normalize(U, norm='l2', axis=1)
    # Affinity from the row-normalised embedding; this defines Z, which the
    # following lines read.
    Z = U.dot(U.T)
    Z = Z * (Z > 0)  # drop negative similarities
    L = np.abs(Z ** alpha)
    L = L / L.max()
    L = 0.5 * (L + L.T)
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack',
                                          affinity='precomputed',
                                          assign_labels='discretize',
                                          random_state=66)
    grp = spectral.fit_predict(L) + 1
    return grp, L
Example #3
Source File: From parcellation_fragmenter with BSD 3-Clause "New" or "Revised" License | 6 votes |
def spectral_clustering(n_clusters, samples, size=False):
    """
    Run spectral clustering on a precomputed adjacency matrix.

    Parameters:
    - - - - -
    n_clusters : int
        number of clusters to generate
    samples : array
        adjacency matrix of surface or region, used directly as the
        precomputed affinity
    size : bool
        not used by this function; kept so existing callers keep working

    Returns:
    - - - - -
    labels : array of np.int32
        cluster label of every sample, shifted by one so labels start at 1
    """

    # Run Spectral Clustering
    spectral = cluster.SpectralClustering(
        n_clusters=n_clusters, affinity='precomputed')
    # The estimator has to be fitted before labels_ exists.
    spectral.fit(samples)

    # astype returns a new array, so the estimator's labels_ stay untouched.
    labels = spectral.labels_.astype(np.int32) + 1

    return labels
Example #4
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_spectral_clustering_sparse():
    """SpectralClustering must recover two blobs from a sparse COO affinity."""
    points, truth = make_blobs(n_samples=20, random_state=0,
                               centers=[[1, 1], [-1, -1]], cluster_std=0.01)

    kernel = rbf_kernel(points, gamma=1)
    # Shift down and clip at zero so tiny affinities become exact zeros.
    thresholded = np.maximum(kernel - 1e-4, 0)
    affinity = sparse.coo_matrix(thresholded)

    estimator = SpectralClustering(random_state=0, n_clusters=2,
                                   affinity='precomputed')
    estimator.fit(affinity)
    assert adjusted_rand_score(truth, estimator.labels_) == 1
Example #5
Source File: From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_clustering_sparse():
    """SpectralClustering must recover two blobs from a sparse COO affinity."""
    points, truth = make_blobs(n_samples=20, random_state=0,
                               centers=[[1, 1], [-1, -1]], cluster_std=0.01)

    kernel = rbf_kernel(points, gamma=1)
    # Shift down and clip at zero so tiny affinities become exact zeros.
    thresholded = np.maximum(kernel - 1e-4, 0)
    affinity = sparse.coo_matrix(thresholded)

    estimator = SpectralClustering(random_state=0, n_clusters=2,
                                   affinity='precomputed')
    estimator.fit(affinity)
    score = adjusted_rand_score(truth, estimator.labels_)
    assert_equal(score, 1)
Example #6
Source File: From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_unknown_assign_labels():
    """spectral_clustering must raise ValueError for an unknown assign_labels."""
    blob_centers = np.array([[0., 0., 0.],
                             [10., 10., 10.],
                             [20., 20., 20.]])
    samples, _ = make_blobs(n_samples=100, centers=blob_centers,
                            cluster_std=1., random_state=42)

    distances = pairwise_distances(samples)
    # Convert distances to similarities: the farthest pair gets affinity 0.
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    assert_raises(ValueError, spectral_clustering, similarity,
                  n_clusters=2, random_state=0, assign_labels="<unknown>")
Example #7
Source File: From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_unknown_mode():
    """spectral_clustering must raise ValueError for an unknown eigen_solver."""
    blob_centers = np.array([[0., 0., 0.],
                             [10., 10., 10.],
                             [20., 20., 20.]])
    samples, _ = make_blobs(n_samples=100, centers=blob_centers,
                            cluster_std=1., random_state=42)

    distances = pairwise_distances(samples)
    # Convert distances to similarities: the farthest pair gets affinity 0.
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    assert_raises(ValueError, spectral_clustering, similarity,
                  n_clusters=2, random_state=0, eigen_solver="<unknown>")
Example #8
Source File: From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_amg_mode():
    """Exercise the "amg" eigen solver, or its rejection when pyamg is absent."""
    blob_centers = np.array([[0., 0., 0.],
                             [10., 10., 10.],
                             [20., 20., 20.]])
    samples, truth = make_blobs(n_samples=100, centers=blob_centers,
                                cluster_std=1., random_state=42)

    distances = pairwise_distances(samples)
    # Convert distances to similarities: the farthest pair gets affinity 0.
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    try:
        from pyamg import smoothed_aggregation_solver  # noqa
    except ImportError:
        have_pyamg = False
    else:
        have_pyamg = True

    if not have_pyamg:
        # Without pyamg, asking for the amg solver has to fail loudly.
        assert_raises(ValueError, spectral_embedding, similarity,
                      n_components=len(blob_centers),
                      random_state=0, eigen_solver="amg")
        return

    predicted = spectral_clustering(similarity, n_clusters=len(blob_centers),
                                    random_state=0, eigen_solver="amg")
    # Quality is secondary here; the solver only has to run and clear a
    # loose lower bound on agreement with the generating labels.
    assert_greater(np.mean(predicted == truth), .3)
Example #9
Source File: From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_clustering():
    """Check SpectralClustering across solvers, label assigners and inputs.

    Samples 0-2 form one dense group and samples 3-6 the other; sample 3 is
    weakly (0.2) tied to the first group. Every combination of eigen solver
    ('arpack', 'lobpcg'), label assignment ('kmeans', 'discretize') and input
    format (dense, CSR) must recover the split and survive pickling.
    """
    affinity = np.zeros((7, 7))
    affinity[:3, :3] = 1.0
    affinity[3:, 3:] = 1.0
    affinity[:3, 3] = 0.2
    affinity[3, :3] = 0.2

    expected = [1, 1, 1, 0, 0, 0, 0]
    for solver in ('arpack', 'lobpcg'):
        for labeller in ('kmeans', 'discretize'):
            for matrix in (affinity, sparse.csr_matrix(affinity)):
                estimator = SpectralClustering(random_state=0, n_clusters=2,
                                               affinity='precomputed',
                                               eigen_solver=solver,
                                               assign_labels=labeller)
                estimator.fit(matrix)

                found = estimator.labels_
                # Cluster ids are arbitrary: flip so sample 0 carries label 1.
                if found[0] == 0:
                    found = 1 - found
                assert_array_equal(found, expected)

                # The fitted estimator must round-trip through pickle.
                restored = loads(dumps(estimator))
                assert_equal(restored.n_clusters, estimator.n_clusters)
                assert_equal(restored.eigen_solver, estimator.eigen_solver)
                assert_array_equal(restored.labels_, estimator.labels_)
Example #10
Source File: From learn-to-cluster with MIT License | 5 votes |
def dask_spectral(feat, n_clusters, **kwargs):
    """Cluster ``feat`` with dask-ml's SpectralClustering (RBF affinity).

    Parameters
    ----------
    feat : array-like
        Feature matrix handed to ``fit``.
    n_clusters : int
        Number of clusters to form.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    The result of calling ``.compute()`` on the fitted ``labels_``.
    """
    # Imported lazily so dask-ml is only needed when this backend is used.
    from dask_ml.cluster import SpectralClustering

    model = SpectralClustering(n_clusters=n_clusters,
                               affinity='rbf',
                               random_state=0).fit(feat)
    lazy_labels = model.labels_
    return lazy_labels.compute()
Example #11
Source File: From learn-to-cluster with MIT License | 5 votes |
def spectral(feat, n_clusters, **kwargs):
    """Cluster ``feat`` spectrally on a nearest-neighbours affinity graph.

    Parameters
    ----------
    feat : array-like
        Feature matrix handed to ``fit``.
    n_clusters : int
        Number of clusters to form.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    The fitted estimator's ``labels_``.
    """
    model = cluster.SpectralClustering(n_clusters=n_clusters,
                                       assign_labels="discretize",
                                       affinity="nearest_neighbors",
                                       random_state=0).fit(feat)
    return model.labels_
Example #12
Source File: From mmvt with GNU General Public License v3.0 | 5 votes |
def compare(data, n_groups, output_fol):
    """Plot ``data`` clustered by a fixed series of algorithms.

    Every entry is delegated to ``plot_clusters`` as
    ``(data, algorithm, label, output_fol, args, kwargs)``; ``args`` and
    ``kwargs`` are presumably the algorithm's constructor arguments.

    :param data: samples to cluster
    :param n_groups: cluster / component count given to the algorithms that
        take one
    :param output_fol: forwarded unchanged to ``plot_clusters``
    """
    # NOTE: the scipy.cluster.vq.kmeans and hdbscan.HDBSCAN variants are
    # intentionally disabled.
    plot_clusters(data, cluster.KMeans, 'KMeans', output_fol, (),
                  {'n_clusters': n_groups})

    # One Gaussian mixture per covariance structure.
    for covariance in ('spherical', 'tied', 'diag', 'full'):
        gmm_options = {'n_components': n_groups, 'covariance_type': covariance}
        plot_clusters(data, mixture.GaussianMixture,
                      'GMM_{}'.format(covariance), output_fol, (), gmm_options)

    remaining = (
        (cluster.AffinityPropagation, 'AffinityPropagation', (),
         {'preference': -5.0, 'damping': 0.95}),
        (cluster.MeanShift, 'MeanShift', (0.175,), {'cluster_all': False}),
        (cluster.SpectralClustering, 'SpectralClustering', (),
         {'n_clusters': n_groups}),
        (cluster.AgglomerativeClustering, 'AgglomerativeClustering', (),
         {'n_clusters': n_groups, 'linkage': 'ward'}),
        (cluster.DBSCAN, 'DBSCAN', (), {'eps': 0.025}),
    )
    for algorithm, label, args, options in remaining:
        plot_clusters(data, algorithm, label, output_fol, args, options)
Example #13
Source File: From TractSeg with Apache License 2.0 | 5 votes |
def cluster(points, algorithm=DBSCAN):
    """Cluster ``points`` and return the two biggest clusters.

    Parameters
    ----------
    points : array-like
        Samples handed to the estimator's ``fit_predict``.
    algorithm : str or class
        ``"KMeans"`` / ``"DBSCAN"``, or the ``KMeans`` / ``DBSCAN`` class
        itself. The default is the ``DBSCAN`` class, so it must be accepted
        as well as the string names.

    Returns
    -------
    points_start, points_end
        The pair returned by ``select_two_biggest_clusters(labels, points)``.

    Raises
    ------
    ValueError
        If ``algorithm`` names neither KMeans nor DBSCAN.
    """
    # Normalise to a name so strings and classes take the same branch.
    if isinstance(algorithm, str):
        name = algorithm
    else:
        name = getattr(algorithm, "__name__", str(algorithm))

    print("Running {}...".format(name))
    if name == "KMeans":
        # not good at finding clusters if close together
        labels = KMeans(n_clusters=2, random_state=0, n_jobs=-1).fit_predict(points)
    elif name == "DBSCAN":
        # no fixed number of labels; slow with high eps
        labels = DBSCAN(eps=3.0, n_jobs=-1).fit_predict(points)
    else:
        # Fail clearly instead of hitting an unbound ``labels`` below.
        raise ValueError(
            "Unknown clustering algorithm {!r}; expected 'KMeans' or "
            "'DBSCAN'".format(algorithm))
    # Alternatives that were tried: SpectralClustering(n_clusters=2) is slow
    # (> 1min); AgglomerativeClustering(n_clusters=2) is fast.

    points_start, points_end = select_two_biggest_clusters(labels, points)
    return points_start, points_end
Example #14
Source File: From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self, options):
    """Build the spectral clustering estimator and scaler from ``options``.

    ``options`` is first passed to ``self.handle_options``. Its ``'params'``
    entry (empty dict when missing) then goes through ``convert_params``,
    which is told that ``gamma`` is a float, ``affinity`` a string, ``k`` and
    ``random_state`` integers, and that ``k`` is an alias of ``n_clusters``.
    The result becomes the estimator's keyword arguments.
    """
    self.handle_options(options)

    raw_params = options.get('params', {})
    estimator_kwargs = convert_params(
        raw_params,
        floats=['gamma'],
        strs=['affinity'],
        ints=['k', 'random_state'],
        aliases={'k': 'n_clusters'},
    )

    self.estimator = _SpectralClustering(**estimator_kwargs)
    self.scaler = StandardScaler()
Example #15
Source File: From graph-neural-networks with GNU General Public License v3.0 | 5 votes |
def computeSourceNodes(A, C):
    """
    computeSourceNodes: compute source nodes for the source localization problem

    Input:
        A (np.array): adjacency matrix of shape N x N
        C (int): number of classes

    Output:
        sourceNodes (list): contains the indices of the C source nodes

    Uses the adjacency matrix to compute C communities by means of spectral
    clustering, and then selects the node with largest degree within each
    community
    """
    sourceNodes = []
    degree = np.sum(A, axis=0)  # degree of each node (column sums of A)
    # Compute communities
    communityClusters = SpectralClustering(n_clusters=C,
                                           affinity='precomputed',
                                           assign_labels='discretize')
    # The adjacency matrix is the precomputed affinity; labels_ only exists
    # once the estimator has been fitted.
    communityClusters = communityClusters.fit(A)
    communityLabels = communityClusters.labels_
    # For each community
    for c in range(C):
        communityNodes = np.nonzero(communityLabels == c)[0]
        # argsort is ascending, so the last entry has the largest degree.
        degreeSorted = np.argsort(degree[communityNodes])
        sourceNodes.append(communityNodes[degreeSorted[-1]])

    return sourceNodes
Example #16
Source File: From pyImSegm with BSD 3-Clause "New" or "Revised" License | 5 votes |
def transform_rays_model_cdf_spectral(list_rays, nb_components=5):
    """ compute the mixture model and transform it into cumulative distribution

    The rays are grouped by spectral clustering; for every cluster the
    smoothed per-angle mean and the standard deviation (plus one) are
    collected, and together with the cluster weights passed to
    ``compute_cumulative_distrib``.

    :param list(list(int)) list_rays: list ray features (distances)
    :param int nb_components: number components in mixture model
    :return tuple(any,list(list(int))): mixture model, list of stat/param of models

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...              [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, cdist = transform_rays_model_cdf_spectral(list_rays)
    >>> np.round(cdist, 1).tolist()  # doctest: +NORMALIZE_WHITESPACE
    [[1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 0.6, 0.5, 0.2, 0.0],
     [1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.9, 0.7, 0.5, 0.2, 0.0],
     [1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 0.7, 0.5, 0.3, 0.0]]
    """
    rays = np.array(list_rays)
    sc = cluster.SpectralClustering(nb_components)
    # labels_ only exists after fitting.
    sc.fit(rays)
    labels = sc.labels_
    unique_labels = np.unique(labels)
    # '%i' rather than '% c': the latter is a character conversion that eats
    # the "c" of "components".
    logging.debug('SpectralClustering found %i components with counts: %r',
                  len(unique_labels), np.bincount(labels))

    means = np.zeros((len(unique_labels), rays.shape[1]))
    stds = np.zeros((len(means), rays.shape[1]))
    for i, lb in enumerate(unique_labels):
        members = rays[labels == lb]
        means[i, :] = np.mean(members, axis=0)
        # ndimage.gaussian_filter1d is the same function as the deprecated
        # ndimage.filters.gaussian_filter1d alias.
        means[i, :] = ndimage.gaussian_filter1d(means[i, :], 1)
        stds[i, :] = np.std(members, axis=0)
    stds += 1
    weights = np.bincount(labels) / float(len(labels))

    # compute the fairest mean + sigma over all components and ray angles
    max_dist = np.max(means + stds)

    cdist = compute_cumulative_distrib(means, stds, weights, max_dist)
    return sc, cdist.tolist()
Example #17
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_spectral_unknown_mode():
    """spectral_clustering must raise ValueError for an unknown eigen_solver."""
    blob_centers = np.array([[0., 0., 0.],
                             [10., 10., 10.],
                             [20., 20., 20.]])
    samples, _ = make_blobs(n_samples=100, centers=blob_centers,
                            cluster_std=1., random_state=42)

    distances = pairwise_distances(samples)
    # Convert distances to similarities: the farthest pair gets affinity 0.
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    assert_raises(ValueError, spectral_clustering, similarity,
                  n_clusters=2, random_state=0, eigen_solver="<unknown>")
Example #18
Source File: From scdiff with MIT License | 5 votes |
def performClustering(self):
    """Cluster the cells of every time point and return all clusters.

    Cluster counts and seeds per time point come from
    ``self.getClusteringPars()``. A time point with more than one cluster
    is clustered on a copy of ``self.affMatrix[ti]``: KMeans when
    ``self.largeType`` is ``'1'`` or ``'True'``, SpectralClustering
    otherwise. Each cell gets its label stored in ``.Label``. A time point
    with a single cluster gets label 0 for every cell.

    Returns a flat list of ``Cluster`` objects with ids ``"<time>_<label>"``.
    """
    print('start clustering...')
    KET = self.KET
    # default clustering model
    dCK, dBS = self.getClusteringPars()
    AC = []
    gc.collect()
    useKMeans = (self.largeType == '1' or self.largeType == 'True')
    for ti in KET:
        print("clustering for time: " + str(ti))
        CT = self.dET[ti]
        CKT = dCK[ti]
        BST = dBS[ti]
        if CKT > 1:
            X = copy.deepcopy(self.affMatrix[ti])
            if useKMeans:
                SC = KMeans(n_clusters=CKT, random_state=BST)
            else:
                SC = SpectralClustering(n_clusters=CKT, random_state=BST)
            # labels_ only exists once the estimator has been fitted on X.
            SC.fit(X)
            Y = SC.labels_
            for j, cell in enumerate(CT):
                cell.Label = Y[j]
            CC = [Cluster([item for item in CT if item.Label == j], ti,
                          str(ti) + '_' + str(j))
                  for j in range(CKT)]
        else:
            # Single cluster: every cell of this time point belongs to it.
            for cell in CT:
                cell.Label = 0
            CC = [Cluster([item for item in CT if item.Label == 0], ti,
                          str(ti) + '_' + str(0))]
        AC += CC
    return AC
Example #19
Source File: From cdp with MIT License | 5 votes |
def spectral(feat, n_clusters=2):
    """Cluster ``feat`` spectrally on a nearest-neighbours affinity graph.

    Parameters
    ----------
    feat : array-like
        Feature matrix handed to ``fit``.
    n_clusters : int
        Number of clusters to form (default 2).

    Returns
    -------
    The fitted estimator's ``labels_``.
    """
    model = cluster.SpectralClustering(n_clusters=n_clusters,
                                       assign_labels="discretize",
                                       affinity="nearest_neighbors",
                                       random_state=0).fit(feat)
    return model.labels_
Example #20
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_spectral_unknown_assign_labels():
    """spectral_clustering must raise ValueError for an unknown assign_labels."""
    blob_centers = np.array([[0., 0., 0.],
                             [10., 10., 10.],
                             [20., 20., 20.]])
    samples, _ = make_blobs(n_samples=100, centers=blob_centers,
                            cluster_std=1., random_state=42)

    distances = pairwise_distances(samples)
    # Convert distances to similarities: the farthest pair gets affinity 0.
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    assert_raises(ValueError, spectral_clustering, similarity,
                  n_clusters=2, random_state=0, assign_labels="<unknown>")
Example #21
Source File: From scdiff with MIT License | 4 votes |
def determineSeed(self, dCK):
    """Pick, per time point, the random seed with the best silhouette score.

    For every time point after the first, candidate seeds are tried on a
    copy of ``self.affMatrix[T]`` with ``dCK[T]`` clusters and the first seed
    reaching the highest silhouette score is kept. KMeans is used when
    ``self.largeType`` is ``'1'`` or ``'True'``; otherwise
    SpectralClustering is used and scored on the distance matrix from
    ``self.affinity2Distance``. 100 seeds are tried when ``self.largeType``
    is None, a single seed otherwise.

    The search is best-effort: if it fails for a time point, the failure is
    reported and seed 0 is used. The first time point always gets seed 0.

    Returns a dict mapping time point -> chosen seed.
    """
    print("learning clustering seeds...")
    dBS = {}  # best seed per time point
    KET = self.KET
    NSEEDS = 100 if self.largeType is None else 1
    useKMeans = (self.largeType == '1' or self.largeType == 'True')
    for T in KET[1:]:
        try:
            CKi = dCK[T]
            X = copy.deepcopy(self.affMatrix[T])
            SEEDS = range(NSEEDS)
            SS = []  # silhouette score of every seed, in seed order
            if useKMeans:
                for s in SEEDS:
                    # Seed the estimator with the candidate, so the chosen
                    # seed reproduces the scored clustering.
                    SC = KMeans(n_clusters=CKi, random_state=s)
                    SC.fit(X)  # labels_ only exists after fitting
                    SS.append(silhouette_score(X, SC.labels_))
                    print("seeds:" + str(s))
            else:
                DX = self.affinity2Distance(X)
                for s in SEEDS:
                    SC = SpectralClustering(n_clusters=CKi, random_state=s)
                    SC.fit(X)  # labels_ only exists after fitting
                    SS.append(silhouette_score(DX, SC.labels_,
                                               metric="precomputed"))
                    print("seeds:" + str(s))
            # index() returns the first maximum, i.e. the smallest best seed.
            dBS[T] = SEEDS[SS.index(max(SS))]
        except Exception as err:
            # Keep the fallback, but report it and let KeyboardInterrupt /
            # SystemExit propagate.
            print("seed search failed for time " + str(T) + ": " + str(err)
                  + "; using seed 0")
            dBS[T] = 0
    dBS[KET[0]] = 0
    return dBS
Example #22
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def test_affinities():
    """Check SpectralClustering with every supported kind of affinity.

    Covers the nearest-neighbours graph (which must warn that the graph is
    not fully connected), the default kernel with an explicit ``gamma``,
    every kernel listed by ``kernel_metrics()`` except ``additive_chi2``,
    callables with and without ``**kwargs``, and an unknown affinity name,
    which must raise ``ValueError``.
    """
    # Note: in the following, random_state has been selected to have
    # a dataset that yields a stable eigen decomposition both when built
    # on OSX and Linux
    X, y = make_blobs(n_samples=20, random_state=0,
                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)

    # nearest neighbors affinity
    sp = SpectralClustering(n_clusters=2, affinity='nearest_neighbors',
                            random_state=0)
    assert_warns_message(UserWarning, 'not fully connected', sp.fit, X)
    assert adjusted_rand_score(y, sp.labels_) == 1

    sp = SpectralClustering(n_clusters=2, gamma=2, random_state=0)
    labels = sp.fit(X).labels_
    assert adjusted_rand_score(y, labels) == 1

    X = check_random_state(10).rand(10, 5) * 10

    kernels_available = kernel_metrics()
    for kern in kernels_available:
        # Additive chi^2 gives a negative similarity matrix which
        # doesn't make sense for spectral clustering
        if kern != 'additive_chi2':
            sp = SpectralClustering(n_clusters=2, affinity=kern,
                                    random_state=0)
            labels = sp.fit(X).labels_
            assert (X.shape[0],) == labels.shape

    sp = SpectralClustering(n_clusters=2, affinity=lambda x, y: 1,
                            random_state=0)
    labels = sp.fit(X).labels_
    assert (X.shape[0],) == labels.shape

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert_equal(kwargs, {})  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    sp = SpectralClustering(n_clusters=2, affinity=histogram, random_state=0)
    labels = sp.fit(X).labels_
    assert (X.shape[0],) == labels.shape

    # raise error on unknown affinity
    sp = SpectralClustering(n_clusters=2, affinity='<unknown>')
    assert_raises(ValueError, sp.fit, X)
Example #23
Source File: From pandas-ml with BSD 3-Clause "New" or "Revised" License | 3 votes |
def test_objectmapper(self):
    """``ModelFrame.cluster`` must expose the very same classes as ``cluster``.

    Checks the top-level estimators and the two ``bicluster`` classes by
    identity.
    """
    df = pdml.ModelFrame([])

    estimator_names = (
        'AffinityPropagation',
        'AgglomerativeClustering',
        'Birch',
        'DBSCAN',
        'FeatureAgglomeration',
        'KMeans',
        'MiniBatchKMeans',
        'MeanShift',
        'SpectralClustering',
    )
    for name in estimator_names:
        self.assertIs(getattr(df.cluster, name), getattr(cluster, name))

    for name in ('SpectralBiclustering', 'SpectralCoclustering'):
        self.assertIs(getattr(df.cluster.bicluster, name),
                      getattr(cluster.bicluster, name))