Python sklearn.cluster.SpectralClustering() Examples
The following are 23 code examples of sklearn.cluster.SpectralClustering().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.cluster, or try the search function.
Example #1
Source File: test_spectral.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_spectral_clustering(eigen_solver, assign_labels):
    """Cluster a two-block precomputed affinity and check pickling.

    The affinity is supplied both as a dense array and as a CSR matrix.
    The first three samples must land in one cluster and the remaining
    four in the other; the fitted estimator must survive a pickle
    round trip with identical parameters and labels.
    """
    first_block_row = [1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0]
    bridge_row = [0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0]
    second_block_row = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]
    S = np.array([first_block_row] * 3 + [bridge_row] + [second_block_row] * 3)

    for affinity_matrix in (S, sparse.csr_matrix(S)):
        estimator = SpectralClustering(
            random_state=0,
            n_clusters=2,
            affinity='precomputed',
            eigen_solver=eigen_solver,
            assign_labels=assign_labels,
        )
        estimator.fit(affinity_matrix)

        predicted = estimator.labels_
        # Cluster ids are arbitrary: flip them so the first sample is 1.
        if predicted[0] == 0:
            predicted = 1 - predicted
        assert adjusted_rand_score(predicted, [1, 1, 1, 0, 0, 0, 0]) == 1

        restored = pickle.loads(pickle.dumps(estimator))
        assert restored.n_clusters == estimator.n_clusters
        assert restored.eigen_solver == estimator.eigen_solver
        assert_array_equal(restored.labels_, estimator.labels_)
Example #2
Source File: metric.py From MvDSCN with MIT License | 6 votes |
def post_proC(C, K, d, alpha):
    """Turn a coefficient matrix into an affinity and cluster it spectrally.

    Parameters
    ----------
    C : array
        Square coefficient matrix (it is symmetrised with its transpose).
    K : int
        Number of clusters.
    d : int
        Dimension of each subspace; together with ``K`` it sets the SVD rank.
    alpha : float
        Exponent applied element-wise to the intermediate affinity.

    Returns
    -------
    grp : array
        Cluster labels, shifted so that they start at 1.
    L : array
        The symmetric affinity matrix that was clustered, scaled by its
        maximum entry.
    """
    # Symmetrise, then keep the leading r singular directions.
    C = 0.5 * (C + C.T)
    r = min(d * K + 1, C.shape[0] - 1)
    U, S, _ = svds(C, r, v0=np.ones(C.shape[0]))
    # Reverse the component order returned by svds.
    U = U[:, ::-1]
    S = np.sqrt(S[::-1])
    S = np.diag(S)
    U = U.dot(S)
    # NOTE(review): `normalize` is presumably sklearn.preprocessing.normalize
    # (row-wise L2 normalisation) -- confirm against the file's imports.
    U = normalize(U, norm='l2', axis=1)
    Z = U.dot(U.T)
    Z = Z * (Z > 0)  # zero out negative similarities
    L = np.abs(Z ** alpha)
    L = L / L.max()
    L = 0.5 * (L + L.T)
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack',
                                          affinity='precomputed',
                                          assign_labels='discretize',
                                          random_state=66)
    # fit_predict already fits the model; the separate fit() call that used
    # to precede it only repeated the same seeded computation.
    grp = spectral.fit_predict(L) + 1
    return grp, L
Example #3
Source File: clusterings.py From parcellation_fragmenter with BSD 3-Clause "New" or "Revised" License | 6 votes |
def spectral_clustering(n_clusters, samples, size=False):
    """
    Run spectral clustering on a precomputed affinity matrix.

    Parameters:
    - - - - -
    n_clusters : int
        number of clusters to generate
    samples : array
        adjacency matrix of surface or region, used directly as the
        precomputed affinity
    size : bool
        not used by this function; kept so existing callers keep working

    Returns:
    - - - - -
    labels : array of np.int32
        cluster label of every sample, shifted so that labels start at 1
    """

    # Run Spectral Clustering
    spectral = cluster.SpectralClustering(
        n_clusters=n_clusters, affinity='precomputed')
    spectral.fit(samples)

    # astype() already returns a new array, so no explicit copy is needed
    # to leave the estimator's own labels_ untouched.
    labels = spectral.labels_.astype(np.int32) + 1

    return labels
Example #4
Source File: test_spectral.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_spectral_clustering_sparse():
    """Two tight blobs must be recovered exactly from a sparse RBF affinity."""
    blob_centers = [[1, 1], [-1, -1]]
    X, y = make_blobs(n_samples=20, random_state=0,
                      centers=blob_centers, cluster_std=0.01)

    # Shift the kernel down and clip at zero, then store it in COO format.
    affinity = sparse.coo_matrix(np.maximum(rbf_kernel(X, gamma=1) - 1e-4, 0))

    estimator = SpectralClustering(random_state=0, n_clusters=2,
                                   affinity='precomputed')
    predicted = estimator.fit(affinity).labels_
    assert adjusted_rand_score(y, predicted) == 1
Example #5
Source File: test_spectral.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_clustering_sparse():
    """Two tight blobs must be recovered exactly from a sparse RBF affinity."""
    blob_centers = [[1, 1], [-1, -1]]
    X, y = make_blobs(n_samples=20, random_state=0,
                      centers=blob_centers, cluster_std=0.01)

    # Shift the kernel down and clip at zero, then store it in COO format.
    affinity = sparse.coo_matrix(np.maximum(rbf_kernel(X, gamma=1) - 1e-4, 0))

    estimator = SpectralClustering(random_state=0, n_clusters=2,
                                   affinity='precomputed')
    predicted = estimator.fit(affinity).labels_
    assert_equal(adjusted_rand_score(y, predicted), 1)
Example #6
Source File: test_spectral.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_unknown_assign_labels():
    """spectral_clustering must raise ValueError for an unknown assign_labels."""
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, _ = make_blobs(n_samples=100, centers=centers,
                      cluster_std=1., random_state=42)

    distances = pairwise_distances(X)
    # Convert distances to similarities: the largest distance maps to 0.
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    bad_options = dict(n_clusters=2, random_state=0,
                       assign_labels="<unknown>")
    assert_raises(ValueError, spectral_clustering, similarity, **bad_options)
Example #7
Source File: test_spectral.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_unknown_mode():
    """spectral_clustering must raise ValueError for an unknown eigen_solver."""
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, _ = make_blobs(n_samples=100, centers=centers,
                      cluster_std=1., random_state=42)

    distances = pairwise_distances(X)
    # Convert distances to similarities: the largest distance maps to 0.
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    bad_options = dict(n_clusters=2, random_state=0,
                       eigen_solver="<unknown>")
    assert_raises(ValueError, spectral_clustering, similarity, **bad_options)
Example #8
Source File: test_spectral.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_amg_mode():
    """Exercise the "amg" eigen solver, with or without pyamg installed.

    With pyamg available the clustering only has to run and clear a low
    accuracy bar; without it, spectral_embedding is expected to raise
    ValueError.
    """
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    distances = pairwise_distances(X)
    # Convert distances to similarities: the largest distance maps to 0.
    S = sparse.coo_matrix(np.max(distances) - distances)

    try:
        from pyamg import smoothed_aggregation_solver  # noqa
    except ImportError:
        amg_loaded = False
    else:
        amg_loaded = True

    if not amg_loaded:
        assert_raises(ValueError, spectral_embedding, S,
                      n_components=len(centers),
                      random_state=0, eigen_solver="amg")
        return

    labels = spectral_clustering(S, n_clusters=len(centers),
                                 random_state=0, eigen_solver="amg")
    # Quality is secondary here; the solver just has to work, subject to a
    # minimal floor on how many labels agree.
    assert_greater(np.mean(labels == true_labels), .3)
Example #9
Source File: test_spectral.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_clustering():
    """Cluster a two-block affinity for every solver / labelling combination.

    Each combination is run on a dense and on a CSR copy of the affinity.
    After fixing the arbitrary label polarity the labels must equal
    [1, 1, 1, 0, 0, 0, 0], and the fitted estimator must survive a
    dumps/loads round trip.
    """
    first_block_row = [1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0]
    bridge_row = [0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0]
    second_block_row = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]
    S = np.array([first_block_row] * 3 + [bridge_row] + [second_block_row] * 3)

    for eigen_solver in ('arpack', 'lobpcg'):
        for assign_labels in ('kmeans', 'discretize'):
            for affinity_matrix in (S, sparse.csr_matrix(S)):
                estimator = SpectralClustering(
                    random_state=0,
                    n_clusters=2,
                    affinity='precomputed',
                    eigen_solver=eigen_solver,
                    assign_labels=assign_labels,
                )
                estimator.fit(affinity_matrix)

                predicted = estimator.labels_
                # Cluster ids are arbitrary: flip so the first sample is 1.
                if predicted[0] == 0:
                    predicted = 1 - predicted
                assert_array_equal(predicted, [1, 1, 1, 0, 0, 0, 0])

                restored = loads(dumps(estimator))
                assert_equal(restored.n_clusters, estimator.n_clusters)
                assert_equal(restored.eigen_solver, estimator.eigen_solver)
                assert_array_equal(restored.labels_, estimator.labels_)
Example #10
Source File: sklearn_cluster.py From learn-to-cluster with MIT License | 5 votes |
def dask_spectral(feat, n_clusters, **kwargs):
    """Cluster ``feat`` with dask-ml's SpectralClustering (RBF affinity).

    Extra keyword arguments are accepted but not used. Returns the result
    of calling ``.compute()`` on the fitted estimator's ``labels_``.
    """
    # Imported lazily so dask_ml is only needed when this function is called.
    from dask_ml.cluster import SpectralClustering

    model = SpectralClustering(n_clusters=n_clusters,
                               affinity='rbf',
                               random_state=0)
    fitted = model.fit(feat)
    return fitted.labels_.compute()
Example #11
Source File: sklearn_cluster.py From learn-to-cluster with MIT License | 5 votes |
def spectral(feat, n_clusters, **kwargs):
    """Cluster ``feat`` spectrally on a nearest-neighbours affinity.

    Labels are assigned with the "discretize" strategy and a fixed
    random_state of 0. Extra keyword arguments are accepted but not used.
    Returns the fitted estimator's ``labels_``.
    """
    model = cluster.SpectralClustering(
        n_clusters=n_clusters,
        assign_labels="discretize",
        affinity="nearest_neighbors",
        random_state=0,
    )
    fitted = model.fit(feat)
    return fitted.labels_
Example #12
Source File: compare_clustering_algs.py From mmvt with GNU General Public License v3.0 | 5 votes |
def compare(data, n_groups, output_fol):
    """Run plot_clusters on ``data`` once per clustering algorithm.

    Each entry below is (estimator, plot name, positional args, keyword
    args) and is handed to plot_clusters in the listed order together with
    ``data`` and ``output_fol``.
    """
    # NOTE: the scipy.cluster.vq.kmeans and hdbscan.HDBSCAN variants were
    # already disabled in this function and remain so.
    runs = [
        (cluster.KMeans, 'KMeans', (), {'n_clusters': n_groups}),
    ]
    runs.extend(
        (mixture.GaussianMixture, 'GMM_{}'.format(ct), (),
         {'n_components': n_groups, 'covariance_type': ct})
        for ct in ['spherical', 'tied', 'diag', 'full']
    )
    runs.extend([
        (cluster.AffinityPropagation, 'AffinityPropagation', (),
         {'preference': -5.0, 'damping': 0.95}),
        (cluster.MeanShift, 'MeanShift', (0.175,), {'cluster_all': False}),
        (cluster.SpectralClustering, 'SpectralClustering', (),
         {'n_clusters': n_groups}),
        (cluster.AgglomerativeClustering, 'AgglomerativeClustering', (),
         {'n_clusters': n_groups, 'linkage': 'ward'}),
        (cluster.DBSCAN, 'DBSCAN', (), {'eps': 0.025}),
    ])

    for estimator, title, args, kwds in runs:
        plot_clusters(data, estimator, title, output_fol, args, kwds)
Example #13
Source File: create_endpoints_mask_with_clustering.py From TractSeg with Apache License 2.0 | 5 votes |
def cluster(points, algorithm=DBSCAN):
    """Cluster ``points`` and return the members of the two biggest clusters.

    :param points: samples passed to the estimator's ``fit_predict``
    :param algorithm: either the name ("KMeans" / "DBSCAN") or the estimator
        class itself; the default is the DBSCAN class
    :return: ``(points_start, points_end)`` as produced by
        ``select_two_biggest_clusters``
    :raises ValueError: if ``algorithm`` is neither KMeans nor DBSCAN
    """
    # The default argument is the class, while the dispatch below is by
    # name, so reduce a class to its __name__ first. Comparing the class
    # directly against the strings matched no branch and left `labels`
    # unbound.
    if isinstance(algorithm, str):
        name = algorithm
    else:
        name = getattr(algorithm, "__name__", str(algorithm))

    print("Running {}...".format(name))
    if name == "KMeans":
        # not good at finding clusters if close together
        labels = KMeans(n_clusters=2, random_state=0, n_jobs=-1).fit_predict(points)
    elif name == "DBSCAN":
        # no fixed number of labels; slow with high eps
        labels = DBSCAN(eps=3.0, n_jobs=-1).fit_predict(points)
    else:
        # Alternatives noted by the original author but not enabled:
        # SpectralClustering (slow, > 1min) and AgglomerativeClustering (fast).
        raise ValueError(
            "Unsupported clustering algorithm: {!r} "
            "(expected 'KMeans' or 'DBSCAN')".format(algorithm))

    points_start, points_end = select_two_biggest_clusters(labels, points)
    return points_start, points_end
Example #14
Source File: SpectralClustering.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self, options):
    """Build the spectral-clustering estimator and a scaler from ``options``.

    ``options['params']`` (an empty dict when absent) is converted with
    ``convert_params``: ``gamma`` as float, ``affinity`` as string, ``k``
    and ``random_state`` as ints, with ``k`` renamed to ``n_clusters``.
    """
    self.handle_options(options)

    raw_params = options.get('params', {})
    estimator_kwargs = convert_params(
        raw_params,
        floats=['gamma'],
        strs=['affinity'],
        ints=['k', 'random_state'],
        aliases={'k': 'n_clusters'},
    )

    self.estimator = _SpectralClustering(**estimator_kwargs)
    self.scaler = StandardScaler()
Example #15
Source File: graphTools.py From graph-neural-networks with GNU General Public License v3.0 | 5 votes |
def computeSourceNodes(A, C):
    """
    computeSourceNodes: pick one source node per community for the source
        localization problem

    Input:
        A (np.array): adjacency matrix of shape N x N
        C (int): number of classes

    Output:
        sourceNodes (list): contains the indices of the C source nodes

    The adjacency matrix is split into C communities by spectral clustering
    (precomputed affinity, 'discretize' label assignment); within each
    community the node with the largest degree is selected.
    """
    # Degree of each node: sum of the adjacency matrix along axis 0.
    degree = np.sum(A, axis=0)

    clusterer = SpectralClustering(n_clusters=C,
                                   affinity='precomputed',
                                   assign_labels='discretize')
    communityLabels = clusterer.fit(A).labels_

    sourceNodes = []
    for community in range(C):
        members = np.nonzero(communityLabels == community)[0]
        # argsort is ascending, so the last position is the highest degree.
        ranking = np.argsort(degree[members])
        sourceNodes.append(members[ranking[-1]])

    return sourceNodes
Example #16
Source File: region_growing.py From pyImSegm with BSD 3-Clause "New" or "Revised" License | 5 votes |
def transform_rays_model_cdf_spectral(list_rays, nb_components=5):
    """ compute the mixture model and transform it into cumulative distribution

    The rays are grouped with spectral clustering; per group the mean
    (smoothed along the ray axis) and the standard deviation (offset by 1)
    are computed and passed, with the group weights, to
    ``compute_cumulative_distrib``.

    :param list(list(int)) list_rays: list ray features (distances)
    :param int nb_components: number components in mixture model
    :return tuple(any,list(list(int))): mixture model, list of stat/param of models

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...     [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, cdist = transform_rays_model_cdf_spectral(list_rays)
    >>> np.round(cdist, 1).tolist()  # doctest: +NORMALIZE_WHITESPACE
    [[1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 0.6, 0.5, 0.2, 0.0],
     [1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.9, 0.7, 0.5, 0.2, 0.0],
     [1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 0.7, 0.5, 0.3, 0.0]]
    """
    rays = np.array(list_rays)
    sc = cluster.SpectralClustering(nb_components)
    sc.fit(rays)
    labels = sc.labels_

    # Counts come from the same np.unique call as the label list, so the
    # weights below always line up row-for-row with `means` / `stds`, even
    # if some label id is not present.
    uq_labels, counts = np.unique(labels, return_counts=True)
    # '%d' replaces the former '% c', which was parsed as a character
    # conversion and garbled the message.
    logging.debug('SpectralClustering found %d components with counts: %r',
                  len(uq_labels), counts)

    means = np.zeros((len(uq_labels), rays.shape[1]))
    stds = np.zeros((len(means), rays.shape[1]))
    for i, lb in enumerate(uq_labels):
        members = rays[labels == lb]
        means[i, :] = np.mean(members, axis=0)
        # ndimage.gaussian_filter1d is the supported location of the
        # function formerly reached through the ndimage.filters namespace.
        means[i, :] = ndimage.gaussian_filter1d(means[i, :], 1)
        stds[i, :] = np.std(members, axis=0)
    # NOTE(review): presumably keeps every sigma strictly positive -- confirm.
    stds += 1
    weights = counts / float(len(labels))

    # compute the fairest mean + sigma over all components and ray angles
    max_dist = np.max(means + stds)

    cdist = compute_cumulative_distrib(means, stds, weights, max_dist)
    return sc, cdist.tolist()
Example #17
Source File: test_spectral.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_spectral_unknown_mode():
    """spectral_clustering must raise ValueError for an unknown eigen_solver."""
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, _ = make_blobs(n_samples=100, centers=centers,
                      cluster_std=1., random_state=42)

    distances = pairwise_distances(X)
    # Convert distances to similarities: the largest distance maps to 0.
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    bad_options = dict(n_clusters=2, random_state=0,
                       eigen_solver="<unknown>")
    assert_raises(ValueError, spectral_clustering, similarity, **bad_options)
Example #18
Source File: scdiff.py From scdiff with MIT License | 5 votes |
def performClustering(self):
    """Cluster the items of every time point and return all clusters.

    For each time ``ti`` in ``self.KET`` the cluster count and seed come
    from ``self.getClusteringPars()``. With more than one cluster the
    affinity matrix of that time is clustered (KMeans when
    ``self.largeType`` is '1' or 'True', SpectralClustering otherwise) and
    each item's ``Label`` is set from the result; otherwise every item
    gets label 0. One ``Cluster`` is built per label, named
    "<time>_<label>", and appended to the returned list.
    """
    print('start clustering...')
    KET = self.KET
    # default clustering model
    [dCK, dBS] = self.getClusteringPars()
    AC = []
    gc.collect()

    for ti in KET:
        print("clustering for time: " + str(ti))
        CT = self.dET[ti]
        CKT = dCK[ti]
        BST = dBS[ti]

        if not CKT > 1:
            # Single cluster: no model needed, everything is labelled 0.
            for item in CT:
                item.Label = 0
            AC.extend([
                Cluster([item for item in CT if item.Label == 0],
                        ti, str(ti) + '_' + str(0))
            ])
            continue

        use_kmeans = (self.largeType == '1' or self.largeType == 'True')
        X = copy.deepcopy(self.affMatrix[ti])
        if use_kmeans:
            SC = KMeans(n_clusters=CKT, random_state=BST)
        else:
            SC = SpectralClustering(n_clusters=CKT, random_state=BST)
        SC.fit(X)
        Y = SC.labels_

        for j, item in enumerate(CT):
            item.Label = Y[j]

        AC.extend(
            Cluster([item for item in CT if item.Label == j],
                    ti, str(ti) + '_' + str(j))
            for j in range(CKT)
        )

    return AC
Example #19
Source File: baseline_clustering.py From cdp with MIT License | 5 votes |
def spectral(feat, n_clusters=2):
    """Cluster ``feat`` spectrally on a nearest-neighbours affinity.

    Labels are assigned with the "discretize" strategy and a fixed
    random_state of 0. Returns the fitted estimator's ``labels_``.
    """
    model = cluster.SpectralClustering(
        n_clusters=n_clusters,
        assign_labels="discretize",
        affinity="nearest_neighbors",
        random_state=0,
    )
    fitted = model.fit(feat)
    return fitted.labels_
Example #20
Source File: test_spectral.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_spectral_unknown_assign_labels():
    """spectral_clustering must raise ValueError for an unknown assign_labels."""
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, _ = make_blobs(n_samples=100, centers=centers,
                      cluster_std=1., random_state=42)

    distances = pairwise_distances(X)
    # Convert distances to similarities: the largest distance maps to 0.
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    bad_options = dict(n_clusters=2, random_state=0,
                       assign_labels="<unknown>")
    assert_raises(ValueError, spectral_clustering, similarity, **bad_options)
Example #21
Source File: scdiff.py From scdiff with MIT License | 4 votes |
def determineSeed(self, dCK):
    """Pick, per time point, the random seed with the best silhouette score.

    For every time in ``self.KET`` except the first, each candidate seed is
    used to cluster that time's affinity matrix into ``dCK[T]`` clusters
    (KMeans when ``self.largeType`` is '1' or 'True', SpectralClustering
    otherwise). The seed with the highest silhouette score is kept. 100
    seeds are tried when ``self.largeType`` is None, a single one otherwise.

    :param dCK: mapping from time point to number of clusters
    :return: dict mapping every time point to its chosen seed; the first
        time point, and any time point whose search raised, map to 0
    """
    print("learning clustering seeds...")
    dBS = {}  # Best seeds
    KET = self.KET
    NSEEDS = 100 if self.largeType is None else 1
    useKMeans = (self.largeType == '1' or self.largeType == 'True')

    for T in KET[1:]:
        try:
            CKi = dCK[T]
            X = copy.deepcopy(self.affMatrix[T])
            if not useKMeans:
                # Spectral runs are scored on distances derived from X.
                DX = self.affinity2Distance(X)

            SS = []
            for s in range(NSEEDS):
                if useKMeans:
                    # The candidate seed must be passed to the estimator;
                    # otherwise the score belongs to an arbitrary
                    # initialisation rather than to seed `s`.
                    SC = KMeans(n_clusters=CKi, random_state=s)
                    SC.fit(X)
                    sscore = silhouette_score(X, SC.labels_)
                else:
                    SC = SpectralClustering(n_clusters=CKi, random_state=s)
                    SC.fit(X)
                    sscore = silhouette_score(DX, SC.labels_,
                                              metric="precomputed")
                SS.append(sscore)
                print("seeds:"+str(s))

            # Seeds are 0..NSEEDS-1, so the index of the best score is the
            # seed itself (first one wins on ties).
            dBS[T] = SS.index(max(SS))
        except Exception:
            # Best effort: fall back to seed 0 for this time point, but do
            # not swallow KeyboardInterrupt / SystemExit like a bare
            # `except:` would.
            dBS[T] = 0

    dBS[KET[0]] = 0
    return dBS
Example #22
Source File: test_spectral.py From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def test_affinities():
    """Run SpectralClustering with every supported kind of affinity.

    Covers nearest neighbours (which must warn about a graph that is not
    fully connected), the default affinity with gamma=2, every named kernel
    except additive chi^2, two callables, and finally an unknown affinity
    name, which must raise ValueError.
    """
    # Note: random_state below was chosen so that the dataset gives a stable
    # eigen decomposition on both OSX and Linux builds.
    X, y = make_blobs(n_samples=20, random_state=0,
                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)

    # nearest neighbors affinity
    nn_model = SpectralClustering(n_clusters=2, affinity='nearest_neighbors',
                                  random_state=0)
    assert_warns_message(UserWarning, 'not fully connected', nn_model.fit, X)
    assert adjusted_rand_score(y, nn_model.labels_) == 1

    gamma_model = SpectralClustering(n_clusters=2, gamma=2, random_state=0)
    assert adjusted_rand_score(y, gamma_model.fit(X).labels_) == 1

    # From here on only the shape of the labels is checked.
    X = check_random_state(10).rand(10, 5) * 10
    expected_shape = (X.shape[0],)

    for kern in kernel_metrics():
        # Additive chi^2 produces a negative similarity matrix, which is
        # meaningless for spectral clustering, so it is skipped.
        if kern == 'additive_chi2':
            continue
        kernel_model = SpectralClustering(n_clusters=2, affinity=kern,
                                          random_state=0)
        assert expected_shape == kernel_model.fit(X).labels_.shape

    constant_model = SpectralClustering(n_clusters=2,
                                        affinity=lambda x, y: 1,
                                        random_state=0)
    assert expected_shape == constant_model.fit(X).labels_.shape

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert_equal(kwargs, {})    # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    histogram_model = SpectralClustering(n_clusters=2, affinity=histogram,
                                         random_state=0)
    assert expected_shape == histogram_model.fit(X).labels_.shape

    # raise error on unknown affinity
    unknown_model = SpectralClustering(n_clusters=2, affinity='<unknown>')
    assert_raises(ValueError, unknown_model.fit, X)
Example #23
Source File: test_cluster.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 3 votes |
def test_objectmapper(self):
    """The ModelFrame ``cluster`` accessor must expose the estimator classes.

    Every attribute reached through ``df.cluster`` (and its ``bicluster``
    sub-namespace) has to be the very same object as the one on the
    ``cluster`` module.
    """
    df = pdml.ModelFrame([])

    top_level_names = (
        'AffinityPropagation',
        'AgglomerativeClustering',
        'Birch',
        'DBSCAN',
        'FeatureAgglomeration',
        'KMeans',
        'MiniBatchKMeans',
        'MeanShift',
        'SpectralClustering',
    )
    for name in top_level_names:
        self.assertIs(getattr(df.cluster, name), getattr(cluster, name))

    for name in ('SpectralBiclustering', 'SpectralCoclustering'):
        self.assertIs(getattr(df.cluster.bicluster, name),
                      getattr(cluster.bicluster, name))