Python sklearn.cluster.spectral_clustering() Examples
The following are 13 code examples of sklearn.cluster.spectral_clustering(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.cluster, or try the search function.
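Before the project examples, here is a minimal, self-contained usage sketch (not taken from any of the projects below): spectral_clustering() expects a square affinity (similarity) matrix rather than raw features, and returns one cluster label per sample. The distance-to-similarity conversion and all parameter values here are illustrative assumptions.

import numpy as np
from sklearn.cluster import spectral_clustering
from sklearn.datasets import make_blobs
from sklearn.metrics import pairwise_distances

# Toy data: three well-separated blobs (sizes and seed chosen for illustration).
X, y = make_blobs(n_samples=90, centers=3, cluster_std=1.0, random_state=0)

# Build a similarity matrix from pairwise distances; spectral_clustering()
# takes an affinity matrix, not a feature matrix.
D = pairwise_distances(X)
affinity = np.exp(-D / D.std())

labels = spectral_clustering(affinity, n_clusters=3, random_state=0)
print(labels[:10])  # one label per sample, values in {0, 1, 2}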
Example #1
Source File: test_spectral.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_spectral_unknown_mode():
    # Test that SpectralClustering fails with an unknown mode set.
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)

    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, eigen_solver="<unknown>")
Example #2
Source File: test_spectral.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_spectral_unknown_assign_labels():
    # Test that SpectralClustering fails with an unknown assign_labels set.
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)

    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, assign_labels="<unknown>")
Example #3
Source File: test_spectral.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_spectral_clustering_with_arpack_amg_solvers():
    # Test that spectral_clustering is the same for arpack and amg solver
    # Based on toy example from plot_segmentation_toy.py

    # a small two coin image
    x, y = np.indices((40, 40))

    center1, center2 = (14, 12), (20, 25)
    radius1, radius2 = 8, 7

    circle1 = (x - center1[0]) ** 2 + (y - center1[1]) ** 2 < radius1 ** 2
    circle2 = (x - center2[0]) ** 2 + (y - center2[1]) ** 2 < radius2 ** 2

    circles = circle1 | circle2
    mask = circles.copy()
    img = circles.astype(float)

    graph = img_to_graph(img, mask=mask)
    graph.data = np.exp(-graph.data / graph.data.std())

    labels_arpack = spectral_clustering(
        graph, n_clusters=2, eigen_solver='arpack', random_state=0)

    assert len(np.unique(labels_arpack)) == 2

    if amg_loaded:
        labels_amg = spectral_clustering(
            graph, n_clusters=2, eigen_solver='amg', random_state=0)
        assert adjusted_rand_score(labels_arpack, labels_amg) == 1
    else:
        assert_raises(
            ValueError, spectral_clustering,
            graph, n_clusters=2, eigen_solver='amg', random_state=0)
Example #4
Source File: graph_clustering.py From Hydra with MIT License | 5 votes |
def _spectral_solver(outer_stress, serving_tasks, task_groups, n_clusters=2):
    """Graph clusterization using spectral methods.

    Args:
        outer_stress: a list of tuples of outer measurements info:
            [(task_id_i, task_id_j, stress_value)] returned by
            `applications.measures.stress_points`.
        serving_tasks: list of tasks that the current branch is serving.
        task_groups: these are list of list of tasks of children nodes.
        n_clusters: number of clusters to divide to.

    Returns:
        a numpy array of cluster indices of each group, e.g. [0, 1, 0]
    """
    task_id_idx = dict((k, i) for i, k in enumerate(serving_tasks))
    data = np.zeros((len(serving_tasks), len(serving_tasks)))
    for task_id_i, task_id_j, stressval in outer_stress:
        data[task_id_idx[task_id_i], task_id_idx[task_id_j]] = stressval
    df_tasks = pd.DataFrame(
        data=data, index=serving_tasks, columns=serving_tasks)

    data = np.zeros((len(task_groups), len(task_groups)))
    for gid_i in range(len(task_groups)):
        for gid_j in range(len(task_groups)):
            t = df_tasks.loc[task_groups[gid_i], task_groups[gid_j]]
            ij_stress = t.max(axis=1).mean()
            t = df_tasks.loc[task_groups[gid_j], task_groups[gid_i]]
            ji_stress = t.max(axis=1).mean()
            data[gid_i, gid_j] = (ij_stress + ji_stress) / 2.
    df_groups = pd.DataFrame(data=data)

    affinity = df_groups.values
    affinity = np.exp(-affinity / affinity.max())
    clusters = spectral_clustering(affinity, n_clusters=n_clusters)
    return clusters
Example #5
Source File: networkclustering.py From PyPSA with GNU General Public License v3.0 | 5 votes |
def busmap_by_spectral_clustering(network, n_clusters, **kwds):
    # Build a bus graph whose edge weights are the number of parallel lines
    # (offset by 0.1 so that no edge has zero weight).
    lines = (network.lines.loc[:, ['bus0', 'bus1']]
             .assign(weight=network.lines.num_parallel)
             .set_index(['bus0', 'bus1']))
    lines.weight += 0.1
    G = nx.Graph()
    G.add_nodes_from(network.buses.index)
    G.add_edges_from((u, v, dict(weight=w)) for (u, v), w in lines.itertuples())
    # Cluster the weighted adjacency matrix and return a bus -> cluster-label
    # mapping indexed by bus name.
    return pd.Series(list(map(str, sk_spectral_clustering(
                         nx.adjacency_matrix(G), n_clusters, **kwds) + 1)),
                     index=network.buses.index)
Example #6
Source File: metrics.py From snfpy with GNU Lesser General Public License v3.0 | 5 votes |
def rank_feature_by_nmi(inputs, W, *, K=20, mu=0.5, n_clusters=None):
    """
    Calculates NMI of each feature in `inputs` with `W`

    Parameters
    ----------
    inputs : list-of-tuple
        Each tuple should contain (1) an (N, M) data array, where N is samples
        M is features, and (2) a string indicating the metric to use to
        compute a distance matrix for the given data. This MUST be one of the
        options available in :py:func:`scipy.spatial.distance.cdist`
    W : (N, N) array_like
        Similarity array generated by :py:func:`snf.compute.snf`
    K : (0, N) int, optional
        Hyperparameter normalization factor for scaling. Default: 20
    mu : (0, 1) float, optional
        Hyperparameter normalization factor for scaling. Default: 0.5
    n_clusters : int, optional
        Number of desired clusters. Default: determined by eigengap (see
        `snf.get_n_clusters()`)

    Returns
    -------
    nmi : list of (M,) np.ndarray
        Normalized mutual information scores for each feature of input arrays
    """
    if n_clusters is None:
        n_clusters = compute.get_n_clusters(W)[0]
    snf_labels = spectral_clustering(W, n_clusters)
    nmi = [np.empty(shape=(d.shape[-1])) for d, m in inputs]
    for ndtype, (dtype, metric) in enumerate(inputs):
        for nfeature, feature in enumerate(np.asarray(dtype).T):
            aff = compute.make_affinity(np.vstack(feature), K=K, mu=mu,
                                        metric=metric)
            aff_labels = spectral_clustering(aff, n_clusters)
            nmi[ndtype][nfeature] = v_measure_score(snf_labels, aff_labels)

    return nmi
Example #7
Source File: test_cluster.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_spectral_clustering(self):
    N = 50
    m = np.random.random_integers(1, 200, size=(N, N))
    m = (m + m.T) / 2

    df = pdml.ModelFrame(m)
    result = df.cluster.spectral_clustering(random_state=self.random_state)
    expected = cluster.spectral_clustering(m, random_state=self.random_state)

    self.assertIsInstance(result, pdml.ModelSeries)
    tm.assert_index_equal(result.index, df.index)
    tm.assert_numpy_array_equal(result.values, expected)
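For comparison with the function API used throughout this page, the estimator interface sklearn.cluster.SpectralClustering can produce the same kind of labeling from a precomputed affinity matrix. The sketch below is an illustrative assumption, not part of the pandas-ml tests above; affinity='precomputed' tells the estimator to treat its input as a similarity matrix, mirroring the spectral_clustering() call.

import numpy as np
from sklearn.cluster import SpectralClustering, spectral_clustering

rng = np.random.RandomState(0)
m = rng.randint(1, 200, size=(50, 50)).astype(float)
m = (m + m.T) / 2  # symmetric similarity matrix, as in the test above

# Function API, as used in the examples on this page.
labels_fn = spectral_clustering(m, n_clusters=4, random_state=0)

# Estimator API on the same precomputed affinity matrix.
est = SpectralClustering(n_clusters=4, affinity='precomputed', random_state=0)
labels_est = est.fit_predict(m)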
Example #8
Source File: LST.py From python-urbanPlanning with MIT License | 5 votes |
def __init__(self, LST):
    self.LST = LST
    # Cluster with spectral_clustering(). This data was not used in the
    # in-depth analysis of this experiment; the resulting regions can be
    # interpreted freely as a reflection of what the LST data means.
Example #9
Source File: test_spectral.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_amg_mode():
    # Test the amg mode of SpectralClustering
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)
    try:
        from pyamg import smoothed_aggregation_solver  # noqa
        amg_loaded = True
    except ImportError:
        amg_loaded = False
    if amg_loaded:
        labels = spectral_clustering(S, n_clusters=len(centers),
                                     random_state=0, eigen_solver="amg")
        # We don't care too much that it's good, just that it *worked*.
        # There does have to be some lower limit on the performance though.
        assert_greater(np.mean(labels == true_labels), .3)
    else:
        assert_raises(ValueError, spectral_embedding, S,
                      n_components=len(centers),
                      random_state=0, eigen_solver="amg")
Example #10
Source File: test_spectral.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_unknown_mode():
    # Test that SpectralClustering fails with an unknown mode set.
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)

    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, eigen_solver="<unknown>")
Example #11
Source File: test_spectral.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_unknown_assign_labels():
    # Test that SpectralClustering fails with an unknown assign_labels set.
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)

    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, assign_labels="<unknown>")
Example #12
Source File: cv.py From snfpy with GNU Lesser General Public License v3.0 | 4 votes |
def compute_SNF(*data, metric='sqeuclidean', K=20, mu=1, n_clusters=None,
                t=20, n_perms=1000, normalize=True, seed=None):
    """
    Runs a full SNF on `data` and returns cluster affinity scores and labels

    Parameters
    ----------
    *data : (N, M) array_like
        Raw data arrays, where `N` is samples and `M` is features.
    metric : str or list-of-str, optional
        Distance metrics to compute on `data`. Must be one of available
        metrics in ``scipy.spatial.distance.pdist``. If a list is provided for
        `data` a list of equal length may be supplied here. Default:
        'sqeuclidean'
    K : int, optional
        Number of neighbors to compare similarity against. Default: 20
    mu : (0, 1) float, optional
        Hyperparameter normalization factor for scaling. Default: 0.5
    n_clusters : int or list-of-int, optional
        Number of clusters to find in combined data. Default: determined by
        eigengap (see `compute.get_n_clusters()`)
    t : int, optional
        Number of iterations to perform information swapping. Default: 20
    n_perms : int, optional
        Number of permutations for calculating z_affinity. Default: 1000
    normalize : bool, optional
        Whether to normalize (zscore) the data before constructing the
        affinity matrix. Each feature is separately normalized. Default: True

    Returns
    -------
    z_affinity : list-of-float
        Z-score of silhouette (affinity) score
    snf_labels : list of (N,) np.ndarray
        Cluster labels for subjects
    """
    rs = check_random_state(seed)

    # make affinity matrices for all inputs and run SNF
    all_aff = compute.make_affinity(*data, metric=metric, K=K, mu=mu,
                                    normalize=normalize)
    snf_aff = compute.snf(*all_aff, K=K, t=t)

    # get estimated number of clusters (if not provided)
    if n_clusters is None:
        n_clusters = [compute.get_n_clusters(snf_aff)[0]]
    elif isinstance(n_clusters, int):
        n_clusters = [n_clusters]

    # perform spectral clustering across all `n_clusters`
    snf_labels = [spectral_clustering(snf_aff, clust, random_state=rs)
                  for clust in n_clusters]

    # get z-affinity as desired
    if n_perms is not None and n_perms > 0:
        z_affinity = [metrics.affinity_zscore(snf_aff, label, n_perms, seed=rs)
                      for label in snf_labels]
        return z_affinity, snf_labels

    return snf_labels
Example #13
Source File: LST.py From python-urbanPlanning with MIT License | 4 votes |
def LSTClustering(self):
    # Based on the "Segmenting the picture of greek coins in regions" example,
    # Author: Gael Varoquaux <gael.varoquaux@normalesup.org>, Brian Cheung
    # License: BSD 3 clause
    orig_coins = self.LST
    # these were introduced in skimage-0.14
    if LooseVersion(skimage.__version__) >= '0.14':
        rescale_params = {'anti_aliasing': False, 'multichannel': False}
    else:
        rescale_params = {}
    smoothened_coins = gaussian_filter(orig_coins, sigma=2)
    rescaled_coins = rescale(smoothened_coins, 0.2, mode="reflect",
                             **rescale_params)

    # Convert the image into a graph with the value of the gradient on the
    # edges.
    graph = image.img_to_graph(rescaled_coins)

    # Take a decreasing function of the gradient: an exponential
    # The smaller beta is, the more independent the segmentation is of the
    # actual image. For beta=1, the segmentation is close to a voronoi
    beta = 10
    eps = 1e-6
    graph.data = np.exp(-beta * graph.data / graph.data.std()) + eps

    # Apply spectral clustering (this step goes much faster if you have pyamg
    # installed)
    N_REGIONS = 200

    for assign_labels in ('discretize',):
    # for assign_labels in ('kmeans', 'discretize'):
        t0 = time.time()
        labels = spectral_clustering(graph, n_clusters=N_REGIONS,
                                     assign_labels=assign_labels,
                                     random_state=42)
        t1 = time.time()
        labels = labels.reshape(rescaled_coins.shape)

        plt.figure(figsize=(5 * 3, 5 * 3))
        plt.imshow(rescaled_coins, cmap=plt.cm.gray)
        for l in range(N_REGIONS):
            plt.contour(labels == l,
                        colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])
        plt.xticks(())
        plt.yticks(())
        title = 'Spectral clustering: %s, %.2fs' % (assign_labels, (t1 - t0))
        print(title)
        plt.title(title)
    plt.show()
    # Delineate the spatial distribution structure of cool and hot regions
    # based on the convolved temperature-gradient variation.