Python sklearn.cluster.spectral_clustering() Examples
The following are 13 code examples of sklearn.cluster.spectral_clustering(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.cluster, or try the search function.
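Before the project examples, here is a minimal, self-contained usage sketch (not taken from any of the projects below): spectral_clustering() expects a square affinity (similarity) matrix rather than raw features, and returns one cluster label per sample. The distance-to-similarity conversion and all parameter values here are illustrative assumptions.

import numpy as np
from sklearn.cluster import spectral_clustering
from sklearn.datasets import make_blobs
from sklearn.metrics import pairwise_distances

# Toy data: three well-separated blobs (sizes and seed chosen for illustration).
X, y = make_blobs(n_samples=90, centers=3, cluster_std=1.0, random_state=0)

# Build a similarity matrix from pairwise distances; spectral_clustering()
# takes an affinity matrix, not a feature matrix.
D = pairwise_distances(X)
affinity = np.exp(-D / D.std())

labels = spectral_clustering(affinity, n_clusters=3, random_state=0)
print(labels[:10])  # one label per sample, values in {0, 1, 2}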
Example #1
Source File: test_spectral.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_spectral_unknown_mode():
    # Test that SpectralClustering fails with an unknown mode set.
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)

    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, eigen_solver="<unknown>")
Example #2
Source File: test_spectral.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_spectral_unknown_assign_labels():
    # Test that SpectralClustering fails with an unknown assign_labels set.
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)

    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, assign_labels="<unknown>")
Example #3
Source File: test_spectral.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_spectral_clustering_with_arpack_amg_solvers():
    # Test that spectral_clustering is the same for arpack and amg solver
    # Based on toy example from plot_segmentation_toy.py

    # a small two coin image
    x, y = np.indices((40, 40))

    center1, center2 = (14, 12), (20, 25)
    radius1, radius2 = 8, 7

    circle1 = (x - center1[0]) ** 2 + (y - center1[1]) ** 2 < radius1 ** 2
    circle2 = (x - center2[0]) ** 2 + (y - center2[1]) ** 2 < radius2 ** 2

    circles = circle1 | circle2
    mask = circles.copy()
    img = circles.astype(float)

    graph = img_to_graph(img, mask=mask)
    graph.data = np.exp(-graph.data / graph.data.std())

    labels_arpack = spectral_clustering(
        graph, n_clusters=2, eigen_solver='arpack', random_state=0)

    assert len(np.unique(labels_arpack)) == 2

    if amg_loaded:
        labels_amg = spectral_clustering(
            graph, n_clusters=2, eigen_solver='amg', random_state=0)
        assert adjusted_rand_score(labels_arpack, labels_amg) == 1
    else:
        assert_raises(
            ValueError, spectral_clustering,
            graph, n_clusters=2, eigen_solver='amg', random_state=0)
Example #4
Source File: graph_clustering.py From Hydra with MIT License | 5 votes |
def _spectral_solver(outer_stress, serving_tasks, task_groups, n_clusters=2):
    """Graph clusterization using spectral methods.

    Args:
        outer_stress: a list of tuples of outer measurements info:
            [(task_id_i, task_id_j, stress_value)] returned by
            `applications.measures.stress_points`.
        serving_tasks: list of tasks that the current branch is serving.
        task_groups: these are list of list of tasks of children nodes.
        n_clusters: number of clusters to divide to.

    Returns:
        a numpy array of cluster indices of each group, e.g. [0, 1, 0]
    """
    task_id_idx = dict((k, i) for i, k in enumerate(serving_tasks))
    data = np.zeros((len(serving_tasks), len(serving_tasks)))
    for task_id_i, task_id_j, stressval in outer_stress:
        data[task_id_idx[task_id_i], task_id_idx[task_id_j]] = stressval
    df_tasks = pd.DataFrame(
        data=data, index=serving_tasks, columns=serving_tasks)

    data = np.zeros((len(task_groups), len(task_groups)))
    for gid_i in range(len(task_groups)):
        for gid_j in range(len(task_groups)):
            t = df_tasks.loc[task_groups[gid_i], task_groups[gid_j]]
            ij_stress = t.max(axis=1).mean()
            t = df_tasks.loc[task_groups[gid_j], task_groups[gid_i]]
            ji_stress = t.max(axis=1).mean()
            data[gid_i, gid_j] = (ij_stress + ji_stress) / 2.
    df_groups = pd.DataFrame(data=data)

    affinity = df_groups.values
    affinity = np.exp(-affinity / affinity.max())
    clusters = spectral_clustering(affinity, n_clusters=n_clusters)
    return clusters
Example #5
Source File: networkclustering.py From PyPSA with GNU General Public License v3.0 | 5 votes |
def busmap_by_spectral_clustering(network, n_clusters, **kwds):
    # Build a bus graph whose edge weights are the number of parallel lines
    # (offset by 0.1 so that no edge has zero weight).
    lines = (network.lines.loc[:, ['bus0', 'bus1']]
             .assign(weight=network.lines.num_parallel)
             .set_index(['bus0', 'bus1']))
    lines.weight += 0.1
    G = nx.Graph()
    G.add_nodes_from(network.buses.index)
    G.add_edges_from((u, v, dict(weight=w)) for (u, v), w in lines.itertuples())
    # Cluster the weighted adjacency matrix and return a bus -> cluster-label
    # mapping indexed by bus name.
    return pd.Series(list(map(str, sk_spectral_clustering(
                         nx.adjacency_matrix(G), n_clusters, **kwds) + 1)),
                     index=network.buses.index)
Example #6
Source File: metrics.py From snfpy with GNU Lesser General Public License v3.0 | 5 votes |
def rank_feature_by_nmi(inputs, W, *, K=20, mu=0.5, n_clusters=None):
    """
    Calculates NMI of each feature in `inputs` with `W`

    Parameters
    ----------
    inputs : list-of-tuple
        Each tuple should contain (1) an (N, M) data array, where N is samples
        M is features, and (2) a string indicating the metric to use to
        compute a distance matrix for the given data. This MUST be one of the
        options available in :py:func:`scipy.spatial.distance.cdist`
    W : (N, N) array_like
        Similarity array generated by :py:func:`snf.compute.snf`
    K : (0, N) int, optional
        Hyperparameter normalization factor for scaling. Default: 20
    mu : (0, 1) float, optional
        Hyperparameter normalization factor for scaling. Default: 0.5
    n_clusters : int, optional
        Number of desired clusters. Default: determined by eigengap (see
        `snf.get_n_clusters()`)

    Returns
    -------
    nmi : list of (M,) np.ndarray
        Normalized mutual information scores for each feature of input arrays
    """
    if n_clusters is None:
        n_clusters = compute.get_n_clusters(W)[0]
    snf_labels = spectral_clustering(W, n_clusters)
    nmi = [np.empty(shape=(d.shape[-1])) for d, m in inputs]
    for ndtype, (dtype, metric) in enumerate(inputs):
        for nfeature, feature in enumerate(np.asarray(dtype).T):
            aff = compute.make_affinity(np.vstack(feature), K=K, mu=mu,
                                        metric=metric)
            aff_labels = spectral_clustering(aff, n_clusters)
            nmi[ndtype][nfeature] = v_measure_score(snf_labels, aff_labels)

    return nmi
Example #7
Source File: test_cluster.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_spectral_clustering(self):
    N = 50
    m = np.random.random_integers(1, 200, size=(N, N))
    m = (m + m.T) / 2

    df = pdml.ModelFrame(m)
    result = df.cluster.spectral_clustering(random_state=self.random_state)
    expected = cluster.spectral_clustering(m, random_state=self.random_state)

    self.assertIsInstance(result, pdml.ModelSeries)
    tm.assert_index_equal(result.index, df.index)
    tm.assert_numpy_array_equal(result.values, expected)
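For comparison with the function API used throughout this page, the estimator interface sklearn.cluster.SpectralClustering can produce the same kind of labeling from a precomputed affinity matrix. The sketch below is an illustrative assumption, not part of the pandas-ml tests above; affinity='precomputed' tells the estimator to treat its input as a similarity matrix, mirroring the spectral_clustering() call.

import numpy as np
from sklearn.cluster import SpectralClustering, spectral_clustering

rng = np.random.RandomState(0)
m = rng.randint(1, 200, size=(50, 50)).astype(float)
m = (m + m.T) / 2  # symmetric similarity matrix, as in the test above

# Function API, as used in the examples on this page.
labels_fn = spectral_clustering(m, n_clusters=4, random_state=0)

# Estimator API on the same precomputed affinity matrix.
est = SpectralClustering(n_clusters=4, affinity='precomputed', random_state=0)
labels_est = est.fit_predict(m)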
Example #8
Source File: LST.py From python-urbanPlanning with MIT License | 5 votes |
def __init__(self, LST):
    self.LST = LST
    # Cluster with spectral_clustering(). This data was not used in the
    # in-depth analysis of this experiment; the resulting regions can be
    # interpreted freely as a reflection of what the LST data means.
Example #9
Source File: test_spectral.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_amg_mode():
    # Test the amg mode of SpectralClustering
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)
    try:
        from pyamg import smoothed_aggregation_solver  # noqa
        amg_loaded = True
    except ImportError:
        amg_loaded = False
    if amg_loaded:
        labels = spectral_clustering(S, n_clusters=len(centers),
                                     random_state=0, eigen_solver="amg")
        # We don't care too much that it's good, just that it *worked*.
        # There does have to be some lower limit on the performance though.
        assert_greater(np.mean(labels == true_labels), .3)
    else:
        assert_raises(ValueError, spectral_embedding, S,
                      n_components=len(centers),
                      random_state=0, eigen_solver="amg")
Example #10
Source File: test_spectral.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_unknown_mode():
    # Test that SpectralClustering fails with an unknown mode set.
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)

    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, eigen_solver="<unknown>")
Example #11
Source File: test_spectral.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_spectral_unknown_assign_labels():
    # Test that SpectralClustering fails with an unknown assign_labels set.
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)

    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, assign_labels="<unknown>")
Example #12
Source File: cv.py From snfpy with GNU Lesser General Public License v3.0 | 4 votes |
def compute_SNF(*data, metric='sqeuclidean', K=20, mu=1, n_clusters=None,
                t=20, n_perms=1000, normalize=True, seed=None):
    """
    Runs a full SNF on `data` and returns cluster affinity scores and labels

    Parameters
    ----------
    *data : (N, M) array_like
        Raw data arrays, where `N` is samples and `M` is features.
    metric : str or list-of-str, optional
        Distance metrics to compute on `data`. Must be one of available
        metrics in ``scipy.spatial.distance.pdist``. If a list is provided for
        `data` a list of equal length may be supplied here. Default:
        'sqeuclidean'
    K : int, optional
        Number of neighbors to compare similarity against. Default: 20
    mu : (0, 1) float, optional
        Hyperparameter normalization factor for scaling. Default: 0.5
    n_clusters : int or list-of-int, optional
        Number of clusters to find in combined data. Default: determined by
        eigengap (see `compute.get_n_clusters()`)
    t : int, optional
        Number of iterations to perform information swapping. Default: 20
    n_perms : int, optional
        Number of permutations for calculating z_affinity. Default: 1000
    normalize : bool, optional
        Whether to normalize (zscore) the data before constructing the
        affinity matrix. Each feature is separately normalized. Default: True

    Returns
    -------
    z_affinity : list-of-float
        Z-score of silhouette (affinity) score
    snf_labels : list of (N,) np.ndarray
        Cluster labels for subjects
    """
    rs = check_random_state(seed)

    # make affinity matrices for all inputs and run SNF
    all_aff = compute.make_affinity(*data, metric=metric, K=K, mu=mu,
                                    normalize=normalize)
    snf_aff = compute.snf(*all_aff, K=K, t=t)

    # get estimated number of clusters (if not provided)
    if n_clusters is None:
        n_clusters = [compute.get_n_clusters(snf_aff)[0]]
    elif isinstance(n_clusters, int):
        n_clusters = [n_clusters]

    # perform spectral clustering across all `n_clusters`
    snf_labels = [spectral_clustering(snf_aff, clust, random_state=rs)
                  for clust in n_clusters]

    # get z-affinity as desired
    if n_perms is not None and n_perms > 0:
        z_affinity = [metrics.affinity_zscore(snf_aff, label, n_perms, seed=rs)
                      for label in snf_labels]
        return z_affinity, snf_labels

    return snf_labels
Example #13
Source File: LST.py From python-urbanPlanning with MIT License | 4 votes |
def LSTClustering(self):
    # Based on the "Segmenting the picture of greek coins in regions" example,
    # Author: Gael Varoquaux <gael.varoquaux@normalesup.org>, Brian Cheung
    # License: BSD 3 clause
    orig_coins = self.LST
    # these were introduced in skimage-0.14
    if LooseVersion(skimage.__version__) >= '0.14':
        rescale_params = {'anti_aliasing': False, 'multichannel': False}
    else:
        rescale_params = {}
    smoothened_coins = gaussian_filter(orig_coins, sigma=2)
    rescaled_coins = rescale(smoothened_coins, 0.2, mode="reflect",
                             **rescale_params)

    # Convert the image into a graph with the value of the gradient on the
    # edges.
    graph = image.img_to_graph(rescaled_coins)

    # Take a decreasing function of the gradient: an exponential
    # The smaller beta is, the more independent the segmentation is of the
    # actual image. For beta=1, the segmentation is close to a voronoi
    beta = 10
    eps = 1e-6
    graph.data = np.exp(-beta * graph.data / graph.data.std()) + eps

    # Apply spectral clustering (this step goes much faster if you have pyamg
    # installed)
    N_REGIONS = 200

    for assign_labels in ('discretize',):
    # for assign_labels in ('kmeans', 'discretize'):
        t0 = time.time()
        labels = spectral_clustering(graph, n_clusters=N_REGIONS,
                                     assign_labels=assign_labels,
                                     random_state=42)
        t1 = time.time()
        labels = labels.reshape(rescaled_coins.shape)

        plt.figure(figsize=(5 * 3, 5 * 3))
        plt.imshow(rescaled_coins, cmap=plt.cm.gray)
        for l in range(N_REGIONS):
            plt.contour(labels == l,
                        colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])
        plt.xticks(())
        plt.yticks(())
        title = 'Spectral clustering: %s, %.2fs' % (assign_labels, (t1 - t0))
        print(title)
        plt.title(title)
    plt.show()
    # Delineate the spatial distribution structure of cool and hot regions
    # based on the convolved temperature-gradient variation.