Python scipy.sparse.csgraph.minimum_spanning_tree() Examples

The following are 16 code examples of scipy.sparse.csgraph.minimum_spanning_tree(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.sparse.csgraph, or try the search function.
Example #1
Source File: paga.py    From scvelo with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def __init__(
        self,
        adata,
        groups=None,
        vkey=None,
        use_time_prior=None,
        root_key=None,
        end_key=None,
        threshold_root_end_prior=None,
        minimum_spanning_tree=None,
    ):
        """Initialise the parent with ``model="v1.2"`` and record the options.

        Every keyword argument is stored unchanged as an attribute of the
        same name, except ``threshold_root_end_prior`` which falls back to
        ``0.9`` when it is ``None``.
        """
        super().__init__(adata=adata, groups=groups, model="v1.2")

        # Plain pass-through settings.
        self.groups = groups
        self.vkey = vkey
        self.use_time_prior = use_time_prior
        self.root_key = root_key
        self.end_key = end_key

        # An unspecified threshold defaults to 0.9.
        self.threshold_root_end_prior = (
            0.9 if threshold_root_end_prior is None else threshold_root_end_prior
        )

        self.minimum_spanning_tree = minimum_spanning_tree
Example #2
Source File: DBCV.py    From DBCV with MIT License 6 votes vote down vote up
def _mutual_reach_dist_MST(dist_tree):
    """
    Computes minimum spanning tree of the mutual reach distance complete graph

    Args:
        dist_tree (np.ndarray): array of dimensions (n_samples, n_samples)
            Graph of all pair-wise mutual reachability distances
            between points.

    Returns: minimum_spanning_tree (np.ndarray)
        array of dimensions (n_samples, n_samples)
        minimum spanning tree of all pair-wise mutual reachability
            distances between points.
    """
    mst = minimum_spanning_tree(dist_tree).toarray()
    return mst + np.transpose(mst) 
Example #3
Source File: best_solution_in_the_wuuuuuuurld.py    From HashCode with Apache License 2.0 6 votes vote down vote up
def _place_mst_paths(d, routers, idx, idy, dists):
    """Mark cables and connected routers on ``d['graph']`` along the MST.

    A sparse weight matrix is assembled from ``dists`` at coordinates
    ``(idx, idy)``, its minimum spanning tree is computed, and for every
    tree edge with positive weight the cells returned by
    ``find_chess_connection`` are marked. Finally every router other than
    ``d['backbone']`` is marked as connected. ``d`` is mutated in place and
    also returned.
    """
    n_routers = len(routers)
    weights = csr_matrix((dists, (idx, idy)), shape=(n_routers, n_routers))
    tree = minimum_spanning_tree(weights).toarray()

    grid = d['graph']
    backbone = d['backbone']

    # Lay cables along each positive-weight tree edge, visiting the edges in
    # row-major order (the same order as a nested row/column scan).
    for src, dst in zip(*(tree > 0).nonzero()):
        for cell in find_chess_connection(routers[src], routers[dst]):
            if cell == backbone:
                continue
            if grid[cell] == Cell.Router:
                grid[cell] = Cell.ConnectedRouter
            else:
                grid[cell] = Cell.Cable

    # Every router except the backbone cell ends up flagged as connected.
    for router in routers:
        if router != backbone:
            grid[router] = Cell.ConnectedRouter

    return d
Example #4
Source File: perm.py    From sGDML with MIT License 6 votes vote down vote up
def sync_perm_mat(match_perms_all, match_cost, n_atoms):
    """Collect the unique permutations found along the MST of ``match_cost``.

    The result always contains the identity permutation of length
    ``n_atoms``. For each (row, col) edge of the spanning tree, the
    permutation stored under that key in ``match_perms_all`` (if any) is
    appended. Duplicate rows are removed before returning.

    ``match_cost`` is passed with ``overwrite=True``, so it may be modified
    by the spanning-tree computation.
    """
    spanning = minimum_spanning_tree(match_cost, overwrite=True)

    # Seed the stack with the identity permutation as a single row.
    stacked = np.arange(n_atoms, dtype=int)[None, :]
    for edge in zip(*spanning.nonzero()):
        candidate = match_perms_all.get(edge)
        if candidate is None:
            continue
        stacked = np.vstack((stacked, candidate))
    unique_perms = np.unique(stacked, axis=0)

    ui.progr_toggle(
        is_done=True, disp_str='Multi-partite matching (permutation synchronization)'
    )

    return unique_perms
Example #5
Source File: logo.py    From viznet with MIT License 5 votes vote down vote up
def logo3():
    """Draw random dots joined by their minimum spanning tree into '_logo3.png'.

    Points are placed at random polar coordinates, one node is drawn per
    point, and the edges of the minimum spanning tree of the pairwise
    Euclidean distance matrix are drawn between the nodes.
    """
    # Switch off node outlines in the global viznet settings.
    viznet.setting.node_setting['inner_lw'] = 0
    viznet.setting.node_setting['lw'] = 0
    npoint = 60
    # Only referenced by the commented-out random-edge loop at the bottom.
    nedge = 50
    angle = random(npoint)*2*np.pi
    #r = np.exp(randn(npoint)*0.4)
    # NOTE(review): presumably `randn` is numpy.random.randn; if so, negative
    # samples become NaN under np.sqrt -- confirm this is intended.
    r = np.sqrt(randn(npoint))
    # Polar -> Cartesian, one (x, y) row per point.
    xy = np.array([r*np.cos(angle), r*np.sin(angle)]).T
    #xy = randn(npoint, 2)*0.5
    with viznet.DynamicShow(figsize=(4,4), filename='_logo3.png') as ds:
        #body = viznet.NodeBrush('tn.mps', size='huge', color='#AACCFF') >> (0, 0)
        dot = viznet.NodeBrush('tn.mps', size='tiny')
        node_list = []
        for i, p in enumerate(xy):
            # Re-use one brush, randomising colour and size for each node.
            dot.color = random(3)*0.5+0.5
            dot.zorder = 100+i*2
            dot.size = 0.05+0.08*random()
            node_list.append(dot >> p)
        # Pairwise Euclidean distances between all points.
        dis_mat = np.linalg.norm(xy-xy[:,None,:], axis=-1)
        tree = minimum_spanning_tree(dis_mat).tocoo()
        for i, j in zip(tree.row, tree.col):
            n1,n2=node_list[i],node_list[j]
            # Both choices passed to `choice` are the same style string; the
            # edge is drawn at the mean z-order of its two end nodes.
            viznet.EdgeBrush(choice(['.>.', '.>.']), lw=1, color=random([3])*0.4, zorder=(n1.obj.zorder+n2.obj.zorder)/2) >> (n1,n2)
        #for i in range(nedge):
        #    n1, n2 =choice(node_list),choice(node_list)
         #   viznet.EdgeBrush(choice(['.>.', '->-']), lw=1, color=random([3])*0.4, zorder=(n1.obj.zorder+n2.obj.zorder)/2) >> (n1,n2) 
Example #6
Source File: _paga.py    From scanpy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _get_connectivities_tree_v1_2(self):
        inverse_connectivities = self.connectivities.copy()
        inverse_connectivities.data = 1./inverse_connectivities.data
        connectivities_tree = minimum_spanning_tree(inverse_connectivities)
        connectivities_tree_indices = [
            connectivities_tree[i].nonzero()[1]
            for i in range(connectivities_tree.shape[0])]
        connectivities_tree = sp.sparse.lil_matrix(self.connectivities.shape, dtype=float)
        for i, neighbors in enumerate(connectivities_tree_indices):
            if len(neighbors) > 0:
                connectivities_tree[i, neighbors] = self.connectivities[i, neighbors]
        return connectivities_tree.tocsr() 
Example #7
Source File: _paga.py    From scanpy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _get_connectivities_tree_v1_0(self, inter_es):
        inverse_inter_es = inter_es.copy()
        inverse_inter_es.data = 1./inverse_inter_es.data
        connectivities_tree = minimum_spanning_tree(inverse_inter_es)
        connectivities_tree_indices = [
            connectivities_tree[i].nonzero()[1]
            for i in range(connectivities_tree.shape[0])]
        connectivities_tree = sp.sparse.lil_matrix(inter_es.shape, dtype=float)
        for i, neighbors in enumerate(connectivities_tree_indices):
            if len(neighbors) > 0:
                connectivities_tree[i, neighbors] = self.connectivities[i, neighbors]
        return connectivities_tree.tocsr() 
Example #8
Source File: best_solution_in_the_wuuuuuuurld.py    From HashCode with Apache License 2.0 5 votes vote down vote up
def _mst(d, new_router, routers=[], idx=[], idy=[], dists=[]):

    new_id = len(routers)

    # calc new router dists
    for i, a in enumerate(routers):
        dist = chessboard_dist(a, new_router)
        if dist > 0:
            idx.append(i)
            idy.append(new_id)
            dists.append(dist)

    # add new router
    routers.append(new_router)
    # create matrix
    mat = csr_matrix((dists, (idx, idy)), shape=(len(routers), len(routers)))

    # minimal spanning tree
    Tmat = minimum_spanning_tree(mat)

    # check costs
    cost = np.sum(Tmat) * d['price_backbone'] + (len(routers) - 1) * d['price_router']
    succ = cost <= d['original_budget']

    # return
    return succ, cost, routers, idx, idy, dists 
Example #9
Source File: DPRL.py    From CROHME_2014 with GNU General Public License v3.0 5 votes vote down vote up
def get_MST(symbol_candidate_list):
        """Return the dense minimum spanning tree over the symbol candidates.

        The weight between candidates ``i < j`` is
        ``symbol_candidate_list[i].closest_distance(symbol_candidate_list[j])``.
        Only the upper triangle of the distance matrix is filled in. Zero
        entries are not stored in the sparse graph, so a pair at distance 0
        is treated as having no edge.

        Returns a square array with one row and column per candidate,
        holding the weights of the selected tree edges.
        """
        symbol_num = len(symbol_candidate_list)
        # ``range`` works on both Python 2 and 3 (``xrange`` is Python 2 only).
        symbol_dis = [[0.0] * symbol_num for _ in range(symbol_num)]
        for i in range(symbol_num):
            for j in range(i + 1, symbol_num):
                symbol_dis[i][j] = symbol_candidate_list[i].closest_distance(symbol_candidate_list[j])

        symbol_dis_matrix = csr_matrix(symbol_dis)
        Tcsr = minimum_spanning_tree(symbol_dis_matrix)
        MST = Tcsr.toarray()
        return MST
Example #10
Source File: edgeConstruction.py    From DCC with MIT License 4 votes vote down vote up
def mkNN(X, k, measure='euclidean'):
    """
    Build the mutual k-nearest-neighbour edge list, augmented with the MST.

    An edge (i, j) with i < j is emitted when i and j are each among the
    other's k nearest neighbours, or when the pair is joined by an edge of
    the minimum spanning tree of the kNN distance graph.

    Parameters
    ----------
    X : [n_samples, n_dim] array
    k : int
      number of neighbors for each sample in X
    measure : str
      metric name forwarded to ``scipy.spatial.distance.cdist``

    Returns
    -------
    array with one row ``(i, j, value)`` per retained edge, as produced by
    ``scipy.sparse.find`` on the upper-triangular combined graph.
    """
    from scipy.spatial import distance
    from scipy.sparse import csr_matrix, triu, find
    from scipy.sparse.csgraph import minimum_spanning_tree

    n_samples = X.shape[0]
    chunk = 10000
    # Partition positions 1..k; position 0 is left for the nearest entry,
    # which the slices below skip.
    kth = tuple(range(1, k + 1))

    nbr_idx = np.zeros((n_samples, k))
    nbr_dist = np.zeros_like(nbr_idx)

    # Process the distance matrix in row chunks to bound memory use.
    for lo in range(0, n_samples, chunk):
        hi = min(lo + chunk, n_samples)

        dists = distance.cdist(X[lo:hi], X, measure)
        order = np.argpartition(dists, kth, axis=1)
        nearest = order[:, 1:k + 1]

        nbr_idx[lo:hi, :] = nearest
        # Gather the distances that belong to the selected neighbours.
        nbr_dist[lo:hi, :] = dists[np.arange(hi - lo)[:, None], nearest]
        del dists

    rows = np.repeat(np.arange(n_samples), k).ravel()
    cols = nbr_idx.ravel()
    shape = (n_samples, n_samples)

    P = csr_matrix((np.ones((n_samples * k)), (rows, cols)), shape=shape)
    Q = csr_matrix((nbr_dist.ravel(), (rows, cols)), shape=shape)

    Tcsr = minimum_spanning_tree(Q)
    # Mutual neighbours (element-wise min with the transpose) plus the
    # symmetrised spanning tree.
    combined = P.minimum(P.transpose()) + Tcsr.maximum(Tcsr.transpose())
    upper = triu(combined, k=1)

    return np.asarray(find(upper)).T
Example #11
Source File: rcc.py    From pyrcc with MIT License 4 votes vote down vote up
def m_knn(X, k, measure='euclidean'):
        """
        This code is taken from:
        https://bitbucket.org/sohilas/robust-continuous-clustering/src/
        The original terms of the license apply.

        Build the mutual k-nearest-neighbour edge list, augmented with the
        minimum spanning tree of the kNN distance graph.

        An edge (i, j) with i < j is emitted when i and j are each among the
        other's k nearest neighbours, or when the pair is joined by a
        spanning-tree edge.

        Parameters
        ----------
        X (array) 2d array of data of shape (n_samples, n_dim)
        k (int) number of neighbors for each sample in X
        measure (string) distance metric, one of 'cosine' or 'euclidean'

        Returns
        -------
        int32 array with one (i, j) row per retained edge
        """

        n_samples = X.shape[0]
        chunk = 10000
        # Partition positions 1..k; position 0 is left for the nearest entry,
        # which the slices below skip.
        kth = tuple(range(1, k + 1))

        nbr_idx = np.zeros((n_samples, k))
        nbr_dist = np.zeros_like(nbr_idx)

        # Process the distance matrix in row chunks to bound memory use.
        for lo in range(0, n_samples, chunk):
            hi = min(lo + chunk, n_samples)

            dists = distance.cdist(X[lo:hi], X, measure)
            order = np.argpartition(dists, kth, axis=1)
            nearest = order[:, 1:k + 1]

            nbr_idx[lo:hi, :] = nearest
            # Gather the distances that belong to the selected neighbours.
            nbr_dist[lo:hi, :] = dists[np.arange(hi - lo)[:, None], nearest]
            del dists

        rows = np.repeat(np.arange(n_samples), k).ravel()
        cols = nbr_idx.ravel()
        shape = (n_samples, n_samples)

        P = csr_matrix((np.ones((n_samples*k)), (rows, cols)), shape=shape)
        Q = csr_matrix((nbr_dist.ravel(), (rows, cols)), shape=shape)

        Tcsr = minimum_spanning_tree(Q)
        # Mutual neighbours (element-wise min with the transpose) plus the
        # symmetrised spanning tree.
        combined = P.minimum(P.transpose()) + Tcsr.maximum(Tcsr.transpose())
        upper = triu(combined, k=1)

        # Keep only the (row, col) columns and drop the edge values.
        edge_table = np.asarray(find(upper)).T
        return edge_table[:, :2].astype(np.int32)
Example #12
Source File: test_spanning_tree.py    From Computable with MIT License 4 votes vote down vote up
def test_minimum_spanning_tree():
    """Check minimum_spanning_tree on a fixed graph and on random graphs."""

    # Graph with two connected components: {0, 1} and {2, 3, 4}.
    dense = np.asarray([[0, 1, 0, 0, 0],
                        [1, 0, 0, 0, 0],
                        [0, 0, 0, 8, 5],
                        [0, 0, 8, 0, 1],
                        [0, 0, 5, 1, 0]])

    # Spanning tree expected for the graph above.
    wanted = np.asarray([[0, 1, 0, 0, 0],
                         [0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 5],
                         [0, 0, 0, 0, 1],
                         [0, 0, 0, 0, 0]])

    sparse_graph = csr_matrix(dense)

    # The default call must return the expected tree ...
    tree = minimum_spanning_tree(sparse_graph)
    npt.assert_array_equal(tree.todense(), wanted,
                           'Incorrect spanning tree found.')

    # ... and leave its input untouched.
    npt.assert_array_equal(sparse_graph.todense(), dense,
                           'Original graph was modified.')

    # With overwrite=True the input may be reused; the result is unchanged.
    tree = minimum_spanning_tree(sparse_graph, overwrite=True)
    npt.assert_array_equal(tree.todense(), wanted,
                           'Graph was not properly modified to contain MST.')

    np.random.seed(1234)
    for size in (5, 10, 15, 20):

        # Random dense graph with every weight in [3, 4).
        dense = 3 + np.random.random((size, size))

        # A spanning tree has at most size - 1 edges.
        tree = minimum_spanning_tree(csr_matrix(dense))
        assert_(tree.nnz < size)

        # Plant weight-1 edges at (i, i + 1) to force a known spanning tree.
        chain = np.arange(size - 1)
        dense[chain, chain + 1] = 1
        tree = minimum_spanning_tree(csr_matrix(dense))

        # Exactly the planted edges should be selected, nothing else.
        wanted = np.zeros((size, size))
        wanted[chain, chain + 1] = 1

        npt.assert_array_equal(tree.todense(), wanted,
                               'Incorrect spanning tree found.')
Example #13
Source File: hierarchical.py    From Mastering-Elasticsearch-7.0 with MIT License 4 votes vote down vote up
def _single_linkage_tree(connectivity, n_samples, n_nodes, n_clusters,
                         n_connected_components, return_distance):
    """
    Perform single linkage clustering on sparse data via the minimum
    spanning tree from scipy.sparse.csgraph, then using union-find to label.
    The parent array is then generated by walking through the tree.

    Parameters
    ----------
    connectivity : sparse matrix
        Weighted graph; cast to float64 before use.
    n_samples : int
        Index assigned to the first merge when numbering the merged nodes.
    n_nodes : int
        Length of the parent array.
    n_clusters : int or None
        When not None, parent assignment stops once the merge index
        reaches ``n_nodes``.
    n_connected_components : int
        Returned unchanged.
    return_distance : bool
        When true, the merge distances are appended to the result.

    Returns
    -------
    tuple
        ``(children_, n_connected_components, n_samples, parent)`` plus
        ``distances`` as a fifth element when ``return_distance`` is true.
    """
    from scipy.sparse.csgraph import minimum_spanning_tree

    # explicitly cast connectivity to ensure safety
    connectivity = connectivity.astype('float64',
                                       **_astype_copy_false(connectivity))

    # Ensure zero distances aren't ignored by setting them to "epsilon"
    epsilon_value = np.finfo(dtype=connectivity.data.dtype).eps
    connectivity.data[connectivity.data == 0] = epsilon_value

    # Use scipy.sparse.csgraph to generate a minimum spanning tree
    mst = minimum_spanning_tree(connectivity.tocsr())

    # Convert the graph to scipy.cluster.hierarchy array format
    mst = mst.tocoo()

    # Undo the epsilon values
    mst.data[mst.data == epsilon_value] = 0

    mst_array = np.vstack([mst.row, mst.col, mst.data]).T

    # Sort edges of the min_spanning_tree by weight
    mst_array = mst_array[np.argsort(mst_array.T[2]), :]

    # Convert edge list into standard hierarchical clustering format
    single_linkage_tree = _hierarchical._single_linkage_label(mst_array)
    # ``np.int`` was only an alias of the builtin ``int`` and was removed in
    # NumPy 1.24; the builtin gives the same dtype on every NumPy version.
    children_ = single_linkage_tree[:, :2].astype(int)

    # Compute parents
    parent = np.arange(n_nodes, dtype=np.intp)
    for i, (left, right) in enumerate(children_, n_samples):
        if n_clusters is not None and i >= n_nodes:
            break
        if left < n_nodes:
            parent[left] = i
        if right < n_nodes:
            parent[right] = i

    if return_distance:
        distances = single_linkage_tree[:, 2]
        return children_, n_connected_components, n_samples, parent, distances
    return children_, n_connected_components, n_samples, parent


###############################################################################
# Hierarchical tree building functions 
Example #14
Source File: test_spanning_tree.py    From GraphicDesignPatternByPython with MIT License 4 votes vote down vote up
def test_minimum_spanning_tree():
    """Check minimum_spanning_tree on a fixed graph and on random graphs."""

    # Graph with two connected components: {0, 1} and {2, 3, 4}.
    adjacency = np.asarray([[0, 1, 0, 0, 0],
                            [1, 0, 0, 0, 0],
                            [0, 0, 0, 8, 5],
                            [0, 0, 8, 0, 1],
                            [0, 0, 5, 1, 0]])

    # Spanning tree expected for the graph above.
    reference = np.asarray([[0, 1, 0, 0, 0],
                            [0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 5],
                            [0, 0, 0, 0, 1],
                            [0, 0, 0, 0, 0]])

    sparse_input = csr_matrix(adjacency)

    # The default call must return the expected tree ...
    result = minimum_spanning_tree(sparse_input)
    npt.assert_array_equal(result.todense(), reference,
                           'Incorrect spanning tree found.')

    # ... and leave its input untouched.
    npt.assert_array_equal(sparse_input.todense(), adjacency,
                           'Original graph was modified.')

    # With overwrite=True the input may be reused; the result is unchanged.
    result = minimum_spanning_tree(sparse_input, overwrite=True)
    npt.assert_array_equal(result.todense(), reference,
                           'Graph was not properly modified to contain MST.')

    np.random.seed(1234)
    for n_nodes in (5, 10, 15, 20):

        # Random dense graph with every weight in [3, 4).
        adjacency = 3 + np.random.random((n_nodes, n_nodes))

        # A spanning tree has at most n_nodes - 1 edges.
        result = minimum_spanning_tree(csr_matrix(adjacency))
        assert_(result.nnz < n_nodes)

        # Plant weight-1 edges at (i, i + 1) to force a known spanning tree.
        heads = np.arange(n_nodes - 1)
        adjacency[heads, heads + 1] = 1
        result = minimum_spanning_tree(csr_matrix(adjacency))

        # Exactly the planted edges should be selected, nothing else.
        reference = np.zeros((n_nodes, n_nodes))
        reference[heads, heads + 1] = 1

        npt.assert_array_equal(result.todense(), reference,
                               'Incorrect spanning tree found.')
Example #15
Source File: dissimilarity.py    From flyingpigeon with Apache License 2.0 4 votes vote down vote up
def friedman_rafsky(x, y):
    """
    Compute a dissimilarity metric based on the Friedman-Rafsky runs statistics.

    Both samples are pooled, a minimal spanning tree (the subset of edges
    connecting all points that minimizes the total edge length) is built
    over the pooled points, and the tree edges whose two end points come
    from different samples are counted.

    Parameters
    ----------
    x : ndarray (n,d)
        Reference sample.
    y : ndarray (m,d)
        Candidate sample.

    Returns
    -------
    float
        Friedman-Rafsky dissimilarity metric ranging from 0 to (m+n-1)/(m+n).

    References
    ----------
    Friedman J.H. and Rafsky L.C. (1979) Multivariate generalizations of the
    Wald-Wolfowitz and Smirnov two-sample tests. Annals of Stat. Vol.7,
    No. 4, 697-717.
    """
    from sklearn import neighbors
    from scipy.sparse.csgraph import minimum_spanning_tree

    x, y = reshape_sample(x, y)
    (nx, _), (ny, _) = x.shape, y.shape
    n = nx + ny

    # Rows [0, nx) of the pooled array come from x, the remainder from y.
    pooled = np.vstack([x, y])

    # Distance graph linking every point to all others, then its spanning tree.
    graph = neighbors.kneighbors_graph(pooled, n_neighbors=n - 1, mode='distance')
    tree = minimum_spanning_tree(graph, overwrite=True)
    endpoints = np.array(tree.nonzero()).T

    # Count tree edges that have exactly one end point in x.
    in_x = endpoints < nx
    crossing = np.logical_xor(in_x[:, 0], in_x[:, 1]).sum()

    return 1. - (1. + crossing) / n
Example #16
Source File: util.py    From region with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def _randomly_divide_connected_graph(adj, n_regions):
    """
    Divide the provided connected graph into `n_regions` regions.

    Parameters
    ----------
    adj : :class:`scipy.sparse.csr_matrix`
        Adjacency matrix.
    n_regions : int
        The desired number of clusters. Must be > 0 and <= number of nodes.

    Returns
    -------
    labels : :class:`numpy.ndarray`
        Each element (an integer in {0, ..., `n_regions` - 1}) specifies the
        region an area (defined by the index in the array) belongs to.

    Examples
    --------
    >>> from scipy.sparse import diags
    >>> n_nodes = 10
    >>> adj_diagonal = [1] * (n_nodes-1)
    >>> # 10x10 adjacency matrix representing the path 0-1-2-...-9-10
    >>> adj = diags([adj_diagonal, adj_diagonal], offsets=[-1, 1])
    >>> n_regions_desired = 4
    >>> labels = _randomly_divide_connected_graph(adj, n_regions_desired)
    >>> n_regions_obtained = len(set(labels))
    >>> n_regions_desired == n_regions_obtained
    True
    """
    if not n_regions > 0:
        msg = "n_regions is {} but must be positive.".format(n_regions)
        raise ValueError(msg)
    n_areas = adj.shape[0]
    if not n_regions <= n_areas:
        msg = (
            "n_regions is {} but must less than or equal to "
            + "the number of nodes which is {}".format(n_regions, n_areas)
        )
        raise ValueError(msg)
    mst = csg.minimum_spanning_tree(adj)
    for _ in range(n_regions - 1):
        # try different links to cut and pick the one leading to the most
        # balanced solution
        best_link = None
        max_region_size = float("inf")
        for __ in range(5):
            mst_copy = mst.copy()
            nonzero_i, nonzero_j = mst_copy.nonzero()
            random_position = random.randrange(len(nonzero_i))
            i, j = nonzero_i[random_position], nonzero_j[random_position]
            mst_copy[i, j] = 0
            mst_copy.eliminate_zeros()
            labels = csg.connected_components(mst_copy, directed=False)[1]
            max_size = max(np.unique(labels, return_counts=True)[1])
            if max_size < max_region_size:
                best_link = (i, j)
                max_region_size = max_size
        mst[best_link[0], best_link[1]] = 0
        mst.eliminate_zeros()
    return csg.connected_components(mst)[1]