Python scipy.sparse.triu() Examples

The following are 27 code examples of scipy.sparse.triu(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.sparse, or try the search function.
Example #1
Source File: hamming.py    From ruptures with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def hamming(bkps1, bkps2):
    """Scaled count of pairwise disagreements between two partitions.

    The matrices returned by ``membership_mat`` for both partitions are
    subtracted entry-wise; the absolute differences strictly above the
    main diagonal are summed and divided by ``n_samples ** 2 / 2``.

    Args:
        bkps1 (list): list of the last index of each regime.
        bkps2 (list): list of the last index of each regime.

    Returns:
        float: Hamming distance.
    """
    sanity_check(bkps1, bkps2)
    n_samples = max(bkps1)

    mismatch = abs(membership_mat(bkps1) - membership_mat(bkps2))
    # k=1 keeps only pairs (x, y) with x < y, so each pair is counted once
    # and the diagonal (x == y) is excluded.
    n_mismatch = triu(mismatch, k=1).sum() * 1.
    scale = n_samples * n_samples / 2
    return n_mismatch / scale
Example #2
Source File: utils.py    From node_embedding_attack with MIT License 6 votes vote down vote up
def construct_line_graph(adj_matrix):
    """Build the line graph of an undirected graph.

    Every entry strictly above the diagonal of ``adj_matrix`` becomes one
    node of the result; two such nodes are adjacent when the corresponding
    edges share an endpoint.

    Parameters
    ----------
    adj_matrix : sp.spmatrix [n_samples ,n_samples]
        Symmetric binary adjacency matrix.

    Returns
    -------
    L : sp.spmatrix, shape [A.nnz/2, A.nnz/2]
        Symmetric binary adjacency matrix of the line graph.
    """
    n_nodes = adj_matrix.shape[0]
    # One (source, target) pair per undirected edge.
    src, dst = sp.triu(adj_matrix, 1).nonzero()

    identity = sp.eye(n_nodes).tocsr()
    # Edge-by-node incidence matrix: row e has ones at both endpoints of edge e.
    incidence = identity[src] + identity[dst]

    # (incidence @ incidence.T)[e, f] counts endpoints shared by edges e and f;
    # the diagonal is always 2 and is removed below.
    shared = incidence.dot(incidence.T)
    return shared - 2 * sp.eye(shared.shape[0])
Example #3
Source File: perturbation_attack.py    From node_embedding_attack with MIT License 6 votes vote down vote up
def baseline_eigencentrality_top_flips(adj_matrix, candidates, n_flips):
    """Pick the ``n_flips`` candidates whose edges score highest in eigenvector
    centrality on the line graph. Applicable only when removing edges.

    :param adj_matrix: sp.spmatrix
        The graph represented as a sparse scipy matrix
    :param candidates: np.ndarray, shape [?, 2]
        Candidate set of edge flips; every candidate must be an edge of the
        strict upper triangle of ``adj_matrix``
    :param n_flips: int
        Number of flips to select
    :return: np.ndarray, shape [?, 2]
        The top edge flips from the candidate set
    """
    edges = np.column_stack(sp.triu(adj_matrix, 1).nonzero())
    line_graph = construct_line_graph(adj_matrix)
    node_scores = nx.eigenvector_centrality_numpy(nx.Graph(line_graph))

    # Line-graph node ``idx`` is looked up as row ``idx`` of ``edges``.
    edge_scores = {tuple(edges[idx]): score for idx, score in node_scores.items()}
    candidate_scores = np.array([edge_scores[tuple(cnd)] for cnd in candidates])

    # argsort is ascending, so the best-scoring candidates sit at the end.
    ascending = candidate_scores.argsort()
    return candidates[ascending[-n_flips:]]
Example #4
Source File: basic_test.py    From osqp-python with Apache License 2.0 6 votes vote down vote up
def test_upper_triangular_P(self):
        """Solving with only the upper triangle of P must give the same result
        as the solve of ``self.model``."""
        reference = self.model.solve()

        # Upper-triangular part of P in CSC format
        upper_P = sparse.triu(self.P, format='csc')

        # Fresh solver fed with the triangular matrix only
        solver = osqp.OSQP()
        solver.setup(P=upper_P, q=self.q, A=self.A, l=self.l, u=self.u,
                     **self.opts)
        candidate = solver.solve()

        # Primal solution, dual solution and objective value must all agree
        comparisons = (
            (reference.x, candidate.x),
            (reference.y, candidate.y),
            (reference.info.obj_val, candidate.info.obj_val),
        )
        for expected, actual in comparisons:
            nptest.assert_array_almost_equal(expected, actual)
Example #5
Source File: coolpup.py    From coolpuppy with MIT License 6 votes vote down vote up
def get_data(self, region):
        """Fetch the upper triangle of a region's matrix as CSR.

        Parameters
        ----------
        region : tuple or str
            Region for which to load the data. Either tuple of (chr, start, end), or
            string with chromosome name.

        Returns
        -------
        data : csr
            Sparse csr matrix for the corresponding region, with everything
            below the main diagonal dropped.

        """
        logging.debug("Loading data")
        selector = self.clr.matrix(sparse=True, balance=self.balance)
        upper = sparse.triu(selector.fetch(region))
        return upper.tocsr()
Example #6
Source File: test_hicConvertFormat.py    From HiCExplorer with GNU General Public License v3.0 6 votes vote down vote up
def test_hicConvertFormat_hicpro_to_cool():
    """Convert a HiC-Pro matrix to cool and compare the stored values with
    the matrix loaded directly from the HiC-Pro files."""
    # Reserve an output path; the handle is closed immediately and the file
    # is kept on disk (delete=False) for the converter to write into.
    tmp_cool = NamedTemporaryFile(suffix='.cool', delete=False)
    tmp_cool.close()
    hicpro_path = ROOT + '/test_matrix.hicpro'
    bed_path = ROOT + '/test_matrix.bed'
    cli_args = "--matrices {} --outFileName {} --inputFormat hicpro --outputFormat cool --bedFileHicpro {}".format(hicpro_path, tmp_cool.name, bed_path).split()
    hicConvertFormat.main(cli_args)

    converted = hm.hiCMatrix(tmp_cool.name)

    hicpro_handler = MatrixFileHandler(pFileType='hicpro', pMatrixFile=hicpro_path,
                                       pBedFileHicPro=bed_path)

    # load() yields five values; only the matrix is needed here.
    expected, _cut_intervals, _nan_bins, \
        _distance_counts, _correction_factors = hicpro_handler.load()

    # Compare upper triangles only.
    converted.matrix = triu(converted.matrix)
    nt.assert_array_almost_equal(converted.matrix.data, expected.data, decimal=0)
Example #7
Source File: quadratic_expression.py    From qiskit-aqua with Apache License 2.0 5 votes vote down vote up
def _symmetric_matrix(mat: dok_matrix) -> dok_matrix:
        """Move half of every strictly-upper entry of ``mat`` to its mirrored
        position below the diagonal and return the result in DOK format."""
        half_upper = triu(mat, 1, format='dok') / 2
        mirrored = mat + half_upper.transpose()
        # The arithmetic above may change the sparse format, hence the
        # explicit conversion back to DOK.
        return (mirrored - half_upper).todok()
Example #8
Source File: csi.py    From biclustlib with GNU General Public License v3.0 5 votes vote down vote up
def _triu(a, sparse):
    """Return ``a`` with everything on and below the main diagonal zeroed.

    A truthy ``sparse`` dispatches to ``sp.triu``; otherwise ``np.triu``
    is used.
    """
    upper = sp.triu if sparse else np.triu
    return upper(a, k=1)
Example #9
Source File: __init__.py    From python-igraph with GNU General Public License v2.0 5 votes vote down vote up
def get_adjacency_sparse(self, attribute=None):
        """Returns the adjacency matrix of a graph as scipy csr matrix.
        @param attribute: if C{None}, returns the ordinary adjacency
          matrix (every edge contributes 1). When the name of a valid
          edge attribute is given here, the matrix returned will contain
          the value of the given attribute where there is an edge. Places
          without an edge are not stored.
        @return: the adjacency matrix as a L{scipy.sparse.csr_matrix}.
        @raise ImportError: if scipy cannot be imported.
        @raise ValueError: if C{attribute} is not an edge attribute."""
        try:
            from scipy import sparse
        except ImportError:
            raise ImportError('You should install scipy package in order to use this function')

        edges = self.get_edgelist()
        if attribute is None:
            weights = [1] * len(edges)
        else:
            if attribute not in self.es.attribute_names():
                raise ValueError("Attribute does not exist")

            weights = self.es[attribute]

        # Split the edge list into explicit row/column sequences. Passing the
        # bare zip iterator breaks for a graph without edges, because there
        # is nothing to unpack into (rows, cols).
        rows, cols = zip(*edges) if edges else ((), ())

        N = self.vcount()
        mtx = sparse.csr_matrix((weights, (rows, cols)), shape=(N, N))

        if not self.is_directed():
            # Mirror each off-diagonal entry so the matrix becomes symmetric;
            # the diagonal is left untouched.
            mtx = mtx + sparse.triu(mtx, 1).T + sparse.tril(mtx, -1).T
        return mtx
Example #10
Source File: conftest.py    From cooler with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def make_random(cls, chrom_offsets, binsize, density):
        """Build an instance of ``cls`` from randomly generated pixel data.

        Parameters
        ----------
        chrom_offsets : sequence of int
            Non-decreasing bin offsets starting at 0; consecutive differences
            give the number of bins per chromosome.
        binsize : int
            Width of every bin.
        density : float
            Density passed to ``scipy.sparse.random`` (seeded with
            ``random_state=1``) before the strict upper triangle is taken.

        Returns
        -------
        The result of ``cls(chroms, bins, pixels, indexes, binsize)``.
        """
        chrom_nbins = np.diff(chrom_offsets)
        assert chrom_offsets[0] == 0 and np.all(chrom_nbins >= 0)
        n_chroms = len(chrom_offsets) - 1
        n_bins = chrom_offsets[-1]
        chroms = {
            "name": np.array(
                ["chr" + str(i) for i in range(1, n_chroms + 1)], dtype="S"
            ),
            "length": np.array(
                [chrom_nbins[i] * binsize for i in range(n_chroms)], dtype=np.int32
            ),
        }
        bin_starts = np.concatenate(
            [
                np.arange(0, chrom_nbins[i] * binsize, binsize)
                for i in range(n_chroms)
            ]
        )
        bins = {
            "chrom": np.concatenate([[i] * chrom_nbins[i] for i in range(n_chroms)]),
            "start": bin_starts,
            # Every bin ends one binsize after its start. Deriving the ends
            # from the starts keeps both arrays the same length; a stop of
            # nbins * (binsize + 1) yields extra elements once nbins > binsize.
            "end": bin_starts + binsize,
        }
        r = sparse.random(n_bins, n_bins, density=density, random_state=1)
        r = sparse.triu(r, k=1).tocsr()
        # Row ids come from the COO view; column ids and row pointers are the
        # CSR arrays themselves.
        pixels = {"bin1_id": r.tocoo().row, "bin2_id": r.indices, "count": r.data}
        indexes = {
            "chrom_offset": np.array(chrom_offsets, dtype=np.int32),
            "bin1_offset": r.indptr,
        }
        return cls(chroms, bins, pixels, indexes, binsize)
Example #11
Source File: utils.py    From node_embedding_attack with MIT License 5 votes vote down vote up
def generate_candidates_removal(adj_matrix, seed=0):
    """Generates candidate edge flips for removal (edge -> non-edge),
     disallowing one random edge per node to prevent singleton nodes.

    adj_matrix: sp.csr_matrix, shape [n_nodes, n_nodes]
        Adjacency matrix of the graph. Every node needs at least one
        neighbour, since one is drawn per row with ``np.random.choice``.
    :param seed: int
        Random seed; note that it reseeds NumPy's global random state
    :return: np.ndarray, shape [?, 2]
        Candidate set of edge flips
    """
    n_nodes = adj_matrix.shape[0]

    np.random.seed(seed)
    # Nodes whose row sum is exactly 1; candidates touching them are dropped below.
    deg = np.where(adj_matrix.sum(1).A1 == 1)[0]

    # For every node pick one random neighbour; these (node, neighbour)
    # pairs are protected from removal. The builtin ``int`` replaces the
    # ``np.int`` alias, which no longer exists in current NumPy releases.
    hidden = np.column_stack(
        (np.arange(n_nodes), np.fromiter(map(np.random.choice, adj_matrix.tolil().rows), dtype=int)))

    adj_hidden = edges_to_sparse(hidden, adj_matrix.shape[0])
    adj_hidden = adj_hidden.maximum(adj_hidden.T)

    adj_keep = adj_matrix - adj_hidden

    candidates = np.column_stack((sp.triu(adj_keep).nonzero()))

    touches_deg_one = np.isin(candidates[:, 0], deg) | np.isin(candidates[:, 1], deg)
    candidates = candidates[np.logical_not(touches_deg_one)]

    return candidates
Example #12
Source File: utils.py    From node_embedding_attack with MIT License 5 votes vote down vote up
def generate_candidates_removal_minimum_spanning_tree(adj_matrix):
    """Generates candidate edge flips for removal (edge -> non-edge),
     disallowing edges that lie on the minimum spanning tree.

    The symmetrised spanning tree is subtracted from ``adj_matrix`` and the
    entries that remain non-zero strictly above the diagonal are returned.

    adj_matrix: sp.csr_matrix, shape [n_nodes, n_nodes]
        Adjacency matrix of the graph
    :return: np.ndarray, shape [?, 2]
        Candidate set of edge flips
    """
    spanning_tree = sp.csgraph.minimum_spanning_tree(adj_matrix)
    # Make the tree symmetric so both directions of a tree edge are cancelled.
    spanning_tree = spanning_tree.maximum(spanning_tree.T)

    removable = adj_matrix - spanning_tree
    rows, cols = sp.triu(removable, 1).nonzero()
    return np.column_stack((rows, cols))
Example #13
Source File: sample.py    From GPF with MIT License 5 votes vote down vote up
def sample_pairs(net, test_ratio=0.1, train_pos=None, test_pos=None, max_train_num=None):
    """Split the links of ``net`` into train/test positives and draw negatives.

    Positive links are the non-zero entries strictly above the diagonal. When
    either ``train_pos`` or ``test_pos`` is missing, the positives are
    shuffled and split so that the last ``test_ratio`` share goes to test.
    ``max_train_num`` optionally caps the number of training positives.
    Negative pairs ``(i, j)`` with ``i < j`` and ``net[i, j] == 0`` are drawn
    with replacement until there are as many as positives.

    Returns ``(train_pos, train_neg, test_pos, test_neg)``; positives are
    pairs of arrays, negatives are pairs of lists.
    """
    # positive links live in the strict upper triangle
    upper = ssp.triu(net, k=1)
    src, dst, _ = ssp.find(upper)

    # shuffle and split the positives unless both parts were supplied
    if train_pos is None or test_pos is None:
        order = random.sample(range(len(src)), len(src))
        src, dst = src[order], dst[order]
        cut = int(math.ceil(len(src) * (1 - test_ratio)))
        train_pos = (src[:cut], dst[:cut])
        test_pos = (src[cut:], dst[cut:])

    # optional cap on the number of training positives
    if max_train_num is not None:
        keep = np.random.permutation(len(train_pos[0]))[:max_train_num]
        train_pos = (train_pos[0][keep], train_pos[1][keep])

    # rejection-sample as many negatives as there are positives
    train_num = len(train_pos[0])
    test_num = len(test_pos[0])
    needed = train_num + test_num
    neg_src, neg_dst = [], []
    n = net.shape[0]
    print('sampling negative links for train and test')
    while len(neg_src) < needed:
        i = random.randint(0, n-1)
        j = random.randint(0, n-1)
        if i >= j or net[i, j] != 0:
            continue
        neg_src.append(i)
        neg_dst.append(j)

    train_neg = (neg_src[:train_num], neg_dst[:train_num])
    test_neg = (neg_src[train_num:], neg_dst[train_num:])

    return train_pos, train_neg, test_pos, test_neg
Example #14
Source File: common.py    From sparse with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def triu(x, k=0):
    """
    Keep only the elements of ``x`` on or above the k-th diagonal.

    The comparison is made on the last two axes of ``x``; stored elements
    below the chosen diagonal are dropped.

    Parameters
    ----------
    x : COO
        The input array.
    k : int, optional
        The diagonal below which elements are set to zero. The default is
        zero, which corresponds to the main diagonal.

    Returns
    -------
    COO
        The output upper-triangular matrix.

    Raises
    ------
    ValueError
        If :code:`x` doesn't have zero fill-values.
    NotImplementedError
        If :code:`x` has fewer than two dimensions.

    See Also
    --------
    numpy.triu : NumPy equivalent function
    """
    from .core import COO

    check_zero_fill_value(x)

    if x.ndim < 2:
        raise NotImplementedError(
            "sparse.triu is not implemented for scalars or 1-D arrays."
        )

    row_idx, col_idx = x.coords[-2], x.coords[-1]
    keep = row_idx + k <= col_idx

    # The coordinates are filtered in place without reordering, so they are
    # passed on flagged as sorted and duplicate-free.
    return COO(
        x.coords[:, keep],
        x.data[keep],
        shape=x.shape,
        has_duplicates=False,
        sorted=True,
    )
Example #15
Source File: test_coo.py    From sparse with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_triul(shape, k):
    """Compare sparse.triu and sparse.tril with their NumPy counterparts on
    a random array of the given shape and diagonal offset."""
    s = sparse.random(shape, density=0.5)
    dense = s.todense()

    reference_pairs = ((np.triu, sparse.triu), (np.tril, sparse.tril))
    for dense_func, sparse_func in reference_pairs:
        assert_eq(dense_func(dense, k), sparse_func(s, k))
Example #16
Source File: sim.py    From clusim with MIT License 5 votes vote down vote up
def omega_index(clustering1, clustering2):
    '''
    Compute the omega index between two clusterings.

    See :cite:`Collins1988omega` for a detailed derivation and explanation of the measure.

    :param Clustering clustering1:
        The first clustering.

    :param Clustering clustering2:
        The second clustering.

    :returns: the omega index

    :raises ClusteringSimilarityError: if the clusterings differ in their
        number of elements or in the elements themselves.
    '''
    if clustering1.n_elements != clustering2.n_elements:
        raise ClusteringSimilarityError
    if any(e1 != e2 for e1, e2 in zip(clustering1.elements, clustering2.elements)):
        raise ClusteringSimilarityError

    A1 = make_overlapping_membership_matrix(clustering1)
    A2 = make_overlapping_membership_matrix(clustering2)

    # Number of unordered element pairs.
    n_pairs = clustering1.n_elements * (clustering1.n_elements - 1) / 2.0

    max_level = max(max(A1.diagonal()), max(A2.diagonal())) + 1

    def upper_count(indicator):
        # Number of true entries strictly above the diagonal.
        return spsparse.triu(indicator, k=1).sum()

    # Observed term: share of pairs on which the two matrices agree.
    n_disagree = upper_count(A1 != A2)
    omega_u = 1.0 - n_disagree.sum() / n_pairs

    # Pairs with a zero entry in each matrix ...
    zeros_1 = n_pairs - upper_count(A1 != 0)
    zeros_2 = n_pairs - upper_count(A2 != 0)

    # ... and pairs whose entry equals each level 1 .. max_level - 1.
    levels = range(1, max_level)
    per_level_1 = [upper_count(A1 == level) for level in levels]
    per_level_2 = [upper_count(A2 == level) for level in levels]

    # Expected term.
    omega_e = (zeros_1*zeros_2 + np.dot(per_level_1, per_level_2)) / n_pairs**2

    return (omega_u - omega_e) / (1.0 - omega_e)
Example #17
Source File: mesh_elements.py    From BrainSpace with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_edges(surf, mask=None):
    """Get surface edges.

    Parameters
    ----------
    surf : vtkDataSet or BSDataSet
        Input surface.
    mask : 1D ndarray, optional
        Binary mask. If specified, only use points within the mask.
        Default is None.

    Returns
    -------
    edges : ndarray, shape (n_edges, 2)
        Array of edges. Each element is a point id. Taken from the strict
        upper triangle of the adjacency matrix, so the first id of every
        row is smaller than the second.

    See Also
    --------
    :func:`get_edge_length`
    :func:`get_points`
    :func:`get_cells`

    """

    # The builtin ``bool`` replaces the ``np.bool`` alias, which no longer
    # exists in current NumPy releases.
    adj = get_immediate_adjacency(surf, include_self=False, mask=mask,
                                  dtype=bool)
    adj.sort_indices()
    # Keep each undirected edge once: entries strictly above the diagonal.
    adj_ud = ssp.triu(adj, k=1, format='coo')
    edges = np.column_stack([adj_ud.row, adj_ud.col])
    return edges
Example #18
Source File: polishing_test.py    From osqp-python with Apache License 2.0 5 votes vote down vote up
def test_polish_random(self):
        """Solve a seeded random QP and compare the solution, duals and
        objective value against stored reference numbers."""

        # Random QP problem.
        # NumPy's random module is used directly: the ``scipy.random`` /
        # ``scipy.randn`` root-namespace aliases are deprecated, and they
        # were plain re-exports of the NumPy functions, so the generated
        # numbers stay the same.
        np.random.seed(6)

        self.n = 30
        self.m = 50
        Pt = np.random.randn(self.n, self.n)
        self.P = sparse.triu(np.dot(Pt.T, Pt), format='csc')
        self.q = np.random.randn(self.n)
        self.A = sparse.csc_matrix(np.random.randn(self.m, self.n))
        self.l = -3 + np.random.randn(self.m)
        self.u = 3 + np.random.randn(self.m)
        self.model = osqp.OSQP()
        self.model.setup(P=self.P, q=self.q, A=self.A, l=self.l, u=self.u,
                         **self.opts)

        # Solve problem
        res = self.model.solve()

        # Assert close
        nptest.assert_array_almost_equal(
            res.x, np.array([
                -0.58549607, 0.0030388, -0.07154039, -0.0406463, -0.13349925,
                -0.1354755, -0.17417362, 0.0165324, -0.12213118, -0.10477034,
                -0.51748662, -0.05310921, 0.07862616, 0.53663003, -0.01459859,
                0.40678716, -0.03496123, 0.25722838, 0.06335071, 0.29908295,
                -0.6223218, -0.07614658, -0.3892153, -0.18111635, 0.56301768,
                0.10429917, 0.09821862, -0.30881928, 0.24430531, 0.06597486]))
        nptest.assert_array_almost_equal(
            res.y, np.array([
                0., -2.11407101e-01, 0., 0., 0., 0., 0., 0., 0.,
                0., -3.78854588e-02, 0., -1.58346998e-02, 0., 0.,
                -6.88711599e-02, 0., 0., 0., 0., 0., 0., 0., 0.,
                6.04385132e-01, 0., 0., 0., 0., 0., 0., 0., 0.,
                0., 1.37995470e-01, 0., 0., 0.,  -2.04427802e-02,
                0., -1.32983915e-01, 0., 2.94425952e-02, 0., 0.,
                0., 0., 0., -6.53409219e-02, 0.]))
        nptest.assert_array_almost_equal(res.info.obj_val, -3.262280663471232)
Example #19
Source File: non_convex_test.py    From osqp-python with Apache License 2.0 5 votes vote down vote up
def setUp(self):
        """Create a two-variable QP with five upper-bounded constraints.

        P is the upper triangle of [[2, 5], [5, 1]], an indefinite matrix.
        """
        # Objective: upper-triangular P in CSC format and linear term q
        self.P = sparse.triu([[2., 5.], [5., 1.]], format='csc')
        self.q = np.array([3, 4])

        # Constraints l <= A x <= u with no lower bounds
        constraint_rows = [[-1.0, 0.], [0., -1.],
                           [-1., 3.], [2., 5.], [3., 4]]
        self.A = sparse.csc_matrix(constraint_rows)
        self.u = np.array([0., 0., -15, 100, 80])
        self.l = np.full(len(self.u), -np.inf)

        self.model = osqp.OSQP()
Example #20
Source File: update_matrices_test.py    From osqp-python with Apache License 2.0 5 votes vote down vote up
def setUp(self):
        """Create a seeded random QP plus perturbed copies of P and A, and
        set up the solver with the unperturbed data."""
        # Simple QP problem.
        # Seed NumPy's global generator directly; the ``scipy.random`` alias
        # is deprecated and pointed at the very same module.
        np.random.seed(1)

        self.n = 5
        self.m = 8
        p = 0.7

        Pt = sparse.random(self.n, self.n, density=p)
        Pt_new = Pt.copy()
        Pt_new.data += 0.1 * np.random.randn(Pt.nnz)

        # Pt' * Pt + I, stored as its upper triangle in CSC format.
        self.P = sparse.triu(Pt.T.dot(Pt) + sparse.eye(self.n), format='csc')
        self.P_new = sparse.triu(Pt_new.T.dot(Pt_new) + sparse.eye(self.n), format='csc')
        self.q = np.random.randn(self.n)
        self.A = sparse.random(self.m, self.n, density=p, format='csc')
        # Same sparsity pattern as A, different values.
        self.A_new = self.A.copy()
        self.A_new.data += np.random.randn(self.A_new.nnz)
        self.l = np.zeros(self.m)
        self.u = 30 + np.random.randn(self.m)
        self.opts = {'eps_abs': 1e-08,
                     'eps_rel': 1e-08,
                     'verbose': False}
        self.model = osqp.OSQP()
        self.model.setup(P=self.P, q=self.q, A=self.A, l=self.l, u=self.u,
                         **self.opts)
Example #21
Source File: warm_start_test.py    From osqp-python with Apache License 2.0 5 votes vote down vote up
def test_warm_start(self):
        """Warm starting from zeros must take as many iterations as the
        first solve; warm starting from the optimum must take fewer than 10."""

        # Big problem.
        # NumPy's random functions are called directly; the ``scipy.random``,
        # ``scipy.rand`` and ``scipy.randn`` aliases are deprecated
        # re-exports of the same functions.
        np.random.seed(2)
        self.n = 100
        self.m = 200
        self.A = sparse.random(self.m, self.n, density=0.9, format='csc')
        self.l = -np.random.rand(self.m) * 2.
        self.u = np.random.rand(self.m) * 2.

        P = sparse.random(self.n, self.n, density=0.9)
        self.P = sparse.triu(P.dot(P.T), format='csc')
        self.q = np.random.randn(self.n)

        # Setup solver
        self.model = osqp.OSQP()
        self.model.setup(P=self.P, q=self.q, A=self.A, l=self.l, u=self.u,
                         **self.opts)

        # Solve problem with OSQP
        res = self.model.solve()

        # Store optimal values
        x_opt = res.x
        y_opt = res.y
        tot_iter = res.info.iter

        # Warm start with zeros and check if number of iterations is the same
        self.model.warm_start(x=np.zeros(self.n), y=np.zeros(self.m))
        res = self.model.solve()
        self.assertEqual(res.info.iter, tot_iter)

        # Warm start with optimal values and check that number of iter < 10
        self.model.warm_start(x=x_opt, y=y_opt)
        res = self.model.solve()
        self.assertLess(res.info.iter, 10)
Example #22
Source File: split_train_test.py    From EvalNE with MIT License 4 votes vote down vote up
def random_edge_sample(a, samp_frac=0.01, directed=False):
    r"""
    Draw random node pairs and split them into edges and non-edges of `a`.

    Pair positions are drawn uniformly with ``np.random.randint``; repeated
    draws and self-pairs are discarded afterwards, so fewer pairs than
    requested may be returned. If the directed flag is set to False the
    samples are obtained only from the upper triangle.

    Parameters
    ----------
    a : sparse matrix
        A sparse adjacency matrix representing a graph.
    samp_frac : float, optional
        Share of the candidate pairs to draw. The value is divided by 100
        before use, i.e. it is interpreted as a percentage. Default is 0.01.
    directed : bool, optional
        A flag indicating if the adjacency matrix should be considered directed or undirected. If undirected,
        every sampled pair is ordered so that row <= column. Default is False.

    Returns
    -------
    pos_e : ndarray
        Positive edges, shape (k, 2)
    neg_e : ndarray
        Negative edges, shape (m, 2)
    """
    n = a.shape[0]
    grid = (n, n)

    if not directed:
        # Undirected: the number of candidate pairs is n * (n - 1) / 2
        num_samp = int((n*(n-1))/2 * (samp_frac / 100))
        lin_indx_a = np.ravel_multi_index(triu(a, k=1).nonzero(), grid)
        ij = np.random.randint(0, n, size=(2, num_samp))
        # Order every pair so it falls on or above the diagonal
        ij.sort(axis=0)
        lin_indx = np.ravel_multi_index((ij[0], ij[1]), grid)
    else:
        num_samp = int(n ** 2 * samp_frac / 100)
        lin_indx_a = np.ravel_multi_index(a.nonzero(), grid)
        # Draw linear indices over the full matrix
        lin_indx = np.random.randint(0, n ** 2, num_samp)

    # Linear indices of the diagonal, used to drop self edges
    diagonal = np.ravel_multi_index(np.diag_indices(n), grid)

    # Sampled positions that are edges / are not edges
    pos_lin = np.setdiff1d(np.intersect1d(lin_indx, lin_indx_a), diagonal)
    neg_lin = np.setdiff1d(np.setdiff1d(lin_indx, lin_indx_a), diagonal)

    # Back from linear indices to (src, dst) rows
    pos_e = np.array(np.unravel_index(pos_lin, grid)).T
    neg_e = np.array(np.unravel_index(neg_lin, grid)).T

    return pos_e, neg_e
Example #23
Source File: utils.py    From graph2gauss with MIT License 4 votes vote down vote up
def batch_pairs_sample(A, nodes_hide):
    """
    For a given set of nodes return all edges and an equal number of randomly sampled non-edges.

    Parameters
    ----------
    A : sp.spmatrix
        Sparse adjacency matrix
    nodes_hide : array-like of int
        Nodes whose incident edges (and sampled non-edges) are returned.

    Returns
    -------
    pairs : array-like, shape [?, 2]
        The sampled pairs: first the edges, then the non-edges. For a
        symmetric ``A`` each pair is listed in both directions.

    """
    A = A.copy()
    undirected = (A != A.T).nnz == 0

    if undirected:
        # Work on the strict upper triangle so every edge is stored once.
        A = sp.triu(A, 1).tocsr()

    edges = np.column_stack(A.nonzero())
    # np.isin / np.vstack are used in place of np.in1d / np.row_stack,
    # which NumPy has deprecated.
    edges = edges[np.isin(edges[:, 0], nodes_hide) | np.isin(edges[:, 1], nodes_hide)]

    # include the missing direction
    if undirected:
        edges = np.vstack((edges, edges[:, ::-1]))

    # sample the non-edges for each node separately
    arng = np.arange(A.shape[0])
    not_edges = []
    for nh in nodes_hide:
        # Neighbours are collected from both the row and the column of nh.
        nn = np.concatenate((A[nh].nonzero()[1], A[:, nh].nonzero()[0]))
        not_nn = np.setdiff1d(arng, nn)

        # As many random non-neighbours as there are neighbours.
        not_nn = np.random.permutation(not_nn)[:len(nn)]
        not_edges.append(np.column_stack((np.repeat(nh, len(nn)), not_nn)))

    not_edges = np.vstack(not_edges)

    # include the missing direction
    if undirected:
        not_edges = np.vstack((not_edges, not_edges[:, ::-1]))

    pairs = np.vstack((edges, not_edges))

    return pairs
Example #24
Source File: rcc.py    From pyrcc with MIT License 4 votes vote down vote up
def m_knn(X, k, measure='euclidean'):
        """
        This code is taken from:
        https://bitbucket.org/sohilas/robust-continuous-clustering/src/
        The original terms of the license apply.

        Build a mutual k-nearest-neighbour edge list for a large dataset.

        An edge is kept when i and j are each among the other's k nearest
        neighbours, or when it belongs to the minimum spanning tree of the
        k-NN distance graph. Every edge appears once as (i, j) with i < j.

        Parameters
        ----------
        X (array) 2d array of data of shape (n_samples, n_dim)
        k (int) number of neighbors for each sample in X
        measure (string) distance metric, one of 'cosine' or 'euclidean'

        Returns an int32 array with one (i, j) row per edge.
        """

        n_samples = X.shape[0]
        batch_size = 10000
        # Partition positions 1..k; position 0 is skipped when reading the result.
        kth = tuple(np.arange(1, k + 1))

        neighbors = np.zeros((n_samples, k))
        weights = np.zeros_like(neighbors)

        # Distances are computed batch by batch so that only a
        # (batch_size, n_samples) block is held at a time.
        for start in range(0, n_samples, batch_size):
            stop = min(start + batch_size, n_samples)

            dists = distance.cdist(X[start:stop], X, measure)
            order = np.argpartition(dists, kth, axis=1)
            nearest = order[:, 1:k + 1]

            neighbors[start:stop, :] = nearest
            local_rows = np.repeat(np.arange(stop - start), k)
            weights[start:stop, :] = np.reshape(dists[local_rows, nearest.ravel()],
                                                (stop - start, k))
            del dists

        row_ids = np.repeat(np.arange(n_samples), k)
        col_ids = neighbors.ravel()
        shape = (n_samples, n_samples)

        # P marks the k-NN relation, Q carries the matching distances.
        P = csr_matrix((np.ones(n_samples * k), (row_ids, col_ids)), shape=shape)
        Q = csr_matrix((weights.ravel(), (row_ids, col_ids)), shape=shape)

        spanning_tree = minimum_spanning_tree(Q)
        P = P.minimum(P.transpose()) + spanning_tree.maximum(spanning_tree.transpose())
        P = triu(P, k=1)

        # find() yields (rows, cols, values); only the index columns are returned.
        triples = np.asarray(find(P)).T
        return triples[:, :2].astype(np.int32)
Example #25
Source File: dice.py    From DeepRobust with MIT License 4 votes vote down vote up
def attack(self, ori_adj, labels, n_perturbations, **kwargs):
        """Delete internally, connect externally. This baseline has all true class labels
        (train and test) available.

        Each of the ``n_perturbations`` is randomly chosen to be either a
        removal of an edge between two same-label nodes or an insertion of an
        edge between two different-label nodes. The perturbed matrix is
        stored in ``self.modified_adj``.

        Parameters
        ----------
        ori_adj : scipy.sparse.csr_matrix
            Original (unperturbed) adjacency matrix.
        labels:
            node labels
        n_perturbations : int
            Number of edge removals/additions.

        Returns
        -------
        None.

        """

        print('number of pertubations: %s' % n_perturbations)
        modified_adj = ori_adj.tolil()

        # 1 -> remove an edge, 0 -> insert an edge
        remove_or_insert = np.random.choice(2, n_perturbations)
        n_remove = int(remove_or_insert.sum())

        # Removal candidates: stored upper-triangle entries joining two nodes
        # with the same label.
        indices = sp.triu(modified_adj).nonzero()
        possible_indices = [x for x in zip(indices[0], indices[1])
                            if labels[x[0]] == labels[x[1]]]

        remove_indices = np.random.permutation(possible_indices)[: n_remove]
        # Without any candidate (or with nothing to remove) the array is
        # empty and may be 1-D, so the column indexing below is skipped.
        if remove_indices.size:
            modified_adj[remove_indices[:, 0], remove_indices[:, 1]] = 0
            modified_adj[remove_indices[:, 1], remove_indices[:, 0]] = 0

        n_insert = n_perturbations - n_remove

        # sample edges to add
        for _ in range(n_insert):
            # select a node
            node1 = np.random.randint(ori_adj.shape[0])
            possible_nodes = [x for x in range(ori_adj.shape[0])
                              if labels[x] != labels[node1] and modified_adj[x, node1] == 0]
            # select another node
            # NOTE(review): raises if node1 has no eligible partner left.
            node2 = possible_nodes[np.random.randint(len(possible_nodes))]
            modified_adj[node1, node2] = 1
            modified_adj[node2, node1] = 1

        self.check_adj(modified_adj)
        self.modified_adj = modified_adj
Example #26
Source File: edgeConstruction.py    From DCC with MIT License 4 votes vote down vote up
def mkNN(X, k, measure='euclidean'):
    """
    Build a mutual k-nearest-neighbour graph for a large dataset.

    An edge is kept when i and j are each among the other's k nearest
    neighbours, or when it belongs to the minimum spanning tree of the k-NN
    distance graph. Every edge appears once with (i, j) where i < j.

    Parameters
    ----------
    X : [n_samples, n_dim] array
    k : int
      number of neighbors for each sample in X
    measure : str
      metric name handed to ``scipy.spatial.distance.cdist``

    Returns
    -------
    array with one (i, j, value) row per edge, as produced by
    ``scipy.sparse.find``
    """
    from scipy.spatial import distance
    from scipy.sparse import csr_matrix, triu, find
    from scipy.sparse.csgraph import minimum_spanning_tree

    n_samples = X.shape[0]
    batchsize = 10000
    # Partition positions 1..k; position 0 is skipped when reading the result.
    kth = tuple(np.arange(1, k + 1))

    neighbors = np.zeros((n_samples, k))
    weights = np.zeros_like(neighbors)

    # Distances are computed batch by batch so that only a
    # (batchsize, n_samples) block is held at a time.
    for start in range(0, n_samples, batchsize):
        stop = min(start + batchsize, n_samples)

        dists = distance.cdist(X[start:stop], X, measure)
        order = np.argpartition(dists, kth, axis=1)
        nearest = order[:, 1:k + 1]

        neighbors[start:stop, :] = nearest
        local_rows = np.repeat(np.arange(stop - start), k)
        weights[start:stop, :] = np.reshape(dists[local_rows, nearest.ravel()],
                                            (stop - start, k))
        del dists

    row_ids = np.repeat(np.arange(n_samples), k)
    col_ids = neighbors.ravel()
    shape = (n_samples, n_samples)

    # P marks the k-NN relation, Q carries the matching distances.
    P = csr_matrix((np.ones(n_samples * k), (row_ids, col_ids)), shape=shape)
    Q = csr_matrix((weights.ravel(), (row_ids, col_ids)), shape=shape)

    spanning_tree = minimum_spanning_tree(Q)
    P = P.minimum(P.transpose()) + spanning_tree.maximum(spanning_tree.transpose())
    P = triu(P, k=1)

    return np.asarray(find(P)).T
Example #27
Source File: split_train_test.py    From EvalNE with MIT License 4 votes vote down vote up
def random_edge_sample_other(a, samp_frac=0.01, directed=False):
    r"""
    Sample node pairs through a random sparse mask and split them into edges
    and non-edges of `a`.

    A random sparse matrix with density ``(int(n**2 * samp_frac) + n) / n**2``
    marks the sampled positions. If the directed flag is set to False only
    the mask entries strictly above the diagonal are kept; otherwise the
    diagonal is removed from the mask.

    Parameters
    ----------
    a : sparse matrix
        A sparse adjacency matrix representing a graph.
    samp_frac : float, optional
        An float representing the fraction of elements to sample. Default is 0.01 (1%)
    directed : bool, optional
        A flag indicating if the adjacency matrix should be considered directed or undirected. If undirected
        indices are obtained only from the upper triangle. Default is False.

    Returns
    -------
    pos_e : ndarray
        Positive edges, shape (k, 2)
    neg_e : ndarray
        Negative edges, shape (2, m).
        NOTE(review): unlike pos_e this array is not transposed - confirm
        with callers before relying on either layout.
    """
    n = a.shape[0]
    grid = (n, n)
    num_samp = int(n**2 * samp_frac)

    # Random sparse matrix whose stored entries mark the sampled positions
    density = (num_samp + n) / n**2
    mask = sp.sparse.rand(n, n, density)

    if directed:
        # Self-pairs are never sampled
        mask.setdiag(0)
        mask.eliminate_zeros()
    else:
        # Undirected graphs only need the strict upper triangle
        mask = triu(mask, k=1)

    mask.data[:] = 1
    sampled_lin = np.ravel_multi_index(mask.nonzero(), grid)

    # Sampled positions that are edges of `a` survive the element-wise product
    aux = mask.multiply(a)
    pos_idx = aux.nonzero()
    pos_e = np.array(pos_idx).T

    # Everything sampled that is not a positive edge is a negative one
    neg_lin = np.setdiff1d(sampled_lin, np.ravel_multi_index(pos_idx, grid))
    neg_e = np.array(np.unravel_index(neg_lin, grid))

    return pos_e, neg_e