Python scipy.sparse.triu() Examples
The following are 27 code examples of scipy.sparse.triu().
Each example notes the project, source file, and license it comes from.
You may also want to check out all available functions and classes of the scipy.sparse module.
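Before the project examples, here is a minimal, self-contained sketch of what scipy.sparse.triu() computes (the 3x3 matrix and k values are illustrative):

import numpy as np
from scipy import sparse

A = sparse.csr_matrix(np.arange(1, 10).reshape(3, 3))
print(sparse.triu(A).toarray())       # upper triangle including the main diagonal (k=0)
print(sparse.triu(A, k=1).toarray())  # strictly upper triangle, the form used in many examples below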
Example #1
Source File: hamming.py From ruptures with BSD 2-Clause "Simplified" License
def hamming(bkps1, bkps2):
    """Modified Hamming distance for partitions.

    For all pairs of points (x, y), x != y, the function computes the number of
    times the two partitions disagree. The result is scaled to be within 0 and 1.

    Args:
        bkps1 (list): list of the last index of each regime.
        bkps2 (list): list of the last index of each regime.

    Returns:
        float: Hamming distance.
    """
    sanity_check(bkps1, bkps2)
    n_samples = max(bkps1)
    disagreement = abs(membership_mat(bkps1) - membership_mat(bkps2))
    disagreement = triu(disagreement, k=1).sum() * 1.
    disagreement /= n_samples * n_samples / 2  # scaling
    return disagreement
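A possible call site, assuming the function is exposed as ruptures.metrics.hamming (the breakpoint lists below are made up):

from ruptures.metrics import hamming  # assumed import path

bkps_true = [100, 250, 500]  # last index of each true regime
bkps_pred = [110, 250, 500]  # last index of each predicted regime
print(hamming(bkps_true, bkps_pred))  # 0.0 means identical partitions; values near 1 mean strong disagreement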
Example #2
Source File: utils.py From node_embedding_attack with MIT License
def construct_line_graph(adj_matrix):
    """Construct a line graph from an undirected original graph.

    Parameters
    ----------
    adj_matrix : sp.spmatrix [n_samples, n_samples]
        Symmetric binary adjacency matrix.

    Returns
    -------
    L : sp.spmatrix, shape [A.nnz/2, A.nnz/2]
        Symmetric binary adjacency matrix of the line graph.
    """
    N = adj_matrix.shape[0]
    edges = np.column_stack(sp.triu(adj_matrix, 1).nonzero())
    e1, e2 = edges[:, 0], edges[:, 1]

    I = sp.eye(N).tocsr()
    E1 = I[e1]
    E2 = I[e2]

    L = E1.dot(E1.T) + E1.dot(E2.T) + E2.dot(E1.T) + E2.dot(E2.T)

    return L - 2 * sp.eye(L.shape[0])
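A small usage sketch of the function above on a 4-cycle (the adjacency matrix is made up for illustration):

import numpy as np
import scipy.sparse as sp

# 4-cycle: 4 nodes and 4 undirected edges, so the line graph also has 4 nodes
adj = sp.csr_matrix(np.array([[0, 1, 0, 1],
                              [1, 0, 1, 0],
                              [0, 1, 0, 1],
                              [1, 0, 1, 0]]))
L = construct_line_graph(adj)
print(L.shape)  # (4, 4)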
Example #3
Source File: perturbation_attack.py From node_embedding_attack with MIT License
def baseline_eigencentrality_top_flips(adj_matrix, candidates, n_flips):
    """Selects the top (n_flips) number of flips using eigencentrality score of the edges.
    Applicable only when removing edges.

    :param adj_matrix: sp.spmatrix
        The graph represented as a sparse scipy matrix
    :param candidates: np.ndarray, shape [?, 2]
        Candidate set of edge flips
    :param n_flips: int
        Number of flips to select
    :return: np.ndarray, shape [?, 2]
        The top edge flips from the candidate set
    """
    edges = np.column_stack(sp.triu(adj_matrix, 1).nonzero())
    line_graph = construct_line_graph(adj_matrix)
    eigcentrality_scores = nx.eigenvector_centrality_numpy(nx.Graph(line_graph))
    eigcentrality_scores = {tuple(edges[k]): eigcentrality_scores[k] for k, v in eigcentrality_scores.items()}
    eigcentrality_scores = np.array([eigcentrality_scores[tuple(cnd)] for cnd in candidates])

    scores_argsrt = eigcentrality_scores.argsort()

    return candidates[scores_argsrt[-n_flips:]]
Example #4
Source File: basic_test.py From osqp-python with Apache License 2.0
def test_upper_triangular_P(self):
    res_default = self.model.solve()

    # Get upper triangular P
    P_triu = sparse.triu(self.P, format='csc')

    # Setup and solve with upper triangular part only
    m = osqp.OSQP()
    m.setup(P=P_triu, q=self.q, A=self.A, l=self.l, u=self.u, **self.opts)
    res_triu = m.solve()

    # Assert equal
    nptest.assert_array_almost_equal(res_default.x, res_triu.x)
    nptest.assert_array_almost_equal(res_default.y, res_triu.y)
    nptest.assert_array_almost_equal(res_default.info.obj_val, res_triu.info.obj_val)
Example #5
Source File: coolpup.py From coolpuppy with MIT License
def get_data(self, region):
    """Get sparse data for a region.

    Parameters
    ----------
    region : tuple or str
        Region for which to load the data. Either a tuple of (chr, start, end),
        or a string with the chromosome name.

    Returns
    -------
    data : csr
        Sparse csr matrix for the corresponding region.
    """
    logging.debug("Loading data")
    data = self.clr.matrix(sparse=True, balance=self.balance).fetch(region)
    data = sparse.triu(data)
    return data.tocsr()
Example #6
Source File: test_hicConvertFormat.py From HiCExplorer with GNU General Public License v3.0
def test_hicConvertFormat_hicpro_to_cool():
    outfile = NamedTemporaryFile(suffix='.cool', delete=False)
    outfile.close()
    hicprofile = ROOT + '/test_matrix.hicpro'
    bedfile = ROOT + '/test_matrix.bed'
    args = "--matrices {} --outFileName {} --inputFormat hicpro --outputFormat cool --bedFileHicpro {}".format(hicprofile, outfile.name, bedfile).split()
    hicConvertFormat.main(args)

    # test = hm.hiCMatrix(original_matrix_cool)
    # print(outfile.name)
    new = hm.hiCMatrix(outfile.name)

    matrixFileHandlerInput = MatrixFileHandler(pFileType='hicpro', pMatrixFile=hicprofile,
                                               pBedFileHicPro=bedfile)
    _matrix, cut_intervals, nan_bins, \
        distance_counts, correction_factors = matrixFileHandlerInput.load()

    new.matrix = triu(new.matrix)
    nt.assert_array_almost_equal(new.matrix.data, _matrix.data, decimal=0)
Example #7
Source File: quadratic_expression.py From qiskit-aqua with Apache License 2.0
def _symmetric_matrix(mat: dok_matrix) -> dok_matrix:
    upper = triu(mat, 1, format='dok') / 2
    # `todok` is necessary because subtraction results in other format
    return (mat + upper.transpose() - upper).todok()
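A quick numeric check of the symmetrization idiom above (the 2x2 matrix is a made-up example): the off-diagonal coefficient of 4 is split evenly across both triangles.

from scipy.sparse import dok_matrix, triu

mat = dok_matrix((2, 2))
mat[0, 1] = 4.0                                  # coefficient stored only in the upper triangle
upper = triu(mat, 1, format='dok') / 2
sym = (mat + upper.transpose() - upper).todok()
print(sym.toarray())                             # [[0., 2.], [2., 0.]]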
Example #8
Source File: csi.py From biclustlib with GNU General Public License v3.0
def _triu(a, sparse):
    if sparse:
        return sp.triu(a, k=1)
    return np.triu(a, k=1)
Example #9
Source File: __init__.py From python-igraph with GNU General Public License v2.0
def get_adjacency_sparse(self, attribute=None):
    """Returns the adjacency matrix of a graph as a scipy csr matrix.

    @param attribute: if C{None}, returns the ordinary adjacency matrix.
      When the name of a valid edge attribute is given here, the matrix
      returned will contain the default value at the places where there is
      no edge or the value of the given attribute where there is an edge.
    @return: the adjacency matrix as a L{scipy.sparse.csr_matrix}."""
    try:
        from scipy import sparse
    except ImportError:
        raise ImportError('You should install scipy package in order to use this function')
    import numpy as np

    edges = self.get_edgelist()
    if attribute is None:
        weights = [1] * len(edges)
    else:
        if attribute not in self.es.attribute_names():
            raise ValueError("Attribute does not exist")
        weights = self.es[attribute]

    N = self.vcount()
    mtx = sparse.csr_matrix((weights, zip(*edges)), shape=(N, N))

    if not self.is_directed():
        mtx = mtx + sparse.triu(mtx, 1).T + sparse.tril(mtx, -1).T
    return mtx
Example #10
Source File: conftest.py From cooler with BSD 3-Clause "New" or "Revised" License
def make_random(cls, chrom_offsets, binsize, density):
    chrom_nbins = np.diff(chrom_offsets)
    assert chrom_offsets[0] == 0 and np.all(np.diff(chrom_offsets) >= 0)
    n_chroms = len(chrom_offsets) - 1
    n_bins = chrom_offsets[-1]
    chroms = {
        "name": np.array(
            ["chr" + str(i) for i in range(1, n_chroms + 1)], dtype="S"
        ),
        "length": np.array(
            [chrom_nbins[i] * binsize for i in range(n_chroms)], dtype=np.int32
        ),
    }
    bins = {
        "chrom": np.concatenate([[i] * chrom_nbins[i] for i in range(n_chroms)]),
        "start": np.concatenate(
            [
                np.arange(0, chrom_nbins[i] * binsize, binsize)
                for i in range(n_chroms)
            ]
        ),
        "end": np.concatenate(
            [
                np.arange(binsize, chrom_nbins[i] * (binsize + 1), binsize)
                for i in range(n_chroms)
            ]
        ),
    }
    r = sparse.random(n_bins, n_bins, density=density, random_state=1)
    r = sparse.triu(r, k=1).tocsr()
    pixels = {"bin1_id": r.tocoo().row, "bin2_id": r.indices, "count": r.data}
    indexes = {
        "chrom_offset": np.array(chrom_offsets, dtype=np.int32),
        "bin1_offset": r.indptr,
    }
    return cls(chroms, bins, pixels, indexes, binsize)
Example #11
Source File: utils.py From node_embedding_attack with MIT License
def generate_candidates_removal(adj_matrix, seed=0):
    """Generates candidate edge flips for removal (edge -> non-edge),
    disallowing one random edge per node to prevent singleton nodes.

    :param adj_matrix: sp.csr_matrix, shape [n_nodes, n_nodes]
        Adjacency matrix of the graph
    :param seed: int
        Random seed
    :return: np.ndarray, shape [?, 2]
        Candidate set of edge flips
    """
    n_nodes = adj_matrix.shape[0]

    np.random.seed(seed)
    deg = np.where(adj_matrix.sum(1).A1 == 1)[0]

    hidden = np.column_stack(
        (np.arange(n_nodes),
         np.fromiter(map(np.random.choice, adj_matrix.tolil().rows), dtype=int)))

    adj_hidden = edges_to_sparse(hidden, adj_matrix.shape[0])
    adj_hidden = adj_hidden.maximum(adj_hidden.T)

    adj_keep = adj_matrix - adj_hidden

    candidates = np.column_stack(sp.triu(adj_keep).nonzero())
    candidates = candidates[np.logical_not(np.in1d(candidates[:, 0], deg) | np.in1d(candidates[:, 1], deg))]

    return candidates
Example #12
Source File: utils.py From node_embedding_attack with MIT License
def generate_candidates_removal_minimum_spanning_tree(adj_matrix):
    """Generates candidate edge flips for removal (edge -> non-edge),
    disallowing edges that lie on the minimum spanning tree.

    :param adj_matrix: sp.csr_matrix, shape [n_nodes, n_nodes]
        Adjacency matrix of the graph
    :return: np.ndarray, shape [?, 2]
        Candidate set of edge flips
    """
    mst = sp.csgraph.minimum_spanning_tree(adj_matrix)
    mst = mst.maximum(mst.T)
    adj_matrix_sample = adj_matrix - mst

    candidates = np.column_stack(sp.triu(adj_matrix_sample, 1).nonzero())

    return candidates
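The underlying idea, sketched with plain scipy on a made-up toy graph rather than the project's own data: edges on the minimum spanning tree are excluded, so only the remaining edges become removal candidates.

import numpy as np
import scipy.sparse as sp
from scipy.sparse.csgraph import minimum_spanning_tree

# Triangle plus a pendant node; the MST keeps 3 of the 4 edges
adj = sp.csr_matrix(np.array([[0, 1, 1, 0],
                              [1, 0, 1, 0],
                              [1, 1, 0, 1],
                              [0, 0, 1, 0]], dtype=float))
mst = minimum_spanning_tree(adj)
mst = mst.maximum(mst.T)
candidates = np.column_stack(sp.triu(adj - mst, 1).nonzero())
print(candidates)  # the single non-MST edge, e.g. [[1 2]]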
Example #13
Source File: sample.py From GPF with MIT License
def sample_pairs(net, test_ratio=0.1, train_pos=None, test_pos=None, max_train_num=None):
    # get upper triangular matrix
    net_triu = ssp.triu(net, k=1)
    # sample positive links for train/test
    row, col, _ = ssp.find(net_triu)
    # sample positive links if not specified
    if train_pos is None or test_pos is None:
        perm = random.sample(range(len(row)), len(row))
        row, col = row[perm], col[perm]
        split = int(math.ceil(len(row) * (1 - test_ratio)))
        train_pos = (row[:split], col[:split])
        test_pos = (row[split:], col[split:])
    # if max_train_num is set, randomly sample train links
    if max_train_num is not None:
        perm = np.random.permutation(len(train_pos[0]))[:max_train_num]
        train_pos = (train_pos[0][perm], train_pos[1][perm])
    # sample negative links for train/test
    train_num, test_num = len(train_pos[0]), len(test_pos[0])
    neg = ([], [])
    n = net.shape[0]
    print('sampling negative links for train and test')
    while len(neg[0]) < train_num + test_num:
        i, j = random.randint(0, n - 1), random.randint(0, n - 1)
        if i < j and net[i, j] == 0:
            neg[0].append(i)
            neg[1].append(j)
        else:
            continue
    train_neg = (neg[0][:train_num], neg[1][:train_num])
    test_neg = (neg[0][train_num:], neg[1][train_num:])
    return train_pos, train_neg, test_pos, test_neg
Example #14
Source File: common.py From sparse with BSD 3-Clause "New" or "Revised" License
def triu(x, k=0):
    """
    Returns an array with all elements below the k-th diagonal set to zero.

    Parameters
    ----------
    x : COO
        The input array.
    k : int, optional
        The diagonal below which elements are set to zero. The default is
        zero, which corresponds to the main diagonal.

    Returns
    -------
    COO
        The output upper-triangular matrix.

    Raises
    ------
    ValueError
        If :code:`x` doesn't have zero fill-values.

    See Also
    --------
    numpy.triu : NumPy equivalent function
    """
    from .core import COO

    check_zero_fill_value(x)

    if not x.ndim >= 2:
        raise NotImplementedError(
            "sparse.triu is not implemented for scalars or 1-D arrays."
        )

    mask = x.coords[-2] + k <= x.coords[-1]

    coords = x.coords[:, mask]
    data = x.data[mask]

    return COO(coords, data, shape=x.shape, has_duplicates=False, sorted=True)
Example #15
Source File: test_coo.py From sparse with BSD 3-Clause "New" or "Revised" License
def test_triul(shape, k):
    s = sparse.random(shape, density=0.5)
    x = s.todense()

    assert_eq(np.triu(x, k), sparse.triu(s, k))
    assert_eq(np.tril(x, k), sparse.tril(s, k))
Example #16
Source File: sim.py From clusim with MIT License
def omega_index(clustering1, clustering2):
    '''
    This function calculates the omega index between two clusterings.
    See :cite:`Collins1988omega` for a detailed derivation and explanation
    of the measure.

    :param Clustering clustering1: The first clustering.
    :param Clustering clustering2: The second clustering.

    :returns: the omega index
    '''
    if clustering1.n_elements != clustering2.n_elements:
        raise ClusteringSimilarityError
    elif any(e1 != e2 for e1, e2 in zip(clustering1.elements, clustering2.elements)):
        raise ClusteringSimilarityError

    A1 = make_overlapping_membership_matrix(clustering1)
    A2 = make_overlapping_membership_matrix(clustering2)

    M = clustering1.n_elements * (clustering1.n_elements - 1) / 2.0
    maxNover = max(max(A1.diagonal()), max(A2.diagonal())) + 1

    Anot = spsparse.triu((A1 != A2), k=1).sum()
    omega_u = 1.0 - Anot.sum() / M

    t_0_1 = M - spsparse.triu((A1 != 0), k=1).sum()
    t_0_2 = M - spsparse.triu((A2 != 0), k=1).sum()
    t_k_1 = [spsparse.triu((A1 == i), k=1).sum() for i in range(1, maxNover)]
    t_k_2 = [spsparse.triu((A2 == i), k=1).sum() for i in range(1, maxNover)]

    omega_e = (t_0_1 * t_0_2 + np.dot(t_k_1, t_k_2)) / M**2

    return (omega_u - omega_e) / (1.0 - omega_e)
Example #17
Source File: mesh_elements.py From BrainSpace with BSD 3-Clause "New" or "Revised" License
def get_edges(surf, mask=None):
    """Get surface edges.

    Parameters
    ----------
    surf : vtkDataSet or BSDataSet
        Input surface.
    mask : 1D ndarray, optional
        Binary mask. If specified, only use points within the mask.
        Default is None.

    Returns
    -------
    edges : ndarray, shape (n_edges, 2)
        Array of edges. Each element is a point id.

    See Also
    --------
    :func:`get_edge_length`
    :func:`get_points`
    :func:`get_cells`
    """
    adj = get_immediate_adjacency(surf, include_self=False, mask=mask,
                                  dtype=bool)  # np.bool is removed in recent NumPy
    adj.sort_indices()
    adj_ud = ssp.triu(adj, k=1, format='coo')
    edges = np.column_stack([adj_ud.row, adj_ud.col])
    return edges
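The same idiom with a plain scipy adjacency matrix, independent of BrainSpace (a made-up 3-node graph): keeping only the strictly upper triangle in COO format lists each undirected edge exactly once.

import numpy as np
import scipy.sparse as ssp

adj = ssp.csr_matrix(np.array([[0, 1, 1],
                               [1, 0, 0],
                               [1, 0, 0]]))
adj_ud = ssp.triu(adj, k=1, format='coo')
edges = np.column_stack([adj_ud.row, adj_ud.col])
print(edges)  # [[0 1]
              #  [0 2]]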
Example #18
Source File: polishing_test.py From osqp-python with Apache License 2.0
def test_polish_random(self):
    # Random QP problem
    sp.random.seed(6)
    self.n = 30
    self.m = 50
    Pt = sp.randn(self.n, self.n)
    self.P = sparse.triu(np.dot(Pt.T, Pt), format='csc')
    self.q = sp.randn(self.n)
    self.A = sparse.csc_matrix(sp.randn(self.m, self.n))
    self.l = -3 + sp.randn(self.m)
    self.u = 3 + sp.randn(self.m)
    self.model = osqp.OSQP()
    self.model.setup(P=self.P, q=self.q, A=self.A, l=self.l, u=self.u,
                     **self.opts)

    # Solve problem
    res = self.model.solve()

    # Assert close
    nptest.assert_array_almost_equal(
        res.x,
        np.array([
            -0.58549607, 0.0030388, -0.07154039, -0.0406463, -0.13349925,
            -0.1354755, -0.17417362, 0.0165324, -0.12213118, -0.10477034,
            -0.51748662, -0.05310921, 0.07862616, 0.53663003, -0.01459859,
            0.40678716, -0.03496123, 0.25722838, 0.06335071, 0.29908295,
            -0.6223218, -0.07614658, -0.3892153, -0.18111635, 0.56301768,
            0.10429917, 0.09821862, -0.30881928, 0.24430531, 0.06597486]))
    nptest.assert_array_almost_equal(
        res.y,
        np.array([
            0., -2.11407101e-01, 0., 0., 0., 0., 0., 0., 0., 0.,
            -3.78854588e-02, 0., -1.58346998e-02, 0., 0., -6.88711599e-02,
            0., 0., 0., 0., 0., 0., 0., 0., 6.04385132e-01, 0., 0., 0.,
            0., 0., 0., 0., 0., 0., 1.37995470e-01, 0., 0., 0.,
            -2.04427802e-02, 0., -1.32983915e-01, 0., 2.94425952e-02, 0.,
            0., 0., 0., 0., -6.53409219e-02, 0.]))
    nptest.assert_array_almost_equal(res.info.obj_val, -3.262280663471232)
Example #19
Source File: non_convex_test.py From osqp-python with Apache License 2.0
def setUp(self):
    # Simple QP problem
    self.P = sparse.triu([[2., 5.], [5., 1.]], format='csc')
    self.q = np.array([3, 4])
    self.A = sparse.csc_matrix([[-1.0, 0.], [0., -1.], [-1., 3.], [2., 5.], [3., 4]])
    self.u = np.array([0., 0., -15, 100, 80])
    self.l = -np.inf * np.ones(len(self.u))
    self.model = osqp.OSQP()
Example #20
Source File: update_matrices_test.py From osqp-python with Apache License 2.0
def setUp(self):
    # Simple QP problem
    sp.random.seed(1)

    self.n = 5
    self.m = 8
    p = 0.7

    Pt = sparse.random(self.n, self.n, density=p)
    Pt_new = Pt.copy()
    Pt_new.data += 0.1 * np.random.randn(Pt.nnz)

    self.P = sparse.triu(Pt.T.dot(Pt) + sparse.eye(self.n), format='csc')
    self.P_new = sparse.triu(Pt_new.T.dot(Pt_new) + sparse.eye(self.n), format='csc')
    self.q = np.random.randn(self.n)
    self.A = sparse.random(self.m, self.n, density=p, format='csc')
    self.A_new = self.A.copy()
    self.A_new.data += np.random.randn(self.A_new.nnz)
    self.l = np.zeros(self.m)
    self.u = 30 + np.random.randn(self.m)
    self.opts = {'eps_abs': 1e-08, 'eps_rel': 1e-08, 'verbose': False}
    self.model = osqp.OSQP()
    self.model.setup(P=self.P, q=self.q, A=self.A, l=self.l, u=self.u,
                     **self.opts)
Example #21
Source File: warm_start_test.py From osqp-python with Apache License 2.0
def test_warm_start(self):
    # Big problem
    sp.random.seed(2)
    self.n = 100
    self.m = 200
    self.A = sparse.random(self.m, self.n, density=0.9, format='csc')
    self.l = -sp.rand(self.m) * 2.
    self.u = sp.rand(self.m) * 2.

    P = sparse.random(self.n, self.n, density=0.9)
    self.P = sparse.triu(P.dot(P.T), format='csc')
    self.q = sp.randn(self.n)

    # Setup solver
    self.model = osqp.OSQP()
    self.model.setup(P=self.P, q=self.q, A=self.A, l=self.l, u=self.u,
                     **self.opts)

    # Solve problem with OSQP
    res = self.model.solve()

    # Store optimal values
    x_opt = res.x
    y_opt = res.y
    tot_iter = res.info.iter

    # Warm start with zeros and check if number of iterations is the same
    self.model.warm_start(x=np.zeros(self.n), y=np.zeros(self.m))
    res = self.model.solve()
    self.assertEqual(res.info.iter, tot_iter)

    # Warm start with optimal values and check that number of iter < 10
    self.model.warm_start(x=x_opt, y=y_opt)
    res = self.model.solve()
    self.assertLess(res.info.iter, 10)
Example #22
Source File: split_train_test.py From EvalNE with MIT License
def random_edge_sample(a, samp_frac=0.01, directed=False):
    r"""
    Returns a sample of positive and negative edges from the given graph
    represented by `a` selected uniformly at random without replacement.
    If the directed flag is set to False the samples are obtained only from
    the upper triangle.

    Parameters
    ----------
    a : sparse matrix
        A sparse adjacency matrix representing a graph.
    samp_frac : float, optional
        A float representing the fraction of elements to sample.
        Default is 0.01.
    directed : bool, optional
        A flag indicating if the adjacency matrix should be considered
        directed or undirected. If undirected, indices are obtained only
        from the upper triangle. Default is False.

    Returns
    -------
    pos_e : ndarray
        Positive edges
    neg_e : ndarray
        Negative edges
    """
    n = a.shape[0]

    if directed:
        num_samp = int(n ** 2 * samp_frac / 100)
        lin_indx_a = np.ravel_multi_index(a.nonzero(), (n, n))
        # randomly generate linear indices
        lin_indx = np.random.randint(0, n ** 2, num_samp)
    else:
        # For undir graphs we only need to sample half the num nodes
        num_samp = int((n * (n - 1)) / 2 * (samp_frac / 100))
        lin_indx_a = np.ravel_multi_index(triu(a, k=1).nonzero(), (n, n))
        ij = np.random.randint(0, n, size=(2, num_samp))
        ij.sort(axis=0)
        lin_indx = np.ravel_multi_index((ij[0], ij[1]), (n, n))

    pos_e = np.intersect1d(lin_indx, lin_indx_a)
    neg_e = np.setdiff1d(lin_indx, lin_indx_a)

    # Remove the self edges
    lin_diag_indxs = np.ravel_multi_index(np.diag_indices(n), (n, n))
    pos_e = np.setdiff1d(pos_e, lin_diag_indxs)
    neg_e = np.setdiff1d(neg_e, lin_diag_indxs)

    # Unravel the linear indices to obtain src, dst pairs
    pos_e = np.array(np.unravel_index(np.array(pos_e), (n, n))).T
    neg_e = np.array(np.unravel_index(np.array(neg_e), (n, n))).T

    return pos_e, neg_e
Example #23
Source File: utils.py From graph2gauss with MIT License
def batch_pairs_sample(A, nodes_hide):
    """
    For a given set of nodes return all edges and an equal number of randomly
    sampled non-edges.

    Parameters
    ----------
    A : sp.spmatrix
        Sparse adjacency matrix

    Returns
    -------
    pairs : array-like, shape [?, 2]
        The sampled pairs.
    """
    A = A.copy()
    undirected = (A != A.T).nnz == 0

    if undirected:
        A = sp.triu(A, 1).tocsr()

    edges = np.column_stack(A.nonzero())
    edges = edges[np.in1d(edges[:, 0], nodes_hide) | np.in1d(edges[:, 1], nodes_hide)]

    # include the missing direction
    if undirected:
        edges = np.row_stack((edges, np.column_stack((edges[:, 1], edges[:, 0]))))

    # sample the non-edges for each node separately
    arng = np.arange(A.shape[0])
    not_edges = []
    for nh in nodes_hide:
        nn = np.concatenate((A[nh].nonzero()[1], A[:, nh].nonzero()[0]))
        not_nn = np.setdiff1d(arng, nn)

        not_nn = np.random.permutation(not_nn)[:len(nn)]
        not_edges.append(np.column_stack((np.repeat(nh, len(nn)), not_nn)))

    not_edges = np.row_stack(not_edges)

    # include the missing direction
    if undirected:
        not_edges = np.row_stack((not_edges, np.column_stack((not_edges[:, 1], not_edges[:, 0]))))

    pairs = np.row_stack((edges, not_edges))

    return pairs
Example #24
Source File: rcc.py From pyrcc with MIT License
def m_knn(X, k, measure='euclidean'):
    """
    This code is taken from:
    https://bitbucket.org/sohilas/robust-continuous-clustering/src/
    The original terms of the license apply.
    Construct mutual_kNN for large scale dataset.

    If j is one of i's closest neighbors and i is also one of j's closest
    members, the edge will appear once with (i, j) where i < j.

    Parameters
    ----------
    X (array) 2d array of data of shape (n_samples, n_dim)
    k (int) number of neighbors for each sample in X
    measure (string) distance metric, one of 'cosine' or 'euclidean'
    """

    samples = X.shape[0]
    batch_size = 10000
    b = np.arange(k + 1)
    b = tuple(b[1:].ravel())

    z = np.zeros((samples, k))
    weigh = np.zeros_like(z)

    # This loop speeds up the computation by operating in batches
    # This can be parallelized to further utilize CPU/GPU resource
    for x in np.arange(0, samples, batch_size):
        start = x
        end = min(x + batch_size, samples)

        w = distance.cdist(X[start:end], X, measure)

        y = np.argpartition(w, b, axis=1)

        z[start:end, :] = y[:, 1:k + 1]
        weigh[start:end, :] = np.reshape(w[tuple(np.repeat(np.arange(end - start), k)),
                                           tuple(y[:, 1:k + 1].ravel())], (end - start, k))
        del w

    ind = np.repeat(np.arange(samples), k)
    P = csr_matrix((np.ones((samples * k)), (ind.ravel(), z.ravel())), shape=(samples, samples))
    Q = csr_matrix((weigh.ravel(), (ind.ravel(), z.ravel())), shape=(samples, samples))

    Tcsr = minimum_spanning_tree(Q)
    P = P.minimum(P.transpose()) + Tcsr.maximum(Tcsr.transpose())
    P = triu(P, k=1)

    V = np.asarray(find(P)).T
    return V[:, :2].astype(np.int32)
Example #25
Source File: dice.py From DeepRobust with MIT License
def attack(self, ori_adj, labels, n_perturbations, **kwargs):
    """Delete internally, connect externally. This baseline has all true
    class labels (train and test) available.

    Parameters
    ----------
    ori_adj : scipy.sparse.csr_matrix
        Original (unperturbed) adjacency matrix.
    labels :
        node labels
    n_perturbations : int
        Number of edge removals/additions.

    Returns
    -------
    None.
    """
    # ori_adj: sp.csr_matrix
    print('number of perturbations: %s' % n_perturbations)
    modified_adj = ori_adj.tolil()

    remove_or_insert = np.random.choice(2, n_perturbations)
    n_remove = sum(remove_or_insert)

    nonzero = set(zip(*ori_adj.nonzero()))
    indices = sp.triu(modified_adj).nonzero()
    possible_indices = [x for x in zip(indices[0], indices[1])
                        if labels[x[0]] == labels[x[1]]]

    remove_indices = np.random.permutation(possible_indices)[: n_remove]
    modified_adj[remove_indices[:, 0], remove_indices[:, 1]] = 0
    modified_adj[remove_indices[:, 1], remove_indices[:, 0]] = 0

    n_insert = n_perturbations - n_remove

    # # sample edges to add
    # nonzero = nonzero
    # edges = self.random_sample_edges(adj, n_insert, exclude=nonzero)
    # for n1, n2 in edges:
    #     modified_adj[n1, n2] += 1
    #     modified_adj[n2, n1] += 1

    # sample edges to add
    for i in range(n_insert):
        # select a node
        node1 = np.random.randint(ori_adj.shape[0])
        possible_nodes = [x for x in range(ori_adj.shape[0])
                          if labels[x] != labels[node1] and modified_adj[x, node1] == 0]
        # select another node
        node2 = possible_nodes[np.random.randint(len(possible_nodes))]
        modified_adj[node1, node2] = 1
        modified_adj[node2, node1] = 1

    self.check_adj(modified_adj)
    self.modified_adj = modified_adj
Example #26
Source File: edgeConstruction.py From DCC with MIT License
def mkNN(X, k, measure='euclidean'):
    """
    Construct mutual_kNN for large scale dataset.

    If j is one of i's closest neighbors and i is also one of j's closest
    members, the edge will appear once with (i, j) where i < j.

    Parameters
    ----------
    X : [n_samples, n_dim] array
    k : int
        number of neighbors for each sample in X
    """
    from scipy.spatial import distance
    from scipy.sparse import csr_matrix, triu, find
    from scipy.sparse.csgraph import minimum_spanning_tree

    samples = X.shape[0]
    batchsize = 10000
    b = np.arange(k + 1)
    b = tuple(b[1:].ravel())

    z = np.zeros((samples, k))
    weigh = np.zeros_like(z)

    # This loop speeds up the computation by operating in batches
    # This can be parallelized to further utilize CPU/GPU resource
    for x in np.arange(0, samples, batchsize):
        start = x
        end = min(x + batchsize, samples)

        w = distance.cdist(X[start:end], X, measure)

        y = np.argpartition(w, b, axis=1)

        z[start:end, :] = y[:, 1:k + 1]
        weigh[start:end, :] = np.reshape(w[tuple(np.repeat(np.arange(end - start), k)),
                                           tuple(y[:, 1:k + 1].ravel())], (end - start, k))
        del w

    ind = np.repeat(np.arange(samples), k)

    P = csr_matrix((np.ones((samples * k)), (ind.ravel(), z.ravel())), shape=(samples, samples))
    Q = csr_matrix((weigh.ravel(), (ind.ravel(), z.ravel())), shape=(samples, samples))

    Tcsr = minimum_spanning_tree(Q)
    P = P.minimum(P.transpose()) + Tcsr.maximum(Tcsr.transpose())
    P = triu(P, k=1)

    return np.asarray(find(P)).T
Example #27
Source File: split_train_test.py From EvalNE with MIT License
def random_edge_sample_other(a, samp_frac=0.01, directed=False):
    r"""
    Returns a sample of positive and negative edges from the given graph
    represented by `a` selected uniformly at random without replacement.
    If the directed flag is set to False the samples are obtained only from
    the upper triangle.

    A different take on the random sampling technique; probably less efficient
    than the other one. For undirected graphs it also generates many candidates
    from the lower triangle in order to reach the desired density, which makes
    it less efficient than the other version.

    Parameters
    ----------
    a : sparse matrix
        A sparse adjacency matrix representing a graph.
    samp_frac : float, optional
        A float representing the fraction of elements to sample.
        Default is 0.01 (1%).
    directed : bool, optional
        A flag indicating if the adjacency matrix should be considered
        directed or undirected. If undirected, indices are obtained only
        from the upper triangle. Default is False.

    Returns
    -------
    pos_e : ndarray
        Positive edges
    neg_e : ndarray
        Negative edges
    """
    n = a.shape[0]
    num_samp = int(n**2 * samp_frac)

    # Generate sparse random matrix representing mask of samples
    density = (num_samp + n) / n**2
    mask = sp.sparse.rand(n, n, density)

    if not directed:
        # For undir graphs we only look at the upper triangle
        mask = triu(mask, k=1)
    else:
        # Remove elements from diagonal
        mask.setdiag(0)
        mask.eliminate_zeros()

    mask.data[:] = 1
    lin_indx_samp = np.ravel_multi_index(mask.nonzero(), (n, n))

    # All positive edges sampled in mask will stay in aux
    aux = mask.multiply(a)
    pos_e = np.array(aux.nonzero()).T

    # The rest of the lin indx not positive are negative
    lin_indx_ne = np.setdiff1d(lin_indx_samp, np.ravel_multi_index(aux.nonzero(), (n, n)))
    neg_e = np.array(np.unravel_index(lin_indx_ne, (n, n)))

    return pos_e, neg_e