Python scipy.sparse.find() Examples
The following are 30 code examples of scipy.sparse.find(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.sparse, or try the search function.
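Before the examples, a minimal sketch of what scipy.sparse.find() returns (the toy matrix is invented for illustration, not taken from any project below): it yields three parallel arrays holding the row indices, column indices, and values of the nonzero entries of a sparse or dense matrix.

import numpy as np
from scipy.sparse import csr_matrix, find

A = csr_matrix(np.array([[0, 2], [3, 0]]))
rows, cols, vals = find(A)
# rows -> array([0, 1]), cols -> array([1, 0]), vals -> array([2, 3])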
Example #1
Source File: data_helper.py From HFT-CNN with MIT License | 6 votes |
def build_input_label_data(labels, class_order):
    from sklearn.preprocessing import MultiLabelBinarizer
    from itertools import chain

    bml = MultiLabelBinarizer(classes=class_order, sparse_output=True)
    indexes = sp.find(bml.fit_transform(labels))
    y = []
    for i in range(len(labels)):
        y.append([])
    for i, j in zip(indexes[0], indexes[1]):
        y[i].append(j)
    return y

# padding operation
# =========================================================
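A hypothetical usage sketch (the label lists and class order are invented, and the function assumes scipy.sparse is imported as sp at module level): each sample's label set is mapped to a list of column indices in the binarized label matrix.

labels = [["cat"], ["cat", "dog"]]
y = build_input_label_data(labels, class_order=["cat", "dog"])
# y -> [[0], [0, 1]]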
Example #2
Source File: reader.py From pyrwr with MIT License | 6 votes |
def read_undirected_graph(X, weighted):
    rows = X[:, 0]
    cols = X[:, 1]
    data = X[:, 2]

    # assume id starts from 0
    n = int(np.amax(X[:, 0:2])) + 1

    # the weights of redundant edges will be summed (by default in csr_matrix)
    _A = csr_matrix((data, (rows, cols)), shape=(n, n))

    # this is under the assumption that src_id <= dst_id for all edges
    A = _A + _A.T

    if not weighted:
        # no redundant edges are allowed for unweighted graphs
        I, J, K = find(A)
        A = csr_matrix((np.ones(len(K)), (I, J)), shape=A.shape)

    return A
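A hypothetical call (the edge list is invented; assumes numpy is imported as np): each row of X is (src, dst, weight), and the unweighted branch collapses any duplicate edges to weight 1.

X = np.array([[0, 1, 2.0], [1, 2, 1.0]])
A = read_undirected_graph(X, weighted=False)
# A is a symmetric 3x3 csr_matrix with ones at (0, 1), (1, 0), (1, 2), (2, 1)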
Example #3
Source File: modularity.py From markov_clustering with MIT License | 6 votes |
def convert_to_adjacency_matrix(matrix):
    """
    Converts transition matrix into adjacency matrix

    :param matrix: The matrix to be converted
    :returns: adjacency matrix
    """
    for i in range(matrix.shape[0]):
        if isspmatrix(matrix):
            col = find(matrix[:, i])[2]
        else:
            col = matrix[:, i].T.tolist()[0]

        coeff = max(Fraction(c).limit_denominator().denominator for c in col)
        matrix[:, i] *= coeff

    return matrix
Example #4
Source File: context.py From 4lang with MIT License | 5 votes |
def lookup_0_freqs(self, word):
    i = self.vocabulary.get(word)
    if i is None:
        return None
    out_sum = sum(find(self.zero_sparse[i, :])[2])
    in_sum = sum(find(self.zero_sparse[:, i])[2])
    return out_sum, in_sum
Example #5
Source File: sample.py From GPF with MIT License | 5 votes |
def sample_pairs(net, test_ratio=0.1, train_pos=None, test_pos=None, max_train_num=None):
    # get upper triangular matrix
    net_triu = ssp.triu(net, k=1)
    # sample positive links for train/test
    row, col, _ = ssp.find(net_triu)
    # sample positive links if not specified
    if train_pos is None or test_pos is None:
        perm = random.sample(range(len(row)), len(row))
        row, col = row[perm], col[perm]
        split = int(math.ceil(len(row) * (1 - test_ratio)))
        train_pos = (row[:split], col[:split])
        test_pos = (row[split:], col[split:])
    # if max_train_num is set, randomly sample train links
    if max_train_num is not None:
        perm = np.random.permutation(len(train_pos[0]))[:max_train_num]
        train_pos = (train_pos[0][perm], train_pos[1][perm])
    # sample negative links for train/test
    train_num, test_num = len(train_pos[0]), len(test_pos[0])
    neg = ([], [])
    n = net.shape[0]
    print('sampling negative links for train and test')
    while len(neg[0]) < train_num + test_num:
        i, j = random.randint(0, n - 1), random.randint(0, n - 1)
        if i < j and net[i, j] == 0:
            neg[0].append(i)
            neg[1].append(j)
        else:
            continue
    train_neg = (neg[0][:train_num], neg[1][:train_num])
    test_neg = (neg[0][train_num:], neg[1][train_num:])
    return train_pos, train_neg, test_pos, test_neg
Example #6
Source File: subgraphs.py From GPF with MIT License | 5 votes |
def neighbors(fringe, A):
    # find all 1-hop neighbors of nodes in fringe from A
    res = set()
    for node in fringe:
        nei, _, _ = ssp.find(A[:, node])
        nei = set(nei)
        res = res.union(nei)
        #print res
    return res
Example #7
Source File: _network.py From PyINT with GNU General Public License v3.0 | 5 votes |
def select_pairs_sequential(date_list, num_connection=2, date12_format='YYMMDD-YYMMDD'):
    """Select Pairs in a Sequential way:
    For each acquisition, find its num_connection nearest acquisitions in the past time.

    Inputs:
        date_list : list of date in YYMMDD/YYYYMMDD format

    Reference:
        Fattahi, H., and F. Amelung (2013), DEM Error Correction in InSAR Time Series,
        IEEE TGRS, 51(7), 4249-4259.
    """
    date8_list = sorted(yyyymmdd(date_list))
    date6_list = yymmdd(date8_list)
    date_idx_list = list(range(len(date6_list)))

    # Get pairs index list
    date12_idx_list = []
    for date_idx in date_idx_list:
        for i in range(num_connection):
            if date_idx - i - 1 >= 0:
                date12_idx_list.append([date_idx - i - 1, date_idx])
    date12_idx_list = [sorted(idx) for idx in sorted(date12_idx_list)]

    # Convert index into date12
    date12_list = [date6_list[idx[0]] + '-' + date6_list[idx[1]] for idx in date12_idx_list]
    if date12_format == 'YYYYMMDD_YYYYMMDD':
        date12_list = yyyymmdd_date12(date12_list)
    return date12_list
Example #8
Source File: core.py From neuropythy with GNU Affero General Public License v3.0 | 5 votes |
def jacobian(self, params, into=None):
    params = flattest(params)
    n = len(params)
    ii = np.arange(n)
    (rs, cs, zs) = ([], [], [])
    for ((mn, mx), f) in self.pieces_with_default:
        if len(ii) == 0:
            break
        k = np.where((params >= mn) & (params <= mx))[0]
        if len(k) == 0:
            continue
        kk = ii[k]
        j = f.jacobian(params[k])
        if j.shape[0] == 1 and j.shape[1] > 1:
            j = repmat(j, j.shape[1], 1)
        (rj, cj, vj) = sps.find(j)
        rs.append(kk[rj])
        cs.append(kk[cj])
        zs.append(vj)
        ii = np.delete(ii, k)
        params = np.delete(params, k)
    (rs, cs, zs) = [np.concatenate(us) if len(us) > 0 else [] for us in (rs, cs, zs)]
    dz = sps.csr_matrix((zs, (rs, cs)), shape=(n, n))
    return safe_into(into, dz)
Example #9
Source File: demo.py From NPHard with MIT License | 5 votes |
def add_rnd_q(cns, nIS_vec_local):
    global adj_0
    nIS_vec_local[cns] = 1
    tmp = sp.find(adj_0[cns, :] == 1)
    nIS_vec_local[tmp[1]] = 0
    remain_vec_tmp = (nIS_vec_local == -1)
    adj = adj_0
    adj = adj[remain_vec_tmp, :]
    adj = adj[:, remain_vec_tmp]
    if reduce_graph(adj, nIS_vec_local):
        return True
    return False
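The comparison trick above, shown in isolation (toy matrix invented): comparing a sparse matrix against a scalar with == yields a sparse boolean matrix, so sp.find returns the positions whose entries equal 1, and tmp[1] picks out the column indices.

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([[0, 1, 2]]))
tmp = sp.find(adj == 1)
# tmp[1] -> array([1]); tmp[2] -> array([ True])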
Example #10
Source File: demo_parallel.py From NPHard with MIT License | 5 votes |
def add_rnd_q(cns, nIS_vec_local, pnum, lock):
    global adj_0
    nIS_vec_local[cns] = 1
    tmp = sp.find(adj_0[cns, :] == 1)
    nIS_vec_local[tmp[1]] = 0
    remain_vec_tmp = (nIS_vec_local == -1)
    adj = adj_0
    adj = adj[remain_vec_tmp, :]
    adj = adj[:, remain_vec_tmp]
    if reduce_graph(adj, nIS_vec_local, pnum, lock):
        return True
    return False
Example #11
Source File: reader.py From pyrwr with MIT License | 5 votes |
def read_directed_graph(X, weighted):
    rows = X[:, 0]
    cols = X[:, 1]
    data = X[:, 2]

    # assume id starts from 0
    n = int(np.amax(X[:, 0:2])) + 1

    # the weights of redundant edges will be summed (by default in csr_matrix)
    A = csr_matrix((data, (rows, cols)), shape=(n, n))

    if not weighted:
        # no redundant edges are allowed for unweighted graphs
        I, J, K = find(A)
        A = csr_matrix((np.ones(len(K)), (I, J)), shape=A.shape)

    return A
Example #12
Source File: base.py From dislib with Apache License 2.0 | 5 votes |
def _update_chunk(blocks, x, params):
    n_f, lambda_, axis = params
    r_chunk = Array._merge_blocks(blocks)
    if axis == 1:
        r_chunk = r_chunk.transpose()

    n = r_chunk.shape[0]
    y = np.zeros((n, n_f), dtype=np.float32)
    n_c = np.array(
        [len(sparse.find(r_chunk[i])[0]) for i in range(0, r_chunk.shape[0])])
    for element in range(0, n):
        indices = sparse.find(r_chunk[element])[1]
        x_xt = x[indices].T.dot(x[indices])

        a_i = x_xt + lambda_ * n_c[element] * np.eye(n_f)
        v_i = x[indices].T.dot(r_chunk[element, indices].toarray().T)

        # TODO: decide if atol should be changed when default is changed
        y[element] = sparse.linalg.cg(a_i, v_i, atol='legacy')[0].reshape(-1)

    return y
Example #13
Source File: context.py From 4lang with MIT License | 5 votes |
def lookup_arg_freqs(self, word):
    i = self.vocabulary.get(word)
    if i is None:
        return None
    sum1 = sum(find(self.binary_sparse[::2, i])[2])
    sum2 = sum(find(self.binary_sparse[1::2, i])[2])
    return sum1, sum2
Example #14
Source File: context.py From 4lang with MIT License | 5 votes |
def lookup_bin_freqs(self, word):
    i = self.binary_vocab.get(word)
    if i is None:
        return None
    sum1 = sum(find(self.binary_sparse[2*i, :])[2])
    sum2 = sum(find(self.binary_sparse[2*i+1, :])[2])
    return sum1, sum2
Example #15
Source File: SpreadingActivation.py From Quadflor with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit(self, X, Y):
    n_samples = X.shape[0]
    F = self.firing_threshold
    decay = self.decay
    coef_ = np.zeros(shape=(X.shape[1]), dtype=np.float64)
    fired_ = np.zeros(shape=(X.shape[1]), dtype=np.bool_)
    _, I, V = sp.find(Y)
    coef_[I] += np.divide(V, X.shape[0])  # V is already aligned with I; the snippet's V[I] misindexed it
    markers = deque(I)
    while markers:
        i = markers.popleft()
        if coef_[i] >= F and not fired_[i]:
            # fire
            fired_[i] = True  # assumed fix: the snippet never set fired_, leaving the guard inert
            for j in self.hierarchy.neighbors(i):
                if self.use_weights:
                    coef_[j] += coef_[i] * decay * self.hierarchy[i][j]['weight']
                else:
                    coef_[j] += coef_[i] * decay
                if coef_[j] >= F:
                    coef_[j] = F
                    markers.append(j)  # the snippet appended an undefined name
    self.coef_ = coef_
    return self
Example #16
Source File: SpreadingActivation.py From Quadflor with BSD 3-Clause "New" or "Revised" License | 5 votes |
def transform(self, X):
    F = self.firing_threshold
    hierarchy = self.hierarchy
    decay = self.decay
    if self.verbose:
        print("[SA] %.4f concepts per sample." % (float(X.getnnz()) / X.shape[0]))
    if self.verbose:
        print("[SA] Starting Spreading Activation")
    X_out = sp.lil_matrix(X.shape, dtype=X.dtype)
    fired = sp.lil_matrix(X.shape, dtype=np.bool_)
    I, J, V = sp.find(X)
    X_out[I, J] = V
    markers = deque(zip(I, J))
    while markers:
        i, j = markers.popleft()
        if X_out[i, j] >= F and not fired[i, j]:
            # markers.extend(self._fire(X_out, i, j))
            fired[i, j] = True
            for target in hierarchy.predecessors(j):
                if self.weighting:
                    X_out[i, target] += X_out[i, j] * decay * hierarchy[target][j]['weight']
                else:
                    X_out[i, target] += X_out[i, j] * decay
                if X_out[i, target] >= F:
                    if self.strict:
                        X_out[i, target] = F  # cap at threshold (the snippet wrote to an undefined name A here)
                    markers.append((i, target))
    if self.verbose:
        print("[SA] %.4f fired per sample." % (float(fired.getnnz()) / X.shape[0]))
    return sp.csr_matrix(X_out)
Example #17
Source File: SpreadingActivation.py From Quadflor with BSD 3-Clause "New" or "Revised" License | 5 votes |
def transform(self, X):
    hierarchy = self.hierarchy
    decay = self.decay
    threshold = self.child_threshold
    verbose = self.verbose

    n_hops = 0
    if verbose:
        print("[OneHopActivation]")

    X_out = sp.lil_matrix(X.shape, dtype=X.dtype)
    I, J, _ = sp.find(X)
    for i, j in zip(I, J):
        n_children = 0
        sum_children = 0
        for child in hierarchy.successors(j):
            if X[i, child] > 0:  # same row i
                n_children += 1
                sum_children += X[i, child]
        if n_children >= threshold:
            if verbose:
                print("Hop", end=" ")
            n_hops += 1
            X_out[i, j] = X[i, j] + sum_children * decay
        else:
            X_out[i, j] = X[i, j]

    if verbose:
        print("\n[OneHopActivation] %d hops." % n_hops)

    return sp.csr_matrix(X_out)
Example #18
Source File: SpreadingActivation.py From Quadflor with BSD 3-Clause "New" or "Revised" License | 5 votes |
def transform(self, X, y=None):
    '''
    From each value in the feature matrix, traverse upwards in the
    hierarchy (including multiple parents in DAGs), and set all nodes to one
    '''
    hierarchy = self.hierarchy
    X_out = np.zeros(X.shape, dtype=np.bool_)
    samples, relevant_topics, _ = sp.find(X)
    for sample, topic in zip(samples, relevant_topics):
        X_out[sample, topic] = 1
        ancestors = nx.ancestors(hierarchy, topic)
        for ancestor in ancestors:
            X_out[sample, ancestor] = 1
    return X_out
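A toy illustration of the traversal (the two-topic hierarchy is invented): a sample labeled with topic 1 also gets its ancestor, topic 0, switched on.

import networkx as nx
import numpy as np
import scipy.sparse as sp

hierarchy = nx.DiGraph([(0, 1)])       # topic 0 is the parent of topic 1
X = sp.csr_matrix(np.array([[0, 1]]))  # one sample labeled with topic 1
X_out = np.zeros(X.shape, dtype=np.bool_)
samples, topics, _ = sp.find(X)
for sample, topic in zip(samples, topics):
    X_out[sample, topic] = 1
    for ancestor in nx.ancestors(hierarchy, topic):
        X_out[sample, ancestor] = 1
# X_out -> array([[ True,  True]])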
Example #19
Source File: MeanCutShell.py From Quadflor with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit(self, X, Y):
    self.n_topics = Y.shape[1]
    ones = len(sp.find(Y)[2])
    self.mu = ones / X.shape[0]
    for clf in self.clfs:
        clf.fit(X, Y)
    return self
Example #20
Source File: graph_diffusion.py From seqc with GNU General Public License v2.0 | 5 votes |
def keigs(T, k, P, take_diagonal=0):
    """ return k largest magnitude eigenvalues for the matrix T.
    :param T: Matrix to find eigen values/vectors of
    :param k: number of eigen values/vectors to return
    :param P: in the case of symmetric normalizations, this is the NxN diagonal
              matrix which relates the nonsymmetric version to the symmetric
              form via conjugation
    :param take_diagonal: if 1, returns the eigenvalues as a vector rather than
              as a diagonal matrix.
    """
    D, V = eigs(T, k, tol=1e-4, maxiter=1000)
    D = np.real(D)
    V = np.real(V)
    inds = np.argsort(D)[::-1]
    D = D[inds]
    V = V[:, inds]
    if P is not None:
        V = P.dot(V)

    # Normalize
    for i in range(V.shape[1]):
        V[:, i] = V[:, i] / norm(V[:, i])
    V = np.round(V, 10)

    if take_diagonal == 0:
        D = np.diag(D)

    return V, D
Example #21
Source File: utils.py From relational-gcn with MIT License | 5 votes |
def get_neighbors(adj, nodes):
    """Takes a set of nodes and a graph adjacency matrix and returns a set of neighbors."""
    sp_nodes = sp_row_vec_from_idx_list(list(nodes), adj.shape[1])
    sp_neighbors = sp_nodes.dot(adj)
    neighbors = set(sp.find(sp_neighbors)[1])  # convert to set of indices
    return neighbors
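The trick here: multiplying a sparse indicator row vector by the adjacency matrix gives a row whose nonzero columns are exactly the union of the nodes' neighbors, which find() then extracts. A self-contained sketch (sp_row_vec_from_idx_list is the project's helper; the inline construction below is an assumed equivalent):

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]]))
nodes = [0]
sp_nodes = sp.csr_matrix((np.ones(len(nodes)),
                          (np.zeros(len(nodes), dtype=int), nodes)),
                         shape=(1, adj.shape[1]))
neighbors = set(sp.find(sp_nodes.dot(adj))[1])  # -> {1}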
Example #22
Source File: msm.py From enspara with GNU General Public License v3.0 | 5 votes |
def __eq__(self, other):
    if self is other:
        return True
    else:
        if self.config != other.config:
            return False
        if self.result_ is None:
            # one is not fit, equality if neither is
            return other.result_ is None
        else:
            # eq probs can do numpy comparison (dense)
            if not np.all(self.eq_probs_ == other.eq_probs_):
                return False
            if self.mapping_ != other.mapping_:
                return False
            # compare tcounts, tprobs shapes.
            if self.tcounts_.shape != other.tcounts_.shape or \
               self.tprobs_.shape != other.tprobs_.shape:
                return False
            # identical shapes => use nnz for element-wise equality
            if (self.tcounts_ != other.tcounts_).nnz != 0:
                return False
            # imperfect serialization leads to diff in tprobs, use
            # allclose instead of all
            f_self = sparse.find(self.tprobs_)
            f_other = sparse.find(other.tprobs_)
            if not np.all(f_self[0] == f_other[0]) or \
               not np.all(f_self[1] == f_other[1]):
                return False
            if not np.allclose(f_self[2], f_other[2]):
                print("tprobs differs.")
                return False
            return True
Example #23
Source File: core.py From neuropythy with GNU Affero General Public License v3.0 | 5 votes |
def tmpdir(prefix='npythy_tempdir_', delete=True):
    '''
    tmpdir() creates a temporary directory and yields its path. At python exit, the
      directory and all of its contents are recursively deleted (so long as the normal
      python exit process is allowed to call the atexit handlers).
    tmpdir(prefix) uses the given prefix in the tempfile.mkdtemp() call.

    The option delete may be set to False to specify that the tempdir should not be
    deleted on exit.
    '''
    path = tempfile.mkdtemp(prefix=prefix)
    if not os.path.isdir(path):
        raise ValueError('Could not find or create temp directory')
    if delete:
        atexit.register(shutil.rmtree, path)
    return path
Example #24
Source File: core.py From neuropythy with GNU Affero General Public License v3.0 | 5 votes |
def try_until(*args, **kw):
    '''
    try_until(f1, f2, f3...) attempts to return f1(); if this raises an Exception during
      its evaluation, however, it attempts to return f2(); etc. If none of the functions
      succeed, then an exception is raised.

    The following optional arguments may be given:
      * check (default: None) may specify a function of one argument that must return
        True when the passed value is an acceptable return value; for example, an option
        of `check=lambda x: x is not None` would indicate that a function that returns
        None should not be considered to have succeeded.
    '''
    if 'check' in kw:
        check = kw.pop('check')
    else:
        check = None
    if len(kw) > 0:
        raise ValueError('unrecognized options given to try_until')
    for f in args:
        if not hasattr(f, '__call__'):
            raise ValueError('function given to try_until is not callable')
        try:
            rval = f()
            if check is None or check(rval):
                return rval
        except Exception:
            pass  # assumed fix: per the docstring, a failure should fall through to the next function
    raise ValueError('try_until failed to find a successful function return')
Example #25
Source File: util_functions.py From IGMC with MIT License | 5 votes |
def neighbors(fringe, A, row=True):
    # find all 1-hop neighbors of nodes in fringe from A
    res = set()
    for node in fringe:
        if row:
            _, nei, _ = ssp.find(A[node, :])
        else:
            nei, _, _ = ssp.find(A[:, node])
        nei = set(nei)
        res = res.union(nei)
    return res
Example #26
Source File: knowledge_graph.py From dgl with Apache License 2.0 | 5 votes |
def _get_neighbors(adj, nodes):
    """Takes a set of nodes and a graph adjacency matrix and returns a set of neighbors."""
    sp_nodes = _sp_row_vec_from_idx_list(list(nodes), adj.shape[1])
    sp_neighbors = sp_nodes.dot(adj)
    neighbors = set(sp.find(sp_neighbors)[1])  # convert to set of indices
    return neighbors
Example #27
Source File: designmatrix.py From lightkurve with MIT License | 5 votes |
def create_spline_matrix(x, n_knots=20, knots=None, degree=3, name='spline',
                         include_intercept=True):
    """Returns a `.DesignMatrix` which models splines using `patsy.dmatrix`.

    Parameters
    ----------
    x : np.ndarray
        vector to spline
    n_knots : int
        Number of knots (default: 20).
    degree : int
        Polynomial degree.
    name : string
        Name to pass to `.DesignMatrix` (default: 'spline').
    include_intercept : bool
        Whether to include row of ones to find intercept (default: True).

    Returns
    -------
    dm : `.DesignMatrix`
        Design matrix object with shape (len(x), n_knots*degree).
    """
    from patsy import dmatrix  # local import because it's rarely-used
    if knots is not None:
        dm_formula = "bs(x, knots={}, degree={}, include_intercept={}) - 1" \
                     "".format(knots, degree, include_intercept)
        spline_dm = np.asarray(dmatrix(dm_formula, {"x": x}))
        df = pd.DataFrame(spline_dm, columns=['knot{}'.format(idx + 1)
                                              for idx in range(spline_dm.shape[1])])
    else:
        dm_formula = "bs(x, df={}, degree={}, include_intercept={}) - 1" \
                     "".format(n_knots, degree, include_intercept)
        spline_dm = np.asarray(dmatrix(dm_formula, {"x": x}))
        df = pd.DataFrame(spline_dm, columns=['knot{}'.format(idx + 1)
                                              for idx in range(n_knots)])
    return DesignMatrix(df, name=name)
Example #28
Source File: alignments.py From REGAL with MIT License | 5 votes |
def score_alignment_matrix(alignment_matrix, topk=None, topk_score_weighted=False, true_alignments=None):
    n_nodes = alignment_matrix.shape[0]
    correct_nodes = []

    if topk is None:
        row_sums = alignment_matrix.sum(axis=1)
        row_sums[row_sums == 0] = 10e-6  # shouldn't affect much since dividing 0 by anything is 0
        alignment_matrix = alignment_matrix / row_sums[:, np.newaxis]  # normalize
        alignment_score = score(alignment_matrix, true_alignments=true_alignments)
    else:
        alignment_score = 0
        if not sp.issparse(alignment_matrix):
            sorted_indices = np.argsort(alignment_matrix)

        for node_index in range(n_nodes):
            target_alignment = node_index  # default: assume identity mapping, and the node should be aligned to itself
            if true_alignments is not None:  # if we have true alignments (which we require), use those for each node
                target_alignment = int(true_alignments[node_index])
            if sp.issparse(alignment_matrix):
                row, possible_alignments, possible_values = sp.find(alignment_matrix[node_index])
                node_sorted_indices = possible_alignments[possible_values.argsort()]
            else:
                node_sorted_indices = sorted_indices[node_index]
            if target_alignment in node_sorted_indices[-topk:]:
                if topk_score_weighted:
                    alignment_score += 1.0 / (n_nodes - np.argwhere(sorted_indices[node_index] == target_alignment)[0])
                else:
                    alignment_score += 1
                correct_nodes.append(node_index)
        alignment_score /= float(n_nodes)

    return alignment_score, set(correct_nodes)
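The sparse top-k step in isolation (the scores are invented): find() returns the nonzero columns and values of a row, and argsort orders the candidate alignments from lowest to highest score, so the last topk entries are the best candidates.

import numpy as np
import scipy.sparse as sp

row_scores = sp.csr_matrix(np.array([[0.0, 0.7, 0.1, 0.2]]))
_, cands, vals = sp.find(row_scores)
ranked = cands[vals.argsort()]  # array([2, 3, 1]), ascending by score
top2 = ranked[-2:]              # array([3, 1])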
Example #29
Source File: base.py From dislib with Apache License 2.0 | 5 votes |
def _get_rmse(blocks, users, items):
    test = Array._merge_blocks(blocks)
    x_idxs, y_idxs, recs = sparse.find(test)
    indices = zip(x_idxs, y_idxs)

    rmse = np.NaN
    if len(recs) > 0:
        preds = [users[x].dot(items[y].T) for x, y in indices]
        rmse = sqrt(mean_squared_error(recs, preds))

    return rmse
Example #30
Source File: gpnn_reader_custom.py From graph-partition-neural-network-samples with MIT License | 5 votes |
def _pack_data(self, data_dict):
    data = GPNNData(
        data_dict["graph"],
        train_idx=self.train_idx,
        num_edgetype=self.num_edgetype,
        num_cluster=self._num_cluster,
        decomp_method=self._decomp_method,
        seed=self.seed)

    data.get_graph_partition()
    data.get_prop_index(data.cluster_graphs, data.cut_graph)
    self._param["num_node_cut"] = data.num_node_cut
    self._param["cluster_size"] = data.cluster_size
    logger.info("cluster_size = {}".format(self._param["cluster_size"]))

    data.train_idx = np.array([data._pos_map[xx] for xx in self.train_idx])
    data.val_idx = np.array([data._pos_map[xx] for xx in self.val_idx])
    data.test_idx = np.array([data._pos_map[xx] for xx in self.test_idx])

    row_idx, col_idx, values = sparse.find(sparse.csr_matrix(self._node_feat))

    # construct label
    feat_idx = np.array([data._pos_map[xx] for xx in xrange(self._num_nodes)])
    data.node_gt_label = np.zeros(self._num_nodes, dtype=np.int32)
    data.node_gt_label[feat_idx] = data_dict["all_label"]
    row_idx = np.array([data._pos_map[xx] for xx in row_idx])
    data.node_feat_shape = np.array(
        [self._num_nodes, self._feat_dim], dtype=np.int64)
    data.node_feat_indices = np.stack(
        [row_idx.astype(np.int64), col_idx.astype(np.int64)], axis=1)
    data.node_feat_values = values.astype(np.float32)

    return data