Python Examples of scipy.sparse.isspmatrix

Source File: _qc.py From scanpy with BSD 3-Clause "New" or "Revised" License

6 votes

def top_proportions(mtx: Union[np.array, spmatrix], n: int):
    """\
    Calculates cumulative proportions of top expressed genes

    Parameters
    ----------
    mtx
        Matrix, where each row is a sample, each column a feature.
    n
        Rank to calculate proportions up to. Value is treated as 1-indexed,
        `n=50` will calculate cumulative proportions up to the 50th most
        expressed gene.
    """
    if issparse(mtx):
        if not isspmatrix_csr(mtx):
            mtx = csr_matrix(mtx)
        # Allowing numba to do more
        return top_proportions_sparse_csr(mtx.data, mtx.indptr, np.array(n))
    else:
        return top_proportions_dense(mtx, n)

Source File: LinearSolver.py From florence with MIT License

6 votes

def GetCuthillMcKeePermutation(self,A):
        """Applies Cuthill-Mckee permutation to reduce the sparse matrix bandwidth

            input:
                A:                    [csc_matrix or csr_matrix]

            returns:
                perm:                 [1D array] of permutation such that A[perm,:][:,perm]
                                      has its non-zero elements closer to the diagonal
        """

        if not (isspmatrix_csc(A) or isspmatrix_csr(A)):
            raise TypeError("Matrix must be in CSC or CSR sparse format "
                "for Cuthill-McKee permutation")

        if int(sp.__version__.split('.')[1]) >= 15:
            from scipy.sparse.csgraph import reverse_cuthill_mckee
            perm = reverse_cuthill_mckee(A)
        else:
            from Florence.Tensor import symrcm
            perm = symrcm(A)

        return perm

Source File: LinearSolver.py From florence with MIT License

6 votes

def GetPreconditioner(self,A, type="amg_smoothed_aggregation"):
        """Applies a suitable preconditioner to sparse matrix A
            based on algebraic multigrid of incomplete LU/Cholesky factorisation

            input:
                A:                      [csc_matrix or csc_matrix]
                type:                   [str] either "amg_smoothed_aggregation" for
                                        a preconditioner based on algebraic multigrid
                                        or "incomplete_lu" for scipy's spilu linear
                                        operator

            returns:                    A preconditioner that can be used in conjunction
                                        with scipy's sparse linear iterative solvers
                                        (the M keyword in scipy's iterative solver)
        """

        if not (isspmatrix_csc(A) or isspmatrix_csr(A)):
            raise TypeError("Matrix must be in CSC or CSR sparse format for preconditioning")

        ml = smoothed_aggregation_solver(A)
        return ml.aspreconditioner()

Source File: fingerprints.py From oddt with BSD 3-Clause "New" or "Revised" License

6 votes

def csr_matrix_to_sparse(fp):
    """Sparsify a CSR fingerprint.

    .. versionadded:: 0.6

    Parameters
    ----------
    fp : csr_matrix
        Fingerprint in a CSR form.

    Returns
    -------
    fp : np.array
        Sparse fingerprint - an array of "on" integers. In cas of count vectors,
        the indices are dupplicated according to count.
    """
    if not isspmatrix_csr(fp):
        raise ValueError('fp is not CSR sparse matrix but %s (%s)' %
                         (type(fp), fp))
    # FIXME: change these methods to work for stacked fps (2D)
    return np.repeat(fp.indices, fp.data)


# ranges for hashing function

Source File: _simple.py From scanpy with BSD 3-Clause "New" or "Revised" License

6 votes

def _downsample_total_counts(X, total_counts, random_state, replace):
    total_counts = int(total_counts)
    total = X.sum()
    if total < total_counts:
        return X
    if issparse(X):
        original_type = type(X)
        if not isspmatrix_csr(X):
            X = csr_matrix(X)
        _downsample_array(
            X.data,
            total_counts,
            random_state=random_state,
            replace=replace,
            inplace=True,
        )
        X.eliminate_zeros()
        if original_type is not csr_matrix:
            X = original_type(X)
    else:
        v = X.reshape(np.multiply(*X.shape))
        _downsample_array(
            v, total_counts, random_state, replace=replace, inplace=True
        )
    return X

Source File: base.py From muffnn with BSD 3-Clause "New" or "Revised" License

6 votes

def _sparse_matrix_data(X):
    """Prepare the sparse matrix for conversion to TensorFlow.

    Parameters
    ----------
    X : sparse matrix

    Returns
    -------
    indices : numpy array with shape (X.nnz, 2)
              describing the indices with values in X.
    values : numpy array with shape (X.nnz)
             describing the values at each index
    """
    if sp.isspmatrix_csr(X):
        return _csr_data(X)
    else:
        return _csr_data(X.tocsr())

Source File: matrixtools.py From pyGSTi with Apache License 2.0

6 votes

def safeimag(A, inplace=False, check=False):
    """
    Returns the imaginary-part of `A` correctly when `A` is either a dense array
    or a sparse matrix
    """
    if check:
        assert(safenorm(A, 'real') < 1e-6), "Check failed: taking imag-part of matrix w/nonzero real part"
    if _sps.issparse(A):
        if _sps.isspmatrix_csr(A):
            if inplace:
                ret = _sps.csr_matrix((_np.imag(A.data), A.indices, A.indptr), shape=A.shape, dtype='d')
            else:  # copy
                ret = _sps.csr_matrix((_np.imag(A.data).copy(), A.indices.copy(),
                                       A.indptr.copy()), shape=A.shape, dtype='d')
            ret.eliminate_zeros()
            return ret
        else:
            raise NotImplementedError("safereal() doesn't work with %s matrices yet" % str(type(A)))
    else:
        return _np.imag(A)

Source File: matrixtools.py From pyGSTi with Apache License 2.0

6 votes

def safereal(A, inplace=False, check=False):
    """
    Returns the real-part of `A` correctly when `A` is either a dense array or
    a sparse matrix
    """
    if check:
        assert(safenorm(A, 'imag') < 1e-6), "Check failed: taking real-part of matrix w/nonzero imaginary part"
    if _sps.issparse(A):
        if _sps.isspmatrix_csr(A):
            if inplace:
                ret = _sps.csr_matrix((_np.real(A.data), A.indices, A.indptr), shape=A.shape, dtype='d')
            else:  # copy
                ret = _sps.csr_matrix((_np.real(A.data).copy(), A.indices.copy(),
                                       A.indptr.copy()), shape=A.shape, dtype='d')
            ret.eliminate_zeros()
            return ret
        else:
            raise NotImplementedError("safereal() doesn't work with %s matrices yet" % str(type(A)))
    else:
        return _np.real(A)

Source File: data_io.py From Kaggler with MIT License

6 votes

def save_hdf5(X, y, path):
    """Save data as a HDF5 file.

    Args:
        X (numpy or scipy sparse matrix): Data matrix
        y (numpy array): Target vector.
        path (str): Path to the HDF5 file to save data.
    """

    with h5py.File(path, 'w') as f:
        is_sparse = 1 if sparse.issparse(X) else 0
        f['issparse'] = is_sparse
        f['target'] = y

        if is_sparse:
            if not sparse.isspmatrix_csr(X):
                X = X.tocsr()

            f['shape'] = np.array(X.shape)
            f['data'] = X.data
            f['indices'] = X.indices
            f['indptr'] = X.indptr
        else:
            f['data'] = X

Source File: test_matrix_csr.py From lkpy with MIT License

6 votes

def test_csr_to_sps():
    # initialize sparse matrix
    mat = np.random.randn(10, 5)
    mat[mat <= 0] = 0
    # get COO
    smat = sps.coo_matrix(mat)
    # make sure it's sparse
    assert smat.nnz == np.sum(mat > 0)

    csr = lm.CSR.from_coo(smat.row, smat.col, smat.data, shape=smat.shape)
    assert csr.nnz == smat.nnz
    assert csr.nrows == smat.shape[0]
    assert csr.ncols == smat.shape[1]

    smat2 = csr.to_scipy()
    assert sps.isspmatrix(smat2)
    assert sps.isspmatrix_csr(smat2)

    for i in range(csr.nrows):
        assert smat2.indptr[i] == csr.rowptrs[i]
        assert smat2.indptr[i+1] == csr.rowptrs[i+1]
        sp = smat2.indptr[i]
        ep = smat2.indptr[i+1]
        assert all(smat2.indices[sp:ep] == csr.colinds[sp:ep])
        assert all(smat2.data[sp:ep] == csr.values[sp:ep])

Source File: matrix_operator.py From qiskit-aqua with Apache License 2.0

6 votes

def __init__(self, matrix, basis=None, z2_symmetries=None, atol=1e-12, name=None):
        """
        Args:
            matrix (numpy.ndarray or scipy.sparse.csr_matrix):
                    a 2-D sparse matrix represents operator (using CSR format internally)
            basis (list[tuple(object, [int])], optional): the grouping basis, each element is a
                                                          tuple composed of the basis
                                                          and the indices to paulis which are
                                                          belonged to that group.
                                                          e.g., if tpb basis is used, the object
                                                          will be a pauli.
                                                          by default, the group is equal to
                                                          non-grouping, each pauli is its own basis.
            z2_symmetries (Z2Symmetries): represent the Z2 symmetries
            atol (float): atol
            name (str): name
        """
        super().__init__(basis, z2_symmetries, name)
        if matrix is not None:
            matrix = matrix if scisparse.issparse(matrix) else scisparse.csr_matrix(matrix)
            matrix = matrix if scisparse.isspmatrix_csr(matrix) else matrix.to_csr(copy=True)
        self._matrix = matrix
        self._atol = atol

Source File: __init__.py From lkpy with MIT License

6 votes

def from_scipy(cls, mat, copy=True):
        """
        Convert a scipy sparse matrix to an internal CSR.

        Args:
            mat(scipy.sparse.spmatrix): a SciPy sparse matrix.
            copy(bool): if ``False``, reuse the SciPy storage if possible.

        Returns:
            CSR: a CSR matrix.
        """
        if not sps.isspmatrix_csr(mat):
            mat = mat.tocsr(copy=copy)
        rp = np.require(mat.indptr, np.intc, 'C')
        if copy and rp is mat.indptr:
            rp = rp.copy()
        cs = np.require(mat.indices, np.intc, 'C')
        if copy and cs is mat.indices:
            cs = cs.copy()
        vs = mat.data.copy() if copy else mat.data
        return cls(mat.shape[0], mat.shape[1], mat.nnz, rp, cs, vs)

Source File: markovChain.py From discreteMarkovChain with MIT License

5 votes

def getTransitionMatrix(self,probabilities=True):
        """
        If self.P has been given already, we will reuse it and convert it to a sparse csr matrix if needed.
        Otherwise, we will generate it using the direct or indirect method.         
        Since most solution methods use a probability matrix, this is the default setting. 
        By setting probabilities=False we can also return a rate matrix.
        """
        if self.P is not None:               
            if isspmatrix(self.P): 
                if not isspmatrix_csr(self.P):
                    self.P = self.P.tocsr() 
            else:
                assert isinstance(self.P, np.ndarray) and self.P.ndim==2 and self.P.shape[0]==self.P.shape[1],'P needs to be a 2d numpy array with an equal number of columns and rows'                     
                self.P = csr_matrix(self.P)   
                
        elif self.direct == True:
            self.P = self.directInitialMatrix()
            
        else:
            self.P = self.indirectInitialMatrix(self.initialState)   

        if probabilities:    
            P = self.convertToProbabilityMatrix(self.P)
        else: 
            P = self.convertToRateMatrix(self.P)   
        
        return P

Source File: text.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""
    if sp.isspmatrix_csr(X):
        return np.bincount(X.indices, minlength=X.shape[1])
    else:
        return np.diff(sp.csc_matrix(X, copy=False).indptr)

Source File: deepcut.py From deepcut with MIT License

5 votes

def _document_frequency(X):
    """
    Count the number of non-zero values for each feature in sparse X.
    """
    if sp.isspmatrix_csr(X):
        return np.bincount(X.indices, minlength=X.shape[1])
    return np.diff(sp.csc_matrix(X, copy=False).indptr)

Source File: STFIWF.py From 2016CCF_BDCI_Sougou with MIT License

5 votes

def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""

    if sp.isspmatrix_csr(X):
        # return np.sum(X,axis=0)
        return bincount(X.indices, minlength=X.shape[1])

    else:

        return np.diff(sp.csc_matrix(X, copy=False).indptr)

Source File: text.py From twitter-stock-recommendation with MIT License

5 votes

def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""
    if sp.isspmatrix_csr(X):
        return np.bincount(X.indices, minlength=X.shape[1])
    else:
        return np.diff(sp.csc_matrix(X, copy=False).indptr)

Source File: test_20news.py From twitter-stock-recommendation with MIT License

5 votes

def test_20news_vectorized():
    try:
        datasets.fetch_20newsgroups(subset='all',
                                    download_if_missing=False)
    except IOError:
        raise SkipTest("Download 20 newsgroups to run this test")

    # test subset = train
    bunch = datasets.fetch_20newsgroups_vectorized(subset="train")
    assert_true(sp.isspmatrix_csr(bunch.data))
    assert_equal(bunch.data.shape, (11314, 130107))
    assert_equal(bunch.target.shape[0], 11314)
    assert_equal(bunch.data.dtype, np.float64)

    # test subset = test
    bunch = datasets.fetch_20newsgroups_vectorized(subset="test")
    assert_true(sp.isspmatrix_csr(bunch.data))
    assert_equal(bunch.data.shape, (7532, 130107))
    assert_equal(bunch.target.shape[0], 7532)
    assert_equal(bunch.data.dtype, np.float64)

    # test subset = all
    bunch = datasets.fetch_20newsgroups_vectorized(subset='all')
    assert_true(sp.isspmatrix_csr(bunch.data))
    assert_equal(bunch.data.shape, (11314 + 7532, 130107))
    assert_equal(bunch.target.shape[0], 11314 + 7532)
    assert_equal(bunch.data.dtype, np.float64)

Source File: data.py From twitter-stock-recommendation with MIT License

5 votes

def inverse_transform(self, X, copy=None):
        """Scale back the data to the original representation

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            The data used to scale along the features axis.
        copy : bool, optional (default: None)
            Copy the input X or not.

        Returns
        -------
        X_tr : array-like, shape [n_samples, n_features]
            Transformed array.
        """
        check_is_fitted(self, 'scale_')

        copy = copy if copy is not None else self.copy
        if sparse.issparse(X):
            if self.with_mean:
                raise ValueError(
                    "Cannot uncenter sparse matrices: pass `with_mean=False` "
                    "instead See docstring for motivation and alternatives.")
            if not sparse.isspmatrix_csr(X):
                X = X.tocsr()
                copy = False
            if copy:
                X = X.copy()
            if self.scale_ is not None:
                inplace_column_scale(X, self.scale_)
        else:
            X = np.asarray(X)
            if copy:
                X = X.copy()
            if self.with_std:
                X *= self.scale_
            if self.with_mean:
                X += self.mean_
        return X

Source File: test_data.py From twitter-stock-recommendation with MIT License

5 votes

def test_add_dummy_feature_csr():
    X = sparse.csr_matrix([[1, 0], [0, 1], [0, 1]])
    X = add_dummy_feature(X)
    assert_true(sparse.isspmatrix_csr(X), X)
    assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]])

Source File: _k_means_0_22.py From daal4py with Apache License 2.0

5 votes

def predict(self, X, sample_weight=None):
    """Predict the closest cluster each sample in X belongs to.

    In the vector quantization literature, `cluster_centers_` is called
    the code book and each value returned by `predict` is the index of
    the closest code in the code book.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
       New data to predict.

    sample_weight : array-like, shape (n_samples,), optional
        The weights for each observation in X. If None, all observations
        are assigned equal weight (default: None)

    Returns
    -------
    labels : array, shape [n_samples,]
        Index of the cluster each sample belongs to.
    """
    check_is_fitted(self)

    X = self._check_test_data(X)

    daal_ready = sample_weight is None and hasattr(X, '__array__') # or sp.isspmatrix_csr(X)

    if daal_ready:
        logging.info("sklearn.cluster.KMeans.predict: " + method_uses_daal)
        return _daal4py_k_means_predict(X, self.n_clusters, self.cluster_centers_)[0]
    else:
        logging.info("sklearn.cluster.KMeans.predict: " + method_uses_sklearn)
        x_squared_norms = row_norms(X, squared=True)
        return _labels_inertia(X, sample_weight, x_squared_norms,
                               self.cluster_centers_)[0]

Source File: STFIWF.py From 2016_CCFsougou2 with MIT License

5 votes

def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""

    if sp.isspmatrix_csr(X):
        # return np.sum(X,axis=0)
        return bincount(X.indices, minlength=X.shape[1])

    else:

        return np.diff(sp.csc_matrix(X, copy=False).indptr)

Source File: arpack.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def get_inv_matvec(M, symmetric=False, tol=0):
    if isdense(M):
        return LuInv(M).matvec
    elif isspmatrix(M):
        if isspmatrix_csr(M) and symmetric:
            M = M.T
        return SpLuInv(M).matvec
    else:
        return IterInv(M, tol=tol).matvec

Source File: data.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def inverse_transform(self, X, copy=None):
        """Scale back the data to the original representation

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            The data used to scale along the features axis.
        copy : bool, optional (default: None)
            Copy the input X or not.

        Returns
        -------
        X_tr : array-like, shape [n_samples, n_features]
            Transformed array.
        """
        check_is_fitted(self, 'scale_')

        copy = copy if copy is not None else self.copy
        if sparse.issparse(X):
            if self.with_mean:
                raise ValueError(
                    "Cannot uncenter sparse matrices: pass `with_mean=False` "
                    "instead See docstring for motivation and alternatives.")
            if not sparse.isspmatrix_csr(X):
                X = X.tocsr()
                copy = False
            if copy:
                X = X.copy()
            if self.scale_ is not None:
                inplace_column_scale(X, self.scale_)
        else:
            X = np.asarray(X)
            if copy:
                X = X.copy()
            if self.with_std:
                X *= self.scale_
            if self.with_mean:
                X += self.mean_
        return X

Source File: arpack.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def get_OPinv_matvec(A, M, sigma, symmetric=False, tol=0):
    if sigma == 0:
        return get_inv_matvec(A, symmetric=symmetric, tol=tol)

    if M is None:
        #M is the identity matrix
        if isdense(A):
            if (np.issubdtype(A.dtype, np.complexfloating)
                    or np.imag(sigma) == 0):
                A = np.copy(A)
            else:
                A = A + 0j
            A.flat[::A.shape[1] + 1] -= sigma
            return LuInv(A).matvec
        elif isspmatrix(A):
            A = A - sigma * eye(A.shape[0])
            if symmetric and isspmatrix_csr(A):
                A = A.T
            return SpLuInv(A.tocsc()).matvec
        else:
            return IterOpInv(_aslinearoperator_with_dtype(A),
                              M, sigma, tol=tol).matvec
    else:
        if ((not isdense(A) and not isspmatrix(A)) or
                (not isdense(M) and not isspmatrix(M))):
            return IterOpInv(_aslinearoperator_with_dtype(A),
                              _aslinearoperator_with_dtype(M),
                              sigma, tol=tol).matvec
        elif isdense(A) or isdense(M):
            return LuInv(A - sigma * M).matvec
        else:
            OP = A - sigma * M
            if symmetric and isspmatrix_csr(OP):
                OP = OP.T
            return SpLuInv(OP.tocsc()).matvec


# ARPACK is not threadsafe or reentrant (SAVE variables), so we need a
# lock and a re-entering check.

Source File: bm25transformer.py From Quadflor with BSD 3-Clause "New" or "Revised" License

5 votes

def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""
    if sp.isspmatrix_csr(X):
        return np.bincount(X.indices, minlength=X.shape[1])
    else:
        return np.diff(sp.csc_matrix(X, copy=False).indptr)

Source File: bm25.py From vec4ir with MIT License

5 votes

def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""
    if sp.isspmatrix_csr(X):
        return np.bincount(X.indices, minlength=X.shape[1])
    else:
        return np.diff(sp.csc_matrix(X, copy=False).indptr)

Source File: test_matrix.py From lkpy with MIT License

5 votes

def test_sparse_matrix_scipy_implicit():
    ratings = ml_test.ratings
    ratings = ratings.loc[:, ['user', 'item']]
    mat, uidx, iidx = lm.sparse_ratings(ratings, scipy=True)

    assert sps.issparse(mat)
    assert sps.isspmatrix_csr(mat)
    assert len(uidx) == ratings.user.nunique()
    assert len(iidx) == ratings.item.nunique()

    assert all(mat.data == 1.0)

Source File: test_matrix.py From lkpy with MIT License

5 votes

def test_sparse_matrix_scipy():
    ratings = ml_test.ratings
    mat, uidx, iidx = lm.sparse_ratings(ratings, scipy=True)

    assert sps.issparse(mat)
    assert sps.isspmatrix_csr(mat)
    assert len(uidx) == ratings.user.nunique()
    assert len(iidx) == ratings.item.nunique()

    # user indicators should correspond to user item counts
    ucounts = ratings.groupby('user').item.count()
    ucounts = ucounts.loc[uidx].cumsum()
    assert all(mat.indptr[1:] == ucounts.values)

Source File: anndata.py From anndata with BSD 3-Clause "New" or "Revised" License

5 votes

def transpose(self) -> "AnnData":
        """\
        Transpose whole object.

        Data matrix is transposed, observations and variables are interchanged.

        Ignores `.raw`.
        """
        if not self.isbacked:
            X = self.X
        else:
            X = self.file["X"]
        if self.is_view:
            raise ValueError(
                "You’re trying to transpose a view of an `AnnData`, "
                "which is currently not implemented. Call `.copy()` before transposing."
            )

        def t_csr(m: sparse.spmatrix) -> sparse.csr_matrix:
            return m.T.tocsr() if sparse.isspmatrix_csr(m) else m.T

        return AnnData(
            t_csr(X),
            obs=self.var,
            var=self.obs,
            # we're taking a private attributes here to be able to modify uns of the original object
            uns=self._uns,
            obsm=self.varm.flipped(),
            varm=self.obsm.flipped(),
            obsp=self.varp.copy(),
            varp=self.obsp.copy(),
            filename=self.filename,
            layers={k: t_csr(v) for k, v in self.layers.items()},
            dtype=self.X.dtype.name,
        )

Python scipy.sparse.isspmatrix_csr() Examples