Python scipy.sparse.tocsr() Examples

The following are 28 code examples of scipy.sparse.tocsr(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.sparse, or try the search function.
Example #1
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def __init__(self, sparse, documents_columns=True):
        """
        Keep a scipy.sparse matrix in column-per-document orientation.

        If `documents_columns` is true, `sparse` is converted to CSC as-is;
        otherwise it is converted to CSR and transposed.
        """
        # Both branches leave shape[1] equal to the number of docs (needed in len()).
        self.sparse = sparse.tocsc() if documents_columns else sparse.tocsr().T
Example #2
Source File: matutils.py    From ohmnet with MIT License 5 votes vote down vote up
def __init__(self, sparse, documents_columns=True):
        """
        Store `sparse` on the instance with one document per column.

        `documents_columns=False` means the input has documents as rows, so it
        is converted to CSR and transposed; otherwise it is converted to CSC.
        """
        if not documents_columns:
            # transpose so that shape[1]=number of docs (needed in len())
            self.sparse = sparse.tocsr().T
            return
        self.sparse = sparse.tocsc()
Example #3
Source File: matutils.py    From ohmnet with MIT License 5 votes vote down vote up
def scipy2sparse(vec, eps=1e-9):
    """
    Turn a single-row scipy.sparse matrix into a list of `(index, value)` 2-tuples.

    Stored entries whose absolute value is not greater than `eps` are left out.
    Indices are returned as `int`, values as `float`. The input must have
    exactly one row (checked with an assertion).
    """
    row = vec.tocsr()
    assert row.shape[0] == 1
    document = []
    for pos, val in zip(row.indices, row.data):
        if numpy.abs(val) > eps:
            document.append((int(pos), float(val)))
    return document
Example #4
Source File: matutils.py    From xlinkBook with MIT License 5 votes vote down vote up
def unitvec(vec):
    """
    Scale a vector to unit (L2) length.

    The result has the same kind as the input:

    * scipy.sparse matrix: converted to CSR and divided by the L2 norm of its
      stored values. A matrix with zero norm is returned (as CSR) unscaled.
    * numpy.ndarray: cast to float and scaled via `blas_nrm2` / `blas_scal`.
      An array with zero norm is returned unscaled.
    * iterable of 2-item tuples/lists (gensim sparse format): a new list of
      `(termid, val / length)` pairs.

    An empty iterable (or a non-iterable object) is returned unchanged.

    Raises:
        AssertionError: a gensim-format document has zero length.
        ValueError: the first element is not a 2-item tuple or list.
    """
    if scipy.sparse.issparse(vec):
        vec = vec.tocsr()
        veclen = numpy.sqrt(numpy.sum(vec.data ** 2))
        # Skip the division for a zero-norm matrix.
        return vec / veclen if veclen > 0.0 else vec

    if isinstance(vec, numpy.ndarray):
        vec = numpy.asarray(vec, dtype=float)
        # NOTE(review): blas_nrm2 / blas_scal are defined outside this snippet;
        # presumably BLAS norm / scale wrappers -- confirm.
        veclen = blas_nrm2(vec)
        return blas_scal(1.0 / veclen, vec) if veclen > 0.0 else vec

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # StopIteration: empty input. TypeError: not iterable at all (the
        # original bare `except:` also returned such input unchanged).
        # Anything else, e.g. KeyboardInterrupt, now propagates.
        return vec

    if not (isinstance(first, (tuple, list)) and len(first) == 2):
        raise ValueError("unknown input type")

    # gensim sparse format
    length = math.sqrt(sum(val ** 2 for _, val in vec))
    assert length > 0.0, "sparse documents must not contain any explicit zero entries"
    if length == 1.0:
        return list(vec)
    return [(termid, val / length) for termid, val in vec]
Example #5
Source File: matutils.py    From xlinkBook with MIT License 5 votes vote down vote up
def __init__(self, sparse, documents_columns=True):
        """
        Remember `sparse` in a layout where columns are documents.

        Column-oriented input (`documents_columns=True`) is converted to CSC;
        row-oriented input is converted to CSR and then transposed.
        """
        # after this, shape[1]=number of docs (needed in len())
        matrix = sparse.tocsc() if documents_columns else sparse.tocsr().T
        self.sparse = matrix
Example #6
Source File: matutils.py    From xlinkBook with MIT License 5 votes vote down vote up
def scipy2sparse(vec, eps=1e-9):
    """
    Convert a one-row scipy.sparse matrix into gensim document format.

    Returns a list of `(int position, float value)` 2-tuples, keeping only the
    stored entries whose magnitude exceeds `eps`. An assertion checks that the
    input has a single row.
    """
    csr = vec.tocsr()
    assert csr.shape[0] == 1
    entries = zip(csr.indices, csr.data)
    return [(int(idx), float(weight)) for idx, weight in entries if numpy.abs(weight) > eps]
Example #7
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def unitvec(vec):
    """
    Normalise `vec` to unit Euclidean length, keeping the input's format.

    Supported inputs:

    * scipy.sparse matrix -> CSR matrix divided by its L2 norm;
    * numpy.ndarray -> float array scaled through `blas_nrm2` / `blas_scal`;
    * gensim sparse vector (iterable of 2-item tuples/lists) -> list of
      `(termid, val / length)` pairs.

    Sparse matrices and arrays with zero norm are returned unscaled. Empty
    iterables and non-iterable objects are returned unchanged.

    Raises:
        AssertionError: a gensim sparse vector has zero length.
        ValueError: the input's first element is not a 2-item tuple/list.
    """
    if scipy.sparse.issparse(vec):
        csr = vec.tocsr()
        norm = numpy.sqrt(numpy.sum(csr.data ** 2))
        if norm > 0.0:
            return csr / norm
        return csr

    if isinstance(vec, numpy.ndarray):
        dense = numpy.asarray(vec, dtype=float)
        # NOTE(review): blas_nrm2 / blas_scal come from outside this snippet;
        # presumably thin BLAS wrappers -- confirm.
        norm = blas_nrm2(dense)
        if norm > 0.0:
            return blas_scal(1.0 / norm, dense)
        return dense

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Empty or non-iterable input is handed back untouched; unlike a bare
        # `except:`, this lets KeyboardInterrupt and real errors propagate.
        return vec

    if isinstance(first, (tuple, list)) and len(first) == 2:  # gensim sparse format
        length = math.sqrt(sum(weight ** 2 for _, weight in vec))
        assert length > 0.0, "sparse documents must not contain any explicit zero entries"
        if length != 1.0:
            return [(termid, weight / length) for termid, weight in vec]
        return list(vec)
    raise ValueError("unknown input type")
Example #8
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def __init__(self, sparse, documents_columns=True):
        """
        Wrap a scipy.sparse matrix, normalising it to documents-as-columns.

        `sparse` is converted to CSC when `documents_columns` is true, and to
        the transpose of its CSR form otherwise.
        """
        if documents_columns:
            converted = sparse.tocsc()
        else:
            # rows were documents; flip so shape[1]=number of docs (needed in len())
            converted = sparse.tocsr().T
        self.sparse = converted
Example #9
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def scipy2sparse(vec, eps=1e-9):
    """
    Build a gensim-style document (list of 2-tuples) from a scipy.sparse row.

    The matrix is converted to CSR and asserted to have one row. Each stored
    entry with absolute value above `eps` becomes `(int(index), float(value))`.
    """
    vec = vec.tocsr()
    assert vec.shape[0] == 1

    def above_threshold(entry):
        # entry is an (index, value) pair taken from the CSR arrays
        return numpy.abs(entry[1]) > eps

    kept = filter(above_threshold, zip(vec.indices, vec.data))
    return [(int(pos), float(val)) for pos, val in kept]
Example #10
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def __init__(self, sparse, documents_columns=True):
        """
        Save `sparse` as `self.sparse`, arranged with documents in columns.

        When `documents_columns` is false the matrix is converted to CSR and
        transposed; when true it is simply converted to CSC.
        """
        # make sure shape[1]=number of docs (needed in len())
        self.sparse = sparse.tocsc() if documents_columns else sparse.tocsr().T
Example #11
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def scipy2sparse(vec, eps=1e-9):
    """
    Convert a scipy.sparse vector (one-row matrix) into a list of 2-tuples.

    Each tuple is `(int column index, float value)`. Stored values with
    `abs(value) <= eps` are skipped. The one-row requirement is asserted.
    """
    as_csr = vec.tocsr()
    assert as_csr.shape[0] == 1
    pairs = []
    for column, value in zip(as_csr.indices, as_csr.data):
        if numpy.abs(value) > eps:
            pairs.append((int(column), float(value)))
    return pairs
Example #12
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def unitvec(vec):
    """
    Return `vec` rescaled so that its L2 norm is 1.

    The output format follows the input format:

    * scipy.sparse input gives a CSR matrix;
    * numpy.ndarray input gives a float array (scaled via the module's
      `blas_nrm2` / `blas_scal` helpers);
    * a gensim sparse vector (iterable of 2-item tuples/lists) gives a list
      of `(termid, val / length)` pairs.

    A zero-norm sparse matrix or array is returned without scaling. An empty
    iterable, or an object that is not iterable, is returned unchanged.

    Raises:
        AssertionError: a gensim sparse vector has zero length.
        ValueError: the first element is not a 2-item tuple/list.
    """
    if scipy.sparse.issparse(vec):
        vec = vec.tocsr()
        veclen = numpy.sqrt(numpy.sum(vec.data ** 2))
        # zero vector: nothing to scale
        return vec / veclen if veclen > 0.0 else vec

    if isinstance(vec, numpy.ndarray):
        vec = numpy.asarray(vec, dtype=float)
        # NOTE(review): blas_nrm2 / blas_scal are not visible in this snippet;
        # presumably BLAS nrm2 / scal bindings -- confirm.
        veclen = blas_nrm2(vec)
        return blas_scal(1.0 / veclen, vec) if veclen > 0.0 else vec

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Only "empty" and "not iterable" mean "return as-is". A bare
        # `except:` here would also hide KeyboardInterrupt and real bugs.
        return vec

    if not (isinstance(first, (tuple, list)) and len(first) == 2):
        raise ValueError("unknown input type")

    # gensim sparse format
    length = math.sqrt(sum(val ** 2 for _, val in vec))
    assert length > 0.0, "sparse documents must not contain any explicit zero entries"
    if length == 1.0:
        return list(vec)
    return [(termid, val / length) for termid, val in vec]
Example #13
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def __init__(self, sparse, documents_columns=True):
        """
        Hold on to `sparse`, oriented so that every column is a document.

        If the caller's matrix has documents as rows (`documents_columns`
        false), it is converted to CSR and transposed; otherwise it is
        converted to CSC.
        """
        if not documents_columns:
            # ensure shape[1]=number of docs (needed in len())
            self.sparse = sparse.tocsr().T
            return
        self.sparse = sparse.tocsc()
Example #14
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def scipy2sparse(vec, eps=1e-9):
    """
    Express a single-row scipy.sparse matrix in gensim document format.

    The result is a list of `(int, float)` 2-tuples taken from the CSR
    `indices` and `data` arrays, keeping only values with magnitude greater
    than `eps`. The input is asserted to have exactly one row.
    """
    single_row = vec.tocsr()
    assert single_row.shape[0] == 1
    stored = zip(single_row.indices, single_row.data)
    return [(int(pos), float(val)) for pos, val in stored if numpy.abs(val) > eps]
Example #15
Source File: matutils.py    From pynlpini with GNU General Public License v2.0 5 votes vote down vote up
def unitvec(vec):
    """
    Scale a vector to unit length (L2 norm), preserving its format.

    * scipy.sparse matrix: returned as CSR, divided by its L2 norm.
    * numpy.ndarray: cast to float, scaled through `blas_nrm2` / `blas_scal`.
    * gensim sparse vector (iterable of 2-item tuples/lists): returned as a
      list of `(termid, val / length)` pairs.

    Zero-norm sparse matrices and arrays are returned unscaled. Empty or
    non-iterable input is returned unchanged.

    Raises:
        AssertionError: a gensim sparse vector has zero length.
        ValueError: the first element is not a 2-item tuple/list.
    """
    if scipy.sparse.issparse(vec):
        sparse_vec = vec.tocsr()
        magnitude = numpy.sqrt(numpy.sum(sparse_vec.data ** 2))
        if magnitude > 0.0:
            return sparse_vec / magnitude
        return sparse_vec

    if isinstance(vec, numpy.ndarray):
        array = numpy.asarray(vec, dtype=float)
        # NOTE(review): blas_nrm2 / blas_scal are defined elsewhere in the
        # file; presumably BLAS norm / scale routines -- confirm.
        magnitude = blas_nrm2(array)
        if magnitude > 0.0:
            return blas_scal(1.0 / magnitude, array)
        return array

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Narrowed from a bare `except:` so that only "empty" or "not
        # iterable" short-circuit; everything else propagates.
        return vec

    if isinstance(first, (tuple, list)) and len(first) == 2:  # gensim sparse format
        length = math.sqrt(sum(val ** 2 for _, val in vec))
        assert length > 0.0, "sparse documents must not contain any explicit zero entries"
        if length != 1.0:
            return [(termid, val / length) for termid, val in vec]
        return list(vec)
    raise ValueError("unknown input type")
Example #16
Source File: matutils.py    From pynlpini with GNU General Public License v2.0 5 votes vote down vote up
def __init__(self, sparse, documents_columns=True):
        """
        Store the corpus matrix with documents along the column axis.

        `sparse` goes through `tocsc()` when `documents_columns` is true, and
        through `tocsr()` followed by a transpose when it is false.
        """
        # result always has shape[1]=number of docs (needed in len())
        matrix = sparse.tocsc() if documents_columns else sparse.tocsr().T
        self.sparse = matrix
Example #17
Source File: matutils.py    From pynlpini with GNU General Public License v2.0 5 votes vote down vote up
def scipy2sparse(vec, eps=1e-9):
    """
    Convert a scipy.sparse vector into gensim document format.

    Returns a list of `(int index, float value)` 2-tuples for the stored
    entries whose absolute value is greater than `eps`. The input, once
    converted to CSR, is asserted to contain a single row.
    """
    vec = vec.tocsr()
    assert vec.shape[0] == 1
    doc = []
    for pos, val in zip(vec.indices, vec.data):
        if numpy.abs(val) > eps:
            doc.append((int(pos), float(val)))
    return doc
Example #18
Source File: matutils.py    From category2vec with GNU Lesser General Public License v3.0 5 votes vote down vote up
def unitvec(vec):
    """
    Scale a vector to unit length under the L2 norm.

    Output is in the same format as the input:

    * scipy.sparse => scipy.sparse (CSR), divided by its L2 norm;
    * numpy array => float numpy array, scaled via `blas_nrm2` / `blas_scal`;
    * gensim vector (iterable of 2-item tuples/lists) => list of
      `(termid, val / length)` pairs.

    A sparse matrix or array with zero norm is returned unscaled. Empty or
    non-iterable input is returned unchanged.

    Raises:
        AssertionError: a gensim vector has zero length.
        ValueError: the first element is not a 2-item tuple/list.
    """
    if scipy.sparse.issparse(vec):
        vec = vec.tocsr()
        veclen = numpy.sqrt(numpy.sum(vec.data ** 2))
        # leave the zero vector alone
        return vec / veclen if veclen > 0.0 else vec

    if isinstance(vec, numpy.ndarray):
        vec = numpy.asarray(vec, dtype=float)
        # NOTE(review): blas_nrm2 / blas_scal are external to this snippet;
        # presumably BLAS wrappers -- confirm.
        veclen = blas_nrm2(vec)
        return blas_scal(1.0 / veclen, vec) if veclen > 0.0 else vec

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Explicit exception list instead of a bare `except:`; this keeps the
        # "return empty / non-iterable input as-is" behaviour without
        # swallowing KeyboardInterrupt or unrelated errors.
        return vec

    if not (isinstance(first, (tuple, list)) and len(first) == 2):
        raise ValueError("unknown input type")

    # gensim sparse format
    length = math.sqrt(sum(val ** 2 for _, val in vec))
    assert length > 0.0, "sparse documents must not contain any explicit zero entries"
    if length == 1.0:
        return list(vec)
    return [(termid, val / length) for termid, val in vec]
Example #19
Source File: matutils.py    From category2vec with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, sparse, documents_columns=True):
        """
        Attach `sparse` to the instance in documents-as-columns form.

        With `documents_columns` true the matrix is converted to CSC; with it
        false the matrix is converted to CSR and transposed.
        """
        if documents_columns:
            converted = sparse.tocsc()
        else:
            # transpose: make sure shape[1]=number of docs (needed in len())
            converted = sparse.tocsr().T
        self.sparse = converted
Example #20
Source File: matutils.py    From category2vec with GNU Lesser General Public License v3.0 5 votes vote down vote up
def scipy2sparse(vec, eps=1e-9):
    """
    Produce a gensim document (list of 2-tuples) from a scipy.sparse vector.

    After conversion to CSR the input is asserted to have one row. Every
    stored `(index, value)` pair with `abs(value) > eps` is emitted as
    `(int(index), float(value))`.
    """
    csr_row = vec.tocsr()
    assert csr_row.shape[0] == 1

    def significant(entry):
        # entry is an (index, value) pair from the CSR arrays
        return numpy.abs(entry[1]) > eps

    return [(int(pos), float(val)) for pos, val in filter(significant, zip(csr_row.indices, csr_row.data))]
Example #21
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def unitvec(vec):
    """
    Divide `vec` by its Euclidean norm and return it in the input's format.

    Accepted formats:

    * scipy.sparse matrix (result is CSR);
    * numpy.ndarray (result is a float array, computed with the module's
      `blas_nrm2` / `blas_scal` helpers);
    * gensim sparse vector, i.e. an iterable of 2-item tuples/lists (result
      is a list of `(termid, val / length)` pairs).

    Sparse matrices and arrays with zero norm are returned unscaled. Empty or
    non-iterable input is returned unchanged.

    Raises:
        AssertionError: a gensim sparse vector has zero length.
        ValueError: the first element is not a 2-item tuple/list.
    """
    if scipy.sparse.issparse(vec):
        csr = vec.tocsr()
        norm = numpy.sqrt(numpy.sum(csr.data ** 2))
        if norm > 0.0:
            return csr / norm
        return csr

    if isinstance(vec, numpy.ndarray):
        dense = numpy.asarray(vec, dtype=float)
        # NOTE(review): blas_nrm2 / blas_scal live outside this snippet;
        # presumably BLAS nrm2 / scal -- confirm.
        norm = blas_nrm2(dense)
        if norm > 0.0:
            return blas_scal(1.0 / norm, dense)
        return dense

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Was a bare `except:`. Only an exhausted iterator or a non-iterable
        # argument should fall back to returning the input.
        return vec

    if isinstance(first, (tuple, list)) and len(first) == 2:  # gensim sparse format
        length = math.sqrt(sum(weight ** 2 for _, weight in vec))
        assert length > 0.0, "sparse documents must not contain any explicit zero entries"
        if length != 1.0:
            return [(termid, weight / length) for termid, weight in vec]
        return list(vec)
    raise ValueError("unknown input type")
Example #22
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def __init__(self, sparse, documents_columns=True):
        """
        Record `sparse` as `self.sparse` with one column per document.

        `documents_columns` says whether the input already has documents in
        columns (convert to CSC) or in rows (convert to CSR, then transpose).
        """
        # shape[1]=number of docs afterwards (needed in len())
        self.sparse = sparse.tocsc() if documents_columns else sparse.tocsr().T
Example #23
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def scipy2sparse(vec, eps=1e-9):
    """
    Convert a scipy.sparse vector into a gensim document (list of 2-tuples).

    Only stored entries with `abs(value) > eps` are kept; each one is returned
    as `(int(index), float(value))`. An assertion requires the CSR form of the
    input to have exactly one row.
    """
    row = vec.tocsr()
    assert row.shape[0] == 1
    kept = []
    for index, value in zip(row.indices, row.data):
        if numpy.abs(value) > eps:
            kept.append((int(index), float(value)))
    return kept
Example #24
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def __init__(self, sparse, documents_columns=True):
        """
        Keep `sparse` internally with documents laid out as columns.

        Row-per-document input (`documents_columns=False`) is converted to CSR
        and transposed; column-per-document input is converted to CSC.
        """
        if not documents_columns:
            # flip rows/columns so shape[1]=number of docs (needed in len())
            self.sparse = sparse.tocsr().T
            return
        self.sparse = sparse.tocsc()
Example #25
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def scipy2sparse(vec, eps=1e-9):
    """
    Convert a one-row scipy.sparse matrix to gensim document format.

    Output is a list of `(int, float)` 2-tuples built from the CSR `indices`
    and `data` arrays, restricted to values whose magnitude is above `eps`.
    The single-row shape is asserted.
    """
    matrix = vec.tocsr()
    assert matrix.shape[0] == 1
    candidates = zip(matrix.indices, matrix.data)
    return [(int(termid), float(weight)) for termid, weight in candidates if numpy.abs(weight) > eps]
Example #26
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def unitvec(vec):
    """
    Scale a vector to unit L2 length; the result mirrors the input's format.

    * scipy.sparse matrix -> CSR matrix divided by its L2 norm.
    * numpy.ndarray -> float array scaled via `blas_nrm2` / `blas_scal`.
    * gensim sparse vector (iterable of 2-item tuples/lists) -> list of
      `(termid, val / length)` pairs.

    A sparse matrix or array with zero norm is returned unscaled. Empty or
    non-iterable input is returned unchanged.

    Raises:
        AssertionError: a gensim sparse vector has zero length.
        ValueError: the first element is not a 2-item tuple/list.
    """
    if scipy.sparse.issparse(vec):
        vec = vec.tocsr()
        veclen = numpy.sqrt(numpy.sum(vec.data ** 2))
        # a zero-norm matrix is returned without dividing
        return vec / veclen if veclen > 0.0 else vec

    if isinstance(vec, numpy.ndarray):
        vec = numpy.asarray(vec, dtype=float)
        # NOTE(review): blas_nrm2 / blas_scal are not defined in this snippet;
        # presumably BLAS norm / scale helpers -- confirm.
        veclen = blas_nrm2(vec)
        return blas_scal(1.0 / veclen, vec) if veclen > 0.0 else vec

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Empty (StopIteration) or non-iterable (TypeError) input is passed
        # through. The previous bare `except:` hid every other failure too.
        return vec

    if not (isinstance(first, (tuple, list)) and len(first) == 2):
        raise ValueError("unknown input type")

    # gensim sparse format
    length = math.sqrt(sum(val ** 2 for _, val in vec))
    assert length > 0.0, "sparse documents must not contain any explicit zero entries"
    if length == 1.0:
        return list(vec)
    return [(termid, val / length) for termid, val in vec]
Example #27
Source File: matutils.py    From topical_word_embeddings with MIT License 5 votes vote down vote up
def scipy2sparse(vec, eps=1e-9):
    """
    Convert a scipy.sparse vector into gensim document format (list of 2-tuples).

    The vector is converted to CSR and asserted to be a single row. Stored
    values whose absolute value does not exceed `eps` are dropped; the rest
    are returned as `(int(index), float(value))` pairs.
    """
    vec = vec.tocsr()
    assert vec.shape[0] == 1

    def is_kept(entry):
        # entry is an (index, value) pair from the CSR arrays
        return numpy.abs(entry[1]) > eps

    surviving = filter(is_kept, zip(vec.indices, vec.data))
    return [(int(pos), float(val)) for pos, val in surviving]
Example #28
Source File: matutils.py    From ohmnet with MIT License 4 votes vote down vote up
def unitvec(vec, norm='l2'):
    """
    Scale a vector to unit length under the chosen norm.

    Args:
        vec: a scipy.sparse matrix, a numpy.ndarray, or an iterable of 2-item
            tuples/lists.
        norm: 'l1' (sum of absolute values) or 'l2' (Euclidean length).

    Returns:
        The scaled vector in the same format as the input. A sparse matrix is
        returned as CSR and an ndarray as a float array. A sparse matrix or
        array with zero norm is returned unscaled. Empty or non-iterable
        input is returned unchanged. For an iterable of pairs, the result is
        whatever `ret_normalized_vec(vec, length)` returns.

    Raises:
        ValueError: `norm` is not 'l1' or 'l2', or the first element of the
            input is not a 2-item tuple/list.
        AssertionError: an iterable of pairs has zero length under `norm`.
    """
    if norm not in ('l1', 'l2'):
        raise ValueError("'%s' is not a supported norm. Currently supported norms are 'l1' and 'l2'." % norm)
    # `norm` is validated above, so every branch below is a two-way choice.
    use_l1 = norm == 'l1'

    if scipy.sparse.issparse(vec):
        vec = vec.tocsr()
        if use_l1:
            veclen = numpy.sum(numpy.abs(vec.data))
        else:
            veclen = numpy.sqrt(numpy.sum(vec.data ** 2))
        # skip the division for a zero-norm matrix
        return vec / veclen if veclen > 0.0 else vec

    if isinstance(vec, numpy.ndarray):
        vec = numpy.asarray(vec, dtype=float)
        if use_l1:
            veclen = numpy.sum(numpy.abs(vec))
        else:
            # NOTE(review): blas_nrm2 / blas_scal are defined outside this
            # snippet; presumably BLAS norm / scale helpers -- confirm.
            veclen = blas_nrm2(vec)
        return blas_scal(1.0 / veclen, vec) if veclen > 0.0 else vec

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Empty or non-iterable input is handed back as-is. Narrowed from a
        # bare `except:` so KeyboardInterrupt and real errors still surface.
        return vec

    if not (isinstance(first, (tuple, list)) and len(first) == 2):
        raise ValueError("unknown input type")

    if use_l1:
        length = float(sum(abs(val) for _, val in vec))
    else:
        length = math.sqrt(sum(val ** 2 for _, val in vec))
    assert length > 0.0, "sparse documents must not contain any explicit zero entries"
    # NOTE(review): ret_normalized_vec is defined elsewhere in the file;
    # presumably it divides each value by `length` -- confirm.
    return ret_normalized_vec(vec, length)