Python scipy.sparse.tocsr() Examples
The following are 28 code examples of scipy.sparse.tocsr().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.sparse, or try the search function.
Example #1
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def __init__(self, sparse, documents_columns=True):
    """
    Store `sparse` on the instance so that its columns are the documents.

    If `documents_columns` is true, the matrix is kept in the given
    orientation and converted to CSC. Otherwise it is converted to CSR and
    transposed before being stored.
    """
    if not documents_columns:
        # make sure shape[1]=number of docs (needed in len())
        self.sparse = sparse.tocsr().T
        return
    self.sparse = sparse.tocsc()
Example #2
Source File: matutils.py From ohmnet with MIT License | 5 votes |
def __init__(self, sparse, documents_columns=True):
    """
    Store `sparse` on the instance so that its columns are the documents.

    If `documents_columns` is true, the matrix is kept in the given
    orientation and converted to CSC. Otherwise it is converted to CSR and
    transposed before being stored.
    """
    if not documents_columns:
        # make sure shape[1]=number of docs (needed in len())
        self.sparse = sparse.tocsr().T
        return
    self.sparse = sparse.tocsc()
Example #3
Source File: matutils.py From ohmnet with MIT License | 5 votes |
def scipy2sparse(vec, eps=1e-9):
    """
    Turn a single-row scipy.sparse vector into document format.

    The result is a list of `(int position, float value)` 2-tuples, one per
    stored entry whose absolute value is greater than `eps`. The input is
    converted to CSR first and must have exactly one row.
    """
    row = vec.tocsr()
    assert row.shape[0] == 1
    document = []
    for position, weight in zip(row.indices, row.data):
        if numpy.abs(weight) > eps:
            document.append((int(position), float(weight)))
    return document
Example #4
Source File: matutils.py From xlinkBook with MIT License | 5 votes |
def unitvec(vec):
    """
    Scale a vector to unit (Euclidean) length.

    The only exception is the zero vector, which is returned back unchanged.
    Output will be in the same format as input (i.e., gensim vector=>gensim
    vector, or numpy array=>numpy array, scipy.sparse=>scipy.sparse).

    `vec` may be a scipy.sparse matrix, a numpy array, or an iterable of
    `(id, value)` 2-tuples/lists. Empty iterables and objects that cannot be
    iterated at all are returned unchanged.

    Raises ValueError when the first element of a non-empty iterable is not a
    2-item tuple or list, and AssertionError when a list-of-pairs vector has
    zero norm.
    """
    if scipy.sparse.issparse(vec):
        # The vector stays sparse; CSR is used so the stored values are
        # available through `.data`.
        vec = vec.tocsr()
        veclen = numpy.sqrt(numpy.sum(vec.data ** 2))
        if veclen > 0.0:
            return vec / veclen
        return vec

    if isinstance(vec, numpy.ndarray):
        vec = numpy.asarray(vec, dtype=float)
        # blas_nrm2 / blas_scal are defined elsewhere in the module --
        # presumably BLAS norm and scale routines; verify against their definition.
        veclen = blas_nrm2(vec)
        if veclen > 0.0:
            return blas_scal(1.0 / veclen, vec)
        return vec

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Empty iterable (StopIteration) or non-iterable input (TypeError):
        # nothing to normalise. A bare `except:` here would also swallow
        # KeyboardInterrupt/SystemExit, so only these two are caught.
        return vec

    if isinstance(first, (tuple, list)) and len(first) == 2:  # gensim sparse format?
        length = 1.0 * math.sqrt(sum(val ** 2 for _, val in vec))
        assert length > 0.0, "sparse documents must not contain any explicit zero entries"
        if length != 1.0:
            return [(termid, val / length) for termid, val in vec]
        return list(vec)
    raise ValueError("unknown input type")
Example #5
Source File: matutils.py From xlinkBook with MIT License | 5 votes |
def __init__(self, sparse, documents_columns=True):
    """
    Store `sparse` on the instance so that its columns are the documents.

    If `documents_columns` is true, the matrix is kept in the given
    orientation and converted to CSC. Otherwise it is converted to CSR and
    transposed before being stored.
    """
    if not documents_columns:
        # make sure shape[1]=number of docs (needed in len())
        self.sparse = sparse.tocsr().T
        return
    self.sparse = sparse.tocsc()
Example #6
Source File: matutils.py From xlinkBook with MIT License | 5 votes |
def scipy2sparse(vec, eps=1e-9):
    """
    Turn a single-row scipy.sparse vector into gensim document format.

    The result is a list of `(int position, float value)` 2-tuples, one per
    stored entry whose absolute value is greater than `eps`. The input is
    converted to CSR first and must have exactly one row.
    """
    row = vec.tocsr()
    assert row.shape[0] == 1
    document = []
    for position, weight in zip(row.indices, row.data):
        if numpy.abs(weight) > eps:
            document.append((int(position), float(weight)))
    return document
Example #7
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def unitvec(vec):
    """
    Scale a vector to unit (Euclidean) length.

    The only exception is the zero vector, which is returned back unchanged.
    Output will be in the same format as input (i.e., gensim vector=>gensim
    vector, or numpy array=>numpy array, scipy.sparse=>scipy.sparse).

    `vec` may be a scipy.sparse matrix, a numpy array, or an iterable of
    `(id, value)` 2-tuples/lists. Empty iterables and objects that cannot be
    iterated at all are returned unchanged.

    Raises ValueError when the first element of a non-empty iterable is not a
    2-item tuple or list, and AssertionError when a list-of-pairs vector has
    zero norm.
    """
    if scipy.sparse.issparse(vec):
        # The vector stays sparse; CSR is used so the stored values are
        # available through `.data`.
        vec = vec.tocsr()
        veclen = numpy.sqrt(numpy.sum(vec.data ** 2))
        if veclen > 0.0:
            return vec / veclen
        return vec

    if isinstance(vec, numpy.ndarray):
        vec = numpy.asarray(vec, dtype=float)
        # blas_nrm2 / blas_scal are defined elsewhere in the module --
        # presumably BLAS norm and scale routines; verify against their definition.
        veclen = blas_nrm2(vec)
        if veclen > 0.0:
            return blas_scal(1.0 / veclen, vec)
        return vec

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Empty iterable (StopIteration) or non-iterable input (TypeError):
        # nothing to normalise. A bare `except:` here would also swallow
        # KeyboardInterrupt/SystemExit, so only these two are caught.
        return vec

    if isinstance(first, (tuple, list)) and len(first) == 2:  # gensim sparse format?
        length = 1.0 * math.sqrt(sum(val ** 2 for _, val in vec))
        assert length > 0.0, "sparse documents must not contain any explicit zero entries"
        if length != 1.0:
            return [(termid, val / length) for termid, val in vec]
        return list(vec)
    raise ValueError("unknown input type")
Example #8
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def __init__(self, sparse, documents_columns=True):
    """
    Store `sparse` on the instance so that its columns are the documents.

    If `documents_columns` is true, the matrix is kept in the given
    orientation and converted to CSC. Otherwise it is converted to CSR and
    transposed before being stored.
    """
    if not documents_columns:
        # make sure shape[1]=number of docs (needed in len())
        self.sparse = sparse.tocsr().T
        return
    self.sparse = sparse.tocsc()
Example #9
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def scipy2sparse(vec, eps=1e-9):
    """
    Turn a single-row scipy.sparse vector into gensim document format.

    The result is a list of `(int position, float value)` 2-tuples, one per
    stored entry whose absolute value is greater than `eps`. The input is
    converted to CSR first and must have exactly one row.
    """
    row = vec.tocsr()
    assert row.shape[0] == 1
    document = []
    for position, weight in zip(row.indices, row.data):
        if numpy.abs(weight) > eps:
            document.append((int(position), float(weight)))
    return document
Example #10
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def __init__(self, sparse, documents_columns=True):
    """
    Store `sparse` on the instance so that its columns are the documents.

    If `documents_columns` is true, the matrix is kept in the given
    orientation and converted to CSC. Otherwise it is converted to CSR and
    transposed before being stored.
    """
    if not documents_columns:
        # make sure shape[1]=number of docs (needed in len())
        self.sparse = sparse.tocsr().T
        return
    self.sparse = sparse.tocsc()
Example #11
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def scipy2sparse(vec, eps=1e-9):
    """
    Turn a single-row scipy.sparse vector into gensim document format.

    The result is a list of `(int position, float value)` 2-tuples, one per
    stored entry whose absolute value is greater than `eps`. The input is
    converted to CSR first and must have exactly one row.
    """
    row = vec.tocsr()
    assert row.shape[0] == 1
    document = []
    for position, weight in zip(row.indices, row.data):
        if numpy.abs(weight) > eps:
            document.append((int(position), float(weight)))
    return document
Example #12
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def unitvec(vec):
    """
    Scale a vector to unit (Euclidean) length.

    The only exception is the zero vector, which is returned back unchanged.
    Output will be in the same format as input (i.e., gensim vector=>gensim
    vector, or numpy array=>numpy array, scipy.sparse=>scipy.sparse).

    `vec` may be a scipy.sparse matrix, a numpy array, or an iterable of
    `(id, value)` 2-tuples/lists. Empty iterables and objects that cannot be
    iterated at all are returned unchanged.

    Raises ValueError when the first element of a non-empty iterable is not a
    2-item tuple or list, and AssertionError when a list-of-pairs vector has
    zero norm.
    """
    if scipy.sparse.issparse(vec):
        # The vector stays sparse; CSR is used so the stored values are
        # available through `.data`.
        vec = vec.tocsr()
        veclen = numpy.sqrt(numpy.sum(vec.data ** 2))
        if veclen > 0.0:
            return vec / veclen
        return vec

    if isinstance(vec, numpy.ndarray):
        vec = numpy.asarray(vec, dtype=float)
        # blas_nrm2 / blas_scal are defined elsewhere in the module --
        # presumably BLAS norm and scale routines; verify against their definition.
        veclen = blas_nrm2(vec)
        if veclen > 0.0:
            return blas_scal(1.0 / veclen, vec)
        return vec

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Empty iterable (StopIteration) or non-iterable input (TypeError):
        # nothing to normalise. A bare `except:` here would also swallow
        # KeyboardInterrupt/SystemExit, so only these two are caught.
        return vec

    if isinstance(first, (tuple, list)) and len(first) == 2:  # gensim sparse format?
        length = 1.0 * math.sqrt(sum(val ** 2 for _, val in vec))
        assert length > 0.0, "sparse documents must not contain any explicit zero entries"
        if length != 1.0:
            return [(termid, val / length) for termid, val in vec]
        return list(vec)
    raise ValueError("unknown input type")
Example #13
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def __init__(self, sparse, documents_columns=True):
    """
    Store `sparse` on the instance so that its columns are the documents.

    If `documents_columns` is true, the matrix is kept in the given
    orientation and converted to CSC. Otherwise it is converted to CSR and
    transposed before being stored.
    """
    if not documents_columns:
        # make sure shape[1]=number of docs (needed in len())
        self.sparse = sparse.tocsr().T
        return
    self.sparse = sparse.tocsc()
Example #14
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def scipy2sparse(vec, eps=1e-9):
    """
    Turn a single-row scipy.sparse vector into gensim document format.

    The result is a list of `(int position, float value)` 2-tuples, one per
    stored entry whose absolute value is greater than `eps`. The input is
    converted to CSR first and must have exactly one row.
    """
    row = vec.tocsr()
    assert row.shape[0] == 1
    document = []
    for position, weight in zip(row.indices, row.data):
        if numpy.abs(weight) > eps:
            document.append((int(position), float(weight)))
    return document
Example #15
Source File: matutils.py From pynlpini with GNU General Public License v2.0 | 5 votes |
def unitvec(vec):
    """
    Scale a vector to unit (Euclidean) length.

    The only exception is the zero vector, which is returned back unchanged.
    Output will be in the same format as input (i.e., gensim vector=>gensim
    vector, or numpy array=>numpy array, scipy.sparse=>scipy.sparse).

    `vec` may be a scipy.sparse matrix, a numpy array, or an iterable of
    `(id, value)` 2-tuples/lists. Empty iterables and objects that cannot be
    iterated at all are returned unchanged.

    Raises ValueError when the first element of a non-empty iterable is not a
    2-item tuple or list, and AssertionError when a list-of-pairs vector has
    zero norm.
    """
    if scipy.sparse.issparse(vec):
        # The vector stays sparse; CSR is used so the stored values are
        # available through `.data`.
        vec = vec.tocsr()
        veclen = numpy.sqrt(numpy.sum(vec.data ** 2))
        if veclen > 0.0:
            return vec / veclen
        return vec

    if isinstance(vec, numpy.ndarray):
        vec = numpy.asarray(vec, dtype=float)
        # blas_nrm2 / blas_scal are defined elsewhere in the module --
        # presumably BLAS norm and scale routines; verify against their definition.
        veclen = blas_nrm2(vec)
        if veclen > 0.0:
            return blas_scal(1.0 / veclen, vec)
        return vec

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Empty iterable (StopIteration) or non-iterable input (TypeError):
        # nothing to normalise. A bare `except:` here would also swallow
        # KeyboardInterrupt/SystemExit, so only these two are caught.
        return vec

    if isinstance(first, (tuple, list)) and len(first) == 2:  # gensim sparse format?
        length = 1.0 * math.sqrt(sum(val ** 2 for _, val in vec))
        assert length > 0.0, "sparse documents must not contain any explicit zero entries"
        if length != 1.0:
            return [(termid, val / length) for termid, val in vec]
        return list(vec)
    raise ValueError("unknown input type")
Example #16
Source File: matutils.py From pynlpini with GNU General Public License v2.0 | 5 votes |
def __init__(self, sparse, documents_columns=True):
    """
    Store `sparse` on the instance so that its columns are the documents.

    If `documents_columns` is true, the matrix is kept in the given
    orientation and converted to CSC. Otherwise it is converted to CSR and
    transposed before being stored.
    """
    if not documents_columns:
        # make sure shape[1]=number of docs (needed in len())
        self.sparse = sparse.tocsr().T
        return
    self.sparse = sparse.tocsc()
Example #17
Source File: matutils.py From pynlpini with GNU General Public License v2.0 | 5 votes |
def scipy2sparse(vec, eps=1e-9):
    """
    Turn a single-row scipy.sparse vector into gensim document format.

    The result is a list of `(int position, float value)` 2-tuples, one per
    stored entry whose absolute value is greater than `eps`. The input is
    converted to CSR first and must have exactly one row.
    """
    row = vec.tocsr()
    assert row.shape[0] == 1
    document = []
    for position, weight in zip(row.indices, row.data):
        if numpy.abs(weight) > eps:
            document.append((int(position), float(weight)))
    return document
Example #18
Source File: matutils.py From category2vec with GNU Lesser General Public License v3.0 | 5 votes |
def unitvec(vec):
    """
    Scale a vector to unit (Euclidean) length.

    The only exception is the zero vector, which is returned back unchanged.
    Output will be in the same format as input (i.e., gensim vector=>gensim
    vector, or numpy array=>numpy array, scipy.sparse=>scipy.sparse).

    `vec` may be a scipy.sparse matrix, a numpy array, or an iterable of
    `(id, value)` 2-tuples/lists. Empty iterables and objects that cannot be
    iterated at all are returned unchanged.

    Raises ValueError when the first element of a non-empty iterable is not a
    2-item tuple or list, and AssertionError when a list-of-pairs vector has
    zero norm.
    """
    if scipy.sparse.issparse(vec):
        # The vector stays sparse; CSR is used so the stored values are
        # available through `.data`.
        vec = vec.tocsr()
        veclen = numpy.sqrt(numpy.sum(vec.data ** 2))
        if veclen > 0.0:
            return vec / veclen
        return vec

    if isinstance(vec, numpy.ndarray):
        vec = numpy.asarray(vec, dtype=float)
        # blas_nrm2 / blas_scal are defined elsewhere in the module --
        # presumably BLAS norm and scale routines; verify against their definition.
        veclen = blas_nrm2(vec)
        if veclen > 0.0:
            return blas_scal(1.0 / veclen, vec)
        return vec

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Empty iterable (StopIteration) or non-iterable input (TypeError):
        # nothing to normalise. A bare `except:` here would also swallow
        # KeyboardInterrupt/SystemExit, so only these two are caught.
        return vec

    if isinstance(first, (tuple, list)) and len(first) == 2:  # gensim sparse format?
        length = 1.0 * math.sqrt(sum(val ** 2 for _, val in vec))
        assert length > 0.0, "sparse documents must not contain any explicit zero entries"
        if length != 1.0:
            return [(termid, val / length) for termid, val in vec]
        return list(vec)
    raise ValueError("unknown input type")
Example #19
Source File: matutils.py From category2vec with GNU Lesser General Public License v3.0 | 5 votes |
def __init__(self, sparse, documents_columns=True):
    """
    Store `sparse` on the instance so that its columns are the documents.

    If `documents_columns` is true, the matrix is kept in the given
    orientation and converted to CSC. Otherwise it is converted to CSR and
    transposed before being stored.
    """
    if not documents_columns:
        # make sure shape[1]=number of docs (needed in len())
        self.sparse = sparse.tocsr().T
        return
    self.sparse = sparse.tocsc()
Example #20
Source File: matutils.py From category2vec with GNU Lesser General Public License v3.0 | 5 votes |
def scipy2sparse(vec, eps=1e-9):
    """
    Turn a single-row scipy.sparse vector into gensim document format.

    The result is a list of `(int position, float value)` 2-tuples, one per
    stored entry whose absolute value is greater than `eps`. The input is
    converted to CSR first and must have exactly one row.
    """
    row = vec.tocsr()
    assert row.shape[0] == 1
    document = []
    for position, weight in zip(row.indices, row.data):
        if numpy.abs(weight) > eps:
            document.append((int(position), float(weight)))
    return document
Example #21
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def unitvec(vec):
    """
    Scale a vector to unit (Euclidean) length.

    The only exception is the zero vector, which is returned back unchanged.
    Output will be in the same format as input (i.e., gensim vector=>gensim
    vector, or numpy array=>numpy array, scipy.sparse=>scipy.sparse).

    `vec` may be a scipy.sparse matrix, a numpy array, or an iterable of
    `(id, value)` 2-tuples/lists. Empty iterables and objects that cannot be
    iterated at all are returned unchanged.

    Raises ValueError when the first element of a non-empty iterable is not a
    2-item tuple or list, and AssertionError when a list-of-pairs vector has
    zero norm.
    """
    if scipy.sparse.issparse(vec):
        # The vector stays sparse; CSR is used so the stored values are
        # available through `.data`.
        vec = vec.tocsr()
        veclen = numpy.sqrt(numpy.sum(vec.data ** 2))
        if veclen > 0.0:
            return vec / veclen
        return vec

    if isinstance(vec, numpy.ndarray):
        vec = numpy.asarray(vec, dtype=float)
        # blas_nrm2 / blas_scal are defined elsewhere in the module --
        # presumably BLAS norm and scale routines; verify against their definition.
        veclen = blas_nrm2(vec)
        if veclen > 0.0:
            return blas_scal(1.0 / veclen, vec)
        return vec

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Empty iterable (StopIteration) or non-iterable input (TypeError):
        # nothing to normalise. A bare `except:` here would also swallow
        # KeyboardInterrupt/SystemExit, so only these two are caught.
        return vec

    if isinstance(first, (tuple, list)) and len(first) == 2:  # gensim sparse format?
        length = 1.0 * math.sqrt(sum(val ** 2 for _, val in vec))
        assert length > 0.0, "sparse documents must not contain any explicit zero entries"
        if length != 1.0:
            return [(termid, val / length) for termid, val in vec]
        return list(vec)
    raise ValueError("unknown input type")
Example #22
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def __init__(self, sparse, documents_columns=True):
    """
    Store `sparse` on the instance so that its columns are the documents.

    If `documents_columns` is true, the matrix is kept in the given
    orientation and converted to CSC. Otherwise it is converted to CSR and
    transposed before being stored.
    """
    if not documents_columns:
        # make sure shape[1]=number of docs (needed in len())
        self.sparse = sparse.tocsr().T
        return
    self.sparse = sparse.tocsc()
Example #23
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def scipy2sparse(vec, eps=1e-9):
    """
    Turn a single-row scipy.sparse vector into gensim document format.

    The result is a list of `(int position, float value)` 2-tuples, one per
    stored entry whose absolute value is greater than `eps`. The input is
    converted to CSR first and must have exactly one row.
    """
    row = vec.tocsr()
    assert row.shape[0] == 1
    document = []
    for position, weight in zip(row.indices, row.data):
        if numpy.abs(weight) > eps:
            document.append((int(position), float(weight)))
    return document
Example #24
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def __init__(self, sparse, documents_columns=True):
    """
    Store `sparse` on the instance so that its columns are the documents.

    If `documents_columns` is true, the matrix is kept in the given
    orientation and converted to CSC. Otherwise it is converted to CSR and
    transposed before being stored.
    """
    if not documents_columns:
        # make sure shape[1]=number of docs (needed in len())
        self.sparse = sparse.tocsr().T
        return
    self.sparse = sparse.tocsc()
Example #25
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def scipy2sparse(vec, eps=1e-9):
    """
    Turn a single-row scipy.sparse vector into gensim document format.

    The result is a list of `(int position, float value)` 2-tuples, one per
    stored entry whose absolute value is greater than `eps`. The input is
    converted to CSR first and must have exactly one row.
    """
    row = vec.tocsr()
    assert row.shape[0] == 1
    document = []
    for position, weight in zip(row.indices, row.data):
        if numpy.abs(weight) > eps:
            document.append((int(position), float(weight)))
    return document
Example #26
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def unitvec(vec):
    """
    Scale a vector to unit (Euclidean) length.

    The only exception is the zero vector, which is returned back unchanged.
    Output will be in the same format as input (i.e., gensim vector=>gensim
    vector, or numpy array=>numpy array, scipy.sparse=>scipy.sparse).

    `vec` may be a scipy.sparse matrix, a numpy array, or an iterable of
    `(id, value)` 2-tuples/lists. Empty iterables and objects that cannot be
    iterated at all are returned unchanged.

    Raises ValueError when the first element of a non-empty iterable is not a
    2-item tuple or list, and AssertionError when a list-of-pairs vector has
    zero norm.
    """
    if scipy.sparse.issparse(vec):
        # The vector stays sparse; CSR is used so the stored values are
        # available through `.data`.
        vec = vec.tocsr()
        veclen = numpy.sqrt(numpy.sum(vec.data ** 2))
        if veclen > 0.0:
            return vec / veclen
        return vec

    if isinstance(vec, numpy.ndarray):
        vec = numpy.asarray(vec, dtype=float)
        # blas_nrm2 / blas_scal are defined elsewhere in the module --
        # presumably BLAS norm and scale routines; verify against their definition.
        veclen = blas_nrm2(vec)
        if veclen > 0.0:
            return blas_scal(1.0 / veclen, vec)
        return vec

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Empty iterable (StopIteration) or non-iterable input (TypeError):
        # nothing to normalise. A bare `except:` here would also swallow
        # KeyboardInterrupt/SystemExit, so only these two are caught.
        return vec

    if isinstance(first, (tuple, list)) and len(first) == 2:  # gensim sparse format?
        length = 1.0 * math.sqrt(sum(val ** 2 for _, val in vec))
        assert length > 0.0, "sparse documents must not contain any explicit zero entries"
        if length != 1.0:
            return [(termid, val / length) for termid, val in vec]
        return list(vec)
    raise ValueError("unknown input type")
Example #27
Source File: matutils.py From topical_word_embeddings with MIT License | 5 votes |
def scipy2sparse(vec, eps=1e-9):
    """
    Turn a single-row scipy.sparse vector into gensim document format.

    The result is a list of `(int position, float value)` 2-tuples, one per
    stored entry whose absolute value is greater than `eps`. The input is
    converted to CSR first and must have exactly one row.
    """
    row = vec.tocsr()
    assert row.shape[0] == 1
    document = []
    for position, weight in zip(row.indices, row.data):
        if numpy.abs(weight) > eps:
            document.append((int(position), float(weight)))
    return document
Example #28
Source File: matutils.py From ohmnet with MIT License | 4 votes |
def unitvec(vec, norm='l2'):
    """
    Scale a vector to unit length under the chosen norm.

    The only exception is the zero vector, which is returned back unchanged.
    Output will be in the same format as input.

    `vec` may be a scipy.sparse matrix, a numpy array, or an iterable of
    `(id, value)` 2-tuples/lists. `norm` selects the norm: 'l1' (sum of
    absolute values) or 'l2' (Euclidean). Empty iterables and objects that
    cannot be iterated at all are returned unchanged.

    Raises ValueError when `norm` is not 'l1' or 'l2', or when the first
    element of a non-empty iterable is not a 2-item tuple or list. Raises
    AssertionError when a list-of-pairs vector has zero norm.
    """
    if norm not in ('l1', 'l2'):
        raise ValueError("'%s' is not a supported norm. Currently supported norms are 'l1' and 'l2'." % norm)

    # `norm` has been validated above, so "not l1" means "l2" from here on.
    use_l1 = norm == 'l1'

    if scipy.sparse.issparse(vec):
        vec = vec.tocsr()
        if use_l1:
            veclen = numpy.sum(numpy.abs(vec.data))
        else:
            veclen = numpy.sqrt(numpy.sum(vec.data ** 2))
        if veclen > 0.0:
            return vec / veclen
        return vec

    if isinstance(vec, numpy.ndarray):
        vec = numpy.asarray(vec, dtype=float)
        # blas_nrm2 / blas_scal are defined elsewhere in the module --
        # presumably BLAS norm and scale routines; verify against their definition.
        if use_l1:
            veclen = numpy.sum(numpy.abs(vec))
        else:
            veclen = blas_nrm2(vec)
        if veclen > 0.0:
            return blas_scal(1.0 / veclen, vec)
        return vec

    try:
        first = next(iter(vec))  # is there at least one element?
    except (StopIteration, TypeError):
        # Empty iterable (StopIteration) or non-iterable input (TypeError):
        # nothing to normalise. A bare `except:` here would also swallow
        # KeyboardInterrupt/SystemExit, so only these two are caught.
        return vec

    if isinstance(first, (tuple, list)) and len(first) == 2:
        if use_l1:
            length = float(sum(abs(val) for _, val in vec))
        else:
            length = 1.0 * math.sqrt(sum(val ** 2 for _, val in vec))
        assert length > 0.0, "sparse documents must not contain any explicit zero entries"
        # ret_normalized_vec is defined elsewhere in the module.
        return ret_normalized_vec(vec, length)
    raise ValueError("unknown input type")