Python gensim.corpora.TextCorpus() Examples
The following are 19 code examples of gensim.corpora.TextCorpus().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module gensim.corpora, or try the search function.
Example #1
Source File: test_miislita.py From topical_word_embeddings with MIT License | 6 votes |
def test_save_load_ability(self):
    """Make sure we can save and load (un/pickle) TextCorpus objects.

    This holds as long as the underlying input isn't a file-like object;
    we cannot pickle those.
    """
    # Construct the corpus from a file path.
    corpusname = datapath('miIslita.cor')
    miislita = CorpusMiislita(corpusname)

    # Pickle to disk, then load it back.
    tmpf = get_tmpfile('tc_test.cpickle')
    miislita.save(tmpf)
    miislita2 = CorpusMiislita.load(tmpf)

    # The reloaded corpus must agree in length and in its token-to-id mapping.
    self.assertEqual(len(miislita), len(miislita2))
    self.assertEqual(miislita.dictionary.token2id, miislita2.dictionary.token2id)
Example #2
Source File: test_miislita.py From topical_word_embeddings with MIT License | 6 votes |
def test_save_load_ability(self):
    """Make sure we can save and load (un/pickle) TextCorpus objects.

    This holds as long as the underlying input isn't a file-like object;
    we cannot pickle those.
    """
    # Construct the corpus from a file path.
    corpusname = datapath('miIslita.cor')
    miislita = CorpusMiislita(corpusname)

    # Pickle to disk, then load it back.
    tmpf = get_tmpfile('tc_test.cpickle')
    miislita.save(tmpf)
    miislita2 = CorpusMiislita.load(tmpf)

    # The reloaded corpus must agree in length and in its token-to-id mapping.
    self.assertEqual(len(miislita), len(miislita2))
    self.assertEqual(miislita.dictionary.token2id, miislita2.dictionary.token2id)
Example #3
Source File: test_miislita.py From topical_word_embeddings with MIT License | 6 votes |
def test_save_load_ability(self):
    """Make sure we can save and load (un/pickle) TextCorpus objects.

    This holds as long as the underlying input isn't a file-like object;
    we cannot pickle those.
    """
    # Construct the corpus from a file path.
    corpusname = datapath('miIslita.cor')
    miislita = CorpusMiislita(corpusname)

    # Pickle to disk, then load it back.
    tmpf = get_tmpfile('tc_test.cpickle')
    miislita.save(tmpf)
    miislita2 = CorpusMiislita.load(tmpf)

    # The reloaded corpus must agree in length and in its token-to-id mapping.
    self.assertEqual(len(miislita), len(miislita2))
    self.assertEqual(miislita.dictionary.token2id, miislita2.dictionary.token2id)
Example #4
Source File: test_miislita.py From topical_word_embeddings with MIT License | 6 votes |
def test_save_load_ability(self):
    """Make sure we can save and load (un/pickle) TextCorpus objects.

    This holds as long as the underlying input isn't a file-like object;
    we cannot pickle those.
    """
    # Construct the corpus from a file path.
    corpusname = datapath('miIslita.cor')
    miislita = CorpusMiislita(corpusname)

    # Pickle to disk, then load it back.
    tmpf = get_tmpfile('tc_test.cpickle')
    miislita.save(tmpf)
    miislita2 = CorpusMiislita.load(tmpf)

    # The reloaded corpus must agree in length and in its token-to-id mapping.
    self.assertEqual(len(miislita), len(miislita2))
    self.assertEqual(miislita.dictionary.token2id, miislita2.dictionary.token2id)
Example #5
Source File: test_miislita.py From topical_word_embeddings with MIT License | 5 votes |
def test_textcorpus(self):
    """Make sure TextCorpus can be serialized to disk."""
    # Construct the corpus from a file.
    miislita = CorpusMiislita(datapath('head500.noblanks.cor.bz2'))

    # Make sure serializing works: the output file must exist afterwards.
    ftmp = get_tmpfile('test_textcorpus.mm')
    corpora.MmCorpus.save_corpus(ftmp, miislita)
    self.assertTrue(os.path.exists(ftmp))

    # Make sure deserializing gives the same result.
    miislita2 = corpora.MmCorpus(ftmp)
    self.assertEqual(list(miislita), list(miislita2))
Example #6
Source File: test_miislita.py From topical_word_embeddings with MIT License | 5 votes |
def test_textcorpus(self):
    """Make sure TextCorpus can be serialized to disk."""
    # Construct the corpus from a file.
    miislita = CorpusMiislita(datapath('head500.noblanks.cor.bz2'))

    # Make sure serializing works: the output file must exist afterwards.
    ftmp = get_tmpfile('test_textcorpus.mm')
    corpora.MmCorpus.save_corpus(ftmp, miislita)
    self.assertTrue(os.path.exists(ftmp))

    # Make sure deserializing gives the same result.
    miislita2 = corpora.MmCorpus(ftmp)
    self.assertEqual(list(miislita), list(miislita2))
Example #7
Source File: test_miislita.py From topical_word_embeddings with MIT License | 5 votes |
def test_textcorpus(self):
    """Make sure TextCorpus can be serialized to disk."""
    # Construct the corpus from a file.
    miislita = CorpusMiislita(datapath('head500.noblanks.cor.bz2'))

    # Make sure serializing works: the output file must exist afterwards.
    ftmp = get_tmpfile('test_textcorpus.mm')
    corpora.MmCorpus.save_corpus(ftmp, miislita)
    self.assertTrue(os.path.exists(ftmp))

    # Make sure deserializing gives the same result.
    miislita2 = corpora.MmCorpus(ftmp)
    self.assertEqual(list(miislita), list(miislita2))
Example #8
Source File: test_miislita.py From topical_word_embeddings with MIT License | 5 votes |
def test_textcorpus(self):
    """Make sure TextCorpus can be serialized to disk."""
    # Construct the corpus from a file.
    miislita = CorpusMiislita(datapath('head500.noblanks.cor.bz2'))

    # Make sure serializing works: the output file must exist afterwards.
    ftmp = get_tmpfile('test_textcorpus.mm')
    corpora.MmCorpus.save_corpus(ftmp, miislita)
    self.assertTrue(os.path.exists(ftmp))

    # Make sure deserializing gives the same result.
    miislita2 = corpora.MmCorpus(ftmp)
    self.assertEqual(list(miislita), list(miislita2))
Example #9
Source File: GensimLDA.py From termite-data-server with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, corpusPath, tokenRegexStr):
    """Set up the tokenizer state, then initialize the TextCorpus base.

    Args:
        corpusPath: Passed through unchanged to ``corpora.TextCorpus.__init__``.
        tokenRegexStr: Regular-expression source used to build the token
            pattern. ``None`` selects
            ``GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX``.
    """
    if tokenRegexStr is None:
        tokenRegexStr = GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX
    self.tokenRegexStr = tokenRegexStr
    self.tokenRegex = re.compile(self.tokenRegexStr)
    self.docIds = []
    # NOTE(review): the base initializer runs last, presumably because it may
    # read the corpus and so needs the attributes above -- confirm against
    # the gensim version in use before reordering.
    corpora.TextCorpus.__init__(self, corpusPath)
Example #10
Source File: GensimLDA.py From termite-data-server with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, corpusPath, tokenRegexStr):
    """Set up the tokenizer state, then initialize the TextCorpus base.

    Args:
        corpusPath: Passed through unchanged to ``corpora.TextCorpus.__init__``.
        tokenRegexStr: Regular-expression source used to build the token
            pattern. ``None`` selects
            ``GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX``.
    """
    if tokenRegexStr is None:
        tokenRegexStr = GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX
    self.tokenRegexStr = tokenRegexStr
    self.tokenRegex = re.compile(self.tokenRegexStr)
    self.docIds = []
    # NOTE(review): the base initializer runs last, presumably because it may
    # read the corpus and so needs the attributes above -- confirm against
    # the gensim version in use before reordering.
    corpora.TextCorpus.__init__(self, corpusPath)
Example #11
Source File: GensimLDA.py From termite-data-server with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, corpusPath, tokenRegexStr):
    """Set up the tokenizer state, then initialize the TextCorpus base.

    Args:
        corpusPath: Passed through unchanged to ``corpora.TextCorpus.__init__``.
        tokenRegexStr: Regular-expression source used to build the token
            pattern. ``None`` selects
            ``GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX``.
    """
    if tokenRegexStr is None:
        tokenRegexStr = GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX
    self.tokenRegexStr = tokenRegexStr
    self.tokenRegex = re.compile(self.tokenRegexStr)
    self.docIds = []
    # NOTE(review): the base initializer runs last, presumably because it may
    # read the corpus and so needs the attributes above -- confirm against
    # the gensim version in use before reordering.
    corpora.TextCorpus.__init__(self, corpusPath)
Example #12
Source File: GensimLDA.py From termite-data-server with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, corpusPath, tokenRegexStr):
    """Set up the tokenizer state, then initialize the TextCorpus base.

    Args:
        corpusPath: Passed through unchanged to ``corpora.TextCorpus.__init__``.
        tokenRegexStr: Regular-expression source used to build the token
            pattern. ``None`` selects
            ``GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX``.
    """
    if tokenRegexStr is None:
        tokenRegexStr = GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX
    self.tokenRegexStr = tokenRegexStr
    self.tokenRegex = re.compile(self.tokenRegexStr)
    self.docIds = []
    # NOTE(review): the base initializer runs last, presumably because it may
    # read the corpus and so needs the attributes above -- confirm against
    # the gensim version in use before reordering.
    corpora.TextCorpus.__init__(self, corpusPath)
Example #13
Source File: GensimLDA.py From termite-data-server with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, corpusPath, tokenRegexStr):
    """Set up the tokenizer state, then initialize the TextCorpus base.

    Args:
        corpusPath: Passed through unchanged to ``corpora.TextCorpus.__init__``.
        tokenRegexStr: Regular-expression source used to build the token
            pattern. ``None`` selects
            ``GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX``.
    """
    if tokenRegexStr is None:
        tokenRegexStr = GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX
    self.tokenRegexStr = tokenRegexStr
    self.tokenRegex = re.compile(self.tokenRegexStr)
    self.docIds = []
    # NOTE(review): the base initializer runs last, presumably because it may
    # read the corpus and so needs the attributes above -- confirm against
    # the gensim version in use before reordering.
    corpora.TextCorpus.__init__(self, corpusPath)
Example #14
Source File: GensimLDA.py From termite-data-server with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, corpusPath, tokenRegexStr):
    """Set up the tokenizer state, then initialize the TextCorpus base.

    Args:
        corpusPath: Passed through unchanged to ``corpora.TextCorpus.__init__``.
        tokenRegexStr: Regular-expression source used to build the token
            pattern. ``None`` selects
            ``GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX``.
    """
    if tokenRegexStr is None:
        tokenRegexStr = GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX
    self.tokenRegexStr = tokenRegexStr
    self.tokenRegex = re.compile(self.tokenRegexStr)
    self.docIds = []
    # NOTE(review): the base initializer runs last, presumably because it may
    # read the corpus and so needs the attributes above -- confirm against
    # the gensim version in use before reordering.
    corpora.TextCorpus.__init__(self, corpusPath)
Example #15
Source File: GensimLDA.py From termite-data-server with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, corpusPath, tokenRegexStr):
    """Set up the tokenizer state, then initialize the TextCorpus base.

    Args:
        corpusPath: Passed through unchanged to ``corpora.TextCorpus.__init__``.
        tokenRegexStr: Regular-expression source used to build the token
            pattern. ``None`` selects
            ``GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX``.
    """
    if tokenRegexStr is None:
        tokenRegexStr = GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX
    self.tokenRegexStr = tokenRegexStr
    self.tokenRegex = re.compile(self.tokenRegexStr)
    self.docIds = []
    # NOTE(review): the base initializer runs last, presumably because it may
    # read the corpus and so needs the attributes above -- confirm against
    # the gensim version in use before reordering.
    corpora.TextCorpus.__init__(self, corpusPath)
Example #16
Source File: GensimLDA.py From termite-data-server with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, corpusPath, tokenRegexStr):
    """Set up the tokenizer state, then initialize the TextCorpus base.

    Args:
        corpusPath: Passed through unchanged to ``corpora.TextCorpus.__init__``.
        tokenRegexStr: Regular-expression source used to build the token
            pattern. ``None`` selects
            ``GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX``.
    """
    if tokenRegexStr is None:
        tokenRegexStr = GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX
    self.tokenRegexStr = tokenRegexStr
    self.tokenRegex = re.compile(self.tokenRegexStr)
    self.docIds = []
    # NOTE(review): the base initializer runs last, presumably because it may
    # read the corpus and so needs the attributes above -- confirm against
    # the gensim version in use before reordering.
    corpora.TextCorpus.__init__(self, corpusPath)
Example #17
Source File: GensimLDA.py From termite-data-server with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, corpusPath, tokenRegexStr):
    """Set up the tokenizer state, then initialize the TextCorpus base.

    Args:
        corpusPath: Passed through unchanged to ``corpora.TextCorpus.__init__``.
        tokenRegexStr: Regular-expression source used to build the token
            pattern. ``None`` selects
            ``GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX``.
    """
    if tokenRegexStr is None:
        tokenRegexStr = GensimTermiteCorpusReader.DEFAULT_TOKEN_REGEX
    self.tokenRegexStr = tokenRegexStr
    self.tokenRegex = re.compile(self.tokenRegexStr)
    self.docIds = []
    # NOTE(review): the base initializer runs last, presumably because it may
    # read the corpus and so needs the attributes above -- confirm against
    # the gensim version in use before reordering.
    corpora.TextCorpus.__init__(self, corpusPath)
Example #18
Source File: test_miislita.py From topical_word_embeddings with MIT License | 5 votes |
def test_textcorpus(self):
    """Make sure TextCorpus can be serialized to disk."""
    # Construct the corpus from a file.
    miislita = CorpusMiislita(datapath('head500.noblanks.cor.bz2'))

    # Make sure serializing works: the output file must exist afterwards.
    ftmp = get_tmpfile('test_textcorpus.mm')
    corpora.MmCorpus.save_corpus(ftmp, miislita)
    self.assertTrue(os.path.exists(ftmp))

    # Make sure deserializing gives the same result.
    miislita2 = corpora.MmCorpus(ftmp)
    self.assertEqual(list(miislita), list(miislita2))
Example #19
Source File: test_miislita.py From topical_word_embeddings with MIT License | 5 votes |
def test_textcorpus(self):
    """Make sure TextCorpus can be serialized to disk."""
    # Construct the corpus from a file.
    miislita = CorpusMiislita(datapath('head500.noblanks.cor.bz2'))

    # Make sure serializing works: the output file must exist afterwards.
    ftmp = get_tmpfile('test_textcorpus.mm')
    corpora.MmCorpus.save_corpus(ftmp, miislita)
    self.assertTrue(os.path.exists(ftmp))

    # Make sure deserializing gives the same result.
    miislita2 = corpora.MmCorpus(ftmp)
    self.assertEqual(list(miislita), list(miislita2))