Python gensim.models.LdaMulticore() Examples
The following are 4 code examples of gensim.models.LdaMulticore().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module gensim.models, or try the search function.
Example #1
Source File: builder.py From Greynir with GNU General Public License v3.0 | 6 votes |
def create_lda_model(self, **kwargs):
    """Train an LDA model on the TF-IDF corpus and save it to disk.

    The corpus is obtained from ``self.load_tfidf_corpus()`` (described by
    the original author as covering the entire words database table). The
    dictionary is loaded on demand when ``self._dictionary`` is still
    ``None``.

    Any keyword arguments are forwarded unchanged to
    ``models.LdaMulticore``.
    """
    tfidf_corpus = self.load_tfidf_corpus()

    # Lazily load the dictionary the first time it is needed.
    if self._dictionary is None:
        self.load_dictionary()

    # The topic count doubles as the model's dimensionality and as the
    # value substituted into the model file name.
    topic_count = self._dimensions
    model = models.LdaMulticore(
        tfidf_corpus,
        id2word=self._dictionary,
        num_topics=topic_count,
        **kwargs
    )

    if self._verbose:
        model.print_topics(num_topics=topic_count)

    # Persist the trained model.
    model.save(self._LDA_MODEL_FILE.format(topic_count))
Example #2
Source File: lda_context_utils.py From yelp with GNU Lesser General Public License v2.1 | 5 votes |
def build_topic_model_from_corpus(corpus, dictionary):
    """Build an LDA topic model from a bag-of-words corpus.

    ``Constants.LDA_MULTICORE`` selects between the multicore trainer
    (``LdaMulticore``, using ``Constants.NUM_CORES - 1`` workers) and the
    single-process ``ldamodel.LdaModel``. Both are configured with the same
    topic count, passes and iterations taken from ``Constants``.

    :type corpus: list
    :param corpus: a list of bags of words, one bag per document
    :type dictionary: gensim.corpora.Dictionary
    :param dictionary: the words permitted to belong to a document; words
        not in this dictionary are ignored
    :return: the trained model (an ``LdaMulticore`` or ``LdaModel`` instance)
    """
    # NOTE: no random seed is fixed here; a numpy.random.seed(0) call was
    # present in the original but disabled.
    stamp_format = "%Y/%m/%d-%H:%M:%S"

    if not Constants.LDA_MULTICORE:
        print('%s: lda monocore' % time.strftime(stamp_format))
        return ldamodel.LdaModel(
            corpus,
            id2word=dictionary,
            num_topics=Constants.TOPIC_MODEL_NUM_TOPICS,
            passes=Constants.TOPIC_MODEL_PASSES,
            iterations=Constants.TOPIC_MODEL_ITERATIONS,
        )

    print('%s: lda multicore' % time.strftime(stamp_format))
    return LdaMulticore(
        corpus,
        id2word=dictionary,
        num_topics=Constants.TOPIC_MODEL_NUM_TOPICS,
        passes=Constants.TOPIC_MODEL_PASSES,
        iterations=Constants.TOPIC_MODEL_ITERATIONS,
        # One core is left out of the worker pool.
        workers=Constants.NUM_CORES - 1,
    )
Example #3
Source File: builder.py From Greynir with GNU General Public License v3.0 | 5 votes |
def load_lda_model(self):
    """Load a previously saved LDA model into ``self._model``.

    The file name is ``self._LDA_MODEL_FILE`` formatted with
    ``self._dimensions``. ``self._model_name`` is set to ``"lda"`` once the
    model has been loaded.
    """
    model_path = self._LDA_MODEL_FILE.format(self._dimensions)
    # mmap="r" is handed to gensim's loader (read-only memory mapping of
    # the stored arrays, per the gensim documentation).
    self._model = models.LdaMulticore.load(model_path, mmap="r")
    self._model_name = "lda"
Example #4
Source File: topics_analysis.py From contextualLSTM with Apache License 2.0 | 4 votes |
def _train_and_save(build_model, save_path, label):
    """Train one model, save it to *save_path* and report the elapsed time.

    :param build_model: zero-argument callable that trains and returns the model
    :param save_path: destination passed to the model's ``save`` method
    :param label: model name used in the timing message
    """
    started = time()
    model = build_model()
    model.save(save_path)
    # Drop the reference before the next model is trained so two trained
    # models are never held in memory at once.
    del model
    finished = time()
    print("[BLOCK] Training time for %s: %s" % (label, round(finished - started, 2)))
    sys.stdout.flush()


def topic_analysis(corpus, dictionary, models_path, technique):
    """Train and save the topic model(s) selected by *technique*.

    Every model saved during one call shares a freshly generated UUID
    suffix in its file name, and the wall-clock training time of each model
    is printed to stdout.

    :param corpus: corpus handed to each gensim model constructor
    :param dictionary: id-to-word mapping passed as ``id2word``
    :param models_path: directory prefix under which the models are saved
    :param technique: one of ``"hdp"``, ``"ldap"`` (multicore LDA),
        ``"lsa"``, ``"ldao"`` (online LDA), ``"lda"`` (offline LDA), or
        ``"all"`` to train every model in that order; any other value
        trains nothing
    :return: None
    """
    import uuid

    # One identifier per call, shared by every file written below.
    run_id = str(uuid.uuid4())

    print("[BLOCK] Starting models for context")
    sys.stdout.flush()

    # (technique key, file-name prefix, label for the timing message,
    #  factory that trains the model). The factories are lambdas so that a
    # model is only trained when its technique is actually selected.
    recipes = (
        (
            "hdp", "hdp", "HDP model",
            lambda: HdpModel(corpus, id2word=dictionary),
        ),
        (
            # NOTE(review): the worker count is hard-coded; presumably it
            # was tuned for the original author's machine.
            "ldap", "lda_parallel", "LDA multicore",
            lambda: LdaMulticore(corpus, id2word=dictionary, num_topics=100,
                                 workers=23, passes=20),
        ),
        (
            "lsa", "lsa", "LSA",
            lambda: LsiModel(corpus, id2word=dictionary, num_topics=400),
        ),
        (
            "ldao", "lda_online", "LDA online",
            lambda: LdaModel(corpus, id2word=dictionary, num_topics=100,
                             update_every=1, chunksize=10000, passes=5),
        ),
        (
            "lda", "lda_offline", "LDA offline",
            lambda: LdaModel(corpus, id2word=dictionary, num_topics=100,
                             update_every=0, passes=20),
        ),
    )

    for key, prefix, label, build_model in recipes:
        if technique == "all" or technique == key:
            save_path = "%s/%s_%s" % (models_path, prefix, run_id)
            _train_and_save(build_model, save_path, label)