Python gensim.models.ldamodel.LdaModel() Examples
The following are 9 code examples of gensim.models.ldamodel.LdaModel(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module gensim.models.ldamodel, or try the search function.
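Before working through the examples, here is a minimal, self-contained sketch of the typical LdaModel workflow. The toy corpus and parameter values below are illustrative only and do not come from any of the projects featured on this page:

from gensim.corpora import Dictionary
from gensim.models.ldamodel import LdaModel

# Toy corpus: each document is a list of tokens.
documents = [
    ["human", "interface", "computer"],
    ["graph", "trees", "system"],
    ["system", "human", "eps"],
    ["graph", "minors", "trees"],
]

dictionary = Dictionary(documents)                       # token -> integer id mapping
corpus = [dictionary.doc2bow(doc) for doc in documents]  # bag-of-words vectors

model = LdaModel(corpus, id2word=dictionary, num_topics=2, passes=10)

# Inspect the learned topics and infer the topic mixture of one document.
print(model.show_topics(num_topics=2, num_words=3))
print(model.get_document_topics(corpus[0]))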
Example #1
Source File: tm_gensim.py From tmtoolkit with Apache License 2.0 | 6 votes |
def fit_model(self, data, params, return_data=False):
    """Fit model to `data` using gensim with parameter set `params`."""
    from gensim.models.ldamodel import LdaModel

    dictionary = params.pop('dictionary', None)

    if hasattr(data, 'dtype') and hasattr(data, 'shape') and hasattr(data, 'transpose'):
        corpus = dtm_to_gensim_corpus(data)
        dtm = data
    else:
        if isinstance(data, tuple) and len(data) == 2:
            dictionary, corpus = data
        else:
            corpus = data
        dtm = gensim_corpus_to_dtm(corpus)

    model = LdaModel(corpus, id2word=dictionary, **params)

    if return_data:
        return model, (corpus, dtm)
    else:
        return model
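fit_model accepts data in three forms: a document-term matrix (anything exposing dtype, shape and transpose, e.g. a NumPy array or SciPy sparse matrix), a (dictionary, corpus) tuple, or a plain gensim corpus; dtm_to_gensim_corpus and gensim_corpus_to_dtm are tmtoolkit helpers that convert between the matrix and corpus representations. A minimal call sketch for the tuple form, reusing dictionary and corpus from the intro sketch above (the fitter instance is hypothetical; the class defining fit_model is not shown on this page):

model = fitter.fit_model((dictionary, corpus), {'num_topics': 2, 'passes': 10})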
Example #2
Source File: test_models.py From topical_word_embeddings with MIT License | 6 votes |
def testLargeMmap(self):
    model = ldamodel.LdaModel(self.corpus, num_topics=2)

    # simulate storing large arrays separately
    model.save(testfile(), sep_limit=0)

    model2 = ldamodel.LdaModel.load(testfile())
    self.assertEqual(model.num_topics, model2.num_topics)
    self.assertTrue(numpy.allclose(model.expElogbeta, model2.expElogbeta))
    tstvec = []
    self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec]))  # try projecting an empty vector

    # test loading the large model arrays with mmap
    model2 = ldamodel.LdaModel.load(testfile(), mmap='r')
    self.assertEqual(model.num_topics, model2.num_topics)
    self.assertTrue(numpy.allclose(model.expElogbeta, model2.expElogbeta))
    tstvec = []
    self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec]))  # try projecting an empty vector
#endclass TestLdaModel
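Here sep_limit=0 forces gensim to store every NumPy array in a separate file next to the main pickle (normally only arrays above a size threshold are split out), which is what allows the later load(..., mmap='r') call to memory-map those arrays read-only instead of loading them into RAM.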
Example #3
Source File: test_models.py From topical_word_embeddings with MIT License | 6 votes |
def testTransform(self):
    passed = False
    # sometimes, LDA training gets stuck at a local minimum
    # in that case try re-training the model from scratch, hoping for a
    # better random initialization
    for i in range(5):  # restart at most 5 times
        # create the transformation model
        model = ldamodel.LdaModel(id2word=dictionary, num_topics=2, passes=100)
        model.update(corpus)

        # transform one document
        doc = list(corpus)[0]
        transformed = model[doc]

        vec = matutils.sparse2full(transformed, 2)  # convert to dense vector, for easier equality tests
        expected = [0.13, 0.87]
        # must contain the same values, up to re-ordering
        passed = numpy.allclose(sorted(vec), sorted(expected), atol=1e-2)
        if passed:
            break
        logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" %
                        (i, sorted(vec), sorted(expected)))
    self.assertTrue(passed)
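model[doc] returns a sparse list of (topic_id, probability) pairs, so the test densifies it with matutils.sparse2full before comparing. Sorting both vectors makes the comparison insensitive to topic ordering, which is arbitrary across random initializations.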
Example #4
Source File: similarity.py From bugbug with Mozilla Public License 2.0 | 6 votes |
def __init__(
    self, cleanup_urls=True, nltk_tokenizer=False, confidence_threshold=0.8
):
    super().__init__(
        cleanup_urls=cleanup_urls,
        nltk_tokenizer=nltk_tokenizer,
        confidence_threshold=confidence_threshold,
    )

    self.corpus = []
    self.bug_ids = []

    for bug in bugzilla.get_bugs():
        self.corpus.append(self.text_preprocess(self.get_text(bug)))
        self.bug_ids.append(bug["id"])

    indexes = list(range(len(self.corpus)))
    random.shuffle(indexes)
    self.corpus = [self.corpus[idx] for idx in indexes]
    self.bug_ids = [self.bug_ids[idx] for idx in indexes]

    self.dictionary = Dictionary(self.corpus)
    self.model = LdaModel([self.dictionary.doc2bow(text) for text in self.corpus])
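Shuffling the corpus before training avoids any ordering bias from the bug feed during gensim's online updates. Note that no num_topics argument is passed here, so the model is trained with gensim's default of 100 topics; no id2word mapping is supplied either, in which case gensim reconstructs a default one from the corpus.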
Example #5
Source File: lda_context_utils.py From yelp with GNU Lesser General Public License v2.1 | 6 votes |
def update_reviews_with_topics(topic_model, corpus_list, reviews):
    """
    Annotates each review with the full topic distribution of its corpus entry.

    :type topic_model: LdaModel
    :param topic_model: the trained topic model
    :type corpus_list: list
    :param corpus_list: bag-of-words vectors, one per review
    :param reviews: the review dicts to update in place
    """
    for review, corpus in zip(reviews, corpus_list):
        review[Constants.TOPICS_FIELD] = \
            topic_model.get_document_topics(corpus)

        non_zero_topics = [topic[0] for topic in review[Constants.TOPICS_FIELD]]

        for topic_index in range(Constants.TOPIC_MODEL_NUM_TOPICS):
            if topic_index not in non_zero_topics:
                review[Constants.TOPICS_FIELD].insert(
                    topic_index, [topic_index, 0.0])
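get_document_topics omits topics whose probability falls below the model's minimum_probability threshold, so the second loop re-inserts the missing topic indexes with an explicit probability of 0.0. This gives every review a fixed-length topic list, which is easier to consume downstream.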
Example #6
Source File: lda_worker.py From topical_word_embeddings with MIT License | 5 votes |
def initialize(self, myid, dispatcher, **model_params):
    self.lock_update = threading.Lock()
    self.jobsdone = 0  # how many jobs has this worker completed?
    self.myid = myid  # id of this worker in the dispatcher; just a convenience var for easy access/logging TODO remove?
    self.dispatcher = dispatcher
    self.finished = False
    logger.info("initializing worker #%s" % myid)
    self.model = ldamodel.LdaModel(**model_params)
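This is part of gensim's distributed LDA machinery: each worker process builds its own LdaModel replica from the parameters broadcast by the dispatcher, counts the jobs it has processed in jobsdone, and uses lock_update to guard concurrent updates to its model state.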
Example #7
Source File: test_models.py From topical_word_embeddings with MIT License | 5 votes |
def testPersistence(self):
    model = ldamodel.LdaModel(self.corpus, num_topics=2)
    model.save(testfile())
    model2 = ldamodel.LdaModel.load(testfile())
    self.assertEqual(model.num_topics, model2.num_topics)
    self.assertTrue(numpy.allclose(model.expElogbeta, model2.expElogbeta))
    tstvec = []
    self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec]))  # try projecting an empty vector
Example #8
Source File: test_models.py From topical_word_embeddings with MIT License | 5 votes |
def testTopicSeeding(self):
    passed = False
    for topic in range(2):
        # try seeding it both ways round, check you get the same
        # topics out but with which way round they are depending
        # on the way round they're seeded
        for i in range(5):  # restart at most 5 times
            eta = numpy.ones((2, len(dictionary))) * 0.5
            system = dictionary.token2id[u'system']
            trees = dictionary.token2id[u'trees']

            # aggressively seed the word 'system', in one of the
            # two topics, 10 times higher than the other words
            eta[topic, system] *= 10

            model = ldamodel.LdaModel(id2word=dictionary, num_topics=2, passes=200, eta=eta)
            model.update(corpus)

            topics = [dict((word, p) for p, word in model.show_topic(j)) for j in range(2)]

            # check that the word 'system' in the topic we seeded got a high weight,
            # and the word 'trees' (the main word in the other topic) a low weight --
            # and vice versa for the other topic (which we didn't seed with 'system')
            result = [[topics[topic].get(u'system', 0), topics[topic].get(u'trees', 0)],
                      [topics[1 - topic].get(u'system', 0), topics[1 - topic].get(u'trees', 0)]]
            expected = [[0.385, 0.022], [0.025, 0.157]]
            passed = numpy.allclose(result, expected, atol=1e-2)
            if passed:
                break
            logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" %
                            (i, result, expected))
    self.assertTrue(passed)
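eta is the topic-word prior with shape (num_topics, num_terms); multiplying a single entry by 10 biases one topic toward the word 'system' before training starts. The test then checks that the bias survives training regardless of which of the two topics was seeded, comparing the learned weights of 'system' and 'trees' in both topics against expected values.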
Example #9
Source File: lda_context_utils.py From yelp with GNU Lesser General Public License v2.1 | 5 votes |
def get_topic_distribution(record, lda_model, dictionary, minimum_probability,
                           sampling_method=None, max_words=None):
    """
    :type record: dict
    :type lda_model: LdaModel
    :type minimum_probability: float
    :param sampling_method: a float in the range [0, 1] that indicates the
        proportion of text that should be sampled from the review. It can also
        take the string value 'max', indicating that only the words with the
        highest probability for each topic will be sampled. If None, all of
        the review text is taken.
    :param max_words: the set of words with maximum probability for each
        contextual topic
    """
    # review_bow = [record[Constants.BOW_FIELD]]
    # review_bow = \
    #     sample_bag_of_words(review_bow, sampling_method, max_words)
    # corpus = dictionary.doc2bow(review_bow[0])
    corpus = record[Constants.CORPUS_FIELD]
    lda_corpus = lda_model.get_document_topics(
        corpus, minimum_probability=minimum_probability)
    topic_distribution = numpy.zeros(lda_model.num_topics)

    for pair in lda_corpus:
        topic_distribution[pair[0]] = pair[1]

    return topic_distribution
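A minimal call sketch (hypothetical; it assumes record already carries its bag-of-words corpus under Constants.CORPUS_FIELD, as the function expects, and reuses an existing lda_model and dictionary):

record = {Constants.CORPUS_FIELD: dictionary.doc2bow(['food', 'service'])}
vector = get_topic_distribution(record, lda_model, dictionary, minimum_probability=0.0)
print(vector.shape)  # one entry per topic: (lda_model.num_topics,)

Note that dictionary, sampling_method and max_words are only consumed by the commented-out sampling path; the live code reads the precomputed corpus straight from the record.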