Python gensim.models.LdaMulticore() Examples

The following are 4 code examples of gensim.models.LdaMulticore(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module gensim.models, or try the search function.
Example #1
Source File: builder.py    From Greynir with GNU General Public License v3.0 6 votes vote down vote up
def create_lda_model(self, **kwargs):
        """ Train a Latent Dirichlet Allocation (LDA) model on the corpus
            returned by load_tfidf_corpus() and save it to the LDA model
            file. Any keyword arguments are forwarded unchanged to
            models.LdaMulticore. """
        tfidf_corpus = self.load_tfidf_corpus()
        if self._dictionary is None:
            # No dictionary yet: load it before training
            self.load_dictionary()
        topic_count = self._dimensions
        # Train the LDA transformation with one topic per dimension
        model = models.LdaMulticore(
            tfidf_corpus, id2word=self._dictionary, num_topics=topic_count, **kwargs
        )
        if self._verbose:
            # Report every topic of the freshly trained model
            model.print_topics(num_topics=topic_count)
        # The file name is derived from the dimension count
        target_path = self._LDA_MODEL_FILE.format(topic_count)
        model.save(target_path)
Example #2
Source File: lda_context_utils.py    From yelp with GNU Lesser General Public License v2.1 5 votes vote down vote up
def build_topic_model_from_corpus(corpus, dictionary):
    """
    Builds a topic model with the given corpus and dictionary.
    The model is built using Latent Dirichlet Allocation. The multicore
    implementation is used when Constants.LDA_MULTICORE is set, otherwise
    the single-process one.

    :type corpus: list
    :parameter corpus: a list of bag of words, each bag of words represents a
    document
    :type dictionary: gensim.corpora.Dictionary
    :parameter dictionary: a Dictionary object that contains the words that are
    permitted to belong to the document, words that are not in this dictionary
    will be ignored
    :rtype: gensim.models.ldamodel.LdaModel
    :return: the topic model trained on the given corpus (an LdaMulticore
    instance when the multicore implementation is selected)
    """

    timestamp = time.strftime("%Y/%m/%d-%H:%M:%S")

    if Constants.LDA_MULTICORE:
        print('%s: lda multicore' % timestamp)
        # Leave one core for the main process, but never request fewer than
        # one worker: a pool of 0 processes cannot be created, which used to
        # break this branch whenever NUM_CORES was 1.
        worker_count = max(1, Constants.NUM_CORES - 1)
        topic_model = LdaMulticore(
            corpus, id2word=dictionary,
            num_topics=Constants.TOPIC_MODEL_NUM_TOPICS,
            passes=Constants.TOPIC_MODEL_PASSES,
            iterations=Constants.TOPIC_MODEL_ITERATIONS,
            workers=worker_count)
    else:
        print('%s: lda monocore' % timestamp)
        topic_model = ldamodel.LdaModel(
            corpus, id2word=dictionary,
            num_topics=Constants.TOPIC_MODEL_NUM_TOPICS,
            passes=Constants.TOPIC_MODEL_PASSES,
            iterations=Constants.TOPIC_MODEL_ITERATIONS)

    return topic_model
Example #3
Source File: builder.py    From Greynir with GNU General Public License v3.0 5 votes vote down vote up
def load_lda_model(self):
        """ Load the LDA model stored in the model file for the current
            dimension count and register it as the active model """
        model_path = self._LDA_MODEL_FILE.format(self._dimensions)
        # mmap="r" asks the loader to memory-map the stored arrays read-only
        self._model = models.LdaMulticore.load(model_path, mmap="r")
        self._model_name = "lda"
Example #4
Source File: topics_analysis.py    From contextualLSTM with Apache License 2.0 4 votes vote down vote up
def topic_analysis(corpus, dictionary, models_path, technique):
    """Train the selected topic model(s) on ``corpus`` and save each one.

    Every model is saved as ``<models_path>/<prefix>_<run id>``, where the
    run id is a random UUID generated once per call, and the training time
    of each model is printed to stdout.

    :param corpus: corpus handed unchanged to the gensim model constructors
    :param dictionary: passed as ``id2word`` to every model
    :param models_path: directory prefix for the saved model files
    :param technique: ``"hdp"``, ``"ldap"`` (multicore LDA), ``"lsa"``,
        ``"ldao"`` (online LDA), ``"lda"`` (offline LDA), or ``"all"`` to
        train every model in that order; any other value trains nothing
    :return: None
    """
    import uuid

    # Keep the id under its own name instead of shadowing the uuid module.
    run_id = str(uuid.uuid4())
    print("[BLOCK] Starting models for context")
    sys.stdout.flush()

    # (technique key, label in the timing message, file prefix, factory).
    # Factories are lambdas so a model is only built when it is selected.
    recipes = (
        ("hdp", "HDP model", "hdp",
         lambda: HdpModel(corpus, id2word=dictionary)),
        # NOTE(review): the worker count is hard-coded for the original host.
        ("ldap", "LDA multicore", "lda_parallel",
         lambda: LdaMulticore(corpus, id2word=dictionary, num_topics=100,
                              workers=23, passes=20)),
        ("lsa", "LSA", "lsa",
         lambda: LsiModel(corpus, id2word=dictionary, num_topics=400)),
        ("ldao", "LDA online", "lda_online",
         lambda: LdaModel(corpus, id2word=dictionary, num_topics=100,
                          update_every=1, chunksize=10000, passes=5)),
        ("lda", "LDA offline", "lda_offline",
         lambda: LdaModel(corpus, id2word=dictionary, num_topics=100,
                          update_every=0, passes=20)),
    )

    for key, label, file_prefix, build in recipes:
        if technique != "all" and technique != key:
            continue
        started = time()
        model = build()
        model.save("%s/%s_%s" % (models_path, file_prefix, run_id))
        # Release the model before the next one is trained so that two models
        # are never held in memory at once (the online LDA branch used to
        # keep its model alive).
        del model
        elapsed = round(time() - started, 2)
        print("[BLOCK] Training time for %s: %s" % (label, elapsed))
        sys.stdout.flush()