Python Examples of gensim.models.Doc2Vec.load

Source File: dict_utils.py From Semantic-Texual-Similarity-Toolkits with MIT License

6 votes

def load_idf_dict(self, dict_name='idf_dict'):
        """
        Return the IDF table cached under ``dict_name``, building it on first use.

        On a cache miss the table is read from
        ``config.EX_DICT_DIR + '/word-frequencies.txt'``.  The first line of
        that file is parsed as the total frequency; every later line is
        parsed as ``<word> <frequency>``.  Words whose frequency is below 10
        are dropped; each remaining word maps to
        ``log2(total_frequency / frequency)``.  Blank lines are skipped.

        :param dict_name: key under which the table is stored in
            ``self.dict_manager``.
        :return: dict mapping word -> IDF weight.
        :raises ValueError: if a non-blank data line does not consist of
            exactly two whitespace-separated fields, or a number cannot be
            parsed.
        """
        if dict_name not in self.dict_manager:

            word_frequencies = {}

            file_name = config.EX_DICT_DIR + '/word-frequencies.txt'
            print('load dict from file %s \n' % file_name)

            f_dict = utils.create_read_file(file_name)
            try:
                for idx, line in enumerate(f_dict):
                    if idx == 0:
                        # Header line: total frequency used as the numerator.
                        totfreq = int(line)
                        continue
                    fields = line.split()
                    if not fields:
                        # Blank line (e.g. a trailing newline at end of file).
                        continue
                    w, freq = fields
                    freq = float(freq)
                    if freq < 10:
                        # Rare words are left out of the table entirely.
                        continue
                    word_frequencies[w] = math.log(totfreq / freq) / math.log(2)
            finally:
                # create_read_file is defined elsewhere; release the handle
                # if it returned something closable.
                close = getattr(f_dict, 'close', None)
                if close is not None:
                    close()
            self.dict_manager[dict_name] = word_frequencies

        return self.dict_manager[dict_name]

Source File: document2vec.py From Document2Vec with MIT License

5 votes

def load_from_pickle(self, filename):
        """
        Load a saved model from ``filename`` and copy its attributes onto
        this instance.

        The model is read with ``Doc2Vec.load``.  Every name reported by
        ``dir()`` on the loaded model is copied over with ``setattr``,
        except ``__dict__`` and any callable whose name already exists on
        this instance.  Attributes that cannot be read or assigned
        (``AttributeError``) are skipped.
        """
        loaded = Doc2Vec.load(filename)
        for name in dir(loaded):
            # Never replace the instance dict wholesale, and keep the
            # methods this class already provides.
            already_have_callable = (
                name != '__dict__'
                and name in dir(self)
                and callable(getattr(loaded, name))
            )
            if name == '__dict__' or already_have_callable:
                continue
            try:
                value = getattr(loaded, name)
                setattr(self, name, value)
            except AttributeError:
                # Read-only or otherwise unassignable attribute: ignore it.
                pass

Source File: sent_eval.py From embedding with MIT License

5 votes

def __init__(self, model_fname="data/doc2vec.vecs", use_notebook=False):
        """
        Load a saved Doc2Vec model and index its document tags.

        :param model_fname: path passed to ``Doc2Vec.load``.
        :param use_notebook: stored unchanged on ``self.use_notebook``.
        """
        self.model = Doc2Vec.load(model_fname)

        # Map each document tag to its position in the model's tag listing.
        tag_positions = {}
        for position, tag in enumerate(self.model.docvecs.doctags.keys()):
            tag_positions[tag] = position
        self.doc2idx = tag_positions

        self.use_notebook = use_notebook

Source File: sent_eval.py From embedding with MIT License

5 votes

def __init__(self, model_path="data/lda.results", tokenizer_name="mecab"):
        """
        Set up the tokenizer, the stored topic results and the LDA model.

        :param model_path: common path prefix; ``".results"`` and
            ``".model"`` are appended to it to locate the two files.
        :param tokenizer_name: name handed to ``get_tokenizer``.

        NOTE(review): with the default prefix the files resolve to
        ``data/lda.results.results`` and ``data/lda.results.model`` --
        confirm this is intended.
        """
        self.tokenizer = get_tokenizer(tokenizer_name)

        results_file = model_path + ".results"
        self.all_topics = self.load_results(results_file)

        model_file = model_path + ".model"
        self.model = LdaModel.load(model_file)

Source File: doc2vec.py From KATE with BSD 3-Clause "New" or "Revised" License

5 votes

def load_doc2vec(mod_file):
    """Load the Doc2Vec model stored at ``mod_file`` and return it."""
    model = Doc2Vec.load(mod_file)
    return model

Source File: dict_utils.py From Semantic-Texual-Similarity-Toolkits with MIT License

5 votes

def load_dict(self, dict_name, path=config.DICT_DIR):
        """
        Return the dictionary cached under ``dict_name``, loading it on first use.

        On a cache miss the file ``dict_<dict_name>.txt`` is read from the
        ``../resources`` directory relative to this module.  Each line is
        split on tabs:

        * one field  -> ``{field: line_number}`` (line numbers start at 1);
        * two fields -> ``{first_field: eval(second_field)}``.

        :param dict_name: name of the dictionary, also used as the cache key
            in ``self.dict_manager``.
        :param path: currently ignored -- it is overwritten with the
            ``../resources`` directory before use.  The earlier docstring
            listed ``config.DICT_DIR`` and ``config.DICT_EX_DIR`` as values.
        :return: the loaded dict.
        :raises NotImplementedError: if a line has more than two
            tab-separated fields.
        """
        if dict_name not in self.dict_manager:

            dict_object = {}

            # NOTE(review): the ``path`` argument is discarded here; kept
            # as-is so existing callers keep loading from ../resources.
            cur_dir = os.path.dirname(__file__)
            path = os.path.join(cur_dir, '../resources')

            # load dict from file
            file_name = path + '/dict_%s.txt' % dict_name
            print('load dict from file %s \n' % file_name)

            f_dict = utils.create_read_file(file_name)
            try:
                for idx, line in enumerate(f_dict):
                    fields = line.strip().split('\t')
                    if len(fields) == 1:
                        dict_object[fields[0]] = idx + 1
                    elif len(fields) == 2:
                        # SECURITY: eval() executes whatever the file holds.
                        # Only safe for trusted dictionary files; consider
                        # ast.literal_eval if the values are plain literals.
                        dict_object[fields[0]] = eval(fields[1])
                    else:
                        raise NotImplementedError
            finally:
                # create_read_file is defined elsewhere; release the handle
                # if it returned something closable.
                close = getattr(f_dict, 'close', None)
                if close is not None:
                    close()

            self.dict_manager[dict_name] = dict_object

        return self.dict_manager[dict_name]

Source File: dict_utils.py From Semantic-Texual-Similarity-Toolkits with MIT License

5 votes

def load_doc2vec(self):
        """
        Return the Doc2Vec model cached under ``'doc2vec'``.

        On first use the model is loaded from
        ``config.EX_DICT_DIR + '/doc2vec.model'`` and stored in
        ``self.dict_manager``; gensim is imported only at that point.
        """
        cache_key = 'doc2vec'
        if cache_key in self.dict_manager:
            return self.dict_manager[cache_key]

        # Deferred import: only needed when the model actually has to be read.
        from gensim.models import Doc2Vec
        self.dict_manager[cache_key] = Doc2Vec.load(config.EX_DICT_DIR + '/doc2vec.model')
        return self.dict_manager[cache_key]

Source File: test_vec4ir.py From vec4ir with MIT License

5 votes

def test_doc2vec_inference_saveload():
    """
    Check that a Doc2Vec model still works for retrieval after a
    save/load round trip through ``TEST_FILE``.

    The temporary model file is removed even when saving or loading fails,
    so a broken run does not leave it behind for later tests.
    """
    tagged_docs = [TaggedDocument(simple_preprocess(doc), [i])
                   for i, doc in enumerate(documents)]
    model = Doc2Vec(tagged_docs, epochs=1, min_count=1, vector_size=10)
    try:
        model.save(TEST_FILE)
        # Drop the in-memory model so the rest of the test can only use
        # what was restored from disk.
        del model
        model = Doc2Vec.load(TEST_FILE)
    finally:
        # Guarded so a failed save() is not masked by a missing-file error.
        if os.path.exists(TEST_FILE):
            os.remove(TEST_FILE)
    d2v = Doc2VecInference(model, DEFAULT_ANALYZER)
    match_op = Matching()
    retrieval = Retrieval(d2v, matching=match_op).fit(documents)
    result = retrieval.query("scientists")
    assert result[0] == 1