Python gensim.models.Doc2Vec.load() Examples
The following are 15 code examples of gensim.models.Doc2Vec.load().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module gensim.models.Doc2Vec, or try the search function.
Example #1
Source File: dict_utils.py From Semantic-Texual-Similarity-Toolkits with MIT License | 6 votes |
def load_idf_dict(self, dict_name='idf_dict'):
    """Return the IDF table cached under ``dict_name``, building it on first use.

    On a cache miss the table is read from ``word-frequencies.txt`` under
    ``config.EX_DICT_DIR``.  The first line is parsed as the total frequency;
    every later line is split into a word and its frequency.  Words whose
    frequency is below 10 are skipped, the others are mapped to
    ``log(total / frequency)`` in base 2.
    """
    if dict_name in self.dict_manager:
        return self.dict_manager[dict_name]

    idf_by_word = {}
    file_name = config.EX_DICT_DIR + '/word-frequencies.txt'
    print('load dict from file %s \n' % file_name)
    # NOTE(review): whatever utils.create_read_file returns is never closed
    # in this function -- confirm whether it needs an explicit close.
    handle = utils.create_read_file(file_name)
    for line_no, raw in enumerate(handle):
        if line_no == 0:
            # Header line: corpus-wide total used as the numerator below.
            total = int(raw)
            continue
        token, count = raw.strip().split()
        count = float(count)
        if count < 10:
            continue
        idf_by_word[token] = math.log(total / count) / math.log(2)

    self.dict_manager[dict_name] = idf_by_word
    return self.dict_manager[dict_name]
Example #2
Source File: document2vec.py From Document2Vec with MIT License | 5 votes |
def load_from_pickle(self, filename):
    """Copy the attributes of a model saved at ``filename`` onto this instance.

    The file is opened with ``Doc2Vec.load``.  ``__dict__`` is never copied,
    callables that this instance already exposes are kept as they are, and
    attributes that cannot be assigned are silently left alone.
    """
    loaded = Doc2Vec.load(filename)
    for name in dir(loaded):
        # Short-circuit order matters: the loaded attribute is only fetched
        # for the callable check when this instance already has that name.
        if name == '__dict__' or (
                name in dir(self) and callable(getattr(loaded, name))):
            continue
        try:
            setattr(self, name, getattr(loaded, name))
        except AttributeError:
            # Not readable or not assignable -- leave the attribute untouched.
            pass
Example #3
Source File: sent_eval.py From embedding with MIT License | 5 votes |
def __init__(self, model_fname="data/doc2vec.vecs", use_notebook=False):
    """Load the Doc2Vec model at ``model_fname`` and index its document tags.

    ``doc2idx`` maps every key of ``model.docvecs.doctags`` to its position
    in iteration order; ``use_notebook`` is stored unchanged.
    """
    self.model = Doc2Vec.load(model_fname)
    tag_positions = {}
    for position, tag in enumerate(self.model.docvecs.doctags.keys()):
        tag_positions[tag] = position
    self.doc2idx = tag_positions
    self.use_notebook = use_notebook
Example #4
Source File: sent_eval.py From embedding with MIT License | 5 votes |
def __init__(self, model_path="data/lda.results", tokenizer_name="mecab"):
    """Set up the tokenizer, the stored topic results and the LDA model.

    Results are read from ``model_path + ".results"`` and the model from
    ``model_path + ".model"``.

    NOTE(review): the default ``model_path`` already ends in ``.results``,
    so the default results file is ``data/lda.results.results`` -- confirm
    that this is intended.
    """
    self.tokenizer = get_tokenizer(tokenizer_name)
    results_file = model_path + ".results"
    self.all_topics = self.load_results(results_file)
    model_file = model_path + ".model"
    self.model = LdaModel.load(model_file)
Example #5
Source File: doc2vec.py From KATE with BSD 3-Clause "New" or "Revised" License | 5 votes |
def load_doc2vec(mod_file):
    """Load and return the Doc2Vec model stored at ``mod_file``."""
    model = Doc2Vec.load(mod_file)
    return model
Example #6
Source File: dict_utils.py From Semantic-Texual-Similarity-Toolkits with MIT License | 5 votes |
def load_dict(self, dict_name, path=config.DICT_DIR):
    """Return the dictionary cached under ``dict_name``, loading it on first use.

    On a cache miss, ``dict_<dict_name>.txt`` is read from the ``../resources``
    directory relative to this module.  Each line is stripped and split on
    tabs: one field maps that field to its 1-based line number, two fields
    map the first to the evaluated second, and any other field count raises
    ``NotImplementedError``.

    NOTE(review): ``path`` is overwritten before it is ever read, so the
    argument currently has no effect.
    """
    if dict_name in self.dict_manager:
        return self.dict_manager[dict_name]

    entries = {}
    cur_dir = os.path.dirname(__file__)
    path = os.path.join(cur_dir, '../resources')

    # Load the dictionary from its text file.
    file_name = path + '/dict_%s.txt' % dict_name
    print('load dict from file %s \n' % file_name)
    f_dict = utils.create_read_file(file_name)
    for idx, raw in enumerate(f_dict):
        fields = raw.strip().split('\t')
        field_count = len(fields)
        if field_count == 1:
            entries[fields[0]] = idx + 1
        elif field_count == 2:
            # NOTE(review): eval() executes whatever the file contains --
            # only acceptable for trusted dictionary files.
            entries[fields[0]] = eval(fields[1])
        else:
            raise NotImplementedError

    self.dict_manager[dict_name] = entries
    return self.dict_manager[dict_name]
Example #7
Source File: dict_utils.py From Semantic-Texual-Similarity-Toolkits with MIT License | 5 votes |
def load_doc2vec(self):
    """Return the Doc2Vec model cached under ``'doc2vec'``, loading it once.

    On a cache miss the model is loaded from ``doc2vec.model`` under
    ``config.EX_DICT_DIR`` and stored in ``self.dict_manager``.
    """
    dict_name = 'doc2vec'
    if dict_name in self.dict_manager:
        return self.dict_manager[dict_name]

    # Imported here so gensim is only needed when the model is actually loaded.
    from gensim.models import Doc2Vec
    model_path = config.EX_DICT_DIR + '/doc2vec.model'
    self.dict_manager[dict_name] = Doc2Vec.load(model_path)
    return self.dict_manager[dict_name]
Example #8
Source File: test_vec4ir.py From vec4ir with MIT License | 5 votes |
def test_doc2vec_inference_saveload():
    """Save a small Doc2Vec model, reload it, and run a retrieval query with it."""
    corpus = []
    for tag, text in enumerate(documents):
        corpus.append(TaggedDocument(simple_preprocess(text), [tag]))

    model = Doc2Vec(corpus, epochs=1, min_count=1, vector_size=10)
    model.save(TEST_FILE)
    # Drop the in-memory model so the query below runs on the reloaded one.
    del model
    model = Doc2Vec.load(TEST_FILE)
    os.remove(TEST_FILE)

    embedder = Doc2VecInference(model, DEFAULT_ANALYZER)
    retrieval = Retrieval(embedder, matching=Matching())
    retrieval = retrieval.fit(documents)
    hits = retrieval.query("scientists")
    assert hits[0] == 1
Example #9
Source File: echoDoc0.1.py From EchoBurst with MIT License | 5 votes |
def retrainModel(vectorFile, dataFile, outputFile, iterations):
    """Run ``iterations`` extra training passes on a saved model and save it.

    The corpus is read from ``Data\\<dataFile>``, the model from
    ``Models\\<vectorFile>``, and the result is written to
    ``Models\\<outputFile>``.
    """
    corpus = LabeledLineSentence("Data\\" + dataFile)
    model = Doc2Vec.load("Models\\" + vectorFile)
    for _ in range(iterations):
        model.train(corpus)
    output_path = "Models\\" + outputFile
    model.save(output_path)
Example #10
Source File: echoDoc0.1.py From EchoBurst with MIT License | 5 votes |
def testModel(inputFile):
    """Interactive console for inspecting the model saved at ``Models\\<inputFile>``.

    Loops on a menu prompt:
      * ``1`` prints the documents most similar to an entered tag,
      * ``2`` prints the words most similar to an entered (lower-cased) word,
      * ``3`` infers a vector for every tab-separated ``tag<TAB>body`` line of
        ``testing.txt`` and appends the results to ``clusteredResults.txt``,
      * any other input ends the loop.
    """
    model = Doc2Vec.load("Models\\" + inputFile)
    menu = ("Press 1 to compare documents within the model to each other.\n"
            "Press 2 to run similarity tests on individual words.\n"
            "Press 3 to get the top related subreddits for an inferred new vector (comment).\n"
            "Hit any key to exit.\n")
    while True:
        choice = input(menu)
        if choice not in ("1", "2", "3"):
            break

        if choice == "1":
            doc_tag = input("Enter the subreddit you want to test.\n")
            print(model.docvecs.most_similar(doc_tag))
            continue

        if choice == "2":
            word = input("Enter the word you wish to analyze.\n").lower()
            print(model.most_similar(word))
            continue

        # choice == "3": score every line of the test file.
        with open("testing.txt") as source:
            reports = []
            for raw in source.readlines():
                fields = raw.split("\t")
                tag = fields[0]
                body = fields[1]
                inferred = model.infer_vector(body.split())
                neighbours = model.docvecs.most_similar(positive=[inferred])
                reports.append("The original category is {}: {}\n {}\n".
                               format(tag, body, neighbours))
            with open("clusteredResults.txt", "a") as sink:
                sink.writelines(reports)
Example #11
Source File: echoDoc0.1.py From EchoBurst with MIT License | 5 votes |
def newKMeansModel(vectorFile, outputFile, numClusters):
    """Fit KMeans on a saved model's document vectors and dump the estimator.

    The model is loaded from ``Models\\<vectorFile>``; the fitted estimator
    is written to ``outputFile`` with ``joblib.dump``.
    """
    # https://stackoverflow.com/questions/43476869/doc2vec-sentence-clustering
    model = Doc2Vec.load("Models\\" + vectorFile)
    doc_matrix = model.docvecs.doctag_syn0
    clusterer = KMeans(n_clusters=numClusters)
    print("Starting")
    clusterer.fit(doc_matrix)
    print("Fitting Data")
    joblib.dump(clusterer, outputFile)
Example #12
Source File: echoDoc0.1.py From EchoBurst with MIT License | 5 votes |
def loadKMeansModel(vectorFile, clusterFile, csvFile):
    """Tabulate the cluster labels of a stored KMeans fit and write them as CSV.

    The model is loaded from ``Models\\<vectorFile>`` and the estimator from
    ``clusterFile``.  The resulting frame is printed and saved to ``csvFile``.
    """
    # https://stackoverflow.com/questions/43476869/doc2vec-sentence-clustering
    model = Doc2Vec.load("Models\\" + vectorFile)
    km = joblib.load(clusterFile)
    clusters = km.labels_.tolist()

    column_order = ['labels', "index, wordcount and repeated words", 'clusters']
    tag_details = [model.docvecs.doctags[tag]
                   for tag in model.docvecs.offset2doctag]
    cluster_info = {
        'labels': model.docvecs.offset2doctag,
        "index, wordcount and repeated words": tag_details,
        'clusters': clusters,
    }
    sentenceDF = pd.DataFrame(cluster_info, index=[clusters],
                              columns=column_order)
    print(sentenceDF)
    sentenceDF.to_csv(csvFile)
Example #13
Source File: echoDoc0.1.py From EchoBurst with MIT License | 5 votes |
def newDBSCANModel(vectorFile, outputFile):
    """Cluster a saved model's document vectors with DBSCAN and report them.

    The model is loaded from ``Models\\<vectorFile>``.  The fitted DBSCAN
    estimator is dumped to ``outputFile`` with ``joblib``; a table of tags,
    tag details and cluster labels is printed and written to ``DBSCAN.csv``,
    followed by the estimated number of clusters (noise label ``-1``
    excluded).
    """
    model = Doc2Vec.load("Models\\" + vectorFile)

    # Flatten each document vector to 1-D float values.  Using reshape(-1)
    # instead of a fixed (1, 300) shape keeps this working for models whose
    # vector size is not 300.
    docs = [
        np.asarray(model.docvecs[doc], dtype='float').reshape(-1)
        for doc in range(len(model.docvecs))
    ]

    db = DBSCAN(eps=0.03, algorithm="brute", metric='cosine').fit(docs)
    joblib.dump(db, outputFile)

    labels = db.labels_
    # Label -1 marks noise points and is not counted as a cluster.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    clusters = db.labels_.tolist()

    cluster_info = {'labels': model.docvecs.offset2doctag,
                    "index, wordcount and repeated words":
                        [model.docvecs.doctags[x]
                         for x in model.docvecs.offset2doctag],
                    'clusters': clusters}
    sentenceDF = pd.DataFrame(cluster_info, index=[clusters],
                              columns=['labels',
                                       "index, wordcount and repeated words",
                                       'clusters'])
    print(sentenceDF)
    sentenceDF.to_csv("DBSCAN.csv")
    print('Estimated number of clusters: %d' % n_clusters_)
Example #14
Source File: echoDoc0.1.py From EchoBurst with MIT License | 5 votes |
def plotModel3D(vectorFile, numClusters):
    """Show a 3-D scatter of a saved model's document vectors, coloured by cluster.

    The model is loaded from ``Models\\<vectorFile>``.  Its document vectors
    are reduced to 10 PCA components, clustered with KMeans into
    ``numClusters`` groups, and components 5, 2 and 3 are plotted with the
    cluster label as colour.
    """
    # http://scikit-learn.org/stable/auto_examples/cluster/plot_cluster_iris.html
    model = Doc2Vec.load("Models\\" + vectorFile)
    docVecs = model.docvecs.doctag_syn0
    reduced_data = PCA(n_components=10).fit_transform(docVecs)
    kmeans = KMeans(init='k-means++', n_clusters=numClusters, n_init=10)

    fig = plt.figure(1, figsize=(10, 10))
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
    kmeans.fit(reduced_data)
    labels = kmeans.labels_
    # Builtin float replaces the np.float alias, which newer NumPy releases
    # no longer provide.
    colours = labels.astype(float)

    ax.scatter(reduced_data[:, 5], reduced_data[:, 2], reduced_data[:, 3],
               c=colours)
    # xaxis/yaxis/zaxis replace the w_xaxis/w_yaxis/w_zaxis aliases, which
    # newer Matplotlib releases no longer provide.
    ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels([])
    ax.zaxis.set_ticklabels([])

    # Plot the ground truth
    fig = plt.figure(1, figsize=(10, 10))
    plt.clf()
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
    plt.cla()
    ax.scatter(reduced_data[:, 5], reduced_data[:, 2], reduced_data[:, 3],
               c=colours)
    ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels([])
    ax.zaxis.set_ticklabels([])
    plt.show()
Example #15
Source File: title2vec.py From OAG with MIT License | 5 votes |
def load_model(self):
    """Load the Doc2Vec model named by ``self.model_fname`` and return it.

    The file is looked up inside ``self.model_dir``; the loaded model is also
    kept on ``self.model``.  Progress is logged before and after loading.
    """
    logger.info('loading doc2vec model name %s', self.model_fname)
    model_path = join(self.model_dir, self.model_fname)
    self.model = Doc2Vec.load(model_path)
    logger.info('doc2vec model %s loaded', self.model_fname)
    return self.model