Python gensim.models.Word2Vec.load_word2vec_format() Examples
The following are 9 code examples of gensim.models.Word2Vec.load_word2vec_format().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module gensim.models.Word2Vec, or try the search function.
Example #1
Source File: doc_word2vec.py From KATE with BSD 3-Clause "New" or "Revised" License | 5 votes |
def load_w2v(file):
    """Load word2vec-format vectors stored in binary form.

    Args:
        file: Source handed unchanged to
            ``Word2Vec.load_word2vec_format``.

    Returns:
        The object produced by ``Word2Vec.load_word2vec_format``.
    """
    # binary=True selects the binary word2vec layout rather than text.
    return Word2Vec.load_word2vec_format(file, binary=True)
Example #2
Source File: score_alignments.py From policy_diffusion with MIT License | 5 votes |
def load_word2vec(
        path='/mnt/data/sunlight/GoogleNews-vectors-negative300.bin'):
    """Load binary word2vec-format vectors.

    Args:
        path: Location of the binary vectors file. Defaults to the
            GoogleNews path this function originally hard-coded, so
            existing no-argument callers behave exactly as before.

    Returns:
        The object produced by ``Word2Vec.load_word2vec_format``.
    """
    return Word2Vec.load_word2vec_format(path, binary=True)
Example #3
Source File: words2map.py From words2map with MIT License | 5 votes |
def load_derived_vectors(filename):
    """Load derived vectors from a previous words2map run.

    The vectors are loaded as a standalone Gensim Word2Vec model
    (https://radimrehurek.com/gensim/models/word2vec.html).

    Args:
        filename: Name of the vectors file inside the
            ``derived_vectors`` folder of the current working directory.

    Returns:
        The object produced by ``Word2Vec.load_word2vec_format`` when
        reading that file with ``binary=False``.
    """
    vectors_path = getcwd() + "/derived_vectors/" + filename
    return Word2Vec.load_word2vec_format(vectors_path, binary=False)
Example #4
Source File: similarity.py From 4lang with MIT License | 5 votes |
def get_vec_sim(self):
    """Load the configured vector model into ``self.vec_model``.

    Reads the ``model`` (path) and ``model_type`` options from the
    ``vectors`` section of ``self.config`` and loads the model with the
    matching loader:

    * ``'word2vec'`` -- ``Word2Vec.load_word2vec_format(..., binary=True)``
    * ``'gensim'``   -- ``Word2Vec.load(...)``

    Raises:
        ValueError: If ``model_type`` is neither ``'word2vec'`` nor
            ``'gensim'``. ``ValueError`` is a subclass of ``Exception``,
            so callers catching ``Exception`` are unaffected.
    """
    model_fn = self.config.get('vectors', 'model')
    model_type = self.config.get('vectors', 'model_type')

    # Lazy %-style arguments: the message is only formatted if emitted.
    logging.warning('Loading model: %s', model_fn)
    if model_type == 'word2vec':
        self.vec_model = Word2Vec.load_word2vec_format(model_fn, binary=True)
    elif model_type == 'gensim':
        self.vec_model = Word2Vec.load(model_fn)
    else:
        # Name the offending value so a config typo is easy to spot.
        raise ValueError(
            'Unknown LSA model format: {0!r}'.format(model_type))
    logging.warning('Model loaded: %s', model_fn)
Example #5
Source File: word2vec_cluster.py From word2vec-cluster with GNU General Public License v3.0 | 5 votes |
def main():
    """Cluster the vectors of a word2vec model with k-means.

    Positional command-line arguments:
        model:  word2vec model path.
        format: 1 = binary format, 0 = text format.
        k:      number of clusters.
        output: output file; receives a tab-separated table with a
                ``WORD``/``CLUSTER_ID`` header and one row per word,
                sorted by cluster id.

    Progress and timing for each stage are printed to standard output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="word2vec model path")
    parser.add_argument("format", help="1 = binary format, 0 = text format", type=int)
    parser.add_argument("k", help="number of clusters", type=int)
    parser.add_argument("output", help="output file")
    args = parser.parse_args()

    # Stage 1: load the vectors.
    start = time.time()
    print("Load word2vec model ... ", end="", flush=True)
    w2v_model = Word2Vec.load_word2vec_format(args.model, binary=bool(args.format))
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)

    word_vectors = w2v_model.wv.syn0
    n_words = word_vectors.shape[0]
    vec_size = word_vectors.shape[1]
    print("#words = {0}, vector size = {1}".format(n_words, vec_size))

    # Stage 2: assign every word vector to one of k clusters.
    start = time.time()
    print("Compute clustering ... ", end="", flush=True)
    # NOTE(review): newer scikit-learn releases reportedly no longer accept
    # KMeans' n_jobs argument -- confirm against the pinned version.
    kmeans = KMeans(n_clusters=args.k, n_jobs=-1, random_state=0)
    idx = kmeans.fit_predict(word_vectors)
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)

    # Stage 3: write (word, cluster id) pairs ordered by cluster id.
    start = time.time()
    print("Generate output file ... ", end="", flush=True)
    word_centroid_list = list(zip(w2v_model.wv.index2word, idx))
    word_centroid_list_sort = sorted(word_centroid_list, key=lambda el: el[1], reverse=False)
    # The context manager closes the file even if a write fails part-way.
    with open(args.output, "w") as file_out:
        file_out.write("WORD\tCLUSTER_ID\n")
        file_out.writelines(
            word + '\t' + str(cluster_id) + '\n'
            for word, cluster_id in word_centroid_list_sort
        )
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)
Example #6
Source File: feature_generator.py From entity2vec with Apache License 2.0 | 5 votes |
def get_e2v_embedding(embeddings_file):
    """Read an embeddings file stored in binary word2vec format.

    Args:
        embeddings_file: Source passed as-is to
            ``Word2Vec.load_word2vec_format``.

    Returns:
        The object produced by ``Word2Vec.load_word2vec_format``.
    """
    return Word2Vec.load_word2vec_format(embeddings_file, binary=True)
Example #7
Source File: wordtwovec.py From aristo-mini with Apache License 2.0 | 5 votes |
def __init__(self, model_file: str) -> None:
    """Load a model from ``model_file`` and keep it on ``self.model``.

    A path ending in ``.bin`` is read with
    ``Word2Vec.load_word2vec_format(..., binary=True)``; any other path
    is read with ``Word2Vec.load``.
    """
    if not model_file.endswith(".bin"):
        self.model = Word2Vec.load(model_file)
    else:
        self.model = Word2Vec.load_word2vec_format(model_file, binary=True)
Example #8
Source File: test_analogy.py From conec with MIT License | 5 votes |
def evaluate_google():
    """Load the pretrained Google embeddings and test them.

    See https://code.google.com/archive/p/word2vec/

    The embeddings are read from
    ``data/GoogleNews-vectors-negative300.bin.gz`` and passed to
    ``accuracy`` together with ``data/questions-words.txt``.
    """
    from gensim.models import Word2Vec

    google_vectors = Word2Vec.load_word2vec_format(
        'data/GoogleNews-vectors-negative300.bin.gz', binary=True)
    # The value returned by accuracy() is intentionally not used.
    accuracy(google_vectors, "data/questions-words.txt", False)
Example #9
Source File: kaggle.py From dl-models-for-qa with Apache License 2.0 | 4 votes |
def get_weights_word2vec(word2idx, w2vfile, w2v_embed_size=300, is_custom=False):
    """Build an embedding weight matrix for the words in ``word2idx``.

    Args:
        word2idx: Mapping from word to the row index it occupies in the
            returned matrix.
        w2vfile: Path of the vectors to load.
        w2v_embed_size: Number of columns of the returned matrix.
        is_custom: When true, ``w2vfile`` is read with ``Word2Vec.load``;
            otherwise with
            ``Word2Vec.load_word2vec_format(..., binary=True)``.

    Returns:
        A ``(len(word2idx) + 1, w2v_embed_size)`` NumPy array. Rows of
        words whose lower-cased form is missing from the loaded vectors
        stay zero.
    """
    if is_custom:
        vectors = Word2Vec.load(w2vfile)
    else:
        vectors = Word2Vec.load_word2vec_format(w2vfile, binary=True)

    n_rows = len(word2idx) + 1
    weights = np.zeros((n_rows, w2v_embed_size))
    for token, row in word2idx.items():
        try:
            vector = vectors[token.lower()]
        except KeyError:
            # keep as zero (not ideal, but what else can we do?)
            continue
        weights[row, :] = vector
    return weights