Python gensim.models.Word2Vec.load() Examples
The following are 30 code examples of gensim.models.Word2Vec.load(), drawn from open-source projects. The originating project, source file, and license are noted above each example.
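Before the project examples, here is a minimal sketch of the typical save/load round trip. The toy corpus, file name, and parameter choices below are illustrative assumptions rather than code from any of the listed projects.

from gensim.models import Word2Vec

# Toy corpus and file name, assumed purely for illustration.
sentences = [["hello", "world"], ["machine", "learning", "with", "gensim"]]

# Train a small model and persist it in gensim's native format.
model = Word2Vec(sentences, min_count=1)
model.save("example_word2vec.model")

# Word2Vec.load() restores a model saved with .save(). Models stored in the
# plain word2vec text/binary format require load_word2vec_format() instead
# (available on KeyedVectors in recent gensim releases).
loaded = Word2Vec.load("example_word2vec.model")
print(loaded.wv.most_similar("hello", topn=1))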
Example #1
Source File: data_utils.py From CCKS2019-Chinese-Clinical-NER with MIT License | 6 votes |
def get_embedding_matrix(model_filepath, word2id):
    """
    Get the embedding matrix of the word2vec model
    :param model_filepath: the file path to the pre-built word2vec model
    :param word2id: the dictionary mapping from word to id
    :return: the embedding matrix of the word2vec model
    """
    word2vec_model = Word2Vec.load(model_filepath)
    embeddings_dict = __get_embedding_dict(model_filepath)
    embedding_matrix = np.zeros((len(word2id) + 1, word2vec_model.vector_size))
    for word, idx in word2id.items():
        embedding_vector = embeddings_dict.get(word)
        if embedding_vector is not None:
            embedding_matrix[idx] = embedding_vector
    return embedding_matrix
Example #2
Source File: word2vec_helpers.py From chinese-text-classification-with-cnn-tf with Apache License 2.0 | 6 votes |
def embedding_sentences(sentences, embedding_size = 128, window = 5, min_count = 5, file_to_load = None, file_to_save = None):
    if file_to_load is not None:
        w2vModel = Word2Vec.load(file_to_load)
    else:
        w2vModel = Word2Vec(sentences, size = embedding_size, window = window, min_count = min_count,
                            workers = multiprocessing.cpu_count())
        if file_to_save is not None:
            w2vModel.save(file_to_save)
    all_vectors = []
    embeddingDim = w2vModel.vector_size
    embeddingUnknown = [0 for i in range(embeddingDim)]
    for sentence in sentences:
        this_vector = []
        for word in sentence:
            if word in w2vModel.wv.vocab:
                this_vector.append(w2vModel[word])
            else:
                this_vector.append(embeddingUnknown)
        all_vectors.append(this_vector)
    return all_vectors
Example #3
Source File: word_eval.py From embedding with MIT License | 6 votes |
def load_vectors(self, vecs_fname, method):
    if method == "word2vec":
        model = Word2Vec.load(vecs_fname)
        words = model.wv.index2word
        vecs = model.wv.vectors
    else:
        words, vecs = [], []
        with open(vecs_fname, 'r', encoding='utf-8') as f:
            if "fasttext" in method:
                next(f)  # skip head line
            for line in f:
                if method == "swivel":
                    splited_line = line.strip().split("\t")
                else:
                    splited_line = line.strip().split(" ")
                words.append(splited_line[0])
                vec = [float(el) for el in splited_line[1:]]
                vecs.append(vec)
    unit_vecs = normalize(vecs, norm='l2', axis=1)
    dictionary = {}
    for word, vec in zip(words, unit_vecs):
        dictionary[word] = vec
    return dictionary, words, unit_vecs
Example #4
Source File: word_utils.py From embedding with MIT License | 6 votes |
def load_word_embeddings(self, vecs_fname, method):
    if method == "word2vec":
        model = Word2Vec.load(vecs_fname)
        words = model.wv.index2word
        vecs = model.wv.vectors
    else:
        words, vecs = [], []
        with open(vecs_fname, 'r', encoding='utf-8') as f1:
            if "fasttext" in method:
                next(f1)  # skip head line
            for line in f1:
                if method == "swivel":
                    splited_line = line.replace("\n", "").strip().split("\t")
                else:
                    splited_line = line.replace("\n", "").strip().split(" ")
                words.append(splited_line[0])
                vec = [float(el) for el in splited_line[1:]]
                vecs.append(vec)
    return words, vecs
Example #5
Source File: utils.py From mat2vec with MIT License | 6 votes |
def compute_epoch_accuracies(root, prefix, analogy_file):
    filenames = glob.glob(os.path.join(root, prefix + "_epoch*.model"))
    nr_epochs = len(filenames)
    accuracies = dict()
    losses = [0] * nr_epochs
    for filename in filenames:
        epoch = int(re.search("\d+\.model", filename).group()[:-6])
        m = Word2Vec.load(filename)
        losses[epoch] = m.get_latest_training_loss()
        sections = m.wv.accuracy(analogy_file)
        for sec in sections:
            if sec["section"] not in accuracies:
                accuracies[sec["section"]] = [0] * nr_epochs
            correct, incorrect = len(sec["correct"]), len(sec["incorrect"])
            if incorrect > 0:
                accuracy = correct / (correct + incorrect)
            else:
                accuracy = 0
            accuracies[sec["section"]][epoch] = (correct, incorrect, accuracy)
    save_obj(accuracies, os.path.join("models", prefix + "_accuracies"))
    save_obj(np.concatenate([np.array([losses[0]]), np.diff(losses)]),
             os.path.join("models", prefix + "_loss"))
Example #6
Source File: main.py From nonce2vec with MIT License | 6 votes |
def _check_men(args):
    """Check embeddings quality.

    Calculate correlation with the similarity ratings in the MEN dataset.
    """
    logger.info('Checking embeddings quality against MEN similarity ratings')
    logger.info('Loading word2vec model...')
    model = Word2Vec.load(args.w2v_model)
    logger.info('Model loaded')
    system_actual = []
    # This is needed because we may not be able to calculate cosine for
    # all pairs
    human_actual = []
    count = 0
    for (first, second), human in Samples(source='men', shuffle=False):
        if first not in model.wv.vocab or second not in model.wv.vocab:
            logger.error('Could not find one of more pair item in model '
                         'vocabulary: {}, {}'.format(first, second))
            continue
        sim = _cosine_similarity(model.wv[first], model.wv[second])
        system_actual.append(sim)
        human_actual.append(human)
        count += 1
    spr = _spearman(human_actual, system_actual)
    logger.info('SPEARMAN: {} calculated over {} items'.format(spr, count))
Example #7
Source File: word2vec_helpers.py From question-classification-cnn-rnn-attention with Apache License 2.0 | 6 votes |
def __init__(self, test_model=False, verify_model=True):
    model = Word2Vec.load(modelfile)
    if(test_model):
        acc = model.accuracy(questionfile)
        logger.info("Test model " + modelfile + " in " + questionfile)
    self.vector_size = model.vector_size
    self.vocab_size = len(model.wv.vocab) + 1
    self.word2index = self.GetWord2Index(model)
    self.index2word = self.GetIndex2Word(model)
    self.wordvector = self.GetWordVector(model)
    if(verify_model):
        logger.info("Verifing imported word2vec model")
        random_state = check_random_state(12)
        check_index = random_state.randint(low=0, high=self.vocab_size-2, size=1000)
        for index in check_index:
            word_wv = model.wv.index2word[index]
            word_our = self.index2word[index+1]
            # print(index, word_wv, word_our)
            assert word_wv == word_our
            assert model.wv.vocab[word_our].index == self.word2index[word_our] - 1
            assert np.array_equal(model.wv[word_our], self.wordvector[self.word2index[word_our]])
        logger.info("Imported word2vec model is verified")
Example #8
Source File: word2vec_helpers.py From question-classification-cnn-rnn-attention with Apache License 2.0 | 6 votes |
def __init__(self, test_model=False, verify_model=True):
    model = Word2Vec.load(modelfile)
    if(test_model):
        acc = model.accuracy(questionfile)
        logger.info("Test model " + modelfile + " in " + questionfile)
    self.vector_size = model.vector_size
    self.vocab_size = len(model.wv.vocab) + 1
    self.word2index = self.GetWord2Index(model)
    self.index2word = self.GetIndex2Word(model)
    self.wordvector = self.GetWordVector(model)
    if(verify_model):
        logger.info("Verifing imported word2vec model")
        random_state = check_random_state(12)
        check_index = random_state.randint(low=0, high=self.vocab_size-2, size=1000)
        for index in check_index:
            word_wv = model.wv.index2word[index]
            word_our = self.index2word[index+1]
            # print(index, word_wv, word_our)
            assert word_wv == word_our
            assert model.wv.vocab[word_our].index == self.word2index[word_our] - 1
            assert np.array_equal(model.wv[word_our], self.wordvector[self.word2index[word_our]])
        logger.info("Imported word2vec model is verified")
Example #9
Source File: word2vec_helpers.py From question-classification-cnn-rnn-attention with Apache License 2.0 | 6 votes |
def __init__(self, test_model=False, verify_model=True):
    model = Word2Vec.load(modelfile)
    if(test_model):
        acc = model.accuracy(questionfile)
        logger.info("Test model " + modelfile + " in " + questionfile)
    self.vector_size = model.vector_size
    self.vocab_size = len(model.wv.vocab) + 1
    self.word2index = self.GetWord2Index(model)
    self.index2word = self.GetIndex2Word(model)
    self.wordvector = self.GetWordVector(model)
    if(verify_model):
        logger.info("Verifing imported word2vec model")
        random_state = check_random_state(12)
        check_index = random_state.randint(low=0, high=self.vocab_size-2, size=1000)
        for index in check_index:
            word_wv = model.wv.index2word[index]
            word_our = self.index2word[index+1]
            # print(index, word_wv, word_our)
            assert word_wv == word_our
            assert model.wv.vocab[word_our].index == self.word2index[word_our] - 1
            assert np.array_equal(model.wv[word_our], self.wordvector[self.word2index[word_our]])
        logger.info("Imported word2vec model is verified")
Example #10
Source File: train_word2vec.py From text-classification with Apache License 2.0 | 5 votes |
def demo():
    model = Word2Vec.load(config['model_file'])
    print("Provide three testing modes\n")
    print("Input a word, return 10 most similar words")
    print("Input two words, return their cosine similarity")
    print("Input three words, return the inference word")

    while True:
        try:
            query = input()
            q_list = query.split()

            if len(q_list) == 1:
                print("The 10 most similar words:")
                res = model.most_similar(q_list[0], topn = 10)
                for item in res:
                    print(item[0] + "," + str(item[1]))

            elif len(q_list) == 2:
                print("Cosine similarity:")
                res = model.similarity(q_list[0], q_list[1])
                print(res)

            else:
                print("%s to %s, is like %s to " % (q_list[0], q_list[2], q_list[1]))
                res = model.most_similar([q_list[0], q_list[1]], [q_list[2]], topn = 10)
                for item in res:
                    print(item[0] + "," + str(item[1]))
            print("----------------------------")
        except Exception as e:
            print(repr(e))
Example #11
Source File: wordtwovec.py From aristo-mini with Apache License 2.0 | 5 votes |
def __init__(self, model_file: str) -> None:
    if model_file.endswith(".bin"):
        self.model = Word2Vec.load_word2vec_format(model_file, binary=True)
    else:
        self.model = Word2Vec.load(model_file)
Example #12
Source File: SCDV.py From SCDV with MIT License | 5 votes |
def read_GMM(idx_name, idx_proba_name):
    # Loads cluster assignments and probability of cluster assignments.
    idx = joblib.load(idx_name)
    idx_proba = joblib.load(idx_proba_name)
    print("Cluster Model Loaded...")
    return (idx, idx_proba)
Example #13
Source File: TopicCoherence.py From SCDV with MIT License | 5 votes |
def read_GMM(idx_name, idx_proba_name):
    # Loads cluster assignments and probability of cluster assignments.
    idx = joblib.load(idx_name)
    idx_proba = joblib.load(idx_proba_name)
    print("Cluster Model Loaded...")
    return (idx, idx_proba)
Example #14
Source File: SCDV.py From SCDV with MIT License | 5 votes |
def read_GMM(idx_name, idx_proba_name):
    # Loads cluster assignments and probability of cluster assignments.
    idx = joblib.load(idx_name)
    idx_proba = joblib.load(idx_proba_name)
    print("Cluster Model Loaded...")
    return (idx, idx_proba)
Example #15
Source File: SCDV.py From SCDV with MIT License | 5 votes |
def read_GMM(idx_name, idx_proba_name):
    # Loads cluster assignments and probability of cluster assignments.
    idx = joblib.load(idx_name)
    idx_proba = joblib.load(idx_proba_name)
    print("Cluster Model Loaded...")
    return (idx, idx_proba)
Example #16
Source File: load_data.py From pynlp with MIT License | 5 votes |
def load_char_word_static_data(file, data_size=None):
    model = Word2Vec.load('../output/word2vec/word2vec.model')

    path = os.path.join(os.path.dirname(__file__), file)
    df = pd.read_csv(path)
    p = df['sentence1'].values[0:data_size]
    h = df['sentence2'].values[0:data_size]
    label = df['label'].values[0:data_size]

    p, h, label = shuffle(p, h, label)

    p_c_index, h_c_index = char_index(p, h)

    p_seg = map(lambda x: list(jieba.cut(x)), p)
    h_seg = map(lambda x: list(jieba.cut(x)), h)

    p_w_vec = list(map(lambda x: w2v(x, model), p_seg))
    h_w_vec = list(map(lambda x: w2v(x, model), h_seg))

    p_w_vec = list(map(lambda x: w2v_process(x), p_w_vec))
    h_w_vec = list(map(lambda x: w2v_process(x), h_w_vec))

    return p_c_index, h_c_index, p_w_vec, h_w_vec, label

# Load the training data with char_index and dynamic word vectors
Example #17
Source File: model.py From cakechat with Apache License 2.0 | 5 votes |
def _get_w2v_model(corpus_name,
                   voc_size,
                   model_resolver_factory=None,
                   tokenized_lines=None,
                   vec_size=TOKEN_REPRESENTATION_SIZE,
                   window_size=W2V_WINDOW_SIZE,
                   skip_gram=USE_SKIP_GRAM):
    _logger.info('Getting w2v model')

    model_path = get_w2v_model_path(corpus_name, voc_size, vec_size, window_size, skip_gram)
    model_resolver = model_resolver_factory(model_path) if model_resolver_factory else DummyFileResolver(model_path)

    if not model_resolver.resolve():
        if not tokenized_lines:
            raise ModelLoaderException(
                'Tokenized corpus "{}" was not provided, so w2v model can\'t be trained.'.format(corpus_name))

        # bin model is not present on the disk, so get it
        model = _train_model(tokenized_lines, voc_size, vec_size, window_size, skip_gram)
        _save_model(model, model_path)
    else:
        # bin model is on the disk, load it
        model = _load_model(model_path)

    _logger.info('Successfully got w2v model\n')

    return model
Example #18
Source File: Step6_all_feature_extract.py From resume_job_matching with Apache License 2.0 | 5 votes |
def getAllFeatures(train, mapper):
    print "this is getAllFeatures"
    # every record has a cluster value calculated by lda
    w2c_f, w2c_w = 10, 14
    lda_dict_1 = util.read_dict(util.features_prefix + 'id_lda_256.pkl')
    lda_dict_2 = util.read_dict(util.features_prefix + 'id_lda_512.pkl')
    k_mean_dict_1 = util.read_dict(util.features_prefix + 'c_k_all_64.pkl')
    k_mean_dict_2 = util.read_dict(util.features_prefix + 'c_k_all_128.pkl')
    sentence_dict_path = util.txt_prefix + 'id_sentences.pkl'
    word2vec_path = util.txt_prefix + str(w2c_f) + 'features_1minwords_' + str(w2c_w) + 'context.pkl'
    sentence_dic = util.read_dict(sentence_dict_path)
    model = Word2Vec.load(word2vec_path)
    train_X = train[features]
    train_X = mapper.transform(train_X)  # .values
    new_train_X = []
    for i in xrange(len(train_X)):
        id = train_X[i][0]
        lda_1 = lda_dict_1[id]
        lda_2 = lda_dict_2[id]
        s = sentence_dic.get(id)
        f = np.concatenate(([train_X[i][1:].astype(np.float32)],
                            [sentence_to_matrix_vec(s, model, w2c_f, k_mean_dict_1, k_mean_dict_2)]), axis=1)[0]
        f = np.concatenate(([f], [[lda_1, lda_2]]), axis=1)[0]
        new_train_X.append(f)
    new_train_X = np.array(new_train_X)
    return new_train_X
Example #19
Source File: train_word2vec.py From text-classification with Apache License 2.0 | 5 votes |
def segment():
    # jieba custom setting.
    DATA_DIR = os.getcwd() + '/data/user_dict'
    jieba.load_userdict(os.path.join(DATA_DIR, 'Company.txt'))
    jieba.load_userdict(os.path.join(DATA_DIR, 'Concept.txt'))
    jieba.load_userdict(os.path.join(DATA_DIR, 'Consumer.txt'))
    jieba.load_userdict(os.path.join(DATA_DIR, 'Holder.txt'))
    jieba.load_userdict(os.path.join(DATA_DIR, 'HoldingCompany.txt'))
    jieba.load_userdict(os.path.join(DATA_DIR, 'MainComposition.txt'))
    jieba.load_userdict(os.path.join(DATA_DIR, 'Manager.txt'))
    jieba.load_userdict(os.path.join(DATA_DIR, 'Material.txt'))
    jieba.load_userdict(os.path.join(DATA_DIR, 'OtherCompetitor.txt'))
    jieba.load_userdict(os.path.join(DATA_DIR, 'Supplier.txt'))
    jieba.load_userdict(os.path.join(DATA_DIR, 'Finance.txt'))

    # load stopwords set
    stopword_set = set()
    with open(os.getcwd() + '/data/user_dict/stopWord.txt', 'r', encoding='utf-8') as stopwords:
        for stopword in stopwords:
            stopword_set.add(stopword.strip('\n'))

    output = open(config['input_seg'], 'w', encoding='utf-8')
    with open(config['input_raw'], 'r', encoding='utf-8') as content:
        for texts_num, line in enumerate(content):
            line = line.strip('\n')
            words = jieba.cut(line, cut_all=False)
            for word in words:
                if word not in stopword_set:
                    output.write(word + ' ')
            output.write('\n')
            if (texts_num + 1) % 10000 == 0:
                logging.info("Segmented %d th articles" % (texts_num + 1))
    output.close()
Example #20
Source File: similarity.py From 4lang with MIT License | 5 votes |
def get_vec_sim(self):
    model_fn = self.config.get('vectors', 'model')
    model_type = self.config.get('vectors', 'model_type')
    logging.warning('Loading model: {0}'.format(model_fn))
    if model_type == 'word2vec':
        self.vec_model = Word2Vec.load_word2vec_format(model_fn, binary=True)
    elif model_type == 'gensim':
        self.vec_model = Word2Vec.load(model_fn)
    else:
        raise Exception('Unknown LSA model format')
    logging.warning('Model loaded: {0}'.format(model_fn))
Example #21
Source File: evaluate.py From embeddings with Apache License 2.0 | 5 votes |
def evaluate(filename, fname):
    counter = 0
    top3Counter = 0
    top5c = 0
    linec = 0
    model = Word2Vec.load(fname)
    questions = []
    with open(filename) as test:
        for line in test:
            questions.append(line)
    for line in questions:
        if line.startswith(':'):
            try:
                print('Accuracy: ' + str(100*counter/linec) + '\n')
            except:
                pass
            print("Evaluating " + line.rstrip('\n'))
        else:
            try:
                linec += 1
                source, target, question, answer = line.encode('utf-8').decode('utf-8').lower().rstrip('\n').replace('(', '').replace(')', '').split(' ')
                # word_vectors.most_similar(positive=['woman', 'king'], negative=['man'])
                result = model.wv.most_similar(positive=[target, question], negative=[source])
                if result[0][0] == answer:
                    counter += 1
                print('Accuracy: ' + str(100*counter/linec), end = '\r')
                for i in range(0, 3):
                    if result[i][0] == answer:
                        top3Counter += 1
                        break
                for i in range(0, 10):
                    if result[i][0] == answer:
                        top5c += 1
                        break
            except:
                pass
    print('Correct guess: ' + str(100 * counter / linec))
    print('Correct guess in top 3:' + str(100 * top3Counter / linec))
    print('Correct guess in top 10:' + str(100 * top5c / linec))
    return None
Example #22
Source File: load_data.py From text_matching with Apache License 2.0 | 5 votes |
def load_char_word_static_data(file, data_size=None):
    model = Word2Vec.load('../output/word2vec/word2vec.model')

    path = os.path.join(os.path.dirname(__file__), file)
    df = pd.read_csv(path)
    p = df['sentence1'].values[0:data_size]
    h = df['sentence2'].values[0:data_size]
    label = df['label'].values[0:data_size]

    p, h, label = shuffle(p, h, label)

    p_c_index, h_c_index = char_index(p, h)

    p_seg = map(lambda x: list(jieba.cut(x)), p)
    h_seg = map(lambda x: list(jieba.cut(x)), h)

    p_w_vec = list(map(lambda x: w2v(x, model), p_seg))
    h_w_vec = list(map(lambda x: w2v(x, model), h_seg))

    p_w_vec = list(map(lambda x: w2v_process(x), p_w_vec))
    h_w_vec = list(map(lambda x: w2v_process(x), h_w_vec))

    return p_c_index, h_c_index, p_w_vec, h_w_vec, label

# Load the training data with char_index and dynamic word vectors
Example #23
Source File: word2vec_helpers.py From DetectMaliciousURL with Apache License 2.0 | 5 votes |
def embedding_sentences(sentences, embedding_size = 128, window = 5, min_count = 5, file_to_load = None, file_to_save = None):
    '''
    embedding_size: word embedding dimensionality
    window: context window size
    min_count: words occurring fewer than min_count times are dropped
    '''
    if file_to_load is not None:
        w2vModel = Word2Vec.load(file_to_load)
    else:
        w2vModel = Word2Vec(sentences, size = embedding_size, window = window, min_count = min_count,
                            workers = multiprocessing.cpu_count())
        if file_to_save is not None:
            w2vModel.save(file_to_save)
    all_vectors = []
    embeddingDim = w2vModel.vector_size  # embedding dimensionality
    embeddingUnknown = [0 for i in range(embeddingDim)]
    for sentence in sentences:
        this_vector = []
        for word in sentence:
            if word in w2vModel.wv.vocab:
                this_vector.append(w2vModel[word])
            else:
                this_vector.append(embeddingUnknown)
        all_vectors.append(this_vector)
    return all_vectors
Example #24
Source File: word2vec_helpers.py From DetectMaliciousURL with Apache License 2.0 | 5 votes |
def embedding_sentences(sentences, embedding_size = 128, window = 5, min_count = 5, file_to_load = None, file_to_save = None):
    '''
    embedding_size: word embedding dimensionality
    window: context window size
    min_count: words occurring fewer than min_count times are dropped
    '''
    if file_to_load is not None:
        w2vModel = Word2Vec.load(file_to_load)
    else:
        w2vModel = Word2Vec(sentences, size = embedding_size, window = window, min_count = min_count,
                            workers = multiprocessing.cpu_count())
        if file_to_save is not None:
            w2vModel.save(file_to_save)
    all_vectors = []
    embeddingDim = w2vModel.vector_size  # embedding dimensionality
    embeddingUnknown = [0 for i in range(embeddingDim)]
    for sentence in sentences:
        this_vector = []
        for word in sentence:
            if word in w2vModel.wv.vocab:
                this_vector.append(w2vModel[word])
            else:
                this_vector.append(embeddingUnknown)
        all_vectors.append(this_vector)
    return all_vectors
Example #25
Source File: informativeness.py From nonce2vec with MIT License | 5 votes |
def __init__(self, model_path, sum_filter=None, sum_thresh=None,
             train_filter=None, train_thresh=None, sort_by=None):
    """Initialize the Informativeness instance.

    Args:
        model_path (str): The absolute path to the gensim w2v CBOW model.
        sum_filter (str): Filter for the sum initialization phase.
        sum_thresh (int): Threshold for sum filter (self and cwi filters only).
        train_filter (str): Filter for the training phase.
        train_thresh (int): Threshold for the train filter (self and cwi filters only).
        sort_by (str): Sort context items in asc or desc of cwi values before training.
    """
    self._sum_filter = sum_filter
    if sum_filter and sum_filter != 'random' and sum_thresh is None:
        raise Exception('Setting sum_filter as \'{}\' requires specifying '
                        'a threshold parameter'.format(sum_filter))
    self._sum_thresh = sum_thresh
    self._train_filter = train_filter
    if train_filter and train_filter != 'random' and train_thresh is None:
        raise Exception('Setting train_filter as \'{}\' requires '
                        'specifying a threshold parameter'
                        .format(train_filter))
    self._train_thresh = train_thresh
    self._model = Word2Vec.load(model_path)
    self._sort_by = sort_by
Example #26
Source File: model.py From cakechat with Apache License 2.0 | 5 votes |
def _load_model(model_path):
    _logger.info('Loading model from {}'.format(model_path))
    model = Word2Vec.load(model_path, mmap='r')
    _logger.info('Model "{}" has been loaded.'.format(os.path.basename(model_path)))
    return model
Example #27
Source File: data_utils.py From CCKS2019-Chinese-Clinical-NER with MIT License | 5 votes |
def load_vocab(vocab_filepath):
    """
    Load the dictionary mapping from word to id
    :param vocab_filepath: the file path to the pre-built dictionary
    :return: the dictionary mapping from word to id
    """
    with open(vocab_filepath, "rb") as fr:
        word2id = pickle.load(fr)
    return word2id
Example #28
Source File: main.py From IDEA with MIT License | 5 votes |
def build_phrase(doc):
    # load phrase model
    return trigram[bigram[doc]]
Example #29
Source File: main.py From IDEA with MIT License | 5 votes |
def load_phrase():
    global bigram
    global trigram
    bigram = Phrases.load(os.path.join("..", "model", "bigram.model"))
    trigram = Phrases.load(os.path.join("..", "model", "trigram.model"))
Example #30
Source File: main.py From IDEA with MIT License | 5 votes |
def load_obj(filename):
    with open(filename) as fin:
        return cPickle.load(fin)