Python load embedding matrix

5 Python code examples related to "load embedding matrix" are shown below. You can view the original project or source file by following the link above each example.
Example 1
Source File: data.py    From BERT with Apache License 2.0
import numpy as np


def load_embedding_matrix(embedding_file):
    print("read embedding from: %s" % embedding_file)
    d = {}
    n = 0
    with open(embedding_file, "r") as f:
        line = f.readline()
        while line:
            n += 1
            # each line looks like "w<index> v1 v2 ... vd"
            w, v = line.strip().split(" ", 1)
            d[int(w[1:])] = v
            line = f.readline()
    # infer the dimension from the last vector read
    dim = len(v.split(" "))

    # add two indices for missing and padding tokens
    emb_matrix = np.zeros((n + 2, dim), dtype=float)
    for key, val in d.items():
        emb_matrix[key] = np.asarray(val.split(" "), dtype=float)
    return emb_matrix.astype(np.float32)
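A minimal usage sketch for the loader above, assuming an embedding file in the format it expects (one index-prefixed token per line, followed by space-separated floats); the file name is hypothetical:

# hypothetical "vectors.txt":
# w1 0.12 -0.30 0.88
# w2 0.05 0.41 -0.27
emb = load_embedding_matrix("vectors.txt")
print(emb.shape)  # (n + 2, dim); rows 0 and n + 1 stay zero for padding/missing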
Example 2
Source File: utils.py    From TextSentimentClassification with MIT License
import os

import numpy as np
from gensim import models

# loadDict is a project helper that reads a JSON file into a dict


def load_embedding_matrix(wv_path, int2vocabPath="dataset/training_i2v.json"):
    int2vocab = loadDict(int2vocabPath)
    vocab2int = loadDict(int2vocabPath.replace("i2v", "v2i"))
    vocab_size = vocab2int["<unk>"] + 1
    assert vocab_size == len(int2vocab.keys()), \
        "This must be a global dict, whether static or non-static!"
    # the embedding dimension is encoded as the path suffix, e.g. "...-200"
    embedding_size = int(wv_path.split("-")[-1])
    embeddings = np.random.uniform(low=-0.05, high=0.05, size=(vocab_size, embedding_size))
    if "glove" in wv_path.split("/"):
        model = models.KeyedVectors.load_word2vec_format(wv_path, binary=False)
        embeddings[vocab_size - 1] = model['<unk>']
    else:
        model = models.Word2Vec.load(wv_path)
        infrequentWords = loadDict(os.path.dirname(wv_path) + "/infrequent.json")
        # embed <unk> as the average vector of the infrequent words
        tmp = np.zeros([embedding_size, ])
        for w in infrequentWords[str(2)]:
            tmp += model[w]
        embeddings[vocab_size - 1] = tmp / len(infrequentWords[str(2)])
    for i in range(1, vocab_size - 1):
        word = int2vocab[str(i)]
        embeddings[i] = model[word]
    return embeddings
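This loader depends on project-specific vocabulary files; a sketch of the layout it assumes (file names follow the default argument and the path convention in the code, contents are hypothetical):

# dataset/training_i2v.json  -- index-to-word map; JSON keys are strings
#   {"1": "the", "2": "movie", ..., "10000": "<unk>"}
# dataset/training_v2i.json  -- the inverse word-to-index map
#   {"the": 1, "movie": 2, ..., "<unk>": 10000}
# The "-200" suffix is how the loader infers embedding_size:
embeddings = load_embedding_matrix("models/word2vec-200")

Note the design choice in the word2vec branch: since <unk> has no trained vector, it is embedded as the average of the infrequent words' vectors.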
Example 3
Source File: base_model.py    From MatchZoo with Apache License 2.0
def load_embedding_matrix(
    self,
    embedding_matrix: np.ndarray,
    name: str = 'embedding'
):
    """
    Load an embedding matrix.

    Load an embedding matrix into the model's embedding layer. The name
    of the embedding layer is specified by `name`. For models with only
    one embedding layer, set `name='embedding'` when creating the keras
    layer, and use the default `name` when loading the matrix. For models
    with more than one embedding layer, initialize the keras layers with
    different names, and set `name` accordingly to load a matrix into a
    chosen layer.

    :param embedding_matrix: Embedding matrix to be loaded.
    :param name: Name of the layer. (default: 'embedding')
    """
    self.get_embedding_layer(name).set_weights([embedding_matrix])
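A usage sketch, assuming an already-built MatchZoo model whose embedding layer was created with the default name 'embedding'; the model instance, vocabulary size, and dimension below are illustrative:

import numpy as np

# model: a built MatchZoo model instance (hypothetical here);
# the matrix shape must match the layer's (input_dim, output_dim)
embedding_matrix = np.random.uniform(-0.2, 0.2, (10000, 100))
model.load_embedding_matrix(embedding_matrix)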
Example 4
Source File: data_utils.py    From deep-mlsa with Apache License 2.0
import os

import numpy as np


def load_embedding_matrix(config_data):
    output_directory = config_data['embeddings_directory']
    output_basename = config_data['embeddings_basename']

    # the matrix was precomputed and serialized as a single .npy file
    path = os.path.join(output_directory, output_basename)
    matrix_fname = os.path.join(path, 'embedding_matrix.npy')
    embedding_matrix = np.load(matrix_fname)
    return embedding_matrix
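This loader only works if an earlier pipeline step serialized the matrix to that exact location; a minimal sketch of the counterpart save step (the config keys mirror the loader, the directory name and matrix are placeholders):

import os
import numpy as np

config_data = {'embeddings_directory': 'embeddings', 'embeddings_basename': 'glove-300'}
path = os.path.join(config_data['embeddings_directory'], config_data['embeddings_basename'])
os.makedirs(path, exist_ok=True)
np.save(os.path.join(path, 'embedding_matrix.npy'),
        np.zeros((10000, 300), dtype=np.float32))  # placeholder matrix
embedding_matrix = load_embedding_matrix(config_data)  # round-trips the same array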