Python keras.layers.embeddings.Embedding() Examples

The following are 20 code examples of keras.layers.embeddings.Embedding(), collected from open-source projects. Each example lists its original project, source file, and license.
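Before the project examples, here is a minimal sketch (not taken from any of the projects below) of the basic call: Embedding maps integer token indices to dense float vectors, so a batch of integer sequences of shape (batch, input_length) becomes a float tensor of shape (batch, input_length, output_dim).

import numpy as np
from keras.models import Sequential
from keras.layers.embeddings import Embedding

model = Sequential()
# vocabulary of 1000 indices, 64-dimensional vectors, sequences of length 10
model.add(Embedding(input_dim=1000, output_dim=64, input_length=10))
model.compile('rmsprop', 'mse')

# batch of 32 integer sequences -> (32, 10, 64) float output
output = model.predict(np.random.randint(1000, size=(32, 10)))
print(output.shape)  # (32, 10, 64)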
Example #1
Source File: embeddings_test.py    From DeepLearning_Wavelet-LSTM with MIT License
def test_embedding():
    layer_test(Embedding,
               kwargs={'output_dim': 4, 'input_dim': 10, 'input_length': 2},
               input_shape=(3, 2),
               input_dtype='int32',
               expected_output_dtype=K.floatx())
    layer_test(Embedding,
               kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True},
               input_shape=(3, 2),
               input_dtype='int32',
               expected_output_dtype=K.floatx())
    layer_test(Embedding,
               kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True},
               input_shape=(3, 2, 5),
               input_dtype='int32',
               expected_output_dtype=K.floatx())
    layer_test(Embedding,
               kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True, 'input_length': (None, 5)},
               input_shape=(3, 2, 5),
               input_dtype='int32',
               expected_output_dtype=K.floatx()) 
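layer_test here is the standard Keras test helper (keras.utils.test_utils.layer_test): it instantiates the layer from kwargs, feeds it random data of the given input_shape and input_dtype, and checks, among other things, that the output dtype matches expected_output_dtype. The four calls above cover a fixed-length 2D input, a masked 2D input, a 3D integer input, and a per-dimension input_length tuple.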
Example #2
Source File: model_zoo.py    From visual_turing_test-tutorial with MIT License
def create(self):

        assert self._config.textual_embedding_dim == 0, \
                'Embedding cannot be learnt but must be fixed'

        language_forward = Sequential()
        language_forward.add(self._config.recurrent_encoder(
            self._config.hidden_state_dim, return_sequences=False,
            input_shape=(self._config.max_input_time_steps, self._config.input_dim)))
        self.language_forward = language_forward

        language_backward = Sequential()
        language_backward.add(self._config.recurrent_encoder(
            self._config.hidden_state_dim, return_sequences=False,
            go_backwards=True,
            input_shape=(self._config.max_input_time_steps, self._config.input_dim)))
        self.language_backward = language_backward

        self.add(Merge([language_forward, language_backward]))
        self.deep_mlp()
        self.add(Dense(self._config.output_dim))
        self.add(Activation('softmax')) 
Example #3
Source File: model_zoo.py    From visual_turing_test-tutorial with MIT License
def create(self):
        self._input_name = 'text'
        self._output_name = 'output'

        self.add_input(
                name=self._input_name, 
                input_shape=(self._config.max_input_time_steps, self._config.input_dim,))
        self.inputs['text'].input = T.imatrix()
        self.add_node(Embedding(
                self._config.input_dim, 
                self._config.textual_embedding_dim, 
                mask_zero=True), 
                name='embedding', input='text')
        self.add_node(
                self._config.recurrent_encoder(
                    self._config.hidden_state_dim, 
                    return_sequences=False,
                    go_backwards=self._config.go_backwards),
                name='recurrent', input='embedding') 
        self.add_node(Dropout(0.5), name='dropout', input='recurrent')
        self.add_node(Dense(self._config.output_dim), name='dense', input='dropout')
        self.add_node(Activation('softmax'), name='softmax', input='dense')
        self.add_output(name=self._output_name, input='softmax') 
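Note that Examples #2 and #3 are written against the legacy Keras 0.x APIs: the Merge layer and the Graph-style add_input/add_node/add_output methods were removed in later Keras releases, so these two snippets only run on old Keras versions.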
Example #4
Source File: model.py    From DeepSequenceClassification with GNU General Public License v2.0
def gen_model(vocab_size=100, embedding_size=128, maxlen=100, output_size=6, hidden_layer_size=100, num_hidden_layers = 1, RNN_LAYER_TYPE="LSTM"):
    RNN_CLASS = LSTM
    if RNN_LAYER_TYPE == "GRU":
        RNN_CLASS = GRU
    logger.info("Parameters: vocab_size = %s, embedding_size = %s, maxlen = %s, output_size = %s, hidden_layer_size = %s, " %\
            (vocab_size, embedding_size, maxlen, output_size, hidden_layer_size))
    logger.info("Building Model")
    model = Sequential()
    logger.info("Init Model with vocab_size = %s, embedding_size = %s, maxlen = %s" % (vocab_size, embedding_size, maxlen))
    model.add(Embedding(vocab_size, embedding_size, input_length=maxlen))
    logger.info("Added Embedding Layer")
    model.add(Dropout(0.5))
    logger.info("Added Dropout Layer")
    for i in xrange(num_hidden_layers):
        model.add(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True))
        logger.info("Added %s Layer" % RNN_LAYER_TYPE)
        model.add(Dropout(0.5))
        logger.info("Added Dropout Layer")
    model.add(RNN_CLASS(output_dim=output_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True))
    logger.info("Added %s Layer" % RNN_LAYER_TYPE)
    model.add(Dropout(0.5))
    logger.info("Added Dropout Layer")
    model.add(TimeDistributedDense(output_size, activation="softmax"))
    logger.info("Added Dropout Layer")
    logger.info("Created model with following config:\n%s" % json.dumps(model.get_config(), indent=4))
    logger.info("Compiling model with optimizer %s" % optimizer)
    start_time = time.time()
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f seconds." % total_time)
    return model 
Example #5
Source File: model_zoo.py    From visual_turing_test-tutorial with MIT License
def textual_embedding_fixed_length(self, language_model, mask_zero):
        """
        In contrast to textual_embedding, it produces a fixed length output.
        """
        if self._config.textual_embedding_dim > 0:
            print('Textual Embedding with fixed length is on')
            language_model.add(Embedding(
                self._config.input_dim, 
                self._config.textual_embedding_dim,
                input_length=self._config.max_input_time_steps,
                mask_zero=mask_zero))
        else:
            print('Textual Embedding with fixed length is off')
            language_model.add(Reshape(
                input_shape=(self._config.max_input_time_steps, self._config.input_dim),
                dims=(self._config.max_input_time_steps, self._config.input_dim)))
            if mask_zero:
                language_model.add(Masking(0))
        return language_model 
Example #6
Source File: gather_test.py    From ccg2lambda with Apache License 2.0
def setUp(self):
        self.embs = np.array([
            [0, 0, 0],
            [1, 10, 100],
            [2, 20, 200],
            [3, 30, 300],
            [4, 40, 400],
            [5, 50, 500],
            [6, 60, 600],
            [7, 70, 700],
            [8, 80, 800],
            [9, 90, 900]],
            dtype='float32')
        self.emb_dim = self.embs.shape[1]
        self.token_emb = Embedding(
            input_dim=self.embs.shape[0],
            output_dim=self.emb_dim,
            weights=[self.embs],
            mask_zero=False, # Reshape layer does not support masking.
            trainable=True,
            name='token_emb')
        self.gather_layer = Lambda(gather3, output_shape=gather_output_shape3) 
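A standalone sketch of the same idea (toy data, not part of ccg2lambda): an Embedding initialized with fixed weights is a row-lookup table, so index i returns row i of the weight matrix.

import numpy as np
from keras.models import Sequential
from keras.layers.embeddings import Embedding

embs = np.arange(30, dtype='float32').reshape(10, 3)
model = Sequential()
model.add(Embedding(input_dim=10, output_dim=3, weights=[embs], input_length=2))
model.compile('rmsprop', 'mse')

out = model.predict(np.array([[1, 4]]))
print(np.allclose(out[0], embs[[1, 4]]))  # True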
Example #7
Source File: model_zoo.py    From visual_turing_test-tutorial with MIT License
def textual_embedding(self, language_model, mask_zero):
        """
        Note:
        * mask_zero only makes sense if embedding is learnt
        """
        if self._config.textual_embedding_dim > 0:
            print('Textual Embedding is on')
            language_model.add(Embedding(
                self._config.input_dim, 
                self._config.textual_embedding_dim, 
                mask_zero=mask_zero))
        else:
            print('Textual Embedding is off')
            language_model.add(Reshape(
                input_shape=(self._config.max_input_time_steps, self._config.input_dim),
                dims=(self._config.max_input_time_steps, self._config.input_dim)))
            if mask_zero:
                language_model.add(Masking(0))
        return language_model 
Example #8
Source File: recurrent_test.py    From DeepLearning_Wavelet-LSTM with MIT License
def test_masking_correctness(layer_class):
    # Check masking: output with left padding and right padding
    # should be the same.
    model = Sequential()
    model.add(embeddings.Embedding(embedding_num, embedding_dim,
                                   mask_zero=True,
                                   input_length=timesteps,
                                   batch_input_shape=(num_samples, timesteps)))
    layer = layer_class(units, return_sequences=False)
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')

    left_padded_input = np.ones((num_samples, timesteps))
    left_padded_input[0, :1] = 0
    left_padded_input[1, :2] = 0
    out6 = model.predict(left_padded_input)

    right_padded_input = np.ones((num_samples, timesteps))
    right_padded_input[0, -1:] = 0
    right_padded_input[1, -2:] = 0
    out7 = model.predict(right_padded_input)

    assert_allclose(out7, out6, atol=1e-5) 
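The same invariance can be seen directly (a minimal sketch with assumed sizes, mirroring the test above): with mask_zero=True the recurrent layer skips timesteps whose input index is 0, so left- and right-padded versions of a sequence produce the same final state.

import numpy as np
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers.embeddings import Embedding

model = Sequential()
model.add(Embedding(10, 4, mask_zero=True, input_length=5))
model.add(LSTM(8, return_sequences=False))
model.compile(optimizer='sgd', loss='mse')

left = np.array([[0, 0, 1, 2, 3]])   # zeros pad on the left
right = np.array([[1, 2, 3, 0, 0]])  # zeros pad on the right
print(np.allclose(model.predict(left), model.predict(right), atol=1e-5))  # True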
Example #9
Source File: models.py    From neurowriter with MIT License
def create(inputtokens, vocabsize, units=16, dropout=0, embedding=32):

        input_ = Input(shape=(inputtokens,), dtype='int32')

        # Embedding layer
        net = Embedding(input_dim=vocabsize, output_dim=embedding, input_length=inputtokens)(input_)
        net = Dropout(dropout)(net)

        # Bidirectional LSTM layer
        net = BatchNormalization()(net)
        net = Bidirectional(CuDNNLSTM(units))(net)
        net = Dropout(dropout)(net)

        # Output layer
        net = Dense(vocabsize, activation='softmax')(net)
        model = Model(inputs=input_, outputs=net)

        # Make data-parallel
        ngpus = len(get_available_gpus())
        if ngpus > 1:
            model = make_parallel(model, ngpus)

        return model 
Example #10
Source File: dl_models.py    From Sarcasm-Detection with MIT License
def build_embedding_layer(word2index, emb_type='glove', embedding_dim=300, max_len=40, trainable=True):
    vocab_size = len(word2index) + 1
    if 'glove' in emb_type:
        word2vec_map = utils.load_vectors(filename='glove.6B.%dd.txt' % embedding_dim)
        emb_layer = pretrained_embedding_layer(word2vec_map, word2index, embedding_dim, vocab_size, trainable=trainable)
    elif 'emoji' in emb_type:
        emoji2vec_map = utils.load_vectors(filename='emoji_embeddings_%dd.txt' % embedding_dim)
        emb_layer = pretrained_embedding_layer(emoji2vec_map, word2index, embedding_dim, vocab_size, trainable=trainable)
    elif 'random' in emb_type:
        words = word2index.keys()
        random2vec_map = utils.build_random_word2vec(words, embedding_dim=embedding_dim, variance=1)
        emb_layer = pretrained_embedding_layer(random2vec_map, word2index, embedding_dim, vocab_size, trainable=trainable)
    else:
        emb_layer = Embedding(vocab_size, embedding_dim, input_length=max_len, trainable=trainable)
        emb_layer.build((None,))
    return emb_layer 
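The pretrained_embedding_layer helper called above is not shown in this file; below is a plausible sketch of it, following the same pattern as pre_trained_embedding_layer in Example #18 (the name, signature, and handling of missing words are assumptions):

import numpy as np
from keras.layers.embeddings import Embedding

def pretrained_embedding_layer(word2vec_map, word2index, embedding_dim,
                               vocab_size, trainable=False):
    # build a (vocab_size, embedding_dim) matrix from the pretrained vectors;
    # row 0 is left at zero for the padding index
    emb_matrix = np.zeros((vocab_size, embedding_dim))
    for word, index in word2index.items():
        vector = word2vec_map.get(word)
        if vector is not None:
            emb_matrix[index, :] = vector
    emb_layer = Embedding(vocab_size, embedding_dim, trainable=trainable)
    emb_layer.build((None,))
    emb_layer.set_weights([emb_matrix])
    return emb_layer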
Example #11
Source File: lstm.py    From hyperas with MIT License
def model(X_train, X_test, y_train, y_test, max_features, maxlen):
    model = Sequential()
    model.add(Embedding(max_features, 128, input_length=maxlen))
    model.add(LSTM(128))
    model.add(Dropout({{uniform(0, 1)}}))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    early_stopping = EarlyStopping(monitor='val_loss', patience=4)
    checkpointer = ModelCheckpoint(filepath='keras_weights.hdf5',
                                   verbose=1,
                                   save_best_only=True)

    model.fit(X_train, y_train,
              batch_size={{choice([32, 64, 128])}},
              nb_epoch=1,
              validation_split=0.08,
              callbacks=[early_stopping, checkpointer])

    score, acc = model.evaluate(X_test, y_test, verbose=0)

    print('Test accuracy:', acc)
    return {'loss': -acc, 'status': STATUS_OK, 'model': model} 
Example #12
Source File: kerasClassify.py    From emailinsight with MIT License
def evaluate_recurrent_model(dataset,num_classes):
    (X_train, Y_train), (X_test, Y_test) = dataset
    max_features = 20000
    maxlen = 125  # cut texts after this number of words (among top max_features most common words)
    batch_size = 32

    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')
    print("Pad sequences (samples x time) with maxlen %d"%maxlen)
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Build model...')
    model = Sequential()
    model.add(Embedding(max_features, 128, input_length=maxlen))
    model.add(GRU(512))  # try using a GRU instead, for fun
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # try using different optimizers and different optimizer configs
    model.compile(loss='categorical_crossentropy',optimizer='adam')

    print("Train...")
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=15,
              validation_data=(X_test, Y_test), show_accuracy=True)
    score, acc = model.evaluate(X_test, Y_test,
                                batch_size=batch_size,
                                show_accuracy=True)
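    # note: 'verbose' is not defined in this function; presumably a module-level flag elsewhere in kerasClassify.py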
    if verbose:
        print('Test score:', score)
        print('Test accuracy:', acc)
    return acc
Example #13
Source File: lstm.py    From dga_predict with GNU General Public License v2.0
def build_model(max_features, maxlen):
    """Build LSTM model"""
    model = Sequential()
    model.add(Embedding(max_features, 128, input_length=maxlen))
    model.add(LSTM(128))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop')

    return model 
Example #14
Source File: models.py    From neurowriter with MIT License
def create(inputtokens, vocabsize, layers=1, units=16, dropout=0, embedding=32):
        
        input_ = Input(shape=(inputtokens,), dtype='int32')
        
        # Embedding layer
        net = Embedding(input_dim=vocabsize, output_dim=embedding, input_length=inputtokens)(input_)
        net = Dropout(dropout)(net)
            
        # Bidirectional LSTM layer
        net = BatchNormalization()(net)
        net = Bidirectional(CuDNNLSTM(units, return_sequences=(layers > 1)))(net)
        net = Dropout(dropout)(net)
            
        # Rest of LSTM layers with residual connections (if any)
        for i in range(1, layers):
            if i < layers-1:
                block = BatchNormalization()(net)
                block = CuDNNLSTM(2*units, return_sequences=True)(block)
                block = Dropout(dropout)(block)
                net = add([block, net])
            else:
                net = BatchNormalization()(net)
                net = CuDNNLSTM(2*units)(net)
                net = Dropout(dropout)(net)
                    
        # Output layer
        net = Dense(vocabsize, activation='softmax')(net)
        model = Model(inputs=input_, outputs=net)
        
        # Make data-parallel
        ngpus = len(get_available_gpus())
        if ngpus > 1:
            model = make_parallel(model, ngpus)

        return model 
Example #15
Source File: models.py    From neurowriter with MIT License
def create(inputtokens, vocabsize, kernels=64, wavenetblocks=1, dropout=0, embedding=32):
        kernel_size = 7
        maxdilation = inputtokens
        
        input_ = Input(shape=(inputtokens,), dtype='int32')
        # Embedding layer
        net = Embedding(input_dim=vocabsize, output_dim=embedding, input_length=inputtokens)(input_)
        net = Dropout(dropout)(net)
        # Wavenet starts!
        net = BatchNormalization()(net)
        net = Conv1D(kernels, 1, activation='tanh')(net)
        skip_connections = []
        for i in range(wavenetblocks):
            net, skip = wavenetblock(maxdilation, dropout, kernels, kernel_size)(net)
            skip_connections.append(skip)
        if wavenetblocks > 1:
            net = add(skip_connections)
        else:
            net = skip
        net = Conv1D(kernels, 1, activation='tanh')(net)
        net = Conv1D(kernels, 1)(net)
        net = Flatten()(net)
        net = Dense(vocabsize, activation='softmax')(net)
        model = Model(inputs=input_, outputs=net)
        
        # Make data-parallel
        ngpus = len(get_available_gpus())
        if ngpus > 1:
            model = make_parallel(model, ngpus)

        return model 
Example #16
Source File: rnn_text.py    From EventForecast with GNU Lesser General Public License v3.0
def model_rnn(vocab, weights, dataPath, batchn, epoch, repeat):
    global LEN
    global DIM
    global BATCH
    testx, testy = build_dataset('%s%d'%(dataPath, 2528), vocab, weights=weights)
    testx = np.array(testx, dtype=np.float64)
    testy = np.array(testy, dtype=np.float64)
    
    # build and fit model
    model = Sequential()
    #model.add(Embedding(weights.shape[0],weights.shape[1], input_length=LEN, mask_zero=True,weights=[weights]))
    model.add(Bidirectional(LSTM(50, activation='relu', return_sequences=True), input_shape=(LEN, DIM)))
    model.add(Bidirectional(LSTM(50, activation='relu')))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    sgd = opt.SGD(lr=0.1, decay=1e-2, momentum=0.9)
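    # note: the SGD optimizer built above is never used; compile() below passes 'adam'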
    model.compile(loss='mean_squared_error', optimizer='adam')
    print(model.summary())
    #model.fit_generator(train_data_generator2('news_50_bin/news_stem_'), 500, epochs=10, verbose=2, validation_data=None)
    index = 0
    while index < epoch:
        data, result = build_dataset('%s%d'%(dataPath, index%2528), vocab, weights=weights)
        for i in range(1, batchn):
            index += 1
            newData, newResult = build_dataset('%s%d'%(dataPath, index), vocab, weights=weights)
            data.extend(newData)
            result.extend(newResult)
        model.fit(np.array(data, dtype=np.float64), np.array(result, dtype=np.float64), epochs=repeat, batch_size=BATCH, verbose=0, validation_data=(testx,testy))
        model.save('hotnews_r_%d_%d.h5'%(BATCH, index))
        predict = model.predict(testx)
        error = 0
        for i in range(testy.shape[0]):
            error += abs(testy[i] - predict[i][0])/testy[i]
            #print(testy[i], predict[i][0])
        print('batch %d of %d, epoch %d, mean relative error: %f'%(index%2528+1, 2528, int(index/2528)+1, error/testy.shape[0]))
        index += 1
    return model

# train cnn model 
Example #17
Source File: models.py    From neurowriter with MIT License
def create(inputtokens, vocabsize, convlayers=5, kernels=32,
               convdrop=0.1, denselayers=0, denseunits=64, densedrop=0.1,
               embedding=32):
        kernel_size = 2
        pool_size = 2
        if convlayers < 1:
            raise ValueError("Number of layers must be at least 1")
            
        model = Sequential()        
        # Embedding layer
        model.add(Embedding(input_dim=vocabsize, output_dim=embedding,
                            input_length=inputtokens))
        # First conv+pool layer        
        model.add(Conv1D(kernels, kernel_size, padding='causal', 
                         activation='relu'))
        model.add(Dropout(convdrop))
        model.add(MaxPooling1D(pool_size))
        # Additional dilated conv + pool layers (if possible)
        for i in range(1, convlayers):
            try:
                model.add(Conv1D(kernels, kernel_size, padding='causal', 
                                 dilation_rate=2**i, activation='relu'))
                model.add(Dropout(convdrop))
                model.add(MaxPooling1D(pool_size))
            except:
                print("Warning: not possible to add %i-th layer, moving to output" % i)
                break
                
        # Flatten and dense layers
        model.add(Flatten())
        for i in range(denselayers):
            model.add(Dense(denseunits, activation='relu'))
            model.add(Dropout(densedrop))
        # Output layer
        model.add(Dense(vocabsize, activation='softmax'))
        return model 
Example #18
Source File: TrajectoryTools.py    From TrajLib with Apache License 2.0
def pre_trained_embedding_layer(word_to_vec_map, word_to_index):
    vocab_len = len(word_to_index) + 1
    emb_dim = word_to_vec_map["@"].shape[0]
    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]
    embedding_layer = Embedding(vocab_len, emb_dim)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer 
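A hypothetical toy usage of the helper above (the vectors and vocabulary are made up): every word in word_to_index must also appear in word_to_vec_map, and the "@" entry supplies the embedding dimension.

import numpy as np

word_to_vec_map = {'@': np.array([0.1, 0.2]),
                   'walk': np.array([0.3, 0.4]),
                   'drive': np.array([0.5, 0.6])}
word_to_index = {'@': 1, 'walk': 2, 'drive': 3}

layer = pre_trained_embedding_layer(word_to_vec_map, word_to_index)
print(layer.get_weights()[0].shape)  # (4, 2); row 0 stays zero for padding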
Example #19
Source File: rnn_text.py    From EventForecast with GNU Lesser General Public License v3.0
def model_cnn(vocab, weights, dataPath, batchn, epoch):
    global LEN
    global DIM
    global BATCH
    testx, testy = build_dataset('%s%d'%(dataPath, 2528), vocab, weights=weights)
    testx = np.array(testx, dtype=np.float64)
    testy = np.array(testy, dtype=np.float64)
    model = Sequential()
    #model.add(Embedding(400001, 50, input_length=LEN, mask_zero=False,weights=[embedModel]))
    model.add(Conv1D(input_shape=(LEN, DIM), filters=32, kernel_size=30, padding='same', activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(250, activation='softmax'))
    model.add(Dense(1, activation='sigmoid'))  # sigmoid for the binary output; a 1-unit softmax would always emit 1.0
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())
    index = 0
    while True:
        data, result = build_dataset('%s%d'%(dataPath, index%2528), vocab, weights)
        for i in range(1, batchn):
            index += 1
            newData, newResult = build_dataset('%s%d'%(dataPath, index), vocab, weights)
            data.extend(newData)
            result.extend(newResult)
        model.fit(np.array(data, dtype=np.float64), np.array(result, dtype=np.float64), epochs=10, batch_size=BATCH, verbose=2, validation_data = (testx,testy))
        model.save('hotnews_c_%d_%d.h5'%(BATCH, index))
        predict = model.predict(testx)
        for i in range(testy.shape[0]):
            print(testy[i], predict[i])
        index += 1
        if index > epoch:
            return model 
Example #20
Source File: utils.py    From CIKM-AnalytiCup-2018 with Apache License 2.0
def get_word_embeddings(q1, q2, nb_words, embedding_size, embedding_matrix, max_sequence_length, trainable, embedding_dropout):
    embedding = Embedding(nb_words,
                    embedding_size,
                    weights=[embedding_matrix],
                    input_length=max_sequence_length,
                    trainable=trainable)
    q1_embed = embedding(q1)
    q1_embed = SpatialDropout1D(embedding_dropout)(q1_embed)
    q2_embed = embedding(q2)
    q2_embed = SpatialDropout1D(embedding_dropout)(q2_embed)
    return q1_embed, q2_embed
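A hypothetical usage sketch of the helper above (all sizes and the zero weight matrix are made up): both question inputs share the single Embedding instance, so q1 and q2 are projected with identical weights.

import numpy as np
from keras.layers import Input

nb_words, embedding_size, max_sequence_length = 5000, 100, 20
embedding_matrix = np.zeros((nb_words, embedding_size))  # placeholder weights

q1 = Input(shape=(max_sequence_length,), dtype='int32')
q2 = Input(shape=(max_sequence_length,), dtype='int32')
q1_embed, q2_embed = get_word_embeddings(
    q1, q2, nb_words, embedding_size, embedding_matrix,
    max_sequence_length, trainable=False, embedding_dropout=0.2)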