Python keras.layers.embeddings.Embedding() Examples

The following are 20 code examples of keras.layers.embeddings.Embedding(), collected from open-source projects. Each example lists its original project, source file, and license.
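Before the project examples, here is a minimal sketch (not taken from any of the projects below) of the basic call: Embedding maps integer token indices to dense float vectors, so a batch of integer sequences of shape (batch, input_length) becomes a float tensor of shape (batch, input_length, output_dim).

import numpy as np
from keras.models import Sequential
from keras.layers.embeddings import Embedding

model = Sequential()
# vocabulary of 1000 indices, 64-dimensional vectors, sequences of length 10
model.add(Embedding(input_dim=1000, output_dim=64, input_length=10))
model.compile('rmsprop', 'mse')

# batch of 32 integer sequences -> (32, 10, 64) float output
output = model.predict(np.random.randint(1000, size=(32, 10)))
print(output.shape)  # (32, 10, 64)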
Example #1
Source File: embeddings_test.py    From DeepLearning_Wavelet-LSTM with MIT License
def test_embedding():
    layer_test(Embedding,
               kwargs={'output_dim': 4, 'input_dim': 10, 'input_length': 2},
               input_shape=(3, 2),
               input_dtype='int32',
               expected_output_dtype=K.floatx())
    layer_test(Embedding,
               kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True},
               input_shape=(3, 2),
               input_dtype='int32',
               expected_output_dtype=K.floatx())
    layer_test(Embedding,
               kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True},
               input_shape=(3, 2, 5),
               input_dtype='int32',
               expected_output_dtype=K.floatx())
    layer_test(Embedding,
               kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True, 'input_length': (None, 5)},
               input_shape=(3, 2, 5),
               input_dtype='int32',
               expected_output_dtype=K.floatx()) 
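layer_test here is the standard Keras test helper (keras.utils.test_utils.layer_test): it instantiates the layer from kwargs, feeds it random data of the given input_shape and input_dtype, and checks, among other things, that the output dtype matches expected_output_dtype. The four calls above cover a fixed-length 2D input, a masked 2D input, a 3D integer input, and a per-dimension input_length tuple.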
Example #2
Source File: model_zoo.py    From visual_turing_test-tutorial with MIT License
def create(self):

        assert self._config.textual_embedding_dim == 0, \
                'Embedding cannot be learnt but must be fixed'

        language_forward = Sequential()
        language_forward.add(self._config.recurrent_encoder(
            self._config.hidden_state_dim, return_sequences=False,
            input_shape=(self._config.max_input_time_steps, self._config.input_dim)))
        self.language_forward = language_forward

        language_backward = Sequential()
        language_backward.add(self._config.recurrent_encoder(
            self._config.hidden_state_dim, return_sequences=False,
            go_backwards=True,
            input_shape=(self._config.max_input_time_steps, self._config.input_dim)))
        self.language_backward = language_backward

        self.add(Merge([language_forward, language_backward]))
        self.deep_mlp()
        self.add(Dense(self._config.output_dim))
        self.add(Activation('softmax')) 
Example #3
Source File: model_zoo.py    From visual_turing_test-tutorial with MIT License
def create(self):
        self._input_name = 'text'
        self._output_name = 'output'

        self.add_input(
                name=self._input_name, 
                input_shape=(self._config.max_input_time_steps, self._config.input_dim,))
        self.inputs['text'].input = T.imatrix()
        self.add_node(Embedding(
                self._config.input_dim, 
                self._config.textual_embedding_dim, 
                mask_zero=True), 
                name='embedding', input='text')
        self.add_node(
                self._config.recurrent_encoder(
                    self._config.hidden_state_dim, 
                    return_sequences=False,
                    go_backwards=self._config.go_backwards),
                name='recurrent', input='embedding') 
        self.add_node(Dropout(0.5), name='dropout', input='recurrent')
        self.add_node(Dense(self._config.output_dim), name='dense', input='dropout')
        self.add_node(Activation('softmax'), name='softmax', input='dense')
        self.add_output(name=self._output_name, input='softmax') 
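Note that Examples #2 and #3 are written against the legacy Keras 0.x APIs: the Merge layer and the Graph-style add_input/add_node/add_output methods were removed in later Keras releases, so these two snippets only run on old Keras versions.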
Example #4
Source File: model.py    From DeepSequenceClassification with GNU General Public License v2.0
def gen_model(vocab_size=100, embedding_size=128, maxlen=100, output_size=6, hidden_layer_size=100, num_hidden_layers = 1, RNN_LAYER_TYPE="LSTM"):
    RNN_CLASS = LSTM
    if RNN_LAYER_TYPE == "GRU":
        RNN_CLASS = GRU
    logger.info("Parameters: vocab_size = %s, embedding_size = %s, maxlen = %s, output_size = %s, hidden_layer_size = %s, " %\
            (vocab_size, embedding_size, maxlen, output_size, hidden_layer_size))
    logger.info("Building Model")
    model = Sequential()
    logger.info("Init Model with vocab_size = %s, embedding_size = %s, maxlen = %s" % (vocab_size, embedding_size, maxlen))
    model.add(Embedding(vocab_size, embedding_size, input_length=maxlen))
    logger.info("Added Embedding Layer")
    model.add(Dropout(0.5))
    logger.info("Added Dropout Layer")
    for i in xrange(num_hidden_layers):
        model.add(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True))
        logger.info("Added %s Layer" % RNN_LAYER_TYPE)
        model.add(Dropout(0.5))
        logger.info("Added Dropout Layer")
    model.add(RNN_CLASS(output_dim=output_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True))
    logger.info("Added %s Layer" % RNN_LAYER_TYPE)
    model.add(Dropout(0.5))
    logger.info("Added Dropout Layer")
    model.add(TimeDistributedDense(output_size, activation="softmax"))
    logger.info("Added Dropout Layer")
    logger.info("Created model with following config:\n%s" % json.dumps(model.get_config(), indent=4))
    logger.info("Compiling model with optimizer %s" % optimizer)
    start_time = time.time()
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f seconds." % total_time)
    return model 
Example #5
Source File: model_zoo.py    From visual_turing_test-tutorial with MIT License
def textual_embedding_fixed_length(self, language_model, mask_zero):
        """
        In contrast to textual_embedding, it produces a fixed length output.
        """
        if self._config.textual_embedding_dim > 0:
            print('Textual Embedding with fixed length is on')
            language_model.add(Embedding(
                self._config.input_dim, 
                self._config.textual_embedding_dim,
                input_length=self._config.max_input_time_steps,
                mask_zero=mask_zero))
        else:
            print('Textual Embedding with fixed length is off')
            language_model.add(Reshape(
                input_shape=(self._config.max_input_time_steps, self._config.input_dim),
                dims=(self._config.max_input_time_steps, self._config.input_dim)))
            if mask_zero:
                language_model.add(Masking(0))
        return language_model 
Example #6
Source File: gather_test.py    From ccg2lambda with Apache License 2.0
def setUp(self):
        self.embs = np.array([
            [0, 0, 0],
            [1, 10, 100],
            [2, 20, 200],
            [3, 30, 300],
            [4, 40, 400],
            [5, 50, 500],
            [6, 60, 600],
            [7, 70, 700],
            [8, 80, 800],
            [9, 90, 900]],
            dtype='float32')
        self.emb_dim = self.embs.shape[1]
        self.token_emb = Embedding(
            input_dim=self.embs.shape[0],
            output_dim=self.emb_dim,
            weights=[self.embs],
            mask_zero=False, # Reshape layer does not support masking.
            trainable=True,
            name='token_emb')
        self.gather_layer = Lambda(gather3, output_shape=gather_output_shape3) 
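A standalone sketch of the same idea (toy data, not part of ccg2lambda): an Embedding initialized with fixed weights is a row-lookup table, so index i returns row i of the weight matrix.

import numpy as np
from keras.models import Sequential
from keras.layers.embeddings import Embedding

embs = np.arange(30, dtype='float32').reshape(10, 3)
model = Sequential()
model.add(Embedding(input_dim=10, output_dim=3, weights=[embs], input_length=2))
model.compile('rmsprop', 'mse')

out = model.predict(np.array([[1, 4]]))
print(np.allclose(out[0], embs[[1, 4]]))  # True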
Example #7
Source File: model_zoo.py    From visual_turing_test-tutorial with MIT License
def textual_embedding(self, language_model, mask_zero):
        """
        Note:
        * mask_zero only makes sense if embedding is learnt
        """
        if self._config.textual_embedding_dim > 0:
            print('Textual Embedding is on')
            language_model.add(Embedding(
                self._config.input_dim, 
                self._config.textual_embedding_dim, 
                mask_zero=mask_zero))
        else:
            print('Textual Embedding is off')
            language_model.add(Reshape(
                input_shape=(self._config.max_input_time_steps, self._config.input_dim),
                dims=(self._config.max_input_time_steps, self._config.input_dim)))
            if mask_zero:
                language_model.add(Masking(0))
        return language_model 
Example #8
Source File: recurrent_test.py    From DeepLearning_Wavelet-LSTM with MIT License
def test_masking_correctness(layer_class):
    # Check masking: output with left padding and right padding
    # should be the same.
    model = Sequential()
    model.add(embeddings.Embedding(embedding_num, embedding_dim,
                                   mask_zero=True,
                                   input_length=timesteps,
                                   batch_input_shape=(num_samples, timesteps)))
    layer = layer_class(units, return_sequences=False)
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')

    left_padded_input = np.ones((num_samples, timesteps))
    left_padded_input[0, :1] = 0
    left_padded_input[1, :2] = 0
    out6 = model.predict(left_padded_input)

    right_padded_input = np.ones((num_samples, timesteps))
    right_padded_input[0, -1:] = 0
    right_padded_input[1, -2:] = 0
    out7 = model.predict(right_padded_input)

    assert_allclose(out7, out6, atol=1e-5) 
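The same invariance can be seen directly (a minimal sketch with assumed sizes, mirroring the test above): with mask_zero=True the recurrent layer skips timesteps whose input index is 0, so left- and right-padded versions of a sequence produce the same final state.

import numpy as np
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers.embeddings import Embedding

model = Sequential()
model.add(Embedding(10, 4, mask_zero=True, input_length=5))
model.add(LSTM(8, return_sequences=False))
model.compile(optimizer='sgd', loss='mse')

left = np.array([[0, 0, 1, 2, 3]])   # zeros pad on the left
right = np.array([[1, 2, 3, 0, 0]])  # zeros pad on the right
print(np.allclose(model.predict(left), model.predict(right), atol=1e-5))  # True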
Example #9
Source File: models.py    From neurowriter with MIT License
def create(inputtokens, vocabsize, units=16, dropout=0, embedding=32):

        input_ = Input(shape=(inputtokens,), dtype='int32')

        # Embedding layer
        net = Embedding(input_dim=vocabsize, output_dim=embedding, input_length=inputtokens)(input_)
        net = Dropout(dropout)(net)

        # Bidirectional LSTM layer
        net = BatchNormalization()(net)
        net = Bidirectional(CuDNNLSTM(units))(net)
        net = Dropout(dropout)(net)

        # Output layer
        net = Dense(vocabsize, activation='softmax')(net)
        model = Model(inputs=input_, outputs=net)

        # Make data-parallel
        ngpus = len(get_available_gpus())
        if ngpus > 1:
            model = make_parallel(model, ngpus)

        return model 
Example #10
Source File: dl_models.py    From Sarcasm-Detection with MIT License
def build_embedding_layer(word2index, emb_type='glove', embedding_dim=300, max_len=40, trainable=True):
    vocab_size = len(word2index) + 1
    if 'glove' in emb_type:
        word2vec_map = utils.load_vectors(filename='glove.6B.%dd.txt' % embedding_dim)
        emb_layer = pretrained_embedding_layer(word2vec_map, word2index, embedding_dim, vocab_size, trainable=trainable)
    elif 'emoji' in emb_type:
        emoji2vec_map = utils.load_vectors(filename='emoji_embeddings_%dd.txt' % embedding_dim)
        emb_layer = pretrained_embedding_layer(emoji2vec_map, word2index, embedding_dim, vocab_size, trainable=trainable)
    elif 'random' in emb_type:
        words = word2index.keys()
        random2vec_map = utils.build_random_word2vec(words, embedding_dim=embedding_dim, variance=1)
        emb_layer = pretrained_embedding_layer(random2vec_map, word2index, embedding_dim, vocab_size, trainable=trainable)
    else:
        emb_layer = Embedding(vocab_size, embedding_dim, input_length=max_len, trainable=trainable)
        emb_layer.build((None,))
    return emb_layer 
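The pretrained_embedding_layer helper called above is not shown in this file; below is a plausible sketch of it, following the same pattern as pre_trained_embedding_layer in Example #18 (the name, signature, and handling of missing words are assumptions):

import numpy as np
from keras.layers.embeddings import Embedding

def pretrained_embedding_layer(word2vec_map, word2index, embedding_dim,
                               vocab_size, trainable=False):
    # build a (vocab_size, embedding_dim) matrix from the pretrained vectors;
    # row 0 is left at zero for the padding index
    emb_matrix = np.zeros((vocab_size, embedding_dim))
    for word, index in word2index.items():
        vector = word2vec_map.get(word)
        if vector is not None:
            emb_matrix[index, :] = vector
    emb_layer = Embedding(vocab_size, embedding_dim, trainable=trainable)
    emb_layer.build((None,))
    emb_layer.set_weights([emb_matrix])
    return emb_layer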
Example #11
Source File: lstm.py    From hyperas with MIT License
def model(X_train, X_test, y_train, y_test, max_features, maxlen):
    model = Sequential()
    model.add(Embedding(max_features, 128, input_length=maxlen))
    model.add(LSTM(128))
    model.add(Dropout({{uniform(0, 1)}}))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    early_stopping = EarlyStopping(monitor='val_loss', patience=4)
    checkpointer = ModelCheckpoint(filepath='keras_weights.hdf5',
                                   verbose=1,
                                   save_best_only=True)

    model.fit(X_train, y_train,
              batch_size={{choice([32, 64, 128])}},
              nb_epoch=1,
              validation_split=0.08,
              callbacks=[early_stopping, checkpointer])

    score, acc = model.evaluate(X_test, y_test, verbose=0)

    print('Test accuracy:', acc)
    return {'loss': -acc, 'status': STATUS_OK, 'model': model} 
Example #12
Source File: kerasClassify.py    From emailinsight with MIT License
def evaluate_recurrent_model(dataset,num_classes):
    (X_train, Y_train), (X_test, Y_test) = dataset
    max_features = 20000
    maxlen = 125  # cut texts after this number of words (among top max_features most common words)
    batch_size = 32

    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')
    print("Pad sequences (samples x time) with maxlen %d"%maxlen)
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Build model...')
    model = Sequential()
    model.add(Embedding(max_features, 128, input_length=maxlen))
    model.add(GRU(512))  # try using a GRU instead, for fun
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # try using different optimizers and different optimizer configs
    model.compile(loss='categorical_crossentropy',optimizer='adam')

    print("Train...")
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=15,
              validation_data=(X_test, Y_test), show_accuracy=True)
    score, acc = model.evaluate(X_test, Y_test,
                                batch_size=batch_size,
                                show_accuracy=True)
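    # note: 'verbose' is not defined in this function; presumably a module-level flag elsewhere in kerasClassify.py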
    if verbose:
        print('Test score:', score)
        print('Test accuracy:', acc)
    return acc
Example #13
Source File: lstm.py    From dga_predict with GNU General Public License v2.0
def build_model(max_features, maxlen):
    """Build LSTM model"""
    model = Sequential()
    model.add(Embedding(max_features, 128, input_length=maxlen))
    model.add(LSTM(128))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop')

    return model 
Example #14
Source File: models.py    From neurowriter with MIT License
def create(inputtokens, vocabsize, layers=1, units=16, dropout=0, embedding=32):
        
        input_ = Input(shape=(inputtokens,), dtype='int32')
        
        # Embedding layer
        net = Embedding(input_dim=vocabsize, output_dim=embedding, input_length=inputtokens)(input_)
        net = Dropout(dropout)(net)
            
        # Bidirectional LSTM layer
        net = BatchNormalization()(net)
        net = Bidirectional(CuDNNLSTM(units, return_sequences=(layers > 1)))(net)
        net = Dropout(dropout)(net)
            
        # Rest of LSTM layers with residual connections (if any)
        for i in range(1, layers):
            if i < layers-1:
                block = BatchNormalization()(net)
                block = CuDNNLSTM(2*units, return_sequences=True)(block)
                block = Dropout(dropout)(block)
                net = add([block, net])
            else:
                net = BatchNormalization()(net)
                net = CuDNNLSTM(2*units)(net)
                net = Dropout(dropout)(net)
                    
        # Output layer
        net = Dense(vocabsize, activation='softmax')(net)
        model = Model(inputs=input_, outputs=net)
        
        # Make data-parallel
        ngpus = len(get_available_gpus())
        if ngpus > 1:
            model = make_parallel(model, ngpus)

        return model 
Example #15
Source File: models.py    From neurowriter with MIT License
def create(inputtokens, vocabsize, kernels=64, wavenetblocks=1, dropout=0, embedding=32):
        kernel_size = 7
        maxdilation = inputtokens
        
        input_ = Input(shape=(inputtokens,), dtype='int32')
        # Embedding layer
        net = Embedding(input_dim=vocabsize, output_dim=embedding, input_length=inputtokens)(input_)
        net = Dropout(dropout)(net)
        # Wavenet starts!
        net = BatchNormalization()(net)
        net = Conv1D(kernels, 1, activation='tanh')(net)
        skip_connections = []
        for i in range(wavenetblocks):
            net, skip = wavenetblock(maxdilation, dropout, kernels, kernel_size)(net)
            skip_connections.append(skip)
        if wavenetblocks > 1:
            net = add(skip_connections)
        else:
            net = skip
        net = Conv1D(kernels, 1, activation='tanh')(net)
        net = Conv1D(kernels, 1)(net)
        net = Flatten()(net)
        net = Dense(vocabsize, activation='softmax')(net)
        model = Model(inputs=input_, outputs=net)
        
        # Make data-parallel
        ngpus = len(get_available_gpus())
        if ngpus > 1:
            model = make_parallel(model, ngpus)

        return model 
Example #16
Source File: rnn_text.py    From EventForecast with GNU Lesser General Public License v3.0
def model_rnn(vocab, weights, dataPath, batchn, epoch, repeat):
    global LEN
    global DIM
    global BATCH
    testx, testy = build_dataset('%s%d'%(dataPath, 2528), vocab, weights=weights)
    testx = np.array(testx, dtype=np.float64)
    testy = np.array(testy, dtype=np.float64)
    
    # build and fit model
    model = Sequential()
    #model.add(Embedding(weights.shape[0],weights.shape[1], input_length=LEN, mask_zero=True,weights=[weights]))
    model.add(Bidirectional(LSTM(50, activation='relu', return_sequences=True), input_shape=(LEN, DIM)))
    model.add(Bidirectional(LSTM(50, activation='relu')))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    sgd = opt.SGD(lr=0.1, decay=1e-2, momentum=0.9)
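    # note: the SGD optimizer built above is never used; compile() below passes 'adam'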
    model.compile(loss='mean_squared_error', optimizer='adam')
    print(model.summary())
    #model.fit_generator(train_data_generator2('news_50_bin/news_stem_'), 500, epochs=10, verbose=2, validation_data=None)
    index = 0
    while index < epoch:
        data, result = build_dataset('%s%d'%(dataPath, index%2528), vocab, weights=weights)
        for i in range(1, batchn):
            index += 1
            newData, newResult = build_dataset('%s%d'%(dataPath, index), vocab, weights=weights)
            data.extend(newData)
            result.extend(newResult)
        model.fit(np.array(data, dtype=np.float64), np.array(result, dtype=np.float64), epochs=repeat, batch_size=BATCH, verbose=0, validation_data=(testx,testy))
        model.save('hotnews_r_%d_%d.h5'%(BATCH, index))
        predict = model.predict(testx)
        error = 0
        for i in range(testy.shape[0]):
            error += abs(testy[i] - predict[i][0])/testy[i]
            #print(testy[i], predict[i][0])
        print('batch %d of %d, epoch %d, mean relative error: %f'%(index%2528+1, 2528, int(index/2528)+1, error/testy.shape[0]))
        index += 1
    return model

# train cnn model 
Example #17
Source File: models.py    From neurowriter with MIT License
def create(inputtokens, vocabsize, convlayers=5, kernels=32,
               convdrop=0.1, denselayers=0, denseunits=64, densedrop=0.1,
               embedding=32):
        kernel_size = 2
        pool_size = 2
        if convlayers < 1:
            raise ValueError("Number of layers must be at least 1")
            
        model = Sequential()        
        # Embedding layer
        model.add(Embedding(input_dim=vocabsize, output_dim=embedding,
                            input_length=inputtokens))
        # First conv+pool layer        
        model.add(Conv1D(kernels, kernel_size, padding='causal', 
                         activation='relu'))
        model.add(Dropout(convdrop))
        model.add(MaxPooling1D(pool_size))
        # Additional dilated conv + pool layers (if possible)
        for i in range(1, convlayers):
            try:
                model.add(Conv1D(kernels, kernel_size, padding='causal', 
                                 dilation_rate=2**i, activation='relu'))
                model.add(Dropout(convdrop))
                model.add(MaxPooling1D(pool_size))
            except:
                print("Warning: not possible to add %i-th layer, moving to output" % i)
                break
                
        # Flatten and dense layers
        model.add(Flatten())
        for i in range(denselayers):
            model.add(Dense(denseunits, activation='relu'))
            model.add(Dropout(densedrop))
        # Output layer
        model.add(Dense(vocabsize, activation='softmax'))
        return model 
Example #18
Source File: TrajectoryTools.py    From TrajLib with Apache License 2.0
def pre_trained_embedding_layer(word_to_vec_map, word_to_index):
    vocab_len = len(word_to_index) + 1
    emb_dim = word_to_vec_map["@"].shape[0]
    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]
    embedding_layer = Embedding(vocab_len, emb_dim)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer 
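A hypothetical toy usage of the helper above (the vectors and vocabulary are made up): every word in word_to_index must also appear in word_to_vec_map, and the "@" entry supplies the embedding dimension.

import numpy as np

word_to_vec_map = {'@': np.array([0.1, 0.2]),
                   'walk': np.array([0.3, 0.4]),
                   'drive': np.array([0.5, 0.6])}
word_to_index = {'@': 1, 'walk': 2, 'drive': 3}

layer = pre_trained_embedding_layer(word_to_vec_map, word_to_index)
print(layer.get_weights()[0].shape)  # (4, 2); row 0 stays zero for padding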
Example #19
Source File: rnn_text.py    From EventForecast with GNU Lesser General Public License v3.0
def model_cnn(vocab, weights, dataPath, batchn, epoch):
    global LEN
    global DIM
    global BATCH
    testx, testy = build_dataset('%s%d'%(dataPath, 2528), vocab, weights=weights)
    testx = np.array(testx, dtype=np.float64)
    testy = np.array(testy, dtype=np.float64)
    model = Sequential()
    #model.add(Embedding(400001, 50, input_length=LEN, mask_zero=False,weights=[embedModel]))
    model.add(Conv1D(input_shape=(LEN, DIM), filters=32, kernel_size=30, padding='same', activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(250, activation='softmax'))
    model.add(Dense(1, activation='sigmoid'))  # sigmoid for the binary output; a 1-unit softmax would always emit 1.0
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())
    index = 0
    while True:
        data, result = build_dataset('%s%d'%(dataPath, index%2528), vocab, weights)
        for i in range(1, batchn):
            index += 1
            newData, newResult = build_dataset('%s%d'%(dataPath, index), vocab, weights)
            data.extend(newData)
            result.extend(newResult)
        model.fit(np.array(data, dtype=np.float64), np.array(result, dtype=np.float64), epochs=10, batch_size=BATCH, verbose=2, validation_data = (testx,testy))
        model.save('hotnews_c_%d_%d.h5'%(BATCH, index))
        predict = model.predict(testx)
        for i in range(testy.shape[0]):
            print(testy[i], predict[i])
        index += 1
        if index > epoch:
            return model 
Example #20
Source File: utils.py    From CIKM-AnalytiCup-2018 with Apache License 2.0
def get_word_embeddings(q1, q2, nb_words, embedding_size, embedding_matrix, max_sequence_length, trainable, embedding_dropout):
    embedding = Embedding(nb_words,
                    embedding_size,
                    weights=[embedding_matrix],
                    input_length=max_sequence_length,
                    trainable=trainable)
    q1_embed = embedding(q1)
    q1_embed = SpatialDropout1D(embedding_dropout)(q1_embed)
    q2_embed = embedding(q2)
    q2_embed = SpatialDropout1D(embedding_dropout)(q2_embed)
    return q1_embed, q2_embed
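A hypothetical usage sketch of the helper above (all sizes and the zero weight matrix are made up): both question inputs share the single Embedding instance, so q1 and q2 are projected with identical weights.

import numpy as np
from keras.layers import Input

nb_words, embedding_size, max_sequence_length = 5000, 100, 20
embedding_matrix = np.zeros((nb_words, embedding_size))  # placeholder weights

q1 = Input(shape=(max_sequence_length,), dtype='int32')
q2 = Input(shape=(max_sequence_length,), dtype='int32')
q1_embed, q2_embed = get_word_embeddings(
    q1, q2, nb_words, embedding_size, embedding_matrix,
    max_sequence_length, trainable=False, embedding_dropout=0.2)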