Python keras.layers.recurrent.GRU Examples

The following are 30 code examples of keras.layers.recurrent.GRU(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module keras.layers.recurrent, or try the search function.
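Most of the examples below were written against the older Keras 1.x API (arguments such as input_dim/output_dim, nb_epoch and show_accuracy), while others use the later units/input_shape style. As a minimal, hedged sketch of the layer itself, with illustrative sizes not taken from any example below:

import numpy as np
from keras.models import Sequential
from keras.layers.recurrent import GRU

# Illustrative sizes only: 16 GRU units over sequences of 10 timesteps x 8 features.
model = Sequential()
model.add(GRU(16, input_shape=(10, 8)))
model.compile(loss='mse', optimizer='adam')

x = np.random.randn(4, 10, 8)
print(model.predict(x).shape)  # -> (4, 16)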
Example #1
Source File: test_tasks.py From CAPTCHA-breaking with MIT License | 7 votes |
def test_temporal_clf(self):
    print('temporal classification data:')
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200,
                                                         input_shape=(5, 10),
                                                         classification=True, nb_class=2)
    print('X_train:', X_train.shape)
    print('X_test:', X_test.shape)
    print('y_train:', y_train.shape)
    print('y_test:', y_test.shape)

    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    model = Sequential()
    model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adadelta')
    history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16,
                        validation_data=(X_test, y_test),
                        show_accuracy=True, verbose=2)
    self.assertTrue(history.history['val_acc'][-1] > 0.9)
Example #2
Source File: model.py From TrafficFlowPrediction with MIT License | 6 votes |
def get_gru(units):
    """GRU (Gated Recurrent Unit)
    Build GRU Model.

    # Arguments
        units: List(int), number of input, output and hidden units.
    # Returns
        model: Model, nn model.
    """

    model = Sequential()
    model.add(GRU(units[1], input_shape=(units[0], 1), return_sequences=True))
    model.add(GRU(units[2]))
    model.add(Dropout(0.2))
    model.add(Dense(units[3], activation='sigmoid'))

    return model
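A hedged usage sketch; the concrete values in the units list are illustrative and not taken from the TrafficFlowPrediction project:

# Hypothetical call: 12 input timesteps, two hidden GRU layers of 64 units,
# and a single sigmoid output unit.
model = get_gru([12, 64, 64, 1])
model.compile(loss='mse', optimizer='rmsprop')
model.summary()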
Example #3
Source File: understand.py From soph with MIT License | 6 votes |
def understand_variable_length_handle():
    """Helps to understand how a recurrent layer handles variable-length sequences."""
    model = Sequential()
    model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model.fit(train_x, train_y, verbose=0)

    inz_1 = np.random.randn(1, 78, 256)
    rez_1 = model.predict_proba(inz_1, verbose=0)

    inz_2 = np.random.randn(1, 87, 256)
    rez_2 = model.predict_proba(inz_2, verbose=0)

    print()
    print('=========== understand variable length =================')
    print('With `return_sequences=True`')
    print('Input shape is: {}, output shape is {}'.format(inz_1.shape, rez_1.shape))
    print('Input shape is: {}, output shape is {}'.format(inz_2.shape, rez_2.shape))
    print('====================== end =============================')
Example #4
Source File: understand.py From soph with MIT License | 6 votes |
def try_variable_length_train():
    """Variable-length sequence training experiment.

    The experiment fails: the resulting train_x and train_y have dtype
    object, and taking their shape gives (100,), which makes training
    raise an error.
    """
    model = Sequential()
    model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    train_x = []
    train_y = []
    for i in range(100):
        seq_length = np.random.randint(78, 87 + 1)
        sequence = []
        for _ in range(seq_length):
            sequence.append([np.random.randn() for _ in range(256)])
        train_x.append(np.array(sequence))
        train_y.append(np.array(sequence))

    train_x = np.array(train_x)
    train_y = np.array(train_y)

    model.fit(np.array(train_x), np.array(train_y))
Example #5
Source File: model.py From TrafficFlowPrediction with MIT License | 6 votes |
def get_gru(units):
    """GRU (Gated Recurrent Unit)
    Build GRU Model.

    # Arguments
        units: List(int), number of input, output and hidden units.
    # Returns
        model: Model, nn model.
    """

    model = Sequential()
    model.add(GRU(units[1], input_shape=(units[0], 1), return_sequences=True))
    model.add(GRU(units[2]))
    model.add(Dropout(0.2))
    model.add(Dense(units[3], activation='sigmoid'))

    return model
Example #6
Source File: model.py From DeepSequenceClassification with GNU General Public License v2.0 | 6 votes |
def gen_model(vocab_size=100, embedding_size=128, maxlen=100, output_size=6,
              hidden_layer_size=100, num_hidden_layers=1, RNN_LAYER_TYPE="LSTM"):
    RNN_CLASS = LSTM
    if RNN_LAYER_TYPE == "GRU":
        RNN_CLASS = GRU
    logger.info("Parameters: vocab_size = %s, embedding_size = %s, maxlen = %s, output_size = %s, hidden_layer_size = %s, " %
                (vocab_size, embedding_size, maxlen, output_size, hidden_layer_size))
    logger.info("Building Model")
    model = Sequential()
    logger.info("Init Model with vocab_size = %s, embedding_size = %s, maxlen = %s" % (vocab_size, embedding_size, maxlen))
    model.add(Embedding(vocab_size, embedding_size, input_length=maxlen))
    logger.info("Added Embedding Layer")
    model.add(Dropout(0.5))
    logger.info("Added Dropout Layer")
    for i in xrange(num_hidden_layers):
        model.add(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid',
                            inner_activation='hard_sigmoid', return_sequences=True))
        logger.info("Added %s Layer" % RNN_LAYER_TYPE)
        model.add(Dropout(0.5))
        logger.info("Added Dropout Layer")
    model.add(RNN_CLASS(output_dim=output_size, activation='sigmoid',
                        inner_activation='hard_sigmoid', return_sequences=True))
    logger.info("Added %s Layer" % RNN_LAYER_TYPE)
    model.add(Dropout(0.5))
    logger.info("Added Dropout Layer")
    model.add(TimeDistributedDense(output_size, activation="softmax"))
    logger.info("Added TimeDistributedDense Layer")
    logger.info("Created model with following config:\n%s" % json.dumps(model.get_config(), indent=4))
    logger.info("Compiling model with optimizer %s" % optimizer)
    start_time = time.time()
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f seconds." % total_time)
    return model
Example #7
Source File: recurrent_test.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize('layer_class', [recurrent.SimpleRNN,
                                                   recurrent.GRU,
                                                   recurrent.LSTM])(f)
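A hedged sketch of how such a parametrizing decorator is typically applied; the test body below is illustrative and not from the repository:

# pytest runs this once per layer_class in (SimpleRNN, GRU, LSTM).
@rnn_test
def test_return_sequences_flag(layer_class):
    layer = layer_class(8, return_sequences=True)
    assert layer.return_sequences is True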
Example #8
Source File: model.py From deepchem with MIT License | 5 votes |
def _buildDecoder(self, z, latent_rep_size, max_length, charset_length):
    h = Dense(latent_rep_size, name='latent_input', activation='relu')(z)
    h = RepeatVector(max_length, name='repeat_vector')(h)
    h = GRU(501, return_sequences=True, name='gru_1')(h)
    h = GRU(501, return_sequences=True, name='gru_2')(h)
    h = GRU(501, return_sequences=True, name='gru_3')(h)
    return TimeDistributed(
        Dense(charset_length, activation='softmax'), name='decoded_mean')(h)
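A hedged sketch of wiring a similar stacked-GRU decoder with the Keras functional API; the latent size, sequence length and character-set size are illustrative, not taken from the deepchem source:

from keras.layers import Input, Dense, RepeatVector, GRU, TimeDistributed
from keras.models import Model

latent_rep_size, max_length, charset_length = 196, 120, 35  # illustrative values
z = Input(shape=(latent_rep_size,))
h = Dense(latent_rep_size, activation='relu')(z)
h = RepeatVector(max_length)(h)
h = GRU(501, return_sequences=True)(h)
h = GRU(501, return_sequences=True)(h)
h = GRU(501, return_sequences=True)(h)
decoded = TimeDistributed(Dense(charset_length, activation='softmax'))(h)
decoder = Model(inputs=z, outputs=decoded)
decoder.summary()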
Example #9
Source File: recurrent_test.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize('layer_class', [recurrent.SimpleRNN,
                                                   recurrent.GRU,
                                                   recurrent.LSTM])(f)
Example #10
Source File: recurrent_test.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize('layer_class', [recurrent.SimpleRNN,
                                                   recurrent.GRU,
                                                   recurrent.LSTM])(f)
Example #11
Source File: model.py From LipNet with MIT License | 5 votes |
def build(self):
    if K.image_data_format() == 'channels_first':
        input_shape = (self.img_c, self.frames_n, self.img_w, self.img_h)
    else:
        input_shape = (self.frames_n, self.img_w, self.img_h, self.img_c)

    self.input_data = Input(name='the_input', shape=input_shape, dtype='float32')

    self.zero1 = ZeroPadding3D(padding=(1, 2, 2), name='zero1')(self.input_data)
    self.conv1 = Conv3D(32, (3, 5, 5), strides=(1, 2, 2), activation='relu', kernel_initializer='he_normal', name='conv1')(self.zero1)
    self.maxp1 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max1')(self.conv1)
    self.drop1 = Dropout(0.5)(self.maxp1)

    self.zero2 = ZeroPadding3D(padding=(1, 2, 2), name='zero2')(self.drop1)
    self.conv2 = Conv3D(64, (3, 5, 5), strides=(1, 1, 1), activation='relu', kernel_initializer='he_normal', name='conv2')(self.zero2)
    self.maxp2 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max2')(self.conv2)
    self.drop2 = Dropout(0.5)(self.maxp2)

    self.zero3 = ZeroPadding3D(padding=(1, 1, 1), name='zero3')(self.drop2)
    self.conv3 = Conv3D(96, (3, 3, 3), strides=(1, 1, 1), activation='relu', kernel_initializer='he_normal', name='conv3')(self.zero3)
    self.maxp3 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max3')(self.conv3)
    self.drop3 = Dropout(0.5)(self.maxp3)

    self.resh1 = TimeDistributed(Flatten())(self.drop3)

    self.gru_1 = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru1'), merge_mode='concat')(self.resh1)
    self.gru_2 = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru2'), merge_mode='concat')(self.gru_1)

    # transforms RNN output to character activations:
    self.dense1 = Dense(self.output_size, kernel_initializer='he_normal', name='dense1')(self.gru_2)

    self.y_pred = Activation('softmax', name='softmax')(self.dense1)

    self.labels = Input(name='the_labels', shape=[self.absolute_max_string_len], dtype='float32')
    self.input_length = Input(name='input_length', shape=[1], dtype='int64')
    self.label_length = Input(name='label_length', shape=[1], dtype='int64')

    self.loss_out = CTC('ctc', [self.y_pred, self.labels, self.input_length, self.label_length])

    self.model = Model(inputs=[self.input_data, self.labels, self.input_length, self.label_length], outputs=self.loss_out)
Example #12
Source File: grud_layers.py From GRU-D with MIT License | 5 votes |
def build(self, input_shape):
    # Note input_shape will be list of shapes of initial states
    # if these are passed in __call__.
    if not isinstance(input_shape, list) or len(input_shape) <= 2:
        raise ValueError('input_shape of GRU-D should be a list of at least 3.')
    input_shape = input_shape[:3]

    batch_size = input_shape[0][0] if self.stateful else None
    self.input_spec[0] = InputSpec(shape=(batch_size, None, input_shape[0][-1]))
    self.input_spec[1] = InputSpec(shape=(batch_size, None, input_shape[1][-1]))
    self.input_spec[2] = InputSpec(shape=(batch_size, None, 1))

    # allow GRUDCell to build before we set or validate state_spec
    step_input_shape = [(i_s[0],) + i_s[2:] for i_s in input_shape]
    self.cell.build(step_input_shape)

    # set or validate state_spec
    state_size = list(self.cell.state_size)
    if self.state_spec is not None:
        # initial_state was passed in call, check compatibility
        if [spec.shape[-1] for spec in self.state_spec] != state_size:
            raise ValueError(
                'An `initial_state` was passed that is not compatible with '
                '`cell.state_size`. Received `state_spec`={}; '
                'however `cell.state_size` is '
                '{}'.format(self.state_spec, self.cell.state_size))
    else:
        self.state_spec = [InputSpec(shape=(None, dim)) for dim in state_size]
    if self.stateful:
        self.reset_states()
    self.built = True
Example #13
Source File: grud_layers.py From GRU-D with MIT License | 5 votes |
def __call__(self, inputs, initial_state=None, **kwargs):
    # We skip `__call__` of `RNN` and `GRU` in this case and directly execute
    # GRUD's great-grandparent's method.
    inputs, initial_state = _standardize_grud_args(inputs, initial_state)

    if initial_state is None:
        return super(RNN, self).__call__(inputs, **kwargs)

    # If `initial_state` is specified and is Keras
    # tensors, then add it to the inputs and temporarily modify the
    # input_spec to include them.

    additional_inputs = []
    additional_specs = []
    kwargs['initial_state'] = initial_state
    additional_inputs += initial_state
    self.state_spec = [InputSpec(shape=K.int_shape(state))
                       for state in initial_state]
    additional_specs += self.state_spec
    # at this point additional_inputs cannot be empty
    is_keras_tensor = K.is_keras_tensor(additional_inputs[0])
    for tensor in additional_inputs:
        if K.is_keras_tensor(tensor) != is_keras_tensor:
            raise ValueError('The initial state or constants of an RNN'
                             ' layer cannot be specified with a mix of'
                             ' Keras tensors and non-Keras tensors'
                             ' (a "Keras tensor" is a tensor that was'
                             ' returned by a Keras layer, or by `Input`)')

    if is_keras_tensor:
        # Compute the full input spec, including state and constants
        full_input = inputs + additional_inputs
        full_input_spec = self.input_spec + additional_specs
        # Perform the call with temporarily replaced input_spec
        original_input_spec = self.input_spec
        self.input_spec = full_input_spec
        output = super(RNN, self).__call__(full_input, **kwargs)
        self.input_spec = original_input_spec
        return output
    return super(RNN, self).__call__(inputs, **kwargs)
Example #14
Source File: attentive_gru.py From deep_qa with Apache License 2.0 | 5 votes |
def build(self, input_shape):
    """
    This is used by Keras to verify things, but also to build the weights.
    The only differences from the Keras GRU (which we copied exactly
    other than the below) are:

    We generate weights with dimension input_dim[2] - 1, rather than
    dimension input_dim[2].

    There are a few variables which are created in non-'gpu' modes which
    are not required. These are commented out but left in for clarity below.
    """
    new_input_shape = list(input_shape)
    new_input_shape[2] -= 1
    super(AttentiveGru, self).build(tuple(new_input_shape))
    self.input_spec = [InputSpec(shape=input_shape)]
Example #15
Source File: attentive_gru.py From deep_qa with Apache License 2.0 | 5 votes |
def preprocess_input(self, inputs, training=None):
    """
    We have to override this preprocessing step, because if we are using the cpu,
    we do the weight - input multiplications in the internals of the GRU as separate,
    smaller matrix multiplications and concatenate them after. Therefore, before this
    happens, we split off the attention and then add it back afterwards.
    """
    if self.implementation == 0:
        attention = inputs[:, :, 0]   # Shape: (samples, knowledge_length)
        inputs = inputs[:, :, 1:]     # Shape: (samples, knowledge_length, word_dim)
        input_shape = self.input_spec[0].shape
        input_dim = input_shape[2] - 1
        timesteps = input_shape[1]

        x_z = _time_distributed_dense(inputs, self.kernel_z, self.bias_z,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_r = _time_distributed_dense(inputs, self.kernel_r, self.bias_r,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_h = _time_distributed_dense(inputs, self.kernel_h, self.bias_h,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        # Add attention back on to its original place.
        return K.concatenate([K.expand_dims(attention, 2), x_z, x_r, x_h], axis=2)
    else:
        return inputs
Example #16
Source File: kerasClassify.py From emailinsight with MIT License | 5 votes |
def evaluate_recurrent_model(dataset, num_classes):
    (X_train, Y_train), (X_test, Y_test) = dataset
    max_features = 20000
    maxlen = 125  # cut texts after this number of words (among top max_features most common words)
    batch_size = 32

    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')

    print("Pad sequences (samples x time) with maxlen %d" % maxlen)
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Build model...')
    model = Sequential()
    model.add(Embedding(max_features, 128, input_length=maxlen))
    model.add(GRU(512))  # try using a GRU instead, for fun
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # try using different optimizers and different optimizer configs
    model.compile(loss='categorical_crossentropy', optimizer='adam')

    print("Train...")
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=15,
              validation_data=(X_test, Y_test), show_accuracy=True)
    score, acc = model.evaluate(X_test, Y_test,
                                batch_size=batch_size,
                                show_accuracy=True)
    if verbose:
        print('Test score:', score)
        print('Test accuracy:', acc)
    return score[1]
Example #17
Source File: model.py From keras-molecules with MIT License | 5 votes |
def _buildDecoder(self, z, latent_rep_size, max_length, charset_length):
    h = Dense(latent_rep_size, name='latent_input', activation='relu')(z)
    h = RepeatVector(max_length, name='repeat_vector')(h)
    h = GRU(501, return_sequences=True, name='gru_1')(h)
    h = GRU(501, return_sequences=True, name='gru_2')(h)
    h = GRU(501, return_sequences=True, name='gru_3')(h)
    return TimeDistributed(Dense(charset_length, activation='softmax'),
                           name='decoded_mean')(h)
Example #18
Source File: recurrent_test.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize('layer_class', [recurrent.SimpleRNN,
                                                   recurrent.GRU,
                                                   recurrent.LSTM])(f)
Example #19
Source File: recurrent_test.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize('layer_class', [recurrent.SimpleRNN,
                                                   recurrent.GRU,
                                                   recurrent.LSTM])(f)
Example #20
Source File: recurrent_test.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize('layer_class', [recurrent.SimpleRNN,
                                                   recurrent.GRU,
                                                   recurrent.LSTM])(f)
Example #21
Source File: recurrent_test.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize('layer_class', [recurrent.SimpleRNN,
                                                   recurrent.GRU,
                                                   recurrent.LSTM])(f)
Example #22
Source File: ntm.py From ntm_keras with BSD 3-Clause "New" or "Revised" License | 5 votes |
def build(self, input_shape):
    bs, input_length, input_dim = input_shape

    self.controller_input_dim, self.controller_output_dim = controller_input_output_shape(
        input_dim, self.units, self.m_depth, self.n_slots, self.shift_range,
        self.read_heads, self.write_heads)

    # Now that we've calculated the shape of the controller, we have to add it to the layer/model.
    if self.controller is None:
        self.controller = Dense(
            name="controller",
            activation='linear',
            bias_initializer='zeros',
            units=self.controller_output_dim,
            input_shape=(bs, input_length, self.controller_input_dim))
        self.controller.build(input_shape=(self.batch_size, input_length, self.controller_input_dim))
        self.controller_with_state = False

    # This is a fixed shift matrix
    self.C = _circulant(self.n_slots, self.shift_range)

    self.trainable_weights = self.controller.trainable_weights

    # We need to declare the number of states we want to carry around.
    # In our case the dimension seems to be 6 (LSTM) or 5 (GRU) or 4 (FF),
    # see self.get_initial_states; those correspond to:
    # [old_ntm_output] + [init_M, init_wr, init_ww] + [init_h] (LSTM and GRU) + [init_c] (LSTM only).
    # old_ntm_output does not make sense in our world, but is required by the definition
    # of the step function we intend to use.
    # WARNING: What self.state_spec does is only poorly understood,
    # I only copied it from keras/recurrent.py.
    self.states = [None, None, None, None]
    self.state_spec = [InputSpec(shape=(None, self.output_dim)),                # old_ntm_output
                       InputSpec(shape=(None, self.n_slots, self.m_depth)),     # Memory
                       InputSpec(shape=(None, self.read_heads, self.n_slots)),  # weights_read
                       InputSpec(shape=(None, self.write_heads, self.n_slots))] # weights_write

    super(NeuralTuringMachine, self).build(input_shape)
Example #23
Source File: models.py From deepchem with MIT License | 5 votes |
def __init__(self, input_shape, gru_size=10, tdd_size=4):
    self.model = Sequential()
    self.model.add(
        GRU(gru_size, return_sequences=True, input_shape=input_shape))
    if tdd_size is not None:
        self.model.add(TimeDistributedDense(tdd_size))
    self.model.add(Flatten())
    self.model.add(Dense(1))
    self.model.add(Activation('sigmoid'))
    print('Compiling model...')
    self.model.compile(optimizer='adam', loss='binary_crossentropy')
Example #24
Source File: model.py From DeepSequenceClassification with GNU General Public License v2.0 | 5 votes |
def gen_model_brnn_multitask(vocab_size=100, embedding_size=128, maxlen=100, output_size=[6, 96],
                             hidden_layer_size=100, num_hidden_layers=1, RNN_LAYER_TYPE="LSTM"):
    RNN_CLASS = LSTM
    if RNN_LAYER_TYPE == "GRU":
        RNN_CLASS = GRU
    logger.info("Parameters: vocab_size = %s, embedding_size = %s, maxlen = %s, output_size = %s, hidden_layer_size = %s, " %
                (vocab_size, embedding_size, maxlen, output_size, hidden_layer_size))
    logger.info("Building Graph model for Bidirectional RNN")
    model = Graph()
    model.add_input(name='input', input_shape=(maxlen,), dtype=int)
    logger.info("Added Input node")
    logger.info("Init Model with vocab_size = %s, embedding_size = %s, maxlen = %s" % (vocab_size, embedding_size, maxlen))
    model.add_node(Embedding(vocab_size, embedding_size, input_length=maxlen, mask_zero=True),
                   name='embedding', input='input')
    logger.info("Added Embedding node")
    model.add_node(Dropout(0.5), name="dropout_0", input="embedding")
    logger.info("Added Dropout Node")
    for i in xrange(num_hidden_layers):
        last_dropout_name = "dropout_%s" % i
        forward_name, backward_name, dropout_name = ["%s_%s" % (k, i + 1) for k in ["forward", "backward", "dropout"]]
        model.add_node(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True),
                       name=forward_name, input=last_dropout_name)
        logger.info("Added %s forward node[%s]" % (RNN_LAYER_TYPE, i + 1))
        model.add_node(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True, go_backwards=True),
                       name=backward_name, input=last_dropout_name)
        logger.info("Added %s backward node[%s]" % (RNN_LAYER_TYPE, i + 1))
        model.add_node(Dropout(0.5), name=dropout_name, inputs=[forward_name, backward_name])
        logger.info("Added Dropout node[%s]" % (i + 1))
    output_names = []
    for i, output_task_size in enumerate(output_size):
        tdd_name, output_name = "tdd_%s" % i, "output_%s" % i
        model.add_node(TimeDistributedDense(output_task_size, activation="softmax"), name=tdd_name, input=dropout_name)
        logger.info("Added TimeDistributedDense node %s with output_size %s" % (i, output_task_size))
        model.add_output(name=output_name, input=tdd_name)
        output_names.append(output_name)
    logger.info("Added Output node")
    logger.info("Created model with following config:\n%s" % model.get_config())
    logger.info("Compiling model with optimizer %s" % optimizer)
    start_time = time.time()
    model.compile(optimizer, {k: 'categorical_crossentropy' for k in output_names})
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f seconds." % total_time)
    return model, output_names
Example #25
Source File: model.py From DeepSequenceClassification with GNU General Public License v2.0 | 5 votes |
def gen_model_brnn(vocab_size=100, embedding_size=128, maxlen=100, output_size=6,
                   hidden_layer_size=100, num_hidden_layers=1, RNN_LAYER_TYPE="LSTM"):
    RNN_CLASS = LSTM
    if RNN_LAYER_TYPE == "GRU":
        RNN_CLASS = GRU
    logger.info("Parameters: vocab_size = %s, embedding_size = %s, maxlen = %s, output_size = %s, hidden_layer_size = %s, " %
                (vocab_size, embedding_size, maxlen, output_size, hidden_layer_size))
    logger.info("Building Graph model for Bidirectional RNN")
    model = Graph()
    model.add_input(name='input', input_shape=(maxlen,), dtype=int)
    logger.info("Added Input node")
    logger.info("Init Model with vocab_size = %s, embedding_size = %s, maxlen = %s" % (vocab_size, embedding_size, maxlen))
    model.add_node(Embedding(vocab_size, embedding_size, input_length=maxlen), name='embedding', input='input')
    logger.info("Added Embedding node")
    model.add_node(Dropout(0.5), name="dropout_0", input="embedding")
    logger.info("Added Dropout Node")
    for i in xrange(num_hidden_layers):
        last_dropout_name = "dropout_%s" % i
        forward_name, backward_name, dropout_name = ["%s_%s" % (k, i + 1) for k in ["forward", "backward", "dropout"]]
        model.add_node(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True),
                       name=forward_name, input=last_dropout_name)
        logger.info("Added %s forward node[%s]" % (RNN_LAYER_TYPE, i + 1))
        model.add_node(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True, go_backwards=True),
                       name=backward_name, input=last_dropout_name)
        logger.info("Added %s backward node[%s]" % (RNN_LAYER_TYPE, i + 1))
        model.add_node(Dropout(0.5), name=dropout_name, inputs=[forward_name, backward_name])
        logger.info("Added Dropout node[%s]" % (i + 1))
    model.add_node(TimeDistributedDense(output_size, activation="softmax"), name="tdd", input=dropout_name)
    logger.info("Added TimeDistributedDense node")
    model.add_output(name="output", input="tdd")
    logger.info("Added Output node")
    logger.info("Created model with following config:\n%s" % model.get_config())
    logger.info("Compiling model with optimizer %s" % optimizer)
    start_time = time.time()
    model.compile(optimizer, {"output": 'categorical_crossentropy'})
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f seconds." % total_time)
    return model
Example #26
Source File: test_tasks.py From CAPTCHA-breaking with MIT License | 5 votes |
def test_temporal_reg(self):
    print('temporal regression data:')
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200,
                                                         input_shape=(5, 10), output_shape=(2,),
                                                         classification=False)
    print('X_train:', X_train.shape)
    print('X_test:', X_test.shape)
    print('y_train:', y_train.shape)
    print('y_test:', y_test.shape)

    model = Sequential()
    model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
    model.compile(loss='hinge', optimizer='adam')
    history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16,
                        validation_data=(X_test, y_test), verbose=2)
    self.assertTrue(history.history['val_loss'][-1] < 0.8)
Example #27
Source File: pig_latin.py From soph with MIT License | 5 votes |
def build_model(input_size, seq_len, hidden_size):
    """Build a sequence-to-sequence model."""
    model = Sequential()
    model.add(GRU(input_dim=input_size, output_dim=hidden_size, return_sequences=False))
    model.add(Dense(hidden_size, activation="relu"))
    model.add(RepeatVector(seq_len))
    model.add(GRU(hidden_size, return_sequences=True))
    model.add(TimeDistributed(Dense(output_dim=input_size, activation="linear")))
    model.compile(loss="mse", optimizer='adam')

    return model
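A hedged usage sketch with illustrative dimensions (a 28-symbol one-hot alphabet and output sequences of length 7, not taken from the pig_latin script):

# Hypothetical call; the encoder accepts variable-length input because only
# input_dim is fixed, while the decoder emits seq_len timesteps.
model = build_model(input_size=28, seq_len=7, hidden_size=128)
x = np.random.randn(2, 11, 28)   # 2 samples, 11 input timesteps, 28-dim one-hot
print(model.predict(x).shape)    # -> (2, 7, 28)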
Example #28
Source File: understand.py From soph with MIT License | 5 votes |
def try_variable_length_train_in_batch():
    """Variable-length sequence training experiment (2)."""
    model = Sequential()
    model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    # Split into two batches; the sequence length differs between batches.
    seq_lens = [78, 87]
    for i in range(2):
        train_x = np.random.randn(20, seq_lens[i], 256)
        train_y = np.random.randn(20, seq_lens[i], 256)
        model.train_on_batch(train_x, train_y)
Example #29
Source File: understand.py From soph with MIT License | 5 votes |
def understand_return_sequence():
    """Helps to understand the `return_sequences` parameter of a recurrent layer."""
    model_1 = Sequential()
    model_1.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model_1.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model_1.fit(train_x, train_y, verbose=0)

    model_2 = Sequential()
    model_2.add(GRU(input_dim=256, output_dim=256, return_sequences=False))
    model_2.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 256)
    model_2.fit(train_x, train_y, verbose=0)

    inz = np.random.randn(100, 78, 256)
    rez_1 = model_1.predict_proba(inz, verbose=0)
    rez_2 = model_2.predict_proba(inz, verbose=0)

    print()
    print('=========== understand return_sequence =================')
    print('Input shape is: {}'.format(inz.shape))
    print('Output shape of model with `return_sequences=True`: {}'.format(rez_1.shape))
    print('Output shape of model with `return_sequences=False`: {}'.format(rez_2.shape))
    print('====================== end =============================')
Example #30
Source File: tgru_k2_gpu.py From chemical_vae with Apache License 2.0 | 4 votes |
def build(self, input_shape):
    # all of this is copied from GRU, except for one part commented below
    if isinstance(input_shape, list):
        input_shape = input_shape[0]

    batch_size = input_shape[0] if self.stateful else None
    self.input_dim = input_shape[2]
    self.input_spec = [InputSpec(shape=(batch_size, None, self.input_dim)),
                       InputSpec(shape=(batch_size, None, self.units))]
    self.state_spec = InputSpec(shape=(batch_size, self.units))

    self.states = [None]
    if self.stateful:
        self.reset_states()

    self.kernel = self.add_weight((self.input_dim, self.units * 3),
                                  name='kernel',
                                  initializer=self.kernel_initializer,
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)

    # adding an extra recurrent weight here, change from GRU layer:
    # this last recurrent weight applied to true sequence input from prev. timestep,
    # or sampled output from prev. time step.
    self.recurrent_kernel = self.add_weight(
        (self.units, self.units * 4),
        name='recurrent_kernel',
        initializer=self.recurrent_initializer,
        regularizer=self.recurrent_regularizer,
        constraint=self.recurrent_constraint)

    if self.use_bias:
        self.bias = self.add_weight((self.units * 4,),
                                    name='bias',
                                    initializer='zero',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    else:
        self.bias = None

    self.kernel_z = self.kernel[:, :self.units]
    self.recurrent_kernel_z = self.recurrent_kernel[:, :self.units]
    self.kernel_r = self.kernel[:, self.units: self.units * 2]
    self.recurrent_kernel_r = self.recurrent_kernel[:, self.units: self.units * 2]
    self.kernel_h = self.kernel[:, self.units * 2:]
    self.recurrent_kernel_h = self.recurrent_kernel[:, self.units * 2: self.units * 3]
    self.recurrent_kernel_y = self.recurrent_kernel[:, self.units * 3:]

    if self.use_bias:
        self.bias_z = self.bias[:self.units]
        self.bias_r = self.bias[self.units: self.units * 2]
        self.bias_h = self.bias[self.units * 2: self.units * 3]
        # (note: this next line overwrites bias_h set above with the last
        # quarter of the bias vector; a distinct name may have been intended)
        self.bias_h = self.bias[self.units * 3:]
    else:
        self.bias_z = None
        self.bias_r = None
        self.bias_h = None
    self.built = True