Python tensorflow.orthogonal_initializer() Examples
The following are 30 code examples of tensorflow.orthogonal_initializer(). You can go to the original project or source file by following the links above each example.
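For orientation, here is a minimal, self-contained sketch (TensorFlow 1.x graph mode; the variable names, shapes, and gain value are illustrative, not taken from the examples below) of how tf.orthogonal_initializer() is typically used:

import math

import tensorflow as tf  # TensorFlow 1.x API

# The initializer samples a (near-)orthogonal matrix; `gain` rescales it.
# A gain of sqrt(2) is a common choice in front of ReLU activations.
init = tf.orthogonal_initializer(gain=math.sqrt(2))

# As a variable initializer ...
weights = tf.get_variable("weights", shape=[256, 512], initializer=init)

# ... or as the kernel initializer of a layer.
inputs = tf.placeholder(tf.float32, [None, 256])
hidden = tf.layers.dense(inputs, 128, activation=tf.nn.relu, kernel_initializer=init)

The examples below pass the same initializer to tf.get_variable, tf.layers/slim layers, and RNN cells such as tf.contrib.rnn.LSTMCell.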
Example #1
Source File: generator.py From UROP-Adversarial-Feature-Matching-for-Text-Generation with GNU Affero General Public License v3.0
def init_param(self):
    idm = self.input_dim
    hs = self.hidden_size
    ws = len(self.window)
    nf = idm * ws
    # author's special initialization strategy.
    self.Wemb = tf.get_variable(name=self.name + '_Wemb', shape=[self.vocab_size, idm],
                                dtype=tf.float32, initializer=tf.random_uniform_initializer())
    self.bhid = tf.get_variable(name=self.name + '_bhid', shape=[self.vocab_size],
                                dtype=tf.float32, initializer=tf.zeros_initializer())
    self.Vhid = tf.get_variable(name=self.name + '_Vhid', shape=[hs, idm],
                                dtype=tf.float32, initializer=tf.random_uniform_initializer())
    self.Vhid = dot(self.Vhid, self.Wemb)  # [hidden_size, vocab_size]
    self.i2h_W = tf.get_variable(name=self.name + '_i2h_W', shape=[idm, hs * 4],
                                 dtype=tf.float32, initializer=tf.random_uniform_initializer())
    self.h2h_W = tf.get_variable(name=self.name + '_h2h_W', shape=[hs, hs * 4],
                                 dtype=tf.float32, initializer=tf.orthogonal_initializer())
    self.z2h_W = tf.get_variable(name=self.name + '_z2h_W', shape=[nf, hs * 4],
                                 dtype=tf.float32, initializer=tf.random_uniform_initializer())
    b_init_1 = tf.zeros((hs,))
    b_init_2 = tf.ones((hs,)) * 3
    b_init_3 = tf.zeros((hs,))
    b_init_4 = tf.zeros((hs,))
    b_init = tf.concat([b_init_1, b_init_2, b_init_3, b_init_4], axis=0)
    # b_init = tf.constant(b_init)
    # self.b = tf.get_variable(name=self.name + '_b', shape=[hs * 4], dtype=tf.float32, initializer=b_init)
    self.b = tf.get_variable(name=self.name + '_b', dtype=tf.float32,
                             initializer=b_init)  # ValueError: If initializer is a constant, do not specify shape.
    self.C0 = tf.get_variable(name=self.name + '_C0', shape=[nf, hs],
                              dtype=tf.float32, initializer=tf.random_uniform_initializer())
    self.b0 = tf.get_variable(name=self.name + '_b0', shape=[hs],
                              dtype=tf.float32, initializer=tf.zeros_initializer())
Example #2
Source File: train.py From UNMT-SPR with MIT License
def get_initializer(params):
    if params.initializer == "uniform":
        max_val = 0.1 * params.initializer_gain
        return tf.random_uniform_initializer(-max_val, max_val)
    elif params.initializer == "normal":
        return tf.random_normal_initializer(0.0, params.initializer_gain)
    elif params.initializer == "orthogonal":
        return tf.orthogonal_initializer(params.initializer_gain)
    elif params.initializer == "normal_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg",
                                               distribution="normal")
    elif params.initializer == "uniform_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg",
                                               distribution="uniform")
    else:
        raise ValueError("Unrecognized initializer: %s" % params.initializer)
Example #3
Source File: classifiers.py From Parser-v3 with Apache License 2.0
def hidden(layer, hidden_size, hidden_func=nonlin.relu, hidden_keep_prob=1.):
    """"""
    layer_shape = nn.get_sizes(layer)
    input_size = layer_shape.pop()
    weights = tf.get_variable('Weights', shape=[input_size, hidden_size])#, initializer=tf.orthogonal_initializer)
    biases = tf.get_variable('Biases', shape=[hidden_size], initializer=tf.zeros_initializer)
    if hidden_keep_prob < 1.:
        if len(layer_shape) > 1:
            noise_shape = tf.stack(layer_shape[:-1] + [1, input_size])
        else:
            noise_shape = None
        layer = nn.dropout(layer, hidden_keep_prob, noise_shape=noise_shape)
    layer = nn.reshape(layer, [-1, input_size])
    layer = tf.matmul(layer, weights) + biases
    layer = hidden_func(layer)
    layer = nn.reshape(layer, layer_shape + [hidden_size])
    return layer

#===============================================================
Example #4
Source File: esim.py From inferbeddings with MIT License
def _transform_compare(self, sequence, sequence_length, reuse=False):
    with tf.variable_scope('transform_compare', reuse=reuse) as _:
        sequence = tf.nn.dropout(sequence, keep_prob=self.dropout_keep_prob)
        projection = tf.contrib.layers.fully_connected(inputs=sequence,
                                                       num_outputs=self.representation_size,
                                                       weights_initializer=tf.random_normal_initializer(0.0, 0.01),
                                                       biases_initializer=tf.zeros_initializer(),
                                                       activation_fn=tf.nn.relu)
        cell_fw = tf.contrib.rnn.LSTMCell(self.representation_size, state_is_tuple=True, reuse=reuse,
                                          initializer=tf.orthogonal_initializer())
        cell_bw = tf.contrib.rnn.LSTMCell(self.representation_size, state_is_tuple=True, reuse=reuse,
                                          initializer=tf.orthogonal_initializer())
        outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw, cell_bw=cell_bw,
            inputs=projection, sequence_length=sequence_length,
            dtype=tf.float32)
        return tf.concat(outputs, axis=2)
Example #5
Source File: optimize.py From fine-lm with MIT License
def get_variable_initializer(hparams):
    """Get variable initializer from hparams."""
    if not hparams.initializer:
        return None

    if not tf.contrib.eager.in_eager_mode():
        tf.logging.info("Using variable initializer: %s", hparams.initializer)
    if hparams.initializer == "orthogonal":
        return tf.orthogonal_initializer(gain=hparams.initializer_gain)
    elif hparams.initializer == "uniform":
        max_val = 0.1 * hparams.initializer_gain
        return tf.random_uniform_initializer(-max_val, max_val)
    elif hparams.initializer == "normal_unit_scaling":
        return tf.variance_scaling_initializer(
            hparams.initializer_gain, mode="fan_avg", distribution="normal")
    elif hparams.initializer == "uniform_unit_scaling":
        return tf.variance_scaling_initializer(
            hparams.initializer_gain, mode="fan_avg", distribution="uniform")
    elif hparams.initializer == "xavier":
        return tf.contrib.layers.xavier_initializer()
    else:
        raise ValueError("Unrecognized initializer: %s" % hparams.initializer)
Example #6
Source File: util.py From sonic_contest with MIT License
def nature_cnn(obs_batch, dense=tf.layers.dense):
    """
    Apply the CNN architecture from the Nature DQN paper.

    The result is a batch of feature vectors.
    """
    conv_kwargs = {
        'activation': tf.nn.relu,
        'kernel_initializer': tf.orthogonal_initializer(gain=math.sqrt(2))
    }
    with tf.variable_scope('layer_1'):
        cnn_1 = tf.layers.conv2d(obs_batch, 32, 8, 4, **conv_kwargs)
    with tf.variable_scope('layer_2'):
        cnn_2 = tf.layers.conv2d(cnn_1, 64, 4, 2, **conv_kwargs)
    with tf.variable_scope('layer_3'):
        cnn_3 = tf.layers.conv2d(cnn_2, 64, 3, 1, **conv_kwargs)
    flat_size = product([x.value for x in cnn_3.get_shape()[1:]])
    flat_in = tf.reshape(cnn_3, (tf.shape(cnn_3)[0], int(flat_size)))
    return dense(flat_in, 512, **conv_kwargs)
Example #7
Source File: network.py From ppo with MIT License
def make_cnn(convs, padding, inpt, initializer=None):
    if initializer is None:
        initializer = tf.orthogonal_initializer(np.sqrt(2.0))
    out = inpt
    with tf.variable_scope('convnet'):
        for num_outputs, kernel_size, stride in convs:
            out = layers.convolution2d(
                out, num_outputs=num_outputs, kernel_size=kernel_size,
                stride=stride, padding=padding, activation_fn=tf.nn.relu,
                weights_initializer=initializer)
    return out
Example #8
Source File: fc1024.py From triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'], 1024, normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })

    endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    endpoints['emb'] = tf.identity(endpoints['emb_raw'], name="out_emb")

    return endpoints
Example #9
Source File: fc1024_normalize.py From triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'], 1024, normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })

    endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    endpoints['emb'] = tf.nn.l2_normalize(endpoints['emb_raw'], -1, name="out_emb")

    return endpoints
Example #10
Source File: fc1024.py From vehicle-triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'], 1024, normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })

    endpoints['emb'] = endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')

    return endpoints
Example #11
Source File: fc1024_normalize.py From vehicle-triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'], 1024, normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })

    endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    endpoints['emb'] = tf.nn.l2_normalize(endpoints['emb_raw'], -1)

    return endpoints
Example #12
Source File: encoder.py From NAO with GNU General Public License v3.0
def __init__(self, x, y, params, mode, scope='Encoder', reuse=tf.AUTO_REUSE):
    self.x = x
    self.y = y
    self.params = params
    self.batch_size = tf.shape(x)[0]
    self.vocab_size = params['encoder_vocab_size']
    self.emb_size = params['encoder_emb_size']
    self.hidden_size = params['encoder_hidden_size']
    self.encoder_length = params['encoder_length']
    self.weight_decay = params['weight_decay']
    self.mode = mode
    self.time_major = params['time_major']
    self.is_training = self.mode == tf.estimator.ModeKeys.TRAIN

    if not self.is_training:
        self.params['encoder_dropout'] = 0.0
        self.params['mlp_dropout'] = 0.0

    #initializer = tf.orthogonal_initializer()
    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    tf.get_variable_scope().set_initializer(initializer)

    self.build_graph(scope=scope, reuse=reuse)
Example #13
Source File: encoder.py From NAO with GNU General Public License v3.0
def __init__(self, x, y, params, mode, scope='Encoder', reuse=False):
    self.x = x
    self.y = y
    self.params = params
    self.batch_size = tf.shape(x)[0]
    self.vocab_size = params['encoder_vocab_size']
    self.emb_size = params['encoder_emb_size']
    self.hidden_size = params['encoder_hidden_size']
    self.encoder_length = params['encoder_length']
    self.weight_decay = params['weight_decay']
    self.mode = mode
    self.time_major = params['time_major']
    self.weighted_loss = params['weighted_loss']
    self.is_training = self.mode == tf.estimator.ModeKeys.TRAIN

    if not self.is_training:
        self.params['encoder_dropout'] = 0.0
        self.params['mlp_dropout'] = 0.0

    #initializer = tf.orthogonal_initializer()
    self.build_graph(scope, reuse)
Example #14
Source File: operations.py From DeepPavlov with Apache License 2.0
def matmul_2d(x, out_dimension, drop_prob=None):
    '''Multiplies 2-d tensor by weights.

    Args:
        x: a tensor with shape [batch, dimension]
        out_dimension: a number

    Returns:
        a tensor with shape [batch, out_dimension]

    Raises:
    '''
    W = tf.get_variable(
        name='weights',
        shape=[x.shape[1], out_dimension],
        dtype=tf.float32,
        initializer=tf.orthogonal_initializer())
    if drop_prob is not None:
        W = tf.nn.dropout(W, drop_prob)
        log.info('W is dropout')
    return tf.matmul(x, W)
Example #15
Source File: direct.py From vehicle-triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['emb'] = endpoints['emb_raw'] = slim.fully_connected(
        endpoints['model_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    return endpoints
Example #16
Source File: decoder.py From NAO with GNU General Public License v3.0
def __init__(self, encoder_outputs, encoder_state, target_input, target, params, mode, scope=None):
    """Create the model."""
    self.params = params
    self.encoder_outputs = encoder_outputs
    self.encoder_state = encoder_state
    self.target_input = target_input
    self.target = target
    self.batch_size = tf.shape(self.target_input)[0]
    self.mode = mode
    self.vocab_size = params['decoder_vocab_size']
    self.num_layers = params['decoder_num_layers']
    self.decoder_length = params['decoder_length']
    self.time_major = params['time_major']
    self.hidden_size = params['decoder_hidden_size']
    self.weight_decay = params['weight_decay']
    self.is_traing = mode == tf.estimator.ModeKeys.TRAIN

    if not self.is_traing:
        self.params['decoder_dropout'] = 0.0

    self.branch_length = self.decoder_length // 2 // 5 // 2  # 2 types of cell, 5 nodes, 2 branches

    # Initializer
    #initializer = tf.orthogonal_initializer()
    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    tf.get_variable_scope().set_initializer(initializer)

    ## Build graph
    self.build_graph(scope=scope)
Example #17
Source File: ops.py From HyperGAN with MIT License
def orthogonal_initializer(self, gain):
    def _build(shape):
        return tf.orthogonal_initializer(gain)
    return _build
Example #18
Source File: hyperparams_builder.py From aster with MIT License
def _build_initializer(initializer):
    """Build a tf initializer from config.

    Args:
        initializer: hyperparams_pb2.Hyperparams.regularizer proto.

    Returns:
        tf initializer.

    Raises:
        ValueError: On unknown initializer.
    """
    initializer_oneof = initializer.WhichOneof('initializer_oneof')
    if initializer_oneof == 'truncated_normal_initializer':
        return tf.truncated_normal_initializer(
            mean=initializer.truncated_normal_initializer.mean,
            stddev=initializer.truncated_normal_initializer.stddev)
    if initializer_oneof == 'variance_scaling_initializer':
        enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
                           DESCRIPTOR.enum_types_by_name['Mode'])
        mode = enum_descriptor.values_by_number[
            initializer.variance_scaling_initializer.mode].name
        return layers.variance_scaling_initializer(
            factor=initializer.variance_scaling_initializer.factor,
            mode=mode,
            uniform=initializer.variance_scaling_initializer.uniform)
    if initializer_oneof == 'orthogonal_initializer':
        return tf.orthogonal_initializer(
            gain=initializer.orthogonal_initializer.gain,
            seed=initializer.orthogonal_initializer.seed)
    if initializer_oneof == 'uniform_initializer':
        return tf.random_uniform_initializer(
            minval=initializer.uniform_initializer.minval,
            maxval=initializer.uniform_initializer.maxval)
    raise ValueError('Unknown initializer function: {}'.format(initializer_oneof))
Example #19
Source File: esim.py From inferbeddings with MIT License
def _transform_input(self, sequence, sequence_length, reuse=False):
    with tf.variable_scope('transform_input', reuse=reuse) as _:
        sequence = tf.nn.dropout(sequence, keep_prob=self.dropout_keep_prob)
        cell_fw = tf.contrib.rnn.LSTMCell(self.representation_size, state_is_tuple=True, reuse=reuse,
                                          initializer=tf.orthogonal_initializer())
        cell_bw = tf.contrib.rnn.LSTMCell(self.representation_size, state_is_tuple=True, reuse=reuse,
                                          initializer=tf.orthogonal_initializer())
        outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw, cell_bw=cell_bw,
            inputs=sequence, sequence_length=sequence_length,
            dtype=tf.float32)
        return tf.concat(outputs, axis=2)
Example #20
Source File: direct_normalize.py From triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['emb_raw'] = slim.fully_connected(
        endpoints['model_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    endpoints['emb'] = tf.nn.l2_normalize(endpoints['emb_raw'], -1, name="out_emb")
    return endpoints
Example #21
Source File: network.py From ppo with MIT License
def cnn_network(convs, fcs, use_lstm, padding, inpt, masks, rnn_state,
                num_actions, lstm_unit, nenvs, step_size, scope):
    out = make_cnn(convs, padding, inpt)
    out = layers.flatten(out)
    out = make_fcs(fcs, out)
    rnn_out, rnn_state = make_lstm(
        lstm_unit, nenvs, step_size, out, masks, rnn_state)

    if use_lstm:
        out = rnn_out

    policy = layers.fully_connected(
        out, num_actions, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(0.1))
    dist = tf.distributions.Categorical(probs=tf.nn.softmax(policy))

    value = layers.fully_connected(
        out, 1, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(1.0))

    return dist, value, rnn_state
Example #22
Source File: network.py From ppo with MIT License
def make_fcs(fcs, inpt, activation=tf.nn.relu, initializer=None):
    if initializer is None:
        initializer = tf.orthogonal_initializer(np.sqrt(2.0))
    out = inpt
    with tf.variable_scope('hiddens'):
        for hidden in fcs:
            out = layers.fully_connected(out, hidden,
                                         activation_fn=activation,
                                         weights_initializer=initializer)
    return out
Example #23
Source File: SAR.py From ChID-Dataset with Apache License 2.0
def __init__(self,
             learning_rate,
             init_word_embed,
             init_idiom_embed,
             size_embed=200,
             num_units=100,  # make sure that num_units = size_embed / 2
             max_gradient_norm=5.0):
    assert size_embed == 2 * num_units
    super(Model, self).__init__()
    super(Model, self)._create_embedding(init_word_embed, init_idiom_embed)

    doc_embedding = tf.cond(self.is_train,
                            lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.word_embed_matrix, self.document), 0.5),
                            lambda: tf.nn.embedding_lookup(self.word_embed_matrix, self.document))  # [batch, length, size_embed]
    can_embedding = tf.nn.embedding_lookup(self.idiom_embed_matrix, self.candidates)  # [batch, num_labels, 10, size_embed]

    with tf.variable_scope("doc"):
        cell_fw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        cell_bw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        h_doc, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw_doc, cell_bw_doc, doc_embedding, self.doc_length,
                                                   dtype=tf.float32, scope="bi_lstm")
    state_doc = tf.concat(h_doc, 2)  # [batch, length, 2 * num_units]
    blanks_states = tf.matmul(self.locations, state_doc)  # query, [batch, labels, 2 * num_units]

    bilinear_attention = tf.get_variable("bilinear_attention", [2 * num_units, 2 * num_units], tf.float32)
    attention_matrix = tf.matmul(tf.einsum("abc,cd->abd", blanks_states, bilinear_attention),  # [batch, labels, 2 * num_units]
                                 tf.transpose(state_doc, [0, 2, 1]))  # [batch, 2 * num_units, length]
    tmp = tf.exp(attention_matrix) * tf.tile(tf.expand_dims(self.mask, axis=1), [1, tf.shape(blanks_states)[1], 1])
    attention = tf.div(tmp, tf.reduce_sum(tmp, axis=-1, keep_dims=True))
    #attention = tf.nn.softmax(attention_matrix)  # [batch, labels, length]
    state_attention = tf.matmul(attention, state_doc)  # [batch, labels, 2 * num_units]

    match_matrix = tf.einsum("abcd,abd->abc", can_embedding, state_attention)  # [batch, labels, 10]
    self.logits = tf.nn.softmax(match_matrix)

    super(Model, self)._create_loss()
    super(Model, self)._create_train_step(learning_rate, max_gradient_norm)
Example #24
Source File: LM.py From ChID-Dataset with Apache License 2.0
def __init__(self,
             learning_rate,
             init_word_embed,
             init_idiom_embed,
             size_embed=200,
             num_units=100,  # make sure that num_units = size_embed / 2
             max_gradient_norm=5.0):
    assert size_embed == 2 * num_units
    super(Model, self).__init__()
    super(Model, self)._create_embedding(init_word_embed, init_idiom_embed)

    doc_embedding = tf.cond(self.is_train,
                            lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.word_embed_matrix, self.document), 0.5),
                            lambda: tf.nn.embedding_lookup(self.word_embed_matrix, self.document))  # [batch, length, size_embed]
    can_embedding = tf.nn.embedding_lookup(self.idiom_embed_matrix, self.candidates)  # [batch, num_labels, 10, size_embed]

    with tf.variable_scope("doc"):
        cell_fw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        cell_bw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        h_doc, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw_doc, cell_bw_doc, doc_embedding, self.doc_length,
                                                   dtype=tf.float32, scope="bi_lstm")
    state_doc = tf.concat(h_doc, 2)  # [batch, length, 2 * num_units]
    blanks_states = tf.matmul(self.locations, state_doc)  # [batch, labels, 2 * num_units]

    match_matrix = tf.einsum("abcd,abd->abc", can_embedding, blanks_states)  # [batch, labels, 10]
    self.logits = tf.nn.softmax(match_matrix)

    super(Model, self)._create_loss()
    super(Model, self)._create_train_step(learning_rate, max_gradient_norm)
Example #25
Source File: classifiers.py From Parser-v3 with Apache License 2.0
def hiddens(layer, hidden_sizes, hidden_func=nonlin.relu, hidden_keep_prob=1.):
    """"""
    layer_shape = nn.get_sizes(layer)
    input_size = layer_shape.pop()
    weights = []
    for i, hidden_size in enumerate(hidden_sizes):
        weights.append(tf.get_variable('Weights-%d' % i, shape=[input_size, hidden_size]))#, initializer=tf.orthogonal_initializer))
    weights = tf.concat(weights, axis=1)
    hidden_size = sum(hidden_sizes)
    biases = tf.get_variable('Biases', shape=[hidden_size], initializer=tf.zeros_initializer)
    if hidden_keep_prob < 1.:
        if len(layer_shape) > 1:
            noise_shape = tf.stack(layer_shape[:-1] + [1, input_size])
        else:
            noise_shape = None
        layer = nn.dropout(layer, hidden_keep_prob, noise_shape=noise_shape)
    layer = nn.reshape(layer, [-1, input_size])
    layer = tf.matmul(layer, weights) + biases
    layer = hidden_func(layer)
    layer = nn.reshape(layer, layer_shape + [hidden_size])
    layers = tf.split(layer, hidden_sizes, axis=-1)
    return layers

#===============================================================
Example #26
Source File: SAR.py From ChID-Dataset with Apache License 2.0
def __init__(self,
             learning_rate,
             init_word_embed,
             init_idiom_embed,
             size_embed=200,
             num_units=100,  # make sure that num_units = size_embed / 2
             max_gradient_norm=5.0):
    assert size_embed == 2 * num_units
    super(Model, self).__init__()
    super(Model, self)._create_embedding(init_word_embed, init_idiom_embed)

    doc_embedding = tf.cond(self.is_train,
                            lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.word_embed_matrix, self.document), 0.5),
                            lambda: tf.nn.embedding_lookup(self.word_embed_matrix, self.document))  # [batch, length, size_embed]
    can_embedding = tf.nn.embedding_lookup(self.idiom_embed_matrix, self.candidates)  # [batch, 10, size_embed]

    with tf.variable_scope("doc"):
        cell_fw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        cell_bw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        h_doc, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw_doc, cell_bw_doc, doc_embedding, self.doc_length,
                                                   dtype=tf.float32, scope="bi_lstm")
    state_doc = tf.concat(h_doc, 2)  # [batch, length, 2 * num_units]
    blanks_states = tf.matmul(self.locations, state_doc)  # query, [batch, labels, 2 * num_units]

    bilinear_attention = tf.get_variable("bilinear_attention", [2 * num_units, 2 * num_units], tf.float32)
    attention_matrix = tf.matmul(tf.einsum("abc,cd->abd", blanks_states, bilinear_attention),  # [batch, labels, 2 * num_units]
                                 tf.transpose(state_doc, [0, 2, 1]))  # [batch, 2 * num_units, length]
    tmp = tf.exp(attention_matrix) * tf.tile(tf.expand_dims(self.mask, axis=1), [1, tf.shape(blanks_states)[1], 1])
    attention = tf.div(tmp, tf.reduce_sum(tmp, axis=-1, keep_dims=True))
    #attention = tf.nn.softmax(attention_matrix)  # [batch, labels, length]
    state_attention = tf.matmul(attention, state_doc)  # [batch, labels, 2 * num_units]

    match_matrix = tf.matmul(state_attention, tf.transpose(can_embedding, [0, 2, 1]))  # [batch, labels, 10]
    self.logits = tf.nn.softmax(match_matrix)

    super(Model, self)._create_loss()
    super(Model, self)._create_train_step(learning_rate, max_gradient_norm)
Example #27
Source File: LM.py From ChID-Dataset with Apache License 2.0
def __init__(self,
             learning_rate,
             init_word_embed,
             init_idiom_embed,
             size_embed=200,
             num_units=100,  # make sure that num_units = size_embed / 2
             max_gradient_norm=5.0):
    assert size_embed == 2 * num_units
    super(Model, self).__init__()
    super(Model, self)._create_embedding(init_word_embed, init_idiom_embed)

    doc_embedding = tf.cond(self.is_train,
                            lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.word_embed_matrix, self.document), 0.5),
                            lambda: tf.nn.embedding_lookup(self.word_embed_matrix, self.document))  # [batch, length, size_embed]
    can_embedding = tf.nn.embedding_lookup(self.idiom_embed_matrix, self.candidates)  # [batch, 10, size_embed]

    with tf.variable_scope("doc"):
        cell_fw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        cell_bw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        h_doc, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw_doc, cell_bw_doc, doc_embedding, self.doc_length,
                                                   dtype=tf.float32, scope="bi_lstm")
    state_doc = tf.concat(h_doc, 2)  # [batch, length, 2 * num_units]
    blanks_states = tf.matmul(self.locations, state_doc)  # [batch, labels, 2 * num_units]

    match_matrix = tf.matmul(blanks_states, tf.transpose(can_embedding, [0, 2, 1]))  # [batch, labels, 10]
    self.logits = tf.nn.softmax(match_matrix)

    super(Model, self)._create_loss()
    super(Model, self)._create_train_step(learning_rate, max_gradient_norm)
Example #28
Source File: ortho_gru_cell.py From neuralmonkey with BSD 3-Clause "New" or "Revised" License
def call(self, inputs, state):
    """Gated recurrent unit (GRU) with nunits cells."""
    with tf.variable_scope("gates"):
        input_to_gates = tf.layers.dense(
            inputs, 2 * self._num_units, name="input_proj",
            use_bias=self.use_input_bias)

        # Nematus does the orthogonal initialization probably differently
        state_to_gates = tf.layers.dense(
            state, 2 * self._num_units,
            use_bias=self.use_state_bias,
            kernel_initializer=orthogonal_initializer(),
            name="state_proj")

        gates_input = state_to_gates + input_to_gates
        reset, update = tf.split(
            tf.sigmoid(gates_input), num_or_size_splits=2, axis=1)

    with tf.variable_scope("candidate"):
        input_to_candidate = tf.layers.dense(
            inputs, self._num_units, use_bias=self.use_input_bias,
            name="input_proj")

        state_to_candidate = tf.layers.dense(
            state, self._num_units, use_bias=self.use_state_bias,
            kernel_initializer=orthogonal_initializer(),
            name="state_proj")

        candidate = self._activation(
            state_to_candidate * reset + input_to_candidate)

    new_state = update * state + (1 - update) * candidate
    return new_state, new_state
Example #29
Source File: decoder.py From NAO with GNU General Public License v3.0
def __init__(self, encoder_outputs, encoder_state, target_input, target, params, mode, scope=None, reuse=False):
    """Create the model."""
    self.params = params
    self.encoder_outputs = encoder_outputs
    self.encoder_state = encoder_state
    self.target_input = target_input
    self.target = target
    self.batch_size = tf.shape(self.target_input)[0]
    self.mode = mode
    self.vocab_size = params['decoder_vocab_size']
    self.num_layers = params['decoder_num_layers']
    self.time_major = params['time_major']
    self.hidden_size = params['decoder_hidden_size']
    self.weight_decay = params['weight_decay']
    self.wn = params['wn']
    self.is_traing = mode == tf.estimator.ModeKeys.TRAIN

    if not self.is_traing:
        self.params['decoder_dropout'] = 0.0

    # Initializer
    #initializer = tf.orthogonal_initializer()

    ## Build graph
    self.build_graph(scope, reuse)
Example #30
Source File: ortho_gru_cell.py From neuralmonkey with BSD 3-Clause "New" or "Revised" License
def __init__(self, num_units, activation=None, reuse=None):
    tf.contrib.rnn.GRUCell.__init__(
        self, num_units, activation, reuse,
        kernel_initializer=tf.orthogonal_initializer())