Python tensorflow.python.ops.rnn.bidirectional_dynamic_rnn() Examples
The following are 15 code examples of tensorflow.python.ops.rnn.bidirectional_dynamic_rnn(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.python.ops.rnn, or try the search function.
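Before the examples, here is a minimal, self-contained sketch of a typical bidirectional_dynamic_rnn() call (TF 1.x graph mode; the cell sizes, placeholder shapes, and variable names below are illustrative and are not taken from any of the projects listed):

# A minimal TF 1.x sketch; shapes and sizes here are assumptions for illustration only.
import tensorflow as tf
from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn

batch_size, max_time, input_dim, num_units = 32, 20, 50, 64
inputs = tf.placeholder(tf.float32, [batch_size, max_time, input_dim])  # batch-major inputs
seq_len = tf.placeholder(tf.int32, [batch_size])                        # true length of each sequence

cell_fw = tf.nn.rnn_cell.LSTMCell(num_units)  # forward cell
cell_bw = tf.nn.rnn_cell.LSTMCell(num_units)  # backward cell

# outputs is a (forward, backward) pair; each tensor has shape [batch_size, max_time, num_units]
(out_fw, out_bw), (state_fw, state_bw) = bidirectional_dynamic_rnn(
    cell_fw, cell_bw, inputs, sequence_length=seq_len, dtype=tf.float32)

# Most of the examples below concatenate the two directions along the last axis.
bi_outputs = tf.concat([out_fw, out_bw], axis=-1)  # shape [batch_size, max_time, 2 * num_units]

As the examples show, the second return value is a pair of final states (for LSTM cells, each is an LSTMStateTuple of (c, h)), which several projects below use to read off the last hidden state of each direction.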
Example #1
Source File: BidirectLSTMLayer.py From NPNs with GNU General Public License v3.0 | 5 votes |
def __call__(self, inputs, seq_len):
    if self.output_dim % 2 != 0:
        print("The output dimension of BidirectLSTMLayer should be even. ")
        exit(-1)
    with tf.variable_scope(self.scope) as scope:
        self.check_reuse(scope)
        scope.reuse_variables()
        cell = LSTMCell(self.output_dim / 2, initializer=self.initializer(dtype=inputs.dtype))
        # rnn.bidirectional_dynamic_rnn(cell, cell, inputs, seq_len, dtype=inputs.dtype)
        return rnn.bidirectional_dynamic_rnn(cell, cell, inputs, seq_len, dtype=inputs.dtype)
Example #2
Source File: BidirectLSTMLayer.py From AdaScaling with GNU General Public License v3.0 | 5 votes |
def __call__(self, inputs, seq_len):
    if self.output_dim % 2 != 0:
        print("The output dimension of BidirectLSTMLayer should be even. ")
        exit(-1)
    with tf.variable_scope(self.scope) as scope:
        self.check_reuse(scope)
        # scope.reuse_variables()
        f_cell = LSTMCell(self.output_dim / 2, initializer=self.initializer(dtype=inputs.dtype))
        b_cell = LSTMCell(self.output_dim / 2, initializer=self.initializer(dtype=inputs.dtype))
        # rnn.bidirectional_dynamic_rnn(cell, cell, inputs, seq_len, dtype=inputs.dtype)
        return rnn.bidirectional_dynamic_rnn(f_cell, b_cell, inputs, seq_len, dtype=inputs.dtype)
Example #3
Source File: rnns.py From AmusingPythonCodes with MIT License | 5 votes |
def __call__(self, inputs, seq_len, return_last_state=False, time_major=False):
    assert not time_major, "BiRNN class cannot support time_major currently"
    with tf.variable_scope(self.scope):
        flat_inputs = flatten(inputs, keep=2)  # reshape to [-1, max_time, dim]
        seq_len = flatten(seq_len, keep=0)  # reshape to [x] (one dimension sequence)
        outputs, ((_, h_fw), (_, h_bw)) = bidirectional_dynamic_rnn(self.cell_fw, self.cell_bw, flat_inputs,
                                                                    sequence_length=seq_len, dtype=tf.float32)
        if return_last_state:  # return last states
            output = tf.concat([h_fw, h_bw], axis=-1)  # shape = [-1, 2 * num_units]
            output = reconstruct(output, ref=inputs, keep=2, remove_shape=1)  # remove the max_time shape
        else:
            output = tf.concat(outputs, axis=-1)  # shape = [-1, max_time, 2 * num_units]
            output = reconstruct(output, ref=inputs, keep=2)  # reshape to same as inputs, except the last two dim
        return output
Example #4
Source File: multi_attention_model.py From neural_sequence_labeling with MIT License | 5 votes |
def _build_model_op(self):
    with tf.variable_scope("bi_directional_rnn"):
        cell_fw = self._create_single_rnn_cell(self.cfg["num_units"])
        cell_bw = self._create_single_rnn_cell(self.cfg["num_units"])
        if self.cfg["use_residual"]:
            self.word_emb = tf.layers.dense(self.word_emb, units=self.cfg["num_units"], use_bias=False,
                                            name="word_input_project")
            if self.cfg["use_chars"]:
                self.chars_emb = tf.layers.dense(self.chars_emb, units=self.cfg["num_units"], use_bias=False,
                                                 name="chars_input_project")
        rnn_outs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, sequence_length=self.seq_len,
                                                dtype=tf.float32, scope="bi_rnn")
        rnn_outs = tf.concat(rnn_outs, axis=-1)
        print("Bi-directional RNN output shape on word: {}".format(rnn_outs.get_shape().as_list()))
        if self.cfg["use_chars"]:
            tf.get_variable_scope().reuse_variables()
            chars_rnn_outs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.chars_emb, dtype=tf.float32,
                                                          sequence_length=self.seq_len, scope="bi_rnn")
            chars_rnn_outs = tf.concat(chars_rnn_outs, axis=-1)
            print("Bi-directional RNN output shape on chars: {}".format(chars_rnn_outs.get_shape().as_list()))
            rnn_outs = rnn_outs + chars_rnn_outs
        rnn_outs = layer_normalize(rnn_outs)

    with tf.variable_scope("multi_head_attention"):
        attn_outs = multi_head_attention(rnn_outs, rnn_outs, self.cfg["num_heads"], self.cfg["attention_size"],
                                         drop_rate=self.attn_drop_rate, is_train=self.is_train)
        if self.cfg["use_residual"]:
            attn_outs = attn_outs + rnn_outs
        attn_outs = layer_normalize(attn_outs)  # residual connection and layer norm
        print("multi-heads attention output shape: {}".format(attn_outs.get_shape().as_list()))

    with tf.variable_scope("projection"):
        self.logits = tf.layers.dense(attn_outs, units=self.tag_vocab_size, use_bias=True)
        print("logits shape: {}".format(self.logits.get_shape().as_list()))
Example #5
Source File: nns.py From neural_sequence_labeling with MIT License | 5 votes |
def __call__(self, inputs, seq_len, use_last_state=False, time_major=False):
    assert not time_major, "BiRNN class cannot support time_major currently"
    with tf.variable_scope(self.scope):
        flat_inputs = flatten(inputs, keep=2)  # reshape to [-1, max_time, dim]
        seq_len = flatten(seq_len, keep=0)  # reshape to [x] (one dimension sequence)
        outputs, ((_, h_fw), (_, h_bw)) = bidirectional_dynamic_rnn(self.cell_fw, self.cell_bw, flat_inputs,
                                                                    sequence_length=seq_len, dtype=tf.float32)
        if use_last_state:  # return last states
            output = tf.concat([h_fw, h_bw], axis=-1)  # shape = [-1, 2 * num_units]
            output = reconstruct(output, ref=inputs, keep=2, remove_shape=1)  # remove the max_time shape
        else:
            output = tf.concat(outputs, axis=-1)  # shape = [-1, max_time, 2 * num_units]
            output = reconstruct(output, ref=inputs, keep=2)  # reshape to same as inputs, except the last two dim
        return output
Example #6
Source File: blstm_cnn_crf_model.py From neural_sequence_labeling with MIT License | 5 votes |
def _build_model_op(self):
    with tf.variable_scope("bi_directional_rnn"):
        cell_fw = self._create_rnn_cell()
        cell_bw = self._create_rnn_cell()
        if self.cfg["use_stack_rnn"]:
            rnn_outs, *_ = stack_bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, dtype=tf.float32,
                                                           sequence_length=self.seq_len)
        else:
            rnn_outs, *_ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, sequence_length=self.seq_len,
                                                     dtype=tf.float32)
        rnn_outs = tf.concat(rnn_outs, axis=-1)
        rnn_outs = tf.layers.dropout(rnn_outs, rate=self.drop_rate, training=self.is_train)
        if self.cfg["use_residual"]:
            word_project = tf.layers.dense(self.word_emb, units=2 * self.cfg["num_units"], use_bias=False)
            rnn_outs = rnn_outs + word_project
        outputs = layer_normalize(rnn_outs) if self.cfg["use_layer_norm"] else rnn_outs
        print("rnn output shape: {}".format(outputs.get_shape().as_list()))

    if self.cfg["use_attention"] == "self_attention":
        with tf.variable_scope("self_attention"):
            attn_outs = multi_head_attention(outputs, outputs, self.cfg["num_heads"], self.cfg["attention_size"],
                                             drop_rate=self.drop_rate, is_train=self.is_train)
            if self.cfg["use_residual"]:
                attn_outs = attn_outs + outputs
            outputs = layer_normalize(attn_outs) if self.cfg["use_layer_norm"] else attn_outs
            print("self-attention output shape: {}".format(outputs.get_shape().as_list()))
    elif self.cfg["use_attention"] == "normal_attention":
        with tf.variable_scope("normal_attention"):
            context = tf.transpose(outputs, [1, 0, 2])
            p_context = tf.layers.dense(outputs, units=2 * self.cfg["num_units"], use_bias=False)
            p_context = tf.transpose(p_context, [1, 0, 2])
            attn_cell = AttentionCell(self.cfg["num_units"], context, p_context)  # time major based
            attn_outs, _ = dynamic_rnn(attn_cell, context, sequence_length=self.seq_len, time_major=True,
                                       dtype=tf.float32)
            outputs = tf.transpose(attn_outs, [1, 0, 2])
            print("attention output shape: {}".format(outputs.get_shape().as_list()))

    with tf.variable_scope("project"):
        self.logits = tf.layers.dense(outputs, units=self.tag_vocab_size, use_bias=True)
        print("logits shape: {}".format(self.logits.get_shape().as_list()))
Example #7
Source File: dynamic_brnn.py From Automatic_Speech_Recognition with MIT License | 5 votes |
def build_multi_dynamic_brnn(args, maxTimeSteps, inputX, cell_fn, seqLengths, time_major=True):
    hid_input = inputX
    for i in range(args.num_layer):
        scope = 'DBRNN_' + str(i + 1)
        forward_cell = cell_fn(args.num_hidden, activation=args.activation)
        backward_cell = cell_fn(args.num_hidden, activation=args.activation)
        # tensor of shape: [max_time, batch_size, input_size]
        outputs, output_states = bidirectional_dynamic_rnn(forward_cell, backward_cell,
                                                           inputs=hid_input,
                                                           dtype=tf.float32,
                                                           sequence_length=seqLengths,
                                                           time_major=True,
                                                           scope=scope)
        # forward output, backward output
        # tensor of shape: [max_time, batch_size, input_size]
        output_fw, output_bw = outputs
        # forward states, backward states
        output_state_fw, output_state_bw = output_states
        # output_fb = tf.concat(2, [output_fw, output_bw])
        output_fb = tf.concat([output_fw, output_bw], 2)
        shape = output_fb.get_shape().as_list()
        output_fb = tf.reshape(output_fb, [shape[0], shape[1], 2, int(shape[2] / 2)])
        hidden = tf.reduce_sum(output_fb, 2)
        hidden = dropout(hidden, args.keep_prob, (args.mode == 'train'))
        if i != args.num_layer - 1:
            hid_input = hidden
        else:
            outputXrs = tf.reshape(hidden, [-1, args.num_hidden])
            # output_list = tf.split(0, maxTimeSteps, outputXrs)
            output_list = tf.split(outputXrs, maxTimeSteps, 0)
            fbHrs = [tf.reshape(t, [args.batch_size, args.num_hidden]) for t in output_list]
    return fbHrs
Example #8
Source File: util_rnn.py From SemAIDA with Apache License 2.0 | 5 votes |
def __init__(self, sequence_length, num_classes, channel_num, rnn_hidden_size, attention_size):
    self.input_x = tf.placeholder(tf.float32, [None, sequence_length, channel_num], name="input_x")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

    # Bidirectional RNN
    self.rnn_outputs, _ = bi_rnn(GRUCell(rnn_hidden_size), GRUCell(rnn_hidden_size),
                                 inputs=self.input_x, dtype=tf.float32)

    # Attention layer
    with tf.name_scope('Attention_layer'):
        self.att_output, alphas = attention(self.rnn_outputs, attention_size, return_alphas=True)
        tf.summary.histogram('alphas', alphas)

    # Dropout layer
    with tf.name_scope("dropout"):
        self.att_drop = tf.nn.dropout(self.att_output, self.dropout_keep_prob)

    # FC layer
    with tf.name_scope("output"):
        FC_W = tf.get_variable("FC_W", shape=[rnn_hidden_size * 2, num_classes],
                               initializer=tf.contrib.layers.xavier_initializer())
        FC_b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="FC_b")
        self.fc_out = tf.nn.xw_plus_b(self.att_drop, FC_W, FC_b, name="FC_out")
        self.scores = tf.nn.softmax(self.fc_out, name='scores')
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.fc_out, labels=self.input_y)
        self.loss = tf.reduce_mean(losses)

    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
Example #9
Source File: nns.py From Dense_BiLSTM with MIT License | 5 votes |
def __call__(self, inputs, seq_len, return_last_state=False):
    with tf.variable_scope(self.scope):
        if return_last_state:
            _, ((_, output_fw), (_, output_bw)) = bidirectional_dynamic_rnn(self.cell_fw, self.cell_bw, inputs,
                                                                            sequence_length=seq_len,
                                                                            dtype=tf.float32)
            output = tf.concat([output_fw, output_bw], axis=-1)
        else:
            (output_fw, output_bw), _ = bidirectional_dynamic_rnn(self.cell_fw, self.cell_bw, inputs,
                                                                  sequence_length=seq_len, dtype=tf.float32)
            output = tf.concat([output_fw, output_bw], axis=-1)
        return output
Example #10
Source File: recurrent_layers.py From document-qa with Apache License 2.0 | 5 votes |
def apply(self, is_train, x, mask=None):
    states = bidirectional_dynamic_rnn(self.cell_spec(is_train), self.cell_spec(is_train), x, mask,
                                       dtype=tf.float32)[1]
    output = []
    for state in states:
        for i, x in enumerate(state._fields):
            if x == self.output:
                output.append(state[i])
    if self.merge is not None:
        return self.merge.apply(is_train, output[0], output[1])
    else:
        return tf.concat(output, axis=1)
Example #11
Source File: recurrent_layers.py From document-qa with Apache License 2.0 | 5 votes |
def apply(self, is_train, inputs, mask=None):
    fw = self.fw(is_train)
    bw_spec = self.fw if self.bw is None else self.bw
    bw = bw_spec(is_train)
    if self.merge is None:
        return tf.concat(bidirectional_dynamic_rnn(fw, bw, inputs, mask, swap_memory=self.swap_memory,
                                                   dtype=tf.float32)[0], 2)
    else:
        fw, bw = bidirectional_dynamic_rnn(fw, bw, inputs, mask, swap_memory=self.swap_memory,
                                           dtype=tf.float32)[0]
        return self.merge.apply(is_train, fw, bw)  # TODO this should be in a different scope
Example #12
Source File: seq2seq_model.py From AmusingPythonCodes with MIT License | 4 votes |
def _build_model(self):
    with tf.variable_scope("embeddings"):
        self.source_embs = tf.get_variable(name="source_embs", shape=[self.cfg.source_vocab_size, self.cfg.emb_dim],
                                           dtype=tf.float32, trainable=True)
        self.target_embs = tf.get_variable(name="embeddings", shape=[self.cfg.vocab_size, self.cfg.emb_dim],
                                           dtype=tf.float32, trainable=True)
        source_emb = tf.nn.embedding_lookup(self.source_embs, self.enc_source)
        target_emb = tf.nn.embedding_lookup(self.target_embs, self.dec_target_in)
        print("source embedding shape: {}".format(source_emb.get_shape().as_list()))
        print("target input embedding shape: {}".format(target_emb.get_shape().as_list()))

    with tf.variable_scope("encoder"):
        if self.cfg.use_bi_rnn:
            with tf.variable_scope("bi-directional_rnn"):
                cell_fw = GRUCell(self.cfg.num_units) if self.cfg.cell_type == "gru" else \
                    LSTMCell(self.cfg.num_units)
                cell_bw = GRUCell(self.cfg.num_units) if self.cfg.cell_type == "gru" else \
                    LSTMCell(self.cfg.num_units)
                bi_outputs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, source_emb, dtype=tf.float32,
                                                          sequence_length=self.enc_seq_len)
                source_emb = tf.concat(bi_outputs, axis=-1)
                print("bi-directional rnn output shape: {}".format(source_emb.get_shape().as_list()))
        input_project = tf.layers.Dense(units=self.cfg.num_units, dtype=tf.float32, name="input_projection")
        source_emb = input_project(source_emb)
        print("encoder input projection shape: {}".format(source_emb.get_shape().as_list()))
        enc_cells = self._create_encoder_cell()
        self.enc_outputs, self.enc_states = dynamic_rnn(enc_cells, source_emb, sequence_length=self.enc_seq_len,
                                                        dtype=tf.float32)
        print("encoder output shape: {}".format(self.enc_outputs.get_shape().as_list()))

    with tf.variable_scope("decoder"):
        self.max_dec_seq_len = tf.reduce_max(self.dec_seq_len, name="max_dec_seq_len")
        self.dec_cells, self.dec_init_states = self._create_decoder_cell()
        # define input and output projection layer
        input_project = tf.layers.Dense(units=self.cfg.num_units, name="input_projection")
        self.dense_layer = tf.layers.Dense(units=self.cfg.vocab_size, name="output_projection")

        if self.mode == "train":  # either "train" or "decode"
            # for training
            target_emb = input_project(target_emb)
            train_helper = TrainingHelper(target_emb, sequence_length=self.dec_seq_len, name="train_helper")
            train_decoder = BasicDecoder(self.dec_cells, helper=train_helper, output_layer=self.dense_layer,
                                         initial_state=self.dec_init_states)
            self.dec_output, _, _ = dynamic_decode(train_decoder, impute_finished=True,
                                                   maximum_iterations=self.max_dec_seq_len)
            print("decoder output shape: {} (vocab size)".format(self.dec_output.rnn_output.get_shape().as_list()))

        # for decode
        start_token = tf.ones(shape=[self.batch_size, ], dtype=tf.int32) * self.cfg.target_dict[GO]
        end_token = self.cfg.target_dict[EOS]

        def inputs_project(inputs):
            return input_project(tf.nn.embedding_lookup(self.target_embs, inputs))

        dec_helper = GreedyEmbeddingHelper(embedding=inputs_project, start_tokens=start_token,
                                           end_token=end_token)
        infer_decoder = BasicDecoder(self.dec_cells, helper=dec_helper, initial_state=self.dec_init_states,
                                     output_layer=self.dense_layer)
        infer_dec_output, _, _ = dynamic_decode(infer_decoder, maximum_iterations=self.cfg.maximum_iterations)
        self.dec_predicts = infer_dec_output.sample_id
Example #13
Source File: tf_attention.py From Sarcasm-Detection with MIT License | 4 votes |
def build_attention_model():
    # Different placeholders
    with tf.name_scope('Inputs'):
        batch_ph = tf.placeholder(tf.int32, [None, SEQUENCE_LENGTH], name='batch_ph')
        target_ph = tf.placeholder(tf.float32, [None], name='target_ph')
        seq_len_ph = tf.placeholder(tf.int32, [None], name='seq_len_ph')
        keep_prob_ph = tf.placeholder(tf.float32, name='keep_prob_ph')

    # Embedding layer
    with tf.name_scope('Embedding_layer'):
        embeddings_var = tf.Variable(tf.random_uniform([vocabulary_size, EMBEDDING_DIM], -1.0, 1.0), trainable=True)
        tf.summary.histogram('embeddings_var', embeddings_var)
        batch_embedded = tf.nn.embedding_lookup(embeddings_var, batch_ph)

    # (Bi-)RNN layer(-s)
    rnn_outputs, _ = bi_rnn(GRUCell(HIDDEN_UNITS), GRUCell(HIDDEN_UNITS), inputs=batch_embedded,
                            sequence_length=seq_len_ph, dtype=tf.float32)
    tf.summary.histogram('RNN_outputs', rnn_outputs)

    # Attention layer
    with tf.name_scope('Attention_layer'):
        attention_output, alphas = attention(rnn_outputs, ATTENTION_UNITS, return_alphas=True)
        tf.summary.histogram('alphas', alphas)

    # Dropout
    drop = tf.nn.dropout(attention_output, keep_prob_ph)

    # Fully connected layer
    with tf.name_scope('Fully_connected_layer'):
        W = tf.Variable(
            tf.truncated_normal([HIDDEN_UNITS * 2, 1], stddev=0.1))  # Hidden size is multiplied by 2 for Bi-RNN
        b = tf.Variable(tf.constant(0., shape=[1]))
        y_hat = tf.nn.xw_plus_b(drop, W, b)
        y_hat = tf.squeeze(y_hat)
        tf.summary.histogram('W', W)

    with tf.name_scope('Metrics'):
        # Cross-entropy loss and optimizer initialization
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_hat, labels=target_ph))
        tf.summary.scalar('loss', loss)
        optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)
        # Accuracy metric
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.sigmoid(y_hat)), target_ph), tf.float32))
        tf.summary.scalar('accuracy', accuracy)

    merged = tf.summary.merge_all()

    # Batch generators
    train_batch_generator = batch_generator(X_train, y_train, BATCH_SIZE)
    test_batch_generator = batch_generator(X_test, y_test, BATCH_SIZE)
    session_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
    saver = tf.train.Saver()
    return batch_ph, target_ph, seq_len_ph, keep_prob_ph, alphas, loss, accuracy, optimizer, merged, \
        train_batch_generator, test_batch_generator, session_conf, saver
Example #14
Source File: adversarial_abblstm.py From Text-Classification with Apache License 2.0 | 4 votes |
def build_graph(self, vocab_freq, word2idx):
    vocab_freqs = tf.constant(self._get_freq(vocab_freq, word2idx), dtype=tf.float32, shape=(self.vocab_size, 1))
    weights = vocab_freqs / tf.reduce_sum(vocab_freqs)
    embeddings_var = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_size], -1.0, 1.0),
                                 trainable=True, name="embedding_var")
    embedding_norm = normalize(embeddings_var, weights)
    batch_embedded = tf.nn.embedding_lookup(embedding_norm, self.x)

    W = tf.Variable(tf.random_normal([self.hidden_size], stddev=0.1))
    W_fc = tf.Variable(tf.truncated_normal([self.hidden_size, self.n_class], stddev=0.1))
    b_fc = tf.Variable(tf.constant(0., shape=[self.n_class]))

    def cal_loss_logit(embedded, keep_prob, reuse=True, scope="loss"):
        with tf.variable_scope(scope, reuse=reuse) as scope:
            rnn_outputs, _ = bi_rnn(BasicLSTMCell(self.hidden_size), BasicLSTMCell(self.hidden_size),
                                    inputs=embedded, dtype=tf.float32)
            # Attention
            H = tf.add(rnn_outputs[0], rnn_outputs[1])  # fw + bw
            M = tf.tanh(H)  # M = tanh(H)  (batch_size, seq_len, HIDDEN_SIZE)
            # alpha (bs * sl, 1)
            alpha = tf.nn.softmax(tf.matmul(tf.reshape(M, [-1, self.hidden_size]), tf.reshape(W, [-1, 1])))
            r = tf.matmul(tf.transpose(H, [0, 2, 1]),
                          tf.reshape(alpha, [-1, self.max_len, 1]))  # supposed to be (batch_size * HIDDEN_SIZE, 1)
            r = tf.squeeze(r)
            h_star = tf.tanh(r)
            drop = tf.nn.dropout(h_star, keep_prob)
            # Fully connected layer (dense layer)
            y_hat = tf.nn.xw_plus_b(drop, W_fc, b_fc)
            return y_hat, tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_hat,
                                                                                        labels=self.label))

    logits, self.cls_loss = cal_loss_logit(batch_embedded, self.keep_prob, reuse=False)
    embedding_perturbated = self._add_perturbation(batch_embedded, self.cls_loss)
    adv_logits, self.adv_loss = cal_loss_logit(embedding_perturbated, self.keep_prob, reuse=True)
    self.loss = self.cls_loss + self.adv_loss

    # optimization
    loss_to_minimize = self.loss
    tvars = tf.trainable_variables()
    gradients = tf.gradients(loss_to_minimize, tvars,
                             aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)
    grads, global_norm = tf.clip_by_global_norm(gradients, 1.0)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    self.train_op = self.optimizer.apply_gradients(zip(grads, tvars), global_step=self.global_step,
                                                   name='train_step')
    self.prediction = tf.argmax(tf.nn.softmax(logits), 1)
    print("graph built successfully!")
Example #15
Source File: attn_bi_lstm.py From Text-Classification with Apache License 2.0 | 4 votes |
def build_graph(self):
    print("building graph")
    # Word embedding
    embeddings_var = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_size], -1.0, 1.0),
                                 trainable=True)
    batch_embedded = tf.nn.embedding_lookup(embeddings_var, self.x)

    rnn_outputs, _ = bi_rnn(BasicLSTMCell(self.hidden_size), BasicLSTMCell(self.hidden_size),
                            inputs=batch_embedded, dtype=tf.float32)
    fw_outputs, bw_outputs = rnn_outputs

    W = tf.Variable(tf.random_normal([self.hidden_size], stddev=0.1))
    H = fw_outputs + bw_outputs  # (batch_size, seq_len, HIDDEN_SIZE)
    M = tf.tanh(H)  # M = tanh(H)  (batch_size, seq_len, HIDDEN_SIZE)

    self.alpha = tf.nn.softmax(tf.reshape(tf.matmul(tf.reshape(M, [-1, self.hidden_size]),
                                                    tf.reshape(W, [-1, 1])),
                                          (-1, self.max_len)))  # batch_size x seq_len
    r = tf.matmul(tf.transpose(H, [0, 2, 1]), tf.reshape(self.alpha, [-1, self.max_len, 1]))
    r = tf.squeeze(r)
    h_star = tf.tanh(r)  # (batch, HIDDEN_SIZE)
    h_drop = tf.nn.dropout(h_star, self.keep_prob)

    # Fully connected layer (dense layer)
    FC_W = tf.Variable(tf.truncated_normal([self.hidden_size, self.n_class], stddev=0.1))
    FC_b = tf.Variable(tf.constant(0., shape=[self.n_class]))
    y_hat = tf.nn.xw_plus_b(h_drop, FC_W, FC_b)

    self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_hat, labels=self.label))

    # prediction
    self.prediction = tf.argmax(tf.nn.softmax(y_hat), 1)

    # optimization
    loss_to_minimize = self.loss
    tvars = tf.trainable_variables()
    gradients = tf.gradients(loss_to_minimize, tvars,
                             aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)
    grads, global_norm = tf.clip_by_global_norm(gradients, 1.0)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    self.train_op = self.optimizer.apply_gradients(zip(grads, tvars), global_step=self.global_step,
                                                   name='train_step')
    print("graph built successfully!")