Python tensorflow.python.ops.rnn.bidirectional_dynamic_rnn() Examples
The following are 15 code examples of tensorflow.python.ops.rnn.bidirectional_dynamic_rnn(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.python.ops.rnn, or try the search function.
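Before the examples, here is a minimal, self-contained sketch of a typical bidirectional_dynamic_rnn() call (TF 1.x graph mode; the cell sizes, placeholder shapes, and variable names below are illustrative and are not taken from any of the projects listed):

# A minimal TF 1.x sketch; shapes and sizes here are assumptions for illustration only.
import tensorflow as tf
from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn

batch_size, max_time, input_dim, num_units = 32, 20, 50, 64
inputs = tf.placeholder(tf.float32, [batch_size, max_time, input_dim])  # batch-major inputs
seq_len = tf.placeholder(tf.int32, [batch_size])                        # true length of each sequence

cell_fw = tf.nn.rnn_cell.LSTMCell(num_units)  # forward cell
cell_bw = tf.nn.rnn_cell.LSTMCell(num_units)  # backward cell

# outputs is a (forward, backward) pair; each tensor has shape [batch_size, max_time, num_units]
(out_fw, out_bw), (state_fw, state_bw) = bidirectional_dynamic_rnn(
    cell_fw, cell_bw, inputs, sequence_length=seq_len, dtype=tf.float32)

# Most of the examples below concatenate the two directions along the last axis.
bi_outputs = tf.concat([out_fw, out_bw], axis=-1)  # shape [batch_size, max_time, 2 * num_units]

As the examples show, the second return value is a pair of final states (for LSTM cells, each is an LSTMStateTuple of (c, h)), which several projects below use to read off the last hidden state of each direction.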
Example #1
Source File: BidirectLSTMLayer.py From NPNs with GNU General Public License v3.0 | 5 votes |
def __call__(self, inputs, seq_len):
    if self.output_dim % 2 != 0:
        print("The output dimension of BidirectLSTMLayer should be even. ")
        exit(-1)
    with tf.variable_scope(self.scope) as scope:
        self.check_reuse(scope)
        scope.reuse_variables()
        cell = LSTMCell(self.output_dim / 2, initializer=self.initializer(dtype=inputs.dtype))
        # rnn.bidirectional_dynamic_rnn(cell, cell, inputs, seq_len, dtype=inputs.dtype)
        return rnn.bidirectional_dynamic_rnn(cell, cell, inputs, seq_len, dtype=inputs.dtype)
Example #2
Source File: BidirectLSTMLayer.py From AdaScaling with GNU General Public License v3.0 | 5 votes |
def __call__(self, inputs, seq_len):
    if self.output_dim % 2 != 0:
        print("The output dimension of BidirectLSTMLayer should be even. ")
        exit(-1)
    with tf.variable_scope(self.scope) as scope:
        self.check_reuse(scope)
        # scope.reuse_variables()
        f_cell = LSTMCell(self.output_dim / 2, initializer=self.initializer(dtype=inputs.dtype))
        b_cell = LSTMCell(self.output_dim / 2, initializer=self.initializer(dtype=inputs.dtype))
        # rnn.bidirectional_dynamic_rnn(cell, cell, inputs, seq_len, dtype=inputs.dtype)
        return rnn.bidirectional_dynamic_rnn(f_cell, b_cell, inputs, seq_len, dtype=inputs.dtype)
Example #3
Source File: rnns.py From AmusingPythonCodes with MIT License | 5 votes |
def __call__(self, inputs, seq_len, return_last_state=False, time_major=False):
    assert not time_major, "BiRNN class cannot support time_major currently"
    with tf.variable_scope(self.scope):
        flat_inputs = flatten(inputs, keep=2)  # reshape to [-1, max_time, dim]
        seq_len = flatten(seq_len, keep=0)  # reshape to [x] (one dimension sequence)
        outputs, ((_, h_fw), (_, h_bw)) = bidirectional_dynamic_rnn(self.cell_fw, self.cell_bw, flat_inputs,
                                                                    sequence_length=seq_len, dtype=tf.float32)
        if return_last_state:  # return last states
            output = tf.concat([h_fw, h_bw], axis=-1)  # shape = [-1, 2 * num_units]
            output = reconstruct(output, ref=inputs, keep=2, remove_shape=1)  # remove the max_time shape
        else:
            output = tf.concat(outputs, axis=-1)  # shape = [-1, max_time, 2 * num_units]
            output = reconstruct(output, ref=inputs, keep=2)  # reshape to same as inputs, except the last two dim
        return output
Example #4
Source File: multi_attention_model.py From neural_sequence_labeling with MIT License | 5 votes |
def _build_model_op(self):
    with tf.variable_scope("bi_directional_rnn"):
        cell_fw = self._create_single_rnn_cell(self.cfg["num_units"])
        cell_bw = self._create_single_rnn_cell(self.cfg["num_units"])
        if self.cfg["use_residual"]:
            self.word_emb = tf.layers.dense(self.word_emb, units=self.cfg["num_units"], use_bias=False,
                                            name="word_input_project")
            if self.cfg["use_chars"]:
                self.chars_emb = tf.layers.dense(self.chars_emb, units=self.cfg["num_units"], use_bias=False,
                                                 name="chars_input_project")
        rnn_outs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, sequence_length=self.seq_len,
                                                dtype=tf.float32, scope="bi_rnn")
        rnn_outs = tf.concat(rnn_outs, axis=-1)
        print("Bi-directional RNN output shape on word: {}".format(rnn_outs.get_shape().as_list()))
        if self.cfg["use_chars"]:
            tf.get_variable_scope().reuse_variables()
            chars_rnn_outs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.chars_emb, dtype=tf.float32,
                                                          sequence_length=self.seq_len, scope="bi_rnn")
            chars_rnn_outs = tf.concat(chars_rnn_outs, axis=-1)
            print("Bi-directional RNN output shape on chars: {}".format(chars_rnn_outs.get_shape().as_list()))
            rnn_outs = rnn_outs + chars_rnn_outs
        rnn_outs = layer_normalize(rnn_outs)

    with tf.variable_scope("multi_head_attention"):
        attn_outs = multi_head_attention(rnn_outs, rnn_outs, self.cfg["num_heads"], self.cfg["attention_size"],
                                         drop_rate=self.attn_drop_rate, is_train=self.is_train)
        if self.cfg["use_residual"]:
            attn_outs = attn_outs + rnn_outs
        attn_outs = layer_normalize(attn_outs)  # residual connection and layer norm
        print("multi-heads attention output shape: {}".format(attn_outs.get_shape().as_list()))

    with tf.variable_scope("projection"):
        self.logits = tf.layers.dense(attn_outs, units=self.tag_vocab_size, use_bias=True)
        print("logits shape: {}".format(self.logits.get_shape().as_list()))
Example #5
Source File: nns.py From neural_sequence_labeling with MIT License | 5 votes |
def __call__(self, inputs, seq_len, use_last_state=False, time_major=False):
    assert not time_major, "BiRNN class cannot support time_major currently"
    with tf.variable_scope(self.scope):
        flat_inputs = flatten(inputs, keep=2)  # reshape to [-1, max_time, dim]
        seq_len = flatten(seq_len, keep=0)  # reshape to [x] (one dimension sequence)
        outputs, ((_, h_fw), (_, h_bw)) = bidirectional_dynamic_rnn(self.cell_fw, self.cell_bw, flat_inputs,
                                                                    sequence_length=seq_len, dtype=tf.float32)
        if use_last_state:  # return last states
            output = tf.concat([h_fw, h_bw], axis=-1)  # shape = [-1, 2 * num_units]
            output = reconstruct(output, ref=inputs, keep=2, remove_shape=1)  # remove the max_time shape
        else:
            output = tf.concat(outputs, axis=-1)  # shape = [-1, max_time, 2 * num_units]
            output = reconstruct(output, ref=inputs, keep=2)  # reshape to same as inputs, except the last two dim
        return output
Example #6
Source File: blstm_cnn_crf_model.py From neural_sequence_labeling with MIT License | 5 votes |
def _build_model_op(self):
    with tf.variable_scope("bi_directional_rnn"):
        cell_fw = self._create_rnn_cell()
        cell_bw = self._create_rnn_cell()
        if self.cfg["use_stack_rnn"]:
            rnn_outs, *_ = stack_bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, dtype=tf.float32,
                                                           sequence_length=self.seq_len)
        else:
            rnn_outs, *_ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, sequence_length=self.seq_len,
                                                     dtype=tf.float32)
        rnn_outs = tf.concat(rnn_outs, axis=-1)
        rnn_outs = tf.layers.dropout(rnn_outs, rate=self.drop_rate, training=self.is_train)
        if self.cfg["use_residual"]:
            word_project = tf.layers.dense(self.word_emb, units=2 * self.cfg["num_units"], use_bias=False)
            rnn_outs = rnn_outs + word_project
        outputs = layer_normalize(rnn_outs) if self.cfg["use_layer_norm"] else rnn_outs
        print("rnn output shape: {}".format(outputs.get_shape().as_list()))

    if self.cfg["use_attention"] == "self_attention":
        with tf.variable_scope("self_attention"):
            attn_outs = multi_head_attention(outputs, outputs, self.cfg["num_heads"], self.cfg["attention_size"],
                                             drop_rate=self.drop_rate, is_train=self.is_train)
            if self.cfg["use_residual"]:
                attn_outs = attn_outs + outputs
            outputs = layer_normalize(attn_outs) if self.cfg["use_layer_norm"] else attn_outs
            print("self-attention output shape: {}".format(outputs.get_shape().as_list()))
    elif self.cfg["use_attention"] == "normal_attention":
        with tf.variable_scope("normal_attention"):
            context = tf.transpose(outputs, [1, 0, 2])
            p_context = tf.layers.dense(outputs, units=2 * self.cfg["num_units"], use_bias=False)
            p_context = tf.transpose(p_context, [1, 0, 2])
            attn_cell = AttentionCell(self.cfg["num_units"], context, p_context)  # time major based
            attn_outs, _ = dynamic_rnn(attn_cell, context, sequence_length=self.seq_len, time_major=True,
                                       dtype=tf.float32)
            outputs = tf.transpose(attn_outs, [1, 0, 2])
            print("attention output shape: {}".format(outputs.get_shape().as_list()))

    with tf.variable_scope("project"):
        self.logits = tf.layers.dense(outputs, units=self.tag_vocab_size, use_bias=True)
        print("logits shape: {}".format(self.logits.get_shape().as_list()))
Example #7
Source File: dynamic_brnn.py From Automatic_Speech_Recognition with MIT License | 5 votes |
def build_multi_dynamic_brnn(args, maxTimeSteps, inputX, cell_fn, seqLengths, time_major=True):
    hid_input = inputX
    for i in range(args.num_layer):
        scope = 'DBRNN_' + str(i + 1)
        forward_cell = cell_fn(args.num_hidden, activation=args.activation)
        backward_cell = cell_fn(args.num_hidden, activation=args.activation)
        # tensor of shape: [max_time, batch_size, input_size]
        outputs, output_states = bidirectional_dynamic_rnn(forward_cell, backward_cell,
                                                           inputs=hid_input,
                                                           dtype=tf.float32,
                                                           sequence_length=seqLengths,
                                                           time_major=True,
                                                           scope=scope)
        # forward output, backward output
        # tensor of shape: [max_time, batch_size, input_size]
        output_fw, output_bw = outputs
        # forward states, backward states
        output_state_fw, output_state_bw = output_states
        # output_fb = tf.concat(2, [output_fw, output_bw])
        output_fb = tf.concat([output_fw, output_bw], 2)
        shape = output_fb.get_shape().as_list()
        output_fb = tf.reshape(output_fb, [shape[0], shape[1], 2, int(shape[2] / 2)])
        hidden = tf.reduce_sum(output_fb, 2)
        hidden = dropout(hidden, args.keep_prob, (args.mode == 'train'))
        if i != args.num_layer - 1:
            hid_input = hidden
        else:
            outputXrs = tf.reshape(hidden, [-1, args.num_hidden])
            # output_list = tf.split(0, maxTimeSteps, outputXrs)
            output_list = tf.split(outputXrs, maxTimeSteps, 0)
            fbHrs = [tf.reshape(t, [args.batch_size, args.num_hidden]) for t in output_list]
    return fbHrs
Example #8
Source File: util_rnn.py From SemAIDA with Apache License 2.0 | 5 votes |
def __init__(self, sequence_length, num_classes, channel_num, rnn_hidden_size, attention_size):
    self.input_x = tf.placeholder(tf.float32, [None, sequence_length, channel_num], name="input_x")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

    # Bidirectional RNN
    self.rnn_outputs, _ = bi_rnn(GRUCell(rnn_hidden_size), GRUCell(rnn_hidden_size),
                                 inputs=self.input_x, dtype=tf.float32)

    # Attention layer
    with tf.name_scope('Attention_layer'):
        self.att_output, alphas = attention(self.rnn_outputs, attention_size, return_alphas=True)
        tf.summary.histogram('alphas', alphas)

    # Dropout layer
    with tf.name_scope("dropout"):
        self.att_drop = tf.nn.dropout(self.att_output, self.dropout_keep_prob)

    # FC layer
    with tf.name_scope("output"):
        FC_W = tf.get_variable("FC_W", shape=[rnn_hidden_size * 2, num_classes],
                               initializer=tf.contrib.layers.xavier_initializer())
        FC_b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="FC_b")
        self.fc_out = tf.nn.xw_plus_b(self.att_drop, FC_W, FC_b, name="FC_out")
        self.scores = tf.nn.softmax(self.fc_out, name='scores')
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.fc_out, labels=self.input_y)
        self.loss = tf.reduce_mean(losses)

    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
Example #9
Source File: nns.py From Dense_BiLSTM with MIT License | 5 votes |
def __call__(self, inputs, seq_len, return_last_state=False):
    with tf.variable_scope(self.scope):
        if return_last_state:
            _, ((_, output_fw), (_, output_bw)) = bidirectional_dynamic_rnn(self.cell_fw, self.cell_bw, inputs,
                                                                            sequence_length=seq_len,
                                                                            dtype=tf.float32)
            output = tf.concat([output_fw, output_bw], axis=-1)
        else:
            (output_fw, output_bw), _ = bidirectional_dynamic_rnn(self.cell_fw, self.cell_bw, inputs,
                                                                  sequence_length=seq_len, dtype=tf.float32)
            output = tf.concat([output_fw, output_bw], axis=-1)
        return output
Example #10
Source File: recurrent_layers.py From document-qa with Apache License 2.0 | 5 votes |
def apply(self, is_train, x, mask=None):
    states = bidirectional_dynamic_rnn(self.cell_spec(is_train), self.cell_spec(is_train), x, mask,
                                       dtype=tf.float32)[1]
    output = []
    for state in states:
        for i, x in enumerate(state._fields):
            if x == self.output:
                output.append(state[i])
    if self.merge is not None:
        return self.merge.apply(is_train, output[0], output[1])
    else:
        return tf.concat(output, axis=1)
Example #11
Source File: recurrent_layers.py From document-qa with Apache License 2.0 | 5 votes |
def apply(self, is_train, inputs, mask=None):
    fw = self.fw(is_train)
    bw_spec = self.fw if self.bw is None else self.bw
    bw = bw_spec(is_train)
    if self.merge is None:
        return tf.concat(bidirectional_dynamic_rnn(fw, bw, inputs, mask, swap_memory=self.swap_memory,
                                                   dtype=tf.float32)[0], 2)
    else:
        fw, bw = bidirectional_dynamic_rnn(fw, bw, inputs, mask, swap_memory=self.swap_memory,
                                           dtype=tf.float32)[0]
        return self.merge.apply(is_train, fw, bw)  # TODO this should be in a different scope
Example #12
Source File: seq2seq_model.py From AmusingPythonCodes with MIT License | 4 votes |
def _build_model(self):
    with tf.variable_scope("embeddings"):
        self.source_embs = tf.get_variable(name="source_embs", shape=[self.cfg.source_vocab_size, self.cfg.emb_dim],
                                           dtype=tf.float32, trainable=True)
        self.target_embs = tf.get_variable(name="embeddings", shape=[self.cfg.vocab_size, self.cfg.emb_dim],
                                           dtype=tf.float32, trainable=True)
        source_emb = tf.nn.embedding_lookup(self.source_embs, self.enc_source)
        target_emb = tf.nn.embedding_lookup(self.target_embs, self.dec_target_in)
        print("source embedding shape: {}".format(source_emb.get_shape().as_list()))
        print("target input embedding shape: {}".format(target_emb.get_shape().as_list()))

    with tf.variable_scope("encoder"):
        if self.cfg.use_bi_rnn:
            with tf.variable_scope("bi-directional_rnn"):
                cell_fw = GRUCell(self.cfg.num_units) if self.cfg.cell_type == "gru" else \
                    LSTMCell(self.cfg.num_units)
                cell_bw = GRUCell(self.cfg.num_units) if self.cfg.cell_type == "gru" else \
                    LSTMCell(self.cfg.num_units)
                bi_outputs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, source_emb, dtype=tf.float32,
                                                          sequence_length=self.enc_seq_len)
                source_emb = tf.concat(bi_outputs, axis=-1)
                print("bi-directional rnn output shape: {}".format(source_emb.get_shape().as_list()))
        input_project = tf.layers.Dense(units=self.cfg.num_units, dtype=tf.float32, name="input_projection")
        source_emb = input_project(source_emb)
        print("encoder input projection shape: {}".format(source_emb.get_shape().as_list()))
        enc_cells = self._create_encoder_cell()
        self.enc_outputs, self.enc_states = dynamic_rnn(enc_cells, source_emb, sequence_length=self.enc_seq_len,
                                                        dtype=tf.float32)
        print("encoder output shape: {}".format(self.enc_outputs.get_shape().as_list()))

    with tf.variable_scope("decoder"):
        self.max_dec_seq_len = tf.reduce_max(self.dec_seq_len, name="max_dec_seq_len")
        self.dec_cells, self.dec_init_states = self._create_decoder_cell()
        # define input and output projection layer
        input_project = tf.layers.Dense(units=self.cfg.num_units, name="input_projection")
        self.dense_layer = tf.layers.Dense(units=self.cfg.vocab_size, name="output_projection")

        if self.mode == "train":  # either "train" or "decode"
            # for training
            target_emb = input_project(target_emb)
            train_helper = TrainingHelper(target_emb, sequence_length=self.dec_seq_len, name="train_helper")
            train_decoder = BasicDecoder(self.dec_cells, helper=train_helper, output_layer=self.dense_layer,
                                         initial_state=self.dec_init_states)
            self.dec_output, _, _ = dynamic_decode(train_decoder, impute_finished=True,
                                                   maximum_iterations=self.max_dec_seq_len)
            print("decoder output shape: {} (vocab size)".format(self.dec_output.rnn_output.get_shape().as_list()))

        # for decode
        start_token = tf.ones(shape=[self.batch_size, ], dtype=tf.int32) * self.cfg.target_dict[GO]
        end_token = self.cfg.target_dict[EOS]

        def inputs_project(inputs):
            return input_project(tf.nn.embedding_lookup(self.target_embs, inputs))

        dec_helper = GreedyEmbeddingHelper(embedding=inputs_project, start_tokens=start_token,
                                           end_token=end_token)
        infer_decoder = BasicDecoder(self.dec_cells, helper=dec_helper, initial_state=self.dec_init_states,
                                     output_layer=self.dense_layer)
        infer_dec_output, _, _ = dynamic_decode(infer_decoder, maximum_iterations=self.cfg.maximum_iterations)
        self.dec_predicts = infer_dec_output.sample_id
Example #13
Source File: tf_attention.py From Sarcasm-Detection with MIT License | 4 votes |
def build_attention_model():
    # Different placeholders
    with tf.name_scope('Inputs'):
        batch_ph = tf.placeholder(tf.int32, [None, SEQUENCE_LENGTH], name='batch_ph')
        target_ph = tf.placeholder(tf.float32, [None], name='target_ph')
        seq_len_ph = tf.placeholder(tf.int32, [None], name='seq_len_ph')
        keep_prob_ph = tf.placeholder(tf.float32, name='keep_prob_ph')

    # Embedding layer
    with tf.name_scope('Embedding_layer'):
        embeddings_var = tf.Variable(tf.random_uniform([vocabulary_size, EMBEDDING_DIM], -1.0, 1.0), trainable=True)
        tf.summary.histogram('embeddings_var', embeddings_var)
        batch_embedded = tf.nn.embedding_lookup(embeddings_var, batch_ph)

    # (Bi-)RNN layer(-s)
    rnn_outputs, _ = bi_rnn(GRUCell(HIDDEN_UNITS), GRUCell(HIDDEN_UNITS), inputs=batch_embedded,
                            sequence_length=seq_len_ph, dtype=tf.float32)
    tf.summary.histogram('RNN_outputs', rnn_outputs)

    # Attention layer
    with tf.name_scope('Attention_layer'):
        attention_output, alphas = attention(rnn_outputs, ATTENTION_UNITS, return_alphas=True)
        tf.summary.histogram('alphas', alphas)

    # Dropout
    drop = tf.nn.dropout(attention_output, keep_prob_ph)

    # Fully connected layer
    with tf.name_scope('Fully_connected_layer'):
        W = tf.Variable(
            tf.truncated_normal([HIDDEN_UNITS * 2, 1], stddev=0.1))  # Hidden size is multiplied by 2 for Bi-RNN
        b = tf.Variable(tf.constant(0., shape=[1]))
        y_hat = tf.nn.xw_plus_b(drop, W, b)
        y_hat = tf.squeeze(y_hat)
        tf.summary.histogram('W', W)

    with tf.name_scope('Metrics'):
        # Cross-entropy loss and optimizer initialization
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_hat, labels=target_ph))
        tf.summary.scalar('loss', loss)
        optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)
        # Accuracy metric
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.sigmoid(y_hat)), target_ph), tf.float32))
        tf.summary.scalar('accuracy', accuracy)

    merged = tf.summary.merge_all()

    # Batch generators
    train_batch_generator = batch_generator(X_train, y_train, BATCH_SIZE)
    test_batch_generator = batch_generator(X_test, y_test, BATCH_SIZE)
    session_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
    saver = tf.train.Saver()
    return batch_ph, target_ph, seq_len_ph, keep_prob_ph, alphas, loss, accuracy, optimizer, merged, \
        train_batch_generator, test_batch_generator, session_conf, saver
Example #14
Source File: adversarial_abblstm.py From Text-Classification with Apache License 2.0 | 4 votes |
def build_graph(self, vocab_freq, word2idx):
    vocab_freqs = tf.constant(self._get_freq(vocab_freq, word2idx), dtype=tf.float32, shape=(self.vocab_size, 1))
    weights = vocab_freqs / tf.reduce_sum(vocab_freqs)
    embeddings_var = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_size], -1.0, 1.0),
                                 trainable=True, name="embedding_var")
    embedding_norm = normalize(embeddings_var, weights)
    batch_embedded = tf.nn.embedding_lookup(embedding_norm, self.x)

    W = tf.Variable(tf.random_normal([self.hidden_size], stddev=0.1))
    W_fc = tf.Variable(tf.truncated_normal([self.hidden_size, self.n_class], stddev=0.1))
    b_fc = tf.Variable(tf.constant(0., shape=[self.n_class]))

    def cal_loss_logit(embedded, keep_prob, reuse=True, scope="loss"):
        with tf.variable_scope(scope, reuse=reuse) as scope:
            rnn_outputs, _ = bi_rnn(BasicLSTMCell(self.hidden_size), BasicLSTMCell(self.hidden_size),
                                    inputs=embedded, dtype=tf.float32)
            # Attention
            H = tf.add(rnn_outputs[0], rnn_outputs[1])  # fw + bw
            M = tf.tanh(H)  # M = tanh(H)  (batch_size, seq_len, HIDDEN_SIZE)
            # alpha (bs * sl, 1)
            alpha = tf.nn.softmax(tf.matmul(tf.reshape(M, [-1, self.hidden_size]), tf.reshape(W, [-1, 1])))
            r = tf.matmul(tf.transpose(H, [0, 2, 1]),
                          tf.reshape(alpha, [-1, self.max_len, 1]))  # supposed to be (batch_size * HIDDEN_SIZE, 1)
            r = tf.squeeze(r)
            h_star = tf.tanh(r)
            drop = tf.nn.dropout(h_star, keep_prob)
            # Fully connected layer (dense layer)
            y_hat = tf.nn.xw_plus_b(drop, W_fc, b_fc)
            return y_hat, tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_hat,
                                                                                        labels=self.label))

    logits, self.cls_loss = cal_loss_logit(batch_embedded, self.keep_prob, reuse=False)
    embedding_perturbated = self._add_perturbation(batch_embedded, self.cls_loss)
    adv_logits, self.adv_loss = cal_loss_logit(embedding_perturbated, self.keep_prob, reuse=True)
    self.loss = self.cls_loss + self.adv_loss

    # optimization
    loss_to_minimize = self.loss
    tvars = tf.trainable_variables()
    gradients = tf.gradients(loss_to_minimize, tvars,
                             aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)
    grads, global_norm = tf.clip_by_global_norm(gradients, 1.0)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    self.train_op = self.optimizer.apply_gradients(zip(grads, tvars), global_step=self.global_step,
                                                   name='train_step')
    self.prediction = tf.argmax(tf.nn.softmax(logits), 1)
    print("graph built successfully!")
Example #15
Source File: attn_bi_lstm.py From Text-Classification with Apache License 2.0 | 4 votes |
def build_graph(self):
    print("building graph")
    # Word embedding
    embeddings_var = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_size], -1.0, 1.0),
                                 trainable=True)
    batch_embedded = tf.nn.embedding_lookup(embeddings_var, self.x)

    rnn_outputs, _ = bi_rnn(BasicLSTMCell(self.hidden_size), BasicLSTMCell(self.hidden_size),
                            inputs=batch_embedded, dtype=tf.float32)
    fw_outputs, bw_outputs = rnn_outputs

    W = tf.Variable(tf.random_normal([self.hidden_size], stddev=0.1))
    H = fw_outputs + bw_outputs  # (batch_size, seq_len, HIDDEN_SIZE)
    M = tf.tanh(H)  # M = tanh(H)  (batch_size, seq_len, HIDDEN_SIZE)

    self.alpha = tf.nn.softmax(tf.reshape(tf.matmul(tf.reshape(M, [-1, self.hidden_size]),
                                                    tf.reshape(W, [-1, 1])),
                                          (-1, self.max_len)))  # batch_size x seq_len
    r = tf.matmul(tf.transpose(H, [0, 2, 1]), tf.reshape(self.alpha, [-1, self.max_len, 1]))
    r = tf.squeeze(r)
    h_star = tf.tanh(r)  # (batch, HIDDEN_SIZE)
    h_drop = tf.nn.dropout(h_star, self.keep_prob)

    # Fully connected layer (dense layer)
    FC_W = tf.Variable(tf.truncated_normal([self.hidden_size, self.n_class], stddev=0.1))
    FC_b = tf.Variable(tf.constant(0., shape=[self.n_class]))
    y_hat = tf.nn.xw_plus_b(h_drop, FC_W, FC_b)

    self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_hat, labels=self.label))

    # prediction
    self.prediction = tf.argmax(tf.nn.softmax(y_hat), 1)

    # optimization
    loss_to_minimize = self.loss
    tvars = tf.trainable_variables()
    gradients = tf.gradients(loss_to_minimize, tvars,
                             aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)
    grads, global_norm = tf.clip_by_global_norm(gradients, 1.0)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    self.train_op = self.optimizer.apply_gradients(zip(grads, tvars), global_step=self.global_step,
                                                   name='train_step')
    print("graph built successfully!")