Python tensorflow.python.ops.rnn.bidirectional_dynamic_rnn() Examples

The following are 15 code examples of tensorflow.python.ops.rnn.bidirectional_dynamic_rnn(), drawn from open-source projects; the source file, project, and license are noted above each snippet. You may also want to check out all available functions/classes of the module tensorflow.python.ops.rnn.
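For context, bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None, ..., dtype=None) runs the two cells over the inputs in opposite directions and returns a pair (outputs, output_states): outputs is the tuple (output_fw, output_bw) and output_states holds the final forward and backward states. A minimal TensorFlow 1.x sketch (the placeholder shapes and unit counts below are illustrative, not taken from any of the projects):

import tensorflow as tf
from tensorflow.python.ops import rnn

# illustrative shapes: 32 sequences, up to 100 steps, 64 features per step
inputs = tf.placeholder(tf.float32, [32, 100, 64])
seq_len = tf.placeholder(tf.int32, [32])

cell_fw = tf.nn.rnn_cell.LSTMCell(128)
cell_bw = tf.nn.rnn_cell.LSTMCell(128)

# outputs: (output_fw, output_bw), each [batch_size, max_time, 128]
# states:  (state_fw, state_bw), each an LSTMStateTuple(c, h)
outputs, states = rnn.bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs,
                                                sequence_length=seq_len,
                                                dtype=tf.float32)
concatenated = tf.concat(outputs, axis=-1)  # [batch_size, max_time, 256]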
Example #1
Source File: BidirectLSTMLayer.py    From NPNs with GNU General Public License v3.0
def __call__(self, inputs, seq_len):
        if self.output_dim % 2 != 0:
            print("The output dimension of BidirectLSTMLayer should be even.")
            exit(-1)

        with tf.variable_scope(self.scope) as scope:
            self.check_reuse(scope)
            scope.reuse_variables()
            # the same cell is used for both directions, each producing output_dim // 2 units
            cell = LSTMCell(self.output_dim // 2, initializer=self.initializer(dtype=inputs.dtype))
            return rnn.bidirectional_dynamic_rnn(cell, cell, inputs, seq_len, dtype=inputs.dtype)
Example #2
Source File: BidirectLSTMLayer.py    From AdaScaling with GNU General Public License v3.0
def __call__(self, inputs, seq_len):
        if self.output_dim % 2 != 0:
            print("The output dimension of BidirectLSTMLayer should be even.")
            exit(-1)

        with tf.variable_scope(self.scope) as scope:
            self.check_reuse(scope)
            # separate forward and backward cells, each producing output_dim // 2 units
            f_cell = LSTMCell(self.output_dim // 2, initializer=self.initializer(dtype=inputs.dtype))
            b_cell = LSTMCell(self.output_dim // 2, initializer=self.initializer(dtype=inputs.dtype))
            return rnn.bidirectional_dynamic_rnn(f_cell, b_cell, inputs, seq_len, dtype=inputs.dtype)
Example #3
Source File: rnns.py    From AmusingPythonCodes with MIT License
def __call__(self, inputs, seq_len, return_last_state=False, time_major=False):
        assert not time_major, "BiRNN class cannot support time_major currently"
        with tf.variable_scope(self.scope):
            flat_inputs = flatten(inputs, keep=2)  # reshape to [-1, max_time, dim]
            seq_len = flatten(seq_len, keep=0)  # reshape to [x] (one dimension sequence)
            outputs, ((_, h_fw), (_, h_bw)) = bidirectional_dynamic_rnn(self.cell_fw, self.cell_bw, flat_inputs,
                                                                        sequence_length=seq_len, dtype=tf.float32)
            if return_last_state:  # return last states
                output = tf.concat([h_fw, h_bw], axis=-1)  # shape = [-1, 2 * num_units]
                output = reconstruct(output, ref=inputs, keep=2, remove_shape=1)  # remove the max_time shape
            else:
                output = tf.concat(outputs, axis=-1)  # shape = [-1, max_time, 2 * num_units]
                output = reconstruct(output, ref=inputs, keep=2)  # reshape to same as inputs, except the last two dim
            return output 
Example #4
Source File: multi_attention_model.py    From neural_sequence_labeling with MIT License
def _build_model_op(self):
        with tf.variable_scope("bi_directional_rnn"):
            cell_fw = self._create_single_rnn_cell(self.cfg["num_units"])
            cell_bw = self._create_single_rnn_cell(self.cfg["num_units"])
            if self.cfg["use_residual"]:
                self.word_emb = tf.layers.dense(self.word_emb, units=self.cfg["num_units"], use_bias=False,
                                                name="word_input_project")
                if self.cfg["use_chars"]:
                    self.chars_emb = tf.layers.dense(self.chars_emb, units=self.cfg["num_units"], use_bias=False,
                                                     name="chars_input_project")

            rnn_outs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, sequence_length=self.seq_len,
                                                    dtype=tf.float32, scope="bi_rnn")
            rnn_outs = tf.concat(rnn_outs, axis=-1)
            print("Bi-directional RNN output shape on word: {}".format(rnn_outs.get_shape().as_list()))
            if self.cfg["use_chars"]:
                tf.get_variable_scope().reuse_variables()
                chars_rnn_outs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.chars_emb, dtype=tf.float32,
                                                              sequence_length=self.seq_len, scope="bi_rnn")
                chars_rnn_outs = tf.concat(chars_rnn_outs, axis=-1)
                print("Bi-directional RNN output shape on chars: {}".format(chars_rnn_outs.get_shape().as_list()))
                rnn_outs = rnn_outs + chars_rnn_outs
            rnn_outs = layer_normalize(rnn_outs)

        with tf.variable_scope("multi_head_attention"):
            attn_outs = multi_head_attention(rnn_outs, rnn_outs, self.cfg["num_heads"], self.cfg["attention_size"],
                                             drop_rate=self.attn_drop_rate, is_train=self.is_train)
            if self.cfg["use_residual"]:
                attn_outs = attn_outs + rnn_outs
            attn_outs = layer_normalize(attn_outs)  # residual connection and layer norm
            print("multi-heads attention output shape: {}".format(attn_outs.get_shape().as_list()))

        with tf.variable_scope("projection"):
            self.logits = tf.layers.dense(attn_outs, units=self.tag_vocab_size, use_bias=True)
            print("logits shape: {}".format(self.logits.get_shape().as_list())) 
Example #5
Source File: nns.py    From neural_sequence_labeling with MIT License
def __call__(self, inputs, seq_len, use_last_state=False, time_major=False):
        assert not time_major, "BiRNN class cannot support time_major currently"
        with tf.variable_scope(self.scope):
            flat_inputs = flatten(inputs, keep=2)  # reshape to [-1, max_time, dim]
            seq_len = flatten(seq_len, keep=0)  # reshape to [x] (one dimension sequence)
            outputs, ((_, h_fw), (_, h_bw)) = bidirectional_dynamic_rnn(self.cell_fw, self.cell_bw, flat_inputs,
                                                                        sequence_length=seq_len, dtype=tf.float32)
            if use_last_state:  # return last states
                output = tf.concat([h_fw, h_bw], axis=-1)  # shape = [-1, 2 * num_units]
                output = reconstruct(output, ref=inputs, keep=2, remove_shape=1)  # remove the max_time shape
            else:
                output = tf.concat(outputs, axis=-1)  # shape = [-1, max_time, 2 * num_units]
                output = reconstruct(output, ref=inputs, keep=2)  # reshape to same as inputs, except the last two dim
            return output 
Example #6
Source File: blstm_cnn_crf_model.py    From neural_sequence_labeling with MIT License
def _build_model_op(self):
        with tf.variable_scope("bi_directional_rnn"):
            cell_fw = self._create_rnn_cell()
            cell_bw = self._create_rnn_cell()
            if self.cfg["use_stack_rnn"]:
                rnn_outs, *_ = stack_bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, dtype=tf.float32,
                                                               sequence_length=self.seq_len)
            else:
                rnn_outs, *_ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, sequence_length=self.seq_len,
                                                         dtype=tf.float32)
            rnn_outs = tf.concat(rnn_outs, axis=-1)
            rnn_outs = tf.layers.dropout(rnn_outs, rate=self.drop_rate, training=self.is_train)
            if self.cfg["use_residual"]:
                word_project = tf.layers.dense(self.word_emb, units=2 * self.cfg["num_units"], use_bias=False)
                rnn_outs = rnn_outs + word_project
            outputs = layer_normalize(rnn_outs) if self.cfg["use_layer_norm"] else rnn_outs
            print("rnn output shape: {}".format(outputs.get_shape().as_list()))

        if self.cfg["use_attention"] == "self_attention":
            with tf.variable_scope("self_attention"):
                attn_outs = multi_head_attention(outputs, outputs, self.cfg["num_heads"], self.cfg["attention_size"],
                                                 drop_rate=self.drop_rate, is_train=self.is_train)
                if self.cfg["use_residual"]:
                    attn_outs = attn_outs + outputs
                outputs = layer_normalize(attn_outs) if self.cfg["use_layer_norm"] else attn_outs
                print("self-attention output shape: {}".format(outputs.get_shape().as_list()))

        elif self.cfg["use_attention"] == "normal_attention":
            with tf.variable_scope("normal_attention"):
                context = tf.transpose(outputs, [1, 0, 2])
                p_context = tf.layers.dense(outputs, units=2 * self.cfg["num_units"], use_bias=False)
                p_context = tf.transpose(p_context, [1, 0, 2])
                attn_cell = AttentionCell(self.cfg["num_units"], context, p_context)  # time major based
                attn_outs, _ = dynamic_rnn(attn_cell, context, sequence_length=self.seq_len, time_major=True,
                                           dtype=tf.float32)
                outputs = tf.transpose(attn_outs, [1, 0, 2])
                print("attention output shape: {}".format(outputs.get_shape().as_list()))

        with tf.variable_scope("project"):
            self.logits = tf.layers.dense(outputs, units=self.tag_vocab_size, use_bias=True)
            print("logits shape: {}".format(self.logits.get_shape().as_list())) 
Example #7
Source File: dynamic_brnn.py    From Automatic_Speech_Recognition with MIT License
def build_multi_dynamic_brnn(args,
                             maxTimeSteps,
                             inputX,
                             cell_fn,
                             seqLengths,
                             time_major=True):
    hid_input = inputX
    for i in range(args.num_layer):
        scope = 'DBRNN_' + str(i + 1)
        forward_cell = cell_fn(args.num_hidden, activation=args.activation)
        backward_cell = cell_fn(args.num_hidden, activation=args.activation)
        # tensor of shape: [max_time, batch_size, input_size]
        outputs, output_states = bidirectional_dynamic_rnn(forward_cell, backward_cell,
                                                           inputs=hid_input,
                                                           dtype=tf.float32,
                                                           sequence_length=seqLengths,
                                                           time_major=True,
                                                           scope=scope)
        # forward output, backward output
        # tensor of shape: [max_time, batch_size, input_size]
        output_fw, output_bw = outputs
        # forward states, backward states
        output_state_fw, output_state_bw = output_states
        # output_fb = tf.concat(2, [output_fw, output_bw])
        output_fb = tf.concat([output_fw, output_bw], 2)
        shape = output_fb.get_shape().as_list()
        output_fb = tf.reshape(output_fb, [shape[0], shape[1], 2, int(shape[2] / 2)])
        hidden = tf.reduce_sum(output_fb, 2)
        hidden = dropout(hidden, args.keep_prob, (args.mode == 'train'))

        if i != args.num_layer - 1:
            hid_input = hidden
        else:
            outputXrs = tf.reshape(hidden, [-1, args.num_hidden])
            # output_list = tf.split(0, maxTimeSteps, outputXrs)
            output_list = tf.split(outputXrs, maxTimeSteps, 0)
            fbHrs = [tf.reshape(t, [args.batch_size, args.num_hidden]) for t in output_list]
    return fbHrs 
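A hedged usage sketch for build_multi_dynamic_brnn(): the args fields, feature dimension, and cell type below are inferred from how they are read in the body (time-major inputs of shape [max_time, batch_size, feature_dim]) and are not taken from the original project; the module's own dropout() helper is assumed to be available alongside the function.

import argparse
import tensorflow as tf

# hypothetical hyper-parameters mirroring the attributes accessed on `args` above
args = argparse.Namespace(num_layer=2, num_hidden=256, activation=tf.tanh,
                          keep_prob=0.8, mode='train', batch_size=16)
max_time_steps = 100

# time-major acoustic features: [max_time, batch_size, feature_dim]
inputX = tf.placeholder(tf.float32, [max_time_steps, args.batch_size, 39])
seqLengths = tf.placeholder(tf.int32, [args.batch_size])

fbHrs = build_multi_dynamic_brnn(args, max_time_steps, inputX,
                                 tf.nn.rnn_cell.GRUCell, seqLengths)
# fbHrs is a list of max_time_steps tensors, each of shape [batch_size, num_hidden]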
Example #8
Source File: util_rnn.py    From SemAIDA with Apache License 2.0
def __init__(self, sequence_length, num_classes, channel_num, rnn_hidden_size, attention_size):
        self.input_x = tf.placeholder(tf.float32, [None, sequence_length, channel_num], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Bidirectional RNN
        self.rnn_outputs, _ = bi_rnn(GRUCell(rnn_hidden_size), GRUCell(rnn_hidden_size),
                                     inputs=self.input_x, dtype=tf.float32)

        # Attention layer
        with tf.name_scope('Attention_layer'):
            self.att_output, alphas = attention(self.rnn_outputs, attention_size, return_alphas=True)
            tf.summary.histogram('alphas', alphas)

        # Dropout layer
        with tf.name_scope("dropout"):
            self.att_drop = tf.nn.dropout(self.att_output, self.dropout_keep_prob)

        # FC layer
        with tf.name_scope("output"):
            FC_W = tf.get_variable("FC_W", shape=[rnn_hidden_size * 2, num_classes],
                                   initializer=tf.contrib.layers.xavier_initializer())
            FC_b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="FC_b")
            self.fc_out = tf.nn.xw_plus_b(self.att_drop, FC_W, FC_b, name="FC_out")
            self.scores = tf.nn.softmax(self.fc_out, name='scores')
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.fc_out, labels=self.input_y)
            self.loss = tf.reduce_mean(losses)

        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy") 
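The attention() helper used above is not part of this snippet. A minimal sketch with a compatible interface, assuming the common additive (Bahdanau-style) formulation over the concatenated forward/backward outputs; variable names such as w_omega are illustrative:

import tensorflow as tf

def attention(inputs, attention_size, return_alphas=False):
    # inputs may be the (output_fw, output_bw) tuple returned by bidirectional_dynamic_rnn
    if isinstance(inputs, tuple):
        inputs = tf.concat(inputs, axis=2)  # [batch_size, max_time, 2 * hidden_size]
    hidden_size = inputs.shape[2].value

    w_omega = tf.Variable(tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    v = tf.tanh(tf.tensordot(inputs, w_omega, axes=1) + b_omega)   # [batch, time, attention_size]
    vu = tf.tensordot(v, u_omega, axes=1)                          # [batch, time]
    alphas = tf.nn.softmax(vu, name='alphas')                      # attention weights
    output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), 1)  # [batch, 2 * hidden_size]
    return (output, alphas) if return_alphas else output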
Example #9
Source File: nns.py    From Dense_BiLSTM with MIT License
def __call__(self, inputs, seq_len, return_last_state=False):
        with tf.variable_scope(self.scope):
            if return_last_state:
                _, ((_, output_fw), (_, output_bw)) = bidirectional_dynamic_rnn(self.cell_fw, self.cell_bw, inputs,
                                                                                sequence_length=seq_len,
                                                                                dtype=tf.float32)
                output = tf.concat([output_fw, output_bw], axis=-1)
            else:
                (output_fw, output_bw), _ = bidirectional_dynamic_rnn(self.cell_fw, self.cell_bw, inputs,
                                                                      sequence_length=seq_len, dtype=tf.float32)
                output = tf.concat([output_fw, output_bw], axis=-1)
        return output 
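The constructor of this wrapper is not included above. Assuming it stores a scope name and builds self.cell_fw and self.cell_bw (e.g. LSTM cells with num_units units), and assuming the class is named BiRNN, it might be wired in like this:

import tensorflow as tf

word_emb = tf.placeholder(tf.float32, [None, 50, 300])  # [batch, max_time, emb_dim], illustrative
seq_len = tf.placeholder(tf.int32, [None])

bi_rnn_layer = BiRNN(num_units=100, scope="bi_rnn")      # assumed constructor signature
seq_output = bi_rnn_layer(word_emb, seq_len)             # [batch, max_time, 2 * num_units]
# or, to keep only the concatenated final hidden states:
# last_state = bi_rnn_layer(word_emb, seq_len, return_last_state=True)  # [batch, 2 * num_units]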
Example #10
Source File: recurrent_layers.py    From document-qa with Apache License 2.0
def apply(self, is_train, x, mask=None):
        states = bidirectional_dynamic_rnn(self.cell_spec(is_train), self.cell_spec(is_train), x, mask, dtype=tf.float32)[1]
        output = []
        for state in states:
            for i,x in enumerate(state._fields):
                if x == self.output:
                    output.append(state[i])
        if self.merge is not None:
            return self.merge.apply(is_train, output[0], output[1])
        else:
            return tf.concat(output, axis=1) 
Example #11
Source File: recurrent_layers.py    From document-qa with Apache License 2.0
def apply(self, is_train, inputs, mask=None):
        fw = self.fw(is_train)
        bw_spec = self.fw if self.bw is None else self.bw
        bw = bw_spec(is_train)

        if self.merge is None:
            return tf.concat(bidirectional_dynamic_rnn(fw, bw, inputs, mask, swap_memory=self.swap_memory,
                                                       dtype=tf.float32)[0], 2,)
        else:
            fw, bw = bidirectional_dynamic_rnn(fw, bw, inputs, mask,
                                               swap_memory=self.swap_memory, dtype=tf.float32)[0]
            return self.merge.apply(is_train, fw, bw)  # TODO this should be in a different scope 
Example #12
Source File: seq2seq_model.py    From AmusingPythonCodes with MIT License
def _build_model(self):
        with tf.variable_scope("embeddings"):
            self.source_embs = tf.get_variable(name="source_embs", shape=[self.cfg.source_vocab_size, self.cfg.emb_dim],
                                               dtype=tf.float32, trainable=True)
            self.target_embs = tf.get_variable(name="embeddings", shape=[self.cfg.vocab_size, self.cfg.emb_dim],
                                               dtype=tf.float32, trainable=True)
            source_emb = tf.nn.embedding_lookup(self.source_embs, self.enc_source)
            target_emb = tf.nn.embedding_lookup(self.target_embs, self.dec_target_in)
            print("source embedding shape: {}".format(source_emb.get_shape().as_list()))
            print("target input embedding shape: {}".format(target_emb.get_shape().as_list()))

        with tf.variable_scope("encoder"):
            if self.cfg.use_bi_rnn:
                with tf.variable_scope("bi-directional_rnn"):
                    cell_fw = GRUCell(self.cfg.num_units) if self.cfg.cell_type == "gru" else \
                        LSTMCell(self.cfg.num_units)
                    cell_bw = GRUCell(self.cfg.num_units) if self.cfg.cell_type == "gru" else \
                        LSTMCell(self.cfg.num_units)
                    bi_outputs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, source_emb, dtype=tf.float32,
                                                              sequence_length=self.enc_seq_len)
                    source_emb = tf.concat(bi_outputs, axis=-1)
                    print("bi-directional rnn output shape: {}".format(source_emb.get_shape().as_list()))
            input_project = tf.layers.Dense(units=self.cfg.num_units, dtype=tf.float32, name="input_projection")
            source_emb = input_project(source_emb)
            print("encoder input projection shape: {}".format(source_emb.get_shape().as_list()))
            enc_cells = self._create_encoder_cell()
            self.enc_outputs, self.enc_states = dynamic_rnn(enc_cells, source_emb, sequence_length=self.enc_seq_len,
                                                            dtype=tf.float32)
            print("encoder output shape: {}".format(self.enc_outputs.get_shape().as_list()))

        with tf.variable_scope("decoder"):
            self.max_dec_seq_len = tf.reduce_max(self.dec_seq_len, name="max_dec_seq_len")
            self.dec_cells, self.dec_init_states = self._create_decoder_cell()
            # define input and output projection layer
            input_project = tf.layers.Dense(units=self.cfg.num_units, name="input_projection")
            self.dense_layer = tf.layers.Dense(units=self.cfg.vocab_size, name="output_projection")
            if self.mode == "train":  # either "train" or "decode"
                # for training
                target_emb = input_project(target_emb)
                train_helper = TrainingHelper(target_emb, sequence_length=self.dec_seq_len, name="train_helper")
                train_decoder = BasicDecoder(self.dec_cells, helper=train_helper, output_layer=self.dense_layer,
                                             initial_state=self.dec_init_states)
                self.dec_output, _, _ = dynamic_decode(train_decoder, impute_finished=True,
                                                       maximum_iterations=self.max_dec_seq_len)
                print("decoder output shape: {} (vocab size)".format(self.dec_output.rnn_output.get_shape().as_list()))

                # for decode
                start_token = tf.ones(shape=[self.batch_size, ], dtype=tf.int32) * self.cfg.target_dict[GO]
                end_token = self.cfg.target_dict[EOS]

                def inputs_project(inputs):
                    return input_project(tf.nn.embedding_lookup(self.target_embs, inputs))

                dec_helper = GreedyEmbeddingHelper(embedding=inputs_project, start_tokens=start_token,
                                                   end_token=end_token)
                infer_decoder = BasicDecoder(self.dec_cells, helper=dec_helper, initial_state=self.dec_init_states,
                                             output_layer=self.dense_layer)
                infer_dec_output, _, _ = dynamic_decode(infer_decoder, maximum_iterations=self.cfg.maximum_iterations)
                self.dec_predicts = infer_dec_output.sample_id 
Example #13
Source File: tf_attention.py    From Sarcasm-Detection with MIT License
def build_attention_model():
    # Different placeholders
    with tf.name_scope('Inputs'):
        batch_ph = tf.placeholder(tf.int32, [None, SEQUENCE_LENGTH], name='batch_ph')
        target_ph = tf.placeholder(tf.float32, [None], name='target_ph')
        seq_len_ph = tf.placeholder(tf.int32, [None], name='seq_len_ph')
        keep_prob_ph = tf.placeholder(tf.float32, name='keep_prob_ph')

    # Embedding layer
    with tf.name_scope('Embedding_layer'):
        embeddings_var = tf.Variable(tf.random_uniform([vocabulary_size, EMBEDDING_DIM], -1.0, 1.0), trainable=True)
        tf.summary.histogram('embeddings_var', embeddings_var)
        batch_embedded = tf.nn.embedding_lookup(embeddings_var, batch_ph)

    # (Bi-)RNN layer(-s)
    rnn_outputs, _ = bi_rnn(GRUCell(HIDDEN_UNITS), GRUCell(HIDDEN_UNITS),
                            inputs=batch_embedded, sequence_length=seq_len_ph, dtype=tf.float32)
    tf.summary.histogram('RNN_outputs', rnn_outputs)

    # Attention layer
    with tf.name_scope('Attention_layer'):
        attention_output, alphas = attention(rnn_outputs, ATTENTION_UNITS, return_alphas=True)
        tf.summary.histogram('alphas', alphas)

    # Dropout
    drop = tf.nn.dropout(attention_output, keep_prob_ph)

    # Fully connected layer
    with tf.name_scope('Fully_connected_layer'):
        W = tf.Variable(
            tf.truncated_normal([HIDDEN_UNITS * 2, 1], stddev=0.1))  # Hidden size is multiplied by 2 for Bi-RNN
        b = tf.Variable(tf.constant(0., shape=[1]))
        y_hat = tf.nn.xw_plus_b(drop, W, b)
        y_hat = tf.squeeze(y_hat)
        tf.summary.histogram('W', W)

    with tf.name_scope('Metrics'):
        # Cross-entropy loss and optimizer initialization
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_hat, labels=target_ph))
        tf.summary.scalar('loss', loss)
        optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)

        # Accuracy metric
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.sigmoid(y_hat)), target_ph), tf.float32))
        tf.summary.scalar('accuracy', accuracy)

    merged = tf.summary.merge_all()

    # Batch generators
    train_batch_generator = batch_generator(X_train, y_train, BATCH_SIZE)
    test_batch_generator = batch_generator(X_test, y_test, BATCH_SIZE)
    session_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
    saver = tf.train.Saver()
    return batch_ph, target_ph, seq_len_ph, keep_prob_ph, alphas, loss, accuracy, optimizer, merged, \
           train_batch_generator, test_batch_generator, session_conf, saver 
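A hedged sketch of how the handles returned by build_attention_model() might be consumed; it assumes the module-level data and constants the function reads (X_train, y_train, SEQUENCE_LENGTH, vocabulary_size, BATCH_SIZE, ...) are already defined, that batch_generator yields (x, y) pairs indefinitely, and that token id 0 is padding. The step count and keep probability are placeholders:

import numpy as np
import tensorflow as tf

(batch_ph, target_ph, seq_len_ph, keep_prob_ph, alphas, loss, accuracy,
 optimizer, merged, train_batch_generator, test_batch_generator,
 session_conf, saver) = build_attention_model()

with tf.Session(config=session_conf) as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(100):  # illustrative number of training steps
        x_batch, y_batch = next(train_batch_generator)
        seq_lens = np.count_nonzero(x_batch, axis=1)  # true lengths, assuming id 0 is padding
        train_loss, _ = sess.run([loss, optimizer],
                                 feed_dict={batch_ph: x_batch,
                                            target_ph: y_batch,
                                            seq_len_ph: seq_lens,
                                            keep_prob_ph: 0.8})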
Example #14
Source File: adversarial_abblstm.py    From Text-Classification with Apache License 2.0
def build_graph(self, vocab_freq, word2idx):
        vocab_freqs = tf.constant(self._get_freq(vocab_freq, word2idx),
                                  dtype=tf.float32, shape=(self.vocab_size, 1))
        weights = vocab_freqs / tf.reduce_sum(vocab_freqs)
        embeddings_var = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_size], -1.0, 1.0),
                                     trainable=True, name="embedding_var")
        embedding_norm = normalize(embeddings_var, weights)
        batch_embedded = tf.nn.embedding_lookup(embedding_norm, self.x)

        W = tf.Variable(tf.random_normal([self.hidden_size], stddev=0.1))
        W_fc = tf.Variable(tf.truncated_normal([self.hidden_size, self.n_class], stddev=0.1))
        b_fc = tf.Variable(tf.constant(0., shape=[self.n_class]))

        def cal_loss_logit(embedded, keep_prob, reuse=True, scope="loss"):
            with tf.variable_scope(scope, reuse=reuse) as scope:
                rnn_outputs, _ = bi_rnn(BasicLSTMCell(self.hidden_size),
                                        BasicLSTMCell(self.hidden_size),
                                        inputs=embedded, dtype=tf.float32)

                # Attention
                H = tf.add(rnn_outputs[0], rnn_outputs[1])  # fw + bw
                M = tf.tanh(H)  # M = tanh(H)  (batch_size, seq_len, HIDDEN_SIZE)
                # alpha (bs * sl, 1)
                alpha = tf.nn.softmax(tf.matmul(tf.reshape(M, [-1, self.hidden_size]),
                                                tf.reshape(W, [-1, 1])))
                r = tf.matmul(tf.transpose(H, [0, 2, 1]), tf.reshape(alpha, [-1, self.max_len,
                                                                             1]))  # (batch_size, HIDDEN_SIZE, 1)
                r = tf.squeeze(r)
                h_star = tf.tanh(r)
                drop = tf.nn.dropout(h_star, keep_prob)

                # Fully connected layer(dense layer)
                y_hat = tf.nn.xw_plus_b(drop, W_fc, b_fc)

            return y_hat, tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_hat, labels=self.label))

        logits, self.cls_loss = cal_loss_logit(batch_embedded, self.keep_prob, reuse=False)
        embedding_perturbated = self._add_perturbation(batch_embedded, self.cls_loss)
        adv_logits, self.adv_loss = cal_loss_logit(embedding_perturbated, self.keep_prob, reuse=True)
        self.loss = self.cls_loss + self.adv_loss

        # optimization
        loss_to_minimize = self.loss
        tvars = tf.trainable_variables()
        gradients = tf.gradients(loss_to_minimize, tvars, aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)
        grads, global_norm = tf.clip_by_global_norm(gradients, 1.0)

        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.train_op = self.optimizer.apply_gradients(zip(grads, tvars), global_step=self.global_step,
                                                       name='train_step')
        self.prediction = tf.argmax(tf.nn.softmax(logits), 1)

        print("graph built successfully!") 
Example #15
Source File: attn_bi_lstm.py    From Text-Classification with Apache License 2.0
def build_graph(self):
        print("building graph")
        # Word embedding
        embeddings_var = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_size], -1.0, 1.0),
                                     trainable=True)
        batch_embedded = tf.nn.embedding_lookup(embeddings_var, self.x)

        rnn_outputs, _ = bi_rnn(BasicLSTMCell(self.hidden_size),
                                BasicLSTMCell(self.hidden_size),
                                inputs=batch_embedded, dtype=tf.float32)

        fw_outputs, bw_outputs = rnn_outputs

        W = tf.Variable(tf.random_normal([self.hidden_size], stddev=0.1))
        H = fw_outputs + bw_outputs  # (batch_size, seq_len, HIDDEN_SIZE)
        M = tf.tanh(H)  # M = tanh(H)  (batch_size, seq_len, HIDDEN_SIZE)

        self.alpha = tf.nn.softmax(tf.reshape(tf.matmul(tf.reshape(M, [-1, self.hidden_size]),
                                                        tf.reshape(W, [-1, 1])),
                                              (-1, self.max_len)))  # batch_size x seq_len
        r = tf.matmul(tf.transpose(H, [0, 2, 1]),
                      tf.reshape(self.alpha, [-1, self.max_len, 1]))
        r = tf.squeeze(r)
        h_star = tf.tanh(r)  # (batch_size, HIDDEN_SIZE)

        h_drop = tf.nn.dropout(h_star, self.keep_prob)

        # Fully connected layer(dense layer)
        FC_W = tf.Variable(tf.truncated_normal([self.hidden_size, self.n_class], stddev=0.1))
        FC_b = tf.Variable(tf.constant(0., shape=[self.n_class]))
        y_hat = tf.nn.xw_plus_b(h_drop, FC_W, FC_b)

        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_hat, labels=self.label))

        # prediction
        self.prediction = tf.argmax(tf.nn.softmax(y_hat), 1)

        # optimization
        loss_to_minimize = self.loss
        tvars = tf.trainable_variables()
        gradients = tf.gradients(loss_to_minimize, tvars, aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)
        grads, global_norm = tf.clip_by_global_norm(gradients, 1.0)

        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.train_op = self.optimizer.apply_gradients(zip(grads, tvars), global_step=self.global_step,
                                                       name='train_step')
        print("graph built successfully!")