Python Examples of tensorflow.contrib.layers.xavier_initializer

Source File: utilities.py From versa with MIT License

6 votes

def dense_layer(inputs, output_size, activation, use_bias, name):
    """
    Apply a fully connected layer to a batch of inputs.

    The kernel is drawn from a Xavier initializer created with
    ``uniform=False`` and the bias from a normal initializer with standard
    deviation 1e-3. The layer is built with ``reuse=tf.AUTO_REUSE``.
    :param inputs: batch of inputs.
    :param output_size: dimensionality of the output.
    :param activation: activation function to use.
    :param use_bias: whether to have bias weights or not.
    :param name: name used to scope this operation.
    :return: batch of outputs.
    """
    kernel_init = xavier_initializer(uniform=False)
    bias_init = tf.random_normal_initializer(stddev=1e-3)
    outputs = tf.layers.dense(
        inputs=inputs,
        units=output_size,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_init,
        bias_initializer=bias_init,
        name=name,
        reuse=tf.AUTO_REUSE)
    return outputs

Source File: content_averaging_model.py From ConMask with MIT License

6 votes

def _create_embeddings(self, device='/cpu:0'):
        """ Build the word embedding variable.

        The variable is created under ``self.embedding_scope`` with a Xavier
        initializer and stored in ``self.word_embedding``.

        :param device: Device on which the embedding variable is placed.
                        With multiple GPUs, keeping the embeddings on the
                        CPU avoids costly GPU-to-GPU memory copying.
        :return:
        """
        with tf.device(device), tf.variable_scope(self.embedding_scope):
            # Row count is n_vocab + word_oov; column count is the embedding size.
            table_shape = [self.n_vocab + self.word_oov, self.word_embedding_size]
            self.word_embedding = tf.get_variable(
                'word_embedding',
                table_shape,
                dtype=tf.float32,
                initializer=layers.xavier_initializer())

Source File: content_model.py From ConMask with MIT License

6 votes

def _create_embeddings(self, device='/cpu:0'):
        """ Build the word embedding variable.

        The variable is created under ``self.embedding_scope`` with a Xavier
        initializer and stored in ``self.word_embedding``.

        :param device: Device on which the embedding variable is placed.
                        With multiple GPUs, keeping the embeddings on the
                        CPU avoids costly GPU-to-GPU memory copying.
        :return:
        """
        with tf.device(device), tf.variable_scope(self.embedding_scope):
            # Row count is n_vocab + word_oov; column count is the embedding size.
            table_shape = [self.n_vocab + self.word_oov, self.word_embedding_size]
            self.word_embedding = tf.get_variable(
                'word_embedding',
                table_shape,
                dtype=tf.float32,
                initializer=layers.xavier_initializer())

Source File: fcn_model_v2.py From ConMask with MIT License

6 votes

def _create_embeddings(self, device='/cpu:0'):
        """ Build the word embedding variable.

        The variable is created under ``self.embedding_scope`` with a Xavier
        initializer and stored in ``self.word_embedding``. It is trainable
        unless ``self.fix_embedding`` is truthy.

        :param device: Device on which the embedding variable is placed.
                       With multiple GPUs, keeping the embeddings on the
                       CPU avoids costly GPU-to-GPU memory copying.
        :return:
        """
        with tf.device(device), tf.variable_scope(self.embedding_scope):
            # Row count is n_vocab + word_oov; column count is the embedding size.
            table_shape = [self.n_vocab + self.word_oov, self.word_embedding_size]
            self.word_embedding = tf.get_variable(
                'word_embedding',
                table_shape,
                dtype=tf.float32,
                initializer=layers.xavier_initializer(),
                trainable=not self.fix_embedding)

Source File: es.py From rl_algorithms with MIT License

6 votes

def _make_network(self, data_in, out_dim):
        """ Build the network with the same architecture following OpenAI's paper.

        Two 64-unit tanh layers are followed by a linear layer of size
        `out_dim`; every layer uses a uniform Xavier weight initializer.

        Returns the final *layer* of the network, which corresponds to our
        chosen action.  There is no non-linearity for the last layer because
        different envs have different action ranges.
        """
        # (number of units, activation) for each layer, in order.
        layer_specs = (
            (64, tf.nn.tanh),
            (64, tf.nn.tanh),
            (out_dim, None),
        )
        with tf.variable_scope("ESAgent", reuse=False):
            net = data_in
            for n_units, activation in layer_specs:
                net = layers.fully_connected(
                    net,
                    num_outputs=n_units,
                    weights_initializer=layers.xavier_initializer(uniform=True),
                    activation_fn=activation)
            return net

Source File: ddpg.py From rl_algorithms with MIT License

6 votes

def _build_net(self, input_BO, scope):
        """ The Actor network.

        Two ReLU hidden layers (400 and 300 units, following the paper's
        'low-dimensional networks') feed a tanh output layer of size
        `self.ac_dim`, which bounds the action before it is scaled by
        `self.ac_high`. Variables are created with `reuse=False`. No batch
        normalization and not the paper's precise weight initialization.
        """
        with tf.variable_scope(scope, reuse=False):
            net = input_BO
            for n_units in (400, 300):
                net = layers.fully_connected(
                    net,
                    num_outputs=n_units,
                    weights_initializer=layers.xavier_initializer(),
                    activation_fn=tf.nn.relu)
            # tanh keeps the raw action in [-1, 1] before scaling.
            bounded_BA = layers.fully_connected(
                net,
                num_outputs=self.ac_dim,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.tanh)
            # This should broadcast, but haven't tested with ac_dim > 1.
            return tf.multiply(bounded_BA, self.ac_high)

Source File: keras_layers.py From videograph with GNU General Public License v3.0

6 votes

def build(self, input_shape):
        """
        Create the layer's weight and bias variables.

        Input shape is (None, 7, 7, 1024)
        :param input_shape: shape of the layer input; entry 2 is stored as
            the feature-map side and entry 3 is used as the input channel count.
        :return:
        """

        in_channels = input_shape[3]
        out_channels = self.n_channels_out
        side = input_shape[2]

        xavier = contrib_layers.xavier_initializer()
        self.feat_map_side_dim = side

        with tf.variable_scope(self.name):
            self.conv_weights = tf.get_variable(
                'conv_weights',
                shape=[in_channels, out_channels],
                initializer=xavier)
            self.conv_biases = tf.get_variable(
                'conv_biases',
                shape=[out_channels],
                initializer=tf.constant_initializer(0.1))

        self.trainable_weights = [self.conv_weights, self.conv_biases]

        super(ConvOverSpaceLayer, self).build(input_shape)

Source File: keras_layers.py From videograph with GNU General Public License v3.0

6 votes

def build(self, input_shape):
        """
        Create the layer's weight and bias variables.

        Input shape is (None, 10, 7, 7, 1024)
        :param input_shape: 5-element shape; its last four entries are stored
            as n_timesteps_in, side_dim1, side_dim2 and n_channels.
        :return:
        """

        assert len(input_shape) == 5

        (_,
         self.n_timesteps_in,
         self.side_dim1,
         self.side_dim2,
         self.n_channels) = input_shape

        xavier = contrib_layers.xavier_initializer()

        # One (timesteps_in x timesteps_out) weight matrix per channel.
        w_shape = [self.n_channels, self.n_timesteps_in, self.n_timesteps_out]
        b_shape = [self.n_channels, 1, self.n_timesteps_out]

        with tf.variable_scope(self.name):
            self.conv_weights = tf.get_variable(
                'dense_weights', shape=w_shape, initializer=xavier)
            self.conv_biases = tf.get_variable(
                'dense_biases', shape=b_shape,
                initializer=tf.constant_initializer(0.1))

        self.trainable_weights = [self.conv_weights, self.conv_biases]

        super(DepthwiseDenseLayer, self).build(input_shape)

Source File: gitloss.py From Git-Loss-For-Deep-Face-Recognition with MIT License

6 votes

def inference(input_images):
    """Build the network graph.

    Three stages of two 3x3 SAME convolutions plus a 2x2 max-pool (32, 64
    and 128 filters) are followed by a flatten, a 2-unit linear layer
    (``fc1``), a PReLU and a 10-unit linear layer (``fc2``).

    :param input_images: input tensor fed to the first convolution.
    :return: tuple ``(x, feature)``: the ``fc2`` output and the ``fc1`` output.
    """
    # (filters, first conv scope, second conv scope, pool scope) per stage.
    stages = (
        (32, 'conv1_1', 'conv1_2', 'pool1'),
        (64, 'conv2_1', 'conv2_2', 'pool2'),
        (128, 'conv3_1', 'conv3_2', 'pool3'),
    )
    with slim.arg_scope([slim.conv2d], kernel_size=3, padding='SAME'), \
            slim.arg_scope([slim.max_pool2d], kernel_size=2):
        net = input_images
        for depth, conv_a, conv_b, pool in stages:
            for conv_scope in (conv_a, conv_b):
                net = slim.conv2d(net, num_outputs=depth,
                                  weights_initializer=initializers.xavier_initializer(),
                                  scope=conv_scope)
            net = slim.max_pool2d(net, scope=pool)
        net = slim.flatten(net, scope='flatten')
        feature = slim.fully_connected(net, num_outputs=2, activation_fn=None, scope='fc1')
        activated = tflearn.prelu(feature)
        x = slim.fully_connected(activated, num_outputs=10, activation_fn=None, scope='fc2')
    return x, feature

Source File: layers_keras.py From timeception with GNU General Public License v3.0

6 votes

def build(self, input_shape):
        """
        Create the layer's weight and bias variables.

        Input shape is (None, 10, 7, 7, 1024)
        :param input_shape: 5-element shape; its last four entries are stored
            as n_timesteps_in, side_dim1, side_dim2 and n_channels.
        :return:
        """

        assert len(input_shape) == 5

        (_,
         self.n_timesteps_in,
         self.side_dim1,
         self.side_dim2,
         self.n_channels) = input_shape

        xavier = contrib_layers.xavier_initializer()

        # One (timesteps_in x timesteps_out) weight matrix per channel.
        w_shape = [self.n_channels, self.n_timesteps_in, self.n_timesteps_out]
        b_shape = [self.n_channels, 1, self.n_timesteps_out]

        with tf.variable_scope(self.name):
            self.conv_weights = tf.get_variable(
                'dense_weights', shape=w_shape, initializer=xavier)
            self.conv_biases = tf.get_variable(
                'dense_biases', shape=b_shape,
                initializer=tf.constant_initializer(0.1))

        self.trainable_weights = [self.conv_weights, self.conv_biases]

        super(DepthwiseDenseLayer, self).build(input_shape)

Source File: network.py From HNRE with MIT License

6 votes

def EncoderPCNN(self, is_training, init_vec=None):
        """Encode ``self.input_embedding`` with a width-3 convolution and mask-based max pooling.

        :param is_training: not referenced inside this method.
        :param init_vec: forwarded to ``self._GetVar`` when creating the
            convolution kernel and bias variables.
        :return: ReLU-activated tensor reshaped to ``[-1, FLAGS.hidden_size * 3]``.
        """
        
        with tf.variable_scope("sentence-encoder", dtype=tf.float32, initializer=xavier(), reuse=tf.AUTO_REUSE):
            input_dim = self.input_embedding.shape[2]
            # Row k is the 3-vector looked up for mask id k; id 0 maps to all zeros.
            # presumably ids 1-3 mark the three sentence pieces -- TODO confirm
            mask_embedding = tf.constant([[0,0,0],[1,0,0],[0,1,0],[0,0,1]], dtype=np.float32)
            pcnn_mask = tf.nn.embedding_lookup(mask_embedding, self.mask)
            # Insert a singleton axis at position 1 so the input is 4-D for conv2d.
            input_sentence = tf.expand_dims(self.input_embedding, axis=1)
            with tf.variable_scope("conv2d"):
                # NOTE(review): conv_kernel / conv_bias are never referenced again in
                # this method. Presumably tf.layers.conv2d below picks them up by
                # name ("conv2d/kernel", "conv2d/bias") through AUTO_REUSE -- confirm
                # before removing or reordering.
                conv_kernel = self._GetVar(init_vec=init_vec,key='convkernel',name='kernel',
                    shape=[1,3,input_dim,FLAGS.hidden_size],trainable=True)
                conv_bias = self._GetVar(init_vec=init_vec,key='convbias',name='bias',shape=[FLAGS.hidden_size],trainable=True)
            x = tf.layers.conv2d(inputs = input_sentence, filters=FLAGS.hidden_size, 
                kernel_size=[1,3], strides=[1, 1], padding='same', reuse=tf.AUTO_REUSE)
            x = tf.reshape(x, [-1, FLAGS.max_length, FLAGS.hidden_size, 1])
            # Multiply the transposed features by the mask, then take the max over axis 2.
            x = tf.reduce_max(tf.reshape(pcnn_mask, [-1, 1, FLAGS.max_length, 3]) * tf.transpose(x,[0, 2, 1, 3]), axis = 2)
            x = tf.nn.relu(tf.reshape(x, [-1, FLAGS.hidden_size * 3]))

        return x

Source File: network.py From HNRE with MIT License

6 votes

def EncoderLSTM(self, is_training, init_vec=None):
        """Encode ``self.input_embedding`` with a bidirectional LSTM.

        :param is_training: passed to the dropout layer as its ``training`` flag.
        :param init_vec: not referenced inside this method.
        :return: the forward and backward final states concatenated along axis 1.
        """

        with tf.variable_scope("sentence-encoder", dtype=tf.float32, initializer=xavier(), reuse=tf.AUTO_REUSE):
            # NOTE(review): tf.layers.dropout's `rate` is the fraction of units
            # dropped, yet a value named keep_prob is passed. This looks inverted
            # unless keep_prob == 0.5 -- confirm before changing.
            input_sentence = tf.layers.dropout(self.input_embedding, rate = self.keep_prob, training = is_training)
            fw_cell = tf.contrib.rnn.BasicLSTMCell(FLAGS.hidden_size, state_is_tuple=True)
            bw_cell = tf.contrib.rnn.BasicLSTMCell(FLAGS.hidden_size, state_is_tuple=True)
            _, states = tf.nn.bidirectional_dynamic_rnn(
                            fw_cell, bw_cell, input_sentence,
                            sequence_length = self.len,
                            dtype = tf.float32,
                            scope = 'bi-dynamic-rnn')
            fw_states, bw_states = states
            # With state_is_tuple=True each state is a (c, h) tuple; keep its
            # first element.
            if isinstance(fw_states, tuple):
                fw_states = fw_states[0]
                bw_states = bw_states[0]
            # Concatenate the selected state tensors. The previous code passed
            # the raw nested `states` tuple to tf.concat, discarding the
            # extraction above.
            x = tf.concat([fw_states, bw_states], axis=1)

        return x

Source File: hatn.py From HATN with MIT License

6 votes

def __init__(self,
                 config,
                 args,
                 word_vecs,
                 init = tf.random_uniform_initializer(minval=-0.01, maxval=0.01), # init = layers.xavier_initializer(),
                 name='HATN'):
        """Store the configuration and build the model.

        :param config: configuration object; the hyper-parameters listed
            below are copied from it onto the instance.
        :param args: stored as ``self.args``.
        :param word_vecs: stored as ``self.word_vecs``.
        :param init: initializer stored as ``self.init``.
        :param name: stored as ``self.name``.
        """
        self.cfg = config
        self.args = args
        self.word_vecs = word_vecs
        self.init = init
        self.name = name

        # Mirror the hyper-parameters from the config onto the instance.
        for field in ('memory_size', 'sent_size', 'embed_size', 'hidden_size',
                      'l2_reg_lambda', 'max_grad_norm', 'hops'):
            setattr(self, field, getattr(self.cfg, field))

        self.build_vars()
        self.build_eval_op()

Source File: layers_keras.py From deep-smoke-machine with BSD 3-Clause "New" or "Revised" License

6 votes

def build(self, input_shape):
        """
        Create the layer's weight and bias variables.

        Input shape is (None, 10, 7, 7, 1024)
        :param input_shape: 5-element shape; its last four entries are stored
            as n_timesteps_in, side_dim1, side_dim2 and n_channels.
        :return:
        """

        assert len(input_shape) == 5

        (_,
         self.n_timesteps_in,
         self.side_dim1,
         self.side_dim2,
         self.n_channels) = input_shape

        xavier = contrib_layers.xavier_initializer()

        # One (timesteps_in x timesteps_out) weight matrix per channel.
        w_shape = [self.n_channels, self.n_timesteps_in, self.n_timesteps_out]
        b_shape = [self.n_channels, 1, self.n_timesteps_out]

        with tf.variable_scope(self.name):
            self.conv_weights = tf.get_variable(
                'dense_weights', shape=w_shape, initializer=xavier)
            self.conv_biases = tf.get_variable(
                'dense_biases', shape=b_shape,
                initializer=tf.constant_initializer(0.1))

        self.trainable_weights = [self.conv_weights, self.conv_biases]

        super(DepthwiseDenseLayer, self).build(input_shape)

Source File: pnet.py From HATN with MIT License

6 votes

def __init__(self,
                 config,
                 args,
                 word_vecs,
                 init = tf.random_uniform_initializer(minval=-0.01, maxval=0.01), # init = layers.xavier_initializer(),
                 name='PNet'):
        """Store the configuration and build the model.

        :param config: configuration object; the hyper-parameters listed
            below are copied from it onto the instance.
        :param args: stored as ``self.args``.
        :param word_vecs: stored as ``self.word_vecs``.
        :param init: initializer stored as ``self.init``.
        :param name: stored as ``self.name``.
        """
        self.cfg = config
        self.args = args
        self.word_vecs = word_vecs
        self.init = init
        self.name = name

        # Mirror the hyper-parameters from the config onto the instance.
        for field in ('memory_size', 'sent_size', 'embed_size', 'hidden_size',
                      'l2_reg_lambda', 'max_grad_norm', 'hops'):
            setattr(self, field, getattr(self.cfg, field))

        self.build_vars()
        self.build_eval_op()

Source File: nn.py From image_captioning with MIT License

5 votes

def prepare(self):
        """ Set up the weight initializers and regularizers.

        Convolution regularizers are only created when `self.train_cnn` is
        set, fully connected ones only when `self.is_train` is set; in both
        cases the matching config scale must be positive, otherwise the
        attribute is None.
        """
        cfg = self.config

        self.conv_kernel_initializer = layers.xavier_initializer()

        self.conv_kernel_regularizer = (
            layers.l2_regularizer(scale=cfg.conv_kernel_regularizer_scale)
            if self.train_cnn and cfg.conv_kernel_regularizer_scale > 0
            else None)

        self.conv_activity_regularizer = (
            layers.l1_regularizer(scale=cfg.conv_activity_regularizer_scale)
            if self.train_cnn and cfg.conv_activity_regularizer_scale > 0
            else None)

        # Fully connected kernels start uniformly in [-scale, scale].
        self.fc_kernel_initializer = tf.random_uniform_initializer(
            minval=-cfg.fc_kernel_initializer_scale,
            maxval=cfg.fc_kernel_initializer_scale)

        self.fc_kernel_regularizer = (
            layers.l2_regularizer(scale=cfg.fc_kernel_regularizer_scale)
            if self.is_train and cfg.fc_kernel_regularizer_scale > 0
            else None)

        self.fc_activity_regularizer = (
            layers.l1_regularizer(scale=cfg.fc_activity_regularizer_scale)
            if self.is_train and cfg.fc_activity_regularizer_scale > 0
            else None)

Source File: ops.py From DeepCreamPy with GNU Affero General Public License v3.0

5 votes

def dense_SN(tensor, output_dim, name):
    """Dense layer written as a VALID convolution over the whole input.

    The kernel has the input's full height and width, and is passed through
    the `spectral_norm` helper before the convolution; a zero-initialized
    bias is added to the result.

    :param tensor: 4-D input whose static shape entries are all known.
    :param output_dim: number of output channels.
    :param name: prefix for the variable names (`name + 'w'`, `name + 'b'`).
    :return: the convolution output plus bias.
    """
    _, height, width, channels = [dim.value for dim in tensor.get_shape()]

    kernel = tf.get_variable(name=name + 'w',
                             shape=[height, width, channels, output_dim],
                             initializer=layers.xavier_initializer())
    bias = tf.get_variable(name=name + 'b',
                           shape=[output_dim],
                           initializer=tf.constant_initializer(0.0))

    normalized_kernel = spectral_norm(kernel, name=name + 'w')
    conv = tf.nn.conv2d(tensor, filter=normalized_kernel, strides=[1, 1, 1, 1], padding='VALID')

    return conv + bias

Source File: network.py From HNRE with MIT License

5 votes

def EncoderCNN(self, is_training, init_vec=None):
        """Encode ``self.input_embedding`` with a width-3 convolution and max pooling.

        :param is_training: not referenced inside this method.
        :param init_vec: forwarded to ``self._GetVar`` when creating the
            convolution kernel and bias variables.
        :return: ReLU of the convolution output after a max over axis 2 and
            a squeeze of axis 1.
        """

        with tf.variable_scope("sentence-encoder", dtype=tf.float32, initializer=xavier(), reuse=tf.AUTO_REUSE):
            input_dim = self.input_embedding.shape[2]
            # Insert a singleton axis at position 1 so the input is 4-D for conv2d.
            input_sentence = tf.expand_dims(self.input_embedding, axis=1)
            with tf.variable_scope("conv2d"):
                # NOTE(review): conv_kernel / conv_bias are never referenced again in
                # this method. Presumably tf.layers.conv2d below picks them up by
                # name ("conv2d/kernel", "conv2d/bias") through AUTO_REUSE -- confirm
                # before removing or reordering.
                conv_kernel = self._GetVar(init_vec=init_vec,key='convkernel',name='kernel',
                    shape=[1,3,input_dim,FLAGS.hidden_size],trainable=True)
                conv_bias = self._GetVar(init_vec=init_vec,key='convbias',name='bias',shape=[FLAGS.hidden_size],trainable=True)
            x = tf.layers.conv2d(inputs = input_sentence, filters=FLAGS.hidden_size, 
                kernel_size=[1,3], strides=[1, 1], padding='same', reuse=tf.AUTO_REUSE)
            # Max over axis 2, then drop the singleton axis added above.
            x = tf.reduce_max(x, axis=2)
            x = tf.nn.relu(tf.squeeze(x, 1))

        return x

Source File: HAN_model.py From hierarchical-attention-networks with MIT License

5 votes

def _init_embedding(self, scope):
    """Create the embedding matrix and embed ``self.inputs``.

    The matrix is created in an "embedding" variable scope nested inside
    `scope`, and the lookup result is stored in ``self.inputs_embedded``.
    """
    with tf.variable_scope(scope), tf.variable_scope("embedding"):
        matrix_shape = [self.vocab_size, self.embedding_size]
        self.embedding_matrix = tf.get_variable(
            name="embedding_matrix",
            shape=matrix_shape,
            initializer=layers.xavier_initializer(),
            dtype=tf.float32)
        self.inputs_embedded = tf.nn.embedding_lookup(
            self.embedding_matrix, self.inputs)

Source File: ops.py From DeepCreamPy with GNU Affero General Public License v3.0

5 votes

def convolution_SN(tensor, output_dim, kernel_size, stride, name):
    """Square SAME convolution whose kernel goes through `spectral_norm`.

    :param tensor: 4-D input; its last static dimension is used as the
        input channel count.
    :param output_dim: number of output channels.
    :param kernel_size: height and width of the kernel.
    :param stride: stride applied to both middle axes.
    :param name: prefix for the variable names (`name + 'w'`, `name + 'b'`).
    :return: the convolution output plus a zero-initialized bias.
    """
    # Only the channel count is needed, but the unpacking still requires rank 4.
    _, _, _, in_channels = [dim.value for dim in tensor.get_shape()]

    kernel = tf.get_variable(name=name + 'w',
                             shape=[kernel_size, kernel_size, in_channels, output_dim],
                             initializer=layers.xavier_initializer())
    bias = tf.get_variable(name=name + 'b',
                           shape=[output_dim],
                           initializer=tf.constant_initializer(0.0))

    normalized_kernel = spectral_norm(kernel, name=name + 'w')
    conv = tf.nn.conv2d(tensor, filter=normalized_kernel, strides=[1, stride, stride, 1], padding='SAME')

    return conv + bias

Source File: model_components.py From hierarchical-attention-networks with MIT License

5 votes

def task_specific_attention(inputs, output_size,
                            initializer=layers.xavier_initializer(),
                            activation_fn=tf.tanh, scope=None):
    """
    Reduce the `units` axis of `inputs` with a learned attention context
    vector (constant within the task of interest).

    Args:
        inputs: Tensor of shape [batch_size, units, input_size]
            `input_size` must be static (known)
            `units` axis will be attended over (reduced from output)
            `batch_size` will be preserved
        output_size: Size of output's inner (feature) dimension
        initializer: Initializer for the attention context vector
        activation_fn: Activation applied to the input projection
        scope: Variable scope to use; defaults to 'attention'

    Returns:
        outputs: Tensor of shape [batch_size, output_dim].
    """
    static_shape = inputs.get_shape()
    assert len(static_shape) == 3 and static_shape[-1].value is not None

    with tf.variable_scope(scope or 'attention') as attn_scope:
        context = tf.get_variable(name='attention_context_vector',
                                  shape=[output_size],
                                  initializer=initializer,
                                  dtype=tf.float32)
        projected = layers.fully_connected(inputs, output_size,
                                           activation_fn=activation_fn,
                                           scope=attn_scope)

        # One score per unit: inner product of its projection with the context.
        scores = tf.reduce_sum(tf.multiply(projected, context), axis=2, keep_dims=True)
        # Normalize the scores across the units axis.
        weights = tf.nn.softmax(scores, dim=1)

        return tf.reduce_sum(tf.multiply(projected, weights), axis=1)

Source File: layers.py From ner with Apache License 2.0

5 votes

def dense_convolutional_network(input_units,
                                n_filters=None,
                                n_layers=1,
                                filter_width=3,
                                use_dilation=False,
                                use_batch_norm=False,
                                training_ph=None):
    """Stack of 1D convolutions where each layer sees all earlier outputs.

    Every layer convolves the concatenation (along the last axis) of the
    input and the outputs of all previous layers, then applies optional
    batch normalization and a ReLU.

    Args:
        input_units: input tensor.
        n_filters: filters per layer; defaults to the size of the input's
            last dimension.
        n_layers: number of convolutional layers.
        filter_width: width of the convolution kernel.
        use_dilation: if true, layer i uses dilation rate 2**i, otherwise 1.
        use_batch_norm: whether to batch-normalize each layer's output.
        training_ph: passed as `training` to batch normalization.

    Returns:
        The output of the last layer.
    """
    if n_filters is None:
        # Fall back to the number of input features.
        n_filters = input_units.get_shape().as_list()[-1]

    units = input_units
    history = [units]
    for layer_idx in range(n_layers):
        stacked = tf.concat(history, axis=-1)
        rate = 2**layer_idx if use_dilation else 1
        units = tf.layers.conv1d(stacked,
                                 n_filters,
                                 filter_width,
                                 dilation_rate=rate,
                                 padding='same',
                                 kernel_initializer=xavier_initializer())
        if use_batch_norm:
            units = tf.layers.batch_normalization(units, training=training_ph)
        units = tf.nn.relu(units)
        history.append(units)
    return units

Source File: network.py From DeepPavlov with Apache License 2.0

5 votes

def biaffine_attention(deps: tf.Tensor, heads: tf.Tensor, name="biaffine_attention") -> tf.Tensor:
    """Implements a trainable matching layer between two families of embeddings.

    The score is a bilinear term (kernel initialized to the identity) plus
    one linear bias term for `deps` and one for `heads`.

    Args:
        deps: the 3D-tensor of dependency states,
        heads: the 3D-tensor of head states,
        name: the name of a layer

    Returns:
        `answer` a 3D-tensor of pairwise scores between deps and heads

    """
    dim_deps = deps.get_shape().as_list()[-1]
    dim_heads = heads.get_shape().as_list()[-1]
    assert dim_deps == dim_heads
    with tf.variable_scope(name):
        kernel = tf.get_variable('kernel', shape=(dim_deps, dim_heads),
                                 initializer=tf.initializers.identity())
        first_bias = tf.get_variable('first_bias', shape=(dim_deps, 1),
                                     initializer=xavier_initializer())
        second_bias = tf.get_variable('second_bias', shape=(dim_heads, 1),
                                      initializer=xavier_initializer())
        # deps.shape = (B, L, D); projected_rie = sum_d deps_{rid} kernel_{de}
        projected = tf.tensordot(deps, kernel, axes=[-1, -2])  # (B, L, D)
        scores = tf.matmul(projected, heads, transpose_b=True)  # (B, L, L)
        # bias over the x axis
        scores += tf.tensordot(deps, first_bias, axes=[-1, -2])
        # bias over the y axis: (B, L, 1) transposed to (B, 1, L)
        heads_bias = tf.tensordot(heads, second_bias, axes=[-1, -2])
        heads_bias = tf.transpose(heads_bias, [0, 2, 1])
        scores += heads_bias
    return scores

Source File: network.py From DeepPavlov with Apache License 2.0

5 votes

def biaffine_layer(deps: tf.Tensor, heads: tf.Tensor, deps_dim: int,
                   heads_dim: int, output_dim: int, name: str = "biaffine_layer") -> tf.Tensor:
    """Implements a biaffine layer from [Dozat, Manning, 2016].

    Both inputs are flattened to 2D, combined through a bilinear kernel,
    two linear terms and a label bias, and the result is reshaped back to
    the leading dimensions of `deps`.

    Args:
        deps: the 3D-tensor of dependency states,
        heads: the 3D-tensor of head states,
        deps_dim: the dimension of dependency states,
        heads_dim: the dimension of head_states,
        output_dim: the output dimension
        name: the name of a layer

    Returns:
        `answer` the output 3D-tensor

    """
    n_axes = tf.keras.backend.ndim(deps)
    dynamic_shape = [kb.shape(deps)[axis] for axis in range(n_axes)]
    flat_deps = tf.reshape(deps, [-1, deps_dim])  # (B*L, D1)
    flat_heads = tf.reshape(heads, [-1, heads_dim])  # (B*L, D2)
    with tf.variable_scope(name):
        kernel = tf.get_variable('kernel', shape=(deps_dim, heads_dim * output_dim),
                                 initializer=xavier_initializer())
        bilinear = tf.matmul(flat_deps, kernel)  # (B*L, D2*H)
        bilinear = tf.reshape(bilinear, [-1, heads_dim, output_dim])  # (B*L, D2, H)
        result = kb.batch_dot(bilinear, flat_heads, axes=[1, 1])  # (B*L, H)
        first_bias = tf.get_variable('first_bias', shape=(deps_dim, output_dim),
                                     initializer=xavier_initializer())
        result += tf.matmul(flat_deps, first_bias)
        second_bias = tf.get_variable('second_bias', shape=(heads_dim, output_dim),
                                      initializer=xavier_initializer())
        result += tf.matmul(flat_heads, second_bias)
        label_bias = tf.get_variable('label_bias', shape=(output_dim,),
                                     initializer=xavier_initializer())
        result = kb.bias_add(result, label_bias)
        # Restore the leading dimensions of `deps`: (B, L, H).
        result = tf.reshape(result, dynamic_shape[:-1] + [output_dim])
    return result

Source File: policy_network.py From DeepPavlov with Apache License 2.0

5 votes

def _build_body(self) -> Tuple[tf.Tensor, tf.Tensor]:
        """Build the network body: input projection, LSTM, output projection.

        Returns:
            The logits tensor and the final state returned by the dynamic RNN.
        """
        # input projection
        projected = tf.layers.dense(self._features, self.dense_size,
                                    kernel_regularizer=tf.nn.l2_loss, kernel_initializer=xav())

        if self.attention_params:
            # append the attention output to the projected features
            projected = tf.concat([projected, self._build_attn_body()], -1)

        projected = tf_layers.variational_dropout(projected, keep_prob=self._dropout_keep_prob)

        # recurrent network unit
        lstm_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_size)
        # sequence lengths: sum of the utterance mask over its last axis
        mask_sum = tf.reduce_sum(self._utterance_mask, axis=-1)
        utter_lengths = tf.cast(mask_sum, tf.int32)

        # rnn_out: [batch_size, max_time, hidden_size]
        # final_state: tuple of two [batch_size, hidden_size]
        rnn_out, final_state = tf.nn.dynamic_rnn(lstm_cell, projected,
                                                 time_major=False, initial_state=self._initial_state,
                                                 sequence_length=utter_lengths)

        rnn_out = tf.reshape(rnn_out, (self._batch_size, -1, self.hidden_size))
        rnn_out = tf_layers.variational_dropout(rnn_out, keep_prob=self._dropout_keep_prob)
        # output projection
        logits = tf.layers.dense(rnn_out, self.action_size,
                                 kernel_regularizer=tf.nn.l2_loss, kernel_initializer=xav(), name='logits')
        return logits, final_state

Source File: modules.py From squad-transformer with Apache License 2.0

5 votes

def std_conv(inputs, num_filters, kernel_size=1, padding="SAME", activation_fn=None, l2_lambda=3e-7, use_bias=True,
             scope="Conv", reuse=None):
    """Standard stride-1 1D convolution with optional bias and activation.

    Inputs:
      inputs: tensor. Input to the 1D conv layer. Shape (batch_size, seq_len, vec_size).
      num_filters: int. Depth of filter stack to use in 1D conv.
      kernel_size: int. Spatial extent of 1D kernel (i.e., number of timesteps the kernel covers per application).
      padding: string. Padding to use for 1D convolution. Defaults to "SAME".
      activation_fn: function. Activation function to apply to outputs before returning. If None, no activation.
      l2_lambda: float. L2 regularization factor to apply to the kernel weights.
      use_bias: bool. If true, apply a bias to the convolution outputs. Else, no bias.
      scope: string. Variable scope in which the filter and bias variables are created.
      reuse: passed through to the variable scope.
    Returns:
      outputs: tensor. Outputs after convolution, bias (if any), and activation (if any) are applied.
      Shape (batch_size, out_seq_len, num_filters), where out_seq_len depends on the padding.
    """
    with tf.variable_scope(scope, reuse=reuse):
        in_depth = inputs.get_shape()[-1]
        # Xavier initialization when there is no activation, He otherwise.
        if activation_fn is None:
            make_initializer = tf_layers.xavier_initializer
        else:
            make_initializer = tf_layers.variance_scaling_initializer
        kernel = tf.get_variable("filters",
                                 shape=(kernel_size, in_depth, num_filters),
                                 dtype=tf.float32,
                                 regularizer=tf_layers.l2_regularizer(scale=l2_lambda),
                                 initializer=make_initializer())
        outputs = tf.nn.conv1d(inputs, kernel, stride=1, padding=padding)
        if use_bias:
            bias = tf.get_variable("b", shape=(num_filters,), dtype=tf.float32, initializer=tf.zeros_initializer())
            outputs += bias

    if activation_fn is None:
        return outputs
    return activation_fn(outputs)

Source File: han.py From HierarchicalAttentionNetworksForDocumentClassification with MIT License

5 votes

def attention(self, inputs, output_size):
    """
    desc: build the attention mechanism
    args:
      inputs: sentence- or document-level output of the bidirectional rnn layer
      output_size: dimensionality of the output
    returns:
      the attention-weighted sum of the projected inputs
    """

    with tf.variable_scope("attention"):
      context_uw = tf.get_variable(name="attention_context_vector",
                                   shape=[output_size],
                                   initializer=layers.xavier_initializer(),
                                   dtype=tf.float32)
      projection_u = layers.fully_connected(inputs,
                                            output_size,
                                            activation_fn=tf.tanh)
      # one score per position: inner product of its projection with the context
      scores = tf.reduce_sum(tf.multiply(projection_u, context_uw), axis=2, keep_dims=True)
      # normalize the scores over axis 1
      weights = tf.nn.softmax(scores, dim=1)
      return tf.reduce_sum(tf.multiply(projection_u, weights), axis=1)
  # end
# end

Source File: keras_layers.py From videograph with GNU General Public License v3.0

5 votes

def build(self, input_shape):
        """
        Create the convolution kernel and bias variables for this layer.

        Input shape is (None, 20, 7, 7, 1024)
        :param input_shape: 5-element shape, unpacked as
            (batch, timesteps, side_dim1, side_dim2, spatial_maps).
        :return:
        """

        assert len(input_shape) == 5

        xavier_init = contrib_layers.xavier_initializer()

        # Remember the input geometry on the layer; the batch entry is ignored.
        _, timesteps, side1, side2, n_maps = input_shape
        self.n_timesteps, self.n_maps = timesteps, n_maps
        self.side_dim1, self.side_dim2 = side1, side2

        # Kernel is [kernel_size, 1, n_maps, 1]; there is one bias per spatial map.
        kernel_shape = [self.kernel_size, 1, n_maps, 1]
        bias_shape = [n_maps]

        # Both variables are created under a scope named after the layer.
        with tf.variable_scope(self.name):
            self.conv_weights = tf.get_variable(
                'depthwise_conv_1d_weights', shape=kernel_shape, initializer=xavier_init)
            self.conv_biases = tf.get_variable(
                'depthwise_conv1d_biases', shape=bias_shape, initializer=tf.constant_initializer(0.1))

        self.trainable_weights = [self.conv_weights, self.conv_biases]

        super(DepthwiseDilatedConv1DLayer, self).build(input_shape)

Source File: keras_layers.py From videograph with GNU General Public License v3.0

5 votes

def build(self, input_shape):
        """
        Allocate the kernel and bias variables used by this layer.

        Input shape is (None, 20, 7, 7, 1024)
        :param input_shape: shape with exactly five entries, read as
            (batch, timesteps, side_dim1, side_dim2, spatial_maps).
        :return:
        """

        assert len(input_shape) == 5

        weight_initializer = contrib_layers.xavier_initializer()

        # Record the non-batch dimensions of the input on the layer.
        _, num_steps, height, width, num_maps = input_shape
        self.n_timesteps = num_steps
        self.n_maps = num_maps
        self.side_dim1 = height
        self.side_dim2 = width

        # Shapes: kernel is [kernel_size, 1, num_maps, 1], bias is [num_maps].
        shape_of_weights = [self.kernel_size, 1, num_maps, 1]
        shape_of_biases = [num_maps]

        # Variables live in a variable scope named after this layer.
        with tf.variable_scope(self.name):
            self.conv_weights = tf.get_variable('depthwise_conv_1d_weights',
                                                shape=shape_of_weights,
                                                initializer=weight_initializer)
            self.conv_biases = tf.get_variable('depthwise_conv1d_biases',
                                               shape=shape_of_biases,
                                               initializer=tf.constant_initializer(0.1))

        self.trainable_weights = [self.conv_weights, self.conv_biases]

        super(DepthwiseConv1DLayer, self).build(input_shape)

Source File: Random_clip_valid.py From C3D-tensorflow with MIT License

5 votes

def __init__(self, num_class=101, keep_prob=0.6, batch_size=3, epoch=40, lr=1e-4):
        """
        Store the hyper-parameters and set up the inputs and learning-rate schedule.

        :param num_class: stored as self.num_class.
        :param keep_prob: stored as self.keep_prob.
        :param batch_size: stored as self.batch_size; also fixes the length of
            the labels placeholder and the number of steps per epoch.
        :param epoch: stored as self.epoch.
        :param lr: initial learning rate fed to the exponential decay schedule.
        """
        # Frame size and crop size in pixels, plus the number of frames per clip.
        self.IMG_WIDTH, self.IMG_HEIGHT = 171, 128
        self.CROP_WIDTH, self.CROP_HEIGHT = 112, 112
        self.CLIP_LENGTH = 16

        self.num_class = num_class
        self.keep_prob = keep_prob
        self.batch_size = batch_size
        self.epoch = epoch

        # train clip: 9537*5 CLIP=5
        # test  clip: 3783*5 CLIP=5
        # train clip: 9537*3 CLIP=3
        # test  clip: 3783*3 CLIP=3
        # 9537 is hard-coded; presumably the training-set size from the notes
        # above -- confirm before reusing with another dataset.
        self.n_step_epoch = int(9537 / batch_size)

        # The learning rate changes once every `epochs_per_decay` epochs.
        # NOTE(review): the original comment said every 5 epochs, but the value is 10.
        epochs_per_decay = 10
        decay_steps = int(epochs_per_decay * self.n_step_epoch)

        self.graph = tf.Graph()
        with self.graph.as_default():
            clip_shape = [None, self.CLIP_LENGTH, self.CROP_HEIGHT, self.CROP_WIDTH, 3]
            self.inputs = tf.placeholder(tf.float32, shape=clip_shape)
            self.labels = tf.placeholder(tf.int64, shape=[batch_size])

            self.initializer = layers.xavier_initializer()
            self.global_step = tf.Variable(0, trainable=False, name="global_step")
            # Staircase schedule: multiply lr by 0.1 every `decay_steps` global steps.
            self.lr = tf.train.exponential_decay(
                learning_rate=lr,
                global_step=self.global_step,
                decay_steps=decay_steps,
                decay_rate=1e-1,
                staircase=True)
            tf.add_to_collection(tf.GraphKeys.GLOBAL_STEP, self.global_step)

Python tensorflow.contrib.layers.xavier_initializer() Examples