Python tensorflow.orthogonal_initializer() Examples
The following are 30 code examples of tensorflow.orthogonal_initializer(). You can go to the original project or source file by following the links above each example.
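For orientation, here is a minimal, self-contained sketch (TensorFlow 1.x graph mode; the variable names, shapes, and gain value are illustrative, not taken from the examples below) of how tf.orthogonal_initializer() is typically used:

import math

import tensorflow as tf  # TensorFlow 1.x API

# The initializer samples a (near-)orthogonal matrix; `gain` rescales it.
# A gain of sqrt(2) is a common choice in front of ReLU activations.
init = tf.orthogonal_initializer(gain=math.sqrt(2))

# As a variable initializer ...
weights = tf.get_variable("weights", shape=[256, 512], initializer=init)

# ... or as the kernel initializer of a layer.
inputs = tf.placeholder(tf.float32, [None, 256])
hidden = tf.layers.dense(inputs, 128, activation=tf.nn.relu, kernel_initializer=init)

The examples below pass the same initializer to tf.get_variable, tf.layers/slim layers, and RNN cells such as tf.contrib.rnn.LSTMCell.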
Example #1
Source File: generator.py From UROP-Adversarial-Feature-Matching-for-Text-Generation with GNU Affero General Public License v3.0
def init_param(self):
    idm = self.input_dim
    hs = self.hidden_size
    ws = len(self.window)
    nf = idm * ws
    # author's special initialization strategy.
    self.Wemb = tf.get_variable(name=self.name + '_Wemb', shape=[self.vocab_size, idm],
                                dtype=tf.float32, initializer=tf.random_uniform_initializer())
    self.bhid = tf.get_variable(name=self.name + '_bhid', shape=[self.vocab_size],
                                dtype=tf.float32, initializer=tf.zeros_initializer())
    self.Vhid = tf.get_variable(name=self.name + '_Vhid', shape=[hs, idm],
                                dtype=tf.float32, initializer=tf.random_uniform_initializer())
    self.Vhid = dot(self.Vhid, self.Wemb)  # [hidden_size, vocab_size]
    self.i2h_W = tf.get_variable(name=self.name + '_i2h_W', shape=[idm, hs * 4],
                                 dtype=tf.float32, initializer=tf.random_uniform_initializer())
    self.h2h_W = tf.get_variable(name=self.name + '_h2h_W', shape=[hs, hs * 4],
                                 dtype=tf.float32, initializer=tf.orthogonal_initializer())
    self.z2h_W = tf.get_variable(name=self.name + '_z2h_W', shape=[nf, hs * 4],
                                 dtype=tf.float32, initializer=tf.random_uniform_initializer())
    b_init_1 = tf.zeros((hs,))
    b_init_2 = tf.ones((hs,)) * 3
    b_init_3 = tf.zeros((hs,))
    b_init_4 = tf.zeros((hs,))
    b_init = tf.concat([b_init_1, b_init_2, b_init_3, b_init_4], axis=0)
    # b_init = tf.constant(b_init)
    # self.b = tf.get_variable(name=self.name + '_b', shape=[hs * 4], dtype=tf.float32, initializer=b_init)
    self.b = tf.get_variable(name=self.name + '_b', dtype=tf.float32,
                             initializer=b_init)  # ValueError: If initializer is a constant, do not specify shape.
    self.C0 = tf.get_variable(name=self.name + '_C0', shape=[nf, hs],
                              dtype=tf.float32, initializer=tf.random_uniform_initializer())
    self.b0 = tf.get_variable(name=self.name + '_b0', shape=[hs],
                              dtype=tf.float32, initializer=tf.zeros_initializer())
Example #2
Source File: train.py From UNMT-SPR with MIT License
def get_initializer(params):
    if params.initializer == "uniform":
        max_val = 0.1 * params.initializer_gain
        return tf.random_uniform_initializer(-max_val, max_val)
    elif params.initializer == "normal":
        return tf.random_normal_initializer(0.0, params.initializer_gain)
    elif params.initializer == "orthogonal":
        return tf.orthogonal_initializer(params.initializer_gain)
    elif params.initializer == "normal_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg",
                                               distribution="normal")
    elif params.initializer == "uniform_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg",
                                               distribution="uniform")
    else:
        raise ValueError("Unrecognized initializer: %s" % params.initializer)
Example #3
Source File: classifiers.py From Parser-v3 with Apache License 2.0
def hidden(layer, hidden_size, hidden_func=nonlin.relu, hidden_keep_prob=1.):
    """"""
    layer_shape = nn.get_sizes(layer)
    input_size = layer_shape.pop()
    weights = tf.get_variable('Weights', shape=[input_size, hidden_size])#, initializer=tf.orthogonal_initializer)
    biases = tf.get_variable('Biases', shape=[hidden_size], initializer=tf.zeros_initializer)
    if hidden_keep_prob < 1.:
        if len(layer_shape) > 1:
            noise_shape = tf.stack(layer_shape[:-1] + [1, input_size])
        else:
            noise_shape = None
        layer = nn.dropout(layer, hidden_keep_prob, noise_shape=noise_shape)
    layer = nn.reshape(layer, [-1, input_size])
    layer = tf.matmul(layer, weights) + biases
    layer = hidden_func(layer)
    layer = nn.reshape(layer, layer_shape + [hidden_size])
    return layer

#===============================================================
Example #4
Source File: esim.py From inferbeddings with MIT License
def _transform_compare(self, sequence, sequence_length, reuse=False):
    with tf.variable_scope('transform_compare', reuse=reuse) as _:
        sequence = tf.nn.dropout(sequence, keep_prob=self.dropout_keep_prob)
        projection = tf.contrib.layers.fully_connected(inputs=sequence,
                                                       num_outputs=self.representation_size,
                                                       weights_initializer=tf.random_normal_initializer(0.0, 0.01),
                                                       biases_initializer=tf.zeros_initializer(),
                                                       activation_fn=tf.nn.relu)
        cell_fw = tf.contrib.rnn.LSTMCell(self.representation_size, state_is_tuple=True, reuse=reuse,
                                          initializer=tf.orthogonal_initializer())
        cell_bw = tf.contrib.rnn.LSTMCell(self.representation_size, state_is_tuple=True, reuse=reuse,
                                          initializer=tf.orthogonal_initializer())
        outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw, cell_bw=cell_bw,
            inputs=projection, sequence_length=sequence_length,
            dtype=tf.float32)
        return tf.concat(outputs, axis=2)
Example #5
Source File: optimize.py From fine-lm with MIT License
def get_variable_initializer(hparams):
    """Get variable initializer from hparams."""
    if not hparams.initializer:
        return None

    if not tf.contrib.eager.in_eager_mode():
        tf.logging.info("Using variable initializer: %s", hparams.initializer)
    if hparams.initializer == "orthogonal":
        return tf.orthogonal_initializer(gain=hparams.initializer_gain)
    elif hparams.initializer == "uniform":
        max_val = 0.1 * hparams.initializer_gain
        return tf.random_uniform_initializer(-max_val, max_val)
    elif hparams.initializer == "normal_unit_scaling":
        return tf.variance_scaling_initializer(
            hparams.initializer_gain, mode="fan_avg", distribution="normal")
    elif hparams.initializer == "uniform_unit_scaling":
        return tf.variance_scaling_initializer(
            hparams.initializer_gain, mode="fan_avg", distribution="uniform")
    elif hparams.initializer == "xavier":
        return tf.contrib.layers.xavier_initializer()
    else:
        raise ValueError("Unrecognized initializer: %s" % hparams.initializer)
Example #6
Source File: util.py From sonic_contest with MIT License
def nature_cnn(obs_batch, dense=tf.layers.dense):
    """
    Apply the CNN architecture from the Nature DQN paper.

    The result is a batch of feature vectors.
    """
    conv_kwargs = {
        'activation': tf.nn.relu,
        'kernel_initializer': tf.orthogonal_initializer(gain=math.sqrt(2))
    }
    with tf.variable_scope('layer_1'):
        cnn_1 = tf.layers.conv2d(obs_batch, 32, 8, 4, **conv_kwargs)
    with tf.variable_scope('layer_2'):
        cnn_2 = tf.layers.conv2d(cnn_1, 64, 4, 2, **conv_kwargs)
    with tf.variable_scope('layer_3'):
        cnn_3 = tf.layers.conv2d(cnn_2, 64, 3, 1, **conv_kwargs)
    flat_size = product([x.value for x in cnn_3.get_shape()[1:]])
    flat_in = tf.reshape(cnn_3, (tf.shape(cnn_3)[0], int(flat_size)))
    return dense(flat_in, 512, **conv_kwargs)
Example #7
Source File: network.py From ppo with MIT License
def make_cnn(convs, padding, inpt, initializer=None):
    if initializer is None:
        initializer = tf.orthogonal_initializer(np.sqrt(2.0))
    out = inpt
    with tf.variable_scope('convnet'):
        for num_outputs, kernel_size, stride in convs:
            out = layers.convolution2d(
                out, num_outputs=num_outputs, kernel_size=kernel_size,
                stride=stride, padding=padding, activation_fn=tf.nn.relu,
                weights_initializer=initializer)
    return out
Example #8
Source File: fc1024.py From triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'], 1024, normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })

    endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    endpoints['emb'] = tf.identity(endpoints['emb_raw'], name="out_emb")

    return endpoints
Example #9
Source File: fc1024_normalize.py From triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'], 1024, normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })

    endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    endpoints['emb'] = tf.nn.l2_normalize(endpoints['emb_raw'], -1, name="out_emb")

    return endpoints
Example #10
Source File: fc1024.py From vehicle-triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'], 1024, normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })

    endpoints['emb'] = endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')

    return endpoints
Example #11
Source File: fc1024_normalize.py From vehicle-triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'], 1024, normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })

    endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    endpoints['emb'] = tf.nn.l2_normalize(endpoints['emb_raw'], -1)

    return endpoints
Example #12
Source File: encoder.py From NAO with GNU General Public License v3.0
def __init__(self, x, y, params, mode, scope='Encoder', reuse=tf.AUTO_REUSE):
    self.x = x
    self.y = y
    self.params = params
    self.batch_size = tf.shape(x)[0]
    self.vocab_size = params['encoder_vocab_size']
    self.emb_size = params['encoder_emb_size']
    self.hidden_size = params['encoder_hidden_size']
    self.encoder_length = params['encoder_length']
    self.weight_decay = params['weight_decay']
    self.mode = mode
    self.time_major = params['time_major']
    self.is_training = self.mode == tf.estimator.ModeKeys.TRAIN

    if not self.is_training:
        self.params['encoder_dropout'] = 0.0
        self.params['mlp_dropout'] = 0.0

    #initializer = tf.orthogonal_initializer()
    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    tf.get_variable_scope().set_initializer(initializer)

    self.build_graph(scope=scope, reuse=reuse)
Example #13
Source File: encoder.py From NAO with GNU General Public License v3.0
def __init__(self, x, y, params, mode, scope='Encoder', reuse=False):
    self.x = x
    self.y = y
    self.params = params
    self.batch_size = tf.shape(x)[0]
    self.vocab_size = params['encoder_vocab_size']
    self.emb_size = params['encoder_emb_size']
    self.hidden_size = params['encoder_hidden_size']
    self.encoder_length = params['encoder_length']
    self.weight_decay = params['weight_decay']
    self.mode = mode
    self.time_major = params['time_major']
    self.weighted_loss = params['weighted_loss']
    self.is_training = self.mode == tf.estimator.ModeKeys.TRAIN

    if not self.is_training:
        self.params['encoder_dropout'] = 0.0
        self.params['mlp_dropout'] = 0.0

    #initializer = tf.orthogonal_initializer()
    self.build_graph(scope, reuse)
Example #14
Source File: operations.py From DeepPavlov with Apache License 2.0
def matmul_2d(x, out_dimension, drop_prob=None):
    '''Multiplies 2-d tensor by weights.

    Args:
        x: a tensor with shape [batch, dimension]
        out_dimension: a number

    Returns:
        a tensor with shape [batch, out_dimension]

    Raises:
    '''
    W = tf.get_variable(
        name='weights',
        shape=[x.shape[1], out_dimension],
        dtype=tf.float32,
        initializer=tf.orthogonal_initializer())
    if drop_prob is not None:
        W = tf.nn.dropout(W, drop_prob)
        log.info('W is dropout')
    return tf.matmul(x, W)
Example #15
Source File: direct.py From vehicle-triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['emb'] = endpoints['emb_raw'] = slim.fully_connected(
        endpoints['model_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    return endpoints
Example #16
Source File: decoder.py From NAO with GNU General Public License v3.0
def __init__(self, encoder_outputs, encoder_state, target_input, target, params, mode, scope=None):
    """Create the model."""
    self.params = params
    self.encoder_outputs = encoder_outputs
    self.encoder_state = encoder_state
    self.target_input = target_input
    self.target = target
    self.batch_size = tf.shape(self.target_input)[0]
    self.mode = mode
    self.vocab_size = params['decoder_vocab_size']
    self.num_layers = params['decoder_num_layers']
    self.decoder_length = params['decoder_length']
    self.time_major = params['time_major']
    self.hidden_size = params['decoder_hidden_size']
    self.weight_decay = params['weight_decay']
    self.is_traing = mode == tf.estimator.ModeKeys.TRAIN

    if not self.is_traing:
        self.params['decoder_dropout'] = 0.0

    self.branch_length = self.decoder_length // 2 // 5 // 2  # 2 types of cell, 5 nodes, 2 branches

    # Initializer
    #initializer = tf.orthogonal_initializer()
    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    tf.get_variable_scope().set_initializer(initializer)

    ## Build graph
    self.build_graph(scope=scope)
Example #17
Source File: ops.py From HyperGAN with MIT License
def orthogonal_initializer(self, gain):
    def _build(shape):
        return tf.orthogonal_initializer(gain)
    return _build
Example #18
Source File: hyperparams_builder.py From aster with MIT License
def _build_initializer(initializer):
    """Build a tf initializer from config.

    Args:
        initializer: hyperparams_pb2.Hyperparams.regularizer proto.

    Returns:
        tf initializer.

    Raises:
        ValueError: On unknown initializer.
    """
    initializer_oneof = initializer.WhichOneof('initializer_oneof')
    if initializer_oneof == 'truncated_normal_initializer':
        return tf.truncated_normal_initializer(
            mean=initializer.truncated_normal_initializer.mean,
            stddev=initializer.truncated_normal_initializer.stddev)
    if initializer_oneof == 'variance_scaling_initializer':
        enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
                           DESCRIPTOR.enum_types_by_name['Mode'])
        mode = enum_descriptor.values_by_number[
            initializer.variance_scaling_initializer.mode].name
        return layers.variance_scaling_initializer(
            factor=initializer.variance_scaling_initializer.factor,
            mode=mode,
            uniform=initializer.variance_scaling_initializer.uniform)
    if initializer_oneof == 'orthogonal_initializer':
        return tf.orthogonal_initializer(
            gain=initializer.orthogonal_initializer.gain,
            seed=initializer.orthogonal_initializer.seed)
    if initializer_oneof == 'uniform_initializer':
        return tf.random_uniform_initializer(
            minval=initializer.uniform_initializer.minval,
            maxval=initializer.uniform_initializer.maxval)
    raise ValueError('Unknown initializer function: {}'.format(initializer_oneof))
Example #19
Source File: esim.py From inferbeddings with MIT License
def _transform_input(self, sequence, sequence_length, reuse=False):
    with tf.variable_scope('transform_input', reuse=reuse) as _:
        sequence = tf.nn.dropout(sequence, keep_prob=self.dropout_keep_prob)
        cell_fw = tf.contrib.rnn.LSTMCell(self.representation_size, state_is_tuple=True, reuse=reuse,
                                          initializer=tf.orthogonal_initializer())
        cell_bw = tf.contrib.rnn.LSTMCell(self.representation_size, state_is_tuple=True, reuse=reuse,
                                          initializer=tf.orthogonal_initializer())
        outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw, cell_bw=cell_bw,
            inputs=sequence, sequence_length=sequence_length,
            dtype=tf.float32)
        return tf.concat(outputs, axis=2)
Example #20
Source File: direct_normalize.py From triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['emb_raw'] = slim.fully_connected(
        endpoints['model_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    endpoints['emb'] = tf.nn.l2_normalize(endpoints['emb_raw'], -1, name="out_emb")
    return endpoints
Example #21
Source File: network.py From ppo with MIT License
def cnn_network(convs, fcs, use_lstm, padding, inpt, masks, rnn_state,
                num_actions, lstm_unit, nenvs, step_size, scope):
    out = make_cnn(convs, padding, inpt)
    out = layers.flatten(out)
    out = make_fcs(fcs, out)
    rnn_out, rnn_state = make_lstm(
        lstm_unit, nenvs, step_size, out, masks, rnn_state)

    if use_lstm:
        out = rnn_out

    policy = layers.fully_connected(
        out, num_actions, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(0.1))
    dist = tf.distributions.Categorical(probs=tf.nn.softmax(policy))

    value = layers.fully_connected(
        out, 1, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(1.0))

    return dist, value, rnn_state
Example #22
Source File: network.py From ppo with MIT License
def make_fcs(fcs, inpt, activation=tf.nn.relu, initializer=None):
    if initializer is None:
        initializer = tf.orthogonal_initializer(np.sqrt(2.0))
    out = inpt
    with tf.variable_scope('hiddens'):
        for hidden in fcs:
            out = layers.fully_connected(out, hidden,
                                         activation_fn=activation,
                                         weights_initializer=initializer)
    return out
Example #23
Source File: SAR.py From ChID-Dataset with Apache License 2.0
def __init__(self,
             learning_rate,
             init_word_embed,
             init_idiom_embed,
             size_embed=200,
             num_units=100,  # make sure that num_units = size_embed / 2
             max_gradient_norm=5.0):
    assert size_embed == 2 * num_units
    super(Model, self).__init__()
    super(Model, self)._create_embedding(init_word_embed, init_idiom_embed)

    doc_embedding = tf.cond(self.is_train,
                            lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.word_embed_matrix, self.document), 0.5),
                            lambda: tf.nn.embedding_lookup(self.word_embed_matrix, self.document))  # [batch, length, size_embed]
    can_embedding = tf.nn.embedding_lookup(self.idiom_embed_matrix, self.candidates)  # [batch, num_labels, 10, size_embed]

    with tf.variable_scope("doc"):
        cell_fw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        cell_bw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        h_doc, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw_doc, cell_bw_doc, doc_embedding, self.doc_length,
                                                   dtype=tf.float32, scope="bi_lstm")
    state_doc = tf.concat(h_doc, 2)  # [batch, length, 2 * num_units]
    blanks_states = tf.matmul(self.locations, state_doc)  # query, [batch, labels, 2 * num_units]

    bilinear_attention = tf.get_variable("bilinear_attention", [2 * num_units, 2 * num_units], tf.float32)
    attention_matrix = tf.matmul(tf.einsum("abc,cd->abd", blanks_states, bilinear_attention),  # [batch, labels, 2 * num_units]
                                 tf.transpose(state_doc, [0, 2, 1]))  # [batch, 2 * num_units, length]
    tmp = tf.exp(attention_matrix) * tf.tile(tf.expand_dims(self.mask, axis=1), [1, tf.shape(blanks_states)[1], 1])
    attention = tf.div(tmp, tf.reduce_sum(tmp, axis=-1, keep_dims=True))
    #attention = tf.nn.softmax(attention_matrix)  # [batch, labels, length]
    state_attention = tf.matmul(attention, state_doc)  # [batch, labels, 2 * num_units]

    match_matrix = tf.einsum("abcd,abd->abc", can_embedding, state_attention)  # [batch, labels, 10]
    self.logits = tf.nn.softmax(match_matrix)

    super(Model, self)._create_loss()
    super(Model, self)._create_train_step(learning_rate, max_gradient_norm)
Example #24
Source File: LM.py From ChID-Dataset with Apache License 2.0
def __init__(self,
             learning_rate,
             init_word_embed,
             init_idiom_embed,
             size_embed=200,
             num_units=100,  # make sure that num_units = size_embed / 2
             max_gradient_norm=5.0):
    assert size_embed == 2 * num_units
    super(Model, self).__init__()
    super(Model, self)._create_embedding(init_word_embed, init_idiom_embed)

    doc_embedding = tf.cond(self.is_train,
                            lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.word_embed_matrix, self.document), 0.5),
                            lambda: tf.nn.embedding_lookup(self.word_embed_matrix, self.document))  # [batch, length, size_embed]
    can_embedding = tf.nn.embedding_lookup(self.idiom_embed_matrix, self.candidates)  # [batch, num_labels, 10, size_embed]

    with tf.variable_scope("doc"):
        cell_fw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        cell_bw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        h_doc, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw_doc, cell_bw_doc, doc_embedding, self.doc_length,
                                                   dtype=tf.float32, scope="bi_lstm")
    state_doc = tf.concat(h_doc, 2)  # [batch, length, 2 * num_units]
    blanks_states = tf.matmul(self.locations, state_doc)  # [batch, labels, 2 * num_units]

    match_matrix = tf.einsum("abcd,abd->abc", can_embedding, blanks_states)  # [batch, labels, 10]
    self.logits = tf.nn.softmax(match_matrix)

    super(Model, self)._create_loss()
    super(Model, self)._create_train_step(learning_rate, max_gradient_norm)
Example #25
Source File: classifiers.py From Parser-v3 with Apache License 2.0
def hiddens(layer, hidden_sizes, hidden_func=nonlin.relu, hidden_keep_prob=1.):
    """"""
    layer_shape = nn.get_sizes(layer)
    input_size = layer_shape.pop()
    weights = []
    for i, hidden_size in enumerate(hidden_sizes):
        weights.append(tf.get_variable('Weights-%d' % i, shape=[input_size, hidden_size]))#, initializer=tf.orthogonal_initializer))
    weights = tf.concat(weights, axis=1)
    hidden_size = sum(hidden_sizes)
    biases = tf.get_variable('Biases', shape=[hidden_size], initializer=tf.zeros_initializer)
    if hidden_keep_prob < 1.:
        if len(layer_shape) > 1:
            noise_shape = tf.stack(layer_shape[:-1] + [1, input_size])
        else:
            noise_shape = None
        layer = nn.dropout(layer, hidden_keep_prob, noise_shape=noise_shape)
    layer = nn.reshape(layer, [-1, input_size])
    layer = tf.matmul(layer, weights) + biases
    layer = hidden_func(layer)
    layer = nn.reshape(layer, layer_shape + [hidden_size])
    layers = tf.split(layer, hidden_sizes, axis=-1)
    return layers

#===============================================================
Example #26
Source File: SAR.py From ChID-Dataset with Apache License 2.0
def __init__(self,
             learning_rate,
             init_word_embed,
             init_idiom_embed,
             size_embed=200,
             num_units=100,  # make sure that num_units = size_embed / 2
             max_gradient_norm=5.0):
    assert size_embed == 2 * num_units
    super(Model, self).__init__()
    super(Model, self)._create_embedding(init_word_embed, init_idiom_embed)

    doc_embedding = tf.cond(self.is_train,
                            lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.word_embed_matrix, self.document), 0.5),
                            lambda: tf.nn.embedding_lookup(self.word_embed_matrix, self.document))  # [batch, length, size_embed]
    can_embedding = tf.nn.embedding_lookup(self.idiom_embed_matrix, self.candidates)  # [batch, 10, size_embed]

    with tf.variable_scope("doc"):
        cell_fw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        cell_bw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        h_doc, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw_doc, cell_bw_doc, doc_embedding, self.doc_length,
                                                   dtype=tf.float32, scope="bi_lstm")
    state_doc = tf.concat(h_doc, 2)  # [batch, length, 2 * num_units]
    blanks_states = tf.matmul(self.locations, state_doc)  # query, [batch, labels, 2 * num_units]

    bilinear_attention = tf.get_variable("bilinear_attention", [2 * num_units, 2 * num_units], tf.float32)
    attention_matrix = tf.matmul(tf.einsum("abc,cd->abd", blanks_states, bilinear_attention),  # [batch, labels, 2 * num_units]
                                 tf.transpose(state_doc, [0, 2, 1]))  # [batch, 2 * num_units, length]
    tmp = tf.exp(attention_matrix) * tf.tile(tf.expand_dims(self.mask, axis=1), [1, tf.shape(blanks_states)[1], 1])
    attention = tf.div(tmp, tf.reduce_sum(tmp, axis=-1, keep_dims=True))
    #attention = tf.nn.softmax(attention_matrix)  # [batch, labels, length]
    state_attention = tf.matmul(attention, state_doc)  # [batch, labels, 2 * num_units]

    match_matrix = tf.matmul(state_attention, tf.transpose(can_embedding, [0, 2, 1]))  # [batch, labels, 10]
    self.logits = tf.nn.softmax(match_matrix)

    super(Model, self)._create_loss()
    super(Model, self)._create_train_step(learning_rate, max_gradient_norm)
Example #27
Source File: LM.py From ChID-Dataset with Apache License 2.0
def __init__(self,
             learning_rate,
             init_word_embed,
             init_idiom_embed,
             size_embed=200,
             num_units=100,  # make sure that num_units = size_embed / 2
             max_gradient_norm=5.0):
    assert size_embed == 2 * num_units
    super(Model, self).__init__()
    super(Model, self)._create_embedding(init_word_embed, init_idiom_embed)

    doc_embedding = tf.cond(self.is_train,
                            lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.word_embed_matrix, self.document), 0.5),
                            lambda: tf.nn.embedding_lookup(self.word_embed_matrix, self.document))  # [batch, length, size_embed]
    can_embedding = tf.nn.embedding_lookup(self.idiom_embed_matrix, self.candidates)  # [batch, 10, size_embed]

    with tf.variable_scope("doc"):
        cell_fw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        cell_bw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
        h_doc, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw_doc, cell_bw_doc, doc_embedding, self.doc_length,
                                                   dtype=tf.float32, scope="bi_lstm")
    state_doc = tf.concat(h_doc, 2)  # [batch, length, 2 * num_units]
    blanks_states = tf.matmul(self.locations, state_doc)  # [batch, labels, 2 * num_units]

    match_matrix = tf.matmul(blanks_states, tf.transpose(can_embedding, [0, 2, 1]))  # [batch, labels, 10]
    self.logits = tf.nn.softmax(match_matrix)

    super(Model, self)._create_loss()
    super(Model, self)._create_train_step(learning_rate, max_gradient_norm)
Example #28
Source File: ortho_gru_cell.py From neuralmonkey with BSD 3-Clause "New" or "Revised" License
def call(self, inputs, state):
    """Gated recurrent unit (GRU) with nunits cells."""
    with tf.variable_scope("gates"):
        input_to_gates = tf.layers.dense(
            inputs, 2 * self._num_units, name="input_proj",
            use_bias=self.use_input_bias)

        # Nematus does the orthogonal initialization probably differently
        state_to_gates = tf.layers.dense(
            state, 2 * self._num_units,
            use_bias=self.use_state_bias,
            kernel_initializer=orthogonal_initializer(),
            name="state_proj")

        gates_input = state_to_gates + input_to_gates
        reset, update = tf.split(
            tf.sigmoid(gates_input), num_or_size_splits=2, axis=1)

    with tf.variable_scope("candidate"):
        input_to_candidate = tf.layers.dense(
            inputs, self._num_units, use_bias=self.use_input_bias,
            name="input_proj")

        state_to_candidate = tf.layers.dense(
            state, self._num_units, use_bias=self.use_state_bias,
            kernel_initializer=orthogonal_initializer(),
            name="state_proj")

        candidate = self._activation(
            state_to_candidate * reset + input_to_candidate)

    new_state = update * state + (1 - update) * candidate
    return new_state, new_state
Example #29
Source File: decoder.py From NAO with GNU General Public License v3.0
def __init__(self, encoder_outputs, encoder_state, target_input, target, params, mode, scope=None, reuse=False):
    """Create the model."""
    self.params = params
    self.encoder_outputs = encoder_outputs
    self.encoder_state = encoder_state
    self.target_input = target_input
    self.target = target
    self.batch_size = tf.shape(self.target_input)[0]
    self.mode = mode
    self.vocab_size = params['decoder_vocab_size']
    self.num_layers = params['decoder_num_layers']
    self.time_major = params['time_major']
    self.hidden_size = params['decoder_hidden_size']
    self.weight_decay = params['weight_decay']
    self.wn = params['wn']
    self.is_traing = mode == tf.estimator.ModeKeys.TRAIN

    if not self.is_traing:
        self.params['decoder_dropout'] = 0.0

    # Initializer
    #initializer = tf.orthogonal_initializer()

    ## Build graph
    self.build_graph(scope, reuse)
Example #30
Source File: ortho_gru_cell.py From neuralmonkey with BSD 3-Clause "New" or "Revised" License
def __init__(self, num_units, activation=None, reuse=None):
    tf.contrib.rnn.GRUCell.__init__(
        self, num_units, activation, reuse,
        kernel_initializer=tf.orthogonal_initializer())