Python tensorflow.orthogonal_initializer() Examples

The following are 30 code examples of tensorflow.orthogonal_initializer(), drawn from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
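Before the project examples, a minimal sketch of the basic call (this assumes the TensorFlow 1.x graph-mode API, where the initializer is passed to tf.get_variable):

import tensorflow as tf

# Orthogonal initializer for a 2-D weight matrix; gain scales the
# orthogonal matrix (1.0 is the default).
init = tf.orthogonal_initializer(gain=1.0)
weights = tf.get_variable("weights", shape=[256, 256], dtype=tf.float32,
                          initializer=init)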
Example #1
Source File: generator.py    From UROP-Adversarial-Feature-Matching-for-Text-Generation with GNU Affero General Public License v3.0
def init_param(self):
		idm = self.input_dim
		hs = self.hidden_size
		ws = len(self.window)
		nf = idm * ws
		# author's special initialization strategy.
		self.Wemb = tf.get_variable(name=self.name + '_Wemb', shape=[self.vocab_size, idm], dtype=tf.float32, initializer=tf.random_uniform_initializer())
		self.bhid = tf.get_variable(name=self.name + '_bhid', shape=[self.vocab_size], dtype=tf.float32, initializer=tf.zeros_initializer())
		self.Vhid = tf.get_variable(name=self.name + '_Vhid', shape=[hs, idm], dtype=tf.float32, initializer=tf.random_uniform_initializer())
		self.Vhid = dot(self.Vhid, self.Wemb) # [hidden_size, vocab_size]
		self.i2h_W = tf.get_variable(name=self.name + '_i2h_W', shape=[idm, hs * 4], dtype=tf.float32, initializer=tf.random_uniform_initializer())
		self.h2h_W = tf.get_variable(name=self.name + '_h2h_W', shape=[hs, hs * 4], dtype=tf.float32, initializer=tf.orthogonal_initializer())
		self.z2h_W = tf.get_variable(name=self.name + '_z2h_W', shape=[nf, hs * 4], dtype=tf.float32, initializer=tf.random_uniform_initializer())
		b_init_1 = tf.zeros((hs,))
		b_init_2 = tf.ones((hs,)) * 3
		b_init_3 = tf.zeros((hs,))
		b_init_4 = tf.zeros((hs,))
		b_init = tf.concat([b_init_1, b_init_2, b_init_3, b_init_4], axis=0)
		# b_init = tf.constant(b_init)
		# self.b = tf.get_variable(name=self.name + '_b', shape=[hs * 4], dtype=tf.float32, initializer=b_init)
		self.b = tf.get_variable(name=self.name + '_b', dtype=tf.float32, initializer=b_init) # ValueError: If initializer is a constant, do not specify shape.
		self.C0 = tf.get_variable(name=self.name + '_C0', shape=[nf, hs], dtype=tf.float32, initializer=tf.random_uniform_initializer())
		self.b0 = tf.get_variable(name=self.name + '_b0', shape=[hs], dtype=tf.float32, initializer=tf.zeros_initializer()) 
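The commented-out lines above illustrate the TF 1.x constraint that the inline comment spells out: when the initializer passed to tf.get_variable is a concrete tensor rather than an initializer object, shape must be omitted. A minimal sketch of the two forms (variable names here are illustrative):

# Initializer object: shape is required.
v1 = tf.get_variable("v1", shape=[4], initializer=tf.zeros_initializer())

# Concrete tensor as initializer: omit shape, it is inferred from the tensor.
b_init = tf.concat([tf.zeros((2,)), tf.ones((2,))], axis=0)
v2 = tf.get_variable("v2", dtype=tf.float32, initializer=b_init)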
Example #2
Source File: train.py    From UNMT-SPR with MIT License
def get_initializer(params):
    if params.initializer == "uniform":
        max_val = 0.1 * params.initializer_gain
        return tf.random_uniform_initializer(-max_val, max_val)
    elif params.initializer == "normal":
        return tf.random_normal_initializer(0.0, params.initializer_gain)
    elif params.initializer == "orthogonal":
        return tf.orthogonal_initializer(params.initializer_gain)
    elif params.initializer == "normal_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg",
                                               distribution="normal")
    elif params.initializer == "uniform_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg",
                                               distribution="uniform")
    else:
        raise ValueError("Unrecognized initializer: %s" % params.initializer) 
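A hedged usage sketch for get_initializer (the Params class below is hypothetical, for illustration only); the returned initializer is typically installed as the scope default, as Examples #12 and #16 also do:

# Hypothetical params namespace, only for illustration.
class Params(object):
    initializer = "orthogonal"
    initializer_gain = 1.0

initializer = get_initializer(Params())
tf.get_variable_scope().set_initializer(initializer)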
Example #3
Source File: classifiers.py    From Parser-v3 with Apache License 2.0
def hidden(layer, hidden_size, hidden_func=nonlin.relu, hidden_keep_prob=1.):
  """"""

  layer_shape = nn.get_sizes(layer)
  input_size = layer_shape.pop()
  weights = tf.get_variable('Weights', shape=[input_size, hidden_size])#, initializer=tf.orthogonal_initializer)
  biases = tf.get_variable('Biases', shape=[hidden_size], initializer=tf.zeros_initializer)
  if hidden_keep_prob < 1.:
    if len(layer_shape) > 1:
      noise_shape = tf.stack(layer_shape[:-1] + [1, input_size])
    else:
      noise_shape = None
    layer = nn.dropout(layer, hidden_keep_prob, noise_shape=noise_shape)
  
  layer = nn.reshape(layer, [-1, input_size])
  layer = tf.matmul(layer, weights) + biases
  layer = hidden_func(layer)
  layer = nn.reshape(layer, layer_shape + [hidden_size])
  return layer

#=============================================================== 
Example #4
Source File: esim.py    From inferbeddings with MIT License
def _transform_compare(self, sequence, sequence_length, reuse=False):
        with tf.variable_scope('transform_compare', reuse=reuse) as _:
            sequence = tf.nn.dropout(sequence, keep_prob=self.dropout_keep_prob)
            projection = tf.contrib.layers.fully_connected(inputs=sequence,
                                                           num_outputs=self.representation_size,
                                                           weights_initializer=tf.random_normal_initializer(0.0, 0.01),
                                                           biases_initializer=tf.zeros_initializer(),
                                                           activation_fn=tf.nn.relu)
            cell_fw = tf.contrib.rnn.LSTMCell(self.representation_size, state_is_tuple=True, reuse=reuse,
                                              initializer=tf.orthogonal_initializer())
            cell_bw = tf.contrib.rnn.LSTMCell(self.representation_size, state_is_tuple=True, reuse=reuse,
                                              initializer=tf.orthogonal_initializer())
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw, cell_bw=cell_bw,
                inputs=projection,
                sequence_length=sequence_length,
                dtype=tf.float32)
        return tf.concat(outputs, axis=2) 
Example #5
Source File: optimize.py    From fine-lm with MIT License
def get_variable_initializer(hparams):
  """Get variable initializer from hparams."""
  if not hparams.initializer:
    return None

  if not tf.contrib.eager.in_eager_mode():
    tf.logging.info("Using variable initializer: %s", hparams.initializer)
  if hparams.initializer == "orthogonal":
    return tf.orthogonal_initializer(gain=hparams.initializer_gain)
  elif hparams.initializer == "uniform":
    max_val = 0.1 * hparams.initializer_gain
    return tf.random_uniform_initializer(-max_val, max_val)
  elif hparams.initializer == "normal_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="normal")
  elif hparams.initializer == "uniform_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="uniform")
  elif hparams.initializer == "xavier":
    return tf.contrib.layers.xavier_initializer()
  else:
    raise ValueError("Unrecognized initializer: %s" % hparams.initializer) 
Example #6
Source File: util.py    From sonic_contest with MIT License
def nature_cnn(obs_batch, dense=tf.layers.dense):
    """
    Apply the CNN architecture from the Nature DQN paper.

    The result is a batch of feature vectors.
    """
    conv_kwargs = {
        'activation': tf.nn.relu,
        'kernel_initializer': tf.orthogonal_initializer(gain=math.sqrt(2))
    }
    with tf.variable_scope('layer_1'):
        cnn_1 = tf.layers.conv2d(obs_batch, 32, 8, 4, **conv_kwargs)
    with tf.variable_scope('layer_2'):
        cnn_2 = tf.layers.conv2d(cnn_1, 64, 4, 2, **conv_kwargs)
    with tf.variable_scope('layer_3'):
        cnn_3 = tf.layers.conv2d(cnn_2, 64, 3, 1, **conv_kwargs)
    flat_size = product([x.value for x in cnn_3.get_shape()[1:]])
    flat_in = tf.reshape(cnn_3, (tf.shape(cnn_3)[0], int(flat_size)))
    return dense(flat_in, 512, **conv_kwargs) 
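A hedged usage sketch for nature_cnn; the 84x84x4 observation shape is the conventional Atari/Nature-DQN input and is an assumption, not part of the snippet:

obs = tf.placeholder(tf.float32, [None, 84, 84, 4])  # assumed Atari-style input
features = nature_cnn(obs)  # -> [batch, 512] feature vectors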
Example #7
Source File: network.py    From ppo with MIT License
def make_cnn(convs, padding, inpt, initializer=None):
    if initializer is None:
        initializer = tf.orthogonal_initializer(np.sqrt(2.0))
    out = inpt
    with tf.variable_scope('convnet'):
        for num_outputs, kernel_size, stride in convs:
            out = layers.convolution2d(
                out,
                num_outputs=num_outputs,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                activation_fn=tf.nn.relu,
                weights_initializer=initializer
            )
    return out 
Example #8
Source File: fc1024.py    From triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'], 1024, normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })

    endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    endpoints['emb'] = tf.identity(endpoints['emb_raw'], name="out_emb")
    
    return endpoints 
Example #9
Source File: fc1024_normalize.py    From triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'], 1024, normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })

    endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    endpoints['emb'] = tf.nn.l2_normalize(endpoints['emb_raw'], -1, name="out_emb")

    return endpoints 
Example #10
Source File: fc1024.py    From vehicle-triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'], 1024, normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })

    endpoints['emb'] = endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')

    return endpoints 
Example #11
Source File: fc1024_normalize.py    From vehicle-triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'], 1024, normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })

    endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    endpoints['emb'] = tf.nn.l2_normalize(endpoints['emb_raw'], -1)

    return endpoints 
Example #12
Source File: encoder.py    From NAO with GNU General Public License v3.0
def __init__(self, x, y, params, mode, scope='Encoder', reuse=tf.AUTO_REUSE):
    self.x = x
    self.y = y
    self.params = params
    self.batch_size = tf.shape(x)[0]
    self.vocab_size = params['encoder_vocab_size']
    self.emb_size = params['encoder_emb_size']
    self.hidden_size = params['encoder_hidden_size']
    self.encoder_length = params['encoder_length']
    self.weight_decay = params['weight_decay']
    self.mode = mode
    self.time_major = params['time_major']
    self.is_training = self.mode == tf.estimator.ModeKeys.TRAIN
    if not self.is_training:
      self.params['encoder_dropout'] = 0.0
      self.params['mlp_dropout'] = 0.0

    #initializer = tf.orthogonal_initializer()
    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    tf.get_variable_scope().set_initializer(initializer)
    self.build_graph(scope=scope, reuse=reuse) 
Example #13
Source File: encoder.py    From NAO with GNU General Public License v3.0
def __init__(self, x, y, params, mode, scope='Encoder', reuse=False):
    self.x = x
    self.y = y
    self.params = params
    self.batch_size = tf.shape(x)[0]
    self.vocab_size = params['encoder_vocab_size']
    self.emb_size = params['encoder_emb_size']
    self.hidden_size = params['encoder_hidden_size']
    self.encoder_length = params['encoder_length']
    self.weight_decay = params['weight_decay']
    self.mode = mode
    self.time_major = params['time_major']
    self.weighted_loss = params['weighted_loss']
    self.is_training = self.mode == tf.estimator.ModeKeys.TRAIN
    if not self.is_training:
      self.params['encoder_dropout'] = 0.0
      self.params['mlp_dropout'] = 0.0

    #initializer = tf.orthogonal_initializer()
    self.build_graph(scope, reuse) 
Example #14
Source File: operations.py    From DeepPavlov with Apache License 2.0
def matmul_2d(x, out_dimension, drop_prob=None):
    '''Multiplies 2-d tensor by weights.

    Args:
        x: a tensor with shape [batch, dimension]
        out_dimension: a number

    Returns:
        a tensor with shape [batch, out_dimension]

    Raises:
    '''
    W = tf.get_variable(
        name='weights',
        shape=[x.shape[1], out_dimension],
        dtype=tf.float32,
        initializer=tf.orthogonal_initializer())
    if drop_prob is not None:
        W = tf.nn.dropout(W, drop_prob)
        log.info('W is dropout')

    return tf.matmul(x, W) 
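A minimal usage sketch for matmul_2d (names are illustrative, not from the repo); because it creates a variable named 'weights', each call needs its own variable scope:

x = tf.placeholder(tf.float32, [None, 128])
with tf.variable_scope("projection"):
    y = matmul_2d(x, 64)  # [batch, 64], weights initialized orthogonally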
Example #15
Source File: direct.py    From vehicle-triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['emb'] = endpoints['emb_raw'] = slim.fully_connected(
        endpoints['model_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')

    return endpoints 
Example #16
Source File: decoder.py    From NAO with GNU General Public License v3.0
def __init__(self,
               encoder_outputs,
               encoder_state,
               target_input,
               target,
               params,
               mode,
               scope=None):
    """Create the model."""
    self.params = params
    self.encoder_outputs = encoder_outputs
    self.encoder_state = encoder_state
    self.target_input = target_input
    self.target = target
    self.batch_size = tf.shape(self.target_input)[0]
    self.mode = mode
    self.vocab_size = params['decoder_vocab_size']
    self.num_layers = params['decoder_num_layers']
    self.decoder_length = params['decoder_length']
    self.time_major = params['time_major']
    self.hidden_size = params['decoder_hidden_size']
    self.weight_decay = params['weight_decay']
    self.is_traing = mode == tf.estimator.ModeKeys.TRAIN
    if not self.is_traing:
      self.params['decoder_dropout'] = 0.0
    self.branch_length = self.decoder_length // 2 // 5 // 2  # 2 types of cell, 5 nodes, 2 branches

    # Initializer
    #initializer = tf.orthogonal_initializer()
    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    tf.get_variable_scope().set_initializer(initializer)

    ## Build graph
    self.build_graph(scope=scope) 
Example #17
Source File: ops.py    From HyperGAN with MIT License
def orthogonal_initializer(self, gain):
        def _build(shape):
            return tf.orthogonal_initializer(gain)
        return _build 
Example #18
Source File: hyperparams_builder.py    From aster with MIT License
def _build_initializer(initializer):
  """Build a tf initializer from config.

  Args:
    initializer: hyperparams_pb2.Hyperparams.initializer proto.

  Returns:
    tf initializer.

  Raises:
    ValueError: On unknown initializer.
  """
  initializer_oneof = initializer.WhichOneof('initializer_oneof')
  if initializer_oneof == 'truncated_normal_initializer':
    return tf.truncated_normal_initializer(
        mean=initializer.truncated_normal_initializer.mean,
        stddev=initializer.truncated_normal_initializer.stddev)
  if initializer_oneof == 'variance_scaling_initializer':
    enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
                       DESCRIPTOR.enum_types_by_name['Mode'])
    mode = enum_descriptor.values_by_number[initializer.
                                            variance_scaling_initializer.
                                            mode].name
    return layers.variance_scaling_initializer(
        factor=initializer.variance_scaling_initializer.factor,
        mode=mode,
        uniform=initializer.variance_scaling_initializer.uniform)
  if initializer_oneof == 'orthogonal_initializer':
    return tf.orthogonal_initializer(
      gain=initializer.orthogonal_initializer.gain,
      seed=initializer.orthogonal_initializer.seed
    )
  if initializer_oneof == 'uniform_initializer':
    return tf.random_uniform_initializer(
      minval=initializer.uniform_initializer.minval,
      maxval=initializer.uniform_initializer.maxval)
  raise ValueError('Unknown initializer function: {}'.format(
      initializer_oneof)) 
Example #19
Source File: esim.py    From inferbeddings with MIT License
def _transform_input(self, sequence, sequence_length, reuse=False):
        with tf.variable_scope('transform_input', reuse=reuse) as _:
            sequence = tf.nn.dropout(sequence, keep_prob=self.dropout_keep_prob)
            cell_fw = tf.contrib.rnn.LSTMCell(self.representation_size, state_is_tuple=True, reuse=reuse,
                                              initializer=tf.orthogonal_initializer())
            cell_bw = tf.contrib.rnn.LSTMCell(self.representation_size, state_is_tuple=True, reuse=reuse,
                                              initializer=tf.orthogonal_initializer())
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw, cell_bw=cell_bw,
                inputs=sequence, sequence_length=sequence_length,
                dtype=tf.float32)
        return tf.concat(outputs, axis=2) 
Example #20
Source File: direct_normalize.py    From triplet-reid with MIT License
def head(endpoints, embedding_dim, is_training):
    endpoints['emb_raw'] = slim.fully_connected(
        endpoints['model_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    endpoints['emb'] = tf.nn.l2_normalize(endpoints['emb_raw'], -1, name="out_emb")

    return endpoints 
Example #21
Source File: network.py    From ppo with MIT License
def cnn_network(convs,
                fcs,
                use_lstm,
                padding,
                inpt,
                masks,
                rnn_state,
                num_actions,
                lstm_unit,
                nenvs,
                step_size,
                scope):
    out = make_cnn(convs, padding, inpt)
    out = layers.flatten(out)
    out = make_fcs(fcs, out)
    rnn_out, rnn_state = make_lstm(
        lstm_unit, nenvs, step_size, out, masks, rnn_state)

    if use_lstm:
        out = rnn_out

    policy = layers.fully_connected(
        out, num_actions, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(0.1))
    dist = tf.distributions.Categorical(probs=tf.nn.softmax(policy))

    value = layers.fully_connected(
        out, 1, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(1.0))

    return dist, value, rnn_state 
Example #22
Source File: network.py    From ppo with MIT License
def make_fcs(fcs, inpt, activation=tf.nn.relu, initializer=None):
    if initializer is None:
        initializer = tf.orthogonal_initializer(np.sqrt(2.0))
    out = inpt
    with tf.variable_scope('hiddens'):
        for hidden in fcs:
            out = layers.fully_connected(out, hidden, activation_fn=activation,
                                         weights_initializer=initializer)
    return out 
Example #23
Source File: SAR.py    From ChID-Dataset with Apache License 2.0
def __init__(self,
                 learning_rate,
                 init_word_embed,
                 init_idiom_embed,
                 size_embed=200,
                 num_units=100, # make sure that num_units = size_embed / 2
                 max_gradient_norm=5.0):

        assert size_embed == 2 * num_units

        super(Model, self).__init__()
        super(Model, self)._create_embedding(init_word_embed, init_idiom_embed)

        doc_embedding = tf.cond(self.is_train,
                                lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.word_embed_matrix, self.document), 0.5),
                                lambda: tf.nn.embedding_lookup(self.word_embed_matrix, self.document))
        # [batch, length, size_embed]
        can_embedding = tf.nn.embedding_lookup(self.idiom_embed_matrix, self.candidates)  # [batch, num_labels, 10, size_embed]

        with tf.variable_scope("doc"):
            cell_fw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
            cell_bw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
            h_doc, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw_doc, cell_bw_doc, doc_embedding, self.doc_length,
                                                       dtype=tf.float32, scope="bi_lstm")
            state_doc = tf.concat(h_doc, 2) # [batch, length, 2 * num_units]

        blanks_states = tf.matmul(self.locations, state_doc) # query, [batch, labels, 2 * num_units]
        bilinear_attention = tf.get_variable("bilinear_attention", [2 * num_units, 2 * num_units], tf.float32)
        attention_matrix = tf.matmul(tf.einsum("abc,cd->abd", blanks_states, bilinear_attention), # [batch, labels, 2 * num_units]
                                     tf.transpose(state_doc, [0, 2, 1]))  # [batch, 2 * num_units, length]
        tmp = tf.exp(attention_matrix) * tf.tile(tf.expand_dims(self.mask, axis=1), [1, tf.shape(blanks_states)[1], 1])
        attention = tf.div(tmp, tf.reduce_sum(tmp, axis=-1, keep_dims=True))
        #attention = tf.nn.softmax(attention_matrix) # [batch, labels, length]
        state_attention = tf.matmul(attention, state_doc) # [batch, labels, 2 * num_units]

        match_matrix = tf.einsum("abcd,abd->abc", can_embedding, state_attention) # [batch, labels, 10]
        self.logits = tf.nn.softmax(match_matrix)

        super(Model, self)._create_loss()
        super(Model, self)._create_train_step(learning_rate, max_gradient_norm) 
Example #24
Source File: LM.py    From ChID-Dataset with Apache License 2.0
def __init__(self,
                 learning_rate,
                 init_word_embed,
                 init_idiom_embed,
                 size_embed=200,
                 num_units=100, # make sure that num_units = size_embed / 2
                 max_gradient_norm=5.0):

        assert size_embed == 2 * num_units

        super(Model, self).__init__()
        super(Model, self)._create_embedding(init_word_embed, init_idiom_embed)

        doc_embedding = tf.cond(self.is_train,
                                lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.word_embed_matrix, self.document), 0.5),
                                lambda: tf.nn.embedding_lookup(self.word_embed_matrix, self.document))
        # [batch, length, size_embed]
        can_embedding = tf.nn.embedding_lookup(self.idiom_embed_matrix, self.candidates)  # [batch, num_labels, 10, size_embed]

        with tf.variable_scope("doc"):
            cell_fw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
            cell_bw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
            h_doc, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw_doc, cell_bw_doc, doc_embedding, self.doc_length, dtype=tf.float32,
                                                       scope="bi_lstm")
            state_doc = tf.concat(h_doc, 2)  # [batch, length, 2 * num_units]

        blanks_states = tf.matmul(self.locations, state_doc)  # [batch, labels, 2 * num_units]
        match_matrix = tf.einsum("abcd,abd->abc", can_embedding, blanks_states)  # [batch, labels, 10]
        self.logits = tf.nn.softmax(match_matrix)

        super(Model, self)._create_loss()
        super(Model, self)._create_train_step(learning_rate, max_gradient_norm) 
Example #25
Source File: classifiers.py    From Parser-v3 with Apache License 2.0
def hiddens(layer, hidden_sizes, hidden_func=nonlin.relu, hidden_keep_prob=1.):
  """"""

  layer_shape = nn.get_sizes(layer)
  input_size = layer_shape.pop()
  weights = []
  for i, hidden_size in enumerate(hidden_sizes):
    weights.append(tf.get_variable('Weights-%d' % i, shape=[input_size, hidden_size]))#, initializer=tf.orthogonal_initializer))
  weights = tf.concat(weights, axis=1)
  hidden_size = sum(hidden_sizes)
  biases = tf.get_variable('Biases', shape=[hidden_size], initializer=tf.zeros_initializer)
  if hidden_keep_prob < 1.:
    if len(layer_shape) > 1:
      noise_shape = tf.stack(layer_shape[:-1] + [1, input_size])
    else:
      noise_shape = None
    layer = nn.dropout(layer, hidden_keep_prob, noise_shape=noise_shape)
  
  layer = nn.reshape(layer, [-1, input_size])
  layer = tf.matmul(layer, weights) + biases
  layer = hidden_func(layer)
  layer = nn.reshape(layer, layer_shape + [hidden_size])
  layers = tf.split(layer, hidden_sizes, axis=-1)
  return layers

#=============================================================== 
Example #26
Source File: SAR.py    From ChID-Dataset with Apache License 2.0
def __init__(self,
                 learning_rate,
                 init_word_embed,
                 init_idiom_embed,
                 size_embed=200,
                 num_units=100, # make sure that num_units = size_embed / 2
                 max_gradient_norm=5.0):

        assert size_embed == 2 * num_units

        super(Model, self).__init__()
        super(Model, self)._create_embedding(init_word_embed, init_idiom_embed)

        doc_embedding = tf.cond(self.is_train,
                                lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.word_embed_matrix, self.document), 0.5),
                                lambda: tf.nn.embedding_lookup(self.word_embed_matrix, self.document))
        # [batch, length, size_embed]
        can_embedding = tf.nn.embedding_lookup(self.idiom_embed_matrix, self.candidates)  # [batch, 10, size_embed]

        with tf.variable_scope("doc"):
            cell_fw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
            cell_bw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
            h_doc, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw_doc, cell_bw_doc, doc_embedding, self.doc_length,
                                                       dtype=tf.float32, scope="bi_lstm")
            state_doc = tf.concat(h_doc, 2) # [batch, length, 2 * num_units]

        blanks_states = tf.matmul(self.locations, state_doc) # query, [batch, labels, 2 * num_units]
        bilinear_attention = tf.get_variable("bilinear_attention", [2 * num_units, 2 * num_units], tf.float32)
        attention_matrix = tf.matmul(tf.einsum("abc,cd->abd", blanks_states, bilinear_attention), # [batch, labels, 2 * num_units]
                                     tf.transpose(state_doc, [0, 2, 1]))  # [batch, 2 * num_units, length]
        tmp = tf.exp(attention_matrix) * tf.tile(tf.expand_dims(self.mask, axis=1), [1, tf.shape(blanks_states)[1], 1])
        attention = tf.div(tmp, tf.reduce_sum(tmp, axis=-1, keep_dims=True))
        #attention = tf.nn.softmax(attention_matrix) # [batch, labels, length]
        state_attention = tf.matmul(attention, state_doc) # [batch, labels, 2 * num_units]

        match_matrix = tf.matmul(state_attention, tf.transpose(can_embedding, [0, 2, 1])) # [batch, labels, 10]
        self.logits = tf.nn.softmax(match_matrix)

        super(Model, self)._create_loss()
        super(Model, self)._create_train_step(learning_rate, max_gradient_norm) 
Example #27
Source File: LM.py    From ChID-Dataset with Apache License 2.0
def __init__(self,
                 learning_rate,
                 init_word_embed,
                 init_idiom_embed,
                 size_embed=200,
                 num_units=100, # make sure that num_units = size_embed / 2
                 max_gradient_norm=5.0):

        assert size_embed == 2 * num_units

        super(Model, self).__init__()
        super(Model, self)._create_embedding(init_word_embed, init_idiom_embed)

        doc_embedding = tf.cond(self.is_train,
                                lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.word_embed_matrix, self.document), 0.5),
                                lambda: tf.nn.embedding_lookup(self.word_embed_matrix, self.document))
        # [batch, length, size_embed]
        can_embedding = tf.nn.embedding_lookup(self.idiom_embed_matrix, self.candidates)  # [batch, 10, size_embed]

        with tf.variable_scope("doc"):
            cell_fw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
            cell_bw_doc = tf.nn.rnn_cell.LSTMCell(num_units, initializer=tf.orthogonal_initializer())
            h_doc, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw_doc, cell_bw_doc, doc_embedding, self.doc_length, dtype=tf.float32,
                                                       scope="bi_lstm")
            state_doc = tf.concat(h_doc, 2)  # [batch, length, 2 * num_units]

        blanks_states = tf.matmul(self.locations, state_doc)  # [batch, labels, 2 * num_units]
        match_matrix = tf.matmul(blanks_states, tf.transpose(can_embedding, [0, 2, 1]))  # [batch, labels, 10]
        self.logits = tf.nn.softmax(match_matrix)

        super(Model, self)._create_loss()
        super(Model, self)._create_train_step(learning_rate, max_gradient_norm) 
Example #28
Source File: ortho_gru_cell.py    From neuralmonkey with BSD 3-Clause "New" or "Revised" License
def call(self, inputs, state):
        """Gated recurrent unit (GRU) with nunits cells."""
        with tf.variable_scope("gates"):
            input_to_gates = tf.layers.dense(
                inputs, 2 * self._num_units, name="input_proj",
                use_bias=self.use_input_bias)

            # Nematus probably does the orthogonal initialization differently
            state_to_gates = tf.layers.dense(
                state, 2 * self._num_units,
                use_bias=self.use_state_bias,
                kernel_initializer=orthogonal_initializer(),
                name="state_proj")

            gates_input = state_to_gates + input_to_gates
            reset, update = tf.split(
                tf.sigmoid(gates_input), num_or_size_splits=2, axis=1)

        with tf.variable_scope("candidate"):
            input_to_candidate = tf.layers.dense(
                inputs, self._num_units, use_bias=self.use_input_bias,
                name="input_proj")

            state_to_candidate = tf.layers.dense(
                state, self._num_units, use_bias=self.use_state_bias,
                kernel_initializer=orthogonal_initializer(),
                name="state_proj")

            candidate = self._activation(
                state_to_candidate * reset + input_to_candidate)

        new_state = update * state + (1 - update) * candidate
        return new_state, new_state 
Example #29
Source File: decoder.py    From NAO with GNU General Public License v3.0
def __init__(self,
               encoder_outputs,
               encoder_state,
               target_input,
               target,
               params,
               mode,
               scope=None,
               reuse=False):
    """Create the model."""
    self.params = params
    self.encoder_outputs = encoder_outputs
    self.encoder_state = encoder_state
    self.target_input = target_input
    self.target = target
    self.batch_size = tf.shape(self.target_input)[0]
    self.mode = mode
    self.vocab_size = params['decoder_vocab_size']
    self.num_layers = params['decoder_num_layers']
    self.time_major = params['time_major']
    self.hidden_size = params['decoder_hidden_size']
    self.weight_decay = params['weight_decay']
    self.wn = params['wn']
    self.is_traing = mode == tf.estimator.ModeKeys.TRAIN
    if not self.is_traing:
      self.params['decoder_dropout'] = 0.0

    # Initializer
    #initializer = tf.orthogonal_initializer()
    ## Build graph
    self.build_graph(scope, reuse) 
Example #30
Source File: ortho_gru_cell.py    From neuralmonkey with BSD 3-Clause "New" or "Revised" License
def __init__(self, num_units, activation=None, reuse=None):
        tf.contrib.rnn.GRUCell.__init__(
            self, num_units, activation, reuse,
            kernel_initializer=tf.orthogonal_initializer())