Python tensorflow.matrix_band_part() Examples
The following are 30 code examples of tensorflow.matrix_band_part().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
Example #1
Source File: transformer_attentions.py From Counterfactual-StoryRW with MIT License | 6 votes |
def _ones_matrix_band_part(rows, cols, num_lower, num_upper, out_shape=None):
    """Return the band part of an all-ones ``[rows, cols]`` matrix.

    Args:
        rows: Number of rows (Python int or tensor).
        cols: Number of columns (Python int or tensor).
        num_lower: Number of sub-diagonals to keep. In the static path a
            negative value is replaced by ``rows - 1``.
        num_upper: Number of super-diagonals to keep. In the static path a
            negative value is replaced by ``cols - 1``.
        out_shape: Optional shape the result is reshaped to.

    Returns:
        A float32 ``tf.constant`` built with numpy when all four size
        arguments are Python ints; otherwise the result of
        ``tf.matrix_band_part`` applied to ``tf.ones([rows, cols])``.
    """
    size_args = (rows, cols, num_lower, num_upper)
    if not all(isinstance(arg, int) for arg in size_args):
        # At least one size is only known at graph run time: build with TF ops.
        ones = tf.ones([rows, cols])
        dynamic_band = tf.matrix_band_part(
            ones, tf.cast(num_lower, tf.int64), tf.cast(num_upper, tf.int64))
        if out_shape:
            dynamic_band = tf.reshape(dynamic_band, out_shape)
        return dynamic_band

    # Everything is a Python int, so the mask can be precomputed in numpy.
    sub_diags = rows - 1 if num_lower < 0 else num_lower
    super_diags = cols - 1 if num_upper < 0 else num_upper
    keep_below = np.tri(cols, rows, sub_diags).T
    keep_above = np.tri(rows, cols, super_diags)
    static_band = np.ones((rows, cols)) * keep_below * keep_above
    if out_shape:
        static_band = static_band.reshape(out_shape)
    return tf.constant(static_band, tf.float32)
Example #2
Source File: vgp.py From VFF with Apache License 2.0 | 6 votes |
def _build_predict_train(self):
    """Build predictive mean and variance tensors from ``self.X``.

    Uses the cached ``self._Kuf`` together with one ``make_Kuu`` object per
    kernel in ``self.kerns``. The variance is assembled as
    ``Kff + projected variance - Qff`` (naming follows the original
    comments) and reshaped to a single column.

    Returns:
        Tuple ``(mu, var)``.
    """
    Kuf_list = self._Kuf
    Kuu_list = [
        make_Kuu(kern, lower, upper, self.ms)
        for kern, lower, upper in zip(self.kerns, self.a, self.b)
    ]
    KiKuf_list = [Kuu_d.solve(Kuf_d) for Kuu_d, Kuf_d in zip(Kuu_list, Kuf_list)]
    KfuKi_list = [tf.transpose(block) for block in KiKuf_list]

    mu = kvs_dot_vec(KfuKi_list, self.q_mu)

    # Only the lower triangle (diagonal included) of q_sqrt is used.
    q_chol = tf.matrix_band_part(self.q_sqrt, -1, 0)
    projected = kvs_dot_mat(KfuKi_list, q_chol, num_cols=np.prod(self.Ms))

    # Kff: product of the per-dimension kernel diagonals.
    kdiags = [
        kern.Kdiag(self.X[:, dim:dim + 1])
        for dim, kern in enumerate(self.kerns)
    ]
    var = reduce(tf.multiply, kdiags)

    # Projected variance Kfu Ki [WWT] Ki Kuf
    var = var + tf.reduce_sum(tf.square(projected), 1)

    # Qff
    qff_terms = [
        tf.reduce_sum(Kuf_d * KiKuf_d, 0)
        for Kuf_d, KiKuf_d in zip(Kuf_list, KiKuf_list)
    ]
    var = var - reduce(tf.multiply, qff_terms)

    return mu, tf.reshape(var, (-1, 1))
Example #3
Source File: utils.py From OpenSeq2Seq with Apache License 2.0 | 6 votes |
def get_decoder_self_attention_bias(length, dtype=tf.float32):
    """Build the additive bias that keeps decoder self-attention causal.

    Positions on or below the main diagonal get a bias of 0; positions above
    it (future positions) get ``_NEG_INF``, so position ``i`` cannot draw
    information from later positions.

    Args:
        length: int length of sequences in batch.
        dtype: Accepted for API compatibility; the current implementation
            always builds the mask in ``tf.float32`` and does not cast.

    Returns:
        float tensor of shape [1, 1, length, length]
    """
    with tf.name_scope("decoder_self_attention_bias"):
        all_ones = tf.ones([length, length], dtype=tf.float32)
        causal = tf.matrix_band_part(all_ones, -1, 0)
        causal = tf.reshape(causal, [1, 1, length, length])
        # 0 where attention is allowed, _NEG_INF where it is blocked.
        bias = _NEG_INF * (1.0 - causal)
    return bias
Example #4
Source File: matrix_band_part_op_test.py From deep_image_model with Apache License 2.0 | 6 votes |
def _GetMatrixBandPartTest(dtype_, batch_shape_, shape_):
    """Create a test method checking ``tf.matrix_band_part`` against numpy.

    Args:
        dtype_: numpy dtype of the all-ones test matrix.
        batch_shape_: Tuple of leading batch dimensions (``()`` for none).
        shape_: Shape of a single matrix; its last two entries give the
            row and column counts.

    Returns:
        A function ``Test(self)`` intended to be attached to a test case
        class that provides ``test_session`` and ``assertAllEqual``.
    """

    def Test(self):
        mat = np.ones(shape_).astype(dtype_)
        # Use the factory argument, not a module-level `batch_shape`: a global
        # would be undefined (or hold the last loop value) when the test runs.
        batch_mat = np.tile(mat, batch_shape_ + (1, 1))
        with self.test_session(use_gpu=True):
            for lower in -1, 0, 1, shape_[-2] - 1:
                for upper in -1, 0, 1, shape_[-1] - 1:
                    band_np = mat
                    if lower >= 0:
                        band_np = np.triu(band_np, -lower)
                    if upper >= 0:
                        band_np = np.tril(band_np, upper)
                    # Compare by value; `is not ()` tests object identity.
                    if batch_shape_ != ():
                        band_np = np.tile(band_np, batch_shape_ + (1, 1))
                    band = tf.matrix_band_part(batch_mat, lower, upper)
                    self.assertAllEqual(band_np, band.eval())

    return Test
Example #5
Source File: transformer_attentions.py From texar with Apache License 2.0 | 6 votes |
def _ones_matrix_band_part(rows, cols, num_lower, num_upper, out_shape=None):
    """Band-limited matrix of ones with shape ``[rows, cols]``.

    Args:
        rows: Row count (Python int or tensor).
        cols: Column count (Python int or tensor).
        num_lower: Sub-diagonals kept; in the numpy path a negative value
            becomes ``rows - 1``.
        num_upper: Super-diagonals kept; in the numpy path a negative value
            becomes ``cols - 1``.
        out_shape: If truthy, the result is reshaped to this shape.

    Returns:
        A float32 tensor: a constant when all sizes are Python ints,
        otherwise a ``tf.matrix_band_part`` op output.
    """
    all_static = all(
        isinstance(value, int)
        for value in (rows, cols, num_lower, num_upper))

    if all_static:
        # Sizes are known up front, so build the mask once in numpy.
        n_below = num_lower if num_lower >= 0 else rows - 1
        n_above = num_upper if num_upper >= 0 else cols - 1
        below = np.tri(cols, rows, n_below).T
        above = np.tri(rows, cols, n_above)
        mask = np.ones((rows, cols)) * below * above
        if out_shape:
            mask = mask.reshape(out_shape)
        return tf.constant(mask, tf.float32)

    mask = tf.matrix_band_part(
        tf.ones([rows, cols]),
        tf.cast(num_lower, tf.int64),
        tf.cast(num_upper, tf.int64))
    if out_shape:
        mask = tf.reshape(mask, out_shape)
    return mask
Example #6
Source File: model_utils.py From models with Apache License 2.0 | 6 votes |
def get_decoder_self_attention_bias(length):
    """Build the causal bias added to decoder self-attention logits.

    The bias is 0 on and below the main diagonal and ``_NEG_INF`` above it,
    so the prediction at position i cannot draw information from future
    positions.

    Args:
        length: int length of sequences in batch.

    Returns:
        float tensor of shape [1, 1, length, length]
    """
    with tf.name_scope("decoder_self_attention_bias"):
        lower_tri = tf.matrix_band_part(tf.ones([length, length]), -1, 0)
        lower_tri = tf.reshape(lower_tri, [1, 1, length, length])
        blocked = 1.0 - lower_tri
        decoder_bias = _NEG_INF * blocked
    return decoder_bias
Example #7
Source File: model_utils.py From models with Apache License 2.0 | 6 votes |
def get_decoder_self_attention_bias(length):
    """Return a bias tensor that hides future positions from the decoder.

    Entries on or below the diagonal are 0 (attention allowed); entries
    above it are ``_NEG_INF`` (attention blocked).

    Args:
        length: int length of sequences in batch.

    Returns:
        float tensor of shape [1, 1, length, length]
    """
    bias_shape = [1, 1, length, length]
    with tf.name_scope("decoder_self_attention_bias"):
        allowed = tf.reshape(
            tf.matrix_band_part(tf.ones([length, length]), -1, 0),
            bias_shape)
        decoder_bias = _NEG_INF * (1.0 - allowed)
    return decoder_bias
Example #8
Source File: model_utils.py From models with Apache License 2.0 | 6 votes |
def get_decoder_self_attention_bias(length):
    """Compute the autoregressive self-attention bias for the decoder.

    A lower-triangular mask of ones marks the legal connections; every other
    location receives ``_NEG_INF`` so that position i never attends to
    positions after i.

    Args:
        length: int length of sequences in batch.

    Returns:
        float tensor of shape [1, 1, length, length]
    """
    with tf.name_scope("decoder_self_attention_bias"):
        ones = tf.ones([length, length])
        legal = tf.matrix_band_part(ones, -1, 0)
        legal = tf.reshape(legal, [1, 1, length, length])
        illegal = 1.0 - legal
        decoder_bias = _NEG_INF * illegal
    return decoder_bias
Example #9
Source File: model_utils.py From models with Apache License 2.0 | 6 votes |
def get_decoder_self_attention_bias(length):
    """Create the decoder bias that preserves the autoregressive property.

    Locations that would let position i see a later position are set to
    ``_NEG_INF``; all other locations are 0.

    Args:
        length: int length of sequences in batch.

    Returns:
        float tensor of shape [1, 1, length, length]
    """
    with tf.name_scope("decoder_self_attention_bias"):
        visible = tf.matrix_band_part(tf.ones([length, length]), -1, 0)
        visible_4d = tf.reshape(visible, [1, 1, length, length])
        decoder_bias = _NEG_INF * (1.0 - visible_4d)
    return decoder_bias
Example #10
Source File: common_layers.py From fine-lm with MIT License | 6 votes |
def ones_matrix_band_part(rows, cols, num_lower, num_upper, out_shape=None):
    """Matrix band part of ones.

    Args:
        rows: Number of rows (Python int or tensor).
        cols: Number of columns (Python int or tensor).
        num_lower: Number of sub-diagonals to keep; when all sizes are
            Python ints, a negative value is treated as ``rows - 1``.
        num_upper: Number of super-diagonals to keep; when all sizes are
            Python ints, a negative value is treated as ``cols - 1``.
        out_shape: Optional shape to reshape the band to.

    Returns:
        A float32 tensor holding the band, reshaped to ``out_shape`` when
        that argument is truthy.
    """
    dims = [rows, cols, num_lower, num_upper]
    if all(isinstance(dim, int) for dim in dims):
        # Needed info is constant, so we construct in numpy.
        if num_lower < 0:
            num_lower = rows - 1
        if num_upper < 0:
            num_upper = cols - 1
        ones = np.ones((rows, cols))
        band_np = ones * np.tri(cols, rows, num_lower).T * np.tri(rows, cols, num_upper)
        if out_shape:
            band_np = band_np.reshape(out_shape)
        return tf.constant(band_np, tf.float32)

    band_tf = tf.matrix_band_part(
        tf.ones([rows, cols]),
        tf.cast(num_lower, tf.int64),
        tf.cast(num_upper, tf.int64))
    return tf.reshape(band_tf, out_shape) if out_shape else band_tf
Example #11
Source File: common_layers.py From ASR with Apache License 2.0 | 5 votes |
def mask_leq(target_length, source_length):
    """Build a mask that is 1.0 where source_pos <= target_pos, else 0.0.

    Args:
        target_length: an integer
        source_length: an integer

    Returns:
        a Tensor with shape [1, target_length, source_length]
    """
    ones = tf.ones([target_length, source_length])
    # Keep the main diagonal and everything below it.
    lower_tri = tf.matrix_band_part(ones, -1, 0)
    return tf.expand_dims(lower_tri, 0)
Example #12
Source File: layers.py From NeuralEDUSeg with Apache License 2.0 | 5 votes |
def self_attention(inputs, lengths, window_size=-1, scope='bilinear_attention', reuse=None):
    """Apply self-attention over ``inputs`` with length and window masking.

    Args:
        inputs: Tensor attended over; it is used both as query and memory.
        lengths: Sequence lengths passed to ``tf.sequence_mask``.
        window_size: When positive, attention is restricted to a band of
            this many positions on each side of the diagonal.
        scope: Variable scope name.
        reuse: Forwarded to ``tf.variable_scope``.

    Returns:
        Tuple ``(attended, weights)`` where ``attended`` is
        ``tf.matmul(weights, inputs)`` and ``weights`` is the softmax over
        the masked similarity logits.
    """
    with tf.variable_scope(scope, reuse=reuse):
        scores = trilinear_similarity(inputs, inputs)

        valid = tf.sequence_mask(lengths, tf.shape(inputs)[1], tf.float32)
        valid = tf.expand_dims(valid, 1)
        if window_size > 0:
            # Restrict each position to a local window around itself.
            band = tf.matrix_band_part(
                tf.ones_like(scores, dtype=tf.float32), window_size, window_size)
            valid = valid * band

        scores = mask_logits(scores, valid)
        weights = tf.nn.softmax(scores, name='attn_weights')
        attended = tf.matmul(weights, inputs)
        return attended, weights
Example #13
Source File: xlnet_encoder.py From texar with Apache License 2.0 | 5 votes |
def _create_mask(self, qlen, mlen, dtype=tf.float32, same_length=False):
    r"""Create causal attention mask.

    The result concatenates a ``[qlen, mlen]`` block of zeros with the
    strictly upper-triangular part of a ``[qlen, qlen]`` matrix of ones.
    With ``same_length=True`` the strictly lower-triangular part is also
    added to the first ``qlen`` columns.

    Args:
        qlen: Size of the square query block.
        mlen: Width of the zero-filled block placed on the left.
        dtype: dtype of the mask.
        same_length: Whether to add the strictly lower triangle to the
            first ``qlen`` columns.

    Returns:
        A tensor of shape ``[qlen, mlen + qlen]``.
    """
    ones = tf.ones([qlen, qlen], dtype=dtype)
    upper = tf.matrix_band_part(ones, 0, -1)
    diagonal = tf.matrix_band_part(ones, 0, 0)
    memory_pad = tf.zeros([qlen, mlen], dtype=dtype)
    # upper - diagonal leaves only the entries strictly above the diagonal.
    mask = tf.concat([memory_pad, upper - diagonal], axis=1)
    if not same_length:
        return mask

    lower = tf.matrix_band_part(ones, -1, 0)
    head = mask[:, :qlen] + lower - diagonal
    tail = mask[:, qlen:]
    return tf.concat([head, tail], axis=1)
Example #14
Source File: continuous_actions.py From tensorflow-rl with Apache License 2.0 | 5 votes |
def _build_q_head(self, input_state):
    """Build the value, L and mu heads and return the value-function loss.

    Three linear ``layers.fc`` heads are created from ``input_state`` and
    stored on ``self``. The diagonal of ``self.L_full`` is exponentiated,
    the result is combined with ``self.selected_action_ph - self.mu`` into
    ``self.advantage``, and the difference between ``self.target_ph`` and
    ``value + advantage`` is passed to ``self._value_function_loss``.

    Args:
        input_state: Input tensor fed to the three heads.

    Returns:
        Whatever ``self._value_function_loss`` returns for the difference
        between the target and the Q-value of the selected action.
    """
    self.w_value, self.b_value, self.value = layers.fc(
        'fc_value', input_state, 1, activation='linear')
    self.w_L, self.b_L, self.L_full = layers.fc(
        'L_full', input_state, self.num_actions, activation='linear')
    self.w_mu, self.b_mu, self.mu = layers.fc(
        'mu', input_state, self.num_actions, activation='linear')

    # Elements above the main diagonal in L_full are unused.
    # The original referenced a bare `L_full`, which is not defined in this
    # method; the tensor lives on `self.L_full`.
    # D swaps each diagonal entry for its exponential once added to L below.
    D = tf.matrix_band_part(tf.exp(self.L_full) - self.L_full, 0, 0)
    L = tf.matrix_band_part(self.L_full, -1, 0) + D

    # NOTE(review): the einsum subscripts expect L to be rank 3 — confirm the
    # shape that layers.fc produces for 'L_full'.
    LT_u_minus_mu = tf.einsum('ikj,ik', L, self.selected_action_ph - self.mu)
    self.advantage = tf.einsum('ijk,ikj->i', LT_u_minus_mu, LT_u_minus_mu)

    q_selected_action = self.value + self.advantage
    diff = tf.subtract(self.target_ph, q_selected_action)
    return self._value_function_loss(diff)
Example #15
Source File: common_attention.py From ASR with Apache License 2.0 | 5 votes |
def attention_bias_lower_triangle(length):
    """Create a bias tensor to be added to attention logits.

    The bias is 0 on and below the main diagonal and -1e9 above it.

    Args:
        length: a Scalar.

    Returns:
        a `Tensor` with shape [1, 1, length, length].
    """
    allowed = tf.matrix_band_part(tf.ones([length, length]), -1, 0)
    bias = -1e9 * (1.0 - allowed)
    return tf.reshape(bias, [1, 1, length, length])
Example #16
Source File: model.py From galois-autocompleter with MIT License | 5 votes |
def attention_mask(nd, ns, *, dtype):
    """1's in the lower triangle, counting from the lower right corner.

    Same as tf.matrix_band_part(tf.ones([nd, ns]), -1, ns-nd), but doesn't
    produce garbage on TPUs.

    Args:
        nd: Number of rows of the mask.
        ns: Number of columns of the mask.
        dtype: dtype the boolean mask is cast to (keyword-only).

    Returns:
        A ``[nd, ns]`` tensor of ``dtype``.
    """
    row_idx = tf.range(nd)[:, None]
    col_idx = tf.range(ns)
    # Entry (i, j) is kept when i >= j - ns + nd.
    keep = row_idx >= col_idx - ns + nd
    return tf.cast(keep, dtype)
Example #17
Source File: common_layers.py From NMT_GAN with Apache License 2.0 | 5 votes |
def mask_leq(target_length, source_length):
    """Return a mask with 1.0 wherever source_pos <= target_pos, 0.0 elsewhere.

    Args:
        target_length: an integer
        source_length: an integer

    Returns:
        a Tensor with shape [1, target_length, source_length]
    """
    full = tf.ones([target_length, source_length])
    on_or_below_diag = tf.matrix_band_part(full, -1, 0)
    # Add the leading axis of size 1.
    return tf.expand_dims(on_or_below_diag, 0)
Example #18
Source File: attention.py From Document-Transformer with BSD 3-Clause "New" or "Revised" License | 5 votes |
def attention_bias(inputs, mode, inf=-1e9, name=None):
    """Build a bias tensor used in the attention mechanism.

    :param inputs: Depends on ``mode``: a length for "causal" and
        "proximal", a mask tensor for "masking", and a
        ``(length, distance)`` pair for "distance".
    :param mode: one of "causal", "masking", "proximal" or "distance"
    :param inf: A floating value used as the bias of blocked positions
    :param name: optional string
    :returns: A 4D tensor with shape [batch, heads, queries, memories]
    :raises ValueError: if ``mode`` is not one of the supported values
    """
    with tf.name_scope(name, default_name="attention_bias", values=[inputs]):
        if mode == "causal":
            size = inputs
            causal = tf.matrix_band_part(tf.ones([size, size]), -1, 0)
            bias = inf * (1.0 - causal)
            return tf.reshape(bias, [1, 1, size, size])

        if mode == "masking":
            # `inputs` is the mask itself; two size-1 axes are inserted at
            # position 1.
            bias = (1.0 - inputs) * inf
            return tf.expand_dims(tf.expand_dims(bias, 1), 1)

        if mode == "proximal":
            positions = tf.to_float(tf.range(inputs))
            offsets = tf.expand_dims(positions, 0) - tf.expand_dims(positions, 1)
            penalty = -tf.log(1 + tf.abs(offsets))
            return tf.expand_dims(tf.expand_dims(penalty, 0), 0)

        if mode == "distance":
            size, span = inputs
            # A span larger than the length is replaced by 0.
            span = tf.where(span > size, 0, span)
            span = tf.cast(span, tf.int64)
            causal = tf.matrix_band_part(tf.ones([size, size]), -1, 0)
            too_far = 1.0 - tf.matrix_band_part(
                tf.ones([size, size]), span - 1, 0)
            bias = inf * (1.0 - causal + too_far)
            return tf.reshape(bias, [1, 1, size, size])

        raise ValueError("Unknown mode %s" % mode)
Example #19
Source File: transformer.py From deep_dialog_tutorial with MIT License | 5 votes |
def _create_dec_self_attention_mask(self, decoder_input: tf.Tensor):
    """Build the boolean self-attention mask for the decoder.

    The mask is True where ``decoder_input`` equals ``PAD_ID`` or where the
    key position lies above the main diagonal (a future position).

    Args:
        decoder_input: Rank-2 tensor; its shape is unstacked into
            ``(batch_size, length)``.

    Returns:
        Boolean tensor, the logical OR of the padding mask
        ``[batch_size, 1, 1, length]`` and the autoregressive mask
        ``[1, 1, length, length]``.
    """
    with tf.name_scope('dec_self_attention_mask'):
        batch_size, length = tf.unstack(tf.shape(decoder_input))

        is_pad = tf.equal(decoder_input, PAD_ID)  # [batch_size, m_length]
        is_pad = tf.reshape(is_pad, [batch_size, 1, 1, length])

        # The lower triangle (diagonal included) becomes False.
        lower_true = tf.matrix_band_part(
            tf.ones([length, length], dtype=tf.bool), -1, 0)
        is_future = tf.logical_not(lower_true)
        is_future = tf.reshape(is_future, [1, 1, length, length])

        return tf.logical_or(is_pad, is_future)
Example #20
Source File: vgp.py From VFF with Apache License 2.0 | 5 votes |
def _build_predict(self, X, full_cov=False):
    """Given self.q(v), build the mean and marginal variance of q(f) at ``X``.

    Args:
        X: Input matrix; column ``i`` is passed to the ``i``-th kernel.
        full_cov: Must be falsy; a full covariance is not implemented.

    Returns:
        Tuple ``(mu, var)`` with ``var`` reshaped to a single column.

    Raises:
        NotImplementedError: if ``full_cov`` is truthy.
    """
    per_dim = enumerate(zip(self.kerns, self.a, self.b))
    Kuf_list = [
        make_Kuf(kern, X[:, dim:dim + 1], lower, upper, self.ms)
        for dim, (kern, lower, upper) in per_dim
    ]
    Kuu_list = [
        make_Kuu(kern, lower, upper, self.ms)
        for kern, lower, upper in zip(self.kerns, self.a, self.b)
    ]
    KiKuf_list = [Kuu_d.solve(Kuf_d) for Kuu_d, Kuf_d in zip(Kuu_list, Kuf_list)]
    KfuKi_list = [tf.transpose(block) for block in KiKuf_list]

    mu = kvs_dot_vec(KfuKi_list, self.q_mu)

    # Only the lower triangle (diagonal included) of q_sqrt is used.
    q_chol = tf.matrix_band_part(self.q_sqrt, -1, 0)
    projected = kvs_dot_mat(KfuKi_list, q_chol, np.prod(self.Ms))

    if full_cov:
        raise NotImplementedError

    # Kff: product of the per-dimension kernel diagonals.
    var = reduce(
        tf.multiply,
        [kern.Kdiag(X[:, dim:dim + 1]) for dim, kern in enumerate(self.kerns)])

    # Projected variance Kfu Ki [WWT] Ki Kuf
    var = var + tf.reduce_sum(tf.square(projected), 1)

    # Qff
    var = var - reduce(
        tf.multiply,
        [tf.reduce_sum(Kuf_d * KiKuf_d, 0)
         for Kuf_d, KiKuf_d in zip(Kuf_list, KiKuf_list)])

    var = tf.reshape(var, (-1, 1))
    return mu, var
Example #21
Source File: vgp.py From VFF with Apache License 2.0 | 5 votes |
def _build_predict_train(self):
    """Build predictive mean and variance tensors from ``self.X``.

    The variance is ``Kff`` plus one projected-variance term built from
    ``self.q_sqrt_kron``, plus either a second such term from
    ``self.q_sqrt_kron_2`` (when ``self.use_two_krons``) or
    ``self.use_extra_ranks`` rank-one terms from the columns of
    ``self.q_sqrt_W``, minus ``Qff``.

    Returns:
        Tuple ``(mu, var)`` with ``var`` reshaped to a single column.
    """
    Kuf_list = self._Kuf
    Kuu_list = [
        make_Kuu(kern, lower, upper, self.ms)
        for kern, lower, upper in zip(self.kerns, self.a, self.b)
    ]
    KiKuf_list = [Kuu_d.solve(Kuf_d) for Kuu_d, Kuf_d in zip(Kuu_list, Kuf_list)]
    KfuKi_list = [tf.transpose(block) for block in KiKuf_list]
    mu = kvs_dot_vec(KfuKi_list, self.q_mu)

    def kron_variance(q_sqrt_factors):
        # Projected variance from per-dimension lower-triangular factors.
        chol = [tf.matrix_band_part(factor, -1, 0) for factor in q_sqrt_factors]
        proj = [
            tf.matmul(tf.transpose(chol_d), KiKuf_d)
            for chol_d, KiKuf_d in zip(chol, KiKuf_list)
        ]
        return reduce(
            tf.multiply, [tf.reduce_sum(tf.square(proj_d), 0) for proj_d in proj])

    # Kff: product of the per-dimension kernel diagonals.
    var = reduce(
        tf.multiply,
        [kern.Kdiag(self.X[:, dim:dim + 1])
         for dim, kern in enumerate(self.kerns)])

    # Projected variance Kfu Ki [WWT] Ki Kuf
    var = var + kron_variance(self.q_sqrt_kron)

    if self.use_two_krons:
        var = var + kron_variance(self.q_sqrt_kron_2)
    elif self.use_extra_ranks:
        for rank in range(self.use_extra_ranks):
            column = kvs_dot_vec(KfuKi_list, self.q_sqrt_W[:, rank:rank + 1])
            var = var + tf.reduce_sum(tf.square(column), 1)

    # Qff
    var = var - reduce(
        tf.multiply,
        [tf.reduce_sum(Kuf_d * KiKuf_d, 0)
         for Kuf_d, KiKuf_d in zip(Kuf_list, KiKuf_list)])

    return mu, tf.reshape(var, [-1, 1])
Example #22
Source File: vgp.py From VFF with Apache License 2.0 | 5 votes |
def _build_predict(self, X, full_cov=False):
    """Given self.q(v), build the mean and marginal variance of q(f) at ``X``.

    The variance is ``Kff`` plus one projected-variance term built from
    ``self.q_sqrt_kron``, plus either a second such term from
    ``self.q_sqrt_kron_2`` (when ``self.use_two_krons``) or
    ``self.use_extra_ranks`` rank-one terms from the columns of
    ``self.q_sqrt_W``, minus ``Qff``.

    Args:
        X: Input matrix; column ``i`` is passed to the ``i``-th kernel.
        full_cov: Must be falsy; a full covariance is not implemented.

    Returns:
        Tuple ``(mu, var)`` with ``var`` reshaped to a single column.

    Raises:
        NotImplementedError: if ``full_cov`` is truthy.
    """
    Kuf_list = [
        make_Kuf(kern, X[:, dim:dim + 1], lower, upper, self.ms)
        for dim, (kern, lower, upper)
        in enumerate(zip(self.kerns, self.a, self.b))
    ]
    Kuu_list = [
        make_Kuu(kern, lower, upper, self.ms)
        for kern, lower, upper in zip(self.kerns, self.a, self.b)
    ]
    KiKuf_list = [Kuu_d.solve(Kuf_d) for Kuu_d, Kuf_d in zip(Kuu_list, Kuf_list)]
    KfuKi_list = [tf.transpose(block) for block in KiKuf_list]
    mu = kvs_dot_vec(KfuKi_list, self.q_mu)

    if full_cov:
        raise NotImplementedError

    def kron_variance(q_sqrt_factors):
        # Projected variance from per-dimension lower-triangular factors.
        chol = [tf.matrix_band_part(factor, -1, 0) for factor in q_sqrt_factors]
        proj = [
            tf.matmul(tf.transpose(chol_d), KiKuf_d)
            for chol_d, KiKuf_d in zip(chol, KiKuf_list)
        ]
        return reduce(
            tf.multiply, [tf.reduce_sum(tf.square(proj_d), 0) for proj_d in proj])

    # Kff: product of the per-dimension kernel diagonals.
    var = reduce(
        tf.multiply,
        [kern.Kdiag(X[:, dim:dim + 1]) for dim, kern in enumerate(self.kerns)])

    # Projected variance Kfu Ki [WWT] Ki Kuf
    var = var + kron_variance(self.q_sqrt_kron)

    if self.use_two_krons:
        var = var + kron_variance(self.q_sqrt_kron_2)
    elif self.use_extra_ranks:
        for rank in range(self.use_extra_ranks):
            column = kvs_dot_vec(KfuKi_list, self.q_sqrt_W[:, rank:rank + 1])
            var = var + tf.reduce_sum(tf.square(column), 1)

    # Qff
    var = var - reduce(
        tf.multiply,
        [tf.reduce_sum(Kuf_d * KiKuf_d, 0)
         for Kuf_d, KiKuf_d in zip(Kuf_list, KiKuf_list)])

    var = tf.reshape(var, (-1, 1))
    return mu, var
Example #23
Source File: common_attention.py From NJUNMT-tf with Apache License 2.0 | 5 votes |
def attention_bias_lower_triangle(length):
    """Create a bias tensor to be added to attention logits.

    Allows a query to attend to all positions up to and including its own.

    Args:
        length: A scalar.

    Returns:
        A float Tensor of shape [1, 1, length, length], with ``FLOAT_MIN``
        above the main diagonal and 0 on and below it.
    """
    attendable = tf.matrix_band_part(tf.ones([length, length]), -1, 0)
    bias = FLOAT_MIN * (1. - attendable)
    return tf.reshape(bias, [1, 1, length, length])
Example #24
Source File: modeling_gpt2.py From Decoders-Chinese-TF2.0 with MIT License | 5 votes |
def causal_attention_mask(nd, ns, dtype):
    """1's in the lower triangle, counting from the lower right corner.

    Same as tf.matrix_band_part(tf.ones([nd, ns]), -1, ns-nd), but doesn't
    produce garbage on TPUs.

    Args:
        nd: Number of rows of the mask.
        ns: Number of columns of the mask.
        dtype: dtype the boolean mask is cast to.

    Returns:
        A ``[nd, ns]`` tensor of ``dtype``.
    """
    rows = tf.range(nd)[:, None]
    cols = tf.range(ns)
    # Entry (i, j) is kept when i >= j - ns + nd.
    keep = rows >= cols - ns + nd
    return tf.cast(keep, dtype)
Example #25
Source File: modeling_tf_gpt2.py From exbert with Apache License 2.0 | 5 votes |
def causal_attention_mask(nd, ns, dtype):
    """1's in the lower triangle, counting from the lower right corner.

    Same as tf.matrix_band_part(tf.ones([nd, ns]), -1, ns-nd), but doesn't
    produce garbage on TPUs.

    Args:
        nd: Row count of the mask.
        ns: Column count of the mask.
        dtype: Output dtype.

    Returns:
        The ``[nd, ns]`` mask cast to ``dtype``.
    """
    query_pos = tf.range(nd)[:, None]
    key_pos = tf.range(ns)
    visible = query_pos >= key_pos - ns + nd
    return tf.cast(visible, dtype)
Example #26
Source File: modeling_tf_openai.py From exbert with Apache License 2.0 | 5 votes |
def causal_attention_mask(nd, ns, dtype):
    """1's in the lower triangle, counting from the lower right corner.

    Same as tf.matrix_band_part(tf.ones([nd, ns]), -1, ns-nd), but doesn't
    produce garbage on TPUs.

    Args:
        nd: Number of mask rows.
        ns: Number of mask columns.
        dtype: dtype of the returned mask.

    Returns:
        A ``[nd, ns]`` tensor of ``dtype``.
    """
    i_col = tf.range(nd)[:, None]
    j_row = tf.range(ns)
    # Broadcasting a column against a row yields the [nd, ns] comparison.
    lower_right = i_col >= j_row - ns + nd
    return tf.cast(lower_right, dtype)
Example #27
Source File: modeling_tf_xlnet.py From exbert with Apache License 2.0 | 5 votes |
def create_mask(self, qlen, mlen, dtype=tf.float32):
    """
    Creates causal attention mask. Float mask where 1.0 indicates masked,
    0.0 indicates not-masked.

    Args:
        qlen: Size of the square query block.
        mlen: Width of the zero-filled block placed on the left.
        dtype: dtype of the mask.

    ::

              same_length=False:      same_length=True:
              <mlen > <  qlen >       <mlen > <  qlen >
           ^ [0 0 0 0 0 1 1 1 1]     [0 0 0 0 0 1 1 1 1]
             [0 0 0 0 0 0 1 1 1]     [1 0 0 0 0 0 1 1 1]
        qlen [0 0 0 0 0 0 0 1 1]     [1 1 0 0 0 0 0 1 1]
             [0 0 0 0 0 0 0 0 1]     [1 1 1 0 0 0 0 0 1]
           v [0 0 0 0 0 0 0 0 0]     [1 1 1 1 0 0 0 0 0]

    Returns:
        A tensor of shape ``[qlen, mlen + qlen]``.
    """
    ones = tf.ones([qlen, qlen], dtype=dtype)
    upper = tf.matrix_band_part(ones, 0, -1)
    diagonal = tf.matrix_band_part(ones, 0, 0)
    memory_pad = tf.zeros([qlen, mlen], dtype=dtype)
    # upper - diagonal leaves only the entries strictly above the diagonal.
    mask = tf.concat([memory_pad, upper - diagonal], 1)
    if self.same_length:
        lower = tf.matrix_band_part(ones, -1, 0)
        left = mask[:, :qlen] + lower - diagonal
        right = mask[:, qlen:]
        mask = tf.concat([left, right], 1)
    return mask
Example #28
Source File: model.py From QANet_dureader with MIT License | 5 votes |
def _decode(self):
    """Build start/end pointer logits and the predicted span indices.

    Sets ``self.logits``, ``self.logits1``, ``self.logits2``, ``self.yp1``
    and ``self.yp2``. The outer product of the start and end softmax
    distributions is restricted to the band ``0 <= end - start <=
    self.max_a_len`` before taking the arg-max.
    """
    N, _PL, _QL, _CL, _d, _dc, _nh = self._params()
    use_attn = self.config.use_position_attn

    def pointer_logits(enc_index, attn_name, conv_name):
        # Concatenate enc[1] with enc[enc_index], optionally run position
        # attention, then project to one logit per position.
        features = tf.concat([self.enc[1], self.enc[enc_index]], axis=-1)
        if use_attn:
            features = self._attention(features, name=attn_name)
        return tf.squeeze(conv(features, 1, bias=False, name=conv_name), -1)

    start_logits = pointer_logits(2, "attn1", "start_pointer")
    end_logits = pointer_logits(3, "attn2", "end_pointer")

    masked_start = mask_logits(start_logits, mask=tf.reshape(self.c_mask, [N, -1]))
    masked_end = mask_logits(end_logits, mask=tf.reshape(self.c_mask, [N, -1]))
    self.logits = [masked_start, masked_end]
    self.logits1, self.logits2 = self.logits

    start_probs = tf.expand_dims(tf.nn.softmax(self.logits1), axis=2)
    end_probs = tf.expand_dims(tf.nn.softmax(self.logits2), axis=1)
    joint = tf.matmul(start_probs, end_probs)
    # Keep only spans whose end is at most max_a_len positions after the start.
    joint = tf.matrix_band_part(joint, 0, self.max_a_len)

    self.yp1 = tf.argmax(tf.reduce_max(joint, axis=2), axis=1)
    self.yp2 = tf.argmax(tf.reduce_max(joint, axis=1), axis=1)
Example #29
Source File: common_attention.py From NMT_GAN with Apache License 2.0 | 5 votes |
def attention_bias_lower_triangle(length):
    """Create a bias tensor to be added to attention logits.

    Entries on and below the main diagonal are 0; entries above it are -1e9.

    Args:
        length: a Scalar.

    Returns:
        a `Tensor` with shape [1, 1, length, length].
    """
    ones = tf.ones([length, length])
    keep = tf.matrix_band_part(ones, -1, 0)
    masked_out = 1.0 - keep
    return tf.reshape(-1e9 * masked_out, [1, 1, length, length])
Example #30
Source File: transformer_decoder.py From bert-multitask-learning with MIT License | 5 votes |
def get_decoder_self_attention_mask(self, length):
    """Build the lower-triangular mask for decoder self-attention.

    The mask holds 1.0 on and below the main diagonal and 0.0 above it, so
    the prediction at position i cannot draw information from future
    positions. Unlike a bias, the mask is returned as-is, without being
    scaled by a large negative constant.

    Args:
        length: int length of sequences in batch.

    Returns:
        float tensor of shape [1, length, length]
    """
    with tf.name_scope("decoder_self_attention_mask"):
        lower_tri = tf.matrix_band_part(tf.ones([length, length]), -1, 0)
        mask = tf.reshape(lower_tri, [1, length, length])
    return mask