Python torch.bernoulli() Examples
The following are 30 code examples of torch.bernoulli(), drawn from open-source projects. Each example is preceded by a link to the original project and source file. You may also want to check out the other available functions and classes of the torch module.
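As a quick reference before the examples: torch.bernoulli(input) treats every element of input as an independent probability and returns a tensor of the same shape filled with 0s and 1s. A minimal sketch (the values in the comments are illustrative, since the draws are random):

import torch

probs = torch.tensor([0.1, 0.5, 0.9])
print(torch.bernoulli(probs))            # e.g. tensor([0., 1., 1.])

# A pattern used repeatedly below: build a mask in which each position
# is kept (1) with a fixed probability.
keep_prob = 0.8
mask = torch.bernoulli(torch.full((2, 4), keep_prob))
print(mask)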
Example #1
Source File: layers.py From lung_nodule_detector with MIT License | 6 votes |
def forward(self, x0, x1, x2, x3):
    if self.p > 0 and self.training:
        coef = torch.bernoulli((1.0 - self.p) * torch.ones(8))
        out1 = coef[0] * self.block01(x0) + coef[1] * self.block11(x1) + coef[2] * self.block21(x2)
        out2 = coef[3] * self.block12(x1) + coef[4] * self.block22(x2) + coef[5] * self.block32(x3)
        out3 = coef[6] * self.block23(x2) + coef[7] * self.block33(x3)
    else:
        out1 = (1 - self.p) * (self.block01(x0) + self.block11(x1) + self.block21(x2))
        out2 = (1 - self.p) * (self.block12(x1) + self.block22(x2) + self.block32(x3))
        out3 = (1 - self.p) * (self.block23(x2) + self.block33(x3))

    if self.integrate:
        out1 += x1
        out2 += x2
        out3 += x3

    return x0, self.relu(out1), self.relu(out2), self.relu(out3)
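In this example each block's contribution is kept with probability 1 - p during training, while the inference branch scales every block by (1 - p); that scaling matches the expectation of the Bernoulli coefficients. A quick numerical check of that expectation (p = 0.3 is an arbitrary value chosen for illustration):

import torch

p = 0.3  # arbitrary drop probability, for illustration only
coef = torch.bernoulli((1.0 - p) * torch.ones(100000))
print(coef.mean())  # close to 0.7, i.e. approximately 1 - p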
Example #2
Source File: bert.py From fastHan with Apache License 2.0 | 6 votes |
def drop_word(self, words):
    """
    Randomly replace words with unknown_index according to the configured probability.

    :param torch.LongTensor words: batch_size x max_len
    :return:
    """
    if self.word_dropout > 0 and self.training:
        with torch.no_grad():
            if self._word_sep_index:  # sep must never be dropped
                sep_mask = words.eq(self._wordpiece_unk_index)
            mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device)
            mask = torch.bernoulli(mask).eq(1)  # the larger word_dropout is, the more positions are set to 1
            pad_mask = words.ne(self._wordpiece_pad_index)
            mask = pad_mask.__and__(mask)  # pad positions are never replaced with unk
            words = words.masked_fill(mask, self._word_unk_index)
            if self._word_sep_index:
                words.masked_fill_(sep_mask, self._wordpiece_unk_index)
    return words
Example #3
Source File: network_blocks.py From ASFF with GNU General Public License v3.0 | 6 votes |
def forward(self, x):
    if (not self.training or self.keep_prob == 1):  # set keep_prob=1 to turn off dropblock
        return x
    if self.gamma is None:
        self.gamma = self.calculate_gamma(x)
    if x.type() == 'torch.cuda.HalfTensor':  # TODO: not fully support for FP16 now
        FP16 = True
        x = x.float()
    else:
        FP16 = False
    p = torch.ones_like(x) * (self.gamma)
    mask = 1 - torch.nn.functional.max_pool2d(torch.bernoulli(p),
                                              self.kernel_size,
                                              self.stride,
                                              self.padding)
    out = mask * x * (mask.numel() / mask.sum())
    if FP16:
        out = out.half()
    return out
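calculate_gamma is not shown in this snippet. In the DropBlock paper (Ghiasi et al., 2018) the seed probability gamma is derived from the keep probability, the block size, and the feature-map size; the sketch below follows the paper's formula and is only an approximation of what this project's method may compute:

def calculate_gamma_sketch(keep_prob, block_size, feat_size):
    # Seed probability chosen so that roughly (1 - keep_prob) of the activations
    # end up dropped once each Bernoulli seed is expanded to a block_size x block_size region.
    return ((1.0 - keep_prob) / block_size ** 2) * (
        feat_size ** 2 / (feat_size - block_size + 1) ** 2
    )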
Example #4
Source File: bert.py From fastHan with Apache License 2.0 | 6 votes |
def drop_word(self, words):
    """
    Randomly replace words with unknown_index according to the configured probability.

    :param torch.LongTensor words: batch_size x max_len
    :return:
    """
    if self.word_dropout > 0 and self.training:
        with torch.no_grad():
            if self._word_sep_index:  # sep must never be dropped
                sep_mask = words.eq(self._word_sep_index)
            mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device)
            mask = torch.bernoulli(mask).eq(1)  # the larger word_dropout is, the more positions are set to 1
            pad_mask = words.ne(0)
            mask = pad_mask.__and__(mask)  # pad positions are never replaced with unk
            words = words.masked_fill(mask, self._word_unk_index)
            if self._word_sep_index:
                words.masked_fill_(sep_mask, self._word_sep_index)
    return words
Example #5
Source File: layers.py From DeepLung with GNU General Public License v3.0 | 6 votes |
def forward(self, x0, x1, x2, x3):
    if self.p > 0 and self.training:
        coef = torch.bernoulli((1.0 - self.p) * torch.ones(8))
        out1 = coef[0] * self.block01(x0) + coef[1] * self.block11(x1) + coef[2] * self.block21(x2)
        out2 = coef[3] * self.block12(x1) + coef[4] * self.block22(x2) + coef[5] * self.block32(x3)
        out3 = coef[6] * self.block23(x2) + coef[7] * self.block33(x3)
    else:
        out1 = (1 - self.p) * (self.block01(x0) + self.block11(x1) + self.block21(x2))
        out2 = (1 - self.p) * (self.block12(x1) + self.block22(x2) + self.block32(x3))
        out3 = (1 - self.p) * (self.block23(x2) + self.block33(x3))

    if self.integrate:
        out1 += x1
        out2 += x2
        out3 += x3

    return x0, self.relu(out1), self.relu(out2), self.relu(out3)
Example #6
Source File: masker.py From BERT-for-RRC-ABSA with Apache License 2.0 | 6 votes |
def gen_inputs_labels(self, inputs, masked_indices):
    # We sample a few tokens in each sequence for masked-LM training (with probability mlm_probability defaults to 0.15 in Bert/RoBERTa)
    inputs = inputs.clone()
    labels = inputs.clone()
    labels[~masked_indices] = -100  # We only compute loss on masked tokens

    # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
    indices_replaced = torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & masked_indices
    inputs[indices_replaced] = self.tokenizer.convert_tokens_to_ids(self.tokenizer.mask_token)

    # 10% of the time, we replace masked input tokens with random word
    indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced
    random_words = torch.randint(len(self.tokenizer), labels.shape, dtype=torch.long)
    inputs[indices_random] = random_words[indices_random]

    # The rest of the time (10% of the time) we keep the masked input tokens unchanged
    return inputs, labels
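The two Bernoulli draws above implement BERT's 80/10/10 split over the masked positions: 80% become [MASK]; of the remaining 20%, a fair 0.5 draw sends half (10% overall) to random tokens; the last 10% stay unchanged. A small numerical check of those proportions:

import torch

masked = torch.ones(100000, dtype=torch.bool)  # pretend every position was selected for masking
replaced = torch.bernoulli(torch.full(masked.shape, 0.8)).bool() & masked
random_ = torch.bernoulli(torch.full(masked.shape, 0.5)).bool() & masked & ~replaced
unchanged = masked & ~replaced & ~random_
print(replaced.float().mean(),   # ~0.80
      random_.float().mean(),    # ~0.10
      unchanged.float().mean())  # ~0.10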
Example #7
Source File: roberta_embedding.py From fastNLP with Apache License 2.0 | 6 votes |
def drop_word(self, words):
    r"""
    Randomly replace words with unknown_index according to the configured probability.

    :param torch.LongTensor words: batch_size x max_len
    :return:
    """
    if self.word_dropout > 0 and self.training:
        with torch.no_grad():
            mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device)
            mask = torch.bernoulli(mask).eq(1)  # the larger word_dropout is, the more positions are set to 1
            pad_mask = words.ne(self._word_pad_index)
            mask = pad_mask.__and__(mask)  # pad positions are never replaced with unk
            if self._word_sep_index != -100:
                not_sep_mask = words.ne(self._word_sep_index)
                mask = mask.__and__(not_sep_mask)
            if self._word_cls_index != -100:
                not_cls_mask = words.ne(self._word_cls_index)
                mask = mask.__and__(not_cls_mask)
            words = words.masked_fill(mask, self._word_unk_index)
    return words
Example #8
Source File: roberta_embedding.py From fastNLP with Apache License 2.0 | 6 votes |
def drop_word(self, words):
    r"""
    Randomly replace words with unknown_index according to the configured probability.

    :param torch.LongTensor words: batch_size x max_len
    :return:
    """
    if self.word_dropout > 0 and self.training:
        with torch.no_grad():
            not_sep_mask = words.ne(self._sep_index)
            not_cls_mask = words.ne(self._cls_index)
            replaceable_mask = not_sep_mask.__and__(not_cls_mask)
            mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device)
            mask = torch.bernoulli(mask).eq(1)  # the larger word_dropout is, the more positions are set to 1
            pad_mask = words.ne(self._wordpiece_pad_index)
            mask = pad_mask.__and__(mask).__and__(replaceable_mask)  # pad positions are never replaced with unk
            words = words.masked_fill(mask, self._wordpiece_unk_index)
    return words
Example #9
Source File: bernoulli.py From amortized-variational-filtering with MIT License | 6 votes |
def sample(self, n_samples=1, resample=False):
    """
    Draw samples from the distribution.

    Args:
        n_samples (int): number of samples to draw
        resample (bool): whether to resample or just use current sample
    """
    if self._sample is None or resample:
        assert self.mean is not None, 'Mean is None.'
        mean = self.mean
        if len(mean.size()) == 2:
            mean = mean.unsqueeze(1).repeat(1, n_samples, 1)
        elif len(mean.size()) == 4:
            mean = mean.unsqueeze(1).repeat(1, n_samples, 1, 1, 1)
        self._sample = torch.bernoulli(mean)
    return self._sample
Example #10
Source File: bert_embedding.py From fastNLP with Apache License 2.0 | 6 votes |
def drop_word(self, words):
    r"""
    Randomly replace words with unknown_index according to the configured probability.

    :param torch.LongTensor words: batch_size x max_len
    :return:
    """
    if self.word_dropout > 0 and self.training:
        with torch.no_grad():
            mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device)
            mask = torch.bernoulli(mask).eq(1)  # the larger word_dropout is, the more positions are set to 1
            pad_mask = words.ne(self._word_pad_index)
            mask = pad_mask.__and__(mask)  # pad positions are never replaced with unk
            if self._word_sep_index != -100:
                not_sep_mask = words.ne(self._word_sep_index)
                mask = mask.__and__(not_sep_mask)
            if self._word_cls_index != -100:
                not_cls_mask = words.ne(self._word_cls_index)
                mask = mask.__and__(not_cls_mask)
            words = words.masked_fill(mask, self._word_unk_index)
    return words
Example #11
Source File: debug_lm.py From ru_transformers with Apache License 2.0 | 6 votes |
def mask_tokens(inputs, tokenizer, args):
    """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original. """
    labels = inputs.clone()
    # We sample a few tokens in each sequence for masked-LM training (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa)
    probability_matrix = torch.full(labels.shape, args.mlm_probability)
    special_tokens_mask = [tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()]
    probability_matrix.masked_fill_(torch.tensor(special_tokens_mask, dtype=torch.bool), value=0.0)
    masked_indices = torch.bernoulli(probability_matrix).bool()
    labels[~masked_indices] = -1  # We only compute loss on masked tokens

    # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
    indices_replaced = torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & masked_indices
    inputs[indices_replaced] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

    # 10% of the time, we replace masked input tokens with random word
    indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced
    random_words = torch.randint(len(tokenizer), labels.shape, dtype=torch.long)
    inputs[indices_random] = random_words[indices_random]

    # The rest of the time (10% of the time) we keep the masked input tokens unchanged
    return inputs, labels

# from transformers/modeling_utils.py, adapted to tpu
Example #12
Source File: functional.py From pytorch-wrapper with MIT License | 6 votes |
def sub_tensor_dropout(data_tensor, dropout_p, dim, is_model_training):
    """
    Drops (zeroes-out) random sub-Tensors of a Tensor across the specified dimension, during training.

    :param data_tensor: ND Tensor.
    :param dropout_p: The dropout probability.
    :param dim: Int that corresponds to the dimension.
    :param is_model_training: Whether the model is currently training.
    :return: ND Tensor.
    """
    if dim < 0:
        dim = len(data_tensor.shape) + dim

    if dropout_p is None or dropout_p == 0 or not is_model_training:
        return data_tensor

    assert 0 <= dropout_p < 1, 'dropout probability must be in range [0,1)'

    dp = torch.empty(*(data_tensor.shape[:dim + 1]), dtype=torch.float, device=data_tensor.device)
    dp = torch.bernoulli(dp.fill_((1 - dropout_p)))
    dp = dp.view(list(dp.shape) + [1] * (len(data_tensor.shape) - len(dp.shape)))

    return data_tensor * dp
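A hedged usage sketch, assuming the sub_tensor_dropout definition above is in scope: it drops whole sub-tensors along dim=1 of a batch x seq_len x hidden input, so the zeroed positions vary from run to run.

import torch

x = torch.randn(2, 5, 8)
y = sub_tensor_dropout(x, dropout_p=0.4, dim=1, is_model_training=True)
print((y == 0).all(dim=-1))  # True for the (batch, position) pairs that were zeroed out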
Example #13
Source File: run_lm_finetuning.py From ru_transformers with Apache License 2.0 | 6 votes |
def mask_tokens(inputs, tokenizer, args):
    """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original. """
    labels = inputs.clone()
    # We sample a few tokens in each sequence for masked-LM training (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa)
    probability_matrix = torch.full(labels.shape, args.mlm_probability)
    special_tokens_mask = [tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()]
    probability_matrix.masked_fill_(torch.tensor(special_tokens_mask, dtype=torch.bool), value=0.0)
    masked_indices = torch.bernoulli(probability_matrix).bool()
    labels[~masked_indices] = -1  # We only compute loss on masked tokens

    # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
    indices_replaced = torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & masked_indices
    inputs[indices_replaced] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

    # 10% of the time, we replace masked input tokens with random word
    indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced
    random_words = torch.randint(len(tokenizer), labels.shape, dtype=torch.long)
    inputs[indices_random] = random_words[indices_random]

    # The rest of the time (10% of the time) we keep the masked input tokens unchanged
    return inputs, labels
Example #14
Source File: model_pixelcnn_bmnist.py From torchkit with MIT License | 6 votes |
def train(self):
    for epoch in range(10):
        for it, (x, y) in enumerate(self.data_loader):
            self.optim.zero_grad()
            x = torch.bernoulli(x)
            if cuda:
                x = x.cuda()
            x = Variable(x.view(-1, 1, 28, 28))
            out = nn_.sigmoid(self.mdl((x, 0))[0]).permute(0, 3, 1, 2)
            loss = utils.bceloss(out, x).sum(1).sum(1).sum(1).mean()
            loss.backward()
            self.optim.step()
            if ((it + 1) % 100) == 0:
                print 'Epoch: [%2d] [%4d/%4d] loss: %.8f' % \
                    (epoch+1, it+1, self.data_loader.dataset.__len__() // 32, loss.data[0])
Example #15
Source File: model_made_bmnist.py From torchkit with MIT License | 6 votes |
def train(self):
    for epoch in range(10):
        for it, (x, y) in enumerate(self.data_loader):
            self.optim.zero_grad()
            x = torch.bernoulli(x)
            x = Variable(x.view(-1, 784))
            out = nn_.sigmoid(self.mdl(x)[:, :, 0])
            loss = utils.bceloss(out, x).sum(1).mean()
            loss.backward()
            self.optim.step()
            if ((it + 1) % 10) == 0:
                print 'Epoch: [%2d] [%4d/%4d] loss: %.8f' % \
                    (epoch+1, it+1, self.data_loader.dataset.__len__() // 32, loss.data[0])
            self.mdl.randomize()
Example #16
Source File: corrupter.py From KBGAN with MIT License | 6 votes |
def corrupt(self, src, rel, dst, keep_truth=True):
    n = len(src)
    prob = self.bern_prob[rel]
    selection = torch.bernoulli(prob).numpy().astype('bool')
    src_out = np.tile(src.numpy(), (self.n_sample, 1)).transpose()
    dst_out = np.tile(dst.numpy(), (self.n_sample, 1)).transpose()
    rel_out = rel.unsqueeze(1).expand(n, self.n_sample)
    if keep_truth:
        ent_random = choice(self.n_ent, (n, self.n_sample - 1))
        src_out[selection, 1:] = ent_random[selection]
        dst_out[~selection, 1:] = ent_random[~selection]
    else:
        ent_random = choice(self.n_ent, (n, self.n_sample))
        src_out[selection, :] = ent_random[selection]
        dst_out[~selection, :] = ent_random[~selection]
    return torch.from_numpy(src_out), rel_out, torch.from_numpy(dst_out)
Example #17
Source File: word_tagging.py From didyprog with MIT License | 6 votes |
def make_data(batch, augment=False, singleton_idx=None, unk_idx=None, ):
    sentences = batch.sentences
    tags, lengths = batch.tags
    letters, letters_lengths = batch.letters
    # Data augmentation for <unk> embedding training
    if augment:
        indices = torch.zeros_like(tags)
        bernoulli = torch.FloatTensor(*tags.shape, ).fill_(.3)
        bernoulli = torch.bernoulli(bernoulli).byte()
        bernoulli = bernoulli.to(tags.device)
        indices = indices.byte()
        for rep in singleton_idx:
            indices = indices | (tags == rep)
        indices = indices & bernoulli
        sentences[indices] = unk_idx
    return sentences, tags, lengths, letters, letters_lengths
Example #18
Source File: test_learning.py From bindsnet with GNU Affero General Public License v3.0 | 6 votes |
def test_rmax(self):
    # Connection test
    network = Network(dt=1.0)
    network.add_layer(Input(n=100, traces=True, traces_additive=True), name="input")
    network.add_layer(SRM0Nodes(n=100), name="output")
    network.add_connection(
        Connection(
            source=network.layers["input"],
            target=network.layers["output"],
            nu=1e-2,
            update_rule=Rmax,
        ),
        source="input",
        target="output",
    )
    network.run(
        inputs={"input": torch.bernoulli(torch.rand(250, 100)).byte()},
        time=250,
        reward=1.0,
    )
Example #19
Source File: alias_multinomial.py From Pytorch-NCE with MIT License | 6 votes |
def draw(self, *size):
    """Draw N samples from multinomial

    Args:
        - size: the output size of samples
    """
    max_value = self.alias.size(0)

    kk = self.alias.new(*size).random_(0, max_value).long().view(-1)
    prob = self.prob[kk]
    alias = self.alias[kk]
    # b is whether a random number is greater than q
    b = torch.bernoulli(prob).long()
    oq = kk.mul(b)
    oj = alias.mul(1 - b)

    return (oq + oj).view(size)
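draw assumes that self.prob and self.alias were built beforehand (typically in __init__). For context, here is a hedged sketch of one standard way to construct those tables, Vose's alias method, compatible with the draw logic above; the function name is hypothetical and not part of this project:

import torch

def build_alias_tables(probs):
    # probs: 1-D tensor of probabilities summing to 1
    K = probs.numel()
    prob = torch.zeros(K)
    alias = torch.zeros(K, dtype=torch.long)
    scaled = probs * K
    small = [i for i in range(K) if scaled[i] < 1.0]
    large = [i for i in range(K) if scaled[i] >= 1.0]
    while small and large:
        s, l = small.pop(), large.pop()
        prob[s] = scaled[s]          # probability of keeping bucket s itself
        alias[s] = l                 # otherwise fall through to its alias
        scaled[l] = scaled[l] - (1.0 - scaled[s])
        (small if scaled[l] < 1.0 else large).append(l)
    for i in large + small:          # leftovers (up to float error) keep themselves
        prob[i] = 1.0
    return prob, alias

# Drawing then mirrors the method above: bernoulli(prob[kk]) decides bucket vs. alias.
prob, alias = build_alias_tables(torch.tensor([0.1, 0.2, 0.3, 0.4]))
kk = torch.randint(0, 4, (10,))
b = torch.bernoulli(prob[kk]).long()
print(kk * b + alias[kk] * (1 - b))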
Example #20
Source File: policy_gradient.py From PyTorch-1.x-Reinforcement-Learning-Cookbook with MIT License | 6 votes |
def run_episode(env, weight):
    state = env.reset()
    grads = []
    total_reward = 0
    is_done = False
    while not is_done:
        state = torch.from_numpy(state).float()
        z = torch.matmul(state, weight)
        probs = torch.nn.Softmax()(z)
        action = int(torch.bernoulli(probs[1]).item())
        d_softmax = torch.diag(probs) - probs.view(-1, 1) * probs
        d_log = d_softmax[action] / probs[action]
        grad = state.view(-1, 1) * d_log
        grads.append(grad)
        state, reward, is_done, _ = env.step(action)
        total_reward += reward
        if is_done:
            break
    return total_reward, grads
Example #21
Source File: functional.py From pytorch-wrapper with MIT License | 6 votes |
def same_dropout(data_tensor, dropout_p, dim, is_model_training):
    """
    Drops the same random elements of a Tensor across the specified dimension, during training.

    :param data_tensor: ND Tensor.
    :param dropout_p: The dropout probability.
    :param dim: Int that corresponds to the dimension.
    :param is_model_training: Whether the model is currently training.
    :return: ND Tensor.
    """
    if dim < 0:
        dim = len(data_tensor.shape) + dim

    if dropout_p is None or dropout_p == 0 or not is_model_training:
        return data_tensor

    assert 0 <= dropout_p < 1, 'dropout probability must be in range [0,1)'

    shape = list(data_tensor.shape)
    shape[dim] = 1

    dp = torch.empty(*shape, dtype=torch.float, device=data_tensor.device)
    dp = torch.bernoulli(dp.fill_((1 - dropout_p))) / (1 - dropout_p)

    return data_tensor * dp
Example #22
Source File: tpu_lm_finetuning.py From ru_transformers with Apache License 2.0 | 6 votes |
def mask_tokens(inputs, tokenizer, args):
    """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original. """
    labels = inputs.clone()
    # We sample a few tokens in each sequence for masked-LM training (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa)
    probability_matrix = torch.full(labels.shape, args.mlm_probability)
    special_tokens_mask = [tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()]
    probability_matrix.masked_fill_(torch.tensor(special_tokens_mask, dtype=torch.bool), value=0.0)
    masked_indices = torch.bernoulli(probability_matrix).bool()
    labels[~masked_indices] = -1  # We only compute loss on masked tokens

    # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
    indices_replaced = torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & masked_indices
    inputs[indices_replaced] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

    # 10% of the time, we replace masked input tokens with random word
    indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced
    random_words = torch.randint(len(tokenizer), labels.shape, dtype=torch.long)
    inputs[indices_random] = random_words[indices_random]

    # The rest of the time (10% of the time) we keep the masked input tokens unchanged
    return inputs, labels

# from transformers/modeling_utils.py, adapted to tpu
Example #23
Source File: run_language_modeling.py From exbert with Apache License 2.0 | 5 votes |
def mask_tokens(inputs: torch.Tensor, tokenizer: PreTrainedTokenizer, args) -> Tuple[torch.Tensor, torch.Tensor]:
    """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original. """

    if tokenizer.mask_token is None:
        raise ValueError(
            "This tokenizer does not have a mask token which is necessary for masked language modeling. Remove the --mlm flag if you want to use this tokenizer."
        )

    labels = inputs.clone()
    # We sample a few tokens in each sequence for masked-LM training (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa)
    probability_matrix = torch.full(labels.shape, args.mlm_probability)
    special_tokens_mask = [
        tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()
    ]
    probability_matrix.masked_fill_(torch.tensor(special_tokens_mask, dtype=torch.bool), value=0.0)
    if tokenizer._pad_token is not None:
        padding_mask = labels.eq(tokenizer.pad_token_id)
        probability_matrix.masked_fill_(padding_mask, value=0.0)
    masked_indices = torch.bernoulli(probability_matrix).bool()
    labels[~masked_indices] = -100  # We only compute loss on masked tokens

    # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
    indices_replaced = torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & masked_indices
    inputs[indices_replaced] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

    # 10% of the time, we replace masked input tokens with random word
    indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced
    random_words = torch.randint(len(tokenizer), labels.shape, dtype=torch.long)
    inputs[indices_random] = random_words[indices_random]

    # The rest of the time (10% of the time) we keep the masked input tokens unchanged
    return inputs, labels
Example #24
Source File: runningstats.py From gandissect with MIT License | 5 votes |
def sample_portion(vec, p=0.5):
    bits = torch.bernoulli(
        torch.zeros(vec.shape[0], dtype=torch.uint8, device=vec.device), p)
    return vec[bits]
Example #25
Source File: model.py From torch-light with MIT License | 5 votes |
def __init__(self, dropout_prob, hidden_size, is_cuda):
    super().__init__()
    self.mask = torch.bernoulli(torch.Tensor(1, hidden_size).fill_(1. - dropout_prob))
    if is_cuda:
        self.mask = self.mask.cuda()
    self.dropout_prob = dropout_prob
Example #26
Source File: Layers.py From SDNet with MIT License | 5 votes |
def seq_dropout(x, p=0, training=False):
    """
    x: batch * len * input_size
    """
    if training == False or p == 0:
        return x
    dropout_mask = Variable(1.0 / (1 - p) * torch.bernoulli((1 - p) * (x.data.new(x.size(0), x.size(2)).zero_() + 1)), requires_grad=False)
    return dropout_mask.unsqueeze(1).expand_as(x) * x
Example #27
Source File: NCEAverage.py From FeatureDecoupling with MIT License | 5 votes |
def draw(self, N):
    ''' Draw N samples from multinomial '''
    K = self.alias.size(0)

    kk = torch.zeros(N, dtype=torch.long, device=self.prob.device).random_(0, K)
    prob = self.prob.index_select(0, kk)
    alias = self.alias.index_select(0, kk)
    # b is whether a random number is greater than q
    b = torch.bernoulli(prob)
    oq = kk.mul(b.long())
    oj = alias.mul((1 - b).long())

    return oq + oj
Example #28
Source File: Layer.py From PyTorch_Biaffine_Dependency_Parsing with Apache License 2.0 | 5 votes |
def forward(self, input, masks, initial=None):
    if self.batch_first:
        input = input.transpose(0, 1)
        masks = torch.unsqueeze(masks.transpose(0, 1), dim=2)
    max_time, batch_size, _ = input.size()
    masks = masks.expand(-1, -1, self.hidden_size)

    if initial is None:
        initial = Variable(input.data.new(batch_size, self.hidden_size).zero_())
        initial = (initial, initial)

    h_n = []
    c_n = []

    for layer in range(self.num_layers):
        max_time, batch_size, input_size = input.size()
        input_mask, hidden_mask = None, None
        if self.training:
            input_mask = input.data.new(batch_size, input_size).fill_(1 - self.dropout_in)
            input_mask = Variable(torch.bernoulli(input_mask), requires_grad=False)
            input_mask = input_mask / (1 - self.dropout_in)
            input_mask = torch.unsqueeze(input_mask, dim=2).expand(-1, -1, max_time).permute(2, 0, 1)
            input = input * input_mask

            hidden_mask = input.data.new(batch_size, self.hidden_size).fill_(1 - self.dropout_out)
            hidden_mask = Variable(torch.bernoulli(hidden_mask), requires_grad=False)
            hidden_mask = hidden_mask / (1 - self.dropout_out)

        layer_output, (layer_h_n, layer_c_n) = MyLSTM._forward_rnn(cell=self.fcells[layer], \
            input=input, masks=masks, initial=initial, drop_masks=hidden_mask)
        if self.bidirectional:
            blayer_output, (blayer_h_n, blayer_c_n) = MyLSTM._forward_brnn(cell=self.bcells[layer], \
                input=input, masks=masks, initial=initial, drop_masks=hidden_mask)

        h_n.append(torch.cat([layer_h_n, blayer_h_n], 1) if self.bidirectional else layer_h_n)
        c_n.append(torch.cat([layer_c_n, blayer_c_n], 1) if self.bidirectional else layer_c_n)
        input = torch.cat([layer_output, blayer_output], 2) if self.bidirectional else layer_output

    h_n = torch.stack(h_n, 0)
    c_n = torch.stack(c_n, 0)

    return input, (h_n, c_n)
Example #29
Source File: language_modeling_utils.py From simpletransformers with Apache License 2.0 | 5 votes |
def mask_tokens(inputs: torch.Tensor, tokenizer: PreTrainedTokenizer, args) -> Tuple[torch.Tensor, torch.Tensor]:
    """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original. """

    if tokenizer.mask_token is None:
        raise ValueError(
            "This tokenizer does not have a mask token which is necessary for masked language modeling."
            "Set 'mlm' to False in args if you want to use this tokenizer."
        )

    labels = inputs.clone()
    # We sample a few tokens in each sequence for masked-LM training
    # (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa)
    probability_matrix = torch.full(labels.shape, args.mlm_probability)
    special_tokens_mask = [
        tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()
    ]
    probability_matrix.masked_fill_(torch.tensor(special_tokens_mask, dtype=torch.bool), value=0.0)
    if tokenizer._pad_token is not None:
        padding_mask = labels.eq(tokenizer.pad_token_id)
        probability_matrix.masked_fill_(padding_mask, value=0.0)
    masked_indices = torch.bernoulli(probability_matrix).bool()
    labels[~masked_indices] = -100  # We only compute loss on masked tokens

    if args.model_type == "electra" and False:
        # For ELECTRA, we replace all masked input tokens with tokenizer.mask_token
        inputs[masked_indices] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)
    else:
        # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
        indices_replaced = torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & masked_indices
        inputs[indices_replaced] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

        # 10% of the time, we replace masked input tokens with random word
        indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced
        random_words = torch.randint(len(tokenizer), labels.shape, dtype=torch.long)
        inputs[indices_random] = random_words[indices_random]

    # The rest of the time (10% of the time) we keep the masked input tokens unchanged
    return inputs, labels
Example #30
Source File: Layers.py From SDNet with MIT License | 5 votes |
def generate_mask(new_data, dropout_p=0.0):
    new_data = (1 - dropout_p) * (new_data.zero_() + 1)
    for i in range(new_data.size(0)):
        one = random.randint(0, new_data.size(1) - 1)
        new_data[i][one] = 1
    mask = Variable(1.0 / (1 - dropout_p) * torch.bernoulli(new_data), requires_grad=False)
    return mask

# Get positional scores and scores for 'yes', 'no', 'unknown' cases