Python model.train() Examples
The following are 27 code examples of model.train(), collected from open-source projects. Each example lists its original source file, project, and license.
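Nearly all of the PyTorch examples below use model.train() the same way: it is called once before the training loop to put modules such as Dropout and BatchNorm into training mode, while model.eval() switches them back for validation or inference. As a quick reference, here is a minimal, self-contained sketch of that pattern; the toy model, data, and hyperparameters are invented purely for illustration and are not taken from any of the projects listed below.

import torch
import torch.nn as nn
import torch.nn.functional as F

# Hypothetical toy model and random data, just to show the train()/eval() toggle.
model = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Dropout(0.5), nn.Linear(32, 2))
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
inputs = torch.randn(64, 10)
targets = torch.randint(0, 2, (64,))

model.train()                      # enable dropout (and batch-norm updates, if present)
for epoch in range(3):
    optimizer.zero_grad()
    loss = F.cross_entropy(model(inputs), targets)
    loss.backward()
    optimizer.step()

model.eval()                       # switch back to deterministic inference mode
with torch.no_grad():
    predictions = model(inputs).argmax(dim=1)

Note that model.train() only toggles a flag on the modules; it does not start training by itself, which is why every example below still needs its own loop with an optimizer (or manual SGD update) and a backward pass.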
Example #1
Source File: scoring.py From deepWordBug with Apache License 2.0 | 7 votes
def grad(model, inputs, pred, classes):
    losses1 = torch.zeros(inputs.size()[0], inputs.size()[1])
    dloss = torch.zeros(inputs.size()[0], inputs.size()[1])
    if isinstance(model, torch.nn.DataParallel):
        model = model.module
    model.train()
    embd, output = model(inputs, returnembd=True)
    # embd.retain_grad()
    loss = F.nll_loss(output, pred)
    loss.backward()
    score = (inputs <= 2).float()
    score = -score
    score = embd.grad.norm(2, dim=2) + score * 1e9
    return score
Example #2
Source File: main.py From Img2Img-Translation-Networks with MIT License | 6 votes
def main(split_name, checkpoint_dir, cycle_lambda, rec_lambda, lsgan_lambda_a,
         lsgan_lambda_b, num_separate_layers_g, num_separate_layers_d,
         num_no_skip_layers, lr_g_mult, lr_d_mult, network_structure):
    """The main function."""
    params = dict()
    params['cycle_lambda'] = cycle_lambda
    params['rec_lambda'] = rec_lambda
    params['lsgan_lambda_a'] = lsgan_lambda_a
    params['lsgan_lambda_b'] = lsgan_lambda_b
    params['num_separate_layers_g'] = num_separate_layers_g
    params['num_separate_layers_d'] = num_separate_layers_d
    params['num_no_skip_layers'] = num_no_skip_layers
    params['lr_g_mult'] = lr_g_mult
    params['lr_d_mult'] = lr_d_mult

    model = Img2Img(split_name, params, base_lr=.0002, max_step=200,
                    checkpoint_dir='', network_structure=network_structure)
    model.train()
Example #3
Source File: operations.py From Saliency_Detection_Convolutional_Autoencoder with MIT License | 6 votes
def restore_model(model, sess, log_path):
    """
    Restore model (including hidden variable)
    In practice use to resume the training of the same model

    Args
        model    : model to restore variable to
        sess     : tensorflow session
        log_path : where to save
    Returns:
        step_b : the step number at which training ended
    """
    path = log_path + '/' + model.name
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(path)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        return ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
    else:
        print('------------------------------------------------------')
        print('No checkpoint file found')
        print('------------------------------------------------------ \n')
        exit()
Example #4
Source File: operations.py From Saliency_Detection_Convolutional_Autoencoder with MIT License | 6 votes
def save_model(model, sess, log_path, step):
    """
    Save model using tensorflow checkpoint (also save hidden variables)

    Args:
        model    : model to save variable from
        sess     : tensorflow session
        log_path : where to save
        step     : number of step at time of saving
    """
    path = log_path + '/' + model.name
    if tf.gfile.Exists(path):
        tf.gfile.DeleteRecursively(path)
    tf.gfile.MakeDirs(path)
    saver = tf.train.Saver()
    checkpoint_path = os.path.join(path, 'model.ckpt')
    saver.save(sess, checkpoint_path, global_step=step)
Example #5
Source File: train.py From torch-light with MIT License | 6 votes
def train(i):
    model.train()
    total_loss = 0
    for chars, words, position, sub_sidx, sub_eidx, obj_sidx, obj_eidx, sub_slidx, sub_elidx in tqdm(
            training_data, mininterval=1, desc='Train Processing', leave=False):
        optimizer.zero_grad()
        p_sub_sidx, p_sub_eidx, p_obj_sidx, p_obj_eidx, mask = model(
            chars, words, position, sub_slidx, sub_elidx)
        ss_loss = mask_binary_cross_entropy(p_sub_sidx, sub_sidx, mask)
        se_loss = mask_binary_cross_entropy(p_sub_eidx, sub_eidx, mask)
        os_loss = mask_binary_cross_entropy(p_obj_sidx, obj_sidx, mask)
        oe_loss = mask_binary_cross_entropy(p_obj_eidx, obj_eidx, mask)
        loss = ss_loss + se_loss + os_loss + oe_loss
        loss.backward()
        optimizer.step()
        optimizer.update_learning_rate()
        total_loss += loss.data.item()
    print(
        f"train epoch {i+1}/{args.epochs} loss: {total_loss/training_data.stop_step:.4f}")
Example #6
Source File: main.py From PyTorch with MIT License | 5 votes
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        model.zero_grad()
        if args.model == 'Transformer':
            output = model(data)
        else:
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
Example #7
Source File: main.py From dni-pytorch with MIT License | 5 votes
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        with dni.defer_backward():
            output, hidden = model(data, hidden)
            loss = criterion(output.view(-1, ntokens), targets)
            dni.backward(loss)

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs.
Example #8
Source File: train.py From MVCNN-TensorFlow with MIT License | 5 votes
def main(argv):
    st = time.time()
    print 'start loading data'

    listfiles_train, labels_train = read_lists(g_.TRAIN_LOL)
    listfiles_val, labels_val = read_lists(g_.VAL_LOL)
    dataset_train = Dataset(listfiles_train, labels_train, subtract_mean=False, V=g_.NUM_VIEWS)
    dataset_val = Dataset(listfiles_val, labels_val, subtract_mean=False, V=g_.NUM_VIEWS)

    print 'done loading data, time=', time.time() - st

    train(dataset_train, dataset_val, FLAGS.weights, FLAGS.caffemodel)
Example #9
Source File: operations.py From Saliency_Detection_Convolutional_Autoencoder with MIT License | 5 votes
def save_weight_only(model, sess, log_path, step):
    """
    Save model but only weight (meaning no hidden variable)
    In practice use this to just transfer weights from one model to the other

    Args:
        model    : model to save variable from
        sess     : tensorflow session
        log_path : where to save
        step     : number of step at time of saving
    """
    path = log_path + '/' + model.name + '_weight_only'
    if tf.gfile.Exists(path):
        tf.gfile.DeleteRecursively(path)
    tf.gfile.MakeDirs(path)

    variable_to_save = {}
    for i in range(30):
        name = 'conv_' + str(i)
        variable_to_save[name] = model.parameters_conv[i]
        if i in [2, 4] and model.concat:
            name = 'deconv_' + str(i)
            variable_to_save[name] = model.parameters_deconv[i][0]
            name = 'deconv_' + str(i) + '_bis'
            variable_to_save[name] = model.parameters_deconv[i][1]
        else:
            name = 'deconv_' + str(i)
            variable_to_save[name] = model.parameters_deconv[i]
        if i < 2:
            name = 'deconv_bis_' + str(i)
            variable_to_save[name] = model.deconv[i]
    saver = tf.train.Saver(variable_to_save)
    checkpoint_path = os.path.join(path, 'model.ckpt')
    saver.save(sess, checkpoint_path, global_step=step)
Example #10
Source File: main.py From word-language-model with BSD 3-Clause "New" or "Revised" License | 5 votes
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs.
Example #11
Source File: train.py From glow with MIT License | 5 votes
def main(hps):

    # Initialize Horovod.
    hvd.init()

    # Create tensorflow session
    sess = tensorflow_session()

    # Download and load dataset.
    tf.set_random_seed(hvd.rank() + hvd.size() * hps.seed)
    np.random.seed(hvd.rank() + hvd.size() * hps.seed)

    # Get data and set train_its and valid_its
    train_iterator, test_iterator, data_init = get_data(hps, sess)
    hps.train_its, hps.test_its, hps.full_test_its = get_its(hps)

    # Create log dir
    logdir = os.path.abspath(hps.logdir) + "/"
    if not os.path.exists(logdir):
        os.mkdir(logdir)

    # Create model
    import model
    model = model.model(sess, hps, train_iterator, test_iterator, data_init)

    # Initialize visualization functions
    visualise = init_visualizations(hps, model, logdir)

    if not hps.inference:
        # Perform training
        train(sess, model, hps, logdir, visualise)
    else:
        infer(sess, model, hps, test_iterator)
Example #12
Source File: main.py From examples with BSD 3-Clause "New" or "Revised" License | 5 votes
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        model.zero_grad()
        if args.model == 'Transformer':
            output = model(data)
            output = output.view(-1, ntokens)
        else:
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        loss = criterion(output, targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        if args.dry_run:
            break
Example #13
Source File: main.py From Count-Sketch-Optimizers with Apache License 2.0 | 5 votes
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            sys.stdout.flush()
            total_loss = 0
            start_time = time.time()
Example #14
Source File: main.py From HairNet with MIT License | 5 votes
def main():
    if args.mode == 'train':
        print(args.path)
        train(args.path)
    if args.mode == 'test':
        test(args.path, args.weight)
Example #15
Source File: main.py From PyTorch-NLP with BSD 3-Clause "New" or "Revised" License | 5 votes
def evaluate(data_source, source_sampler, target_sampler, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    hidden = model.init_hidden(batch_size)
    for source_sample, target_sample in zip(source_sampler, target_sampler):
        model.train()
        data = torch.stack([data_source[i] for i in source_sample])
        targets = torch.stack([data_source[i] for i in target_sample]).view(-1)
        with torch.no_grad():
            output, hidden = model(data, hidden)
            total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).item()
            hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)
Example #16
Source File: scoring.py From deepWordBug with Apache License 2.0 | 5 votes
def grad_unconstrained(model, inputs, pred, classes):
    losses1 = torch.zeros(inputs.size()[0], inputs.size()[1])
    dloss = torch.zeros(inputs.size()[0], inputs.size()[1])
    if isinstance(model, torch.nn.DataParallel):
        model = model.module
    model.train()
    embd, output = model(inputs, returnembd=True)
    loss = F.nll_loss(output, pred)
    loss.backward()
    score = embd.grad.norm(2, dim=2)
    return score
Example #17
Source File: train.py From l2w with GNU General Public License v3.0 | 4 votes
def train():
    global lr, best_val_loss
    # Turn on training mode which enables dropout.
    model.train()
    total_loss, nbatches = 0, 0
    start_time = time.time()
    ntokens = len(corpus.dictionary.idx2word)
    hidden = model.init_hidden(args.batch_size)
    for b, batch in enumerate(corpus.iter('train', args.batch_size, args.bptt, use_cuda=args.cuda)):
        model.train()
        source, target = batch
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        model.softmax.set_target(target.data.view(-1))
        output, hidden = model(source, hidden)
        loss = criterion(output, target.view(-1))
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            if p.grad is not None:
                p.data.add_(-lr, p.grad.data)

        total_loss += loss.data.cpu()

        if b % args.log_interval == 0 and b > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            val_loss = evaluate('valid')
            print('| epoch {:3d} | batch {:5d} | lr {:02.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | valid loss {:5.2f} | valid ppl {:8.2f}'.format(
                epoch, b, lr, elapsed * 1000 / args.log_interval, cur_loss,
                math.exp(cur_loss), val_loss, math.exp(val_loss)))

            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                with open(args.save, 'wb') as f:
                    torch.save(model, f)
                best_val_loss = val_loss
            else:
                # Anneal the learning rate if no improvement has been seen in the validation dataset.
                lr *= args.ar

            total_loss = 0
            start_time = time.time()

# At any point you can hit Ctrl + C to break out of training early.
Example #18
Source File: finetune.py From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License | 4 votes
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)

        loss = raw_loss
        # Activiation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

# Load the best saved model.
Example #19
Source File: main.py From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License | 4 votes
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
        # Activiation Regularization
        if args.alpha: loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta: loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

# Loop over epochs.
Example #20
Source File: main.py From PyTorch-NLP with BSD 3-Clause "New" or "Revised" License | 4 votes
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    batch = 0
    for source_sample, target_sample in zip(train_source_sampler, train_target_sampler):
        model.train()
        data = torch.stack([train_data[i] for i in source_sample]).t_().contiguous()
        targets = torch.stack([train_data[i] for i in target_sample]).t_().contiguous().view(-1)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
        # Activiation Regularization
        if args.alpha:
            loss = loss + sum(
                args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta:
            loss = loss + sum(
                args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.item()
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                epoch, batch, len(train_source_sampler) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1

# Loop over epochs.
Example #21
Source File: operations.py From Saliency_Detection_Convolutional_Autoencoder with MIT License | 4 votes
def restore_weight_from(model, name, sess, log_path, copy_concat=False):
    """
    Restore model (excluding hidden variable)
    In practice use to train a model with the weight from another model.
    As long as both model have architecture from the original model.py, then it works
    Compatible w or w/o direct connections

    Args
        model       : model to restore variable to
        name        : name of model to copy
        sess        : tensorflow session
        log_path    : where to restore
        copy_concat : specify if the model to copy from also had direct connections
    Returns:
        step_b : the step number at which training ended
    """
    path = log_path + '/' + name + '_weight_only'

    variable_to_save = {}
    for i in range(30):
        name = 'conv_' + str(i)
        variable_to_save[name] = model.parameters_conv[i]
        if i < 2:
            if copy_concat == model.concat:
                name = 'deconv_' + str(i)
                variable_to_save[name] = model.parameters_deconv[i]
                name = 'deconv_bis_' + str(i)
                variable_to_save[name] = model.deconv[i]
        else:
            if i in [2, 4] and model.concat:
                name = 'deconv_' + str(i)
                variable_to_save[name] = model.parameters_deconv[i][0]
                if copy_concat:
                    name = 'deconv_' + str(i) + '_bis'
                    variable_to_save[name] = model.parameters_deconv[i][1]
            elif i in [2, 4] and not model.concat:
                name = 'deconv_' + str(i)
                variable_to_save[name] = model.parameters_deconv[i]
            else:
                name = 'deconv_' + str(i)
                variable_to_save[name] = model.parameters_deconv[i]
    saver = tf.train.Saver(variable_to_save)
    ckpt = tf.train.get_checkpoint_state(path)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        return ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
    else:
        print('------------------------------------------------------')
        print('No checkpoint file found')
        print('------------------------------------------------------ \n')
        exit()
Example #22
Source File: train.py From pix2pix-flow with MIT License | 4 votes
def main(hps):

    # Initialize Horovod.
    hvd.init()

    # Create tensorflow session
    sess = tensorflow_session()

    # Download and load dataset.
    tf.set_random_seed(hvd.rank() + hvd.size() * hps.seed)
    np.random.seed(hvd.rank() + hvd.size() * hps.seed)

    # Get data and set train_its and valid_its
    train_iterator_A, test_iterator_A, data_init_A, train_iterator_B, test_iterator_B, data_init_B = get_data(hps, sess)
    hps.train_its, hps.test_its, hps.full_test_its = get_its(hps)

    # Create log dir
    logdir = os.path.abspath(hps.logdir) + "/"
    if not os.path.exists(logdir):
        os.mkdir(logdir)

    # Set up restore path
    if hps.inference:
        if hps.restore_path_A == '':
            hps.restore_path_A = os.path.join(hps.logdir, 'model_A_best_loss.ckpt')
        if hps.restore_path_B == '':
            hps.restore_path_B = os.path.join(hps.logdir, 'model_B_best_loss.ckpt')

    # Create model
    import model
    train_iterators = {'A': train_iterator_A, 'B': train_iterator_B}
    test_iterators = {'A': test_iterator_A, 'B': test_iterator_B}
    data_inits = {'A': data_init_A, 'B': data_init_B}
    model = model.model(sess, hps, train_iterators, test_iterators, data_inits)

    # Initialize visualization functions
    visualise = init_visualizations(hps, logdir, model)

    if not hps.inference:
        train(sess, model, hps, logdir, visualise)
    else:
        iterators = {'A': test_iterator_A, 'B': test_iterator_B}
        infer(sess, model, hps, iterators, hps.full_test_its)
Example #23
Source File: main.py From LM_syneval with MIT License | 4 votes
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if (not args.single) and (torch.cuda.device_count() > 1):
        # "module" is necessary when using DataParallel
        hidden = model.module.init_hidden(args.batch_size)
    else:
        hidden = model.init_hidden(args.batch_size)
    # UNCOMMENT FOR DEBUGGING
    # random.seed(10)
    order = list(enumerate(range(0, train_lm_data.size(0) + train_ccg_data.size(0) - 1, args.bptt)))
    random.shuffle(order)
    for batch, i in order:  # enumerate(range(0, train_lm_data.size(0) + train_ccg_data.size(0) - 1, args.bptt)):
        # TAG
        if i > train_lm_data.size(0):
            data, targets = get_batch(train_ccg_data, i - train_lm_data.size(0))
        # LM
        else:
            data, targets = get_batch(train_lm_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.item()  # data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_lm_data)+len(train_ccg_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs.
Example #24
Source File: main.py From vmf_vae_nlp with MIT License | 4 votes
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs.
Example #25
Source File: main.py From Img2Img-Translation-Networks with MIT License | 4 votes
def compute_losses(self):
    """Compute losses."""
    self.reconstruction_loss_a = losses.reconstruction_loss(
        real_images=self.input_a, generated_images=self.ae_images_a)
    self.reconstruction_loss_b = losses.reconstruction_loss(
        real_images=self.input_b, generated_images=self.ae_images_b)

    self.lsgan_loss_fake_a = losses.lsgan_loss_generator(
        self.prob_fake_a_is_real)
    self.lsgan_loss_fake_b = losses.lsgan_loss_generator(
        self.prob_fake_b_is_real)

    self.cycle_consistency_loss_a = losses.cycle_consistency_loss(
        real_images=self.input_a, generated_images=self.cycle_images_a)
    self.cycle_consistency_loss_b = losses.cycle_consistency_loss(
        real_images=self.input_b, generated_images=self.cycle_images_b)

    self.g_loss = self._rec_lambda_a * self.reconstruction_loss_a + \
        self._rec_lambda_b * self.reconstruction_loss_b + \
        self._cycle_lambda_a * self.cycle_consistency_loss_a + \
        self._cycle_lambda_b * self.cycle_consistency_loss_b + \
        self._lsgan_lambda_a * self.lsgan_loss_fake_a + \
        self._lsgan_lambda_b * self.lsgan_loss_fake_b

    self.d_loss_A = losses.lsgan_loss_discriminator(
        prob_real_is_real=self.prob_real_a_is_real,
        prob_fake_is_real=self.prob_fake_pool_a_is_real)
    self.d_loss_B = losses.lsgan_loss_discriminator(
        prob_real_is_real=self.prob_real_b_is_real,
        prob_fake_is_real=self.prob_fake_pool_b_is_real)

    self.model_vars = tf.trainable_variables()

    d_a_vars = [var for var in self.model_vars
                if 'd1' in var.name or 'd_shared' in var.name]
    d_b_vars = [var for var in self.model_vars
                if 'd2' in var.name or 'd_shared' in var.name]
    g_vars = [var for var in self.model_vars
              if 'ae1' in var.name or 'ae2' in var.name or 'ae_shared' in var.name]

    optimizer = tf.train.AdamOptimizer(self.learning_rate, beta1=0.5)
    self.d_A_trainer = optimizer.minimize(self.d_loss_A, var_list=d_a_vars)
    self.d_B_trainer = optimizer.minimize(self.d_loss_B, var_list=d_b_vars)
    self.g_trainer = optimizer.minimize(self.g_loss, var_list=g_vars)

    self.create_summaries()
Example #26
Source File: main.py From lm-context-analysis with Apache License 2.0 | 4 votes
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)

        loss = raw_loss
        # Activiation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

# Loop over epochs.
Example #27
Source File: finetune.py From lm-context-analysis with Apache License 2.0 | 4 votes
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)

        loss = raw_loss
        # Activiation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

# Load the best saved model.