Python model.train() Examples

The following are 27 code examples of model.train(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module model, or try the search function.
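
In PyTorch, model.train() (i.e. torch.nn.Module.train()) puts the module and all of its submodules into training mode, which changes the behaviour of layers such as Dropout and BatchNorm; model.eval() switches them back for inference. A minimal sketch of this mode switch (not taken from any of the projects below):

import torch.nn as nn

model = nn.Sequential(nn.Linear(8, 8), nn.Dropout(p=0.5), nn.Linear(8, 2))

model.train()              # enable training behaviour (dropout active)
print(model.training)      # True

model.eval()               # disable dropout / use running batch-norm statistics
print(model.training)      # False

Note that a few examples below call a user-defined train() method on a custom model class instead of torch.nn.Module.train().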
Example #1
Source File: scoring.py    From deepWordBug with Apache License 2.0
def grad(model, inputs, pred, classes):
    losses1 = torch.zeros(inputs.size()[0],inputs.size()[1])
    dloss = torch.zeros(inputs.size()[0],inputs.size()[1])
    if isinstance(model,torch.nn.DataParallel):
        model = model.module
    model.train()
    embd,output = model(inputs, returnembd = True)
    # embd.retain_grad()
    loss = F.nll_loss(output,pred)

    loss.backward()
    score = (inputs<=2).float()
    score = -score
    score = embd.grad.norm(2,dim=2) + score * 1e9
    return score 
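
The scoring function above reads embd.grad after loss.backward(); for an intermediate tensor such as an embedding output, PyTorch only populates .grad when retain_grad() has been called on it (here presumably handled inside the model when returnembd=True). A minimal standalone sketch of that mechanism, with illustrative names and shapes rather than deepWordBug's own:

import torch
import torch.nn as nn

emb = nn.Embedding(100, 16)
ids = torch.randint(0, 100, (4, 7))       # batch of 4 sequences, 7 tokens each
embd = emb(ids)
embd.retain_grad()                        # keep the gradient on this intermediate tensor
loss = embd.sum()                         # stand-in for the model's NLL loss
loss.backward()
print(embd.grad.norm(2, dim=2).shape)     # per-token gradient norms: torch.Size([4, 7])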
Example #2
Source File: main.py    From Img2Img-Translation-Networks with MIT License
def main(split_name, checkpoint_dir, cycle_lambda, rec_lambda,
         lsgan_lambda_a, lsgan_lambda_b, num_separate_layers_g,
         num_separate_layers_d, num_no_skip_layers,
         lr_g_mult, lr_d_mult, network_structure):
    """The main function."""
    params = dict()
    params['cycle_lambda'] = cycle_lambda
    params['rec_lambda'] = rec_lambda
    params['lsgan_lambda_a'] = lsgan_lambda_a
    params['lsgan_lambda_b'] = lsgan_lambda_b
    params['num_separate_layers_g'] = num_separate_layers_g
    params['num_separate_layers_d'] = num_separate_layers_d
    params['num_no_skip_layers'] = num_no_skip_layers
    params['lr_g_mult'] = lr_g_mult
    params['lr_d_mult'] = lr_d_mult

    model = Img2Img(split_name, params, base_lr=.0002, max_step=200,
                    checkpoint_dir='',
                    network_structure=network_structure)

    model.train() 
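
Unlike the PyTorch examples on this page, model.train() in this TensorFlow project is not the nn.Module mode switch: Img2Img is a model class and train() is its own training-loop entry point. A minimal sketch of that pattern (the class, method body, and parameter names below are illustrative placeholders, not code from the project):

class SimpleTrainer:
    """Illustrative stand-in for a model class whose train() runs the training loop."""

    def __init__(self, params, max_step=200):
        self.params = params
        self.max_step = max_step

    def train(self):
        for step in range(self.max_step):
            # build inputs, run one optimization step, log/checkpoint here
            pass


model = SimpleTrainer(params={'cycle_lambda': 1.0})
model.train()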
Example #3
Source File: operations.py    From Saliency_Detection_Convolutional_Autoencoder with MIT License
def restore_model(model, sess, log_path):
  """
  Restore a model (including hidden variables).
  In practice, used to resume training of the same model.
  
  Args
    model            :         model to restore variable to
    sess             :         tensorflow session
    log_path         :         where to restore from
    
  Returns:
    step_b           :         the step number at which training ended
  """
    
  path = log_path + '/' + model.name  
  saver = tf.train.Saver()
  ckpt = tf.train.get_checkpoint_state(path)
  if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)
    return ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
  else:
    print('------------------------------------------------------')
    print('No checkpoint file found')
    print('------------------------------------------------------ \n')
    exit() 
Example #4
Source File: operations.py    From Saliency_Detection_Convolutional_Autoencoder with MIT License
def save_model(model, sess, log_path, step):
  """
  Save the model using a tensorflow checkpoint (also saves hidden variables)
  
  Args:
    model            :         model to save variable from
    sess             :         tensorflow session
    log_path         :         where to save
    step             :         number of step at time of saving
  """
 
  path = log_path + '/' + model.name
  if tf.gfile.Exists(path):
    tf.gfile.DeleteRecursively(path)
  tf.gfile.MakeDirs(path)
  saver = tf.train.Saver()
  checkpoint_path = os.path.join(path, 'model.ckpt')
  saver.save(sess, checkpoint_path, global_step=step) 
Example #5
Source File: train.py    From torch-light with MIT License
def train(i):
    model.train()
    total_loss = 0
    for chars, words, position, sub_sidx, sub_eidx, obj_sidx, obj_eidx, sub_slidx, sub_elidx in tqdm(training_data, mininterval=1, desc='Train Processing', leave=False):
        optimizer.zero_grad()
        p_sub_sidx, p_sub_eidx, p_obj_sidx, p_obj_eidx, mask = model(
            chars, words, position, sub_slidx, sub_elidx)

        ss_loss = mask_binary_cross_entropy(p_sub_sidx, sub_sidx, mask)
        se_loss = mask_binary_cross_entropy(p_sub_eidx, sub_eidx, mask)
        os_loss = mask_binary_cross_entropy(p_obj_sidx, obj_sidx, mask)
        oe_loss = mask_binary_cross_entropy(p_obj_eidx, obj_eidx, mask)

        loss = ss_loss+se_loss+os_loss+oe_loss
        loss.backward()

        optimizer.step()
        optimizer.update_learning_rate()
        total_loss += loss.data.item()

    print(
        f"train epoch {i+1}/{args.epochs} loss: {total_loss/training_data.stop_step:.4f}") 
Example #6
Source File: main.py    From PyTorch with MIT License
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        model.zero_grad()
        if args.model == 'Transformer':
            output = model(data)
        else:
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time() 
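
The update p.data.add_(-lr, p.grad.data) above is a manual SGD step without an optimizer object; the two-argument form of Tensor.add_ is deprecated in recent PyTorch releases. A minimal self-contained sketch of the equivalent update using the newer alpha keyword (the tiny model and lr value are illustrative):

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
lr = 0.1
loss = model(torch.randn(3, 4)).sum()
loss.backward()

with torch.no_grad():                   # manual SGD, no optimizer object
    for p in model.parameters():
        if p.grad is not None:
            p.add_(p.grad, alpha=-lr)   # p <- p - lr * grad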
Example #7
Source File: main.py    From dni-pytorch with MIT License
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        with dni.defer_backward():
            output, hidden = model(data, hidden)
            loss = criterion(output.view(-1, ntokens), targets)
            dni.backward(loss)

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs. 
Example #8
Source File: train.py    From MVCNN-TensorFlow with MIT License
def main(argv):
    st = time.time() 
    print('start loading data')

    listfiles_train, labels_train = read_lists(g_.TRAIN_LOL)
    listfiles_val, labels_val = read_lists(g_.VAL_LOL)
    dataset_train = Dataset(listfiles_train, labels_train, subtract_mean=False, V=g_.NUM_VIEWS)
    dataset_val = Dataset(listfiles_val, labels_val, subtract_mean=False, V=g_.NUM_VIEWS)

    print('done loading data, time=', time.time() - st)

    train(dataset_train, dataset_val, FLAGS.weights, FLAGS.caffemodel) 
Example #9
Source File: operations.py    From Saliency_Detection_Convolutional_Autoencoder with MIT License
def save_weight_only(model, sess, log_path, step):
  """
  Save only the model weights (no hidden variables)
  In practice, use this to transfer weights from one model to another
  
  Args:
    model            :         model to save variable from
    sess             :         tensorflow session
    log_path         :         where to save
    step             :         number of step at time of saving
  """

  path = log_path + '/' + model.name + '_weight_only'
  if tf.gfile.Exists(path):
    tf.gfile.DeleteRecursively(path)
  tf.gfile.MakeDirs(path)
  
  variable_to_save = {}
  for i in range(30):
    name = 'conv_' + str(i)
    variable_to_save[name] = model.parameters_conv[i]
    if i in [2, 4] and model.concat:
      name = 'deconv_' + str(i)
      variable_to_save[name] = model.parameters_deconv[i][0]
      name = 'deconv_' + str(i) + '_bis'
      variable_to_save[name] = model.parameters_deconv[i][1]
    else:
      name = 'deconv_' + str(i)
      variable_to_save[name] = model.parameters_deconv[i]
    if i < 2:
      name = 'deconv_bis_' + str(i)
      variable_to_save[name] = model.deconv[i]
  saver = tf.train.Saver(variable_to_save)
  checkpoint_path = os.path.join(path, 'model.ckpt')
  saver.save(sess, checkpoint_path, global_step=step) 
Example #10
Source File: main.py    From word-language-model with BSD 3-Clause "New" or "Revised" License
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs. 
Example #11
Source File: train.py    From glow with MIT License
def main(hps):

    # Initialize Horovod.
    hvd.init()

    # Create tensorflow session
    sess = tensorflow_session()

    # Download and load dataset.
    tf.set_random_seed(hvd.rank() + hvd.size() * hps.seed)
    np.random.seed(hvd.rank() + hvd.size() * hps.seed)

    # Get data and set train_its and valid_its
    train_iterator, test_iterator, data_init = get_data(hps, sess)
    hps.train_its, hps.test_its, hps.full_test_its = get_its(hps)

    # Create log dir
    logdir = os.path.abspath(hps.logdir) + "/"
    if not os.path.exists(logdir):
        os.mkdir(logdir)

    # Create model
    import model
    model = model.model(sess, hps, train_iterator, test_iterator, data_init)

    # Initialize visualization functions
    visualise = init_visualizations(hps, model, logdir)

    if not hps.inference:
        # Perform training
        train(sess, model, hps, logdir, visualise)
    else:
        infer(sess, model, hps, test_iterator) 
Example #12
Source File: main.py    From examples with BSD 3-Clause "New" or "Revised" License
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        model.zero_grad()
        if args.model == 'Transformer':
            output = model(data)
            output = output.view(-1, ntokens)
        else:
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        loss = criterion(output, targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        if args.dry_run:
            break 
Example #13
Source File: main.py    From Count-Sketch-Optimizers with Apache License 2.0
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            sys.stdout.flush()
            total_loss = 0
            start_time = time.time() 
Example #14
Source File: main.py    From HairNet with MIT License
def main():
    if args.mode == 'train':
        print(args.path)
        train(args.path)
    if args.mode == 'test':
        test(args.path, args.weight) 
Example #15
Source File: main.py    From PyTorch-NLP with BSD 3-Clause "New" or "Revised" License
def evaluate(data_source, source_sampler, target_sampler, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    hidden = model.init_hidden(batch_size)

    for source_sample, target_sample in zip(source_sampler, target_sampler):
        model.train()
        data = torch.stack([data_source[i] for i in source_sample])
        targets = torch.stack([data_source[i] for i in target_sample]).view(-1)
        with torch.no_grad():
            output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output,
                                            targets).item()
        hidden = repackage_hidden(hidden)
    return total_loss / len(data_source) 
Example #16
Source File: scoring.py    From deepWordBug with Apache License 2.0
def grad_unconstrained(model, inputs, pred, classes):
    losses1 = torch.zeros(inputs.size()[0],inputs.size()[1])
    dloss = torch.zeros(inputs.size()[0],inputs.size()[1])
    if isinstance(model,torch.nn.DataParallel):
        model = model.module
    model.train()
    embd,output = model(inputs, returnembd = True)
    loss = F.nll_loss(output,pred)

    loss.backward()
    score = embd.grad.norm(2,dim=2)
    return score 
Example #17
Source File: train.py    From l2w with GNU General Public License v3.0
def train():
    global lr, best_val_loss
    # Turn on training mode which enables dropout.
    model.train()
    total_loss, nbatches = 0, 0
    start_time = time.time()
    ntokens = len(corpus.dictionary.idx2word)
    hidden = model.init_hidden(args.batch_size)
    for b, batch in enumerate(corpus.iter('train', args.batch_size, args.bptt, use_cuda=args.cuda)):
        model.train()
        source, target = batch
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        model.softmax.set_target(target.data.view(-1))
        output, hidden = model(source, hidden)
        loss = criterion(output, target.view(-1))
        loss.backward()


        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            if p.grad is not None:
                p.data.add_(-lr, p.grad.data)

        total_loss += loss.data.cpu()

        if b % args.log_interval == 0 and b > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            val_loss = evaluate('valid')
            print('| epoch {:3d} | batch {:5d} | lr {:02.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | valid loss {:5.2f} | valid ppl {:8.2f}'.format(
                epoch, b, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss),
                val_loss, math.exp(val_loss)))

            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                with open(args.save, 'wb') as f:
                    torch.save(model, f)
                best_val_loss = val_loss
            else:
                # Anneal the learning rate if no improvement has been seen in the validation dataset.
                lr *= args.ar

            total_loss = 0
            start_time = time.time()



# At any point you can hit Ctrl + C to break out of training early. 
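
The closing comment refers to wrapping the epoch loop in a try/except so Ctrl + C ends training cleanly instead of losing the run. A minimal sketch of such a surrounding loop, reconstructed from the comment rather than shown in the excerpt:

try:
    for epoch in range(1, args.epochs + 1):
        train()
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')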
Example #18
Source File: finetune.py    From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)

        loss = raw_loss
        # Activation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len


# Load the best saved model. 
Example #19
Source File: main.py    From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
        # Activation Regularization
        if args.alpha: loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta: loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

# Loop over epochs. 
Example #20
Source File: main.py    From PyTorch-NLP with BSD 3-Clause "New" or "Revised" License
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    batch = 0
    for source_sample, target_sample in zip(train_source_sampler, train_target_sampler):
        model.train()
        data = torch.stack([train_data[i] for i in source_sample]).t_().contiguous()
        targets = torch.stack([train_data[i] for i in target_sample]).t_().contiguous().view(-1)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
        # Activation Regularization
        if args.alpha:
            loss = loss + sum(
                args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta:
            loss = loss + sum(
                args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.item()
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                      epoch, batch,
                      len(train_source_sampler) // args.bptt,
                      optimizer.param_groups[0]['lr'], elapsed * 1000 / args.log_interval, cur_loss,
                      math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1


# Loop over epochs. 
Example #21
Source File: operations.py    From Saliency_Detection_Convolutional_Autoencoder with MIT License
def restore_weight_from(model, name, sess, log_path, copy_concat = False):
  """
  Restore a model (excluding hidden variables).
  In practice, used to train a model with the weights from another model.
  As long as both models share the architecture from the original model.py, this works.
  Compatible with or without direct connections.
  
  Args
    model            :         model to restore variable to
    name             :         name of model to copy
    sess             :         tensorflow session
    log_path         :         where to restore
    copy_concat      :         specify if the model to copy from also had direct connections
    
  Returns:
    step_b           :         the step number at which training ended
  """

  path = log_path + '/' + name + '_weight_only'
  
  variable_to_save = {}
  for i in range(30):
    name = 'conv_' + str(i)
    variable_to_save[name] = model.parameters_conv[i]
    if i < 2:
      if copy_concat == model.concat:
        name = 'deconv_' + str(i)
        variable_to_save[name] = model.parameters_deconv[i]
        name = 'deconv_bis_' + str(i)
        variable_to_save[name] = model.deconv[i]
    else:
      if i in [2, 4] and model.concat:
        name = 'deconv_' + str(i)
        variable_to_save[name] = model.parameters_deconv[i][0]
        if copy_concat:
          name = 'deconv_' + str(i) + '_bis'
          variable_to_save[name] = model.parameters_deconv[i][1]
      elif i in [2, 4] and not model.concat:
        name = 'deconv_' + str(i)
        variable_to_save[name] = model.parameters_deconv[i]
      else:
        name = 'deconv_' + str(i)
        variable_to_save[name] = model.parameters_deconv[i]

  saver = tf.train.Saver(variable_to_save)
  ckpt = tf.train.get_checkpoint_state(path)
  if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)
    return ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
  else:
    print('------------------------------------------------------')
    print('No checkpoint file found')
    print('------------------------------------------------------ \n')
    exit() 
Example #22
Source File: train.py    From pix2pix-flow with MIT License
def main(hps):

    # Initialize Horovod.
    hvd.init()

    # Create tensorflow session
    sess = tensorflow_session()

    # Download and load dataset.
    tf.set_random_seed(hvd.rank() + hvd.size() * hps.seed)
    np.random.seed(hvd.rank() + hvd.size() * hps.seed)

    # Get data and set train_its and valid_its
    train_iterator_A, test_iterator_A, data_init_A, train_iterator_B, test_iterator_B, data_init_B = get_data(hps, sess)
    hps.train_its, hps.test_its, hps.full_test_its = get_its(hps)

    # Create log dir
    logdir = os.path.abspath(hps.logdir) + "/"
    if not os.path.exists(logdir):
        os.mkdir(logdir)

    # Set up restore path
    if hps.inference:
        if hps.restore_path_A == '':
            hps.restore_path_A = os.path.join(hps.logdir, 'model_A_best_loss.ckpt')
        if hps.restore_path_B == '':
            hps.restore_path_B = os.path.join(hps.logdir, 'model_B_best_loss.ckpt')

    # Create model
    import model
    train_iterators = {'A': train_iterator_A, 'B': train_iterator_B}
    test_iterators = {'A': test_iterator_A, 'B': test_iterator_B}
    data_inits = {'A': data_init_A, 'B': data_init_B}
    model = model.model(sess, hps,
                        train_iterators, test_iterators, data_inits)
    # Initialize visualization functions
    visualise = init_visualizations(hps, logdir, model)
    if not hps.inference:
        train(sess, model, hps, logdir, visualise)
    else:
        iterators = {'A': test_iterator_A, 'B': test_iterator_B}
        infer(sess, model, hps, iterators, hps.full_test_its) 
Example #23
Source File: main.py    From LM_syneval with MIT License
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if (not args.single) and (torch.cuda.device_count() > 1):
        # "module" is necessary when using DataParallel
        hidden = model.module.init_hidden(args.batch_size)
    else:
        hidden = model.init_hidden(args.batch_size)
    # UNCOMMENT FOR DEBUGGING
    #random.seed(10)
    order = list(enumerate(range(0, train_lm_data.size(0) + train_ccg_data.size(0) - 1, args.bptt)))
    random.shuffle(order)
    for batch, i in order:#enumerate(range(0, train_lm_data.size(0) + train_ccg_data.size(0) - 1, args.bptt)):
        # TAG
        if i > train_lm_data.size(0):
            data, targets = get_batch(train_ccg_data, i - train_lm_data.size(0))
        # LM
        else:
            data, targets = get_batch(train_lm_data, i)
            
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, (len(train_lm_data)+len(train_ccg_data)) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs. 
Example #24
Source File: main.py    From vmf_vae_nlp with MIT License
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                              elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()


# Loop over epochs. 
Example #25
Source File: main.py    From Img2Img-Translation-Networks with MIT License
def compute_losses(self):
        """Compute losses."""
        self.reconstruction_loss_a = losses.reconstruction_loss(
            real_images=self.input_a,
            generated_images=self.ae_images_a)
        self.reconstruction_loss_b = losses.reconstruction_loss(
            real_images=self.input_b,
            generated_images=self.ae_images_b)

        self.lsgan_loss_fake_a = losses.lsgan_loss_generator(
            self.prob_fake_a_is_real)
        self.lsgan_loss_fake_b = losses.lsgan_loss_generator(
            self.prob_fake_b_is_real)

        self.cycle_consistency_loss_a = losses.cycle_consistency_loss(
            real_images=self.input_a, generated_images=self.cycle_images_a)
        self.cycle_consistency_loss_b = losses.cycle_consistency_loss(
            real_images=self.input_b, generated_images=self.cycle_images_b)

        self.g_loss = self._rec_lambda_a * self.reconstruction_loss_a + \
            self._rec_lambda_b * self.reconstruction_loss_b + \
            self._cycle_lambda_a * self.cycle_consistency_loss_a + \
            self._cycle_lambda_b * self.cycle_consistency_loss_b + \
            self._lsgan_lambda_a * self.lsgan_loss_fake_a + \
            self._lsgan_lambda_b * self.lsgan_loss_fake_b

        self.d_loss_A = losses.lsgan_loss_discriminator(
            prob_real_is_real=self.prob_real_a_is_real,
            prob_fake_is_real=self.prob_fake_pool_a_is_real)
        self.d_loss_B = losses.lsgan_loss_discriminator(
            prob_real_is_real=self.prob_real_b_is_real,
            prob_fake_is_real=self.prob_fake_pool_b_is_real)

        self.model_vars = tf.trainable_variables()

        d_a_vars = [var for var in self.model_vars if 'd1' in var.name or
                    'd_shared' in var.name]
        d_b_vars = [var for var in self.model_vars if 'd2' in var.name or
                    'd_shared' in var.name]
        g_vars = [var for var in self.model_vars
                  if 'ae1' in var.name or 'ae2' in var.name or
                  'ae_shared' in var.name]

        optimizer = tf.train.AdamOptimizer(self.learning_rate, beta1=0.5)

        self.d_A_trainer = optimizer.minimize(self.d_loss_A, var_list=d_a_vars)
        self.d_B_trainer = optimizer.minimize(self.d_loss_B, var_list=d_b_vars)
        self.g_trainer = optimizer.minimize(self.g_loss, var_list=g_vars)

        self.create_summaries() 
Example #26
Source File: main.py    From lm-context-analysis with Apache License 2.0
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)

        loss = raw_loss
        # Activation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

# Loop over epochs. 
Example #27
Source File: finetune.py    From lm-context-analysis with Apache License 2.0
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)

        loss = raw_loss
        # Activation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len


# Load the best saved model.