Python utils.get_batch() Examples

The following are 30 code examples of utils.get_batch(), drawn from open-source projects. The project and source file for each example are noted above its code, so you can follow up in the original source. You may also want to check out the other functions and classes available in each project's utils module.
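
Most of the language-model examples below (mos, NAO, darts, awd-lstm-lm, Ordered-Neurons, fraternal-dropout, lm-context-analysis, outlier-exposure) call a get_batch helper that slices a pre-batchified token tensor into a BPTT-length chunk of inputs plus the same chunk shifted by one step as targets. The exact helper lives in each project's utils.py; as a rough orientation only, a minimal sketch of that style of function (signature and behaviour inferred from the calls below, not copied from any one project) might look like this:

def get_batch(source, i, args, seq_len=None, evaluation=False):
    # `source` is assumed to be a pre-batchified LongTensor of shape [num_steps, batch_size].
    # Take up to seq_len (default args.bptt) time steps starting at position i.
    seq_len = min(seq_len if seq_len else args.bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]                      # inputs: steps i .. i+seq_len-1
    # Some projects return the unflattened slice and flatten at the call site
    # (hence the targets.view(-1) calls in several examples below).
    targets = source[i + 1:i + 1 + seq_len].view(-1)  # targets: the same steps shifted by one
    return data, targets

The evaluation flag appears in many of the calls because older PyTorch versions wrapped tensors in Variable(..., volatile=evaluation); in current PyTorch the equivalent is to run the loop under torch.no_grad(), as Examples #1 and #10 do. Note that the bird_classification examples (#11, #12, #23) and the CollaborativeVAE examples (#16, #28, #29) use differently shaped get_batch helpers: a generator-based image batcher and a random row sampler returning (batch, ids), respectively.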
Example #1
Source File: main.py    From mos with MIT License
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args)
            targets = targets.view(-1)

            log_prob, hidden = parallel_model(data, hidden)
            loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

            total_loss += loss * len(data)

            hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source) 
Example #2
Source File: train.py    From NAO with GNU General Public License v3.0
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args.bptt, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source) 
Example #3
Source File: test.py    From NAO with GNU General Public License v3.0
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        print(i, data_source.size(0)-1)
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden, args.arc)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

# Load the best saved model. 
Example #4
Source File: model_search.py    From NAO with GNU General Public License v3.0
def evaluate(data_source, model, parallel_model, params, batch_size=10):
  # Turn on evaluation mode which disables dropout.
  arch_pool = params['arch_pool']
  logging.info('Evaluating on {} archs'.format(len(arch_pool)))
  start_time = time.time()
  valid_score_list = []
  for arch in arch_pool:
    model.eval()
    hidden = model.init_hidden(batch_size)
    # whether to use a random batch?
    # data_source is in the format of [length, bs, ...]
    #for i in range(0, data_source.size(0) - 1, params['bptt']):
    #for i in range(1):
    batch = np.random.randint(0, data_source.size(0)//params['bptt'])
    data, targets = get_batch(data_source, batch, params['bptt'], evaluation=True)
    targets = targets.view(-1)
    log_prob, hidden = parallel_model(data, hidden, arch)
    loss = F.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data[0]
    valid_score_list.append(loss)
  eval_time = time.time() - start_time
  mean_valid_score = np.mean(valid_score_list)
  logging.info('Mean loss {:5.2f} | mean ppl {:8.2f} | time {:5.2f} secs'.format(mean_valid_score, np.exp(mean_valid_score), eval_time))
  return valid_score_list 
Example #5
Source File: train.py    From darts with Apache License 2.0
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source) 
Example #6
Source File: test.py    From darts with Apache License 2.0
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        print(i, data_source.size(0)-1)
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

# Load the best saved model. 
Example #7
Source File: train_search.py    From darts with Apache License 2.0
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source) 
Example #8
Source File: test.py    From NAO with GNU General Public License v3.0
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        print(i, data_source.size(0)-1)
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

# Load the best saved model. 
Example #9
Source File: train.py    From NAO with GNU General Public License v3.0
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source) 
Example #10
Source File: finetune.py    From mos with MIT License
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args)
            targets = targets.view(-1)
            
            log_prob, hidden = parallel_model(data, hidden)
            loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

            total_loss += len(data) * loss
            hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source) 
Example #11
Source File: main.py    From bird_classification with MIT License
def predict(self, sess, set_type):
        if set_type == 'val':
            num_images = len(self.obj.val_list)
            generator = self.obj.val_generator()
        elif  set_type == 'test':
            num_images = len(self.obj.test_list)
            generator = self.obj.test_generator()
        else:
            num_images = len(self.obj.train_list)
            generator = self.obj.train_generator()
        
        true_positives = 0
        num_batches = num_images//self.batch_size if num_images%self.batch_size == 0 else num_images//self.batch_size + 1 
        model_predictions = []
        for i in range(num_batches):
            x_batch, _ = get_batch(generator, set_type , height=self.model.height, width=self.model.width)
            predicted = sess.run([ self.model.pred], feed_dict={self.model.x:x_batch})
            model_predictions.extend(predicted[0])
        return model_predictions 
Example #12
Source File: main.py    From bird_classification with MIT License
def evaluate(self, sess, set_type):
        if set_type == 'val':
            num_images = len(self.obj.val_list)
            generator = self.obj.val_generator()
        else:
            num_images = len(self.obj.train_list)
            generator = self.obj.train_generator()
        
        true_positives = 0
        val_loss = 0
        num_batches = num_images//self.batch_size if num_images%self.batch_size == 0 else num_images//self.batch_size + 1 
        for i in range(num_batches):
            x_batch, y_batch = get_batch(generator, set_type, height=self.model.height, width=self.model.width)

            predicted = sess.run([self.model.pred], feed_dict={self.model.x:x_batch, self.model.y:y_batch})
            
            true_positives = true_positives + np.sum(predicted[0] == np.argmax(y_batch,1))

        print('set_type:',set_type, 'accuracy = ', true_positives*100.0/num_images)
        

    #predict the labels for test dataset 
Example #13
Source File: main.py    From Ordered-Neurons with BSD 3-Clause "New" or "Revised" License
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source) 
Example #14
Source File: finetune.py    From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source) 
Example #15
Source File: main.py    From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source) 
Example #16
Source File: cvae.py    From CollaborativeVAE with MIT License
def cdl_estimate(self, data_x, num_iter):
        for i in range(num_iter):
            b_x, ids = utils.get_batch(data_x, self.params.batch_size)
            _, l, gen_loss, v_loss = self.sess.run((self.optimizer, self.loss, self.gen_loss, self.v_loss),
             feed_dict={self.x: b_x, self.v: self.m_V[ids, :]})
            # Display logs per epoch step
            if i % self.print_step == 0 and self.verbose:
                print "Iter:", '%04d' % (i+1), \
                      "loss=", "{:.5f}".format(l), \
                      "genloss=", "{:.5f}".format(gen_loss), \
                      "vloss=", "{:.5f}".format(v_loss)
        return gen_loss 
Example #17
Source File: test_phrase_grammar.py    From Ordered-Neurons with BSD 3-Clause "New" or "Revised" License
def evaluate(data_source, batch_size=1):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output = model.decoder(output)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss / len(data_source) 
Example #18
Source File: finetune.py    From lm-context-analysis with Apache License 2.0
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source) 
Example #19
Source File: eval.py    From fraternal-dropout with BSD 3-Clause "New" or "Revised" License
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

# Load the best saved model. 
Example #20
Source File: main.py    From fraternal-dropout with BSD 3-Clause "New" or "Revised" License
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source) 
Example #21
Source File: eval.py    From lm-context-analysis with Apache License 2.0
def evaluate(data_source, batch_size, seq_len):
    # Turn on evaluation mode which disables dropout.
    model.eval()

    total_loss = 0
    tokens = 0
    n = 0
    save_all_losses = []

    ntokens = len(corpus.dictionary)

    hidden = model.init_hidden(batch_size)

    for i in range(0, data_source.size(0) - 1, seq_len):
        tokens += seq_len
        data, targets = get_batch(data_source, i, args, evaluation=True, seq_len=seq_len)
        output, hidden = model(data, hidden)
        output = nn.functional.log_softmax(output.permute(2,1,0)).permute(2,1,0)
        targets = targets.view(data.data.shape[0], batch_size, -1)
        CELoss = torch.gather(output.data, dim=2, index=targets.data).squeeze()
        CELoss = -1*CELoss
        if tokens < args.start_token: continue # We are not ready to accumulate error yet
        elif tokens >= args.start_token and tokens-seq_len < args.start_token:
            data.data = data.data[-(tokens-args.start_token+1):]
            CELoss = CELoss[-(tokens-args.start_token+1):]
            print('First word: %s' % (corpus.dictionary.idx2word[data.data[-(tokens-args.start_token+1),0]]))
        total_loss += torch.sum(CELoss)
        n += data.size(0)
        save_all_losses += CELoss.tolist()
        hidden = repackage_hidden(hidden)
    print('total: %d' % n)
    print('Last word: %s' % (corpus.dictionary.idx2word[data.data[-1,0]]))
    return total_loss / float(n), save_all_losses 
Example #22
Source File: main.py    From lm-context-analysis with Apache License 2.0
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source) 
Example #23
Source File: main.py    From bird_classification with MIT License
def run_training_testing(self, model_weight_path, gpu_memory_fraction):

        # train the network
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction

        train_generator_obj = self.obj.train_generator()

        with tf.Session(config=config) as sess:
            summary_writer = tf.summary.FileWriter('./checkpoints/', sess.graph)
            saver = tf.train.Saver(max_to_keep=2)
            self.model.optimize()           

            sess.run(tf.global_variables_initializer())

            self.model.load_weight(sess, model_weight_path)
            
            loss = 0
            true_positives = 0
            for epochs in range(1, self.num_epochs+1):
                start_time = time.time()
                for step in range(len(self.obj.train_list)//self.batch_size + 1):
                    x_batch, y_batch = get_batch(train_generator_obj, 'train', height=self.model.height, width=self.model.width)
                    #temp1 = sess.run([self.pool] , feed_dict={self.model.x:x_batch, self.model.y:y_batch})
                    #print(temp1.shape)
                    _, loss_curr, predicted = sess.run([self.model.optimizer, self.model.loss, self.model.pred] , feed_dict={self.model.x:x_batch, self.model.y:y_batch})
                    loss = 0.9*loss + 0.1*loss_curr
                    true_positives = true_positives + np.sum(predicted == np.argmax(y_batch,1))

                end_time = time.time()
                print('time_taken', end_time -start_time)    
                print('epochs:',epochs, ' train-loss:', loss, 'train-acc:', true_positives*100.0/len(self.obj.train_list))                    
                true_positives = 0

                saver.save(sess, './checkpoints/', global_step=step)
                self.evaluate(sess, 'val')
                print('')


        # predict values for test dataset
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
        
        with tf.Session(config=config) as sess:
            saver.restore(sess, tf.train.latest_checkpoint('./checkpoints/'))
            model_pred = self.predict(sess, 'test')    


        #save the results in the required csv format
        save_csv(model_pred, self.obj) 
Example #24
Source File: finetune.py    From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)

        loss = raw_loss
        # Activation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len


# Load the best saved model. 
Example #25
Source File: main.py    From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
        # Activation Regularization
        if args.alpha: loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta: loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

# Loop over epochs. 
Example #26
Source File: pointer.py    From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License
def evaluate(data_source, batch_size=10, window=args.window):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        #print(next_word_history)
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        #print(pointer_history)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)

# Load the best saved model. 
Example #27
Source File: main.py    From lm-context-analysis with Apache License 2.0
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)

        loss = raw_loss
        # Activation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

# Loop over epochs. 
Example #28
Source File: vae.py    From CollaborativeVAE with MIT License
def run_latent(self, data_x, hidden_dim, batch_size, lr, epoch, print_step=100):
        tf.reset_default_graph()
        n = data_x.shape[0]
        input_dim = len(data_x[0])
        num_iter = int(n / batch_size)
        sess = tf.Session()
        rec = { 'W_z_mean': tf.get_variable("W_z_mean", [self.dims[1], self.n_z], 
                    initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32),
                'b_z_mean': tf.get_variable("b_z_mean", [self.n_z], 
                    initializer=tf.constant_initializer(0.0), dtype=tf.float32),
                'W_z_log_sigma': tf.get_variable("W_z_log_sigma", [self.dims[1], self.n_z], 
                    initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32),
                'b_z_log_sigma': tf.get_variable("b_z_log_sigma", [self.n_z], 
                    initializer=tf.constant_initializer(0.0), dtype=tf.float32)}
        gen = {'W2': tf.get_variable("W2", [self.n_z, self.dims[1]], 
                    initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32),
                'b2': tf.get_variable("b2", [self.dims[1]], 
                    initializer=tf.constant_initializer(0.0), dtype=tf.float32)}
        x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')
        z_mean = tf.matmul(x, rec['W_z_mean']) + rec['b_z_mean']
        z_log_sigma_sq = tf.matmul(x, rec['W_z_log_sigma']) + rec['b_z_log_sigma']
        eps = tf.random_normal((batch_size, hidden_dim), 0, 1,dtype=tf.float32)
        z = z_mean + tf.sqrt(tf.maximum(tf.exp(z_log_sigma_sq), 1e-10)) * eps
        x_recon = tf.matmul(z, gen['W2']) + gen['b2']
        x_recon = tf.nn.sigmoid(x_recon, name='x_recon')
        gen_loss = -tf.reduce_mean(tf.reduce_sum(x * tf.log(tf.maximum(x_recon, 1e-10)) 
            + (1-x) * tf.log(tf.maximum(1 - x_recon, 1e-10)),1))
        latent_loss = 0.5 * tf.reduce_mean(tf.reduce_sum(tf.square(z_mean) + tf.exp(z_log_sigma_sq)
            - z_log_sigma_sq - 1, 1))
        loss = gen_loss + latent_loss
        train_op = tf.train.AdamOptimizer(lr).minimize(loss)
        sess.run(tf.global_variables_initializer())
        for i in range(epoch):
            for it in range(num_iter):
                b_x, ids = utils.get_batch(data_x, batch_size)
                _, l, gl, ll = sess.run((train_op, loss, gen_loss, latent_loss), feed_dict={x: b_x})
            if (i + 1) % print_step == 0:
                logging.info('epoch {0}: batch loss = {1}, gen_loss={2}, latent_loss={3}'.format(i, l, gl, ll))

        self.weights.append(sess.run(rec['W_z_mean']))
        self.weights.append(sess.run(rec['W_z_log_sigma']))
        self.biases.append(sess.run(rec['b_z_mean']))
        self.biases.append(sess.run(rec['b_z_log_sigma']))
        self.de_weights.append(sess.run(gen['W2']))
        self.de_biases.append(sess.run(gen['b2'])) 
Example #29
Source File: vae.py    From CollaborativeVAE with MIT License
def run(self, data_x, hidden_dim, activation, loss, lr,
            print_step, epoch, batch_size=100):
        tf.reset_default_graph()
        input_dim = len(data_x[0])
        n = data_x.shape[0]
        num_iter = int(n / batch_size)
        sess = tf.Session()
        x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')
        x_ = tf.placeholder(dtype=tf.float32, shape=[
                            None, input_dim], name='x_')
        encode = {'weights': tf.Variable(xavier_init(input_dim, hidden_dim, dtype=tf.float32)),
            'biases': tf.Variable(tf.zeros([hidden_dim],
                                                      dtype=tf.float32))}
        decode = {'biases': tf.Variable(tf.zeros([input_dim],dtype=tf.float32)),
                  'weights': tf.transpose(encode['weights'])}
        encoded = self.activate(
            tf.matmul(x, encode['weights']) + encode['biases'], activation)
        decoded = tf.matmul(encoded, decode['weights']) + decode['biases']

        # reconstruction loss
        if loss == 'rmse':
            # loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(x_, decoded))))
            loss = tf.reduce_mean(tf.reduce_sum(tf.square(x_ - decoded), 1))
        elif loss == 'cross-entropy':
            decoded = tf.nn.sigmoid(decoded, name='decoded')
            # loss = -tf.reduce_mean(x_ * tf.log(decoded))
            loss = -tf.reduce_mean(tf.reduce_sum(x_ * tf.log(tf.maximum(decoded, 1e-16)) + (1-x_)*tf.log(tf.maximum(1-decoded, 1e-16)), 1))
        train_op = tf.train.AdamOptimizer(lr).minimize(loss)

        sess.run(tf.global_variables_initializer())
        for i in range(epoch):
            for it in range(num_iter):
                b_x_, ids = utils.get_batch(data_x, batch_size)
                b_x = self.add_noise(b_x_)
                _, l = sess.run((train_op, loss), feed_dict={x: b_x, x_: b_x_})
            if (i + 1) % print_step == 0:
                l = sess.run(loss, feed_dict={x: b_x_, x_: b_x_})
                logging.info('epoch {0}: batch loss = {1}'.format(i, l))
        # debug
        # print('Decoded', sess.run(decoded, feed_dict={x: self.data_x_})[0])
        self.weights.append(sess.run(encode['weights']))
        self.biases.append(sess.run(encode['biases']))
        self.de_weights.append(sess.run(decode['weights']))
        self.de_biases.append(sess.run(decode['biases']))

        return sess.run(encoded, feed_dict={x: data_x}) 
Example #30
Source File: pointer.py    From outlier-exposure with Apache License 2.0
def evaluate(data_source, batch_size=10, window=args.window):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        #print(next_word_history)
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        #print(pointer_history)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)

# Load the best saved model.