Python utils.batchify() Examples
The following are 7 code examples of utils.batchify(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module utils, or try the search function.
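utils.batchify() itself is not shown on this page, but the DREAM examples below unpack its output as (baskets, lens, uids) tuples, with two extra fields when is_reordered=True. A minimal sketch of a generator with that interface might look like the following; the field names and the body are assumptions, not the project's actual implementation.

def batchify(ub, batch_size, is_reordered=False):
    # Hypothetical sketch: yield users' basket data in fixed-size batches.
    # Only the call signature and the shape of the yielded tuples are taken
    # from the examples below; field names like 'baskets' are assumptions.
    for start in range(0, len(ub), batch_size):
        chunk = ub[start:start + batch_size]
        baskets = [u['baskets'] for u in chunk]   # per-user basket sequences
        lens = [len(b) for b in baskets]          # number of baskets per user
        uids = [u['user_id'] for u in chunk]      # user identifiers
        if is_reordered:
            r_baskets = [u['reorder_baskets'] for u in chunk]
            h_baskets = [u['history_baskets'] for u in chunk]
            yield baskets, lens, uids, r_baskets, h_baskets
        else:
            yield baskets, lens, uids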
Example #1
Source File: eval.py From DREAM with MIT License | 6 votes |
def eval_pred(dr_model, ub):
    '''
    evaluate dream model for predicting next basket on all training users in batches
    '''
    item_embedding = dr_model.encode.weight
    dr_model.eval()
    dr_hidden = dr_model.init_hidden(dr_model.config.batch_size)
    start_time = time()
    id_u, score_u = [], []  # user's id, user's score
    num_batchs = ceil(len(ub) / dr_model.config.batch_size)
    for i, x in enumerate(batchify(ub, dr_model.config.batch_size)):
        print(i)
        baskets, lens, uids = x
        _, dynamic_user, _ = dr_model(baskets, lens, dr_hidden)  # shape: batch_size, max_len, embedding_size
        dr_hidden = repackage_hidden(dr_hidden)
        for uid, l, du in zip(uids, lens, dynamic_user):  # renamed loop variable to avoid shadowing the batch index i
            du_latest = du[l - 1].unsqueeze(0)  # shape: 1, embedding_size
            score_up = torch.mm(du_latest, item_embedding.t())  # shape: 1, num_item
            score_u.append(score_up.cpu().data.numpy())
            id_u.append(uid)
    elapsed = time() - start_time
    print('[Predicting] Elapsed: {:02.2f}'.format(elapsed))  # fixed format spec: '{02.2f}' would raise KeyError
    return score_u, id_u  # original returned the undefined name score_ub
Example #2
Source File: train.py From DREAM with MIT License | 6 votes |
def evaluate_reorder_dream():
    dr_model.eval()
    dr_hidden = dr_model.init_hidden(dr_config.batch_size)
    total_loss = 0
    start_time = time()
    num_batchs = ceil(len(test_ub) / dr_config.batch_size)
    for i, x in enumerate(batchify(test_ub, dr_config.batch_size, is_reordered=True)):
        baskets, lens, _, r_baskets, h_baskets = x
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        loss = reorder_bpr_loss(r_baskets, h_baskets, dynamic_user, dr_model.encode.weight, dr_config)
        dr_hidden = repackage_hidden(dr_hidden)
        total_loss += loss.data

    # Logging
    elapsed = (time() - start_time) * 1000 / num_batchs
    total_loss = total_loss[0] / num_batchs / dr_config.batch_size
    print('[Evaluation]| Epochs {:3d} | Elapsed {:02.2f} | Loss {:05.2f} |'.format(epoch, elapsed, total_loss))
    return total_loss
Example #3
Source File: ptb_main.py From online-normalization with BSD 3-Clause "New" or "Revised" License | 6 votes |
def main(args):
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print('=> load data...')
    corpus = data.Corpus(args.data)
    eval_batch_size = 10
    train_loader = batchify(corpus.train, args.batch_size, device)
    val_loader = batchify(corpus.valid, eval_batch_size, device)
    ntokens = len(corpus.dictionary)

    main_worker(train_loader, val_loader, ntokens, args, device)
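In this example batchify() plays a different role than in the DREAM code: it comes from a word-level language-modeling setup and turns a flat token tensor into fixed-width batches on the target device. A sketch in the style of the standard PyTorch word-language-model recipe is shown below; the exact implementation in ptb_main.py may differ.

import torch

def batchify(data, batch_size, device):
    # Hypothetical sketch: drop the trailing tokens that don't divide evenly,
    # then arrange the corpus so each of the batch_size columns holds a
    # contiguous slice of text.
    nbatch = data.size(0) // batch_size
    data = data.narrow(0, 0, nbatch * batch_size)
    data = data.view(batch_size, -1).t().contiguous()
    return data.to(device)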
Example #4
Source File: eval.py From DREAM with MIT License | 5 votes |
def eval_batch(dr_model, ub, up, batch_size, is_reordered=False):
    '''
    Using dr_model to predict (u,p) score in batch
    Parameters:
        - ub: users' baskets
        - up: users' history purchases
        - batch_size
    '''
    # turn on evaluation mode
    dr_model.eval()
    is_cuda = dr_model.config.cuda
    item_embedding = dr_model.encode.weight
    dr_hidden = dr_model.init_hidden(batch_size)

    id_u, item_u, score_u, dynam_u = [], [], [], []
    start_time = time()
    num_batchs = ceil(len(ub) / batch_size)
    for i, x in enumerate(batchify(ub, batch_size, is_reordered)):
        if is_reordered is True:
            baskets, lens, uids, r_baskets, h_baskets = x
        else:
            baskets, lens, uids = x
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        for uid, l, du in zip(uids, lens, dynamic_user):
            du_latest = du[l - 1].unsqueeze(0)
            # calculating <u,p> score for all history <u,p> pairs
            history_item = [int(i) for i in up[up.user_id == uid]['product_id'].values[0]]
            history_item = torch.cuda.LongTensor(history_item) if is_cuda else torch.LongTensor(history_item)
            score_up = torch.mm(du_latest, item_embedding[history_item].t()).cpu().data.numpy()[0]
            id_u.append(uid)
            dynam_u.append(du_latest.cpu().data.numpy()[0])
            item_u.append(history_item.cpu().numpy())
            score_u.append(score_up)
        # Logging
        elapsed = time() - start_time
        start_time = time()
        print('[Predicting]| Batch {:5d} / {:5d} | seconds/batch {:02.02f}'.format(i, num_batchs, elapsed))
    return id_u, item_u, score_u, dynam_u
Example #5
Source File: eval.py From DREAM with MIT License | 5 votes |
def get_dynamic_u(uid, dr_model, ub, dr_hidden):
    '''
    get latest dynamic representation of user uid
    dr_hidden must be provided as a global variable
    '''
    for i, x in enumerate(batchify(ub, 1)):
        baskets, lens, uids = x
        _, dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        return dynamic_user[0][lens[0] - 1].unsqueeze(0)
Example #6
Source File: train.py From DREAM with MIT License | 4 votes |
def train_dream():
    dr_model.train()  # turn on training mode for dropout
    dr_hidden = dr_model.init_hidden(dr_config.batch_size)
    total_loss = 0
    start_time = time()
    num_batchs = ceil(len(train_ub) / dr_config.batch_size)
    for i, x in enumerate(batchify(train_ub, dr_config.batch_size)):
        baskets, lens, _ = x
        dr_hidden = repackage_hidden(dr_hidden)  # repackage hidden state for RNN
        dr_model.zero_grad()  # optim.zero_grad()
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        loss = bpr_loss(baskets, dynamic_user, dr_model.encode.weight, dr_config)
        loss.backward()

        # Clip to avoid gradient exploding
        torch.nn.utils.clip_grad_norm(dr_model.parameters(), dr_config.clip)

        # Parameter updating
        # manual SGD
        # for p in dr_model.parameters():  # Update parameters by -lr*grad
        #     p.data.add_(- dr_config.learning_rate, p.grad.data)
        # adam
        grad_norm = get_grad_norm(dr_model)
        previous_params = deepcopy(list(dr_model.parameters()))
        optim.step()

        total_loss += loss.data
        params = deepcopy(list(dr_model.parameters()))
        delta = get_weight_update(previous_params, params)
        weight_update_ratio = get_ratio_update(delta, params)

        # Logging
        if i % dr_config.log_interval == 0 and i > 0:
            elapsed = (time() - start_time) * 1000 / dr_config.log_interval
            cur_loss = total_loss[0] / dr_config.log_interval / dr_config.batch_size  # turn tensor into float
            total_loss = 0
            start_time = time()
            print('[Training]| Epochs {:3d} | Batch {:5d} / {:5d} | ms/batch {:02.2f} | Loss {:05.2f} |'.format(
                epoch, i, num_batchs, elapsed, cur_loss))
            writer.add_scalar('model/train_loss', cur_loss, epoch * num_batchs + i)
            writer.add_scalar('model/grad_norm', grad_norm, epoch * num_batchs + i)
            writer.add_scalar('model/weight_update_ratio', weight_update_ratio, epoch * num_batchs + i)
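The helpers get_grad_norm(), get_weight_update(), and get_ratio_update() used above are not defined on this page. A hypothetical reading of what they might compute, consistent with how train_dream() logs them, is sketched below; the project's actual utilities may differ.

import torch

def get_grad_norm(model):
    # Hypothetical: total L2 norm over all parameter gradients.
    total = 0.0
    for p in model.parameters():
        if p.grad is not None:
            total += p.grad.data.norm(2).item() ** 2
    return total ** 0.5

def get_weight_update(previous_params, params):
    # Hypothetical: per-parameter change produced by the optimizer step.
    return [p.data - q.data for q, p in zip(previous_params, params)]

def get_ratio_update(delta, params):
    # Hypothetical: ratio of update magnitude to weight magnitude,
    # a common diagnostic for choosing the learning rate.
    delta_norm = torch.sqrt(sum((d ** 2).sum() for d in delta))
    param_norm = torch.sqrt(sum((p.data ** 2).sum() for p in params))
    return (delta_norm / param_norm).item()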
Example #7
Source File: train.py From DREAM with MIT License | 4 votes |
def train_reorder_dream():
    dr_model.train()  # turn on training mode for dropout
    dr_hidden = dr_model.init_hidden(dr_config.batch_size)
    total_loss = 0
    start_time = time()
    num_batchs = ceil(len(train_ub) / dr_config.batch_size)
    for i, x in enumerate(batchify(train_ub, dr_config.batch_size, is_reordered=True)):
        baskets, lens, ids, r_baskets, h_baskets = x
        dr_hidden = repackage_hidden(dr_hidden)  # repackage hidden state for RNN
        dr_model.zero_grad()  # optim.zero_grad()
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        loss = reorder_bpr_loss(r_baskets, h_baskets, dynamic_user, dr_model.encode.weight, dr_config)

        try:
            loss.backward()
        except RuntimeError:  # for debugging
            print('caching')
            tmp = {'baskets': baskets, 'ids': ids,
                   'r_baskets': r_baskets, 'h_baskets': h_baskets,
                   'dynamic_user': dynamic_user,
                   'item_embedding': dr_model.encode.weight}
            print(baskets)
            print(ids)
            print(r_baskets)
            print(h_baskets)
            print(dr_model.encode.weight)
            print(dynamic_user.data)
            with open('tmp.pkl', 'wb') as f:
                pickle.dump(tmp, f, pickle.HIGHEST_PROTOCOL)
            break

        # Clip to avoid gradient exploding
        torch.nn.utils.clip_grad_norm(dr_model.parameters(), dr_config.clip)

        # Parameter updating
        # manual SGD
        # for p in dr_model.parameters():  # Update parameters by -lr*grad
        #     p.data.add_(- dr_config.learning_rate, p.grad.data)
        # adam
        grad_norm = get_grad_norm(dr_model)
        previous_params = deepcopy(list(dr_model.parameters()))
        optim.step()

        total_loss += loss.data
        params = deepcopy(list(dr_model.parameters()))
        delta = get_weight_update(previous_params, params)
        weight_update_ratio = get_ratio_update(delta, params)

        # Logging
        if i % dr_config.log_interval == 0 and i > 0:
            elapsed = (time() - start_time) * 1000 / dr_config.log_interval
            cur_loss = total_loss[0] / dr_config.log_interval / dr_config.batch_size  # turn tensor into float
            total_loss = 0
            start_time = time()
            print('[Training]| Epochs {:3d} | Batch {:5d} / {:5d} | ms/batch {:02.2f} | Loss {:05.2f} |'.format(
                epoch, i, num_batchs, elapsed, cur_loss))