Python torch.utils.data.sort() Examples
The following are 30 code examples of data.sort() as used with torch.utils.data. Note that torch.utils.data itself provides no sort() function: in every example below, data is the plain Python list of samples that a torch.utils.data.DataLoader passes to a custom collate_fn, and the built-in list.sort() orders each mini-batch by sequence length (usually descending) before padding, typically so the padded batch can later be fed to torch.nn.utils.rnn.pack_padded_sequence. You can go to the original project or source file named above each example, or check out the other available functions and classes of the torch.utils.data module.
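Before the examples, here is a minimal, self-contained sketch of the pattern they all share: sort the batch list by length inside collate_fn, pad into one tensor, and hand the function to a DataLoader. The toy dataset and variable names are illustrative placeholders, not taken from any project below.

    import torch
    from torch.utils.data import DataLoader

    # Toy map-style dataset: four variable-length 1D LongTensors.
    sequences = [torch.arange(n) for n in (5, 2, 7, 3)]

    def collate_fn(data):
        # data is the plain Python list of samples for one batch, so this is
        # list.sort(), not a torch.utils.data function.
        data.sort(key=len, reverse=True)  # longest sequence first
        lengths = [len(seq) for seq in data]
        padded = torch.zeros(len(data), max(lengths)).long()
        for i, seq in enumerate(data):
            padded[i, :lengths[i]] = seq
        return padded, lengths

    loader = DataLoader(sequences, batch_size=2, collate_fn=collate_fn)
    for padded, lengths in loader:
        print(padded.shape, lengths)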
Example #1
Source File: data_utils.py From neural-question-generation with MIT License
def collate_fn(data):
    def merge(sequences):
        lengths = [len(sequence) for sequence in sequences]
        padded_seqs = torch.zeros(len(sequences), max(lengths)).long()
        for i, seq in enumerate(sequences):
            end = lengths[i]
            padded_seqs[i, :end] = seq[:end]
        return padded_seqs

    data.sort(key=lambda x: len(x[0]), reverse=True)
    src_seqs, ext_src_seqs, trg_seqs, ext_trg_seqs, oov_lst = zip(*data)
    src_seqs = merge(src_seqs)
    ext_src_seqs = merge(ext_src_seqs)
    trg_seqs = merge(trg_seqs)
    ext_trg_seqs = merge(ext_trg_seqs)

    return src_seqs, ext_src_seqs, trg_seqs, ext_trg_seqs, oov_lst
Example #2
Source File: data_loader.py From inversecooking with MIT License
def collate_fn(data):
    # Sort a data list by caption length (descending order).
    # data.sort(key=lambda x: len(x[2]), reverse=True)
    image_input, captions, ingrs_gt, img_id, path, pad_value = zip(*data)

    # Merge images (from tuple of 3D tensor to 4D tensor).
    image_input = torch.stack(image_input, 0)
    ingrs_gt = torch.stack(ingrs_gt, 0)

    # Merge captions (from tuple of 1D tensor to 2D tensor).
    lengths = [len(cap) for cap in captions]
    targets = torch.ones(len(captions), max(lengths)).long() * pad_value[0]

    for i, cap in enumerate(captions):
        end = lengths[i]
        targets[i, :end] = cap[:end]

    return image_input, targets, ingrs_gt, img_id, path
Example #3
Source File: data_utils.py From neural-question-generation with MIT License
def collate_fn_tag(data):
    def merge(sequences):
        lengths = [len(sequence) for sequence in sequences]
        padded_seqs = torch.zeros(len(sequences), max(lengths)).long()
        for i, seq in enumerate(sequences):
            end = lengths[i]
            padded_seqs[i, :end] = seq[:end]
        return padded_seqs

    data.sort(key=lambda x: len(x[0]), reverse=True)
    src_seqs, ext_src_seqs, trg_seqs, ext_trg_seqs, oov_lst, tag_seqs = zip(*data)
    src_seqs = merge(src_seqs)
    ext_src_seqs = merge(ext_src_seqs)
    trg_seqs = merge(trg_seqs)
    ext_trg_seqs = merge(ext_trg_seqs)
    tag_seqs = merge(tag_seqs)

    assert src_seqs.size(1) == tag_seqs.size(1), "length of tokens and tags should be equal"
    return src_seqs, ext_src_seqs, trg_seqs, ext_trg_seqs, tag_seqs, oov_lst
Example #4
Source File: data_provider.py From dual_encoding with Apache License 2.0
def collate_text(data):
    if data[0][0] is not None:
        data.sort(key=lambda x: len(x[0]), reverse=True)
    captions, cap_bows, idxs, cap_ids = zip(*data)

    if captions[0] is not None:
        # Merge captions (convert tuple of 1D tensor to 2D tensor)
        lengths = [len(cap) for cap in captions]
        target = torch.zeros(len(captions), max(lengths)).long()
        words_mask = torch.zeros(len(captions), max(lengths))
        for i, cap in enumerate(captions):
            end = lengths[i]
            target[i, :end] = cap[:end]
            words_mask[i, :end] = 1.0
    else:
        target = None
        lengths = None
        words_mask = None

    cap_bows = torch.stack(cap_bows, 0) if cap_bows[0] is not None else None

    text_data = (target, cap_bows, lengths, words_mask)

    return text_data, idxs, cap_ids
Example #5
Source File: gpt2tunediscrim.py From PPLM with Apache License 2.0
def collate_fn(data):
    def merge(sequences):
        lengths = [len(seq) for seq in sequences]
        padded_seqs = torch.zeros(len(sequences), max(lengths)).long().cuda()  # padding index 0
        for i, seq in enumerate(sequences):
            end = lengths[i]
            padded_seqs[i, :end] = seq[:end]
        return padded_seqs, lengths

    data.sort(key=lambda x: len(x["X"]), reverse=True)  # sort by source seq
    item_info = {}
    for key in data[0].keys():
        item_info[key] = [d[key] for d in data]

    # input
    x_batch, _ = merge(item_info['X'])
    y_batch = item_info['y']

    return x_batch, torch.tensor(y_batch, device='cuda', dtype=torch.long)
Example #6
Source File: utils_kvr_mem2seq.py From Mem2Seq with MIT License
def collate_fn(data):
    def merge(sequences, max_len):
        lengths = [len(seq) for seq in sequences]
        if (max_len):
            padded_seqs = torch.ones(len(sequences), max(lengths), MEM_TOKEN_SIZE).long()
            for i, seq in enumerate(sequences):
                end = lengths[i]
                padded_seqs[i, :end, :] = seq[:end]
        else:
            padded_seqs = torch.ones(len(sequences), max(lengths)).long()
            for i, seq in enumerate(sequences):
                end = lengths[i]
                padded_seqs[i, :end] = seq[:end]
        return padded_seqs, lengths

    # sort a list by sequence length (descending order) to use pack_padded_sequence
    data.sort(key=lambda x: len(x[-1]), reverse=True)
    # separate source and target sequences
    src_seqs, trg_seqs, ind_seqs, gete_s, max_len, src_plain, trg_plain, entity, entity_cal, entity_nav, entity_wet, conv_seq, kb_arr = zip(*data)

    # merge sequences (from tuple of 1D tensor to 2D tensor)
    src_seqs, src_lengths = merge(src_seqs, max_len)
    trg_seqs, trg_lengths = merge(trg_seqs, None)
    ind_seqs, _ = merge(ind_seqs, None)
    gete_s, _ = merge(gete_s, None)
    conv_seqs, conv_lengths = merge(conv_seq, max_len)

    src_seqs = Variable(src_seqs).transpose(0, 1)
    trg_seqs = Variable(trg_seqs).transpose(0, 1)
    ind_seqs = Variable(ind_seqs).transpose(0, 1)
    gete_s = Variable(gete_s).transpose(0, 1)
    conv_seqs = Variable(conv_seqs).transpose(0, 1)

    if USE_CUDA:
        src_seqs = src_seqs.cuda()
        trg_seqs = trg_seqs.cuda()
        ind_seqs = ind_seqs.cuda()
        gete_s = gete_s.cuda()
        conv_seqs = conv_seqs.cuda()

    return src_seqs, src_lengths, trg_seqs, trg_lengths, ind_seqs, gete_s, src_plain, trg_plain, entity, entity_cal, entity_nav, entity_wet, conv_seqs, conv_lengths, kb_arr
Example #7
Source File: data.py From VSE-C with MIT License
def collate_fn(data): """Build mini-batch tensors from a list of (image, caption) tuples. Args: data: list of (image, caption) tuple. - image: torch tensor of shape (3, 256, 256). - caption: torch tensor of shape (?); variable length. Returns: images: torch tensor of shape (batch_size, 3, 256, 256). targets: torch tensor of shape (batch_size, padded_length). lengths: list; valid length for each padded caption. """ # Sort a data list by caption length data.sort(key=lambda x: len(x[1]), reverse=True) images, captions, ids, img_ids = list(zip(*data)) # Merge images (convert tuple of 3D tensor to 4D tensor) images = torch.stack(images, 0) # Merge captions (convert tuple of 1D tensor to 2D tensor) lengths = [len(cap) for cap in captions] targets = torch.zeros(len(captions), max(lengths)).long() for i, cap in enumerate(captions): end = lengths[i] targets[i, :end] = cap[:end] return images, targets, lengths, ids
Example #8
Source File: data.py From pvse with MIT License
def collate_fn(data): """Build mini-batch tensors from a list of (image, sentence) tuples. Args: data: list of (image, sentence) tuple. - image: torch tensor of shape (3, 256, 256) or (?, 3, 256, 256). - sentence: torch tensor of shape (?); variable length. Returns: images: torch tensor of shape (batch_size, 3, 256, 256) or (batch_size, padded_length, 3, 256, 256). targets: torch tensor of shape (batch_size, padded_length). lengths: list; valid length for each padded sentence. """ # Sort a data list by sentence length data.sort(key=lambda x: len(x[1]), reverse=True) images, sentences, ids, img_ids = zip(*data) # Merge images (convert tuple of 3D tensor to 4D tensor) images = torch.stack(images, 0) # Merge sentences (convert tuple of 1D tensor to 2D tensor) cap_lengths = torch.tensor([len(cap) for cap in sentences]) targets = torch.zeros(len(sentences), max(cap_lengths)).long() for i, cap in enumerate(sentences): end = cap_lengths[i] targets[i, :end] = cap[:end] return images, targets, cap_lengths, ids
Example #9
Source File: utils_NMT.py From Mem2Seq with MIT License
def collate_fn(data):
    def merge(sequences):
        lengths = [len(seq) for seq in sequences]
        padded_seqs = torch.ones(len(sequences), max(lengths)).long()
        for i, seq in enumerate(sequences):
            end = lengths[i]
            padded_seqs[i, :end] = seq[:end]
        return padded_seqs, lengths

    # sort a list by sequence length (descending order) to use pack_padded_sequence
    data.sort(key=lambda x: len(x[-1]), reverse=True)
    # separate source and target sequences
    src_seqs, trg_seqs, ind_seqs, max_len, src_plain, trg_plain = zip(*data)

    # merge sequences (from tuple of 1D tensor to 2D tensor)
    src_seqs, src_lengths = merge(src_seqs)
    trg_seqs, trg_lengths = merge(trg_seqs)
    ind_seqs, _ = merge(ind_seqs)

    src_seqs = Variable(src_seqs).transpose(0, 1)
    trg_seqs = Variable(trg_seqs).transpose(0, 1)
    ind_seqs = Variable(ind_seqs).transpose(0, 1)

    if USE_CUDA:
        src_seqs = src_seqs.cuda()
        trg_seqs = trg_seqs.cuda()
        ind_seqs = ind_seqs.cuda()

    return src_seqs, src_lengths, trg_seqs, trg_lengths, ind_seqs, src_plain, trg_plain
Example #10
Source File: image_data_loader.py From VideoSearchEngine with MIT License
def collate_fn(data): """Creates mini-batch tensors from the list of tuples (image, caption). We should build custom collate_fn rather than using default collate_fn, because merging caption (including padding) is not supported in default. Args: data: list of tuple (image, caption). - image: torch tensor of shape (3, 256, 256). - caption: torch tensor of shape (?); variable length. Returns: images: torch tensor of shape (batch_size, 3, 256, 256). targets: torch tensor of shape (batch_size, padded_length). lengths: list; valid length for each padded caption. """ # Sort a data list by caption length (descending order). data.sort(key=lambda x: len(x[1]), reverse=True) images, captions, label_seqs, location_seqs = zip(*data) assert len(label_seqs) > 0 assert len(label_seqs) == len(location_seqs) # Merge images (from tuple of 3D tensor to 4D tensor). images = torch.stack(images, 0) # Merge captions (from tuple of 1D tensor to 2D tensor). lengths = [len(cap) for cap in captions] targets = torch.zeros(len(captions), max(lengths)).long() for i, cap in enumerate(captions): end = lengths[i] targets[i, :end] = cap[:end] label_seq_lengths = [len(label_seq) for label_seq in label_seqs] label_seq_data = torch.zeros(len(label_seqs), max(label_seq_lengths)).long() for i, label_seq in enumerate(label_seqs): label_seq_data[i, :len(label_seq)] = torch.LongTensor(label_seq[:len(label_seq)]) location_seq_data = torch.zeros(len(location_seqs), max(label_seq_lengths), 4) for i, location_seq in enumerate(location_seqs): for j in range(len(location_seq)): coords = decode_location(location_seq[j]) location_seq_data[i, j] = coords return images, targets, lengths, label_seq_data, location_seq_data, label_seq_lengths
Example #11
Source File: utils_babi_mem2seq.py From Mem2Seq with MIT License
def collate_fn(data):
    def merge(sequences, max_len):
        lengths = [len(seq) for seq in sequences]
        if (max_len):
            padded_seqs = torch.ones(len(sequences), max(lengths), MEM_TOKEN_SIZE).long()
            for i, seq in enumerate(sequences):
                end = lengths[i]
                padded_seqs[i, :end, :] = seq[:end]
        else:
            padded_seqs = torch.ones(len(sequences), max(lengths)).long()
            for i, seq in enumerate(sequences):
                end = lengths[i]
                padded_seqs[i, :end] = seq[:end]
        return padded_seqs, lengths

    # sort a list by sequence length (descending order) to use pack_padded_sequence
    data.sort(key=lambda x: len(x[0]), reverse=True)
    # separate source and target sequences
    src_seqs, trg_seqs, ind_seqs, gete_s, max_len, src_plain, trg_plain, conv_seq, ent, ID, kb_arr = zip(*data)

    # merge sequences (from tuple of 1D tensor to 2D tensor)
    src_seqs, src_lengths = merge(src_seqs, max_len)
    trg_seqs, trg_lengths = merge(trg_seqs, None)
    ind_seqs, _ = merge(ind_seqs, None)
    gete_s, _ = merge(gete_s, None)
    conv_seqs, conv_lengths = merge(conv_seq, max_len)

    src_seqs = Variable(src_seqs).transpose(0, 1)
    trg_seqs = Variable(trg_seqs).transpose(0, 1)
    ind_seqs = Variable(ind_seqs).transpose(0, 1)
    gete_s = Variable(gete_s).transpose(0, 1)
    conv_seqs = Variable(conv_seqs).transpose(0, 1)

    if USE_CUDA:
        src_seqs = src_seqs.cuda()
        trg_seqs = trg_seqs.cuda()
        ind_seqs = ind_seqs.cuda()
        gete_s = gete_s.cuda()
        conv_seqs = conv_seqs.cuda()

    return src_seqs, src_lengths, trg_seqs, trg_lengths, ind_seqs, gete_s, src_plain, trg_plain, conv_seqs, conv_lengths, ent, ID, kb_arr
Example #12
Source File: utils_kvr.py From Mem2Seq with MIT License
def collate_fn(data):
    def merge(sequences, max_len):
        lengths = [len(seq) for seq in sequences]
        if (max_len):
            padded_seqs = torch.zeros(len(sequences), max(lengths)).long()
        else:
            # NOTE: both branches are identical in the original source.
            padded_seqs = torch.zeros(len(sequences), max(lengths)).long()
        for i, seq in enumerate(sequences):
            end = lengths[i]
            padded_seqs[i, :end] = seq[:end]
        return padded_seqs, lengths

    # sort a list by sequence length (descending order) to use pack_padded_sequence
    data.sort(key=lambda x: len(x[0]), reverse=True)
    # separate source and target sequences
    src_seqs, trg_seqs, ind_seqs, gete_s, max_len, src_plain, trg_plain, entity, entity_cal, entity_nav, entity_wet = zip(*data)

    # merge sequences (from tuple of 1D tensor to 2D tensor)
    src_seqs, src_lengths = merge(src_seqs, max_len)
    trg_seqs, trg_lengths = merge(trg_seqs, None)
    ind_seqs, _ = merge(ind_seqs, None)
    gete_s, _ = merge(gete_s, None)

    src_seqs = Variable(src_seqs).transpose(0, 1)
    trg_seqs = Variable(trg_seqs).transpose(0, 1)
    ind_seqs = Variable(ind_seqs).transpose(0, 1)
    gete_s = Variable(gete_s).transpose(0, 1)

    if USE_CUDA:
        src_seqs = src_seqs.cuda()
        trg_seqs = trg_seqs.cuda()
        ind_seqs = ind_seqs.cuda()
        gete_s = gete_s.cuda()

    return src_seqs, src_lengths, trg_seqs, trg_lengths, ind_seqs, gete_s, src_plain, trg_plain, entity, entity_cal, entity_nav, entity_wet
Example #13
Source File: until_temp.py From Mem2Seq with MIT License
def collate_fn(data):
    def merge(sequences, max_len):
        lengths = [len(seq) for seq in sequences]
        if (max_len):
            padded_seqs = torch.zeros(len(sequences), max_len[0]).long()
        else:
            padded_seqs = torch.zeros(len(sequences), max(lengths)).long()
        for i, seq in enumerate(sequences):
            end = lengths[i]
            padded_seqs[i, :end] = seq[:end]
        return padded_seqs, lengths

    # sort a list by sequence length (descending order) to use pack_padded_sequence
    data.sort(key=lambda x: len(x[0]), reverse=True)
    # separate source and target sequences
    src_seqs, trg_seqs, ind_seqs, target_plain, max_len, src_plain = zip(*data)

    # merge sequences (from tuple of 1D tensor to 2D tensor)
    src_seqs, src_lengths = merge(src_seqs, max_len)
    ind_seqs, ind_lenght = merge(ind_seqs, None)
    # gete_s, _ = merge(gete_s, None)

    src_seqs = Variable(src_seqs).transpose(0, 1)
    trg_seqs = Variable(torch.Tensor(trg_seqs))
    ind_seqs = Variable(ind_seqs).transpose(0, 1)
    # gete_s = Variable(gete_s).transpose(0, 1)

    if USE_CUDA:
        src_seqs = src_seqs.cuda()
        trg_seqs = trg_seqs.cuda()
        ind_seqs = ind_seqs.cuda()
        # gete_s = gete_s.cuda()

    return src_seqs, src_lengths, trg_seqs, ind_lenght, ind_seqs, target_plain, src_plain
Example #14
Source File: data_loader.py From seq2seq-dataloader with MIT License
def collate_fn(data): """Creates mini-batch tensors from the list of tuples (src_seq, trg_seq). We should build a custom collate_fn rather than using default collate_fn, because merging sequences (including padding) is not supported in default. Seqeuences are padded to the maximum length of mini-batch sequences (dynamic padding). Args: data: list of tuple (src_seq, trg_seq). - src_seq: torch tensor of shape (?); variable length. - trg_seq: torch tensor of shape (?); variable length. Returns: src_seqs: torch tensor of shape (batch_size, padded_length). src_lengths: list of length (batch_size); valid length for each padded source sequence. trg_seqs: torch tensor of shape (batch_size, padded_length). trg_lengths: list of length (batch_size); valid length for each padded target sequence. """ def merge(sequences): lengths = [len(seq) for seq in sequences] padded_seqs = torch.zeros(len(sequences), max(lengths)).long() for i, seq in enumerate(sequences): end = lengths[i] padded_seqs[i, :end] = seq[:end] return padded_seqs, lengths # sort a list by sequence length (descending order) to use pack_padded_sequence data.sort(key=lambda x: len(x[0]), reverse=True) # seperate source and target sequences src_seqs, trg_seqs = zip(*data) # merge sequences (from tuple of 1D tensor to 2D tensor) src_seqs, src_lengths = merge(src_seqs) trg_seqs, trg_lengths = merge(trg_seqs) return src_seqs, src_lengths, trg_seqs, trg_lengths
Example #15
Source File: data_loader.py From pytorch-tutorial with MIT License
def collate_fn(data): """Creates mini-batch tensors from the list of tuples (image, caption). We should build custom collate_fn rather than using default collate_fn, because merging caption (including padding) is not supported in default. Args: data: list of tuple (image, caption). - image: torch tensor of shape (3, 256, 256). - caption: torch tensor of shape (?); variable length. Returns: images: torch tensor of shape (batch_size, 3, 256, 256). targets: torch tensor of shape (batch_size, padded_length). lengths: list; valid length for each padded caption. """ # Sort a data list by caption length (descending order). data.sort(key=lambda x: len(x[1]), reverse=True) images, captions = zip(*data) # Merge images (from tuple of 3D tensor to 4D tensor). images = torch.stack(images, 0) # Merge captions (from tuple of 1D tensor to 2D tensor). lengths = [len(cap) for cap in captions] targets = torch.zeros(len(captions), max(lengths)).long() for i, cap in enumerate(captions): end = lengths[i] targets[i, :end] = cap[:end] return images, targets, lengths
Example #16
Source File: data.py From CAMP_iccv19 with Apache License 2.0
def collate_fn(data): """Build mini-batch tensors from a list of (image, caption) tuples. Args: data: list of (image, caption) tuple. - image: torch tensor of shape (3, 256, 256). - caption: torch tensor of shape (?); variable length. Returns: images: torch tensor of shape (batch_size, 3, 256, 256). targets: torch tensor of shape (batch_size, padded_length). lengths: list; valid length for each padded caption. """ # Sort a data list by caption length data.sort(key=lambda x: len(x[1]), reverse=True) images, captions, ids, img_ids, img_cls = zip(*data) # Merge images (convert tuple of 3D tensor to 4D tensor) images = torch.stack(images, 0) # Merget captions (convert tuple of 1D tensor to 2D tensor) lengths = [len(cap) for cap in captions] # targets = torch.zeros(len(captions), max(lengths)).long() targets = torch.zeros(len(captions), max(lengths)).long() for i, cap in enumerate(captions): end = lengths[i] targets[i, :end] = cap[:end] lengths = torch.Tensor(lengths) img_cls = torch.Tensor(img_cls).long() return images, targets, lengths, ids, img_cls
Example #17
Source File: coco_dataset.py From pytorch-gve-lrcn with MIT License
def collate_fn(data): """Creates mini-batch tensors from the list of tuples (image, caption). We should build custom collate_fn rather than using default collate_fn, because merging caption (including padding) is not supported in default. Args: data: list of tuple (image, caption). - image: torch tensor of shape (3, 256, 256). - caption: torch tensor of shape (?); variable length. Returns: images: torch tensor of shape (batch_size, 3, 256, 256). targets: torch tensor of shape (batch_size, padded_length). lengths: list; valid length for each padded caption. """ # Sort a data list by caption length (descending order). data.sort(key=lambda x: len(x[1]), reverse=True) images, captions, ids, *labels = zip(*data) if len(labels) > 0: return_labels = True labels = torch.cat(labels[0], 0) else: return_labels = False # Merge images (from tuple of 3D tensor to 4D tensor). images = torch.stack(images, 0) # Merge captions (from tuple of 1D tensor to 2D tensor). lengths = [len(cap)-1 for cap in captions] word_inputs = torch.zeros(len(captions), max(lengths)).long() word_targets = torch.zeros(len(captions), max(lengths)).long() for i, cap in enumerate(captions): end = lengths[i] word_inputs[i, :end] = cap[:-1] word_targets[i, :end] = cap[1:] if return_labels: return images, word_inputs, word_targets, lengths, ids, labels else: return images, word_inputs, word_targets, lengths, ids
Example #18
Source File: data_loader.py From ACME with GNU General Public License v3.0
def collate_fn(data): """Creates mini-batch tensors from the list of tuples (image, caption). We should build custom collate_fn rather than using default collate_fn, because merging caption (including padding) is not supported in default. Args: data: list of tuple (image, caption). - image: torch tensor of shape (3, 256, 256). - caption: torch tensor of shape (?); variable length. Returns: images: torch tensor of shape (batch_size, 3, 256, 256). targets: torch tensor of shape (batch_size, padded_length). lengths: list; valid length for each padded caption. """ # Sort a data list by caption length (descending order). data.sort(key=lambda x: len(x[5]), reverse=True) img, instrs, itr_ln, ingrs, igr_ln,\ ingr_cap, class_label, ret, one_hot_vec, food_id = zip(*data) # Merge images (from tuple of 3D tensor to 4D tensor). images = torch.stack(img, 0) instrs = torch.stack(instrs, 0) itr_ln = torch.LongTensor(list(itr_ln)) ingrs = torch.stack(ingrs, 0) igr_ln = torch.LongTensor(list(igr_ln)) class_label = torch.LongTensor(list(class_label)) ret = torch.stack(ret, 0) # Merge captions (from tuple of 1D tensor to 2D tensor). lengths = [len(cap) for cap in ingr_cap] targets = torch.zeros(len(ingr_cap), max(lengths)).long() for i, cap in enumerate(ingr_cap): end = lengths[i] targets[i, :end] = cap[:end] one_hot_vec = torch.stack(one_hot_vec, 0) return [images, instrs, itr_ln, ingrs, igr_ln, list(food_id)], \ [images, instrs, itr_ln, ingrs, igr_ln, targets, lengths, class_label, ret, one_hot_vec]
Example #19
Source File: data.py From VSE-C with MIT License
def collate_fn_train_text(data):
    """Build mini-batch tensors from a list of (image, caption) tuples.
    Args:
        data: list of (image, caption) tuple.
            - image: torch tensor of shape (3, 256, 256).
            - caption: torch tensor of shape (?); variable length.

    Returns:
        images: torch tensor of shape (batch_size, 3, 256, 256).
        targets: torch tensor of shape (batch_size, padded_length).
        lengths: list; valid length for each padded caption.
    """
    # Sort a data list by caption length
    data.sort(key=lambda x: len(x[1]), reverse=True)
    images, captions, ids, img_ids, extended_captions = list(zip(*data))

    # Merge images (convert tuple of 3D tensor to 4D tensor)
    images = torch.stack(images, 0)

    # Merge captions (convert tuple of 1D tensor to 2D tensor)
    pn_number = len(extended_captions[0]) + 1
    lengths = list()
    for cap in captions:
        lengths.extend([len(cap)] * pn_number)
    targets = torch.zeros(len(captions) * pn_number, max(lengths)).long()
    for i, cap in enumerate(captions):
        end = lengths[i * pn_number]
        targets[i * pn_number, :end] = cap[:end]
        for i_, cap_ in enumerate(extended_captions[i]):
            targets[i * pn_number + i_ + 1, :end] = cap_[:end]

    return images, targets, lengths, ids
Example #20
Source File: data.py From VSE-C with MIT License
def collate_fn_test_text(data):
    # Sort a data list by caption length
    data.sort(key=lambda x: len(x[0]), reverse=True)
    captions, ids = list(zip(*data))

    # Merge captions (convert tuple of 1D tensor to 2D tensor)
    lengths = [len(cap) for cap in captions]
    targets = torch.zeros(len(captions), max(lengths)).long()
    for i, cap in enumerate(captions):
        end = lengths[i]
        targets[i, :end] = cap[:end]

    return None, targets, lengths, ids
Example #21
Source File: utils_NMT.py From ConvLab with MIT License
def collate_fn(data):
    def merge(sequences):
        lengths = [len(seq) for seq in sequences]
        padded_seqs = torch.ones(len(sequences), max(lengths)).long()
        for i, seq in enumerate(sequences):
            end = lengths[i]
            padded_seqs[i, :end] = seq[:end]
        return padded_seqs, lengths

    # sort a list by sequence length (descending order) to use pack_padded_sequence
    data.sort(key=lambda x: len(x[-1]), reverse=True)
    # separate source and target sequences
    src_seqs, trg_seqs, ind_seqs, max_len, src_plain, trg_plain = zip(*data)

    # merge sequences (from tuple of 1D tensor to 2D tensor)
    src_seqs, src_lengths = merge(src_seqs)
    trg_seqs, trg_lengths = merge(trg_seqs)
    ind_seqs, _ = merge(ind_seqs)

    src_seqs = Variable(src_seqs).transpose(0, 1)
    trg_seqs = Variable(trg_seqs).transpose(0, 1)
    ind_seqs = Variable(ind_seqs).transpose(0, 1)

    if USE_CUDA:
        src_seqs = src_seqs.cuda()
        trg_seqs = trg_seqs.cuda()
        ind_seqs = ind_seqs.cuda()

    return src_seqs, src_lengths, trg_seqs, trg_lengths, ind_seqs, src_plain, trg_plain
Example #22
Source File: utils_woz_mem2seq.py From ConvLab with MIT License
def collate_fn(data):
    def merge(sequences, max_len):
        lengths = [len(seq) for seq in sequences]
        if (max_len):
            padded_seqs = torch.ones(len(sequences), max(lengths), MEM_TOKEN_SIZE).long()
            for i, seq in enumerate(sequences):
                end = lengths[i]
                padded_seqs[i, :end, :] = seq[:end]
        else:
            padded_seqs = torch.ones(len(sequences), max(lengths)).long()
            for i, seq in enumerate(sequences):
                end = lengths[i]
                padded_seqs[i, :end] = seq[:end]
        return padded_seqs, lengths

    # sort a list by sequence length (descending order) to use pack_padded_sequence
    data.sort(key=lambda x: len(x[-1]), reverse=True)
    # separate source and target sequences
    src_seqs, trg_seqs, ind_seqs, gete_s, max_len, src_plain, trg_plain, entity, conv_seq, kb_arr = zip(*data)

    # merge sequences (from tuple of 1D tensor to 2D tensor)
    src_seqs, src_lengths = merge(src_seqs, max_len)
    trg_seqs, trg_lengths = merge(trg_seqs, None)
    ind_seqs, _ = merge(ind_seqs, None)
    gete_s, _ = merge(gete_s, None)
    conv_seqs, conv_lengths = merge(conv_seq, max_len)

    src_seqs = Variable(src_seqs).transpose(0, 1)
    trg_seqs = Variable(trg_seqs).transpose(0, 1)
    ind_seqs = Variable(ind_seqs).transpose(0, 1)
    gete_s = Variable(gete_s).transpose(0, 1)
    conv_seqs = Variable(conv_seqs).transpose(0, 1)

    if USE_CUDA:
        src_seqs = src_seqs.cuda()
        trg_seqs = trg_seqs.cuda()
        ind_seqs = ind_seqs.cuda()
        gete_s = gete_s.cuda()
        conv_seqs = conv_seqs.cuda()

    return src_seqs, src_lengths, trg_seqs, trg_lengths, ind_seqs, gete_s, src_plain, trg_plain, entity, conv_seqs, conv_lengths, kb_arr
Example #23
Source File: utils_kvr_mem2seq.py From ConvLab with MIT License
def collate_fn(data):
    def merge(sequences, max_len):
        lengths = [len(seq) for seq in sequences]
        if (max_len):
            padded_seqs = torch.ones(len(sequences), max(lengths), MEM_TOKEN_SIZE).long()
            for i, seq in enumerate(sequences):
                end = lengths[i]
                padded_seqs[i, :end, :] = seq[:end]
        else:
            padded_seqs = torch.ones(len(sequences), max(lengths)).long()
            for i, seq in enumerate(sequences):
                end = lengths[i]
                padded_seqs[i, :end] = seq[:end]
        return padded_seqs, lengths

    # sort a list by sequence length (descending order) to use pack_padded_sequence
    data.sort(key=lambda x: len(x[-1]), reverse=True)
    # separate source and target sequences
    src_seqs, trg_seqs, ind_seqs, gete_s, max_len, src_plain, trg_plain, entity, entity_cal, entity_nav, entity_wet, conv_seq, kb_arr = zip(*data)

    # merge sequences (from tuple of 1D tensor to 2D tensor)
    src_seqs, src_lengths = merge(src_seqs, max_len)
    trg_seqs, trg_lengths = merge(trg_seqs, None)
    ind_seqs, _ = merge(ind_seqs, None)
    gete_s, _ = merge(gete_s, None)
    conv_seqs, conv_lengths = merge(conv_seq, max_len)

    src_seqs = Variable(src_seqs).transpose(0, 1)
    trg_seqs = Variable(trg_seqs).transpose(0, 1)
    ind_seqs = Variable(ind_seqs).transpose(0, 1)
    gete_s = Variable(gete_s).transpose(0, 1)
    conv_seqs = Variable(conv_seqs).transpose(0, 1)

    if USE_CUDA:
        src_seqs = src_seqs.cuda()
        trg_seqs = trg_seqs.cuda()
        ind_seqs = ind_seqs.cuda()
        gete_s = gete_s.cuda()
        conv_seqs = conv_seqs.cuda()

    return src_seqs, src_lengths, trg_seqs, trg_lengths, ind_seqs, gete_s, src_plain, trg_plain, entity, entity_cal, entity_nav, entity_wet, conv_seqs, conv_lengths, kb_arr
Example #24
Source File: utils_babi_mem2seq.py From ConvLab with MIT License
def collate_fn(data):
    def merge(sequences, max_len):
        lengths = [len(seq) for seq in sequences]
        if (max_len):
            padded_seqs = torch.ones(len(sequences), max(lengths), MEM_TOKEN_SIZE).long()
            for i, seq in enumerate(sequences):
                end = lengths[i]
                padded_seqs[i, :end, :] = seq[:end]
        else:
            padded_seqs = torch.ones(len(sequences), max(lengths)).long()
            for i, seq in enumerate(sequences):
                end = lengths[i]
                padded_seqs[i, :end] = seq[:end]
        return padded_seqs, lengths

    # sort a list by sequence length (descending order) to use pack_padded_sequence
    data.sort(key=lambda x: len(x[0]), reverse=True)
    # separate source and target sequences
    src_seqs, trg_seqs, ind_seqs, gete_s, max_len, src_plain, trg_plain, conv_seq, ent, ID, kb_arr = zip(*data)

    # merge sequences (from tuple of 1D tensor to 2D tensor)
    src_seqs, src_lengths = merge(src_seqs, max_len)
    trg_seqs, trg_lengths = merge(trg_seqs, None)
    ind_seqs, _ = merge(ind_seqs, None)
    gete_s, _ = merge(gete_s, None)
    conv_seqs, conv_lengths = merge(conv_seq, max_len)

    src_seqs = Variable(src_seqs).transpose(0, 1)
    trg_seqs = Variable(trg_seqs).transpose(0, 1)
    ind_seqs = Variable(ind_seqs).transpose(0, 1)
    gete_s = Variable(gete_s).transpose(0, 1)
    conv_seqs = Variable(conv_seqs).transpose(0, 1)

    if USE_CUDA:
        src_seqs = src_seqs.cuda()
        trg_seqs = trg_seqs.cuda()
        ind_seqs = ind_seqs.cuda()
        gete_s = gete_s.cuda()
        conv_seqs = conv_seqs.cuda()

    return src_seqs, src_lengths, trg_seqs, trg_lengths, ind_seqs, gete_s, src_plain, trg_plain, conv_seqs, conv_lengths, ent, ID, kb_arr
Example #25
Source File: utils_kvr.py From ConvLab with MIT License
def collate_fn(data):
    def merge(sequences, max_len):
        lengths = [len(seq) for seq in sequences]
        if (max_len):
            padded_seqs = torch.zeros(len(sequences), max(lengths)).long()
        else:
            # NOTE: both branches are identical in the original source.
            padded_seqs = torch.zeros(len(sequences), max(lengths)).long()
        for i, seq in enumerate(sequences):
            end = lengths[i]
            padded_seqs[i, :end] = seq[:end]
        return padded_seqs, lengths

    # sort a list by sequence length (descending order) to use pack_padded_sequence
    data.sort(key=lambda x: len(x[0]), reverse=True)
    # separate source and target sequences
    src_seqs, trg_seqs, ind_seqs, gete_s, max_len, src_plain, trg_plain, entity, entity_cal, entity_nav, entity_wet = zip(*data)

    # merge sequences (from tuple of 1D tensor to 2D tensor)
    src_seqs, src_lengths = merge(src_seqs, max_len)
    trg_seqs, trg_lengths = merge(trg_seqs, None)
    ind_seqs, _ = merge(ind_seqs, None)
    gete_s, _ = merge(gete_s, None)

    src_seqs = Variable(src_seqs).transpose(0, 1)
    trg_seqs = Variable(trg_seqs).transpose(0, 1)
    ind_seqs = Variable(ind_seqs).transpose(0, 1)
    gete_s = Variable(gete_s).transpose(0, 1)

    if USE_CUDA:
        src_seqs = src_seqs.cuda()
        trg_seqs = trg_seqs.cuda()
        ind_seqs = ind_seqs.cuda()
        gete_s = gete_s.cuda()

    return src_seqs, src_lengths, trg_seqs, trg_lengths, ind_seqs, gete_s, src_plain, trg_plain, entity, entity_cal, entity_nav, entity_wet
Example #26
Source File: utils_babi.py From ConvLab with MIT License
def collate_fn(data):
    def merge(sequences, max_len):
        lengths = [len(seq) for seq in sequences]
        if (max_len):
            padded_seqs = torch.ones(len(sequences), max_len[0]).long()
        else:
            padded_seqs = torch.ones(len(sequences), max(lengths)).long()
        for i, seq in enumerate(sequences):
            end = lengths[i]
            padded_seqs[i, :end] = seq[:end]
        return padded_seqs, lengths

    # sort a list by sequence length (descending order) to use pack_padded_sequence
    data.sort(key=lambda x: len(x[0]), reverse=True)
    # separate source and target sequences
    src_seqs, trg_seqs, ind_seqs, gete_s, max_len, src_plain, trg_plain = zip(*data)

    # merge sequences (from tuple of 1D tensor to 2D tensor)
    src_seqs, src_lengths = merge(src_seqs, max_len)
    trg_seqs, trg_lengths = merge(trg_seqs, None)
    ind_seqs, _ = merge(ind_seqs, None)
    gete_s, _ = merge(gete_s, None)

    src_seqs = Variable(src_seqs).transpose(0, 1)
    trg_seqs = Variable(trg_seqs).transpose(0, 1)
    ind_seqs = Variable(ind_seqs).transpose(0, 1)
    gete_s = Variable(gete_s).transpose(0, 1)

    if USE_CUDA:
        src_seqs = src_seqs.cuda()
        trg_seqs = trg_seqs.cuda()
        ind_seqs = ind_seqs.cuda()
        gete_s = gete_s.cuda()

    return src_seqs, src_lengths, trg_seqs, trg_lengths, ind_seqs, gete_s, src_plain, trg_plain
Example #27
Source File: data_loader.py From VideoSearchEngine with MIT License
def collate_fn(data): """Creates mini-batch tensors from the list of tuples (image, caption). We should build custom collate_fn rather than using default collate_fn, because merging caption (including padding) is not supported in default. Args: data: list of tuple (image, caption). - image: torch tensor of shape (3, 256, 256). - caption: torch tensor of shape (?); variable length. Returns: images: torch tensor of shape (batch_size, 3, 256, 256). targets: torch tensor of shape (batch_size, padded_length). lengths: list; valid length for each padded caption. """ # Sort a data list by caption length (descending order). data.sort(key=lambda x: len(x[1]), reverse=True) images, captions = zip(*data) # Merge images (from tuple of 3D tensor to 4D tensor). images = torch.stack(images, 0) # Merge captions (from tuple of 1D tensor to 2D tensor). lengths = [len(cap) for cap in captions] targets = torch.zeros(len(captions), max(lengths)).long() for i, cap in enumerate(captions): end = lengths[i] targets[i, :end] = cap[:end] return images, targets, lengths
Example #28
Source File: data.py From SCAN with Apache License 2.0
def collate_fn(data): """Build mini-batch tensors from a list of (image, caption) tuples. Args: data: list of (image, caption) tuple. - image: torch tensor of shape (3, 256, 256). - caption: torch tensor of shape (?); variable length. Returns: images: torch tensor of shape (batch_size, 3, 256, 256). targets: torch tensor of shape (batch_size, padded_length). lengths: list; valid length for each padded caption. """ # Sort a data list by caption length data.sort(key=lambda x: len(x[1]), reverse=True) images, captions, ids, img_ids = zip(*data) # Merge images (convert tuple of 3D tensor to 4D tensor) images = torch.stack(images, 0) # Merget captions (convert tuple of 1D tensor to 2D tensor) lengths = [len(cap) for cap in captions] targets = torch.zeros(len(captions), max(lengths)).long() for i, cap in enumerate(captions): end = lengths[i] targets[i, :end] = cap[:end] return images, targets, lengths, ids
Example #29
Source File: data.py From vsepp with Apache License 2.0
def collate_fn(data): """Build mini-batch tensors from a list of (image, caption) tuples. Args: data: list of (image, caption) tuple. - image: torch tensor of shape (3, 256, 256). - caption: torch tensor of shape (?); variable length. Returns: images: torch tensor of shape (batch_size, 3, 256, 256). targets: torch tensor of shape (batch_size, padded_length). lengths: list; valid length for each padded caption. """ # Sort a data list by caption length data.sort(key=lambda x: len(x[1]), reverse=True) images, captions, ids, img_ids = zip(*data) # Merge images (convert tuple of 3D tensor to 4D tensor) images = torch.stack(images, 0) # Merget captions (convert tuple of 1D tensor to 2D tensor) lengths = [len(cap) for cap in captions] targets = torch.zeros(len(captions), max(lengths)).long() for i, cap in enumerate(captions): end = lengths[i] targets[i, :end] = cap[:end] return images, targets, lengths, ids
Example #30
Source File: data_loader.py From VideoSearchEngine with MIT License
def collate_fn(data): """Creates mini-batch tensors from the list of tuples (image, caption). We should build custom collate_fn rather than using default collate_fn, because merging caption (including padding) is not supported in default. Args: data: list of tuple (image, caption). - image: torch tensor of shape (3, 256, 256). - caption: torch tensor of shape (?); variable length. Returns: images: torch tensor of shape (batch_size, 3, 256, 256). targets: torch tensor of shape (batch_size, padded_length). lengths: list; valid length for each padded caption. """ # Sort a data list by caption length (descending order). data.sort(key=lambda x: len(x[1]), reverse=True) images, captions = zip(*data) # Merge images (from tuple of 3D tensor to 4D tensor). images = torch.stack(images, 0) # print(images.shape) # Merge captions (from tuple of 1D tensor to 2D tensor). lengths = [len(cap) for cap in captions] targets = torch.zeros(len(captions), max(lengths)).long() for i, cap in enumerate(captions): end = lengths[i] targets[i, :end] = cap[:end] return images, targets, lengths