Python evaluation.i2t() Examples
The following are 6 code examples of evaluation.i2t().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module evaluation, or try the search function.
Example #1
Source File: train.py From VSE-C with MIT License | 5 votes |
def validate(opt, val_loader, model):
    """Run retrieval evaluation on the validation set and log the metrics.

    Encodes the validation data with ``encode_data``, scores it in both
    directions with ``i2t`` and ``t2i``, writes the results to ``logging``
    and to the ``tb_logger`` in scope, and returns the score used for early
    stopping.

    Args:
        opt: Options object; ``opt.log_step`` and ``opt.measure`` are read.
        val_loader: Validation data loader, forwarded to ``encode_data``.
        model: Model to evaluate; ``model.Eiters`` is used as the logging step.

    Returns:
        ``r1 + r5 + r10 + r1i + r5i + r10i``, the sum of the recall values
        reported by ``i2t`` and ``t2i``.
    """
    # compute the encoding for all the validation images and captions
    img_embs, cap_embs = encode_data(
        model, val_loader, opt.log_step, logging.info)

    # caption retrieval
    (r1, r5, r10, medr, meanr) = i2t(img_embs, cap_embs, measure=opt.measure)
    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1, r5, r10, medr, meanr))

    # image retrieval
    # The mean rank gets its own name here: unpacking it into `meanr` would
    # overwrite the caption-retrieval value before it is logged below.
    (r1i, r5i, r10i, medri, meanri) = t2i(
        img_embs, cap_embs, measure=opt.measure)
    logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1i, r5i, r10i, medri, meanri))

    # sum of recalls to be used for early stopping
    currscore = r1 + r5 + r10 + r1i + r5i + r10i

    # record metrics in tensorboard
    tb_logger.log_value('r1', r1, step=model.Eiters)
    tb_logger.log_value('r5', r5, step=model.Eiters)
    tb_logger.log_value('r10', r10, step=model.Eiters)
    tb_logger.log_value('medr', medr, step=model.Eiters)
    tb_logger.log_value('meanr', meanr, step=model.Eiters)
    tb_logger.log_value('r1i', r1i, step=model.Eiters)
    tb_logger.log_value('r5i', r5i, step=model.Eiters)
    tb_logger.log_value('r10i', r10i, step=model.Eiters)
    tb_logger.log_value('medri', medri, step=model.Eiters)
    tb_logger.log_value('meanri', meanri, step=model.Eiters)
    tb_logger.log_value('rsum', currscore, step=model.Eiters)

    return currscore
Example #2
Source File: train.py From vsepp with Apache License 2.0 | 5 votes |
def validate(opt, val_loader, model):
    """Evaluate the model on the validation split and report retrieval scores.

    The validation data is encoded once with ``encode_data`` and then scored
    with ``i2t`` (caption retrieval) and ``t2i`` (image retrieval). Every
    metric is written to ``logging`` and to the ``tb_logger`` in scope.

    Args:
        opt: Options object; ``opt.log_step`` and ``opt.measure`` are read.
        val_loader: Validation data loader, forwarded to ``encode_data``.
        model: Model to evaluate; ``model.Eiters`` is used as the logging step.

    Returns:
        The sum of the six recall values (``r1``, ``r5``, ``r10`` for each
        direction), intended for early stopping.
    """
    # compute the encoding for all the validation images and captions
    img_embs, cap_embs = encode_data(
        model, val_loader, opt.log_step, logging.info)

    # caption retrieval
    (r1, r5, r10, medr, meanr) = i2t(img_embs, cap_embs, measure=opt.measure)
    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1, r5, r10, medr, meanr))

    # image retrieval
    # Use a distinct `meanri` so the image-to-text mean rank above is not
    # overwritten before it reaches tensorboard.
    (r1i, r5i, r10i, medri, meanri) = t2i(
        img_embs, cap_embs, measure=opt.measure)
    logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1i, r5i, r10i, medri, meanri))

    # sum of recalls to be used for early stopping
    currscore = r1 + r5 + r10 + r1i + r5i + r10i

    # record metrics in tensorboard
    tb_logger.log_value('r1', r1, step=model.Eiters)
    tb_logger.log_value('r5', r5, step=model.Eiters)
    tb_logger.log_value('r10', r10, step=model.Eiters)
    tb_logger.log_value('medr', medr, step=model.Eiters)
    tb_logger.log_value('meanr', meanr, step=model.Eiters)
    tb_logger.log_value('r1i', r1i, step=model.Eiters)
    tb_logger.log_value('r5i', r5i, step=model.Eiters)
    tb_logger.log_value('r10i', r10i, step=model.Eiters)
    tb_logger.log_value('medri', medri, step=model.Eiters)
    tb_logger.log_value('meanri', meanri, step=model.Eiters)
    tb_logger.log_value('rsum', currscore, step=model.Eiters)

    return currscore
Example #3
Source File: train.py From CAMP_iccv19 with Apache License 2.0 | 5 votes |
def validate(opt, val_loader, model, tb_logger):
    """Score the model on the validation set and log both retrieval directions.

    Args:
        opt: Options object; ``opt.log_step`` and ``opt.measure`` are read.
        val_loader: Validation data loader, forwarded to ``encode_data``.
        model: Model under evaluation. ``model.val_start()`` is called first,
            the model is passed on to ``i2t``, and ``model.Eiters`` is the
            step for every logged value.
        tb_logger: Object exposing ``log_value(tag, value, step=...)``.

    Returns:
        Sum of the r1/r5/r10 values of both directions (used for early
        stopping).
    """
    print("start validate")
    model.val_start()

    # Encode every validation image and caption once.
    img_embs, cap_embs, cap_masks = encode_data(
        model, val_loader, opt.log_step, logging.info)

    # A single i2t call yields the metrics for both directions here, so no
    # separate t2i call is made.
    i2t_metrics, t2i_metrics = i2t(
        img_embs, cap_embs, cap_masks, measure=opt.measure, model=model)
    i2t_r1, i2t_r5, i2t_r10, i2t_medr, i2t_meanr = i2t_metrics
    t2i_r1, t2i_r5, t2i_r10, t2i_medr, t2i_meanr = t2i_metrics

    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (i2t_r1, i2t_r5, i2t_r10, i2t_medr, i2t_meanr))
    logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (t2i_r1, t2i_r5, t2i_r10, t2i_medr, t2i_meanr))

    # Early-stopping score: the six recall values added together.
    currscore = i2t_r1 + i2t_r5 + i2t_r10 + t2i_r1 + t2i_r5 + t2i_r10

    # Tensorboard scalars, emitted in this exact order.
    scalars = (
        ('i2t_r1', i2t_r1),
        ('i2t_r5', i2t_r5),
        ('i2t_r10', i2t_r10),
        ('i2t_medr', i2t_medr),
        ('i2t_meanr', i2t_meanr),
        ('t2i_r1', t2i_r1),
        ('t2i_r5', t2i_r5),
        ('t2i_r10', t2i_r10),
        ('t2i_medr', t2i_medr),
        ('t2i_meanr', t2i_meanr),
        ('rsum', currscore),
    )
    for tag, value in scalars:
        tb_logger.log_value(tag, value, step=model.Eiters)

    return currscore
Example #4
Source File: trainer.py From dual_encoding with Apache License 2.0 | 4 votes |
def parse_args(argv=None):
    """Build the training argument parser and parse the command line.

    Help texts that advertise a default use argparse's ``%(default)s``
    placeholder, so the printed default always matches the actual one.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the original behavior.

    Returns:
        The ``argparse.Namespace`` holding the parsed options.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--rootpath', type=str, default=ROOT_PATH,
                        help='path to datasets. (default: %(default)s)')
    parser.add_argument('trainCollection', type=str, help='train collection')
    parser.add_argument('valCollection', type=str, help='validation collection')
    parser.add_argument('testCollection', type=str, help='test collection')
    parser.add_argument('--n_caption', type=int, default=20,
                        help='number of captions of each image/video (default: %(default)s)')
    parser.add_argument('--overwrite', type=int, default=0, choices=[0, 1],
                        help='overwrite existed file. (default: 0)')

    # model
    parser.add_argument('--model', type=str, default='dual_encoding',
                        help='model name. (default: dual_encoding)')
    parser.add_argument('--concate', type=str, default='full',
                        help='feature concatenation style. (full|reduced) full=level 1+2+3; reduced=level 2+3')
    parser.add_argument('--measure', type=str, default='cosine',
                        help='measure method. (default: cosine)')
    parser.add_argument('--dropout', default=0.2, type=float,
                        help='dropout rate (default: 0.2)')

    # text-side multi-level encoding
    parser.add_argument('--vocab', type=str, default='word_vocab_5',
                        help='word vocabulary. (default: word_vocab_5)')
    parser.add_argument('--word_dim', type=int, default=500,
                        help='word embedding dimension')
    parser.add_argument('--text_rnn_size', type=int, default=512,
                        help='text rnn encoder size. (default: %(default)s)')
    parser.add_argument('--text_kernel_num', default=512, type=int,
                        help='number of each kind of text kernel')
    parser.add_argument('--text_kernel_sizes', default='2-3-4', type=str,
                        help='dash-separated kernel size to use for text convolution')
    parser.add_argument('--text_norm', action='store_true',
                        help='normalize the text embeddings at last layer')

    # video-side multi-level encoding
    parser.add_argument('--visual_feature', type=str,
                        default='resnet-152-img1k-flatten0_outputos',
                        help='visual feature.')
    parser.add_argument('--visual_rnn_size', type=int, default=1024,
                        help='visual rnn encoder size')
    parser.add_argument('--visual_kernel_num', default=512, type=int,
                        help='number of each kind of visual kernel')
    parser.add_argument('--visual_kernel_sizes', default='2-3-4-5', type=str,
                        help='dash-separated kernel size to use for visual convolution')
    parser.add_argument('--visual_norm', action='store_true',
                        help='normalize the visual embeddings at last layer')

    # common space learning
    parser.add_argument('--text_mapping_layers', type=str, default='0-2048',
                        help='text fully connected layers for common space learning. (default: 0-2048)')
    parser.add_argument('--visual_mapping_layers', type=str, default='0-2048',
                        help='visual fully connected layers for common space learning. (default: 0-2048)')

    # loss
    parser.add_argument('--loss_fun', type=str, default='mrl',
                        help='loss function')
    parser.add_argument('--margin', type=float, default=0.2,
                        help='rank loss margin')
    parser.add_argument('--direction', type=str, default='all',
                        help='retrieval direction (all|t2i|i2t)')
    parser.add_argument('--max_violation', action='store_true',
                        help='use max instead of sum in the rank loss')
    parser.add_argument('--cost_style', type=str, default='sum',
                        help='cost style (sum, mean). (default: sum)')

    # optimizer
    parser.add_argument('--optimizer', type=str, default='adam',
                        help='optimizer. (default: %(default)s)')
    parser.add_argument('--learning_rate', type=float, default=0.0001,
                        help='initial learning rate')
    parser.add_argument('--lr_decay_rate', default=0.99, type=float,
                        help='learning rate decay rate. (default: 0.99)')
    parser.add_argument('--grad_clip', type=float, default=2,
                        help='gradient clipping threshold')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--val_metric', default='recall', type=str,
                        help='performance metric for validation (mir|recall)')

    # misc
    parser.add_argument('--num_epochs', default=50, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--workers', default=5, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--postfix', default='runs_0',
                        help='Path to save the model and Tensorboard log.')
    parser.add_argument('--log_step', default=10, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--cv_name', default='cvpr_2019', type=str, help='')

    args = parser.parse_args(argv)
    return args
Example #5
Source File: train.py From SCAN with Apache License 2.0 | 4 votes |
def validate(opt, val_loader, model):
    """Evaluate cross-attention retrieval on the validation set and log metrics.

    Encodes the validation data, computes the similarity matrix with the
    sharded cross-attention routine selected by ``opt.cross_attn``, scores it
    with ``i2t`` and ``t2i``, and writes every metric to ``logging`` and to
    the ``tb_logger`` in scope.

    Args:
        opt: Options object; ``opt.log_step`` and ``opt.cross_attn`` are read
            here, and the whole object is forwarded to the similarity routine.
        val_loader: Validation data loader, forwarded to ``encode_data``.
        model: Model to evaluate; ``model.Eiters`` is used as the logging step.

    Returns:
        ``r1 + r5 + r10 + r1i + r5i + r10i``, the sum of recalls used for
        early stopping.

    Raises:
        NotImplementedError: If ``opt.cross_attn`` is neither ``'t2i'`` nor
            ``'i2t'``.
    """
    # compute the encoding for all the validation images and captions
    img_embs, cap_embs, cap_lens = encode_data(
        model, val_loader, opt.log_step, logging.info)

    # Keep every 5th image embedding.
    # NOTE(review): presumably each image is repeated once per caption (5 per
    # image) in the loader output; confirm against the dataset code.
    img_embs = numpy.array([img_embs[i] for i in range(0, len(img_embs), 5)])

    start = time.time()
    if opt.cross_attn == 't2i':
        sims = shard_xattn_t2i(img_embs, cap_embs, cap_lens, opt, shard_size=128)
    elif opt.cross_attn == 'i2t':
        sims = shard_xattn_i2t(img_embs, cap_embs, cap_lens, opt, shard_size=128)
    else:
        raise NotImplementedError
    end = time.time()
    print("calculate similarity time:", end-start)

    # caption retrieval
    (r1, r5, r10, medr, meanr) = i2t(img_embs, cap_embs, cap_lens, sims)
    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1, r5, r10, medr, meanr))

    # image retrieval
    # Bind the mean rank to `meanri`; reusing `meanr` would discard the
    # caption-retrieval mean rank before it is written to tensorboard.
    (r1i, r5i, r10i, medri, meanri) = t2i(
        img_embs, cap_embs, cap_lens, sims)
    logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1i, r5i, r10i, medri, meanri))

    # sum of recalls to be used for early stopping
    currscore = r1 + r5 + r10 + r1i + r5i + r10i

    # record metrics in tensorboard
    tb_logger.log_value('r1', r1, step=model.Eiters)
    tb_logger.log_value('r5', r5, step=model.Eiters)
    tb_logger.log_value('r10', r10, step=model.Eiters)
    tb_logger.log_value('medr', medr, step=model.Eiters)
    tb_logger.log_value('meanr', meanr, step=model.Eiters)
    tb_logger.log_value('r1i', r1i, step=model.Eiters)
    tb_logger.log_value('r5i', r5i, step=model.Eiters)
    tb_logger.log_value('r10i', r10i, step=model.Eiters)
    tb_logger.log_value('medri', medri, step=model.Eiters)
    tb_logger.log_value('meanri', meanri, step=model.Eiters)
    tb_logger.log_value('rsum', currscore, step=model.Eiters)

    return currscore
Example #6
Source File: test_modules.py From CAMP_iccv19 with Apache License 2.0 | 4 votes |
def test_CAMP_model(config_path):
    """Smoke-test a CAMP model: build it, optionally restore it, and evaluate.

    Reads the ``common`` section of the YAML config at ``config_path``, loads
    the pickled vocabulary, constructs the model (restoring weights from
    ``opt.resume`` when it is set), encodes the test split and logs the
    retrieval metrics returned by ``i2t`` for both directions.

    Args:
        config_path: Path to a YAML config file with a top-level ``common``
            mapping, e.g. "./experiments/f30k_cross_attention/config_test.yaml".

    Returns:
        None. Results are reported through ``print`` and ``logging``.
    """
    print("OK!")
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

    # safe_load: yaml.load() without an explicit Loader can construct
    # arbitrary objects and is rejected outright by PyYAML >= 6.
    with open(config_path) as f:
        opt = yaml.safe_load(f)
    opt = EasyDict(opt['common'])

    vocab_file = os.path.join(opt.vocab_path, '%s_vocab.pkl' % opt.data_name)
    # NOTE: unpickling executes code embedded in the file; only point
    # opt.vocab_path at trusted vocabulary files.
    with open(vocab_file, 'rb') as vocab_fh:
        vocab = pickle.load(vocab_fh)
    opt.vocab_size = len(vocab)

    train_logger = LogCollector()

    print("----Start init model----")
    camp = model.CAMP(opt)
    camp.logger = train_logger

    if opt.resume is not None:
        ckp = torch.load(opt.resume)
        camp.load_state_dict(ckp["model"])

    camp.train_start()
    print("----Model init success----")

    # Disabled train_emb smoke test, kept for reference:
    # fake_img = torch.randn(16, 36, opt.img_dim)
    # fake_text = torch.ones(16, 32).long()
    # fake_lengths = torch.Tensor([32] * 16)
    # fake_pos = torch.ones(16, 32).long()
    # fake_ids = torch.ones(16).long()
    # camp.train_emb(fake_img, fake_text, fake_lengths, instance_ids=fake_ids)
    # print("----Test train_emb success----")

    # The train/val loaders are not used below; the call is kept so loader
    # construction is still exercised as in the original test.
    train_loader, val_loader = data.get_loaders(
        opt.data_name, vocab, opt.crop_size, 128, 4, opt)
    test_loader = data.get_test_loader("test", opt.data_name, vocab,
                                       opt.crop_size, 128, 4, opt)

    camp.val_start()

    img_embs, cap_embs, cap_masks = encode_data(
        camp, test_loader, opt.log_step, logging.info)

    # With return_ranks=True this i2t returns a third element (the score
    # matrix) after the two metric tuples.
    (r1, r5, r10, medr, meanr), (r1i, r5i, r10i, medri, meanri), score_matrix = i2t(
        img_embs, cap_embs, cap_masks, measure=opt.measure, model=camp,
        return_ranks=True)

    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1, r5, r10, medr, meanr))
    logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1i, r5i, r10i, medri, meanri))