Python eval.evaluate() Examples
The following are 6 code examples of eval.evaluate(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module eval, or try the search function.
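In every example below, eval refers to a project-local eval.py (usually imported under an alias such as eval_ or eval_script to avoid shadowing Python's built-in eval()), and evaluate() is the metric-computation entry point that module exposes. As a rough orientation before the project-specific code, here is a minimal, hypothetical sketch of the shared pattern; the (model, dataset) signature and the returned metrics dict are assumptions for illustration only, not part of any project below.

# Hypothetical sketch of the common usage pattern; the real signatures
# differ per project, as the examples below show.
import eval as eval_script  # a project-local eval.py, not the builtin eval()

def validate(model, dataset):
    model.eval()                                    # switch to inference mode
    metrics = eval_script.evaluate(model, dataset)  # delegate scoring to eval.py
    model.train()                                   # restore training mode
    return metrics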
Example #1
Source File: train.py From yolact with MIT License | 5 votes
def compute_validation_map(epoch, iteration, yolact_net, dataset, log:Log=None):
    with torch.no_grad():
        yolact_net.eval()

        start = time.time()
        print()
        print("Computing validation mAP (this may take a while)...", flush=True)

        val_info = eval_script.evaluate(yolact_net, dataset, train_mode=True)
        end = time.time()

        if log is not None:
            log.log('val', val_info, elapsed=(end - start), epoch=epoch, iter=iteration)

        yolact_net.train()
Example #2
Source File: main.py From QGforQA with MIT License | 5 votes
def evaluate_batch(config, model_para, model_qg, sess_para, sess_qg, num_batches, eval_file,
                   iterator, id2word, map_to_orig, evaluate_func=evaluate):
    answer_dict = {}
    losses = []
    next_element = iterator.get_next()
    for _ in tqdm(range(1, num_batches + 1)):
        para, para_unk, ques, labels, pos_tags, ner_tags, qa_id = sess_para.run(next_element)
        # Obtain BERT embeddings for the paragraph from the encoder graph.
        para_emb = sess_para.run(model_para.bert_emb, feed_dict={model_para.input_ids: para_unk})
        # Run the question-generation graph to get loss, decoded symbols and probabilities.
        loss, symbols, probs = sess_qg.run(
            [model_qg.loss, model_qg.symbols, model_qg.probs],
            feed_dict={
                model_qg.para: para,
                model_qg.bert_para: para_emb,
                model_qg.que: ques,
                model_qg.labels: labels,
                model_qg.pos_tags: pos_tags,
                model_qg.ner_tags: ner_tags,
                model_qg.qa_id: qa_id,
                model_qg.temperature: config.temperature,
                model_qg.diverse_rate: config.diverse_rate
            })
        answer_dict_ = convert_tokens_seq(eval_file, qa_id, symbols, probs, id2word, map_to_orig)
        for key in answer_dict_:
            if key not in answer_dict:
                answer_dict[key] = answer_dict_[key]
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate_func(eval_file, answer_dict)
    metrics["loss"] = loss
    return metrics
Example #3
Source File: run_eval.py From g-tensorflow-models with Apache License 2.0 | 5 votes
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    assert FLAGS.checkpoint_dir, "Flag 'checkpoint_dir' must be set."
    assert FLAGS.eval_dir, "Flag 'eval_dir' must be set."

    if FLAGS.config_file:
        for config_file in FLAGS.config_file:
            gin.parse_config_file(config_file)
    if FLAGS.params:
        gin.parse_config(FLAGS.params)

    eval_.evaluate(FLAGS.checkpoint_dir, FLAGS.eval_dir)
Example #4
Source File: run_eval.py From models with Apache License 2.0 | 5 votes
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    assert FLAGS.checkpoint_dir, "Flag 'checkpoint_dir' must be set."
    assert FLAGS.eval_dir, "Flag 'eval_dir' must be set."

    if FLAGS.config_file:
        for config_file in FLAGS.config_file:
            gin.parse_config_file(config_file)
    if FLAGS.params:
        gin.parse_config(FLAGS.params)

    eval_.evaluate(FLAGS.checkpoint_dir, FLAGS.eval_dir)
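Examples #3 and #4 depend on absl/TF-style command-line flags and gin configuration defined elsewhere in run_eval.py. The block below is a hypothetical reconstruction of those definitions, with names chosen only to match the FLAGS attributes used above; the original files may declare them differently, and main(_) is typically dispatched via tf.app.run().

# Hypothetical flag definitions matching the FLAGS attributes referenced in
# main() above; the originals may differ.
from absl import flags
import gin

flags.DEFINE_string("checkpoint_dir", None, "Directory containing model checkpoints.")
flags.DEFINE_string("eval_dir", None, "Directory to write evaluation results to.")
flags.DEFINE_multi_string("config_file", None, "Gin configuration file(s) to parse.")
flags.DEFINE_multi_string("params", None, "Additional gin parameter bindings.")
FLAGS = flags.FLAGS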
Example #5
Source File: run_squad.py From BERT-for-Chinese-Question-Answering with Apache License 2.0 | 4 votes
def run_evaluate(args, model, eval_features, device, eval_examples, tokenizer, best_dev_score=None):
    logger.info("***** Running predictions *****")
    logger.info("  Num orig examples = %d", len(eval_examples))
    logger.info("  Num split examples = %d", len(eval_features))
    logger.info("  Batch size = %d", args.predict_batch_size)

    # Build the evaluation dataset from the pre-computed features.
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
    all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index)
    if args.local_rank == -1:
        eval_sampler = SequentialSampler(eval_data)
    else:
        eval_sampler = DistributedSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.predict_batch_size)

    model.eval()
    all_results = []
    logger.info("Start evaluating")
    for input_ids, input_mask, segment_ids, example_indices in tqdm(eval_dataloader, desc="Evaluating"):
        if len(all_results) % 1000 == 0:
            logger.info("Processing example: %d" % (len(all_results)))
        input_ids = input_ids.to(device)
        input_mask = input_mask.to(device)
        segment_ids = segment_ids.to(device)
        with torch.no_grad():
            batch_start_logits, batch_end_logits = model(input_ids, segment_ids, input_mask)
        for i, example_index in enumerate(example_indices):
            start_logits = batch_start_logits[i].detach().cpu().tolist()
            end_logits = batch_end_logits[i].detach().cpu().tolist()
            eval_feature = eval_features[example_index.item()]
            unique_id = int(eval_feature.unique_id)
            all_results.append(RawResult(unique_id=unique_id,
                                         start_logits=start_logits,
                                         end_logits=end_logits))

    # Convert raw logits into n-best predictions and score them against the dataset.
    output_prediction_file = os.path.join(args.output_dir, "predictions.json")
    output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json")
    predict, nbest = write_predictions(eval_examples, eval_features, all_results,
                                       args.n_best_size, args.max_answer_length,
                                       args.do_lower_case, output_prediction_file,
                                       output_nbest_file, args.verbose_logging)

    with open(args.predict_file) as dataset_file:
        dataset_json = json.load(dataset_file)
        dataset = dataset_json['data']
    dev_score = evaluate(dataset, predict, tokenizer)['f1']
    logger.info('Dev score : {}'.format(dev_score))
    if best_dev_score is not None and dev_score > best_dev_score:
        logger.info('Saving model with dev score: {}'.format(dev_score))
        best_dev_score = dev_score
        write_to_file(predict, nbest, output_prediction_file, output_nbest_file)
        torch.save(model.state_dict(), args.finetuned_checkpoint)
    else:
        write_to_file(predict, nbest, output_prediction_file, output_nbest_file)
    return best_dev_score
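The RawResult container used in Example #5 is not defined in the snippet itself; in BERT-style SQuAD scripts it is usually a plain namedtuple along the lines of the assumed definition below, mirroring the three fields accessed above.

# Assumed definition of RawResult (not shown in the snippet); it simply bundles
# the per-example prediction fields consumed by write_predictions().
import collections

RawResult = collections.namedtuple("RawResult",
                                   ["unique_id", "start_logits", "end_logits"])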
Example #6
Source File: train.py From Pytorch-STN with MIT License | 4 votes
def train_and_eval(net, train_loader, val_loader, optimizer, loss_fn, metrics, params, model_dir, restore=None):
    """
    Train and evaluate every epoch of a model.
    net: The model.
    train/val loader: The data loaders
    params: The parameters parsed from JSON file
    restore: if there is a checkpoint restore from that point.
    """
    best_val_acc = 0.0

    if restore is not None:
        restore_file = os.path.join(args.param_path, args.resume_path + '_pth.tar')
        logging.info("Loaded checkpoints from:{}".format(restore_file))
        utils.load_checkpoint(restore_file, net, optimizer)

    for ep in range(params.num_epochs):
        logging.info("Running epoch: {}/{}".format(ep+1, params.num_epochs))

        # train one epoch
        train(net, train_loader, loss_fn, params, metrics, optimizer)
        val_metrics = evaluate(net, val_loader, loss_fn, params, metrics)

        val_acc = val_metrics['accuracy']
        isbest = val_acc >= best_val_acc

        utils.save_checkpoint({"epoch": ep, "state_dict": net.state_dict(),
                               "optimizer": optimizer.state_dict()},
                              isBest=isbest, ckpt_dir=model_dir)

        if isbest:
            # if the accuracy is great save it to best.json
            logging.info("New best accuracy found!")
            best_val_acc = val_acc
            best_json_path = os.path.join(model_dir, "best_model_params.json")
            utils.save_dict_to_json(val_metrics, best_json_path)

        last_acc_path = os.path.join(model_dir, 'last_acc_metrics.json')
        utils.save_dict_to_json(val_metrics, last_acc_path)