Python evaluation.Evaluation() Examples

The following are 9 code examples of evaluation.Evaluation(), drawn from the open-source projects named above each example.
Example #1
Source File: evaluator.py    From aristo-leaderboard with Apache License 2.0
def report(e: Evaluation, num_predictions: int, num_answers: int):
    i = e.inputs
    o = e.outputs
    c = e.conversions
    m = e.moves
    overall = e.overall
    print("=================================================")
    print("Question     Avg. Precision  Avg. Recall  Avg. F1")
    print("-------------------------------------------------")
    print("Inputs                %4.3f        %4.3f    %4.3f" % (i.precision, i.recall, i.F1()))
    print("Outputs               %4.3f        %4.3f    %4.3f" % (o.precision, o.recall, o.F1()))
    print("Conversions           %4.3f        %4.3f    %4.3f" % (c.precision, c.recall, c.F1()))
    print("Moves                 %4.3f        %4.3f    %4.3f" % (m.precision, m.recall, m.F1()))
    print("-------------------------------------------------")
    print("Overall Precision %4.3f                          " % overall.precision)
    print("Overall Recall    %4.3f                          " % overall.recall)
    print("Overall F1        %4.3f                          " % overall.F1())
    print("=================================================")
    print()
    print(f"Evaluated {num_predictions} predictions against {num_answers} answers.")
    print() 
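For orientation, report() above only touches a handful of attributes on the Evaluation object it receives: inputs, outputs, conversions, moves and overall, each exposing precision, recall and an F1() method. The minimal sketch below drives it with hypothetical stand-in classes (StubMetric and StubEvaluation are illustrative only, not part of the aristo-leaderboard code):

from dataclasses import dataclass

@dataclass
class StubMetric:
    # Hypothetical stand-in for the metric objects report() reads.
    precision: float
    recall: float

    def F1(self) -> float:
        # Harmonic mean of precision and recall (0.0 when both are zero).
        if self.precision + self.recall == 0:
            return 0.0
        return 2 * self.precision * self.recall / (self.precision + self.recall)

@dataclass
class StubEvaluation:
    # Mirrors only the attributes report() accesses on the real Evaluation.
    inputs: StubMetric
    outputs: StubMetric
    conversions: StubMetric
    moves: StubMetric
    overall: StubMetric

m = StubMetric(precision=0.8, recall=0.7)
report(StubEvaluation(m, m, m, m, m), num_predictions=100, num_answers=100)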
Example #2
Source File: demo.py    From FashionAI_KeyPoint_Detection_Challenge_Keras with MIT License
def demo(modelfile):

    # load network
    xEval = Evaluation('all', modelfile)

    # load images and run prediction
    testfile = os.path.join("../../data/test/", 'test.csv')
    xdf = pd.read_csv(testfile)
    xdf = xdf.sample(frac=1.0)

    for _index, _row in xdf.iterrows():
        _image_id = _row['image_id']
        _category = _row['image_category']
        imageName = os.path.join("../../data/test", _image_id)
        print(_image_id, _category)
        dtkp = xEval.predict_kp_with_rotate(imageName, _category)
        visualize_keypoint(imageName, _category, dtkp) 
Example #3
Source File: rationale.py    From rcnn with Apache License 2.0
def evaluate(self, data, eval_func):
        res = [ ]
        for idts, labels in data:
            scores = eval_func(idts)
            #print scores.shape, len(labels)
            #print labels
            assert len(scores) == len(labels)
            ranks = (-scores).argsort()
            ranked_labels = labels[ranks]
            res.append(ranked_labels)
        e = Evaluation(res)
        MAP = e.MAP()*100
        MRR = e.MRR()*100
        P1 = e.Precision(1)*100
        P5 = e.Precision(5)*100
        return MAP, MRR, P1, P5 
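The evaluate() method here (and the similar ones in Examples #6 and #7 below) builds res as a list of 0/1 relevance labels sorted by predicted score and hands it to Evaluation, which reports MAP, MRR and Precision@k. As an illustration only, and not the rcnn project's actual implementation, those ranking metrics can be computed from such ranked label lists like this:

import numpy as np

def mean_average_precision(res):
    # res: list of arrays of 0/1 labels, each sorted by descending score.
    aps = []
    for labels in res:
        labels = np.asarray(labels)
        if labels.sum() == 0:
            continue  # skip queries with no relevant items
        hits = np.cumsum(labels)
        ranks = np.arange(1, len(labels) + 1)
        precisions = hits / ranks
        aps.append((precisions * labels).sum() / labels.sum())
    return float(np.mean(aps)) if aps else 0.0

def mean_reciprocal_rank(res):
    rr = []
    for labels in res:
        idx = np.flatnonzero(np.asarray(labels))
        rr.append(1.0 / (idx[0] + 1) if len(idx) else 0.0)
    return float(np.mean(rr))

def precision_at_k(res, k):
    return float(np.mean([np.mean(np.asarray(labels)[:k]) for labels in res]))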
Example #4
Source File: eval_callback.py    From FashionAI_KeyPoint_Detection_Challenge_Keras with MIT License
def on_epoch_end(self, epoch, logs=None):
        modelName = os.path.join(self.foldPath, self.category+"_weights_"+str(epoch)+".hdf5")
        keras.models.save_model(self.model, modelName)
        print("Saving model to ", modelName)

        print("Running evaluation .........")

        xEval = Evaluation(self.category, None)
        xEval.init_from_model(self.model)

        start = time()
        neScore, categoryDict = xEval.eval(self.multiOut, details=True)
        end = time()
        print("Evaluation Done", str(neScore), " cost ", end - start, " seconds!")

        for key in categoryDict.keys():
            scores = categoryDict[key]
            print(key, ' score ', sum(scores)/len(scores))

        with open(self.valLog, 'a+') as xfile:
            xfile.write(modelName + ", Score " + str(neScore) + "\n")
            for key in categoryDict.keys():
                scores = categoryDict[key]
                xfile.write(key + ": " + str(sum(scores)/len(scores)) + "\n")

Example #5
Source File: test.py    From FashionAI_KeyPoint_Detection_Challenge_Keras with MIT License
def main_test(savepath, modelpath, augmentFlag):

    valfile = os.path.join(modelpath, 'val.log')
    bestmodels = get_best_single_model(valfile)

    print(bestmodels, augmentFlag)

    xEval = Evaluation('all', bestmodels[0])

    # load images and run prediction
    testfile = os.path.join("../../data/test/", 'test.csv')

    for category in ['skirt', 'blouse', 'trousers', 'outwear', 'dress']:
        xdict = dict()
        xdf = load_image_names(testfile, category)
        print(len(xdf), " images to process ", category)

        count = 0
        for _index, _row in xdf.iterrows():
            count += 1
            if count%1000 == 0:
                print(count, "images have been processed")

            _image_id = _row['image_id']
            imageName = os.path.join("../../data/test", _image_id)
            if augmentFlag:
                dtkp = xEval.predict_kp_with_rotate(imageName, _row['image_category'])
            else:
                dtkp = xEval.predict_kp(imageName, _row['image_category'], multiOutput=True)
            xdict[_image_id] = dtkp

        savefile = os.path.join(savepath, category+'.pkl')
        with open(savefile, 'wb') as xfile:
            pickle.dump(xdict, xfile)

        print("prediction saved to ", savefile)
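A brief usage note: each category's predictions are pickled as a dict mapping image_id to the predicted keypoints, so they can be loaded back later, for example (savepath assumed to be the same directory passed to main_test):

import os
import pickle

savepath = "./predictions"  # assumed: the directory passed to main_test above

# Read back one of the per-category prediction files written above.
with open(os.path.join(savepath, 'skirt.pkl'), 'rb') as xfile:
    predictions = pickle.load(xfile)   # dict: image_id -> predicted keypoints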
Example #6
Source File: main.py    From rcnn with Apache License 2.0
def evaluate(self, data, eval_func):
        res = [ ]
        for idts, idbs, labels in data:
            scores = eval_func(idts, idbs)
            assert len(scores) == len(labels)
            ranks = (-scores).argsort()
            ranked_labels = labels[ranks]
            res.append(ranked_labels)
        e = Evaluation(res)
        MAP = e.MAP()*100
        MRR = e.MRR()*100
        P1 = e.Precision(1)*100
        P5 = e.Precision(5)*100
        return MAP, MRR, P1, P5 
Example #7
Source File: main.py    From rcnn with Apache License 2.0
def evaluate(self, data, eval_func):
        res = [ ]
        for t, b, labels in data:
            idts, idbs = myio.create_one_batch(t, b, self.padding_id)
            scores = eval_func(idts)
            #assert len(scores) == len(labels)
            ranks = (-scores).argsort()
            ranked_labels = labels[ranks]
            res.append(ranked_labels)
        e = Evaluation(res)
        MAP = e.MAP()*100
        MRR = e.MRR()*100
        P1 = e.Precision(1)*100
        P5 = e.Precision(5)*100
        return MAP, MRR, P1, P5 
Example #8
Source File: test.py    From blitznet with MIT License
def main(argv=None):  # pylint: disable=unused-argument
    assert args.ckpt > 0 or args.batch_eval
    assert args.detect or args.segment, "Either detect or segment should be True"
    if args.trunk == 'resnet50':
        net = ResNet
        depth = 50
    if args.trunk == 'resnet101':
        net = ResNet
        depth = 101
    if args.trunk == 'vgg16':
        net = VGG
        depth = 16

    net = net(config=net_config, depth=depth, training=False)

    if args.dataset == 'voc07' or args.dataset == 'voc07+12':
        loader = VOCLoader('07', 'test')
    if args.dataset == 'voc12':
        loader = VOCLoader('12', 'val', segmentation=args.segment)
    if args.dataset == 'coco':
        loader = COCOLoader(args.split)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False)) as sess:
        detector = Detector(sess, net, loader, net_config, no_gt=args.no_seg_gt)
        if args.dataset == 'coco':
            tester = COCOEval(detector, loader)
        else:
            tester = Evaluation(detector, loader, iou_thresh=args.voc_iou_thresh)
        if not args.batch_eval:
            detector.restore_from_ckpt(args.ckpt)
            tester.evaluate_network(args.ckpt)
        else:
            log.info('Evaluating %s' % args.run_name)
            ckpts_folder = CKPT_ROOT + args.run_name + '/'
            out_file = ckpts_folder + evaluation_logfile

            max_checked = get_last_eval(out_file)
            log.debug("Maximum checked ckpt is %i" % max_checked)
            with open(out_file, 'a') as f:
                start = max(args.min_ckpt, max_checked+1)
                ckpt_files = glob(ckpts_folder + '*.data*')
                folder_has_nums = np.array(list((map(filename2num, ckpt_files))), dtype='int')
                nums_available = sorted(folder_has_nums[folder_has_nums >= start])
                nums_to_eval = [nums_available[-1]]
                for n in reversed(nums_available):
                    if nums_to_eval[-1] - n >= args.step:
                        nums_to_eval.append(n)
                nums_to_eval.reverse()

                for ckpt in nums_to_eval:
                    log.info("Evaluation of ckpt %i" % ckpt)
                    tester.reset()
                    detector.restore_from_ckpt(ckpt)
                    res = tester.evaluate_network(ckpt)
                    f.write(res)
                    f.flush() 
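The batch-evaluation branch above thins out the available checkpoints so that consecutive evaluated checkpoints are at least args.step apart, while always keeping the newest one. Pulled out as a standalone helper for illustration (not part of blitznet itself), the selection logic is:

def select_ckpts(nums_available, step):
    # Keep the newest checkpoint, then walk backwards and add a checkpoint
    # whenever it is at least `step` iterations older than the last one kept.
    nums_available = sorted(nums_available)
    if not nums_available:
        return []
    nums_to_eval = [nums_available[-1]]
    for n in reversed(nums_available):
        if nums_to_eval[-1] - n >= step:
            nums_to_eval.append(n)
    nums_to_eval.reverse()
    return nums_to_eval

# e.g. select_ckpts([10, 20, 30, 40, 50], step=25) -> [20, 50]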
Example #9
Source File: frcnn.py    From incremental_detectors with BSD 3-Clause "New" or "Revised" License
def eval_network(sess):
    net = Network(num_classes=args.num_classes+args.extend, distillation=False)
    _, _, remain = split_classes()
    loader = get_loader(False, remain)
    is_voc = loader.dataset == 'voc'

    if args.eval_ckpts != '':
        ckpts = args.eval_ckpts.split(',')
    else:
        ckpts = [args.ckpt]
    global_results = {cat: [] for cat in loader.categories}
    global_results[AVERAGE+" 1-10"] = []
    global_results[AVERAGE+" 11-20"] = []
    global_results[AVERAGE+" ALL"] = []
    for ckpt in ckpts:
        if ckpt[-1].lower() == 'k':
            ckpt_num = int(ckpt[:-1])*1000
        else:
            ckpt_num = int(ckpt)
        init_op, init_feed_dict = restore_ckpt(ckpt_num=ckpt_num)
        sess.run(init_op, feed_dict=init_feed_dict)
        log.info("Checkpoint {}".format(ckpt))
        if is_voc:
            results = Evaluation(net, loader, ckpt_num, args.conf_thresh, args.nms_thresh).evaluate_network(args.eval_first_n)
            for cat in loader.categories:
                global_results[cat].append(results[cat] if cat in results else 0.0)
            # TODO add output formatting, line after learnt cats
            old_classes = [results.get(k, 0) for k in loader.categories[:10]]
            new_classes = [results.get(k, 0) for k in loader.categories[10:]]
            all_classes = [results.get(k, 0) for k in loader.categories]
            global_results[AVERAGE+" 1-10"].append(np.mean(old_classes))
            global_results[AVERAGE+" 11-20"].append(np.mean(new_classes))
            global_results[AVERAGE+" ALL"].append(np.mean(all_classes))

            headers = ['Category'] + [("mAP (%s, %i img)" % (ckpt, args.eval_first_n)) for ckpt in ckpts]
            table_src = []
            for cat in loader.categories:
                table_src.append([cat] + global_results[cat])
            table_src.append([AVERAGE+" 1-10", ] + global_results[AVERAGE+" 1-10"])
            table_src.append([AVERAGE+" 11-20", ] + global_results[AVERAGE+" 11-20"])
            table_src.append([AVERAGE+" ALL", ] + global_results[AVERAGE+" ALL"])
            out = tabulate(table_src, headers=headers,
                           floatfmt=".1f", tablefmt='orgtbl')
            with open("/home/lear/kshmelko/scratch/logs/results_voc/%s.pkl" % args.run_name, 'wb') as f:
                pickle.dump(global_results, f, pickle.HIGHEST_PROTOCOL)
            log.info("Summary table over %i checkpoints\nExperiment: %s\n%s", len(ckpts), args.run_name, out)
        else:
            results = COCOEval(net, loader, ckpt_num, args.conf_thresh, args.nms_thresh).evaluate_network(args.eval_first_n)