Python sklearn.metrics.average_precision_score() Examples
The following are 30 code examples of sklearn.metrics.average_precision_score(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.metrics.
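Before diving into the project examples, here is a minimal, self-contained sketch of a typical call on binary labels and predicted scores; the y_true and y_scores values are made up for illustration (they match the example used in the scikit-learn documentation):

import numpy as np
from sklearn.metrics import average_precision_score

# Hypothetical binary ground truth and predicted scores (probabilities or decision values).
y_true = np.array([0, 0, 1, 1])
y_scores = np.array([0.1, 0.4, 0.35, 0.8])

# AP summarizes the precision-recall curve as a weighted mean of precisions
# achieved at each threshold, weighted by the increase in recall.
ap = average_precision_score(y_true, y_scores)
print('Average precision-recall score: {0:0.2f}'.format(ap))  # approximately 0.83

Note that average_precision_score expects continuous scores where higher values indicate the positive class, not hard class predictions; this is why several of the examples below pass sigmoid outputs or raw similarity scores rather than predicted labels.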
Example #1
Source File: metrics_util.py From DeepLearningSmells with Apache License 2.0 | 8 votes |
def get_all_metrics_(eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, precision, recall, f1, average_precision, fpr, tpr
Example #2
Source File: utils.py From node_embedding_attack with MIT License | 6 votes |
def evaluate_embedding_link_prediction(adj_matrix, node_pairs, embedding_matrix, norm=False):
    """Evaluate the node embeddings on the link prediction task.

    :param adj_matrix: sp.csr_matrix, shape [n_nodes, n_nodes]
        Adjacency matrix of the graph
    :param node_pairs:
    :param embedding_matrix: np.ndarray, shape [n_nodes, embedding_dim]
        Embedding matrix
    :param norm: bool
        Whether to normalize the embeddings
    :return: float, float
        Average precision (AP) score and area under ROC curve (AUC) score
    """
    if norm:
        embedding_matrix = normalize(embedding_matrix)

    true = adj_matrix[node_pairs[:, 0], node_pairs[:, 1]].A1
    scores = (embedding_matrix[node_pairs[:, 0]] * embedding_matrix[node_pairs[:, 1]]).sum(1)
    auc_score, ap_score = roc_auc_score(true, scores), average_precision_score(true, scores)
    return auc_score, ap_score
Example #3
Source File: autoencoder.py From pytorch_geometric with MIT License | 6 votes |
def test(self, z, pos_edge_index, neg_edge_index):
    r"""Given latent variables :obj:`z`, positive edges
    :obj:`pos_edge_index` and negative edges :obj:`neg_edge_index`,
    computes area under the ROC curve (AUC) and average precision (AP)
    scores.

    Args:
        z (Tensor): The latent space :math:`\mathbf{Z}`.
        pos_edge_index (LongTensor): The positive edges to evaluate against.
        neg_edge_index (LongTensor): The negative edges to evaluate against.
    """
    pos_y = z.new_ones(pos_edge_index.size(1))
    neg_y = z.new_zeros(neg_edge_index.size(1))
    y = torch.cat([pos_y, neg_y], dim=0)

    pos_pred = self.decoder(z, pos_edge_index, sigmoid=True)
    neg_pred = self.decoder(z, neg_edge_index, sigmoid=True)
    pred = torch.cat([pos_pred, neg_pred], dim=0)

    y, pred = y.detach().cpu().numpy(), pred.detach().cpu().numpy()

    return roc_auc_score(y, pred), average_precision_score(y, pred)
Example #4
Source File: test_ranking.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_average_precision_score_pos_label_errors():
    # Raise an error when pos_label is not in binary y_true
    y_true = np.array([0, 1])
    y_pred = np.array([0, 1])
    error_message = ("pos_label=2 is invalid. Set it to a label in y_true.")
    assert_raise_message(ValueError, error_message,
                         average_precision_score, y_true, y_pred,
                         pos_label=2)
    # Raise an error for multilabel-indicator y_true with
    # pos_label other than 1
    y_true = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
    y_pred = np.array([[0.9, 0.1], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8]])
    error_message = ("Parameter pos_label is fixed to 1 for multilabel"
                     "-indicator y_true. Do not set pos_label or set "
                     "pos_label to 1.")
    assert_raise_message(ValueError, error_message,
                         average_precision_score, y_true, y_pred,
                         pos_label=0)
Example #5
Source File: metrics_util.py From DeepLearningSmells with Apache License 2.0 | 6 votes |
def get_all_metrics(model, eval_data, eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    score = model.evaluate(eval_data, eval_labels, verbose=0)
    print("Test accuracy: " + str(score[1]))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, score[1], precision, recall, f1, average_precision, fpr, tpr
Example #6
Source File: cnn_train.py From RelationNetworks-CLEVR with MIT License | 6 votes |
def test(data, model, epoch, args):
    model.eval()

    n_iters = 0
    ap_sum = 0.0
    progress_bar = tqdm(data)
    for batch_idx, sample_batched in enumerate(progress_bar):
        img, target = load_tensor_data(sample_batched, args.cuda, volatile=True)

        output = model(img)
        ap = average_precision_score(target.data, output.data)

        n_iters += 1
        ap_sum += ap

        if batch_idx % args.log_interval == 0:
            m_ap = ap_sum / n_iters
            progress_bar.set_postfix(dict(AP='{:.2}'.format(m_ap)))

    m_ap = ap_sum / n_iters
    print('Test Epoch {}: Avg. Precision Score = {:.2};'.format(epoch, m_ap))
Example #7
Source File: main.py From AutoOut with MIT License | 6 votes |
def calculate_scores(y_predicted, y_true):
    """
    Function to calculate different performance scores
    """
    accuracy = accuracy_score(y_pred=y_predicted, y_true=y_true)
    precision = precision_score(y_pred=y_predicted, y_true=y_true)
    average_precision_score1 = average_precision_score(y_score=y_predicted, y_true=y_true)
    f1_score1 = f1_score(y_pred=y_predicted, y_true=y_true)

    print("Accuracy score:", accuracy)
    print("Precision score:", precision)
    print("Average Precision score:", average_precision_score1)
    print("F1 score:", f1_score1)
    print("Outlier detection and/or treatment completed.")

    return {"accuracy": accuracy,
            "precision": precision,
            "average_precision_score": average_precision_score1,
            "f1_score": f1_score1,
            }
Example #8
Source File: test_ranking.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def _average_precision_slow(y_true, y_score):
    """A second alternative implementation of average precision that closely
    follows the Wikipedia article's definition (see References). This should
    give identical results as `average_precision_score` for all inputs.

    References
    ----------
    .. [1] `Wikipedia entry for the Average precision
       <https://en.wikipedia.org/wiki/Average_precision>`_
    """
    precision, recall, threshold = precision_recall_curve(y_true, y_score)
    precision = list(reversed(precision))
    recall = list(reversed(recall))
    average_precision = 0
    for i in range(1, len(precision)):
        average_precision += precision[i] * (recall[i] - recall[i - 1])
    return average_precision
Example #9
Source File: test_ranking.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_score_scale_invariance():
    # Test that average_precision_score and roc_auc_score are invariant by
    # the scaling or shifting of probabilities
    # This test was expanded (added scaled_down) in response to github
    # issue #3864 (and others), where overly aggressive rounding was causing
    # problems for users with very small y_score values
    y_true, _, probas_pred = make_prediction(binary=True)

    roc_auc = roc_auc_score(y_true, probas_pred)
    roc_auc_scaled_up = roc_auc_score(y_true, 100 * probas_pred)
    roc_auc_scaled_down = roc_auc_score(y_true, 1e-6 * probas_pred)
    roc_auc_shifted = roc_auc_score(y_true, probas_pred - 10)
    assert_equal(roc_auc, roc_auc_scaled_up)
    assert_equal(roc_auc, roc_auc_scaled_down)
    assert_equal(roc_auc, roc_auc_shifted)

    pr_auc = average_precision_score(y_true, probas_pred)
    pr_auc_scaled_up = average_precision_score(y_true, 100 * probas_pred)
    pr_auc_scaled_down = average_precision_score(y_true, 1e-6 * probas_pred)
    pr_auc_shifted = average_precision_score(y_true, probas_pred - 10)
    assert_equal(pr_auc, pr_auc_scaled_up)
    assert_equal(pr_auc, pr_auc_scaled_down)
    assert_equal(pr_auc, pr_auc_shifted)
Example #10
Source File: utils.py From graph2gauss with MIT License | 6 votes |
def score_link_prediction(labels, scores):
    """
    Calculates the area under the ROC curve and the average precision score.

    Parameters
    ----------
    labels : array-like, shape [N]
        The ground truth labels
    scores : array-like, shape [N]
        The (unnormalized) scores of how likely are the instances

    Returns
    -------
    roc_auc : float
        Area under the ROC curve score
    ap : float
        Average precision score
    """
    return roc_auc_score(labels, scores), average_precision_score(labels, scores)
Example #11
Source File: metrics.py From LaSO with BSD 3-Clause "New" or "Revised" License | 6 votes |
def average_precision_compute_fn(y_preds, y_targets, mask, activation=None):
    try:
        from sklearn.metrics import average_precision_score
    except ImportError:
        raise RuntimeError("This contrib module requires sklearn to be installed.")

    y_true = y_targets.numpy()
    if activation is not None:
        y_preds = activation(y_preds)
    y_pred = y_preds.numpy()

    if mask is not None:
        y_true = y_true[:, mask]
        y_pred = y_pred[:, mask]

    return average_precision_score(y_true, y_pred)
Example #12
Source File: oxford.py From paiss with MIT License | 6 votes |
def get_ap(self, q_name, sorted_idx):
    rel = self.__relevants[q_name]
    junk = self.__junk[q_name]

    # construct ground-truth and scores:
    y_scores = np.zeros(self.N_images)
    y_true = np.zeros(self.N_images)
    for e, i in enumerate(sorted_idx):
        y_scores[i] = self.N_images - e
    for i in rel:
        y_true[i] = 1

    # remove junk:
    y_scores = np.delete(y_scores, junk)
    y_true = np.delete(y_true, junk)

    # compute ap:
    return average_precision_score(y_true, y_scores)
Example #13
Source File: classifier.py From Fake_News_Detection with MIT License | 6 votes |
def plot_PR_curve(classifier):
    precision, recall, thresholds = precision_recall_curve(DataPrep.test_news['Label'], classifier)
    average_precision = average_precision_score(DataPrep.test_news['Label'], classifier)

    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('2-class Random Forest Precision-Recall curve: AP={0:0.2f}'.format(
        average_precision))
Example #14
Source File: learning.py From deep-action-proposals with MIT License | 6 votes |
def report_metrics(y_dset, y_pred, batch_size, dset='Val'):
    # Print additional metrics involving predictions
    # Integer division keeps only complete batches (and keeps the slice index an int).
    n_rows = (y_dset.shape[0] // batch_size) * batch_size
    y_true = y_dset[0:n_rows, :].flatten()
    y_pred = y_pred.flatten()
    val_ap = average_precision_score(y_true, y_pred)
    val_roc = roc_auc_score(y_true, y_pred)
    n = y_true.size
    n_pos = y_true.sum()
    idx_sorted = np.argsort(-y_pred)
    val_rec = []
    logging.info(dset + "-AP {:.6f}".format(val_ap))
    logging.info(dset + "-ROC {:.6f}".format(val_roc))
    for i, v in enumerate([10, 25, 50, 75, 100]):
        tp = y_true[idx_sorted[:int(v * n / 100)]].sum()
        val_rec.append(tp * 1.0 / n_pos)
        logging.info(dset + "-R{} {:.6f}".format(v, val_rec[i]))
    return val_ap, val_rec[2]


# ############################## Main program #################################
Example #15
Source File: utils.py From s3d.pytorch with MIT License | 5 votes |
def get_AP_video(rst, label, gt_labels):
    # Binarize the ground truth: 1 where the clip label matches `label` (background label 0 is ignored).
    gt_labels_to1 = np.array([1 if (_ == label and label != 0) else 0 for _ in gt_labels])
    # Collect the predicted score for `label` from each result
    # (list comprehension over len(rst) so np.array receives actual values).
    rst_to1 = np.array([rst[_][label] for _ in range(len(rst))])
    AP = average_precision_score(gt_labels_to1, rst_to1)
    return AP
Example #16
Source File: evaluate.py From bird-species-classification with MIT License | 5 votes |
def mean_average_precision(y_trues, y_scores):
    """
    y_trues  : [nb_samples, nb_classes]
    y_scores : [nb_samples, nb_classes]

    map      : float (MAP)
    """
    aps = []
    for y_t, y_s in zip(y_trues, y_scores):
        ap = metrics.average_precision_score(y_t, y_s)
        aps.append(ap)
    return np.mean(np.array(aps))
Example #17
Source File: learning_analysis.py From autolab_core with Apache License 2.0 | 5 votes |
def ap_score(self):
    pred_probs_vec, labels_vec = self.label_vectors
    ap = 0.0
    try:
        ap = sm.average_precision_score(labels_vec, pred_probs_vec)
    except:
        pass
    return ap
Example #18
Source File: test_sklearn_metrics.py From poutyne with GNU Lesser General Public License v3.0 | 5 votes |
def test_classification_with_custom_names(self):
    roc_names = 'roc'
    self._test_classification(roc_auc_score, True, names=roc_names)
    self._test_classification(roc_auc_score, False, names=roc_names)

    ap_names = 'ap'
    self._test_classification(average_precision_score, True, names=ap_names)
    self._test_classification(average_precision_score, False, names=ap_names)

    two_names = ['roc', 'ap']
    two_skl_metrics = [roc_auc_score, average_precision_score]
    self._test_classification(two_skl_metrics, True, names=two_names)
    self._test_classification(two_skl_metrics, False, names=two_names)
Example #19
Source File: metrics.py From video-long-term-feature-banks with Apache License 2.0 | 5 votes |
def mean_ap_metric(predicts, targets):
    """Compute mAP, wAP, AUC for Charades."""
    predicts = np.vstack(predicts)
    targets = np.vstack(targets)
    logger.info("Getting mAP for {} examples".format(predicts.shape[0]))

    start_time = time.time()

    # Drop classes that never appear in the targets.
    predict = predicts[:, ~np.all(targets == 0, axis=0)]
    target = targets[:, ~np.all(targets == 0, axis=0)]

    mean_auc = 0
    aps = [0]
    try:
        mean_auc = metrics.roc_auc_score(target, predict)
    except ValueError:
        print('The roc_auc curve requires a sufficient number of classes '
              'which are missing in this sample.')
    try:
        aps = metrics.average_precision_score(target, predict, average=None)
    except ValueError:
        print('Average precision requires a sufficient number of samples '
              'in a batch which are missing in this sample.')

    mean_ap = np.mean(aps)
    weights = np.sum(target.astype(float), axis=0)
    weights /= np.sum(weights)
    mean_wap = np.sum(np.multiply(aps, weights))
    all_aps = np.zeros((1, targets.shape[1]))
    all_aps[:, ~np.all(targets == 0, axis=0)] = aps
    logger.info('\tDone in {} seconds'.format(time.time() - start_time))
    return mean_auc, mean_ap, mean_wap, all_aps.flatten()
Example #20
Source File: metrics.py From temporal-segment-networks with BSD 2-Clause "Simplified" License | 5 votes |
def video_mean_ap(score_dict, video_list):
    avail_video_labels = [set([i.num_label for i in v.instances]) for v in video_list if v.id in score_dict]
    pred_array = np.array([score_dict[v.id] for v in video_list if v.id in score_dict])
    gt_array = np.zeros(pred_array.shape)

    for i in xrange(pred_array.shape[0]):
        gt_array[i, list(avail_video_labels[i])] = 1

    mean_ap = average_precision_score(gt_array, pred_array, average='macro')
    return mean_ap
Example #21
Source File: custom_scores_HO.py From Auto_ViML with Apache License 2.0 | 5 votes |
def gini_average_precision(truth, predictions):
    return 1 - average_precision_score(truth, predictions.argmax(axis=1), average='weighted')
Example #22
Source File: custom_scores.py From Auto_ViML with Apache License 2.0 | 5 votes |
def gini_average_precision(truth, predictions):
    return average_precision_score(truth, predictions.argmax(axis=1), average='weighted')
Example #23
Source File: analysis.py From PointNetGPD with MIT License | 5 votes |
def ap_score(self):
    pred_probs_vec, labels_vec = self.label_vectors()
    return sm.average_precision_score(labels_vec, pred_probs_vec)
Example #24
Source File: eval.py From Pyramid-Attention-Networks-pytorch with GNU General Public License v3.0 | 5 votes |
def test(data_loader):
    global best_acc
    convnet.eval()
    pan.eval()
    all_i_count = []
    all_u_count = []
    y_true = []
    y_pred = []
    loss_ic = 0
    pixel_acc = 0
    for batch_idx, (imgs, cls_labels, mask_labels) in enumerate(data_loader):
        with torch.no_grad():
            imgs, cls_labels = imgs.to(device), cls_labels.to(device)
            imgs_ab = imgs[:, 1:, :, :]
            fms_blob, out_convnet = convnet(imgs)
            out_cls = classifier(out_convnet.detach())
            out_pan = pan(fms_blob[::-1])
            mask_pred = mask_classifier(out_pan)
            mask_pred = F.interpolate(mask_pred, scale_factor=4, mode='nearest')

            # Image Colorization Loss
            color_pred = color_classifier(out_pan)
            color_pred = F.interpolate(color_pred, scale_factor=4, mode='nearest')
            loss_ic += F.mse_loss(color_pred, imgs_ab).item()

            # results
            y_pred.append(torch.sigmoid(out_cls).data.cpu().numpy())
            y_true.append(cls_labels.data.cpu().numpy())
            i_count, u_count = get_each_cls_iu(mask_pred.max(1)[1].cpu().data.numpy(),
                                               mask_labels.long().squeeze(1).numpy())
            all_i_count.append(i_count)
            all_u_count.append(u_count)
            pixel_acc += mask_pred.max(dim=1)[1].data.cpu().eq(mask_labels.long().squeeze(1)).float().mean().item()

    # Result
    acc = average_precision_score(np.concatenate(y_true, 0), np.concatenate(y_pred, 0))
    each_cls_IOU = (np.array(all_i_count).sum(0) / np.array(all_u_count).sum(0))
    mIOU = each_cls_IOU.mean()
    pixel_acc = pixel_acc / length_test_dataset
    loss_ic = loss_ic / length_test_dataset
    logging.info("Length of test set:{:} Test Cls Acc:{:.4f}% Each_cls_IOU:{:} mIOU:{:.4f} PA:{:.2f}% loss_ic{:.6f}".format(
        length_test_dataset, acc * 100,
        dict(zip(test_data.classes, (100 * each_cls_IOU).tolist())),
        mIOU * 100, pixel_acc * 100, loss_ic))
Example #25
Source File: utils.py From LaSO with BSD 3-Clause "New" or "Revised" License | 5 votes |
def precision_recall_statistics(outputs_scores_np, targets):
    '''
    :param outputs_scores_np: numpy array with outputs scores
    :param targets: numpy array with targets
    :return: dictionary, average precision from precision recall graph for each class and macro averaged
    '''
    classes_num = outputs_scores_np.shape[1]

    average_precision = dict()
    for i in range(classes_num):
        average_precision[i] = average_precision_score(targets[:, i], outputs_scores_np[:, i])

    average_precision["macro"] = average_precision_score(targets, outputs_scores_np, average="macro")

    return average_precision
Example #26
Source File: evaluation_utils.py From keras-anomaly-detection with MIT License | 5 votes |
def report_evaluation_metrics(y_true, y_pred):
    average_precision = average_precision_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    recall = recall_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    f1 = f1_score(y_true, y_pred, labels=[0, 1], pos_label=1)

    print('Average precision-recall score: {0:0.2f}'.format(average_precision))
    print('Precision: {0:0.2f}'.format(precision))
    print('Recall: {0:0.2f}'.format(recall))
    print('F1: {0:0.2f}'.format(f1))
Example #27
Source File: average_precision.py From LaSO with BSD 3-Clause "New" or "Revised" License | 5 votes |
def average_precision_compute_fn(y_preds, y_targets, activation=None):
    try:
        from sklearn.metrics import average_precision_score
    except ImportError:
        raise RuntimeError("This contrib module requires sklearn to be installed.")

    y_true = y_targets.numpy()
    if activation is not None:
        y_preds = activation(y_preds)
    y_pred = y_preds.numpy()
    return average_precision_score(y_true, y_pred)
Example #28
Source File: metrics.py From BirdCLEF-Baseline with MIT License | 5 votes |
def averagePrecision(prediction, target):
    # Calculate average precision for every sample
    return average_precision_score(target, prediction, average='samples')
Example #29
Source File: ranking.py From open-reid with MIT License | 5 votes |
def mean_ap(distmat, query_ids=None, gallery_ids=None,
            query_cams=None, gallery_cams=None):
    distmat = to_numpy(distmat)
    m, n = distmat.shape
    # Fill up default values
    if query_ids is None:
        query_ids = np.arange(m)
    if gallery_ids is None:
        gallery_ids = np.arange(n)
    if query_cams is None:
        query_cams = np.zeros(m).astype(np.int32)
    if gallery_cams is None:
        gallery_cams = np.ones(n).astype(np.int32)
    # Ensure numpy array
    query_ids = np.asarray(query_ids)
    gallery_ids = np.asarray(gallery_ids)
    query_cams = np.asarray(query_cams)
    gallery_cams = np.asarray(gallery_cams)
    # Sort and find correct matches
    indices = np.argsort(distmat, axis=1)
    matches = (gallery_ids[indices] == query_ids[:, np.newaxis])
    # Compute AP for each query
    aps = []
    for i in range(m):
        # Filter out the same id and same camera
        valid = ((gallery_ids[indices[i]] != query_ids[i]) |
                 (gallery_cams[indices[i]] != query_cams[i]))
        y_true = matches[i, valid]
        y_score = -distmat[i][indices[i]][valid]
        if not np.any(y_true):
            continue
        aps.append(average_precision_score(y_true, y_score))
    if len(aps) == 0:
        raise RuntimeError("No valid query")
    return np.mean(aps)
Example #30
Source File: metrics.py From DTPP with BSD 2-Clause "Simplified" License | 5 votes |
def video_mean_ap(score_dict, video_list):
    avail_video_labels = [set([i.num_label for i in v.instances]) for v in video_list if v.id in score_dict]
    pred_array = np.array([score_dict[v.id] for v in video_list if v.id in score_dict])
    gt_array = np.zeros(pred_array.shape)

    for i in xrange(pred_array.shape[0]):
        gt_array[i, list(avail_video_labels[i])] = 1

    mean_ap = average_precision_score(gt_array, pred_array, average='macro')
    return mean_ap