Python sklearn.metrics.average_precision_score() Examples

The following are 30 code examples of sklearn.metrics.average_precision_score(), drawn from open-source projects. Follow the link above each example to view the original project or source file. You may also want to check out all available functions/classes of the module sklearn.metrics.
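As a quick orientation before the project examples, here is a minimal self-contained call on toy data (values invented for illustration): the function takes binary ground-truth labels and continuous scores (probabilities or decision values) and summarizes the precision-recall curve as a single number.

import numpy as np
from sklearn.metrics import average_precision_score

# Toy binary problem: 1 = positive class, scores are predicted probabilities.
y_true = np.array([0, 0, 1, 1])
y_scores = np.array([0.1, 0.4, 0.35, 0.8])

# Average precision is the weighted mean of precisions at each threshold,
# weighted by the increase in recall from the previous threshold.
print(average_precision_score(y_true, y_scores))  # ~0.83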
Example #1
Source File: metrics_util.py    From DeepLearningSmells with Apache License 2.0
def get_all_metrics_(eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, precision, recall, f1, average_precision, fpr, tpr 
Example #2
Source File: utils.py    From node_embedding_attack with MIT License
def evaluate_embedding_link_prediction(adj_matrix, node_pairs, embedding_matrix, norm=False):
    """Evaluate the node embeddings on the link prediction task.

    :param adj_matrix: sp.csr_matrix, shape [n_nodes, n_nodes]
        Adjacency matrix of the graph
    :param node_pairs: np.ndarray, shape [n_pairs, 2]
        Node index pairs to score
    :param embedding_matrix: np.ndarray, shape [n_nodes, embedding_dim]
        Embedding matrix
    :param norm: bool
        Whether to normalize the embeddings
    :return: float, float
        Average precision (AP) score and area under ROC curve (AUC) score
    """
    if norm:
        embedding_matrix = normalize(embedding_matrix)

    true = adj_matrix[node_pairs[:, 0], node_pairs[:, 1]].A1
    scores = (embedding_matrix[node_pairs[:, 0]] * embedding_matrix[node_pairs[:, 1]]).sum(1)

    auc_score, ap_score = roc_auc_score(true, scores), average_precision_score(true, scores)

    return auc_score, ap_score 
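The function above depends on the repository's data structures; the sketch below reproduces only the scoring pattern on synthetic data (all names, shapes, and values here are invented for illustration): dot products between embedding rows serve as edge scores, which are then evaluated with AUC and average precision against binary edge labels.

import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score

rng = np.random.default_rng(0)
n_nodes, dim = 50, 16
embedding = rng.normal(size=(n_nodes, dim))            # stand-in embedding matrix
node_pairs = rng.integers(0, n_nodes, size=(200, 2))   # candidate node pairs
true = rng.integers(0, 2, size=200)                    # 1 = edge exists, 0 = no edge

# Score each pair by the dot product of its two node embeddings.
scores = (embedding[node_pairs[:, 0]] * embedding[node_pairs[:, 1]]).sum(1)

auc_score = roc_auc_score(true, scores)
ap_score = average_precision_score(true, scores)
print(auc_score, ap_score)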
Example #3
Source File: autoencoder.py    From pytorch_geometric with MIT License
def test(self, z, pos_edge_index, neg_edge_index):
        r"""Given latent variables :obj:`z`, positive edges
        :obj:`pos_edge_index` and negative edges :obj:`neg_edge_index`,
        computes area under the ROC curve (AUC) and average precision (AP)
        scores.

        Args:
            z (Tensor): The latent space :math:`\mathbf{Z}`.
            pos_edge_index (LongTensor): The positive edges to evaluate
                against.
            neg_edge_index (LongTensor): The negative edges to evaluate
                against.
        """
        pos_y = z.new_ones(pos_edge_index.size(1))
        neg_y = z.new_zeros(neg_edge_index.size(1))
        y = torch.cat([pos_y, neg_y], dim=0)

        pos_pred = self.decoder(z, pos_edge_index, sigmoid=True)
        neg_pred = self.decoder(z, neg_edge_index, sigmoid=True)
        pred = torch.cat([pos_pred, neg_pred], dim=0)

        y, pred = y.detach().cpu().numpy(), pred.detach().cpu().numpy()

        return roc_auc_score(y, pred), average_precision_score(y, pred) 
Example #4
Source File: test_ranking.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_average_precision_score_pos_label_errors():
    # Raise an error when pos_label is not in binary y_true
    y_true = np.array([0, 1])
    y_pred = np.array([0, 1])
    error_message = ("pos_label=2 is invalid. Set it to a label in y_true.")
    assert_raise_message(ValueError, error_message, average_precision_score,
                         y_true, y_pred, pos_label=2)
    # Raise an error for multilabel-indicator y_true with
    # pos_label other than 1
    y_true = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
    y_pred = np.array([[0.9, 0.1], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8]])
    error_message = ("Parameter pos_label is fixed to 1 for multilabel"
                     "-indicator y_true. Do not set pos_label or set "
                     "pos_label to 1.")
    assert_raise_message(ValueError, error_message, average_precision_score,
                         y_true, y_pred, pos_label=0) 
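For contrast with the error cases tested above, a valid use of pos_label on binary data might look like the sketch below (toy values, assuming a scikit-learn version that exposes the pos_label parameter): the scores passed in must be scores for whichever class is declared positive.

import numpy as np
from sklearn.metrics import average_precision_score

y_true = np.array([0, 0, 1, 1])
p_class1 = np.array([0.1, 0.4, 0.35, 0.8])  # predicted probability of class 1

# Default: class 1 is the positive class.
ap_pos1 = average_precision_score(y_true, p_class1)

# Treating class 0 as positive: pass the scores for class 0 instead.
ap_pos0 = average_precision_score(y_true, 1 - p_class1, pos_label=0)
print(ap_pos1, ap_pos0)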
Example #5
Source File: metrics_util.py    From DeepLearningSmells with Apache License 2.0
def get_all_metrics(model, eval_data, eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    score = model.evaluate(eval_data, eval_labels, verbose=0)
    print("Test accuracy: " + str(score[1]))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, score[1], precision, recall, f1, average_precision, fpr, tpr 
Example #6
Source File: cnn_train.py    From RelationNetworks-CLEVR with MIT License
def test(data, model, epoch, args):
    model.eval()

    n_iters = 0
    ap_sum = 0.0

    progress_bar = tqdm(data)
    for batch_idx, sample_batched in enumerate(progress_bar):
        img, target = load_tensor_data(sample_batched, args.cuda, volatile=True)
        
        output = model(img)
        ap = average_precision_score(target.data, output.data) 
        n_iters += 1
        ap_sum += ap
        if batch_idx % args.log_interval == 0:
            m_ap = ap_sum / n_iters
            progress_bar.set_postfix(dict(AP='{:.2}'.format(m_ap)))

    m_ap = ap_sum / n_iters
    print('Test Epoch {}: Avg. Precision Score = {:.2};'.format(epoch, m_ap)) 
Example #7
Source File: main.py    From AutoOut with MIT License
def calculate_scores(y_predicted, y_true):
    """
    Function to calculate different performance scores
    """
    accuracy = accuracy_score(y_pred=y_predicted, y_true=y_true)
    precision = precision_score(y_pred=y_predicted, y_true=y_true)
    average_precision_score1 = average_precision_score(y_score=y_predicted, y_true=y_true)
    f1_score1 = f1_score(y_pred=y_predicted, y_true=y_true)

    print("Accuracy score:", accuracy)
    print("Precision score:", precision)
    print("Average Precision score:", average_precision_score1)
    print("F1 score:", f1_score1)
    print("Outlier detection and/or treatment completed.")

    return {"accuracy": accuracy,
            "precision": precision,
            "average_precision_score": average_precision_score1,
            "f1_score": f1_score1,
            } 
Example #8
Source File: test_ranking.py    From Mastering-Elasticsearch-7.0 with MIT License
def _average_precision_slow(y_true, y_score):
    """A second alternative implementation of average precision that closely
    follows the Wikipedia article's definition (see References). This should
    give identical results as `average_precision_score` for all inputs.

    References
    ----------
    .. [1] `Wikipedia entry for the Average precision
       <https://en.wikipedia.org/wiki/Average_precision>`_
    """
    precision, recall, threshold = precision_recall_curve(y_true, y_score)
    precision = list(reversed(precision))
    recall = list(reversed(recall))
    average_precision = 0
    for i in range(1, len(precision)):
        average_precision += precision[i] * (recall[i] - recall[i - 1])
    return average_precision 
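On well-behaved inputs the step-wise sum above should match sklearn's own estimate; a quick self-contained check on toy data (values invented for illustration) might look like this:

import numpy as np
from sklearn.metrics import average_precision_score, precision_recall_curve

y_true = np.array([0, 1, 1, 0, 1, 0, 1, 0])
y_score = np.array([0.2, 0.9, 0.6, 0.3, 0.8, 0.1, 0.4, 0.7])

# Step-wise sum over the precision-recall curve, as in _average_precision_slow.
precision, recall, _ = precision_recall_curve(y_true, y_score)
precision, recall = precision[::-1], recall[::-1]   # make recall increasing
ap_slow = np.sum(precision[1:] * np.diff(recall))

ap_sklearn = average_precision_score(y_true, y_score)
print(ap_slow, ap_sklearn)  # expected to agree up to floating-point error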
Example #9
Source File: test_ranking.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_score_scale_invariance():
    # Test that average_precision_score and roc_auc_score are invariant by
    # the scaling or shifting of probabilities
    # This test was expanded (added scaled_down) in response to github
    # issue #3864 (and others), where overly aggressive rounding was causing
    # problems for users with very small y_score values
    y_true, _, probas_pred = make_prediction(binary=True)

    roc_auc = roc_auc_score(y_true, probas_pred)
    roc_auc_scaled_up = roc_auc_score(y_true, 100 * probas_pred)
    roc_auc_scaled_down = roc_auc_score(y_true, 1e-6 * probas_pred)
    roc_auc_shifted = roc_auc_score(y_true, probas_pred - 10)
    assert_equal(roc_auc, roc_auc_scaled_up)
    assert_equal(roc_auc, roc_auc_scaled_down)
    assert_equal(roc_auc, roc_auc_shifted)

    pr_auc = average_precision_score(y_true, probas_pred)
    pr_auc_scaled_up = average_precision_score(y_true, 100 * probas_pred)
    pr_auc_scaled_down = average_precision_score(y_true, 1e-6 * probas_pred)
    pr_auc_shifted = average_precision_score(y_true, probas_pred - 10)
    assert_equal(pr_auc, pr_auc_scaled_up)
    assert_equal(pr_auc, pr_auc_scaled_down)
    assert_equal(pr_auc, pr_auc_shifted) 
Example #10
Source File: utils.py    From graph2gauss with MIT License
def score_link_prediction(labels, scores):
    """
    Calculates the area under the ROC curve and the average precision score.

    Parameters
    ----------
    labels : array-like, shape [N]
        The ground truth labels
    scores : array-like, shape [N]
        The (unnormalized) scores indicating how likely each instance is to be positive

    Returns
    -------
    roc_auc : float
        Area under the ROC curve score
    ap : float
        Average precision score
    """

    return roc_auc_score(labels, scores), average_precision_score(labels, scores) 
Example #11
Source File: metrics.py    From LaSO with BSD 3-Clause "New" or "Revised" License
def average_precision_compute_fn(y_preds, y_targets, mask, activation=None):
    try:
        from sklearn.metrics import average_precision_score
    except ImportError:
        raise RuntimeError("This contrib module requires sklearn to be installed.")

    y_true = y_targets.numpy()
    if activation is not None:
        y_preds = activation(y_preds)
    y_pred = y_preds.numpy()

    if mask is not None:
        y_true = y_true[:, mask]
        y_pred = y_pred[:, mask]

    return average_precision_score(y_true, y_pred) 
Example #12
Source File: oxford.py    From paiss with MIT License
def get_ap(self, q_name, sorted_idx):
        rel   = self.__relevants[q_name]
        junk  = self.__junk[q_name]

        # construct ground-truth and scores:
        y_scores = np.zeros(self.N_images)
        y_true   = np.zeros(self.N_images)
        for e,i in enumerate(sorted_idx): y_scores[i] = self.N_images - e
        for i in rel: y_true[i] = 1

        # remove junk:
        y_scores = np.delete(y_scores, junk)
        y_true   = np.delete(y_true, junk)

        # compute ap:
        return average_precision_score(y_true, y_scores) 
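The trick above of turning a ranked index list into scores (the best-ranked image gets the highest score) is easy to reproduce standalone; a minimal sketch with made-up retrieval data:

import numpy as np
from sklearn.metrics import average_precision_score

n_images = 6
sorted_idx = [4, 2, 0, 5, 1, 3]   # retrieval order, best match first
relevant = {2, 5}                  # indices of the ground-truth matches

y_scores = np.zeros(n_images)
y_true = np.zeros(n_images)
for rank, idx in enumerate(sorted_idx):
    y_scores[idx] = n_images - rank   # higher score = earlier in the ranking
for idx in relevant:
    y_true[idx] = 1

print(average_precision_score(y_true, y_scores))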
Example #13
Source File: classifier.py    From Fake_News_Detection with MIT License
def plot_PR_curve(classifier):
    
    precision, recall, thresholds = precision_recall_curve(DataPrep.test_news['Label'], classifier)
    average_precision = average_precision_score(DataPrep.test_news['Label'], classifier)
    
    plt.step(recall, precision, color='b', alpha=0.2,
             where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.2,
                     color='b')
    
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('2-class Random Forest Precision-Recall curve: AP={0:0.2f}'.format(
              average_precision)) 
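The plot above is tied to DataPrep and a fitted classifier; a self-contained variant with synthetic labels and scores (names and data invented for illustration) follows the same matplotlib pattern:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import average_precision_score, precision_recall_curve

rng = np.random.default_rng(0)
y_true = rng.integers(0, 2, size=200)
y_score = y_true * 0.3 + rng.random(200) * 0.7   # weakly informative scores

precision, recall, _ = precision_recall_curve(y_true, y_score)
average_precision = average_precision_score(y_true, y_score)

plt.step(recall, precision, color='b', alpha=0.2, where='post')
plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('Precision-Recall curve: AP={0:0.2f}'.format(average_precision))
plt.show()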
Example #14
Source File: learning.py    From deep-action-proposals with MIT License
def report_metrics(y_dset, y_pred, batch_size, dset='Val'):
    # Print additional metrics involving predictions
    n_rows = (y_dset.shape[0] // batch_size) * batch_size
    y_true = y_dset[0:n_rows, :].flatten()
    y_pred = y_pred.flatten()

    val_ap = average_precision_score(y_true, y_pred)
    val_roc = roc_auc_score(y_true, y_pred)

    n = y_true.size
    n_pos = y_true.sum()
    idx_sorted = np.argsort(-y_pred)
    val_rec = []

    logging.info(dset + "-AP {:.6f}".format(val_ap))
    logging.info(dset + "-ROC {:.6f}".format(val_roc))
    for i, v in enumerate([10, 25, 50, 75, 100]):
        tp = y_true[idx_sorted[:int(v * n / 100)]].sum()
        val_rec.append(tp * 1.0 / n_pos)
        logging.info(dset + "-R{} {:.6f}".format(v, val_rec[i]))
    return val_ap, val_rec[2]


# ############################## Main program ################################# 
Example #15
Source File: utils.py    From s3d.pytorch with MIT License
def get_AP_video(rst, label, gt_labels):
    # Binarize the ground truth: 1 where the video has this (non-background) label.
    gt_labels_to1 = np.array([1 if (gt == label and label != 0) else 0 for gt in gt_labels])
    # Collect every video's predicted score for this label.
    rst_to1 = np.array([rst[i][label] for i in range(len(rst))])

    AP = average_precision_score(gt_labels_to1, rst_to1)
    return AP
Example #16
Source File: evaluate.py    From bird-species-classification with MIT License
def mean_average_precision(y_trues, y_scores):
    """
    y_trues  : [nb_samples, nb_classes]
    y_scores : [nb_samples, nb_classes]

    map      : float (MAP)
    """
    aps = []
    for y_t, y_s in zip(y_trues, y_scores):
        ap = metrics.average_precision_score(y_t, y_s)
        aps.append(ap)
    return np.mean(np.array(aps)) 
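Looping row by row as above should, for rows that contain at least one positive label, agree with passing average='samples' directly; a toy multilabel check (data invented for illustration):

import numpy as np
from sklearn import metrics

y_trues = np.array([[1, 0, 1],
                    [0, 1, 0],
                    [1, 1, 0]])
y_scores = np.array([[0.8, 0.1, 0.6],
                     [0.2, 0.9, 0.3],
                     [0.7, 0.4, 0.5]])

# Per-sample AP, then the mean over samples.
per_sample = np.mean([metrics.average_precision_score(t, s)
                      for t, s in zip(y_trues, y_scores)])
built_in = metrics.average_precision_score(y_trues, y_scores, average='samples')
print(per_sample, built_in)  # expected to agree on this data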
Example #17
Source File: learning_analysis.py    From autolab_core with Apache License 2.0
def ap_score(self):
        pred_probs_vec, labels_vec = self.label_vectors
        ap = 0.0
        try:
            ap = sm.average_precision_score(labels_vec, pred_probs_vec)
        except ValueError:
            # AP is undefined when only one class is present; keep the 0.0 default.
            pass
        return ap 
Example #18
Source File: test_sklearn_metrics.py    From poutyne with GNU Lesser General Public License v3.0
def test_classification_with_custom_names(self):
        roc_names = 'roc'
        self._test_classification(roc_auc_score, True, names=roc_names)
        self._test_classification(roc_auc_score, False, names=roc_names)

        ap_names = 'ap'
        self._test_classification(average_precision_score, True, names=ap_names)
        self._test_classification(average_precision_score, False, names=ap_names)

        two_names = ['roc', 'ap']
        two_skl_metrics = [roc_auc_score, average_precision_score]
        self._test_classification(two_skl_metrics, True, names=two_names)
        self._test_classification(two_skl_metrics, False, names=two_names) 
Example #19
Source File: metrics.py    From video-long-term-feature-banks with Apache License 2.0
def mean_ap_metric(predicts, targets):
    """Compute mAP, wAP, AUC for Charades."""

    predicts = np.vstack(predicts)
    targets = np.vstack(targets)
    logger.info(
        "Getting mAP for {} examples".format(
            predicts.shape[0]
        ))
    start_time = time.time()

    predict = predicts[:, ~np.all(targets == 0, axis=0)]
    target = targets[:, ~np.all(targets == 0, axis=0)]
    mean_auc = 0
    aps = [0]
    try:
        mean_auc = metrics.roc_auc_score(target, predict)
    except ValueError:
        print(
            'The roc_auc curve requires a sufficient number of classes \
            which are missing in this sample.'
        )
    try:
        aps = metrics.average_precision_score(target, predict, average=None)
    except ValueError:
        print(
            'Average precision requires a sufficient number of samples \
            in a batch which are missing in this sample.'
        )

    mean_ap = np.mean(aps)
    weights = np.sum(target.astype(float), axis=0)
    weights /= np.sum(weights)
    mean_wap = np.sum(np.multiply(aps, weights))
    all_aps = np.zeros((1, targets.shape[1]))
    all_aps[:, ~np.all(targets == 0, axis=0)] = aps

    logger.info('\tDone in {} seconds'.format(time.time() - start_time))
    return mean_auc, mean_ap, mean_wap, all_aps.flatten() 
Example #20
Source File: metrics.py    From temporal-segment-networks with BSD 2-Clause "Simplified" License
def video_mean_ap(score_dict, video_list):
    avail_video_labels = [set([i.num_label for i in v.instances]) for v in video_list if
                          v.id in score_dict]
    pred_array = np.array([score_dict[v.id] for v in video_list if v.id in score_dict])
    gt_array = np.zeros(pred_array.shape)

    for i in range(pred_array.shape[0]):
        gt_array[i, list(avail_video_labels[i])] = 1
    mean_ap = average_precision_score(gt_array, pred_array, average='macro')
    return mean_ap 
Example #21
Source File: custom_scores_HO.py    From Auto_ViML with Apache License 2.0
def gini_average_precision(truth, predictions):
    return 1-average_precision_score(truth, predictions.argmax(axis=1),average='weighted') 
Example #22
Source File: custom_scores.py    From Auto_ViML with Apache License 2.0
def gini_average_precision(truth, predictions):
    return average_precision_score(truth, predictions.argmax(axis=1),average='weighted') 
Example #23
Source File: analysis.py    From PointNetGPD with MIT License
def ap_score(self):
        pred_probs_vec, labels_vec = self.label_vectors()
        return sm.average_precision_score(labels_vec, pred_probs_vec) 
Example #24
Source File: eval.py    From Pyramid-Attention-Networks-pytorch with GNU General Public License v3.0
def test(data_loader):
    global best_acc
    convnet.eval()
    pan.eval()
    all_i_count = []
    all_u_count = []
    y_true = []
    y_pred = []
    loss_ic = 0
    pixel_acc = 0
    for batch_idx, (imgs, cls_labels, mask_labels) in enumerate(data_loader):
        with torch.no_grad():
            imgs, cls_labels = imgs.to(device), cls_labels.to(device)
            imgs_ab = imgs[:, 1:, :, :]
            fms_blob, out_convnet = convnet(imgs)
            out_cls = classifier(out_convnet.detach())
            out_pan = pan(fms_blob[::-1])
            mask_pred = mask_classifier(out_pan)
            mask_pred = F.interpolate(mask_pred, scale_factor=4, mode='nearest')

            # Image Colorization Loss
            color_pred = color_classifier(out_pan)
            color_pred = F.interpolate(color_pred, scale_factor=4, mode='nearest')
            loss_ic += F.mse_loss(color_pred, imgs_ab).item()

        # results
        y_pred.append(torch.sigmoid(out_cls).data.cpu().numpy())
        y_true.append(cls_labels.data.cpu().numpy())

        i_count, u_count = get_each_cls_iu(mask_pred.max(1)[1].cpu().data.numpy(), mask_labels.long().squeeze(1).numpy())
        all_i_count.append(i_count)
        all_u_count.append(u_count)
        pixel_acc += mask_pred.max(dim=1)[1].data.cpu().eq(mask_labels.long().squeeze(1)).float().mean().item()

    # Result
    acc = average_precision_score(np.concatenate(y_true, 0), np.concatenate(y_pred, 0))
    each_cls_IOU = (np.array(all_i_count).sum(0) / np.array(all_u_count).sum(0))
    mIOU = each_cls_IOU.mean()
    pixel_acc = pixel_acc / length_test_dataset
    loss_ic = loss_ic / length_test_dataset
    logging.info("Length of test set:{:} Test Cls Acc:{:.4f}% Each_cls_IOU:{:} mIOU:{:.4f} PA:{:.2f}% loss_ic{:.6f}".format(length_test_dataset, acc*100, dict(zip(test_data.classes, (100*each_cls_IOU).tolist())), mIOU*100, pixel_acc*100, loss_ic)) 
Example #25
Source File: utils.py    From LaSO with BSD 3-Clause "New" or "Revised" License
def precision_recall_statistics(outputs_scores_np, targets):
    '''
    :param outputs_scores_np: numpy array with outputs scores
    :param targets: numpy array with targets
    :return: dictionary, average precision from precision recall graph for each class and macro averaged
    '''
    classes_num = outputs_scores_np.shape[1]
    average_precision = dict()
    for i in range(classes_num):
        average_precision[i] = average_precision_score(targets[:, i], outputs_scores_np[:, i])
    average_precision["macro"] = average_precision_score(targets, outputs_scores_np, average="macro")
    return average_precision 
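For per-class results, scikit-learn can also return the unaveraged scores directly with average=None, which should match the per-class loop above; a toy sketch (data invented for illustration):

import numpy as np
from sklearn.metrics import average_precision_score

targets = np.array([[1, 0], [0, 1], [1, 1], [0, 0]])
scores = np.array([[0.9, 0.2], [0.3, 0.8], [0.2, 0.4], [0.1, 0.6]])

# Per-class AP via an explicit loop over columns.
per_class = [average_precision_score(targets[:, i], scores[:, i])
             for i in range(targets.shape[1])]
macro = average_precision_score(targets, scores, average='macro')
unaveraged = average_precision_score(targets, scores, average=None)
print(per_class, macro, unaveraged)  # macro should equal the mean of per_class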
Example #26
Source File: evaluation_utils.py    From keras-anomaly-detection with MIT License
def report_evaluation_metrics(y_true, y_pred):
    average_precision = average_precision_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    recall = recall_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    f1 = f1_score(y_true, y_pred, labels=[0, 1], pos_label=1)

    print('Average precision-recall score: {0:0.2f}'.format(average_precision))
    print('Precision: {0:0.2f}'.format(precision))
    print('Recall: {0:0.2f}'.format(recall))
    print('F1: {0:0.2f}'.format(f1)) 
Example #27
Source File: average_precision.py    From LaSO with BSD 3-Clause "New" or "Revised" License
def average_precision_compute_fn(y_preds, y_targets, activation=None):
    try:
        from sklearn.metrics import average_precision_score
    except ImportError:
        raise RuntimeError("This contrib module requires sklearn to be installed.")

    y_true = y_targets.numpy()
    if activation is not None:
        y_preds = activation(y_preds)
    y_pred = y_preds.numpy()
    return average_precision_score(y_true, y_pred) 
Example #28
Source File: metrics.py    From BirdCLEF-Baseline with MIT License
def averagePrecision(prediction, target):

    # Calculate average precision for every sample
    return average_precision_score(target, prediction, average='samples') 
Example #29
Source File: ranking.py    From open-reid with MIT License
def mean_ap(distmat, query_ids=None, gallery_ids=None,
            query_cams=None, gallery_cams=None):
    distmat = to_numpy(distmat)
    m, n = distmat.shape
    # Fill up default values
    if query_ids is None:
        query_ids = np.arange(m)
    if gallery_ids is None:
        gallery_ids = np.arange(n)
    if query_cams is None:
        query_cams = np.zeros(m).astype(np.int32)
    if gallery_cams is None:
        gallery_cams = np.ones(n).astype(np.int32)
    # Ensure numpy array
    query_ids = np.asarray(query_ids)
    gallery_ids = np.asarray(gallery_ids)
    query_cams = np.asarray(query_cams)
    gallery_cams = np.asarray(gallery_cams)
    # Sort and find correct matches
    indices = np.argsort(distmat, axis=1)
    matches = (gallery_ids[indices] == query_ids[:, np.newaxis])
    # Compute AP for each query
    aps = []
    for i in range(m):
        # Filter out the same id and same camera
        valid = ((gallery_ids[indices[i]] != query_ids[i]) |
                 (gallery_cams[indices[i]] != query_cams[i]))
        y_true = matches[i, valid]
        y_score = -distmat[i][indices[i]][valid]
        if not np.any(y_true): continue
        aps.append(average_precision_score(y_true, y_score))
    if len(aps) == 0:
        raise RuntimeError("No valid query")
    return np.mean(aps) 
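The key idea in mean_ap above is that smaller distances mean better matches, so the negated distances are used as scores; a minimal standalone illustration for a single query row (toy numbers invented for illustration):

import numpy as np
from sklearn.metrics import average_precision_score

# Distances from one query to five gallery items (smaller = more similar).
dist_row = np.array([0.2, 1.5, 0.4, 0.9, 0.1])
y_true = np.array([1, 0, 1, 0, 0])   # which gallery items share the query's id

# Negate distances so that a higher score means a more likely match.
ap = average_precision_score(y_true, -dist_row)
print(ap)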
Example #30
Source File: metrics.py    From DTPP with BSD 2-Clause "Simplified" License
def video_mean_ap(score_dict, video_list):
    avail_video_labels = [set([i.num_label for i in v.instances]) for v in video_list if
                          v.id in score_dict]
    pred_array = np.array([score_dict[v.id] for v in video_list if v.id in score_dict])
    gt_array = np.zeros(pred_array.shape)

    for i in range(pred_array.shape[0]):
        gt_array[i, list(avail_video_labels[i])] = 1
    mean_ap = average_precision_score(gt_array, pred_array, average='macro')
    return mean_ap