Python sklearn.metrics.recall_score() Examples
The following are 30 code examples of sklearn.metrics.recall_score(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.metrics, or try the search function.
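Before the project examples, here is a minimal usage sketch (not taken from any of the projects below; the toy labels and the values in the comments are illustrative assumptions) showing what recall_score computes for binary and multi-class targets.

from sklearn.metrics import recall_score

# Binary case: recall = TP / (TP + FN) for the positive class (pos_label=1 by default).
y_true = [1, 1, 0, 1, 0]
y_pred = [1, 0, 0, 1, 1]
print(recall_score(y_true, y_pred))  # 2 of 3 positives recovered -> 0.666...

# Multi-class case: an explicit average mode is required.
y_true_mc = [0, 1, 2, 2, 1]
y_pred_mc = [0, 2, 2, 2, 1]
print(recall_score(y_true_mc, y_pred_mc, average='macro'))  # unweighted mean of per-class recalls
print(recall_score(y_true_mc, y_pred_mc, average=None))     # per-class recall array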
Example #1
Source File: multi_class_classification.py From edge2vec with BSD 3-Clause "New" or "Revised" License | 11 votes |
def multi_class_classification(data_X, data_Y):
    '''
    calculate multi-class classification and return related evaluation metrics
    '''
    svc = svm.SVC(C=1, kernel='linear')
    # X_train, X_test, y_train, y_test = train_test_split(data_X, data_Y, test_size=0.4, random_state=0)
    clf = svc.fit(data_X, data_Y)  # svm
    # array = svc.coef_
    # print array
    predicted = cross_val_predict(clf, data_X, data_Y, cv=2)
    print "accuracy", metrics.accuracy_score(data_Y, predicted)
    print "f1 score macro", metrics.f1_score(data_Y, predicted, average='macro')
    print "f1 score micro", metrics.f1_score(data_Y, predicted, average='micro')
    print "precision score", metrics.precision_score(data_Y, predicted, average='macro')
    print "recall score", metrics.recall_score(data_Y, predicted, average='macro')
    print "hamming_loss", metrics.hamming_loss(data_Y, predicted)
    print "classification_report", metrics.classification_report(data_Y, predicted)
    print "jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted)
    # print "log_loss", metrics.log_loss(data_Y, predicted)
    print "zero_one_loss", metrics.zero_one_loss(data_Y, predicted)
    # print "AUC&ROC", metrics.roc_auc_score(data_Y, predicted)
    # print "matthews_corrcoef", metrics.matthews_corrcoef(data_Y, predicted)
Example #2
Source File: metrics_util.py From DeepLearningSmells with Apache License 2.0 | 8 votes |
def get_all_metrics_(eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, precision, recall, f1, average_precision, fpr, tpr
Example #3
Source File: link_prediction.py From edge2vec with BSD 3-Clause "New" or "Revised" License | 7 votes |
def evaluation_analysis(true_label, predicted):
    '''
    return all metrics results
    '''
    print "accuracy", metrics.accuracy_score(true_label, predicted)
    print "f1 score macro", metrics.f1_score(true_label, predicted, average='macro')
    print "f1 score micro", metrics.f1_score(true_label, predicted, average='micro')
    print "precision score", metrics.precision_score(true_label, predicted, average='macro')
    print "recall score", metrics.recall_score(true_label, predicted, average='macro')
    print "hamming_loss", metrics.hamming_loss(true_label, predicted)
    print "classification_report", metrics.classification_report(true_label, predicted)
    print "jaccard_similarity_score", metrics.jaccard_similarity_score(true_label, predicted)
    print "log_loss", metrics.log_loss(true_label, predicted)
    print "zero_one_loss", metrics.zero_one_loss(true_label, predicted)
    print "AUC&ROC", metrics.roc_auc_score(true_label, predicted)
    print "matthews_corrcoef", metrics.matthews_corrcoef(true_label, predicted)
Example #4
Source File: test_classification.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_prf_average_binary_data_non_binary():
    # Error if user does not explicitly set non-binary average mode
    y_true_mc = [1, 2, 3, 3]
    y_pred_mc = [1, 2, 3, 1]
    msg_mc = ("Target is multiclass but average='binary'. Please "
              "choose another average setting, one of ["
              "None, 'micro', 'macro', 'weighted'].")
    y_true_ind = np.array([[0, 1, 1], [1, 0, 0], [0, 0, 1]])
    y_pred_ind = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]])
    msg_ind = ("Target is multilabel-indicator but average='binary'. Please "
               "choose another average setting, one of ["
               "None, 'micro', 'macro', 'weighted', 'samples'].")

    for y_true, y_pred, msg in [
        (y_true_mc, y_pred_mc, msg_mc),
        (y_true_ind, y_pred_ind, msg_ind),
    ]:
        for metric in [precision_score, recall_score, f1_score,
                       partial(fbeta_score, beta=2)]:
            assert_raise_message(ValueError, msg, metric, y_true, y_pred)
Example #5
Source File: stats_metrics.py From fanci with GNU General Public License v3.0 | 6 votes |
def add_run(self, y_true, y_pred, domains_test):
    """
    Add a completed run
    :param domains_test:
    :param y_true: true labels
    :param y_pred: predicted labels
    :return:
    """
    log.verbose('Adding run.\ny_true: {!s}\ny_pred: {!s}'.format(y_true, y_pred))
    self.ys.append((y_true, y_pred))
    self.y_true = numpy.concatenate((self.y_true, y_true))
    self.y_pred = numpy.concatenate((self.y_pred, y_pred))
    self.cms.append(confusion_matrix(y_true, y_pred))
    self.scores = {'accuracy': [accuracy_score(y_true, y_pred)],
                   'precision': [precision_score(y_true, y_pred)],
                   'recall': [recall_score(y_true, y_pred)],
                   'roc': [roc_auc_score(y_true, y_pred)],
                   'f1': [f1_score(y_true, y_pred)]}
    for i in range(len(y_true)):
        if y_true[i] != y_pred[i]:
            self.missclassified.append((domains_test[i], y_true[i]))
Example #6
Source File: tool.py From lightNLP with Apache License 2.0 | 6 votes |
def get_score(self, model, texts, labels, score_type='f1'):
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map[score_type] if score_type in metrics_map else metrics_map['f1']
    assert texts.size(0) == len(labels)
    vec_predict = model(texts)
    soft_predict = torch.softmax(vec_predict, dim=1)
    predict_prob, predict_index = torch.max(soft_predict.cpu().data, dim=1)
    # print('prob', predict_prob)
    # print('index', predict_index)
    # print('labels', labels)
    labels = labels.view(-1).cpu().data.numpy()
    return metric_func(predict_index, labels, average='micro')
Example #7
Source File: tool.py From lightNLP with Apache License 2.0 | 6 votes |
def get_score(self, model, texts, labels, score_type='f1'):
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map[score_type] if score_type in metrics_map else metrics_map['f1']
    assert len(texts) == len(labels)
    vec_predict = model(texts)
    soft_predict = torch.softmax(vec_predict, dim=1)
    predict_prob, predict_index = torch.max(soft_predict.cpu().data, dim=1)
    # print('prob', predict_prob)
    # print('index', predict_index)
    # print('labels', labels)
    labels = labels.view(-1).cpu().data.numpy()
    return metric_func(predict_index, labels, average='micro')
Example #8
Source File: metrics_util.py From DeepLearningSmells with Apache License 2.0 | 6 votes |
def get_all_metrics(model, eval_data, eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    score = model.evaluate(eval_data, eval_labels, verbose=0)
    print("Test accuracy: " + str(score[1]))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, score[1], precision, recall, f1, average_precision, fpr, tpr
Example #9
Source File: utils.py From TensorFlow_DCIGN with MIT License | 6 votes |
def evaluate_precision_recall(y, target, labels):
    import sklearn.metrics as metrics
    target = target[:len(y)]
    num_classes = max(target) + 1
    results = []
    for i in range(num_classes):
        class_target = _extract_single_class(i, target)
        class_y = _extract_single_class(i, y)

        results.append({
            'precision': metrics.precision_score(class_target, class_y),
            'recall': metrics.recall_score(class_target, class_y),
            'f1': metrics.f1_score(class_target, class_y),
            'fraction': sum(class_target) / len(target),
            '#of_class': int(sum(class_target)),
            'label': labels[i],
            'label_id': i
            # 'tp': tp
        })
        print('%d/%d' % (i, num_classes), results[-1])
    accuracy = metrics.accuracy_score(target, y)
    return accuracy, results
Example #10
Source File: metrics.py From toxic_comments with MIT License | 6 votes |
def calc_metrics(y_true, y_hat, max_steps=1000):
    y_true = np.array(y_true)
    y_hat = np.array(y_hat)
    metrics = {}
    metrics['Logloss'] = float(log_loss(y_true, y_hat))
    metrics['AUC'] = roc_auc_score(y_true, y_hat)
    metrics['F1'] = []
    metrics['Precision'] = []
    metrics['Recall'] = []
    for i in range(1, max_steps):
        threshold = float(i) / max_steps
        y_tmp = y_hat > threshold
        metrics['F1'].append(f1_score(y_true, y_tmp))
        metrics['Precision'].append(precision_score(y_true, y_tmp))
        metrics['Recall'].append(recall_score(y_true, y_tmp))
    max_idx = np.argmax(metrics['F1'])
    metrics['F1'] = metrics['F1'][max_idx]
    metrics['Precision'] = metrics['Precision'][max_idx]
    metrics['Recall'] = metrics['Recall'][max_idx]
    metrics['Threshold'] = float(max_idx + 1) / max_steps
    return metrics
Example #11
Source File: utils.py From Attention-Gated-Networks with MIT License | 6 votes |
def classification_scores(gts, preds, labels):
    accuracy = metrics.accuracy_score(gts, preds)
    class_accuracies = []
    for lab in labels:  # TODO Fix
        class_accuracies.append(metrics.accuracy_score(gts[gts == lab], preds[gts == lab]))
    class_accuracies = np.array(class_accuracies)

    f1_micro = metrics.f1_score(gts, preds, average='micro')
    precision_micro = metrics.precision_score(gts, preds, average='micro')
    recall_micro = metrics.recall_score(gts, preds, average='micro')
    f1_macro = metrics.f1_score(gts, preds, average='macro')
    precision_macro = metrics.precision_score(gts, preds, average='macro')
    recall_macro = metrics.recall_score(gts, preds, average='macro')

    # class wise score
    f1s = metrics.f1_score(gts, preds, average=None)
    precisions = metrics.precision_score(gts, preds, average=None)
    recalls = metrics.recall_score(gts, preds, average=None)

    confusion = metrics.confusion_matrix(gts, preds, labels=labels)

    # TODO confusion matrix, recall, precision
    return accuracy, f1_micro, precision_micro, recall_micro, f1_macro, precision_macro, recall_macro, confusion, class_accuracies, f1s, precisions, recalls
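Example #11 reports micro, macro, and per-class (average=None) recall side by side. As a hedged aside with toy labels (not taken from the Attention-Gated-Networks project), the macro score is simply the unweighted mean of the per-class scores, and for single-label multi-class data the micro-averaged recall coincides with plain accuracy.

import numpy as np
from sklearn.metrics import recall_score

gts = [0, 0, 1, 1, 2, 2]
preds = [0, 1, 1, 1, 2, 0]

per_class = recall_score(gts, preds, average=None)  # array([0.5, 1. , 0.5])
macro = recall_score(gts, preds, average='macro')   # 0.666..., unweighted mean of the above
assert np.isclose(macro, per_class.mean())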
Example #12
Source File: tool.py From lightNLP with Apache License 2.0 | 6 votes |
def get_score(self, model, src, src_lens, trg, score_type='f1'):
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map[score_type] if score_type in metrics_map else metrics_map['f1']
    output = model(src, src_lens, trg)
    output = output[1:].contiguous()
    output = output.view(-1, output.shape[-1])
    trg = trg.transpose(1, 0)
    trg = trg[1:].contiguous()
    trg = trg.view(-1)
    soft_predict = torch.softmax(output, dim=1)
    predict_prob, predict_index = torch.max(soft_predict.cpu().data, dim=1)
    labels = trg.cpu().data.numpy()
    return metric_func(predict_index, labels, average='micro')
Example #13
Source File: tool.py From lightNLP with Apache License 2.0 | 6 votes |
def get_score(self, model, x, y, score_type='f1'):
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map[score_type] if score_type in metrics_map else metrics_map['f1']
    outputs, dep_graph, actions_done = model(x)
    assert len(actions_done) == len(y)
    predict_y = actions_done
    true_y = y.cpu().view(-1).tolist()
    # print(actions_done, y)
    # print(actions_done)
    # print(true_y)
    return metric_func(predict_y, true_y, average='micro')
Example #14
Source File: tool.py From lightNLP with Apache License 2.0 | 6 votes |
def get_score(self, model, src, src_lens, trg, score_type='f1'):
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map[score_type] if score_type in metrics_map else metrics_map['f1']
    output = model(src, src_lens, trg)
    output = output[1:].contiguous()
    output = output.view(-1, output.shape[-1])
    trg = trg.transpose(1, 0)
    trg = trg[1:].contiguous()
    trg = trg.view(-1)
    soft_predict = torch.softmax(output, dim=1)
    predict_prob, predict_index = torch.max(soft_predict.cpu().data, dim=1)
    labels = trg.cpu().data.numpy()
    return metric_func(predict_index, labels, average='micro')
Example #15
Source File: pcnn_model.py From PCNN with Apache License 2.0 | 6 votes |
def run_evaluate(self, test):
    """Evaluates performance on test set

    Args:
        test: dataset that yields tuple of (sentences, relation tags)

    Returns:
        metrics: (dict) metrics["acc"] = 98.4, ...

    """
    y_true, y_pred = [], []
    for data in minibatches(test, self.config.batch_size):
        word_batch, pos1_batch, pos2_batch, pos_batch, y_batch = data
        relations_pred = self.predict_batch(word_batch, pos1_batch, pos2_batch, pos_batch)
        assert len(relations_pred) == len(y_batch)
        y_true += y_batch
        y_pred += relations_pred.tolist()

    acc = accuracy_score(y_true, y_pred)
    p = precision_score(y_true, y_pred, average='macro')
    r = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')

    return {"acc": acc, "p": p, "r": r, "f1": f1}
Example #16
Source File: textpro.py From comparable-text-miner with Apache License 2.0 | 6 votes |
def evaluate(trueValues, predicted, decimals, note):
    print note
    label = 1
    avg = 'weighted'
    a = accuracy_score(trueValues, predicted)
    p = precision_score(trueValues, predicted, pos_label=label, average=avg)
    r = recall_score(trueValues, predicted, pos_label=label, average=avg)
    avg_f1 = f1_score(trueValues, predicted, pos_label=label, average=avg)
    fclasses = f1_score(trueValues, predicted, average=None)
    f1c1 = fclasses[0]; f1c2 = fclasses[1]
    fw = (f1c1 + f1c2) / 2.0

    print 'accuracy:\t', str(round(a, decimals))
    print 'precision:\t', str(round(p, decimals))
    print 'recall:\t', str(round(r, decimals))
    print 'avg f1:\t', str(round(avg_f1, decimals))
    print 'c1 f1:\t', str(round(f1c1, decimals))
    print 'c2 f1:\t', str(round(f1c2, decimals))
    print 'avg(c1,c2):\t', str(round(fw, decimals))
    print '------------'

###################################################################################
# split a parallel or comparable corpus into two parts
Example #17
Source File: conv_featuremaps_visualization.py From MCF-3D-CNN with MIT License | 6 votes |
def accuracy(y_true, y_pred):
    # compute the confusion matrix
    y = np.zeros(len(y_true))
    y_ = np.zeros(len(y_true))
    for i in range(len(y_true)):
        y[i] = np.argmax(y_true[i, :])
        y_[i] = np.argmax(y_pred[i, :])
    cnf_mat = confusion_matrix(y, y_)

    # Acc = 1.0*(cnf_mat[1][1]+cnf_mat[0][0])/len(y_true)
    # Sens = 1.0*cnf_mat[1][1]/(cnf_mat[1][1]+cnf_mat[1][0])
    # Spec = 1.0*cnf_mat[0][0]/(cnf_mat[0][0]+cnf_mat[0][1])

    # # plot the ROC curve
    # fpr, tpr, thresholds = roc_curve(y_true[:,0], y_pred[:,0])
    # Auc = auc(fpr, tpr)

    # compute multi-class evaluation metrics
    Sens = recall_score(y, y_, average='macro')
    Prec = precision_score(y, y_, average='macro')
    F1 = f1_score(y, y_, average='weighted')
    Support = precision_recall_fscore_support(y, y_, beta=0.5, average=None)
    return Sens, Prec, F1, cnf_mat
Example #18
Source File: tool.py From lightNLP with Apache License 2.0 | 6 votes |
def get_score(self, model, x, y, pos, rel, field_x, field_y, field_pos, score_type='f1'):
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map[score_type] if score_type in metrics_map else metrics_map['f1']
    vec_x = torch.tensor([field_x.stoi[i] for i in x])
    len_vec_x = torch.tensor([len(vec_x)]).to(DEVICE)
    vec_pos = torch.tensor([field_pos.stoi[i] for i in pos])
    vec_rel = torch.tensor([int(x) for x in rel])
    predict_y = model(vec_x.view(-1, 1).to(DEVICE), vec_pos.view(-1, 1).to(DEVICE), vec_rel.view(-1, 1).to(DEVICE), len_vec_x)[0]
    true_y = [field_y.stoi[i] for i in y]
    assert len(true_y) == len(predict_y)
    return metric_func(predict_y, true_y, average='micro')
Example #19
Source File: tool.py From lightNLP with Apache License 2.0 | 6 votes |
def get_score(self, model, texta, textb, labels, score_type='f1'):
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map[score_type] if score_type in metrics_map else metrics_map['f1']
    assert texta.size(1) == textb.size(1) == len(labels)
    predict_prob = model(texta, textb)
    # print('predict', predict_prob)
    # print('labels', labels)
    predict_labels = torch.gt(predict_prob, 0.5)
    predict_labels = predict_labels.view(-1).cpu().data.numpy()
    labels = labels.view(-1).cpu().data.numpy()
    return metric_func(predict_labels, labels, average='micro')
Example #20
Source File: tool.py From lightNLP with Apache License 2.0 | 6 votes |
def get_score(self, model, texta, textb, labels, score_type='f1'):
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map[score_type] if score_type in metrics_map else metrics_map['f1']
    assert texta.size(1) == textb.size(1) == len(labels)
    vec_predict = model(texta, textb)
    soft_predict = torch.softmax(vec_predict, dim=1)
    predict_prob, predict_index = torch.max(soft_predict.cpu().data, dim=1)
    # print('prob', predict_prob)
    # print('index', predict_index)
    # print('labels', labels)
    labels = labels.view(-1).cpu().data.numpy()
    return metric_func(predict_index, labels, average='micro')
Example #21
Source File: test_recall.py From tf_metrics with Apache License 2.0 | 6 votes |
def test_recall_op(generator_fn, y_true_all, y_pred_all, pos_indices, average):
    # Precision on the whole dataset
    pr_sk = recall_score(
        y_true_all, y_pred_all, pos_indices, average=average)

    # Create Tensorflow graph
    ds = tf.data.Dataset.from_generator(
        generator_fn, (tf.int32, tf.int32), ([None], [None]))
    y_true, y_pred = ds.make_one_shot_iterator().get_next()
    pr_tf = tf_metrics.recall(y_true, y_pred, 4, pos_indices, average=average)

    with tf.Session() as sess:
        # Initialize and run the update op on each batch
        sess.run(tf.local_variables_initializer())
        while True:
            try:
                sess.run(pr_tf[1])
            except OutOfRangeError as e:
                break

        # Check final value
        assert np.allclose(sess.run(pr_tf[0]), pr_sk)
Example #22
Source File: test_multiclass.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_ovr_multilabel_dataset():
    base_clf = MultinomialNB(alpha=1)
    for au, prec, recall in zip((True, False), (0.51, 0.66), (0.51, 0.80)):
        X, Y = datasets.make_multilabel_classification(n_samples=100,
                                                       n_features=20,
                                                       n_classes=5,
                                                       n_labels=2,
                                                       length=50,
                                                       allow_unlabeled=au,
                                                       random_state=0)
        X_train, Y_train = X[:80], Y[:80]
        X_test, Y_test = X[80:], Y[80:]
        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
        Y_pred = clf.predict(X_test)

        assert clf.multilabel_
        assert_almost_equal(precision_score(Y_test, Y_pred, average="micro"),
                            prec, decimal=2)
        assert_almost_equal(recall_score(Y_test, Y_pred, average="micro"),
                            recall, decimal=2)
Example #23
Source File: test_classification.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_precision_recall_f_ignored_labels():
    # Test a subset of labels may be requested for PRF
    y_true = [1, 1, 2, 3]
    y_pred = [1, 3, 3, 3]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for i, (y_true, y_pred) in enumerate(data):
        recall_13 = partial(recall_score, y_true, y_pred, labels=[1, 3])
        recall_all = partial(recall_score, y_true, y_pred, labels=None)

        assert_array_almost_equal([.5, 1.], recall_13(average=None))
        assert_almost_equal((.5 + 1.) / 2, recall_13(average='macro'))
        assert_almost_equal((.5 * 2 + 1. * 1) / 3,
                            recall_13(average='weighted'))
        assert_almost_equal(2. / 3,
                            recall_13(average='micro'))

        # ensure the above were meaningful tests:
        for average in ['macro', 'weighted', 'micro']:
            assert_not_equal(recall_13(average=average),
                             recall_all(average=average))
Example #24
Source File: test_search.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_grid_search_cv_results_multimetric():
    X, y = make_classification(n_samples=50, n_features=4, random_state=42)

    n_splits = 3
    params = [dict(kernel=['rbf', ], C=[1, 10], gamma=[0.1, 1]),
              dict(kernel=['poly', ], degree=[1, 2])]

    for iid in (False, True):
        grid_searches = []
        for scoring in ({'accuracy': make_scorer(accuracy_score),
                         'recall': make_scorer(recall_score)},
                        'accuracy', 'recall'):
            grid_search = GridSearchCV(SVC(gamma='scale'), cv=n_splits,
                                       iid=iid, param_grid=params,
                                       scoring=scoring, refit=False)
            grid_search.fit(X, y)
            assert_equal(grid_search.iid, iid)
            grid_searches.append(grid_search)

        compare_cv_results_multimetric_with_single(*grid_searches, iid=iid)
Example #25
Source File: test_classification.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_recall_warnings():
    assert_no_warnings(recall_score,
                       np.array([[1, 1], [1, 1]]),
                       np.array([[0, 0], [0, 0]]),
                       average='micro')

    clean_warning_registry()
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter('always')
        recall_score(np.array([[0, 0], [0, 0]]),
                     np.array([[1, 1], [1, 1]]),
                     average='micro')
        assert_equal(str(record.pop().message),
                     'Recall is ill-defined and '
                     'being set to 0.0 due to no true samples.')
        recall_score([0, 0], [0, 0])
        assert_equal(str(record.pop().message),
                     'Recall is ill-defined and '
                     'being set to 0.0 due to no true samples.')
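Example #25 exercises the ill-defined case where there are no true samples, which scikit-learn reports with a warning and a score of 0.0. Newer scikit-learn releases (0.22 and later, to the best of my knowledge) expose a zero_division parameter to control this behaviour explicitly; a hedged sketch, assuming such a version is installed:

import numpy as np
from sklearn.metrics import recall_score

y_true = np.array([[0, 0], [0, 0]])  # no true samples at all
y_pred = np.array([[1, 1], [1, 1]])

# Instead of warning, return the chosen fallback value when recall is undefined.
print(recall_score(y_true, y_pred, average='micro', zero_division=0))  # 0.0
print(recall_score(y_true, y_pred, average='micro', zero_division=1))  # 1.0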
Example #26
Source File: test_recall.py From tf_metrics with Apache License 2.0 | 5 votes |
def test_recall(generator_fn, pos_indices, average):
    for y_true, y_pred in generator_fn():
        pr_tf = tf_metrics.recall(
            y_true, y_pred, 4, pos_indices, average=average)
        pr_sk = recall_score(
            y_true, y_pred, pos_indices, average=average)
        with tf.Session() as sess:
            sess.run(tf.local_variables_initializer())
            assert np.allclose(sess.run(pr_tf[1]), pr_sk)
Example #27
Source File: stats_metrics.py From fanci with GNU General Public License v3.0 | 5 votes |
def _get_scores(estimator, x, y):
    """
    Helper for sklearn, required to be able to score by more than one metric.
    :param estimator:
    :param x:
    :param y:
    :return:
    """
    y_pred = estimator.predict(x)
    return (accuracy_score(y, y_pred), precision_score(y, y_pred), recall_score(y, y_pred),
            roc_auc_score(y, y_pred), f1_score(y, y_pred))
Example #28
Source File: evaluation.py From LSTM-CRF-models with MIT License | 5 votes |
def get_Approx_Metrics(y_true, y_pred, verbose=True, preMsg='', flat_list=False):
    if verbose:
        print('------------------------ Approx Metrics---------------------------')
    if flat_list:
        z_true = y_true
        z_pred = y_pred
    else:
        z_true = list(itertools.chain.from_iterable(y_true))
        z_pred = list(itertools.chain.from_iterable(y_pred))
    z_true = [token[2:] if token[:2] == 'B-' else token for token in z_true]
    z_pred = [token[2:] if token[:2] == 'B-' else token for token in z_pred]
    label_dict = {x: i for i, x in enumerate(list(set(z_true) | set(z_pred)))}
    freq_dict = collections.Counter(z_true)
    z_true = [label_dict[x] for x in z_true]
    z_pred = [label_dict[x] for x in z_pred]
    f1s = f1_score(z_true, z_pred, average=None)
    rs = recall_score(z_true, z_pred, average=None)
    ps = precision_score(z_true, z_pred, average=None)
    results = []
    f1_none = []
    avg_recall = avg_precision = avg_f1 = 0.0
    for i in label_dict:
        if verbose:
            print("{5} The tag \'{0}\' has {1} elements and recall,precision,f1 ={3},{4}, {2}".format(i, freq_dict[i], f1s[label_dict[i]], rs[label_dict[i]], ps[label_dict[i]], preMsg))
        if i != 'None' and i != '|O':
            f1_none = f1_none + [(f1s[label_dict[i]], freq_dict[i]), ]
            avg_recall += float(rs[label_dict[i]]) * float(freq_dict[i])
            avg_precision += float(ps[label_dict[i]]) * float(freq_dict[i])
    intermediate_sum = sum([float(z[1]) for z in f1_none])
    if intermediate_sum == 0:
        intermediate_sum += 1
    avg_recall = float(avg_recall) / float(intermediate_sum)
    avg_precision = float(avg_precision) / float(intermediate_sum)
    if (float(avg_recall) + float(avg_precision)) != 0.0:
        avg_f1 = 2.0 * float(avg_precision) * float(avg_recall) / (float(avg_recall) + float(avg_precision))
    else:
        avg_f1 = 0.0
    if verbose:
        print("All medical tags collectively have {0} elements and recall,precision,f1 ={1},{2}, {3}".format(intermediate_sum, avg_recall, avg_precision, avg_f1))
    return avg_f1
Example #29
Source File: utils.py From bigdata18 with GNU General Public License v3.0 | 5 votes |
def calculate_metrics(y_true, y_pred, duration, y_true_val=None, y_pred_val=None):
    res = pd.DataFrame(data=np.zeros((1, 4), dtype=np.float), index=[0],
                       columns=['precision', 'accuracy', 'recall', 'duration'])
    res['precision'] = precision_score(y_true, y_pred, average='macro')
    res['accuracy'] = accuracy_score(y_true, y_pred)

    if not y_true_val is None:
        # this is useful when transfer learning is used with cross validation
        res['accuracy_val'] = accuracy_score(y_true_val, y_pred_val)

    res['recall'] = recall_score(y_true, y_pred, average='macro')
    res['duration'] = duration
    return res
Example #30
Source File: ocsvm.py From user-behavior-anomaly-detector with MIT License | 5 votes |
def print_accuracy(self, title, datasetY, predictions):
    print title
    print("accuracy: ", metrics.accuracy_score(datasetY, predictions))
    print("precision: ", metrics.precision_score(datasetY, predictions))
    print("recall: ", metrics.recall_score(datasetY, predictions))
    print("f1: ", metrics.f1_score(datasetY, predictions))
    print("area under curve (auc): ", metrics.roc_auc_score(datasetY, predictions))