Python sklearn.preprocessing.label_binarize() Examples
The following are 24 code examples of sklearn.preprocessing.label_binarize(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.
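Before working through the examples, here is a minimal usage sketch (not from any of the projects below) showing what label_binarize() returns; the two-class behavior in particular trips people up, since it yields a single indicator column rather than a two-column one-hot matrix:

import numpy as np
from sklearn.preprocessing import label_binarize

# Multiclass: one indicator column per entry of `classes`, in that order.
y = [1, 3, 3, 2]
print(label_binarize(y, classes=[1, 2, 3]))
# [[1 0 0]
#  [0 0 1]
#  [0 0 1]
#  [0 1 0]]

# Classes that never occur in y simply become all-zero columns.
print(label_binarize(y, classes=np.arange(5)).shape)  # (4, 5)

# Two-class case: a single column, not two.
print(label_binarize([0, 1, 1], classes=[0, 1]))
# [[0]
#  [1]
#  [1]]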
Example #1
Source File: test_classification.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_precision_recall_f_ignored_labels():
    # Test a subset of labels may be requested for PRF
    y_true = [1, 1, 2, 3]
    y_pred = [1, 3, 3, 3]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for i, (y_true, y_pred) in enumerate(data):
        recall_13 = partial(recall_score, y_true, y_pred, labels=[1, 3])
        recall_all = partial(recall_score, y_true, y_pred, labels=None)

        assert_array_almost_equal([.5, 1.], recall_13(average=None))
        assert_almost_equal((.5 + 1.) / 2, recall_13(average='macro'))
        assert_almost_equal((.5 * 2 + 1. * 1) / 3,
                            recall_13(average='weighted'))
        assert_almost_equal(2. / 3, recall_13(average='micro'))

        # ensure the above were meaningful tests:
        for average in ['macro', 'weighted', 'micro']:
            assert_not_equal(recall_13(average=average),
                             recall_all(average=average))
Example #2
Source File: test_classification.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_precision_recall_f_ignored_labels():
    # Test a subset of labels may be requested for PRF
    y_true = [1, 1, 2, 3]
    y_pred = [1, 3, 3, 3]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for i, (y_true, y_pred) in enumerate(data):
        recall_13 = partial(recall_score, y_true, y_pred, labels=[1, 3])
        recall_all = partial(recall_score, y_true, y_pred, labels=None)

        assert_array_almost_equal([.5, 1.], recall_13(average=None))
        assert_almost_equal((.5 + 1.) / 2, recall_13(average='macro'))
        assert_almost_equal((.5 * 2 + 1. * 1) / 3,
                            recall_13(average='weighted'))
        assert_almost_equal(2. / 3, recall_13(average='micro'))

        # ensure the above were meaningful tests:
        for average in ['macro', 'weighted', 'micro']:
            assert_not_equal(recall_13(average=average),
                             recall_all(average=average))
Example #3
Source File: recall.py From driverlessai-recipes with Apache License 2.0 | 6 votes |
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    if sample_weight is not None:
        sample_weight = sample_weight.ravel()
    enc_actual, enc_predicted, labels = prep_actual_predicted(actual, predicted, labels)
    cm_weights = sample_weight if sample_weight is not None else None

    # multiclass
    if enc_predicted.shape[1] > 1:
        enc_predicted = enc_predicted.ravel()
        enc_actual = label_binarize(enc_actual, labels).ravel()
        cm_weights = np.repeat(cm_weights, predicted.shape[1]).ravel() if cm_weights is not None else None
        assert enc_predicted.shape == enc_actual.shape
        assert cm_weights is None or enc_predicted.shape == cm_weights.shape

    cms = daicx.confusion_matrices(enc_actual.ravel(), enc_predicted.ravel(), sample_weight=cm_weights)
    cms = cms.loc[
        cms[[self.__class__._threshold_optimizer]].idxmax()]  # get row(s) for optimal metric defined above
    cms['metric'] = cms[['tp', 'fp', 'tn', 'fn']].apply(lambda x: self.protected_metric(*x), axis=1, raw=True)
    return cms['metric'].mean()  # in case of ties
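Note that this recipe calls label_binarize(enc_actual, labels) with the class list as a positional argument, and several other examples on this page do the same. That worked when these projects were written, but recent scikit-learn releases make classes keyword-only, so under a current install the call would need to read label_binarize(enc_actual, classes=labels).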
Example #4
Source File: precision.py From driverlessai-recipes with Apache License 2.0 | 6 votes |
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    if sample_weight is not None:
        sample_weight = sample_weight.ravel()
    enc_actual, enc_predicted, labels = prep_actual_predicted(actual, predicted, labels)
    cm_weights = sample_weight if sample_weight is not None else None

    # multiclass
    if enc_predicted.shape[1] > 1:
        enc_predicted = enc_predicted.ravel()
        enc_actual = label_binarize(enc_actual, labels).ravel()
        cm_weights = np.repeat(cm_weights, predicted.shape[1]).ravel() if cm_weights is not None else None
        assert enc_predicted.shape == enc_actual.shape
        assert cm_weights is None or enc_predicted.shape == cm_weights.shape

    cms = daicx.confusion_matrices(enc_actual.ravel(), enc_predicted.ravel(), sample_weight=cm_weights)
    cms = cms.loc[
        cms[[self.__class__._threshold_optimizer]].idxmax()]  # get row(s) for optimal metric defined above
    cms['metric'] = cms[['tp', 'fp', 'tn', 'fn']].apply(lambda x: self.protected_metric(*x), axis=1, raw=True)
    return cms['metric'].mean()  # in case of ties
Example #5
Source File: false_discovery_rate.py From driverlessai-recipes with Apache License 2.0 | 6 votes |
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    if sample_weight is not None:
        sample_weight = sample_weight.ravel()
    enc_actual, enc_predicted, labels = prep_actual_predicted(actual, predicted, labels)
    cm_weights = sample_weight if sample_weight is not None else None

    # multiclass
    if enc_predicted.shape[1] > 1:
        enc_predicted = enc_predicted.ravel()
        enc_actual = label_binarize(enc_actual, labels).ravel()
        cm_weights = np.repeat(cm_weights, predicted.shape[1]).ravel() if cm_weights is not None else None
        assert enc_predicted.shape == enc_actual.shape
        assert cm_weights is None or enc_predicted.shape == cm_weights.shape

    cms = daicx.confusion_matrices(enc_actual.ravel(), enc_predicted.ravel(), sample_weight=cm_weights)
    cms = cms.loc[
        cms[[self.__class__._threshold_optimizer]].idxmax()]  # get row(s) for optimal metric defined above
    cms['metric'] = cms[['tp', 'fp', 'tn', 'fn']].apply(lambda x: self.protected_metric(*x), axis=1, raw=True)
    return cms['metric'].mean()  # in case of ties
Example #6
Source File: utils.py From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License | 6 votes |
def weight_dict_fc(trainLabel, para):
    train_labels = []
    for i in range(len(trainLabel)):
        [train_labels.append(j) for j in trainLabel[i]]
    from sklearn.preprocessing import label_binarize
    y_total_40 = label_binarize(train_labels, classes=[i for i in range(40)])
    class_distribution_40_class = np.sum(y_total_40, axis=0)
    class_distribution_40_class = [float(i) for i in class_distribution_40_class]
    class_distribution_40_class = class_distribution_40_class / np.sum(class_distribution_40_class)
    inverse_dist = 1 / class_distribution_40_class
    norm_inv_dist = inverse_dist / np.sum(inverse_dist)
    weights = norm_inv_dist * para.weight_scaler + 1
    weight_dict = dict()
    for classID, value in enumerate(weights):
        weight_dict.update({classID: value})
    return weight_dict
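The weighting here is a plain inverse-class-frequency scheme: label_binarize turns the label list into a one-hot matrix whose column sums give the per-class counts, rarer classes among the 40 receive proportionally larger weights, para.weight_scaler controls how aggressively the imbalance is corrected, and the trailing +1 keeps every weight at least 1.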
Example #7
Source File: utils.py From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License | 6 votes |
def weight_dict_fc(trainLabel, para):
    train_labels = []
    for i in range(len(trainLabel)):
        [train_labels.append(j) for j in trainLabel[i]]
    class_number = len(np.unique(train_labels))
    from sklearn.preprocessing import label_binarize
    y_total_40 = label_binarize(train_labels, classes=[i for i in range(para.outputClassN)])
    class_distribution_40_class = np.sum(y_total_40, axis=0)
    class_distribution_40_class = [float(i) for i in class_distribution_40_class]
    class_distribution_40_class = class_distribution_40_class / np.sum(class_distribution_40_class)
    inverse_dist = 1 / class_distribution_40_class
    norm_inv_dist = inverse_dist / np.sum(inverse_dist)
    weights = norm_inv_dist * para.weight_scaler + 1
    weight_dict = dict()
    for classID, value in enumerate(weights):
        weight_dict.update({classID: value})
    return weight_dict
Example #8
Source File: ABuML.py From abu with GNU General Public License v3.0 | 6 votes |
def cross_val_roc_auc_score(self, cv=10, **kwargs):
    """
    Decorated by entry_wrapper(support=(EMLFitType.E_FIT_CLF,)), i.e. it only
    supports supervised classification. Uses cross_val_score to measure the
    data with roc_auc. If y has more than two label classes, the labels are
    binarized via label_binarize, the roc_auc of each binarized column is
    computed in turn, and the measurement with the best score is returned.

    :param cv: passed through to cross_val_score, default 10
    :param kwargs: callers may pass x and y; via
                       x = kwargs.pop('x', self.x)
                       y = kwargs.pop('y', self.y)
                   these determine the x, y arguments passed to
                   self._do_cross_val_score, as well as the fiter_type used
                   by the decorator, e.g.
                   ttn_abu.cross_val_roc_auc_score(fiter_type=ml.EMLFitType.E_FIT_REG)
    :return: the score sequence returned by cross_val_score, e.g.
             array([ 1.  ,  0.9 ,  1.  ,  0.9 ,  1.  ,  0.9 ,  1.  ,  0.9 ,  0.95,  1.  ])
    """
    x = kwargs.pop('x', self.x)
    y = kwargs.pop('y', self.y)
    return self._do_cross_val_score(x, y, cv, _EMLScoreType.E_SCORE_ROC_AUC.value)
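The docstring above describes a binarize-then-score pattern for multiclass ROC AUC. As a rough sketch of that idea (independent of abu's internals, which live in self._do_cross_val_score; the helper name here is made up), one might write:

import numpy as np
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_auc_score

def per_class_roc_auc(y_true, y_score, classes):
    # One-vs-rest: binarize the labels, then score each indicator column
    # against the matching column of predicted probabilities.
    y_bin = label_binarize(y_true, classes=classes)
    return [roc_auc_score(y_bin[:, i], y_score[:, i])
            for i in range(len(classes))]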
Example #9
Source File: data_utils.py From videograph with GNU General Public License v3.0 | 6 votes |
def __init__(self, feats_path, class_nums, n_classes, n_frames_per_video,
             batch_size, n_feat_maps, feat_map_side_dim, n_threads=10):
    random.seed(101)
    np.random.seed(101)

    self.__feats_pathes = feats_path
    self.__class_nums = class_nums
    self.__n_frames_per_video = n_frames_per_video
    self.__n_feat_maps = n_feat_maps
    self.__feat_map_side_dim = feat_map_side_dim
    self.__batch_size = batch_size

    # binarize the labels
    classes = range(1, n_classes + 1)
    self.__y = label_binarize(self.__class_nums, classes)

    self.__is_busy = False
    self.__batch_features = None
    self.__batch_y = None
    self.__n_threads_in_pool = n_threads
    self.__pool = Pool(self.__n_threads_in_pool)
Example #10
Source File: test_classification.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_matthews_corrcoef():
    rng = np.random.RandomState(0)
    y_true = ["a" if i == 0 else "b" for i in rng.randint(0, 2, size=20)]

    # corrcoef of same vectors must be 1
    assert_almost_equal(matthews_corrcoef(y_true, y_true), 1.0)

    # corrcoef, when the two vectors are opposites of each other, should be -1
    y_true_inv = ["b" if i == "a" else "a" for i in y_true]
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv), -1)

    y_true_inv2 = label_binarize(y_true, ["a", "b"])
    y_true_inv2 = np.where(y_true_inv2, 'a', 'b')
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv2), -1)

    # For the zero vector case, the corrcoef cannot be calculated and should
    # result in a RuntimeWarning
    mcc = assert_warns_message(RuntimeWarning, 'invalid value encountered',
                               matthews_corrcoef, [0, 0, 0, 0], [0, 0, 0, 0])

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # And also for any other vector with 0 variance
    mcc = assert_warns_message(RuntimeWarning, 'invalid value encountered',
                               matthews_corrcoef, y_true, ['a'] * len(y_true))

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # These two vectors have 0 correlation and hence mcc should be 0
    y_1 = [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
    y_2 = [1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1]
    assert_almost_equal(matthews_corrcoef(y_1, y_2), 0.)

    # Check that sample weight is able to selectively exclude
    mask = [1] * 10 + [0] * 10
    # Now the first half of the vector elements are alone given a weight of 1
    # and hence the mcc will not be a perfect 0 as in the previous case
    assert_raises(AssertionError, assert_almost_equal,
                  matthews_corrcoef(y_1, y_2, sample_weight=mask), 0.)
Example #11
Source File: test_classification.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_precision_recall_f_extra_labels():
    # Test handling of explicit additional (not in input) labels to PRF
    y_true = [1, 3, 3, 2]
    y_pred = [1, 1, 3, 2]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for i, (y_true, y_pred) in enumerate(data):
        # No average: zeros in array
        actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
                              average=None)
        assert_array_almost_equal([0., 1., 1., .5, 0.], actual)

        # Macro average is changed
        actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
                              average='macro')
        assert_array_almost_equal(np.mean([0., 1., 1., .5, 0.]), actual)

        # No effect otherwise
        for average in ['micro', 'weighted', 'samples']:
            if average == 'samples' and i == 0:
                continue
            assert_almost_equal(recall_score(y_true, y_pred,
                                             labels=[0, 1, 2, 3, 4],
                                             average=average),
                                recall_score(y_true, y_pred, labels=None,
                                             average=average))

    # Error when introducing invalid label in multilabel case
    # (although it would only affect performance if average='macro'/None)
    for average in [None, 'macro', 'micro', 'samples']:
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(6), average=average)
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(-1, 4), average=average)
Example #12
Source File: utils.py From pysster with MIT License | 5 votes |
def performance_report(labels, predictions):
    from sklearn.preprocessing import label_binarize
    from sklearn.metrics import precision_recall_fscore_support
    classes = list(range(labels.shape[1]))
    roc_aucs, pr_aucs = [], []
    if len(classes) == 2:
        roc_aucs = [auROC(labels[:, 0], predictions[:, 0])[2]] * 2
        pr_aucs = [auPR(labels[:, 0], predictions[:, 0])[2]] * 2
        labels = label_binarize(np.argmax(labels, axis=1), classes=classes)
    else:
        for x in classes:
            roc_aucs.append(auROC(labels[:, x], predictions[:, x])[2])
            pr_aucs.append(auPR(labels[:, x], predictions[:, x])[2])
    if not np.isclose(np.sum(predictions, axis=1), 1).all():  # multi-label classification
        y_pred = predictions > 0.5
        y_pred.dtype = np.uint8
    else:
        y_pred = label_binarize(np.argmax(predictions, axis=1), classes=classes)
    prec_recall_f1_support = precision_recall_fscore_support(labels, y_pred)
    report = np.empty((len(classes), 6))
    for x in classes:
        report[x, :] = [prec_recall_f1_support[0][x],
                        prec_recall_f1_support[1][x],
                        prec_recall_f1_support[2][x],
                        roc_aucs[x],
                        pr_aucs[x],
                        prec_recall_f1_support[3][x]]
    return report
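The len(classes) == 2 branch above exists because of the two-class quirk shown in the opening sketch: label_binarize collapses a pair of classes into a single indicator column. Re-binarizing via np.argmax therefore gives the binary problem a one-column representation that matches the shape of y_pred, and the single ROC/PR AUC is computed once and duplicated so the report still has one row per class.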
Example #13
Source File: predict_deepchannel_QuB.py From Deep-Channel with MIT License | 5 votes |
def make_roc(gt, cpl, cl):
    from sklearn.preprocessing import label_binarize
    y_predict = label_binarize(gt, classes=[0, 1, 2, 3, 4, 5])
    print('c=', cl)
    y = label_binarize(cl, classes=[0, 1, 2, 3, 4, 5])
    n_classesi = y.shape[1]
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    from sklearn.metrics import roc_curve, auc
    for i in range(n_classesi):
        fpr[i], tpr[i], thre = roc_curve(y_predict[:, i], cpl[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
        print('state=, {}, auc=,{}'.format(i, roc_auc[i]))
Example #14
Source File: metrics.py From delira with GNU Affero General Public License v3.0 | 5 votes |
def __call__(self, y_true, y_pred, **kwargs):
    """
    Compute auroc

    Parameters
    ----------
    y_true: np.ndarray
        ground truth data with shape (N)
    y_pred: np.ndarray
        predictions of network in numpy format with shape (N, nclasses)
    kwargs:
        variable number of keyword arguments passed to roc_auc_score

    Returns
    -------
    float
        computed auc score

    Raises
    ------
    ValueError
        if two classes are given and the predictions contain more than
        two classes
    """
    # binary classification
    if len(self.classes) == 2:
        # single output unit (e.g. sigmoid)
        if len(y_pred.shape) == 1 or y_pred.shape[1] == 1:
            return roc_auc_score(y_true, y_pred, **kwargs)
        # output of two units (e.g. softmax)
        elif y_pred.shape[1] == 2:
            return roc_auc_score(y_true, y_pred[:, 1], **kwargs)
        else:
            raise ValueError("Can not compute auroc metric for binary "
                             "classes with {} predicted "
                             "classes.".format(y_pred.shape[1]))
    # classification with multiple classes
    if len(self.classes) > 2:
        y_true_bin = label_binarize(y_true, self.classes)
        return roc_auc_score(y_true_bin, y_pred, **kwargs, **self.kwargs)
Example #15
Source File: struct_models.py From marseille with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _marg_rounded(self, x, y):
    y_node = y.nodes
    y_link = y.links
    Y_node = label_binarize(y_node, self.prop_encoder_.classes_)
    Y_link = label_binarize(y_link, self.link_encoder_.classes_)

    # XXX can this be avoided?
    Y_node, Y_link = map(_binary_2d, (Y_node, Y_link))

    src_type = Y_node[x.link_to_prop[:, 0]]
    trg_type = Y_node[x.link_to_prop[:, 1]]

    if self.compat_features:
        pw = np.einsum('...j,...k,...l->...jkl',
                       src_type, trg_type, Y_link)
        compat = np.tensordot(x.X_compat.T, pw, axes=[1, 0])
    else:
        # equivalent to compat_features == np.ones(n_links)
        compat = np.einsum('ij,ik,il->jkl', src_type, trg_type, Y_link)

    second_order = []

    if self.coparents_ or self.grandparents_ or self.siblings_:
        link = {(a, b): k for k, (a, b) in enumerate(x.link_to_prop)}
        if self.coparents_:
            second_order.extend(y_link[link[a, b]] & y_link[link[c, b]]
                                for a, b, c in x.second_order)
        if self.grandparents_:
            second_order.extend(y_link[link[a, b]] & y_link[link[b, c]]
                                for a, b, c in x.second_order)
        if self.siblings_:
            second_order.extend(y_link[link[b, a]] & y_link[link[b, c]]
                                for a, b, c in x.second_order)
    second_order = np.array(second_order)

    return Y_node, Y_link, compat, second_order
Example #16
Source File: test.py From SpatioTemporalSegmentation with MIT License | 5 votes |
def average_precision(prob_np, target_np):
    num_class = prob_np.shape[1]
    label = label_binarize(target_np, classes=list(range(num_class)))
    with np.errstate(divide='ignore', invalid='ignore'):
        return average_precision_score(label, prob_np, None)
Example #17
Source File: examples.py From dython with BSD 3-Clause "New" or "Revised" License | 5 votes |
def roc_graph_example():
    """
    Plot an example ROC graph of an SVM model predictions over the Iris
    dataset.

    Based on sklearn examples (as was seen on April 2018):
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
    """
    # Load data
    iris = datasets.load_iris()
    X = iris.data
    y = label_binarize(iris.target, classes=[0, 1, 2])

    # Add noisy features
    random_state = np.random.RandomState(4)
    n_samples, n_features = X.shape
    X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

    # Train a model
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
                                                        random_state=0)
    classifier = OneVsRestClassifier(svm.SVC(kernel='linear',
                                             probability=True,
                                             random_state=0))

    # Predict
    y_score = classifier.fit(X_train, y_train).predict_proba(X_test)

    # Plot ROC graphs
    return roc_graph(y_test, y_score, class_names=iris.target_names)
Example #18
Source File: roc.py From RIDDLE with Apache License 2.0 | 5 votes |
def _compute_roc_stats(y_test, y_test_probas, num_class):
    """Compute ROC AUC statistics and visualize ROC curves.

    Arguments:
        y_test: [int]
            list of test class labels as integer indices
        y_test_probas: np.ndarray, float
            array of predicted probabilities with shape
            (num_sample, num_class)
        num_class: int
            number of classes

    Returns:
        roc_auc_dict: {int: float}
            dictionary mapping classes to ROC AUC scores
        fpr_dict: {string: np.ndarray}
            dictionary mapping names of classes or an averaging method to
            arrays of increasing false positive rates
        tpr_dict: {string: float}
            dictionary mapping names of classes or an averaging method to
            arrays of increasing true positive rates
    """
    y_test = label_binarize(y_test, classes=range(0, num_class))
    fpr_dict, tpr_dict, roc_auc_dict = {}, {}, {}
    for i in range(num_class):
        fpr_dict[i], tpr_dict[i], _ = roc_curve(
            y_test[:, i], y_test_probas[:, i])
        roc_auc_dict[i] = auc(fpr_dict[i], tpr_dict[i])

    # Compute micro-average ROC curve and ROC area
    fpr_dict["micro"], tpr_dict["micro"], _ = roc_curve(
        y_test.ravel(), y_test_probas.ravel())
    roc_auc_dict["micro"] = auc(fpr_dict["micro"], tpr_dict["micro"])

    return roc_auc_dict, fpr_dict, tpr_dict
Example #19
Source File: model.py From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License | 5 votes |
def evaluateOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion):
    test_loss = []
    test_acc = []
    test_predict = []
    for i in range(len(inputCoor)):
        xTest, graphTest, labelTest = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTest = graphTest.tocsr()
        labelBinarize = label_binarize(labelTest, classes=[i for i in range(para.outputClassN)])
        test_batch_size = para.testBatchSize
        for testBatchID in range(len(labelTest) / test_batch_size):
            start = testBatchID * test_batch_size
            end = start + test_batch_size
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTest, graphTest, labelBinarize, start, end)
            batchWeight = uniform_weight(batchLabel)
            batchGraph = batchGraph.todense()

            feed_dict = {trainOperaion['inputPC']: batchCoor,
                         trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel,
                         trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: 1.0,
                         trainOperaion['keep_prob_2']: 1.0}

            predict, loss_test, acc_test = sess.run(
                [trainOperaion['predictLabels'], trainOperaion['loss'], trainOperaion['acc']],
                feed_dict=feed_dict)
            test_loss.append(loss_test)
            test_acc.append(acc_test)
            test_predict.append(predict)

    test_average_loss = np.mean(test_loss)
    test_average_acc = np.mean(test_acc)

    return test_average_loss, test_average_acc, test_predict
Example #20
Source File: test_classification.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_matthews_corrcoef():
    rng = np.random.RandomState(0)
    y_true = ["a" if i == 0 else "b" for i in rng.randint(0, 2, size=20)]

    # corrcoef of same vectors must be 1
    assert_almost_equal(matthews_corrcoef(y_true, y_true), 1.0)

    # corrcoef, when the two vectors are opposites of each other, should be -1
    y_true_inv = ["b" if i == "a" else "a" for i in y_true]
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv), -1)

    y_true_inv2 = label_binarize(y_true, ["a", "b"])
    y_true_inv2 = np.where(y_true_inv2, 'a', 'b')
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv2), -1)

    # For the zero vector case, the corrcoef cannot be calculated and should
    # result in a RuntimeWarning
    mcc = assert_warns_div0(matthews_corrcoef, [0, 0, 0, 0], [0, 0, 0, 0])

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # And also for any other vector with 0 variance
    mcc = assert_warns_div0(matthews_corrcoef, y_true, ['a'] * len(y_true))

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # These two vectors have 0 correlation and hence mcc should be 0
    y_1 = [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
    y_2 = [1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1]
    assert_almost_equal(matthews_corrcoef(y_1, y_2), 0.)

    # Check that sample weight is able to selectively exclude
    mask = [1] * 10 + [0] * 10
    # Now the first half of the vector elements are alone given a weight of 1
    # and hence the mcc will not be a perfect 0 as in the previous case
    assert_raises(AssertionError, assert_almost_equal,
                  matthews_corrcoef(y_1, y_2, sample_weight=mask), 0.)
Example #21
Source File: roc.py From sklearn-evaluation with MIT License | 4 votes |
def roc(y_true, y_score, ax=None):
    """
    Plot ROC curve.

    Parameters
    ----------
    y_true : array-like, shape = [n_samples]
        Correct target values (ground truth).
    y_score : array-like, shape = [n_samples] or [n_samples, 2] for binary
        classification or [n_samples, n_classes] for multiclass
        Target scores (estimator predictions).
    ax: matplotlib Axes
        Axes object to draw the plot onto, otherwise uses current Axes

    Notes
    -----
    It is assumed that the y_score parameter columns are in order. For
    example, if ``y_true = [2, 2, 1, 0, 0, 1, 2]``, then the first column
    in y_score must contain the scores for class 0, second column for
    class 1 and so on.

    Returns
    -------
    ax: matplotlib Axes
        Axes containing the plot

    Examples
    --------
    .. plot:: ../../examples/roc.py

    """
    if any((val is None for val in (y_true, y_score))):
        raise ValueError("y_true and y_score are needed to plot ROC")

    if ax is None:
        ax = plt.gca()

    # get the number of classes based on the shape of y_score
    y_score_is_vector = is_column_vector(y_score) or is_row_vector(y_score)
    if y_score_is_vector:
        n_classes = 2
    else:
        _, n_classes = y_score.shape

    # check data shape?

    if n_classes > 2:
        # convert y_true to binary format
        y_true_bin = label_binarize(y_true, classes=np.unique(y_true))
        _roc_multi(y_true_bin, y_score, ax=ax)
        for i in range(n_classes):
            _roc(y_true_bin[:, i], y_score[:, i], ax=ax)
    else:
        if y_score_is_vector:
            _roc(y_true, y_score, ax)
        else:
            _roc(y_true, y_score[:, 1], ax)

    # raise error if n_classes = 1?
    return ax
Example #22
Source File: model.py From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License | 4 votes |
def trainOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion, weight_dict, learningRate):
    dataChunkLoss = []
    dataChunkAcc = []
    dataChunkRegLoss = []
    for i in range(len(inputCoor)):
        xTrain_1, graphTrain_1, labelTrain_1 = inputCoor[i], inputGraph[i], inputLabel[i]

        graphTrain_1 = graphTrain_1.tocsr()
        labelBinarize = label_binarize(labelTrain_1, classes=[j for j in range(para.outputClassN)])
        xTrain, graphTrain, labelTrain = shuffle(xTrain_1, graphTrain_1, labelBinarize)
        # labelBinarize = label_binarize(labelTrain, classes=[j for j in range(40)])

        batch_loss = []
        batch_acc = []
        batch_reg = []
        batchSize = para.batchSize
        for batchID in range(len(labelBinarize) / para.batchSize):
            start = batchID * batchSize
            end = start + batchSize
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTrain, graphTrain, labelTrain, start, end)
            batchGraph = batchGraph.todense()
            batchCoor = add_noise(batchCoor, sigma=0.008, clip=0.02)
            if para.weighting_scheme == 'uniform':
                batchWeight = uniform_weight(batchLabel)
            elif para.weighting_scheme == 'weighted':
                batchWeight = weights_calculation(batchLabel, weight_dict)
            else:
                print 'please enter the valid weighting scheme'

            # print batchWeight

            feed_dict = {trainOperaion['inputPC']: batchCoor,
                         trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel,
                         trainOperaion['lr']: learningRate,
                         trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: para.keep_prob_1,
                         trainOperaion['keep_prob_2']: para.keep_prob_2}

            opt, loss_train, acc_train, loss_reg_train = sess.run(
                [trainOperaion['train'], trainOperaion['loss_total'],
                 trainOperaion['acc'], trainOperaion['loss_reg']],
                feed_dict=feed_dict)

            # print('The loss loss_reg and acc for this batch is {},{} and {}'.format(loss_train, loss_reg_train, acc_train))

            batch_loss.append(loss_train)
            batch_acc.append(acc_train)
            batch_reg.append(loss_reg_train)

        dataChunkLoss.append(np.mean(batch_loss))
        dataChunkAcc.append(np.mean(batch_acc))
        dataChunkRegLoss.append(np.mean(batch_reg))

    train_average_loss = np.mean(dataChunkLoss)
    train_average_acc = np.mean(dataChunkAcc)
    loss_reg_average = np.mean(dataChunkRegLoss)
    return train_average_loss, train_average_acc, loss_reg_average
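Note that this training loop, like the evaluation loops in Examples #19 and #23, is Python 2 code: it uses print statements and relies on / performing integer division in range(len(labelBinarize) / para.batchSize). Under Python 3 those divisions would need to become //, and the print statements would need parentheses.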
Example #23
Source File: model_multi_res.py From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License | 4 votes |
def evaluateOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion):
    # Description: performance on the test set data
    # Input: (1) inputCoor: input coordinates (B, N, 3)  (2) inputGraph: input graph (B, N*N)
    #        (3) inputLabel: labels (B, 1)  (4) para: global parameters
    #        (5) sess: Session  (6) trainOperaion: placeholder dictionary
    # Return: average loss, acc, regularization loss for test set
    test_loss = []
    test_acc = []
    test_predict = []
    for i in range(len(inputCoor)):
        xTest, graphTest, labelTest = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTest = graphTest.tocsr()
        labelBinarize = label_binarize(labelTest, classes=[j for j in range(40)])
        test_batch_size = para.testBatchSize
        for testBatchID in range(len(labelTest) / test_batch_size):
            start = testBatchID * test_batch_size
            end = start + test_batch_size
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTest, graphTest, labelBinarize, start, end)
            batchWeight = uniform_weight(batchLabel)
            batchGraph = batchGraph.todense()

            batchIndexL1, centroid_coordinates = farthest_sampling_new(batchCoor, M=para.clusterNumberL1,
                                                                       k=para.nearestNeighborL1,
                                                                       batch_size=test_batch_size,
                                                                       nodes_n=para.pointNumber)
            batchMiddleGraph = middle_graph_generation(centroid_coordinates, batch_size=test_batch_size,
                                                       M=para.clusterNumberL1)

            feed_dict = {trainOperaion['inputPC']: batchCoor,
                         trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel,
                         trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: 1.0,
                         trainOperaion['keep_prob_2']: 1.0,
                         trainOperaion['batch_index_l1']: batchIndexL1,
                         trainOperaion['l2Graph']: batchMiddleGraph,
                         trainOperaion['batch_size']: test_batch_size}

            predict, loss_test, acc_test = sess.run(
                [trainOperaion['predictLabels'], trainOperaion['loss'], trainOperaion['acc']],
                feed_dict=feed_dict)
            test_loss.append(loss_test)
            test_acc.append(acc_test)
            test_predict.append(predict)

    test_average_loss = np.mean(test_loss)
    test_average_acc = np.mean(test_acc)

    return test_average_loss, test_average_acc, test_predict
Example #24
Source File: test_classification.py From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def test_precision_recall_f_extra_labels():
    # Test handling of explicit additional (not in input) labels to PRF
    y_true = [1, 3, 3, 2]
    y_pred = [1, 1, 3, 2]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for i, (y_true, y_pred) in enumerate(data):
        # No average: zeros in array
        actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
                              average=None)
        assert_array_almost_equal([0., 1., 1., .5, 0.], actual)

        # Macro average is changed
        actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
                              average='macro')
        assert_array_almost_equal(np.mean([0., 1., 1., .5, 0.]), actual)

        # No effect otherwise
        for average in ['micro', 'weighted', 'samples']:
            if average == 'samples' and i == 0:
                continue
            assert_almost_equal(recall_score(y_true, y_pred,
                                             labels=[0, 1, 2, 3, 4],
                                             average=average),
                                recall_score(y_true, y_pred, labels=None,
                                             average=average))

    # Error when introducing invalid label in multilabel case
    # (although it would only affect performance if average='macro'/None)
    for average in [None, 'macro', 'micro', 'samples']:
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(6), average=average)
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(-1, 4), average=average)

    # tests non-regression on issue #10307
    y_true = np.array([[0, 1, 1], [1, 0, 0]])
    y_pred = np.array([[1, 1, 1], [1, 0, 1]])
    p, r, f, _ = precision_recall_fscore_support(y_true, y_pred,
                                                 average='samples',
                                                 labels=[0, 1])
    assert_almost_equal(np.array([p, r, f]), np.array([3 / 4, 1, 5 / 6]))