Python sklearn.preprocessing.label_binarize() Examples

The following are 21 code examples of sklearn.preprocessing.label_binarize(), collected from open-source projects. The source file, project, and license are noted above each example. You may also want to check out all available functions and classes of the sklearn.preprocessing module.
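Before the project examples, a minimal demonstration of the function itself: label_binarize() turns a label vector into a one-hot indicator matrix with one column per entry of classes, and with exactly two classes it returns a single 0/1 column (outputs shown in comments):

from sklearn.preprocessing import label_binarize

# multiclass: one indicator column per class, in the order given by `classes`
label_binarize([1, 6, 4, 2], classes=[1, 2, 4, 6])
# array([[1, 0, 0, 0],
#        [0, 0, 0, 1],
#        [0, 1, 0, 0],
#        [0, 0, 1, 0]])

# binary: a single column, 1 for the second entry of `classes`
label_binarize(['yes', 'no', 'no', 'yes'], classes=['no', 'yes'])
# array([[1],
#        [0],
#        [0],
#        [1]])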
Example #1
Source File: test_classification.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_precision_recall_f_ignored_labels():
    # Test a subset of labels may be requested for PRF
    y_true = [1, 1, 2, 3]
    y_pred = [1, 3, 3, 3]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for i, (y_true, y_pred) in enumerate(data):
        recall_13 = partial(recall_score, y_true, y_pred, labels=[1, 3])
        recall_all = partial(recall_score, y_true, y_pred, labels=None)

        assert_array_almost_equal([.5, 1.], recall_13(average=None))
        assert_almost_equal((.5 + 1.) / 2, recall_13(average='macro'))
        assert_almost_equal((.5 * 2 + 1. * 1) / 3,
                            recall_13(average='weighted'))
        assert_almost_equal(2. / 3, recall_13(average='micro'))

        # ensure the above were meaningful tests:
        for average in ['macro', 'weighted', 'micro']:
            assert_not_equal(recall_13(average=average),
                             recall_all(average=average)) 
Example #2
Source File: recall.py    From driverlessai-recipes with Apache License 2.0
def score(self,
              actual: np.array,
              predicted: np.array,
              sample_weight: typing.Optional[np.array] = None,
              labels: typing.Optional[np.array] = None,
              **kwargs) -> float:

        if sample_weight is not None:
            sample_weight = sample_weight.ravel()
        enc_actual, enc_predicted, labels = prep_actual_predicted(actual, predicted, labels)
        cm_weights = sample_weight if sample_weight is not None else None

        # multiclass
        if enc_predicted.shape[1] > 1:
            enc_predicted = enc_predicted.ravel()
            enc_actual = label_binarize(enc_actual, labels).ravel()
            cm_weights = np.repeat(cm_weights, predicted.shape[1]).ravel() if cm_weights is not None else None
            assert enc_predicted.shape == enc_actual.shape
            assert cm_weights is None or enc_predicted.shape == cm_weights.shape

        cms = daicx.confusion_matrices(enc_actual.ravel(), enc_predicted.ravel(), sample_weight=cm_weights)
        cms = cms.loc[
            cms[[self.__class__._threshold_optimizer]].idxmax()]  # get row(s) for optimal metric defined above
        cms['metric'] = cms[['tp', 'fp', 'tn', 'fn']].apply(lambda x: self.protected_metric(*x), axis=1, raw=True)
        return cms['metric'].mean()  # in case of ties 
Example #3
Source File: utils.py    From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License
def weight_dict_fc(trainLabel, para):
    train_labels = []
    for i in range(len(trainLabel)):
        train_labels.extend(trainLabel[i])
    from sklearn.preprocessing import label_binarize
    y_total_40 = label_binarize(train_labels, classes=[i for i in range(40)])
    class_distribution_40_class = np.sum(y_total_40, axis=0)
    class_distribution_40_class = [float(i) for i in class_distribution_40_class]
    class_distribution_40_class = class_distribution_40_class / np.sum(class_distribution_40_class)
    inverse_dist = 1 / class_distribution_40_class
    norm_inv_dist = inverse_dist / np.sum(inverse_dist)
    weights = norm_inv_dist * para.weight_scaler + 1
    weight_dict = dict()
    for classID, value in enumerate(weights):
        weight_dict.update({classID: value})
    return weight_dict 
Example #4
Source File: utils.py    From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License
def weight_dict_fc(trainLabel, para):
    train_labels = []
    for i in range(len(trainLabel)):
        train_labels.extend(trainLabel[i])
    class_number = len(np.unique(train_labels))
    from sklearn.preprocessing import label_binarize
    y_total_40 = label_binarize(train_labels, classes=[i for i in range(para.outputClassN)])
    class_distribution_40_class = np.sum(y_total_40, axis=0)
    class_distribution_40_class = [float(i) for i in class_distribution_40_class]
    class_distribution_40_class = class_distribution_40_class / np.sum(class_distribution_40_class)
    inverse_dist = 1 / class_distribution_40_class
    norm_inv_dist = inverse_dist / np.sum(inverse_dist)
    weights = norm_inv_dist * para.weight_scaler + 1
    weight_dict = dict()
    for classID, value in enumerate(weights):
        weight_dict.update({classID: value})
    return weight_dict 
Example #5
Source File: ABuML.py    From abu with GNU General Public License v3.0
def cross_val_roc_auc_score(self, cv=10, **kwargs):
        """
        被装饰器entry_wrapper(support=(EMLFitType.E_FIT_CLF,))装饰,
        即支持有监督学习分类,使用cross_val_score对数据进行roc_auc度量,如果数据的y的
        label标签 > 2,通过label_binarize将label标签进行二值化处理,
        依次计算二值化的列的roc_auc,结果返回score最好的数据度量
        :param cv: 透传cross_val_score的参数,默认10
        :param kwargs: 外部可以传递x, y, 通过
                                x = kwargs.pop('x', self.x)
                                y = kwargs.pop('y', self.y)
                       确定传递self._do_cross_val_score中参数x,y,
                       以及装饰器使用的fiter_type,eg:ttn_abu.cross_val_roc_auc_score(fiter_type=ml.EMLFitType.E_FIT_REG)
        :return: cross_val_score返回的score序列,
                 eg: array([ 1.  ,  0.9 ,  1.  ,  0.9 ,  1.  ,  0.9 ,  1.  ,  0.9 ,  0.95,  1.  ])
        """
        x = kwargs.pop('x', self.x)
        y = kwargs.pop('y', self.y)
        return self._do_cross_val_score(x, y, cv, _EMLScoreType.E_SCORE_ROC_AUC.value) 
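The binarize-then-score pattern this docstring describes can be sketched in isolation. The following is an illustrative stand-in, not code from the abu project; the score values are random placeholders:

from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_auc_score
import numpy as np

y = np.array([0, 1, 2, 2, 1, 0])              # more than two label values
y_bin = label_binarize(y, classes=[0, 1, 2])  # one indicator column per class
rng = np.random.RandomState(0)
scores = rng.rand(6, 3)                       # placeholder predicted scores
scores /= scores.sum(axis=1, keepdims=True)
# roc_auc for each binarized column, as described above
per_class_auc = [roc_auc_score(y_bin[:, i], scores[:, i])
                 for i in range(y_bin.shape[1])]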
Example #6
Source File: data_utils.py    From videograph with GNU General Public License v3.0
def __init__(self, feats_path, class_nums, n_classes, n_frames_per_video, batch_size, n_feat_maps, feat_map_side_dim, n_threads=10):
        random.seed(101)
        np.random.seed(101)

        self.__feats_pathes = feats_path
        self.__class_nums = class_nums
        self.__n_frames_per_video = n_frames_per_video
        self.__n_feat_maps = n_feat_maps
        self.__feat_map_side_dim = feat_map_side_dim

        self.__batch_size = batch_size

        # binarize the labels
        classes = range(1, n_classes + 1)
        self.__y = label_binarize(self.__class_nums, classes)

        self.__is_busy = False
        self.__batch_features = None
        self.__batch_y = None
        self.__n_threads_in_pool = n_threads
        self.__pool = Pool(self.__n_threads_in_pool) 
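The binarization step from this constructor, shown in isolation with made-up labels. Note that recent scikit-learn releases require classes to be passed by keyword, while the snippet above passes it positionally (which only works on older versions):

from sklearn.preprocessing import label_binarize

# class numbers starting at 1 map to one-hot rows
label_binarize([1, 3, 2], classes=[1, 2, 3])
# array([[1, 0, 0],
#        [0, 0, 1],
#        [0, 1, 0]])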
Example #7
Source File: test_classification.py    From twitter-stock-recommendation with MIT License
def test_matthews_corrcoef():
    rng = np.random.RandomState(0)
    y_true = ["a" if i == 0 else "b" for i in rng.randint(0, 2, size=20)]

    # corrcoef of same vectors must be 1
    assert_almost_equal(matthews_corrcoef(y_true, y_true), 1.0)

    # corrcoef, when the two vectors are opposites of each other, should be -1
    y_true_inv = ["b" if i == "a" else "a" for i in y_true]
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv), -1)

    y_true_inv2 = label_binarize(y_true, ["a", "b"])
    y_true_inv2 = np.where(y_true_inv2, 'a', 'b')
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv2), -1)

    # For the zero vector case, the corrcoef cannot be calculated and should
    # result in a RuntimeWarning
    mcc = assert_warns_message(RuntimeWarning, 'invalid value encountered',
                               matthews_corrcoef, [0, 0, 0, 0], [0, 0, 0, 0])

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # And also for any other vector with 0 variance
    mcc = assert_warns_message(RuntimeWarning, 'invalid value encountered',
                               matthews_corrcoef, y_true, ['a'] * len(y_true))

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # These two vectors have 0 correlation and hence mcc should be 0
    y_1 = [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
    y_2 = [1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1]
    assert_almost_equal(matthews_corrcoef(y_1, y_2), 0.)

    # Check that sample weight is able to selectively exclude
    mask = [1] * 10 + [0] * 10
    # Now the first half of the vector elements are alone given a weight of 1
    # and hence the mcc will not be a perfect 0 as in the previous case
    assert_raises(AssertionError, assert_almost_equal,
                  matthews_corrcoef(y_1, y_2, sample_weight=mask), 0.) 
Example #8
Source File: test_classification.py    From twitter-stock-recommendation with MIT License
def test_precision_recall_f_extra_labels():
    # Test handling of explicit additional (not in input) labels to PRF
    y_true = [1, 3, 3, 2]
    y_pred = [1, 1, 3, 2]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for i, (y_true, y_pred) in enumerate(data):
        # No average: zeros in array
        actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
                              average=None)
        assert_array_almost_equal([0., 1., 1., .5, 0.], actual)

        # Macro average is changed
        actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
                              average='macro')
        assert_array_almost_equal(np.mean([0., 1., 1., .5, 0.]), actual)

        # No effect otherwise
        for average in ['micro', 'weighted', 'samples']:
            if average == 'samples' and i == 0:
                continue
            assert_almost_equal(recall_score(y_true, y_pred,
                                             labels=[0, 1, 2, 3, 4],
                                             average=average),
                                recall_score(y_true, y_pred, labels=None,
                                             average=average))

    # Error when introducing invalid label in multilabel case
    # (although it would only affect performance if average='macro'/None)
    for average in [None, 'macro', 'micro', 'samples']:
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(6), average=average)
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(-1, 4), average=average) 
Example #9
Source File: utils.py    From pysster with MIT License
def performance_report(labels, predictions):
    from sklearn.preprocessing import label_binarize
    from sklearn.metrics import precision_recall_fscore_support
    classes = list(range(labels.shape[1]))
    roc_aucs, pr_aucs = [], []
    if len(classes) == 2:
        roc_aucs = [auROC(labels[:, 0], predictions[:, 0])[2]] * 2
        pr_aucs = [auPR(labels[:, 0], predictions[:, 0])[2]] * 2
        labels = label_binarize(np.argmax(labels, axis=1), classes=classes)
    else:
        for x in classes:
            roc_aucs.append(auROC(labels[:, x], predictions[:, x])[2])
            pr_aucs.append(auPR(labels[:, x], predictions[:, x])[2])
    if not np.isclose(np.sum(predictions, axis=1), 1).all():
        # multi-label classification: threshold each output independently
        y_pred = (predictions > 0.5).astype(np.uint8)
    else:
        y_pred = label_binarize(np.argmax(predictions, axis=1), classes=classes)
    prec_recall_f1_support = precision_recall_fscore_support(labels, y_pred)
    report = np.empty((len(classes), 6))
    for x in classes:
        report[x,:] = [prec_recall_f1_support[0][x], prec_recall_f1_support[1][x],
                       prec_recall_f1_support[2][x], roc_aucs[x],
                       pr_aucs[x], prec_recall_f1_support[3][x]]
    return report 
Example #10
Source File: predict_deepchannel_QuB.py    From Deep-Channel with MIT License
def make_roc(gt,cpl,cl):
    from sklearn.preprocessing import label_binarize
    # note: despite its name, y_predict holds the binarized ground truth
    y_predict = label_binarize(gt, classes=[0, 1, 2, 3, 4, 5])
    print('c=', cl)
    y = label_binarize(cl, classes=[0, 1, 2, 3, 4, 5])
    n_classesi = y.shape[1]
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    from sklearn.metrics import roc_curve, auc
    for i in range(n_classesi):
        fpr[i], tpr[i], thre = roc_curve(y_predict[:, i], cpl[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
        print('state=, {}, auc=,{}'.format(i,roc_auc[i])) 
Example #11
Source File: metrics.py    From delira with GNU Affero General Public License v3.0
def __call__(self, y_true, y_pred, **kwargs):
        """
        Compute auroc

        Parameters
        ----------
        y_true: np.ndarray
            ground truth data with shape (N)
        y_pred: np.ndarray
            predictions of network in numpy format with shape (N, nclasses)
        kwargs:
            variable number of keyword arguments passed to roc_auc_score

        Returns
        -------
        float
            computes auc score

        Raises
        ------
        ValueError
            if two classes are given and the predictions contain more than two
            classes
        """
        # binary classification
        if len(self.classes) == 2:
            # single output unit (e.g. sigmoid)
            if len(y_pred.shape) == 1 or y_pred.shape[1] == 1:
                return roc_auc_score(y_true, y_pred, **kwargs)
            # output of two units (e.g. softmax)
            elif y_pred.shape[1] == 2:
                return roc_auc_score(y_true, y_pred[:, 1], **kwargs)
            else:
                raise ValueError("Can not compute auroc metric for binary "
                                 "classes with {} predicted "
                                 "classes.".format(y_pred.shape[1]))

        # classification with multiple classes
        if len(self.classes) > 2:
            y_true_bin = label_binarize(y_true, self.classes)
            return roc_auc_score(y_true_bin, y_pred, **kwargs, **self.kwargs) 
Example #12
Source File: struct_models.py    From marseille with BSD 3-Clause "New" or "Revised" License
def _marg_rounded(self, x, y):
        y_node = y.nodes
        y_link = y.links
        Y_node = label_binarize(y_node, self.prop_encoder_.classes_)
        Y_link = label_binarize(y_link, self.link_encoder_.classes_)

        # XXX can this be avoided?
        Y_node, Y_link = map(_binary_2d, (Y_node, Y_link))

        src_type = Y_node[x.link_to_prop[:, 0]]
        trg_type = Y_node[x.link_to_prop[:, 1]]

        if self.compat_features:
            pw = np.einsum('...j,...k,...l->...jkl',
                           src_type, trg_type, Y_link)
            compat = np.tensordot(x.X_compat.T, pw, axes=[1, 0])
        else:
            # equivalent to compat_features == np.ones(n_links)
            compat = np.einsum('ij,ik,il->jkl', src_type, trg_type, Y_link)

        second_order = []

        if self.coparents_ or self.grandparents_ or self.siblings_:
            link = {(a, b): k for k, (a, b) in enumerate(x.link_to_prop)}
            if self.coparents_:
                second_order.extend(y_link[link[a, b]] & y_link[link[c, b]]
                                    for a, b, c in x.second_order)
            if self.grandparents_:
                second_order.extend(y_link[link[a, b]] & y_link[link[b, c]]
                                    for a, b, c in x.second_order)
            if self.siblings_:
                second_order.extend(y_link[link[b, a]] & y_link[link[b, c]]
                                    for a, b, c in x.second_order)
        second_order = np.array(second_order)

        return Y_node, Y_link, compat, second_order 
Example #13
Source File: test.py    From SpatioTemporalSegmentation with MIT License
def average_precision(prob_np, target_np):
  num_class = prob_np.shape[1]
  label = label_binarize(target_np, classes=list(range(num_class)))
  with np.errstate(divide='ignore', invalid='ignore'):
    return average_precision_score(label, prob_np, None) 
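A hypothetical call with made-up inputs. Passing None positionally for average (as above) makes average_precision_score return one average-precision value per class; this positional style only works on the older scikit-learn versions the snippet targets, since average is keyword-only in recent releases:

import numpy as np

probs = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1],
                  [0.2, 0.3, 0.5]])
targets = np.array([0, 1, 2])
ap_per_class = average_precision(probs, targets)  # one AP score per class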
Example #14
Source File: examples.py    From dython with BSD 3-Clause "New" or "Revised" License
def roc_graph_example():
    """
    Plot an example ROC graph of an SVM model predictions over the Iris
    dataset.

    Based on sklearn examples (as seen in April 2018):
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
    """

    # Load data
    iris = datasets.load_iris()
    X = iris.data
    y = label_binarize(iris.target, classes=[0, 1, 2])

    # Add noisy features
    random_state = np.random.RandomState(4)
    n_samples, n_features = X.shape
    X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

    # Train a model
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=0)
    classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True, random_state=0))

    # Predict
    y_score = classifier.fit(X_train, y_train).predict_proba(X_test)

    # Plot ROC graphs
    return roc_graph(y_test, y_score, class_names=iris.target_names) 
Example #15
Source File: roc.py    From RIDDLE with Apache License 2.0
def _compute_roc_stats(y_test, y_test_probas, num_class):
    """Compute ROC AUC statistics and visualize ROC curves.

    Arguments:
        y_test: [int]
            list of test class labels as integer indices
        y_test_probas: np.ndarray, float
            array of predicted probabilities with shape
            (num_sample, num_class)
        num_class: int
            number of classes

    Returns:
        roc_auc_dict: {int: float}
            dictionary mapping classes to ROC AUC scores
        fpr_dict: {int or string: np.ndarray}
            dictionary mapping classes or the name of an averaging method to
            arrays of increasing false positive rates
        tpr_dict: {int or string: np.ndarray}
            dictionary mapping classes or the name of an averaging method to
            arrays of increasing true positive rates
    """
    y_test = label_binarize(y_test, classes=range(0, num_class))

    fpr_dict, tpr_dict, roc_auc_dict = {}, {}, {}
    for i in range(num_class):
        fpr_dict[i], tpr_dict[i], _ = roc_curve(
            y_test[:, i], y_test_probas[:, i])
        roc_auc_dict[i] = auc(fpr_dict[i], tpr_dict[i])

    # Compute micro-average ROC curve and ROC area
    fpr_dict["micro"], tpr_dict["micro"], _ = roc_curve(
        y_test.ravel(), y_test_probas.ravel())
    roc_auc_dict["micro"] = auc(fpr_dict["micro"], tpr_dict["micro"])

    return roc_auc_dict, fpr_dict, tpr_dict 
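The micro-average step in isolation: the binarized truth matrix and the probability matrix are both raveled into flat vectors, so every (sample, class) cell counts as one binary decision. A self-contained sketch with made-up numbers:

from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
import numpy as np

y_bin = label_binarize([0, 1, 2, 1], classes=[0, 1, 2])
probas = np.array([[0.8, 0.1, 0.1],
                   [0.2, 0.6, 0.2],
                   [0.1, 0.2, 0.7],
                   [0.3, 0.4, 0.3]])
# one ROC curve over all (sample, class) cells at once
fpr, tpr, _ = roc_curve(y_bin.ravel(), probas.ravel())
micro_auc = auc(fpr, tpr)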
Example #16
Source File: model.py    From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License
def evaluateOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion):
    test_loss = []
    test_acc = []
    test_predict = []
    for i in range(len(inputCoor)):
        xTest, graphTest, labelTest = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTest = graphTest.tocsr()
        labelBinarize = label_binarize(labelTest, classes=[i for i in range(para.outputClassN)])
        test_batch_size = para.testBatchSize
        for testBatchID in range(len(labelTest) // test_batch_size):
            start = testBatchID * test_batch_size
            end = start + test_batch_size
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTest, graphTest, labelBinarize, start, end)
            batchWeight = uniform_weight(batchLabel)
            batchGraph = batchGraph.todense()

            feed_dict = {trainOperaion['inputPC']: batchCoor, trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel, trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: 1.0, trainOperaion['keep_prob_2']: 1.0}

            predict, loss_test, acc_test = sess.run(
                [trainOperaion['predictLabels'], trainOperaion['loss'], trainOperaion['acc']], feed_dict=feed_dict)
            test_loss.append(loss_test)
            test_acc.append(acc_test)
            test_predict.append(predict)

    test_average_loss = np.mean(test_loss)
    test_average_acc = np.mean(test_acc)

    return test_average_loss, test_average_acc, test_predict 
Example #17
Source File: test_classification.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_matthews_corrcoef():
    rng = np.random.RandomState(0)
    y_true = ["a" if i == 0 else "b" for i in rng.randint(0, 2, size=20)]

    # corrcoef of same vectors must be 1
    assert_almost_equal(matthews_corrcoef(y_true, y_true), 1.0)

    # corrcoef, when the two vectors are opposites of each other, should be -1
    y_true_inv = ["b" if i == "a" else "a" for i in y_true]
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv), -1)

    y_true_inv2 = label_binarize(y_true, ["a", "b"])
    y_true_inv2 = np.where(y_true_inv2, 'a', 'b')
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv2), -1)

    # For the zero vector case, the corrcoef cannot be calculated and should
    # result in a RuntimeWarning
    mcc = assert_warns_div0(matthews_corrcoef, [0, 0, 0, 0], [0, 0, 0, 0])

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # And also for any other vector with 0 variance
    mcc = assert_warns_div0(matthews_corrcoef, y_true, ['a'] * len(y_true))

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # These two vectors have 0 correlation and hence mcc should be 0
    y_1 = [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
    y_2 = [1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1]
    assert_almost_equal(matthews_corrcoef(y_1, y_2), 0.)

    # Check that sample weight is able to selectively exclude
    mask = [1] * 10 + [0] * 10
    # Now the first half of the vector elements are alone given a weight of 1
    # and hence the mcc will not be a perfect 0 as in the previous case
    assert_raises(AssertionError, assert_almost_equal,
                  matthews_corrcoef(y_1, y_2, sample_weight=mask), 0.) 
Example #18
Source File: roc.py    From sklearn-evaluation with MIT License
def roc(y_true, y_score, ax=None):
    """
    Plot ROC curve.

    Parameters
    ----------
    y_true : array-like, shape = [n_samples]
        Correct target values (ground truth).
    y_score : array-like, shape = [n_samples] or [n_samples, 2] for binary
              classification or [n_samples, n_classes] for multiclass

        Target scores (estimator predictions).
    ax: matplotlib Axes
        Axes object to draw the plot onto, otherwise uses current Axes

    Notes
    -----
    It is assumed that the y_score parameter columns are in order. For example,
    if ``y_true = [2, 2, 1, 0, 0, 1, 2]``, then the first column in y_score
    must contain the scores for class 0, the second column for class 1, and so on.


    Returns
    -------
    ax: matplotlib Axes
        Axes containing the plot

    Examples
    --------
    .. plot:: ../../examples/roc.py

    """
    if any((val is None for val in (y_true, y_score))):
        raise ValueError("y_true and y_score are needed to plot ROC")

    if ax is None:
        ax = plt.gca()

    # get the number of classes based on the shape of y_score
    y_score_is_vector = is_column_vector(y_score) or is_row_vector(y_score)
    if y_score_is_vector:
        n_classes = 2
    else:
        _, n_classes = y_score.shape

    # check data shape?

    if n_classes > 2:
        # convert y_true to binary format
        y_true_bin = label_binarize(y_true, classes=np.unique(y_true))
        _roc_multi(y_true_bin, y_score, ax=ax)
        for i in range(n_classes):
            _roc(y_true_bin[:, i], y_score[:, i], ax=ax)
    else:
        if y_score_is_vector:
            _roc(y_true, y_score, ax)
        else:
            _roc(y_true, y_score[:, 1], ax)

    # raise error if n_classes = 1?
    return ax 
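A usage sketch, assuming the module's private helpers (_roc, _roc_multi, is_column_vector, is_row_vector) are in scope alongside the function above; the estimator and data here are illustrative:

import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
# predict_proba columns are ordered by class, as the docstring requires
ax = roc(y_test, clf.predict_proba(X_test))
plt.show()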
Example #19
Source File: model.py    From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License
def trainOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion, weight_dict, learningRate):
    dataChunkLoss = []
    dataChunkAcc = []
    dataChunkRegLoss = []
    for i in range(len(inputCoor)):
        xTrain_1, graphTrain_1, labelTrain_1 = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTrain_1 = graphTrain_1.tocsr()
        labelBinarize = label_binarize(labelTrain_1, classes=[j for j in range(para.outputClassN)])
        xTrain, graphTrain, labelTrain = shuffle(xTrain_1, graphTrain_1, labelBinarize)
        # labelBinarize = label_binarize(labelTrain, classes=[j for j in range(40)])

        batch_loss = []
        batch_acc = []
        batch_reg = []
        batchSize = para.batchSize
        for batchID in range(len(labelBinarize) // para.batchSize):
            start = batchID * batchSize
            end = start + batchSize
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTrain, graphTrain, labelTrain, start, end)
            batchGraph = batchGraph.todense()


            batchCoor = add_noise(batchCoor, sigma=0.008, clip=0.02)
            if para.weighting_scheme == 'uniform':
                batchWeight = uniform_weight(batchLabel)
            elif para.weighting_scheme == 'weighted':
                batchWeight = weights_calculation(batchLabel, weight_dict)
            else:
                print('please enter the valid weighting scheme')

            # print(batchWeight)

            feed_dict = {trainOperaion['inputPC']: batchCoor, trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel, trainOperaion['lr']: learningRate,
                         trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: para.keep_prob_1, trainOperaion['keep_prob_2']: para.keep_prob_2}

            opt, loss_train, acc_train, loss_reg_train = sess.run(
                [trainOperaion['train'], trainOperaion['loss_total'], trainOperaion['acc'], trainOperaion['loss_reg']],
                feed_dict=feed_dict)

            #print('The loss loss_reg and acc for this batch is {},{} and {}'.format(loss_train, loss_reg_train, acc_train))
            batch_loss.append(loss_train)
            batch_acc.append(acc_train)
            batch_reg.append(loss_reg_train)

        dataChunkLoss.append(np.mean(batch_loss))
        dataChunkAcc.append(np.mean(batch_acc))
        dataChunkRegLoss.append(np.mean(batch_reg))


    train_average_loss = np.mean(dataChunkLoss)
    train_average_acc = np.mean(dataChunkAcc)
    loss_reg_average = np.mean(dataChunkRegLoss)
    return train_average_loss, train_average_acc, loss_reg_average 
Example #20
Source File: model_multi_res.py    From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License
def evaluateOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion):
    # Description: Performance on the test set data
    # Input: (1)inputCoor: input coordinates (B, N, 3) (2) inputGraph: input graph (B, N*N) (3) inputLabel: labels (B, 1)
    #        (4) para: global Parameters  (5) sess: Session (6) trainOperaion: placeholder dictionary
    # Return: average loss, acc, regularization loss for test set
    test_loss = []
    test_acc = []
    test_predict = []
    for i in range(len(inputCoor)):
        xTest, graphTest, labelTest = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTest = graphTest.tocsr()
        labelBinarize = label_binarize(labelTest, classes=[j for j in range(40)])
        test_batch_size = para.testBatchSize
        for testBatchID in range(len(labelTest) // test_batch_size):
            start = testBatchID * test_batch_size
            end = start + test_batch_size
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTest, graphTest, labelBinarize, start, end)
            batchWeight = uniform_weight(batchLabel)
            batchGraph = batchGraph.todense()

            batchIndexL1, centroid_coordinates = farthest_sampling_new(batchCoor, M=para.clusterNumberL1,
                                                                   k=para.nearestNeighborL1, batch_size=test_batch_size,
                                                                   nodes_n=para.pointNumber)

            batchMiddleGraph = middle_graph_generation(centroid_coordinates, batch_size = test_batch_size, M = para.clusterNumberL1)


            feed_dict = {trainOperaion['inputPC']: batchCoor, trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel, trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: 1.0, trainOperaion['keep_prob_2']: 1.0,
                         trainOperaion['batch_index_l1']: batchIndexL1,
                         trainOperaion['l2Graph']: batchMiddleGraph, trainOperaion['batch_size']: test_batch_size
                         }

            predict, loss_test, acc_test = sess.run(
                [trainOperaion['predictLabels'], trainOperaion['loss'], trainOperaion['acc']], feed_dict=feed_dict)
            test_loss.append(loss_test)
            test_acc.append(acc_test)
            test_predict.append(predict)

    test_average_loss = np.mean(test_loss)
    test_average_acc = np.mean(test_acc)

    return test_average_loss, test_average_acc, test_predict 
Example #21
Source File: test_classification.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_precision_recall_f_extra_labels():
    # Test handling of explicit additional (not in input) labels to PRF
    y_true = [1, 3, 3, 2]
    y_pred = [1, 1, 3, 2]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for i, (y_true, y_pred) in enumerate(data):
        # No average: zeros in array
        actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
                              average=None)
        assert_array_almost_equal([0., 1., 1., .5, 0.], actual)

        # Macro average is changed
        actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
                              average='macro')
        assert_array_almost_equal(np.mean([0., 1., 1., .5, 0.]), actual)

        # No effect otherwise
        for average in ['micro', 'weighted', 'samples']:
            if average == 'samples' and i == 0:
                continue
            assert_almost_equal(recall_score(y_true, y_pred,
                                             labels=[0, 1, 2, 3, 4],
                                             average=average),
                                recall_score(y_true, y_pred, labels=None,
                                             average=average))

    # Error when introducing invalid label in multilabel case
    # (although it would only affect performance if average='macro'/None)
    for average in [None, 'macro', 'micro', 'samples']:
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(6), average=average)
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(-1, 4), average=average)

    # tests non-regression on issue #10307
    y_true = np.array([[0, 1, 1], [1, 0, 0]])
    y_pred = np.array([[1, 1, 1], [1, 0, 1]])
    p, r, f, _ = precision_recall_fscore_support(y_true, y_pred,
                                                 average='samples',
                                                 labels=[0, 1])
    assert_almost_equal(np.array([p, r, f]), np.array([3 / 4, 1, 5 / 6]))