Python sklearn.preprocessing.label_binarize() Examples
The following are 24 code examples of sklearn.preprocessing.label_binarize(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.
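Before working through the examples, here is a minimal usage sketch (not from any of the projects below) showing what label_binarize() returns; the two-class behavior in particular trips people up, since it yields a single indicator column rather than a two-column one-hot matrix:

import numpy as np
from sklearn.preprocessing import label_binarize

# Multiclass: one indicator column per entry of `classes`, in that order.
y = [1, 3, 3, 2]
print(label_binarize(y, classes=[1, 2, 3]))
# [[1 0 0]
#  [0 0 1]
#  [0 0 1]
#  [0 1 0]]

# Classes that never occur in y simply become all-zero columns.
print(label_binarize(y, classes=np.arange(5)).shape)  # (4, 5)

# Two-class case: a single column, not two.
print(label_binarize([0, 1, 1], classes=[0, 1]))
# [[0]
#  [1]
#  [1]]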
Example #1
Source File: test_classification.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_precision_recall_f_ignored_labels():
    # Test a subset of labels may be requested for PRF
    y_true = [1, 1, 2, 3]
    y_pred = [1, 3, 3, 3]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for i, (y_true, y_pred) in enumerate(data):
        recall_13 = partial(recall_score, y_true, y_pred, labels=[1, 3])
        recall_all = partial(recall_score, y_true, y_pred, labels=None)

        assert_array_almost_equal([.5, 1.], recall_13(average=None))
        assert_almost_equal((.5 + 1.) / 2, recall_13(average='macro'))
        assert_almost_equal((.5 * 2 + 1. * 1) / 3,
                            recall_13(average='weighted'))
        assert_almost_equal(2. / 3, recall_13(average='micro'))

        # ensure the above were meaningful tests:
        for average in ['macro', 'weighted', 'micro']:
            assert_not_equal(recall_13(average=average),
                             recall_all(average=average))
Example #2
Source File: test_classification.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_precision_recall_f_ignored_labels():
    # Test a subset of labels may be requested for PRF
    y_true = [1, 1, 2, 3]
    y_pred = [1, 3, 3, 3]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for i, (y_true, y_pred) in enumerate(data):
        recall_13 = partial(recall_score, y_true, y_pred, labels=[1, 3])
        recall_all = partial(recall_score, y_true, y_pred, labels=None)

        assert_array_almost_equal([.5, 1.], recall_13(average=None))
        assert_almost_equal((.5 + 1.) / 2, recall_13(average='macro'))
        assert_almost_equal((.5 * 2 + 1. * 1) / 3,
                            recall_13(average='weighted'))
        assert_almost_equal(2. / 3, recall_13(average='micro'))

        # ensure the above were meaningful tests:
        for average in ['macro', 'weighted', 'micro']:
            assert_not_equal(recall_13(average=average),
                             recall_all(average=average))
Example #3
Source File: recall.py From driverlessai-recipes with Apache License 2.0 | 6 votes |
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    if sample_weight is not None:
        sample_weight = sample_weight.ravel()
    enc_actual, enc_predicted, labels = prep_actual_predicted(actual, predicted, labels)
    cm_weights = sample_weight if sample_weight is not None else None

    # multiclass
    if enc_predicted.shape[1] > 1:
        enc_predicted = enc_predicted.ravel()
        enc_actual = label_binarize(enc_actual, labels).ravel()
        cm_weights = np.repeat(cm_weights, predicted.shape[1]).ravel() if cm_weights is not None else None
        assert enc_predicted.shape == enc_actual.shape
        assert cm_weights is None or enc_predicted.shape == cm_weights.shape

    cms = daicx.confusion_matrices(enc_actual.ravel(), enc_predicted.ravel(), sample_weight=cm_weights)
    cms = cms.loc[
        cms[[self.__class__._threshold_optimizer]].idxmax()]  # get row(s) for optimal metric defined above
    cms['metric'] = cms[['tp', 'fp', 'tn', 'fn']].apply(lambda x: self.protected_metric(*x), axis=1, raw=True)
    return cms['metric'].mean()  # in case of ties
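Note that this recipe calls label_binarize(enc_actual, labels) with the class list as a positional argument, and several other examples on this page do the same. That worked when these projects were written, but recent scikit-learn releases make classes keyword-only, so under a current install the call would need to read label_binarize(enc_actual, classes=labels).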
Example #4
Source File: precision.py From driverlessai-recipes with Apache License 2.0 | 6 votes |
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    if sample_weight is not None:
        sample_weight = sample_weight.ravel()
    enc_actual, enc_predicted, labels = prep_actual_predicted(actual, predicted, labels)
    cm_weights = sample_weight if sample_weight is not None else None

    # multiclass
    if enc_predicted.shape[1] > 1:
        enc_predicted = enc_predicted.ravel()
        enc_actual = label_binarize(enc_actual, labels).ravel()
        cm_weights = np.repeat(cm_weights, predicted.shape[1]).ravel() if cm_weights is not None else None
        assert enc_predicted.shape == enc_actual.shape
        assert cm_weights is None or enc_predicted.shape == cm_weights.shape

    cms = daicx.confusion_matrices(enc_actual.ravel(), enc_predicted.ravel(), sample_weight=cm_weights)
    cms = cms.loc[
        cms[[self.__class__._threshold_optimizer]].idxmax()]  # get row(s) for optimal metric defined above
    cms['metric'] = cms[['tp', 'fp', 'tn', 'fn']].apply(lambda x: self.protected_metric(*x), axis=1, raw=True)
    return cms['metric'].mean()  # in case of ties
Example #5
Source File: false_discovery_rate.py From driverlessai-recipes with Apache License 2.0 | 6 votes |
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    if sample_weight is not None:
        sample_weight = sample_weight.ravel()
    enc_actual, enc_predicted, labels = prep_actual_predicted(actual, predicted, labels)
    cm_weights = sample_weight if sample_weight is not None else None

    # multiclass
    if enc_predicted.shape[1] > 1:
        enc_predicted = enc_predicted.ravel()
        enc_actual = label_binarize(enc_actual, labels).ravel()
        cm_weights = np.repeat(cm_weights, predicted.shape[1]).ravel() if cm_weights is not None else None
        assert enc_predicted.shape == enc_actual.shape
        assert cm_weights is None or enc_predicted.shape == cm_weights.shape

    cms = daicx.confusion_matrices(enc_actual.ravel(), enc_predicted.ravel(), sample_weight=cm_weights)
    cms = cms.loc[
        cms[[self.__class__._threshold_optimizer]].idxmax()]  # get row(s) for optimal metric defined above
    cms['metric'] = cms[['tp', 'fp', 'tn', 'fn']].apply(lambda x: self.protected_metric(*x), axis=1, raw=True)
    return cms['metric'].mean()  # in case of ties
Example #6
Source File: utils.py From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License | 6 votes |
def weight_dict_fc(trainLabel, para):
    train_labels = []
    for i in range(len(trainLabel)):
        [train_labels.append(j) for j in trainLabel[i]]
    from sklearn.preprocessing import label_binarize
    y_total_40 = label_binarize(train_labels, classes=[i for i in range(40)])
    class_distribution_40_class = np.sum(y_total_40, axis=0)
    class_distribution_40_class = [float(i) for i in class_distribution_40_class]
    class_distribution_40_class = class_distribution_40_class / np.sum(class_distribution_40_class)
    inverse_dist = 1 / class_distribution_40_class
    norm_inv_dist = inverse_dist / np.sum(inverse_dist)
    weights = norm_inv_dist * para.weight_scaler + 1
    weight_dict = dict()
    for classID, value in enumerate(weights):
        weight_dict.update({classID: value})
    return weight_dict
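The weighting here is a plain inverse-class-frequency scheme: label_binarize turns the label list into a one-hot matrix whose column sums give the per-class counts, rarer classes among the 40 receive proportionally larger weights, para.weight_scaler controls how aggressively the imbalance is corrected, and the trailing +1 keeps every weight at least 1.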
Example #7
Source File: utils.py From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License | 6 votes |
def weight_dict_fc(trainLabel, para):
    train_labels = []
    for i in range(len(trainLabel)):
        [train_labels.append(j) for j in trainLabel[i]]
    class_number = len(np.unique(train_labels))
    from sklearn.preprocessing import label_binarize
    y_total_40 = label_binarize(train_labels, classes=[i for i in range(para.outputClassN)])
    class_distribution_40_class = np.sum(y_total_40, axis=0)
    class_distribution_40_class = [float(i) for i in class_distribution_40_class]
    class_distribution_40_class = class_distribution_40_class / np.sum(class_distribution_40_class)
    inverse_dist = 1 / class_distribution_40_class
    norm_inv_dist = inverse_dist / np.sum(inverse_dist)
    weights = norm_inv_dist * para.weight_scaler + 1
    weight_dict = dict()
    for classID, value in enumerate(weights):
        weight_dict.update({classID: value})
    return weight_dict
Example #8
Source File: ABuML.py From abu with GNU General Public License v3.0 | 6 votes |
def cross_val_roc_auc_score(self, cv=10, **kwargs):
    """
    Decorated by entry_wrapper(support=(EMLFitType.E_FIT_CLF,)), i.e. it only
    supports supervised classification. Uses cross_val_score to measure the
    data with roc_auc. If y has more than two label classes, the labels are
    binarized via label_binarize, the roc_auc of each binarized column is
    computed in turn, and the measurement with the best score is returned.

    :param cv: passed through to cross_val_score, default 10
    :param kwargs: callers may pass x and y; via
                       x = kwargs.pop('x', self.x)
                       y = kwargs.pop('y', self.y)
                   these determine the x, y arguments passed to
                   self._do_cross_val_score, as well as the fiter_type used
                   by the decorator, e.g.
                   ttn_abu.cross_val_roc_auc_score(fiter_type=ml.EMLFitType.E_FIT_REG)
    :return: the score sequence returned by cross_val_score, e.g.
             array([ 1.  ,  0.9 ,  1.  ,  0.9 ,  1.  ,  0.9 ,  1.  ,  0.9 ,  0.95,  1.  ])
    """
    x = kwargs.pop('x', self.x)
    y = kwargs.pop('y', self.y)
    return self._do_cross_val_score(x, y, cv, _EMLScoreType.E_SCORE_ROC_AUC.value)
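The docstring above describes a binarize-then-score pattern for multiclass ROC AUC. As a rough sketch of that idea (independent of abu's internals, which live in self._do_cross_val_score; the helper name here is made up), one might write:

import numpy as np
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_auc_score

def per_class_roc_auc(y_true, y_score, classes):
    # One-vs-rest: binarize the labels, then score each indicator column
    # against the matching column of predicted probabilities.
    y_bin = label_binarize(y_true, classes=classes)
    return [roc_auc_score(y_bin[:, i], y_score[:, i])
            for i in range(len(classes))]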
Example #9
Source File: data_utils.py From videograph with GNU General Public License v3.0 | 6 votes |
def __init__(self, feats_path, class_nums, n_classes, n_frames_per_video,
             batch_size, n_feat_maps, feat_map_side_dim, n_threads=10):
    random.seed(101)
    np.random.seed(101)

    self.__feats_pathes = feats_path
    self.__class_nums = class_nums
    self.__n_frames_per_video = n_frames_per_video
    self.__n_feat_maps = n_feat_maps
    self.__feat_map_side_dim = feat_map_side_dim
    self.__batch_size = batch_size

    # binarize the labels
    classes = range(1, n_classes + 1)
    self.__y = label_binarize(self.__class_nums, classes)

    self.__is_busy = False
    self.__batch_features = None
    self.__batch_y = None
    self.__n_threads_in_pool = n_threads
    self.__pool = Pool(self.__n_threads_in_pool)
Example #10
Source File: test_classification.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_matthews_corrcoef():
    rng = np.random.RandomState(0)
    y_true = ["a" if i == 0 else "b" for i in rng.randint(0, 2, size=20)]

    # corrcoef of same vectors must be 1
    assert_almost_equal(matthews_corrcoef(y_true, y_true), 1.0)

    # corrcoef, when the two vectors are opposites of each other, should be -1
    y_true_inv = ["b" if i == "a" else "a" for i in y_true]
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv), -1)

    y_true_inv2 = label_binarize(y_true, ["a", "b"])
    y_true_inv2 = np.where(y_true_inv2, 'a', 'b')
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv2), -1)

    # For the zero vector case, the corrcoef cannot be calculated and should
    # result in a RuntimeWarning
    mcc = assert_warns_message(RuntimeWarning, 'invalid value encountered',
                               matthews_corrcoef, [0, 0, 0, 0], [0, 0, 0, 0])

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # And also for any other vector with 0 variance
    mcc = assert_warns_message(RuntimeWarning, 'invalid value encountered',
                               matthews_corrcoef, y_true, ['a'] * len(y_true))

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # These two vectors have 0 correlation and hence mcc should be 0
    y_1 = [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
    y_2 = [1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1]
    assert_almost_equal(matthews_corrcoef(y_1, y_2), 0.)

    # Check that sample weight is able to selectively exclude
    mask = [1] * 10 + [0] * 10
    # Now the first half of the vector elements are alone given a weight of 1
    # and hence the mcc will not be a perfect 0 as in the previous case
    assert_raises(AssertionError, assert_almost_equal,
                  matthews_corrcoef(y_1, y_2, sample_weight=mask), 0.)
Example #11
Source File: test_classification.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_precision_recall_f_extra_labels():
    # Test handling of explicit additional (not in input) labels to PRF
    y_true = [1, 3, 3, 2]
    y_pred = [1, 1, 3, 2]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for i, (y_true, y_pred) in enumerate(data):
        # No average: zeros in array
        actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
                              average=None)
        assert_array_almost_equal([0., 1., 1., .5, 0.], actual)

        # Macro average is changed
        actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
                              average='macro')
        assert_array_almost_equal(np.mean([0., 1., 1., .5, 0.]), actual)

        # No effect otherwise
        for average in ['micro', 'weighted', 'samples']:
            if average == 'samples' and i == 0:
                continue
            assert_almost_equal(recall_score(y_true, y_pred,
                                             labels=[0, 1, 2, 3, 4],
                                             average=average),
                                recall_score(y_true, y_pred, labels=None,
                                             average=average))

    # Error when introducing invalid label in multilabel case
    # (although it would only affect performance if average='macro'/None)
    for average in [None, 'macro', 'micro', 'samples']:
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(6), average=average)
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(-1, 4), average=average)
Example #12
Source File: utils.py From pysster with MIT License | 5 votes |
def performance_report(labels, predictions):
    from sklearn.preprocessing import label_binarize
    from sklearn.metrics import precision_recall_fscore_support
    classes = list(range(labels.shape[1]))
    roc_aucs, pr_aucs = [], []
    if len(classes) == 2:
        roc_aucs = [auROC(labels[:, 0], predictions[:, 0])[2]] * 2
        pr_aucs = [auPR(labels[:, 0], predictions[:, 0])[2]] * 2
        labels = label_binarize(np.argmax(labels, axis=1), classes=classes)
    else:
        for x in classes:
            roc_aucs.append(auROC(labels[:, x], predictions[:, x])[2])
            pr_aucs.append(auPR(labels[:, x], predictions[:, x])[2])
    if not np.isclose(np.sum(predictions, axis=1), 1).all():  # multi-label classification
        y_pred = predictions > 0.5
        y_pred.dtype = np.uint8
    else:
        y_pred = label_binarize(np.argmax(predictions, axis=1), classes=classes)
    prec_recall_f1_support = precision_recall_fscore_support(labels, y_pred)
    report = np.empty((len(classes), 6))
    for x in classes:
        report[x, :] = [prec_recall_f1_support[0][x],
                        prec_recall_f1_support[1][x],
                        prec_recall_f1_support[2][x],
                        roc_aucs[x],
                        pr_aucs[x],
                        prec_recall_f1_support[3][x]]
    return report
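The len(classes) == 2 branch above exists because of the two-class quirk shown in the opening sketch: label_binarize collapses a pair of classes into a single indicator column. Re-binarizing via np.argmax therefore gives the binary problem a one-column representation that matches the shape of y_pred, and the single ROC/PR AUC is computed once and duplicated so the report still has one row per class.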
Example #13
Source File: predict_deepchannel_QuB.py From Deep-Channel with MIT License | 5 votes |
def make_roc(gt, cpl, cl):
    from sklearn.preprocessing import label_binarize
    y_predict = label_binarize(gt, classes=[0, 1, 2, 3, 4, 5])
    print('c=', cl)
    y = label_binarize(cl, classes=[0, 1, 2, 3, 4, 5])
    n_classesi = y.shape[1]
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    from sklearn.metrics import roc_curve, auc
    for i in range(n_classesi):
        fpr[i], tpr[i], thre = roc_curve(y_predict[:, i], cpl[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
        print('state=, {}, auc=,{}'.format(i, roc_auc[i]))
Example #14
Source File: metrics.py From delira with GNU Affero General Public License v3.0 | 5 votes |
def __call__(self, y_true, y_pred, **kwargs):
    """
    Compute auroc

    Parameters
    ----------
    y_true: np.ndarray
        ground truth data with shape (N)
    y_pred: np.ndarray
        predictions of network in numpy format with shape (N, nclasses)
    kwargs:
        variable number of keyword arguments passed to roc_auc_score

    Returns
    -------
    float
        computed auc score

    Raises
    ------
    ValueError
        if two classes are given and the predictions contain more than
        two classes
    """
    # binary classification
    if len(self.classes) == 2:
        # single output unit (e.g. sigmoid)
        if len(y_pred.shape) == 1 or y_pred.shape[1] == 1:
            return roc_auc_score(y_true, y_pred, **kwargs)
        # output of two units (e.g. softmax)
        elif y_pred.shape[1] == 2:
            return roc_auc_score(y_true, y_pred[:, 1], **kwargs)
        else:
            raise ValueError("Can not compute auroc metric for binary "
                             "classes with {} predicted "
                             "classes.".format(y_pred.shape[1]))
    # classification with multiple classes
    if len(self.classes) > 2:
        y_true_bin = label_binarize(y_true, self.classes)
        return roc_auc_score(y_true_bin, y_pred, **kwargs, **self.kwargs)
Example #15
Source File: struct_models.py From marseille with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _marg_rounded(self, x, y):
    y_node = y.nodes
    y_link = y.links
    Y_node = label_binarize(y_node, self.prop_encoder_.classes_)
    Y_link = label_binarize(y_link, self.link_encoder_.classes_)

    # XXX can this be avoided?
    Y_node, Y_link = map(_binary_2d, (Y_node, Y_link))

    src_type = Y_node[x.link_to_prop[:, 0]]
    trg_type = Y_node[x.link_to_prop[:, 1]]

    if self.compat_features:
        pw = np.einsum('...j,...k,...l->...jkl',
                       src_type, trg_type, Y_link)
        compat = np.tensordot(x.X_compat.T, pw, axes=[1, 0])
    else:
        # equivalent to compat_features == np.ones(n_links)
        compat = np.einsum('ij,ik,il->jkl', src_type, trg_type, Y_link)

    second_order = []

    if self.coparents_ or self.grandparents_ or self.siblings_:
        link = {(a, b): k for k, (a, b) in enumerate(x.link_to_prop)}
        if self.coparents_:
            second_order.extend(y_link[link[a, b]] & y_link[link[c, b]]
                                for a, b, c in x.second_order)
        if self.grandparents_:
            second_order.extend(y_link[link[a, b]] & y_link[link[b, c]]
                                for a, b, c in x.second_order)
        if self.siblings_:
            second_order.extend(y_link[link[b, a]] & y_link[link[b, c]]
                                for a, b, c in x.second_order)
    second_order = np.array(second_order)

    return Y_node, Y_link, compat, second_order
Example #16
Source File: test.py From SpatioTemporalSegmentation with MIT License | 5 votes |
def average_precision(prob_np, target_np):
    num_class = prob_np.shape[1]
    label = label_binarize(target_np, classes=list(range(num_class)))
    with np.errstate(divide='ignore', invalid='ignore'):
        return average_precision_score(label, prob_np, None)
Example #17
Source File: examples.py From dython with BSD 3-Clause "New" or "Revised" License | 5 votes |
def roc_graph_example():
    """
    Plot an example ROC graph of an SVM model predictions over the Iris
    dataset.

    Based on sklearn examples (as was seen on April 2018):
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
    """
    # Load data
    iris = datasets.load_iris()
    X = iris.data
    y = label_binarize(iris.target, classes=[0, 1, 2])

    # Add noisy features
    random_state = np.random.RandomState(4)
    n_samples, n_features = X.shape
    X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

    # Train a model
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
                                                        random_state=0)
    classifier = OneVsRestClassifier(svm.SVC(kernel='linear',
                                             probability=True,
                                             random_state=0))

    # Predict
    y_score = classifier.fit(X_train, y_train).predict_proba(X_test)

    # Plot ROC graphs
    return roc_graph(y_test, y_score, class_names=iris.target_names)
Example #18
Source File: roc.py From RIDDLE with Apache License 2.0 | 5 votes |
def _compute_roc_stats(y_test, y_test_probas, num_class):
    """Compute ROC AUC statistics and visualize ROC curves.

    Arguments:
        y_test: [int]
            list of test class labels as integer indices
        y_test_probas: np.ndarray, float
            array of predicted probabilities with shape
            (num_sample, num_class)
        num_class: int
            number of classes

    Returns:
        roc_auc_dict: {int: float}
            dictionary mapping classes to ROC AUC scores
        fpr_dict: {string: np.ndarray}
            dictionary mapping names of classes or an averaging method to
            arrays of increasing false positive rates
        tpr_dict: {string: float}
            dictionary mapping names of classes or an averaging method to
            arrays of increasing true positive rates
    """
    y_test = label_binarize(y_test, classes=range(0, num_class))
    fpr_dict, tpr_dict, roc_auc_dict = {}, {}, {}
    for i in range(num_class):
        fpr_dict[i], tpr_dict[i], _ = roc_curve(
            y_test[:, i], y_test_probas[:, i])
        roc_auc_dict[i] = auc(fpr_dict[i], tpr_dict[i])

    # Compute micro-average ROC curve and ROC area
    fpr_dict["micro"], tpr_dict["micro"], _ = roc_curve(
        y_test.ravel(), y_test_probas.ravel())
    roc_auc_dict["micro"] = auc(fpr_dict["micro"], tpr_dict["micro"])

    return roc_auc_dict, fpr_dict, tpr_dict
Example #19
Source File: model.py From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License | 5 votes |
def evaluateOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion):
    test_loss = []
    test_acc = []
    test_predict = []
    for i in range(len(inputCoor)):
        xTest, graphTest, labelTest = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTest = graphTest.tocsr()
        labelBinarize = label_binarize(labelTest, classes=[i for i in range(para.outputClassN)])
        test_batch_size = para.testBatchSize
        for testBatchID in range(len(labelTest) / test_batch_size):
            start = testBatchID * test_batch_size
            end = start + test_batch_size
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTest, graphTest, labelBinarize, start, end)
            batchWeight = uniform_weight(batchLabel)
            batchGraph = batchGraph.todense()

            feed_dict = {trainOperaion['inputPC']: batchCoor,
                         trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel,
                         trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: 1.0,
                         trainOperaion['keep_prob_2']: 1.0}

            predict, loss_test, acc_test = sess.run(
                [trainOperaion['predictLabels'], trainOperaion['loss'], trainOperaion['acc']],
                feed_dict=feed_dict)
            test_loss.append(loss_test)
            test_acc.append(acc_test)
            test_predict.append(predict)

    test_average_loss = np.mean(test_loss)
    test_average_acc = np.mean(test_acc)

    return test_average_loss, test_average_acc, test_predict
Example #20
Source File: test_classification.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_matthews_corrcoef():
    rng = np.random.RandomState(0)
    y_true = ["a" if i == 0 else "b" for i in rng.randint(0, 2, size=20)]

    # corrcoef of same vectors must be 1
    assert_almost_equal(matthews_corrcoef(y_true, y_true), 1.0)

    # corrcoef, when the two vectors are opposites of each other, should be -1
    y_true_inv = ["b" if i == "a" else "a" for i in y_true]
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv), -1)

    y_true_inv2 = label_binarize(y_true, ["a", "b"])
    y_true_inv2 = np.where(y_true_inv2, 'a', 'b')
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv2), -1)

    # For the zero vector case, the corrcoef cannot be calculated and should
    # result in a RuntimeWarning
    mcc = assert_warns_div0(matthews_corrcoef, [0, 0, 0, 0], [0, 0, 0, 0])

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # And also for any other vector with 0 variance
    mcc = assert_warns_div0(matthews_corrcoef, y_true, ['a'] * len(y_true))

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # These two vectors have 0 correlation and hence mcc should be 0
    y_1 = [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
    y_2 = [1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1]
    assert_almost_equal(matthews_corrcoef(y_1, y_2), 0.)

    # Check that sample weight is able to selectively exclude
    mask = [1] * 10 + [0] * 10
    # Now the first half of the vector elements are alone given a weight of 1
    # and hence the mcc will not be a perfect 0 as in the previous case
    assert_raises(AssertionError, assert_almost_equal,
                  matthews_corrcoef(y_1, y_2, sample_weight=mask), 0.)
Example #21
Source File: roc.py From sklearn-evaluation with MIT License | 4 votes |
def roc(y_true, y_score, ax=None):
    """
    Plot ROC curve.

    Parameters
    ----------
    y_true : array-like, shape = [n_samples]
        Correct target values (ground truth).
    y_score : array-like, shape = [n_samples] or [n_samples, 2] for binary
        classification or [n_samples, n_classes] for multiclass
        Target scores (estimator predictions).
    ax: matplotlib Axes
        Axes object to draw the plot onto, otherwise uses current Axes

    Notes
    -----
    It is assumed that the y_score parameter columns are in order. For
    example, if ``y_true = [2, 2, 1, 0, 0, 1, 2]``, then the first column
    in y_score must contain the scores for class 0, second column for
    class 1 and so on.

    Returns
    -------
    ax: matplotlib Axes
        Axes containing the plot

    Examples
    --------
    .. plot:: ../../examples/roc.py

    """
    if any((val is None for val in (y_true, y_score))):
        raise ValueError("y_true and y_score are needed to plot ROC")

    if ax is None:
        ax = plt.gca()

    # get the number of classes based on the shape of y_score
    y_score_is_vector = is_column_vector(y_score) or is_row_vector(y_score)
    if y_score_is_vector:
        n_classes = 2
    else:
        _, n_classes = y_score.shape

    # check data shape?

    if n_classes > 2:
        # convert y_true to binary format
        y_true_bin = label_binarize(y_true, classes=np.unique(y_true))
        _roc_multi(y_true_bin, y_score, ax=ax)
        for i in range(n_classes):
            _roc(y_true_bin[:, i], y_score[:, i], ax=ax)
    else:
        if y_score_is_vector:
            _roc(y_true, y_score, ax)
        else:
            _roc(y_true, y_score[:, 1], ax)

    # raise error if n_classes = 1?
    return ax
Example #22
Source File: model.py From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License | 4 votes |
def trainOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion, weight_dict, learningRate):
    dataChunkLoss = []
    dataChunkAcc = []
    dataChunkRegLoss = []
    for i in range(len(inputCoor)):
        xTrain_1, graphTrain_1, labelTrain_1 = inputCoor[i], inputGraph[i], inputLabel[i]

        graphTrain_1 = graphTrain_1.tocsr()
        labelBinarize = label_binarize(labelTrain_1, classes=[j for j in range(para.outputClassN)])
        xTrain, graphTrain, labelTrain = shuffle(xTrain_1, graphTrain_1, labelBinarize)
        # labelBinarize = label_binarize(labelTrain, classes=[j for j in range(40)])

        batch_loss = []
        batch_acc = []
        batch_reg = []
        batchSize = para.batchSize
        for batchID in range(len(labelBinarize) / para.batchSize):
            start = batchID * batchSize
            end = start + batchSize
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTrain, graphTrain, labelTrain, start, end)
            batchGraph = batchGraph.todense()
            batchCoor = add_noise(batchCoor, sigma=0.008, clip=0.02)
            if para.weighting_scheme == 'uniform':
                batchWeight = uniform_weight(batchLabel)
            elif para.weighting_scheme == 'weighted':
                batchWeight = weights_calculation(batchLabel, weight_dict)
            else:
                print 'please enter the valid weighting scheme'

            # print batchWeight

            feed_dict = {trainOperaion['inputPC']: batchCoor,
                         trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel,
                         trainOperaion['lr']: learningRate,
                         trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: para.keep_prob_1,
                         trainOperaion['keep_prob_2']: para.keep_prob_2}

            opt, loss_train, acc_train, loss_reg_train = sess.run(
                [trainOperaion['train'], trainOperaion['loss_total'],
                 trainOperaion['acc'], trainOperaion['loss_reg']],
                feed_dict=feed_dict)

            # print('The loss loss_reg and acc for this batch is {},{} and {}'.format(loss_train, loss_reg_train, acc_train))

            batch_loss.append(loss_train)
            batch_acc.append(acc_train)
            batch_reg.append(loss_reg_train)

        dataChunkLoss.append(np.mean(batch_loss))
        dataChunkAcc.append(np.mean(batch_acc))
        dataChunkRegLoss.append(np.mean(batch_reg))

    train_average_loss = np.mean(dataChunkLoss)
    train_average_acc = np.mean(dataChunkAcc)
    loss_reg_average = np.mean(dataChunkRegLoss)
    return train_average_loss, train_average_acc, loss_reg_average
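Note that this training loop, like the evaluation loops in Examples #19 and #23, is Python 2 code: it uses print statements and relies on / performing integer division in range(len(labelBinarize) / para.batchSize). Under Python 3 those divisions would need to become //, and the print statements would need parentheses.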
Example #23
Source File: model_multi_res.py From Graph-CNN-in-3D-Point-Cloud-Classification with MIT License | 4 votes |
def evaluateOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion):
    # Description: performance on the test set data
    # Input: (1) inputCoor: input coordinates (B, N, 3)  (2) inputGraph: input graph (B, N*N)
    #        (3) inputLabel: labels (B, 1)  (4) para: global parameters
    #        (5) sess: Session  (6) trainOperaion: placeholder dictionary
    # Return: average loss, acc, regularization loss for test set
    test_loss = []
    test_acc = []
    test_predict = []
    for i in range(len(inputCoor)):
        xTest, graphTest, labelTest = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTest = graphTest.tocsr()
        labelBinarize = label_binarize(labelTest, classes=[j for j in range(40)])
        test_batch_size = para.testBatchSize
        for testBatchID in range(len(labelTest) / test_batch_size):
            start = testBatchID * test_batch_size
            end = start + test_batch_size
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTest, graphTest, labelBinarize, start, end)
            batchWeight = uniform_weight(batchLabel)
            batchGraph = batchGraph.todense()

            batchIndexL1, centroid_coordinates = farthest_sampling_new(batchCoor, M=para.clusterNumberL1,
                                                                       k=para.nearestNeighborL1,
                                                                       batch_size=test_batch_size,
                                                                       nodes_n=para.pointNumber)
            batchMiddleGraph = middle_graph_generation(centroid_coordinates, batch_size=test_batch_size,
                                                       M=para.clusterNumberL1)

            feed_dict = {trainOperaion['inputPC']: batchCoor,
                         trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel,
                         trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: 1.0,
                         trainOperaion['keep_prob_2']: 1.0,
                         trainOperaion['batch_index_l1']: batchIndexL1,
                         trainOperaion['l2Graph']: batchMiddleGraph,
                         trainOperaion['batch_size']: test_batch_size}

            predict, loss_test, acc_test = sess.run(
                [trainOperaion['predictLabels'], trainOperaion['loss'], trainOperaion['acc']],
                feed_dict=feed_dict)
            test_loss.append(loss_test)
            test_acc.append(acc_test)
            test_predict.append(predict)

    test_average_loss = np.mean(test_loss)
    test_average_acc = np.mean(test_acc)

    return test_average_loss, test_average_acc, test_predict
Example #24
Source File: test_classification.py From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def test_precision_recall_f_extra_labels():
    # Test handling of explicit additional (not in input) labels to PRF
    y_true = [1, 3, 3, 2]
    y_pred = [1, 1, 3, 2]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for i, (y_true, y_pred) in enumerate(data):
        # No average: zeros in array
        actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
                              average=None)
        assert_array_almost_equal([0., 1., 1., .5, 0.], actual)

        # Macro average is changed
        actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
                              average='macro')
        assert_array_almost_equal(np.mean([0., 1., 1., .5, 0.]), actual)

        # No effect otherwise
        for average in ['micro', 'weighted', 'samples']:
            if average == 'samples' and i == 0:
                continue
            assert_almost_equal(recall_score(y_true, y_pred,
                                             labels=[0, 1, 2, 3, 4],
                                             average=average),
                                recall_score(y_true, y_pred, labels=None,
                                             average=average))

    # Error when introducing invalid label in multilabel case
    # (although it would only affect performance if average='macro'/None)
    for average in [None, 'macro', 'micro', 'samples']:
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(6), average=average)
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(-1, 4), average=average)

    # tests non-regression on issue #10307
    y_true = np.array([[0, 1, 1], [1, 0, 0]])
    y_pred = np.array([[1, 1, 1], [1, 0, 1]])
    p, r, f, _ = precision_recall_fscore_support(y_true, y_pred,
                                                 average='samples',
                                                 labels=[0, 1])
    assert_almost_equal(np.array([p, r, f]), np.array([3 / 4, 1, 5 / 6]))