Python sklearn.utils.multiclass.unique_labels() Examples
The following are 30 code examples of sklearn.utils.multiclass.unique_labels(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils.multiclass, or try the search function.
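Before diving into the project examples, here is a minimal sketch of the function's basic behavior: unique_labels() returns the sorted union of the labels found across all of its arguments. The values below follow from sklearn's documented behavior.

from sklearn.utils.multiclass import unique_labels

# Sorted unique labels from a single target array
print(unique_labels([3, 5, 5, 5, 7, 7]))          # [3 5 7]
# Sorted union of labels across several arrays (e.g. y_true and y_pred)
print(unique_labels([1, 2, 3, 4], [2, 2, 3, 4]))  # [1 2 3 4]
print(unique_labels([1, 2, 10], [5, 11]))         # [ 1  2  5 10 11]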
Example #1
Source File: sssrm.py From brainiak with Apache License 2.0 | 6 votes |
def _init_classes(self, y):
    """Map all possible classes to the range [0,..,C-1]

    Parameters
    ----------
    y : list of arrays of int, each element has shape=[samples_i,]
        Labels of the samples for each subject

    Returns
    -------
    new_y : list of arrays of int, each element has shape=[samples_i,]
        Mapped labels of the samples for each subject

    Note
    ----
    The mapping of the classes is saved in the attribute classes_.
    """
    self.classes_ = unique_labels(utils.concatenate_not_none(y))
    new_y = [None] * len(y)
    for s in range(len(y)):
        new_y[s] = np.digitize(y[s], self.classes_) - 1
    return new_y
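The np.digitize trick above maps arbitrary sorted class values onto the contiguous range [0, C-1]. A minimal standalone sketch of the same idea (the variable names here are illustrative, not brainiak code):

import numpy as np
from sklearn.utils.multiclass import unique_labels

y = np.array([5, 2, 9, 9])
classes_ = unique_labels(y)           # [2 5 9], sorted
# digitize returns 1-based bin indices for values sitting exactly on the bins,
# so subtracting 1 yields zero-based class indices
print(np.digitize(y, classes_) - 1)   # [1 0 2 2]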
Example #2
Source File: query_labels.py From ALiPy with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __init__(self, X, y, mode='LAL_iterative', data_path='.', cls_est=50,
             train_slt=True, **kwargs):
    super(QueryInstanceLAL, self).__init__(X, y)
    if len(unique_labels(self.y)) != 2:
        warnings.warn("This query strategy is implemented for binary classification only.",
                      category=FunctionWarning)
    if not os.path.isdir(data_path):
        raise ValueError("Please pass the directory of the file.")
    self._iter_path = os.path.join(data_path, 'LAL-iterativetree-simulatedunbalanced-big.npz')
    self._rand_path = os.path.join(data_path, 'LAL-randomtree-simulatedunbalanced-big.npz')
    assert mode in ['LAL_iterative', 'LAL_independent']
    self._mode = mode
    self._selector = None
    self.model = RandomForestClassifier(n_estimators=cls_est, oob_score=True, n_jobs=8)
    if train_slt:
        self.download_data()
        self.train_selector_from_file()
Example #3
Source File: confusion_balancer.py From scikit-lego with MIT License | 6 votes |
def fit(self, X, y):
    """
    Fit the data.

    :param X: array-like, shape=(n_columns, n_samples,) training data.
    :param y: array-like, shape=(n_samples,) training data.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self.estimator, dtype=FLOAT_DTYPES)
    if not isinstance(self.estimator, ProbabilisticClassifier):
        raise ValueError(
            "The ConfusionBalancer meta model only works on classification models with .predict_proba."
        )
    self.estimator.fit(X, y)
    self.classes_ = unique_labels(y)
    cfm = confusion_matrix(y, self.estimator.predict(X)).T + self.cfm_smooth
    self.cfm_ = cfm / cfm.sum(axis=1).reshape(-1, 1)
    return self
Example #4
Source File: shapelets.py From tslearn with BSD 2-Clause "Simplified" License | 6 votes |
def _preprocess_labels(self, y):
    self.classes_ = unique_labels(y)
    n_labels = len(self.classes_)
    if n_labels == 1:
        raise ValueError("Classifier can't train when only one class "
                         "is present.")
    if self.classes_.dtype in [numpy.int32, numpy.int64]:
        self.label_to_ind_ = {int(lab): ind
                              for ind, lab in enumerate(self.classes_)}
    else:
        self.label_to_ind_ = {lab: ind
                              for ind, lab in enumerate(self.classes_)}
    y_ind = numpy.array(
        [self.label_to_ind_[lab] for lab in y]
    )
    y_ = to_categorical(y_ind)
    if n_labels == 2:
        y_ = y_[:, 1:]  # Keep only indicator of positive class
    return y_
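The binary special case at the end (keeping only the positive-class column) is easy to see with a small numpy stand-in for to_categorical; this sketch only illustrates the mapping logic and is not tslearn code:

import numpy as np
from sklearn.utils.multiclass import unique_labels

y = np.array(["cat", "dog", "dog", "cat"])
classes_ = unique_labels(y)                           # ['cat' 'dog']
label_to_ind_ = {lab: ind for ind, lab in enumerate(classes_)}
y_ind = np.array([label_to_ind_[lab] for lab in y])   # [0 1 1 0]
y_onehot = np.eye(len(classes_))[y_ind]               # numpy stand-in for to_categorical
y_binary = y_onehot[:, 1:]                            # indicator of the positive class only
print(y_binary.ravel())                               # [0. 1. 1. 0.]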
Example #5
Source File: test.py From rasa_core with Apache License 2.0 | 6 votes |
def plot_story_evaluation(test_y, predictions, report, precision, f1, accuracy,
                          in_training_data_fraction, out_directory):
    """Plot the results of story evaluation"""
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    import matplotlib.pyplot as plt
    from rasa_nlu.test import plot_confusion_matrix

    log_evaluation_table(test_y, "ACTION", report, precision, f1, accuracy,
                         in_training_data_fraction, include_report=True)

    cnf_matrix = confusion_matrix(test_y, predictions)
    plot_confusion_matrix(cnf_matrix,
                          classes=unique_labels(test_y, predictions),
                          title='Action Confusion matrix')

    fig = plt.gcf()
    fig.set_size_inches(int(20), int(20))
    fig.savefig(os.path.join(out_directory, "story_confmat.pdf"),
                bbox_inches='tight')
Example #6
Source File: labels.py From pumpp with ISC License | 6 votes |
def fit(self, y):
    """Fit label binarizer

    Parameters
    ----------
    y : array of shape [n_samples,] or [n_samples, n_classes]
        Target values. The 2-d matrix should only contain 0 and 1,
        represents multilabel classification.

    Returns
    -------
    self : returns an instance of self.
    """
    self.y_type_ = type_of_target(y)
    if 'multioutput' in self.y_type_:
        raise ValueError("Multioutput target data is not supported with "
                         "label binarization")
    if _num_samples(y) == 0:
        raise ValueError('y has 0 samples: %r' % y)

    self.sparse_input_ = sp.issparse(y)
    self.classes_ = unique_labels(y)
    return self
Example #7
Source File: evaluate.py From Rasa_NLU_Chi with Apache License 2.0 | 6 votes |
def evaluate_intents(targets, predictions):  # pragma: no cover
    """Creates a confusion matrix and summary statistics for intent predictions.

    Only considers those examples with a set intent.
    Others are filtered out."""
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    import matplotlib.pyplot as plt

    # remove empty intent targets
    num_examples = len(targets)
    targets, predictions = remove_empty_intent_examples(targets, predictions)
    logger.info("Intent Evaluation: Only considering those "
                "{} examples that have a defined intent out "
                "of {} examples".format(targets.size, num_examples))
    log_evaluation_table(targets, predictions)

    cnf_matrix = confusion_matrix(targets, predictions)
    labels = unique_labels(targets, predictions)
    plot_confusion_matrix(cnf_matrix, classes=labels,
                          title='Intent Confusion matrix')
    plt.show()
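Examples #5 and #7 (and several below) rely on the same pairing: unique_labels(targets, predictions) produces exactly the sorted label set that confusion_matrix uses by default for its rows and columns, so the plotted axis labels line up with the matrix. A small illustration with made-up intent names:

from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

y_true = ["greet", "bye", "greet", "affirm"]
y_pred = ["greet", "bye", "affirm", "affirm"]
labels = unique_labels(y_true, y_pred)  # ['affirm' 'bye' 'greet'], sorted
cm = confusion_matrix(y_true, y_pred)   # rows/columns follow the same sorted order
print(labels)
print(cm)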
Example #8
Source File: _template.py From project-template with BSD 3-Clause "New" or "Revised" License | 6 votes |
def fit(self, X, y):
    """A reference implementation of a fitting function for a classifier.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        The training input samples.
    y : array-like, shape (n_samples,)
        The target values. An array of int.

    Returns
    -------
    self : object
        Returns self.
    """
    # Check that X and y have correct shape
    X, y = check_X_y(X, y)
    # Store the classes seen during fit
    self.classes_ = unique_labels(y)

    self.X_ = X
    self.y_ = y
    # Return the classifier
    return self
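A classifier built from this template behaves like any scikit-learn estimator. A quick usage sketch, assuming the enclosing class and import path follow scikit-learn's project-template layout (both are assumptions, not shown in the snippet above):

from sklearn.datasets import load_iris
from skltemplate import TemplateClassifier  # import path assumed from project-template

X, y = load_iris(return_X_y=True)
clf = TemplateClassifier()  # class name assumed from the project-template repo
clf.fit(X, y)
print(clf.classes_)         # [0 1 2], stored via unique_labels(y)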
Example #9
Source File: test_multiclass.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_unique_labels_non_specific():
    # Test unique_labels with a variety of collected examples

    # Smoke test for all supported formats
    for format in ["binary", "multiclass", "multilabel-indicator"]:
        for y in EXAMPLES[format]:
            unique_labels(y)

    # We don't support those formats at the moment
    for example in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises(ValueError, unique_labels, example)

    for y_type in ["unknown", "continuous", 'continuous-multioutput',
                   'multiclass-multioutput']:
        for example in EXAMPLES[y_type]:
            assert_raises(ValueError, unique_labels, example)
Example #10
Source File: test_multiclass.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_unique_labels_non_specific():
    # Test unique_labels with a variety of collected examples

    # Smoke test for all supported formats
    for format in ["binary", "multiclass", "multilabel-indicator"]:
        for y in EXAMPLES[format]:
            unique_labels(y)

    # We don't support those formats at the moment
    for example in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises(ValueError, unique_labels, example)

    for y_type in ["unknown", "continuous", 'continuous-multioutput',
                   'multiclass-multioutput']:
        for example in EXAMPLES[y_type]:
            assert_raises(ValueError, unique_labels, example)
Example #11
Source File: evaluate.py From rasa_wechat with Apache License 2.0 | 6 votes |
def run_story_evaluation(story_file, policy_model_path, nlu_model_path,
                         out_file, max_stories):
    """Run the evaluation of the stories, plots the results."""
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels

    test_y, preds = collect_story_predictions(story_file, policy_model_path,
                                              nlu_model_path, max_stories)
    log_evaluation_table(test_y, preds)
    cnf_matrix = confusion_matrix(test_y, preds)
    plot_confusion_matrix(cnf_matrix,
                          classes=unique_labels(test_y, preds),
                          title='Action Confusion matrix')

    fig = plt.gcf()
    fig.set_size_inches(int(20), int(20))
    fig.savefig(out_file, bbox_inches='tight')
Example #12
Source File: test_multiclass.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_unique_labels_mixed_types():
    # Mix with binary or multiclass and multilabel
    mix_clf_format = product(EXAMPLES["multilabel-indicator"],
                             EXAMPLES["multiclass"] +
                             EXAMPLES["binary"])

    for y_multilabel, y_multiclass in mix_clf_format:
        assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel)
        assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass)

    assert_raises(ValueError, unique_labels, [[1, 2]], [["a", "d"]])
    assert_raises(ValueError, unique_labels, ["1", 2])
    assert_raises(ValueError, unique_labels, [["1", 2], [1, 3]])
    assert_raises(ValueError, unique_labels, [["1", "2"], [2, 3]])
Example #13
Source File: naive_bayes.py From scikit-lego with MIT License | 5 votes |
def fit(self, X: np.array, y: np.array) -> "GaussianMixtureNB":
    """
    Fit the model using X, y as training data.

    :param X: array-like, shape=(n_columns, n_samples, ) training data.
    :param y: array-like, shape=(n_samples, ) training data.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    if X.ndim == 1:
        X = np.expand_dims(X, 1)

    self.gmms_ = {}
    self.classes_ = unique_labels(y)
    self.num_fit_cols_ = X.shape[1]
    for c in self.classes_:
        subset_x, subset_y = X[y == c], y[y == c]
        self.gmms_[c] = [
            GaussianMixture(
                n_components=self.n_components,
                covariance_type=self.covariance_type,
                tol=self.tol,
                reg_covar=self.reg_covar,
                max_iter=self.max_iter,
                n_init=self.n_init,
                init_params=self.init_params,
                weights_init=self.weights_init,
                means_init=self.means_init,
                precisions_init=self.precisions_init,
                random_state=self.random_state,
                warm_start=self.warm_start,
            ).fit(subset_x[:, i].reshape(-1, 1), subset_y)
            for i in range(X.shape[1])
        ]
    return self
Example #14
Source File: bayesian_gmm_classifier.py From scikit-lego with MIT License | 5 votes |
def fit(self, X: np.array, y: np.array) -> "BayesianGMMClassifier":
    """
    Fit the model using X, y as training data.

    :param X: array-like, shape=(n_columns, n_samples, ) training data.
    :param y: array-like, shape=(n_samples, ) training data.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    if X.ndim == 1:
        X = np.expand_dims(X, 1)

    self.gmms_ = {}
    self.classes_ = unique_labels(y)
    for c in self.classes_:
        subset_x, subset_y = X[y == c], y[y == c]
        mixture = BayesianGaussianMixture(
            n_components=self.n_components,
            covariance_type=self.covariance_type,
            tol=self.tol,
            reg_covar=self.reg_covar,
            max_iter=self.max_iter,
            n_init=self.n_init,
            init_params=self.init_params,
            weight_concentration_prior_type=self.weight_concentration_prior_type,
            weight_concentration_prior=self.weight_concentration_prior,
            mean_precision_prior=self.mean_precision_prior,
            mean_prior=self.mean_prior,
            degrees_of_freedom_prior=self.degrees_of_freedom_prior,
            covariance_prior=self.covariance_prior,
            random_state=self.random_state,
            warm_start=self.warm_start,
            verbose=self.verbose,
            verbose_interval=self.verbose_interval,
        )
        self.gmms_[c] = mixture.fit(subset_x, subset_y)
    return self
Example #15
Source File: gmm_classifier.py From scikit-lego with MIT License | 5 votes |
def fit(self, X: np.array, y: np.array) -> "GMMClassifier":
    """
    Fit the model using X, y as training data.

    :param X: array-like, shape=(n_columns, n_samples, ) training data.
    :param y: array-like, shape=(n_samples, ) training data.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    if X.ndim == 1:
        X = np.expand_dims(X, 1)

    self.gmms_ = {}
    self.classes_ = unique_labels(y)
    for c in self.classes_:
        subset_x, subset_y = X[y == c], y[y == c]
        mixture = GaussianMixture(
            n_components=self.n_components,
            covariance_type=self.covariance_type,
            tol=self.tol,
            reg_covar=self.reg_covar,
            max_iter=self.max_iter,
            n_init=self.n_init,
            init_params=self.init_params,
            weights_init=self.weights_init,
            means_init=self.means_init,
            precisions_init=self.precisions_init,
            random_state=self.random_state,
            warm_start=self.warm_start,
            verbose=self.verbose,
            verbose_interval=self.verbose_interval,
        )
        self.gmms_[c] = mixture.fit(subset_x, subset_y)
    return self
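Examples #13-#15 all follow the same generative pattern: fit one mixture model per class, keyed by the labels from unique_labels(y). What fit() stores is then typically consumed by scoring each sample under every per-class model; a minimal sketch of that prediction step (illustrative, not scikit-lego's exact code):

import numpy as np

def predict_from_gmms(gmms_, classes_, X):
    # log-likelihood of every sample under each per-class mixture model
    log_likelihood = np.array([gmms_[c].score_samples(X) for c in classes_]).T
    # pick the class whose density model explains the sample best
    return np.asarray(classes_)[np.argmax(log_likelihood, axis=1)]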
Example #16
Source File: test_multiclass.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_unique_labels():
    # Empty iterable
    assert_raises(ValueError, unique_labels)

    # Multiclass problem
    assert_array_equal(unique_labels(xrange(10)), np.arange(10))
    assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
    assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))

    # Multilabel indicator
    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [1, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))

    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))

    # Several arrays passed
    assert_array_equal(unique_labels([4, 0, 2], xrange(5)),
                       np.arange(5))
    assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)),
                       np.arange(3))

    # Border line case with binary indicator matrix
    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
    assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))),
                       np.arange(5))
Example #17
Source File: neighbors.py From scikit-lego with MIT License | 5 votes |
def fit(self, X: np.ndarray, y: np.ndarray):
    """
    Fit the model using X, y as training data.

    :param X: array-like, shape=(n_features, n_samples)
    :param y: array-like, shape=(n_samples)
    :return: Returns an instance of self
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)

    self.classes_ = unique_labels(y)
    self.models_, self.priors_logp_ = {}, {}
    for target_label in self.classes_:
        x_subset = X[y == target_label]

        # Computing joint distribution
        self.models_[target_label] = KernelDensity(
            bandwidth=self.bandwidth,
            kernel=self.kernel,
            algorithm=self.algorithm,
            metric=self.metric,
            atol=self.atol,
            rtol=self.rtol,
            breadth_first=self.breath_first,
            leaf_size=self.leaf_size,
            metric_params=self.metric_params,
        ).fit(x_subset)

        # Computing target class prior
        self.priors_logp_[target_label] = np.log(len(x_subset) / len(X))

    return self
Example #18
Source File: test_split.py From ALiPy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_split1_allclass():
    train_idx, test_idx, label_idx, unlabel_idx = split(
        X=X, y=y,
        all_class=True,
        split_count=split_count,
        test_ratio=0.3,
        initial_label_rate=0.05,
        saving_path=None,
        query_type='AllLabels')
    assert len(train_idx) == split_count
    assert len(test_idx) == split_count
    assert len(label_idx) == split_count
    assert len(unlabel_idx) == split_count
    for i in range(split_count):
        train = set(train_idx[i])
        test = set(test_idx[i])
        lab = set(label_idx[i])
        unl = set(unlabel_idx[i])

        assert len(test) == round(0.3 * instance_num)
        assert len(lab) == round(0.05 * len(train))

        # validity
        traintest = train.union(test)
        labun = lab.union(unl)
        assert traintest == set(range(instance_num))
        assert labun == train

        # is all-class
        assert len(unique_labels(y[label_idx[i]])) == label_num
Example #19
Source File: test_multiclass.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_unique_labels_mixed_types():
    # Mix with binary or multiclass and multilabel
    mix_clf_format = product(EXAMPLES["multilabel-indicator"],
                             EXAMPLES["multiclass"] +
                             EXAMPLES["binary"])

    for y_multilabel, y_multiclass in mix_clf_format:
        assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel)
        assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass)

    assert_raises(ValueError, unique_labels, [[1, 2]], [["a", "d"]])
    assert_raises(ValueError, unique_labels, ["1", 2])
    assert_raises(ValueError, unique_labels, [["1", 2], [1, 3]])
    assert_raises(ValueError, unique_labels, [["1", "2"], [2, 3]])
Example #20
Source File: test_multiclass.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_unique_labels():
    # Empty iterable
    assert_raises(ValueError, unique_labels)

    # Multiclass problem
    assert_array_equal(unique_labels(range(10)), np.arange(10))
    assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
    assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))

    # Multilabel indicator
    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [1, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))

    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))

    # Several arrays passed
    assert_array_equal(unique_labels([4, 0, 2], range(5)),
                       np.arange(5))
    assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)),
                       np.arange(3))

    # Border line case with binary indicator matrix
    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
    assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))),
                       np.arange(5))
Example #21
Source File: robust_soft_learning_vector_quantization.py From scikit-multiflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def partial_fit(self, X, y, classes=None, sample_weight=None):
    """Fit the LVQ model to the given training data and parameters using
    gradient ascent.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.
    y : numpy.ndarray of shape (n_samples, n_targets)
        An array-like with the class labels of all samples in X
    classes : numpy.ndarray, optional (default=None)
        Contains all possible/known class labels. Usage varies depending
        on the learning method.
    sample_weight : Not used.

    Returns
    -------
    self
    """
    if set(unique_labels(y)).issubset(set(self.classes_)) or \
            self.initial_fit is True:
        X, y = self._validate_train_parms(X, y, classes=classes)
    else:
        raise ValueError('Class {} was not learned - please declare all '
                         'classes in first call of fit/partial_fit'
                         .format(y))

    self._optimize(X, y)
    return self
Example #22
Source File: metrics.py From hyperparameter_hunter with MIT License | 4 votes |
def classify_output(target, prediction):
    """Force continuous `prediction` into the discrete, classified space of `target`.

    This is not an output/feature transformer akin to SKLearn's discretization transformers.
    This function is intended for use in the very specific case of having a `target` that is
    classification-like ("binary", "multiclass", etc.), with `prediction` that resembles a
    "continuous" target, despite being made for `target`. The most common reason for this
    occurrence is that `prediction` is actually the division-averaged predictions collected
    along the course of a :class:`~hyperparameter_hunter.experiments.CVExperiment`. In this
    case, the original model predictions should have been classification-like; however, due
    to disagreement in the division predictions, the resulting average predictions appear to
    be continuous

    Parameters
    ----------
    target: Array-like
        # TODO: ...
    prediction: Array-like
        # TODO: ...

    Returns
    -------
    numpy.array
        # TODO: ...

    Notes
    -----
    Target types used by this function are defined by `sklearn.utils.multiclass.type_of_target`.
    If a `prediction` value is exactly between two `target` values, it will assume the lower
    of the two values. For example, given a single prediction of 1.5 and unique `labels` of
    [0, 1, 2, 3], the value of that prediction will be 1, rather than 2

    Examples
    --------
    >>> import numpy as np
    >>> classify_output(np.array([0, 3, 1, 2]), [0.5, 1.51, 0.66, 4.9])
    array([0, 2, 1, 3])
    >>> classify_output(np.array([0, 1, 2, 3]), [0.5, 1.51, 0.66, 4.9])
    array([0, 2, 1, 3])
    >>> # TODO: ... Add more examples, including binary classification
    """
    # MARK: Might be ignoring 1-dimensional, label encodings, like 2nd case in
    #   `test_get_clean_prediction`:
    #   ([1, 0, 1, 0], [0.9, 0.1, 0.8, 0.2], [1.0, 0.0, 1.0, 0.0])
    labels = unique_labels(target)  # FLAG: ORIGINAL
    # labels = unique_labels(*target)  # FLAG: TEST
    return np.array([labels[(np.abs(labels - _)).argmin()] for _ in prediction])


##################################################
# Miscellaneous Utilities
##################################################
Example #23
Source File: query_labels.py From ALiPy with BSD 3-Clause "New" or "Revised" License | 4 votes |
def __init__(self, X, y, mu=0.1, gamma=0.1, rho=1, lambda_init=0.1,
             lambda_pace=0.01, **kwargs):
    try:
        import cvxpy
        self._cvxpy = cvxpy
    except ImportError:
        raise ImportError("This method needs cvxpy to solve the QP problem. "
                          "Please refer to https://www.cvxpy.org/install/index.html "
                          "to install cvxpy manually before using.")
    # K: kernel matrix
    super(QueryInstanceSPAL, self).__init__(X, y)
    ul = unique_labels(self.y)
    if len(ul) != 2:
        warnings.warn("This query strategy is implemented for binary classification only.",
                      category=FunctionWarning)
    if len(ul) == 2 and {1, -1} != set(ul):
        y_temp = np.array(copy.deepcopy(self.y))
        y_temp[y_temp == ul[0]] = 1
        y_temp[y_temp == ul[1]] = -1
        self.y = y_temp

    self._mu = mu
    self._gamma = gamma
    self._rho = rho
    self._lambda_init = lambda_init
    self._lambda_pace = lambda_pace
    self._lambda = lambda_init

    # calc kernel
    self._kernel = kwargs.pop('kernel', 'rbf')
    if self._kernel == 'rbf':
        self._K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma_ker', 1.))
    elif self._kernel == 'poly':
        self._K = polynomial_kernel(X=X, Y=X,
                                    coef0=kwargs.pop('coef0', 1),
                                    degree=kwargs.pop('degree', 3),
                                    gamma=kwargs.pop('gamma_ker', 1.))
    elif self._kernel == 'linear':
        self._K = linear_kernel(X=X, Y=X)
    elif hasattr(self._kernel, '__call__'):
        self._K = self._kernel(X=np.array(X), Y=np.array(X))
    else:
        raise NotImplementedError

    if not isinstance(self._K, np.ndarray):
        raise TypeError('K should be an ndarray')
    if self._K.shape != (len(X), len(X)):
        raise ValueError(
            'kernel should have size (%d, %d)' % (len(X), len(X)))
Example #24
Source File: query_labels.py From ALiPy with BSD 3-Clause "New" or "Revised" License | 4 votes |
def __init__(self, X, y, beta=1000, gamma=0.1, rho=1, **kwargs):
    try:
        import cvxpy
        self._cvxpy = cvxpy
    except ImportError:
        raise ImportError("This method needs cvxpy to solve the QP problem. "
                          "Please refer to https://www.cvxpy.org/install/index.html "
                          "to install cvxpy manually before using.")
    # K: kernel matrix
    super(QueryInstanceBMDR, self).__init__(X, y)
    ul = unique_labels(self.y)
    if len(ul) != 2:
        warnings.warn("This query strategy is implemented for binary classification only.",
                      category=FunctionWarning)
    if len(ul) == 2 and {1, -1} != set(ul):
        y_temp = np.array(copy.deepcopy(self.y))
        y_temp[y_temp == ul[0]] = 1
        y_temp[y_temp == ul[1]] = -1
        self.y = y_temp

    self._beta = beta
    self._gamma = gamma
    self._rho = rho

    # calc kernel
    self._kernel = kwargs.pop('kernel', 'rbf')
    if self._kernel == 'rbf':
        self._K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma_ker', 1.))
    elif self._kernel == 'poly':
        self._K = polynomial_kernel(X=X, Y=X,
                                    coef0=kwargs.pop('coef0', 1),
                                    degree=kwargs.pop('degree', 3),
                                    gamma=kwargs.pop('gamma_ker', 1.))
    elif self._kernel == 'linear':
        self._K = linear_kernel(X=X, Y=X)
    elif hasattr(self._kernel, '__call__'):
        self._K = self._kernel(X=np.array(X), Y=np.array(X))
    else:
        raise NotImplementedError

    if not isinstance(self._K, np.ndarray):
        raise TypeError('K should be an ndarray')
    if self._K.shape != (len(X), len(X)):
        raise ValueError(
            'kernel should have size (%d, %d)' % (len(X), len(X)))
Example #25
Source File: ukp_evaluation.py From acl2019-BERT-argument-classification-and-clustering with Apache License 2.0 | 4 votes |
def analyze_predictions(filepath):
    total_sent = 0
    correct_sent = 0
    count = {}
    y_true = []
    y_pred = []
    for line in open(filepath, encoding='utf8'):
        splits = line.strip().split("\t")
        gold = splits[0]
        pred = splits[1]

        total_sent += 1
        if gold == pred:
            correct_sent += 1

        if gold not in count:
            count[gold] = {}
        if pred not in count[gold]:
            count[gold][pred] = 0
        count[gold][pred] += 1

        y_true.append(gold)
        y_pred.append(pred)

    print("gold - pred - Confusion Matrix")
    for gold_label in sorted(count.keys()):
        for pred_label in sorted(count[gold_label].keys()):
            print("%s - %s: %d" % (gold_label, pred_label,
                                   count[gold_label][pred_label]))

    print(":: BERT ::")
    print("Acc: %.2f%%" % (correct_sent / total_sent * 100))

    labels = unique_labels(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average=None)
    rec = recall_score(y_true, y_pred, average=None)
    f1 = f1_score(y_true, y_pred, average=None)

    arg_f1 = []
    for idx, label in enumerate(labels):
        print("\n:: F1 for " + label + " ::")
        print("Prec: %.2f%%" % (prec[idx] * 100))
        print("Recall: %.2f%%" % (rec[idx] * 100))
        print("F1: %.2f%%" % (f1[idx] * 100))

        if label != 'NoArgument':
            arg_f1.append(f1[idx])

    print("\n:: Macro Weighted for all ::")
    print("F1: %.2f%%" % (np.mean(f1) * 100))

    prec_mapping = {key: value for key, value in zip(labels, prec)}
    rec_mapping = {key: value for key, value in zip(labels, rec)}
    return np.mean(f1), prec_mapping, rec_mapping
Example #26
Source File: plotting.py From U-Time with MIT License | 4 votes |
def plot_confusion_matrix(y_true, y_pred, n_classes,
                          normalize=False,
                          id_=None,
                          cmap="Blues"):
    """
    Adapted from sklearn 'plot_confusion_matrix.py'.

    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels

    if normalize:
        title = 'Normalized confusion matrix for identifier {}'.format(id_ or "???")
    else:
        title = 'Confusion matrix, without normalization for identifier {}' \
                ''.format(id_ or "???")

    # Compute confusion matrix
    classes = np.arange(n_classes)
    cm = confusion_matrix(y_true, y_pred)
    classes = classes[unique_labels(y_true, y_pred)]
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    # Get transformed labels
    from utime import defaults
    labels = [defaults.class_int_to_stage_string[i] for i in classes]

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=plt.get_cmap(cmap))
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=labels, yticklabels=labels,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.3f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return fig, ax
Example #27
Source File: lib_plot.py From Realtime-Action-Recognition with MIT License | 4 votes |
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues,
                          size=None):
    """
    (Copied from sklearn website)
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    classes = classes[unique_labels(y_true, y_pred)]
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Display normalized confusion matrix ...")
    else:
        print('Display confusion matrix without normalization ...')
    # print(cm)

    fig, ax = plt.subplots()
    if size is None:
        size = (12, 8)
    fig.set_size_inches(size[0], size[1])

    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')
    ax.set_ylim([-0.5, len(classes) - 0.5])

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax, cm


# Drawings ==============================================================
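Note that this helper indexes classes with the integer labels returned by unique_labels(y_true, y_pred), so classes must be a numpy array of display names indexed by class id. A hypothetical call (the action names are made up for illustration):

import numpy as np

class_names = np.array(["stand", "walk", "run"])  # made-up action names
y_true = [0, 2, 2, 1]
y_pred = [0, 1, 2, 1]
ax, cm = plot_confusion_matrix(y_true, y_pred, classes=class_names, normalize=True)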
Example #28
Source File: test.py From rasa-for-botfront with Apache License 2.0 | 4 votes |
def plot_story_evaluation(
    test_y,
    predictions,
    report,
    precision,
    f1,
    accuracy,
    in_training_data_fraction,
    out_directory,
    disable_plotting,
):
    """Plot the results of story evaluation"""
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    import matplotlib.pyplot as plt
    from rasa.nlu.test import plot_confusion_matrix

    log_evaluation_table(
        test_y,
        "ACTION",
        report,
        precision,
        f1,
        accuracy,
        in_training_data_fraction,
        include_report=True,
    )

    if disable_plotting:
        return

    cnf_matrix = confusion_matrix(test_y, predictions)

    plot_confusion_matrix(
        cnf_matrix,
        classes=unique_labels(test_y, predictions),
        title="Action Confusion matrix",
    )

    fig = plt.gcf()
    fig.set_size_inches(int(20), int(20))
    fig.savefig(os.path.join(out_directory, "story_confmat.pdf"), bbox_inches="tight")
Example #29
Source File: classification_metrics_utils.py From sciwing with MIT License | 4 votes |
def get_confusion_matrix_and_labels(
    predicted_tag_indices: List[List[int]],
    true_tag_indices: List[List[int]],
    true_masked_label_indices: List[List[int]],
    pred_labels_mask: List[List[int]] = None,
) -> (np.array, List[int]):
    """Gets the confusion matrix and the list of classes for which
    the confusion matrix is generated

    Parameters
    ----------
    predicted_tag_indices : List[List[int]]
        Predicted tag indices for a batch
    true_tag_indices : List[List[int]]
        True tag indices for a batch
    true_masked_label_indices : List[List[int]]
        Every integer is either a 0 or 1, where 1 will indicate that the
        label in `true_tag_indices` will be ignored
    """
    # get the masked label indices
    true_masked_label_indices = torch.BoolTensor(true_masked_label_indices).cpu()
    # select the elements in true tag indices where mask is 1
    # these classes will not be considered for calculating the metrics
    true_masked_label_indices = torch.masked_select(
        torch.tensor(true_tag_indices, dtype=torch.long), true_masked_label_indices
    )
    true_masked_label_indices = list(set(true_masked_label_indices.tolist()))
    masked_classes = true_masked_label_indices

    # do the same for pred labels
    if pred_labels_mask is not None:
        pred_mask_label_indices = torch.BoolTensor(pred_labels_mask).cpu()
        pred_mask_label_indices = torch.masked_select(
            torch.tensor(predicted_tag_indices, dtype=torch.long),
            pred_mask_label_indices,
        )
        pred_mask_label_indices = list(set(pred_mask_label_indices.tolist()))
        masked_classes = masked_classes + pred_mask_label_indices

    # get the set of unique classes
    predicted_tags_flat = list(itertools.chain.from_iterable(predicted_tag_indices))
    labels = list(itertools.chain.from_iterable(true_tag_indices))

    predicted_tags_flat = np.array(predicted_tags_flat)
    labels_numpy = np.array(labels)

    classes = unique_labels(labels_numpy, predicted_tags_flat)
    classes = filter(lambda class_: class_ not in masked_classes, classes)
    classes = list(classes)

    confusion_mtrx = confusion_matrix(
        labels_numpy, predicted_tags_flat, labels=classes
    )
    return confusion_mtrx, classes
Example #30
Source File: PlotClass.py From ldgcnn with MIT License | 4 votes |
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    classes = classes[unique_labels(y_true, y_pred)]
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)
    fig = plt.figure(figsize=(14, 10.8))
    ax = fig.add_subplot(111)
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    # fmt = '.2f' if normalize else 'd'
    # thresh = cm.max() / 2.
    # for i in range(cm.shape[0]):
    #     for j in range(cm.shape[1]):
    #         ax.text(j, i, format(cm[i, j], fmt),
    #                 ha="center", va="center",
    #                 color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return cm, ax