Python Examples of sklearn.utils.multiclass.unique_labels

Source File: sssrm.py From brainiak with Apache License 2.0

6 votes

def _init_classes(self, y):
        """Re-encode the labels of every subject as indices in [0,..,C-1]

        Parameters
        ----------

        y : list of arrays of int, each element has shape=[samples_i,]
            Labels of the samples for each subject


        Returns
        -------
        new_y : list of arrays of int, each element has shape=[samples_i,]
            Labels of each subject replaced by their position in classes_

        Note
        ----
            The distinct labels found across all subjects are stored in the
            attribute classes_.
        """
        pooled_labels = utils.concatenate_not_none(y)
        self.classes_ = unique_labels(pooled_labels)
        # digitize yields a 1-based bin number for each label, hence the -1.
        return [np.digitize(subject_labels, self.classes_) - 1
                for subject_labels in y]

Source File: query_labels.py From ALiPy with BSD 3-Clause "New" or "Revised" License

6 votes

def __init__(self, X, y, mode='LAL_iterative', data_path='.', cls_est=50, train_slt=True, **kwargs):
        """Set up the LAL query strategy and optionally train its selector.

        Parameters
        ----------
        X, y :
            Forwarded unchanged to the parent constructor.
        mode : str
            Must be 'LAL_iterative' or 'LAL_independent' (checked with an assert).
        data_path : str
            Existing directory used to build the paths of the two .npz files.
        cls_est : int
            Number of estimators of the random forest stored in ``self.model``.
        train_slt : bool
            When true, ``download_data()`` and ``train_selector_from_file()``
            are called at the end of construction.
        **kwargs :
            Accepted but not read by this constructor.

        Raises
        ------
        ValueError
            If `data_path` is not an existing directory.
        """
        super(QueryInstanceLAL, self).__init__(X, y)
        # Non-binary labels only trigger a warning; construction continues.
        if len(unique_labels(self.y)) != 2:
            warnings.warn("This query strategy is implemented for binary classification only.",
                          category=FunctionWarning)
        if not os.path.isdir(data_path):
            raise ValueError("Please pass the directory of the file.")
        # Paths of the two data files inside data_path.
        self._iter_path = os.path.join(data_path, 'LAL-iterativetree-simulatedunbalanced-big.npz')
        self._rand_path = os.path.join(data_path, 'LAL-randomtree-simulatedunbalanced-big.npz')
        assert mode in ['LAL_iterative', 'LAL_independent']
        self._mode = mode
        # The selector stays unset until it is trained.
        self._selector = None
        # NOTE(review): n_jobs is hard-coded to 8 here.
        self.model = RandomForestClassifier(n_estimators=cls_est, oob_score=True, n_jobs=8)
        if train_slt:
            self.download_data()
            self.train_selector_from_file()

Source File: confusion_balancer.py From scikit-lego with MIT License

6 votes

def fit(self, X, y):
        """
        Fit the wrapped estimator and store a smoothed, row-normalised
        confusion matrix computed on the training data.

        :param X: array-like training features, validated with ``check_X_y``.
        :param y: array-like, shape=(n_samples,) training labels.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self.estimator, dtype=FLOAT_DTYPES)
        if not isinstance(self.estimator, ProbabilisticClassifier):
            raise ValueError(
                "The ConfusionBalancer meta model only works on classifcation models with .predict_proba."
            )
        self.estimator.fit(X, y)
        self.classes_ = unique_labels(y)
        train_predictions = self.estimator.predict(X)
        # Transpose the training confusion matrix, then add the smoothing term.
        smoothed = confusion_matrix(y, train_predictions).T + self.cfm_smooth
        # Divide every row by its own total.
        row_totals = smoothed.sum(axis=1).reshape(-1, 1)
        self.cfm_ = smoothed / row_totals
        return self

Source File: shapelets.py From tslearn with BSD 2-Clause "Simplified" License

6 votes

def _preprocess_labels(self, y):
        """Turn raw labels into the indicator matrix built by to_categorical.

        The distinct labels are stored in ``classes_`` and the label-to-index
        mapping in ``label_to_ind_``. With exactly two classes only the
        columns after the first one are returned.

        Raises ValueError when a single class is present in ``y``.
        """
        self.classes_ = unique_labels(y)
        n_labels = len(self.classes_)
        if n_labels == 1:
            raise ValueError("Classifier can't train when only one class "
                             "is present.")
        # int32/int64 labels are keyed as plain Python ints in the mapping.
        has_int_labels = self.classes_.dtype in [numpy.int32, numpy.int64]
        self.label_to_ind_ = {
            (int(label) if has_int_labels else label): position
            for position, label in enumerate(self.classes_)
        }
        indices = numpy.array([self.label_to_ind_[label] for label in y])
        one_hot = to_categorical(indices)
        if n_labels != 2:
            return one_hot
        return one_hot[:, 1:]  # Keep only indicator of positive class

Source File: test.py From rasa_core with Apache License 2.0

6 votes

def plot_story_evaluation(test_y, predictions,
                          report, precision, f1, accuracy,
                          in_training_data_fraction,
                          out_directory):
    """Log the evaluation table and save the action confusion matrix.

    The figure is written to ``story_confmat.pdf`` inside `out_directory`.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    import matplotlib.pyplot as plt
    from rasa_nlu.test import plot_confusion_matrix

    log_evaluation_table(test_y, "ACTION", report, precision, f1, accuracy,
                         in_training_data_fraction, include_report=True)

    matrix = confusion_matrix(test_y, predictions)
    class_names = unique_labels(test_y, predictions)
    plot_confusion_matrix(matrix, classes=class_names,
                          title='Action Confusion matrix')

    # Enlarge the current figure before writing it to disk.
    figure = plt.gcf()
    figure.set_size_inches(20, 20)
    target_path = os.path.join(out_directory, "story_confmat.pdf")
    figure.savefig(target_path, bbox_inches='tight')

Source File: labels.py From pumpp with ISC License

6 votes

def fit(self, y):
        """Fit label binarizer

        Parameters
        ----------
        y : array of shape [n_samples,] or [n_samples, n_classes]
            Target values. The 2-d matrix should only contain 0 and 1,
            represents multilabel classification.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        ValueError
            If the detected target type contains 'multioutput' or if `y`
            has no samples.
        """
        self.y_type_ = type_of_target(y)
        if 'multioutput' in self.y_type_:
            raise ValueError("Multioutput target data is not supported with "
                             "label binarization")
        if _num_samples(y) == 0:
            # Wrap y in a 1-tuple: a bare tuple operand would be unpacked by
            # the % operator and raise TypeError instead of this ValueError.
            raise ValueError('y has 0 samples: %r' % (y,))

        # Remember whether the training targets were given as a sparse matrix.
        self.sparse_input_ = sp.issparse(y)
        self.classes_ = unique_labels(y)
        return self

Source File: evaluate.py From Rasa_NLU_Chi with Apache License 2.0

6 votes

def evaluate_intents(targets, predictions):  # pragma: no cover
    """Creates a confusion matrix and summary statistics for intent predictions.

    Only considers those examples with a set intent.
    Others are filtered out.

    The summary is logged, the confusion matrix is plotted and the figure is
    displayed with ``plt.show()``; nothing is returned."""
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    import matplotlib.pyplot as plt

    # remove empty intent targets
    # (the original count is kept so the log can report how many were dropped)
    num_examples = len(targets)
    targets, predictions = remove_empty_intent_examples(targets, predictions)
    logger.info("Intent Evaluation: Only considering those "
                "{} examples that have a defined intent out "
                "of {} examples".format(targets.size, num_examples))
    log_evaluation_table(targets, predictions)

    # Matrix and tick labels are both derived from the filtered data.
    cnf_matrix = confusion_matrix(targets, predictions)
    labels = unique_labels(targets, predictions)
    plot_confusion_matrix(cnf_matrix,
                          classes=labels,
                          title='Intent Confusion matrix')

    plt.show()

Source File: _template.py From project-template with BSD 3-Clause "New" or "Revised" License

6 votes

def fit(self, X, y):
        """A reference implementation of a fitting function for a classifier.

        The validated training data is stored as-is in ``X_`` and ``y_`` and
        the distinct target values in ``classes_``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,)
            The target values. An array of int.

        Returns
        -------
        self : object
            Returns self.
        """
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)

        # Keep the validated training data on the instance
        self.X_ = X
        self.y_ = y
        # Return the classifier
        return self

Source File: test_multiclass.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_unique_labels_non_specific():
    # Run unique_labels over the collected example targets

    # Supported target formats only need to run without raising
    supported_types = ("binary", "multiclass", "multilabel-indicator")
    for target_type in supported_types:
        for y in EXAMPLES[target_type]:
            unique_labels(y)

    # Inputs that are not array-like must be rejected
    for example in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises(ValueError, unique_labels, example)

    # Target types that are not supported must be rejected as well
    unsupported_types = ("unknown", "continuous", 'continuous-multioutput',
                         'multiclass-multioutput')
    for y_type in unsupported_types:
        for example in EXAMPLES[y_type]:
            assert_raises(ValueError, unique_labels, example)

Source File: test_multiclass.py From twitter-stock-recommendation with MIT License

6 votes

def test_unique_labels_non_specific():
    # Run unique_labels over the collected example targets

    # Supported target formats only need to run without raising
    supported_types = ("binary", "multiclass", "multilabel-indicator")
    for target_type in supported_types:
        for y in EXAMPLES[target_type]:
            unique_labels(y)

    # Inputs that are not array-like must be rejected
    for example in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises(ValueError, unique_labels, example)

    # Target types that are not supported must be rejected as well
    unsupported_types = ("unknown", "continuous", 'continuous-multioutput',
                         'multiclass-multioutput')
    for y_type in unsupported_types:
        for example in EXAMPLES[y_type]:
            assert_raises(ValueError, unique_labels, example)

Source File: evaluate.py From rasa_wechat with Apache License 2.0

6 votes

def run_story_evaluation(story_file, policy_model_path, nlu_model_path,
                         out_file, max_stories):
    """Evaluate the stories, log the results and save the confusion matrix plot to `out_file`."""
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels

    test_y, preds = collect_story_predictions(
        story_file, policy_model_path, nlu_model_path, max_stories)

    log_evaluation_table(test_y, preds)

    matrix = confusion_matrix(test_y, preds)
    class_names = unique_labels(test_y, preds)
    plot_confusion_matrix(matrix, classes=class_names,
                          title='Action Confusion matrix')

    # Enlarge the current figure before writing it to disk.
    figure = plt.gcf()
    figure.set_size_inches(20, 20)
    figure.savefig(out_file, bbox_inches='tight')

Source File: test_multiclass.py From twitter-stock-recommendation with MIT License

5 votes

def test_unique_labels_mixed_types():
    # Multilabel targets cannot be combined with binary or multiclass ones,
    # whichever argument comes first
    multilabel_targets = EXAMPLES["multilabel-indicator"]
    flat_targets = EXAMPLES["multiclass"] + EXAMPLES["binary"]

    for y_multilabel, y_multiclass in product(multilabel_targets,
                                              flat_targets):
        assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel)
        assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass)

    # Argument lists mixing string and number labels are rejected too
    mixed_label_calls = (
        ([[1, 2]], [["a", "d"]]),
        (["1", 2],),
        ([["1", 2], [1, 3]],),
        ([["1", "2"], [2, 3]],),
    )
    for call_args in mixed_label_calls:
        assert_raises(ValueError, unique_labels, *call_args)

Source File: naive_bayes.py From scikit-lego with MIT License

5 votes

def fit(self, X: np.array, y: np.array) -> "GaussianMixtureNB":
        """
        Fit one GaussianMixture per class and per column of X.

        :param X: array-like training data, validated with ``check_X_y``.
        :param y: array-like, shape=(n_samples, ) training labels.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        if X.ndim == 1:
            X = np.expand_dims(X, 1)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)
        self.num_fit_cols_ = X.shape[1]

        # Every mixture is built with the same hyper-parameters.
        mixture_params = dict(
            n_components=self.n_components,
            covariance_type=self.covariance_type,
            tol=self.tol,
            reg_covar=self.reg_covar,
            max_iter=self.max_iter,
            n_init=self.n_init,
            init_params=self.init_params,
            weights_init=self.weights_init,
            means_init=self.means_init,
            precisions_init=self.precisions_init,
            random_state=self.random_state,
            warm_start=self.warm_start,
        )
        for c in self.classes_:
            in_class = y == c
            subset_x, subset_y = X[in_class], y[in_class]
            # One single-column mixture per feature of the class subset.
            self.gmms_[c] = [
                GaussianMixture(**mixture_params).fit(
                    subset_x[:, col].reshape(-1, 1), subset_y)
                for col in range(X.shape[1])
            ]
        return self

Source File: bayesian_gmm_classifier.py From scikit-lego with MIT License

5 votes

def fit(self, X: np.array, y: np.array) -> "BayesianGMMClassifier":
        """
        Fit one BayesianGaussianMixture per class found in y.

        :param X: array-like training data, validated with ``check_X_y``.
        :param y: array-like, shape=(n_samples, ) training labels.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        if X.ndim == 1:
            X = np.expand_dims(X, 1)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)

        # Every mixture is built with the same hyper-parameters.
        mixture_params = dict(
            n_components=self.n_components,
            covariance_type=self.covariance_type,
            tol=self.tol,
            reg_covar=self.reg_covar,
            max_iter=self.max_iter,
            n_init=self.n_init,
            init_params=self.init_params,
            weight_concentration_prior_type=self.weight_concentration_prior_type,
            weight_concentration_prior=self.weight_concentration_prior,
            mean_precision_prior=self.mean_precision_prior,
            mean_prior=self.mean_prior,
            degrees_of_freedom_prior=self.degrees_of_freedom_prior,
            covariance_prior=self.covariance_prior,
            random_state=self.random_state,
            warm_start=self.warm_start,
            verbose=self.verbose,
            verbose_interval=self.verbose_interval,
        )
        for c in self.classes_:
            in_class = y == c
            subset_x, subset_y = X[in_class], y[in_class]
            self.gmms_[c] = BayesianGaussianMixture(**mixture_params).fit(
                subset_x, subset_y)
        return self

Source File: gmm_classifier.py From scikit-lego with MIT License

5 votes

def fit(self, X: np.array, y: np.array) -> "GMMClassifier":
        """
        Fit one GaussianMixture per class found in y.

        :param X: array-like training data, validated with ``check_X_y``.
        :param y: array-like, shape=(n_samples, ) training labels.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        if X.ndim == 1:
            X = np.expand_dims(X, 1)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)

        # Every mixture is built with the same hyper-parameters.
        mixture_params = dict(
            n_components=self.n_components,
            covariance_type=self.covariance_type,
            tol=self.tol,
            reg_covar=self.reg_covar,
            max_iter=self.max_iter,
            n_init=self.n_init,
            init_params=self.init_params,
            weights_init=self.weights_init,
            means_init=self.means_init,
            precisions_init=self.precisions_init,
            random_state=self.random_state,
            warm_start=self.warm_start,
            verbose=self.verbose,
            verbose_interval=self.verbose_interval,
        )
        for c in self.classes_:
            in_class = y == c
            subset_x, subset_y = X[in_class], y[in_class]
            self.gmms_[c] = GaussianMixture(**mixture_params).fit(
                subset_x, subset_y)
        return self

Source File: test_multiclass.py From twitter-stock-recommendation with MIT License

5 votes

def test_unique_labels():
    """Check unique_labels on multiclass, multilabel-indicator and mixed inputs."""
    # Empty iterable
    assert_raises(ValueError, unique_labels)

    # Multiclass problem
    # range is used rather than the Python 2-only xrange so the test runs on
    # both major versions.
    assert_array_equal(unique_labels(range(10)), np.arange(10))
    assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
    assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))

    # Multilabel indicator
    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [1, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))

    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))

    # Several arrays passed
    assert_array_equal(unique_labels([4, 0, 2], range(5)),
                       np.arange(5))
    assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)),
                       np.arange(3))

    # Border line case with binary indicator matrix
    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
    assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))),
                       np.arange(5))

Source File: neighbors.py From scikit-lego with MIT License

5 votes

def fit(self, X: np.ndarray, y: np.ndarray):
        """
        Fit one KernelDensity model and one log prior per class found in y.

        :param X: array-like training data, validated with ``check_X_y``
        :param y: array-like, shape=(n_samples)
        :return: Returns an instance of self
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)

        self.classes_ = unique_labels(y)
        self.models_, self.priors_logp_ = {}, {}
        for target_label in self.classes_:
            x_subset = X[y == target_label]

            # Density of the samples that belong to this class
            density = KernelDensity(
                bandwidth=self.bandwidth,
                kernel=self.kernel,
                algorithm=self.algorithm,
                metric=self.metric,
                atol=self.atol,
                rtol=self.rtol,
                breadth_first=self.breath_first,
                leaf_size=self.leaf_size,
                metric_params=self.metric_params,
            )
            self.models_[target_label] = density.fit(x_subset)

            # Log of the fraction of training samples in this class
            class_fraction = len(x_subset) / len(X)
            self.priors_logp_[target_label] = np.log(class_fraction)

        return self

Source File: test_split.py From ALiPy with BSD 3-Clause "New" or "Revised" License

5 votes

def test_split1_allclass():
    """Check split() with all_class=True: fold count, fold sizes, partition validity and class coverage."""
    train_idx, test_idx, label_idx, unlabel_idx = split(X=X,
                                                        y=y,
                                                        all_class=True, split_count=split_count,
                                                        test_ratio=0.3, initial_label_rate=0.05,
                                                        saving_path=None,
                                                        query_type='AllLabels')
    # One index collection per split is expected for each of the four outputs.
    assert len(train_idx) == split_count
    assert len(test_idx) == split_count
    assert len(label_idx) == split_count
    assert len(unlabel_idx) == split_count

    for i in range(split_count):
        train = set(train_idx[i])
        test = set(test_idx[i])
        lab = set(label_idx[i])
        unl = set(unlabel_idx[i])

        assert len(test) == round(0.3 * instance_num)
        assert len(lab) == round(0.05 * len(train))

        # validity
        traintest = train.union(test)
        labun = lab.union(unl)
        assert traintest == set(range(instance_num))
        assert labun == train

        # is all-class: the comparison must be asserted, otherwise its result
        # is silently discarded and the check never fails.
        assert len(unique_labels(y[label_idx[i]])) == label_num

Source File: test_multiclass.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_unique_labels_mixed_types():
    # Multilabel targets cannot be combined with binary or multiclass ones,
    # whichever argument comes first
    multilabel_targets = EXAMPLES["multilabel-indicator"]
    flat_targets = EXAMPLES["multiclass"] + EXAMPLES["binary"]

    for y_multilabel, y_multiclass in product(multilabel_targets,
                                              flat_targets):
        assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel)
        assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass)

    # Argument lists mixing string and number labels are rejected too
    mixed_label_calls = (
        ([[1, 2]], [["a", "d"]]),
        (["1", 2],),
        ([["1", 2], [1, 3]],),
        ([["1", "2"], [2, 3]],),
    )
    for call_args in mixed_label_calls:
        assert_raises(ValueError, unique_labels, *call_args)

Source File: test_multiclass.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_unique_labels():
    # Calling without any target is an error
    assert_raises(ValueError, unique_labels)

    # Multiclass targets given as range, ndarray and unsorted list
    found = unique_labels(range(10))
    assert_array_equal(found, np.arange(10))
    found = unique_labels(np.arange(10))
    assert_array_equal(found, np.arange(10))
    found = unique_labels([4, 0, 2])
    assert_array_equal(found, np.array([0, 2, 4]))

    # Multilabel indicator matrices: one label per column
    three_rows = np.array([[0, 0, 1],
                           [1, 0, 1],
                           [0, 0, 0]])
    assert_array_equal(unique_labels(three_rows), np.arange(3))

    two_rows = np.array([[0, 0, 1],
                         [0, 0, 0]])
    assert_array_equal(unique_labels(two_rows), np.arange(3))

    # Labels are pooled over all the arrays passed
    found = unique_labels([4, 0, 2], range(5))
    assert_array_equal(found, np.arange(5))
    found = unique_labels((0, 1, 2), (0,), (2, 1))
    assert_array_equal(found, np.arange(3))

    # Border line case with binary indicator matrix
    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
    found = unique_labels(np.ones((4, 5)), np.ones((5, 5)))
    assert_array_equal(found, np.arange(5))

Source File: robust_soft_learning_vector_quantization.py From scikit-multiflow with BSD 3-Clause "New" or "Revised" License

5 votes

def partial_fit(self, X, y, classes=None, sample_weight=None):
        """Fit the LVQ model to the given training data and parameters using
        gradient ascent.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : numpy.ndarray of shape (n_samples, n_targets)
            An array-like with the class labels of all samples in X
        classes : numpy.ndarray, optional (default=None)
            Contains all possible/known class labels. Usage varies depending
            on the learning method.
        sample_weight : Not used.

        Returns
        --------
        self

        Raises
        ------
        ValueError
            If `y` contains a label that is not in ``classes_`` and this is
            not the initial fit.
        """
        labels_known = set(unique_labels(y)).issubset(set(self.classes_))
        if not (labels_known or self.initial_fit is True):
            # Adjacent literals instead of a backslash continuation inside
            # the string, which would embed the source indentation in the
            # message.
            raise ValueError('Class {} was not learned - please declare all '
                             'classes in first call of fit/partial_fit'
                             .format(y))

        X, y = self._validate_train_parms(X, y, classes=classes)
        self._optimize(X, y)
        return self

Source File: metrics.py From hyperparameter_hunter with MIT License

4 votes

def classify_output(target, prediction):
    """Snap continuous `prediction` values onto the discrete labels of `target`.

    This is not an output/feature transformer akin to SKLearn's discretization transformers. It
    is meant for the specific case in which `target` is classification-like ("binary",
    "multiclass", etc.) while `prediction` looks like a "continuous" target, even though it was
    made for `target`. The most common cause is that `prediction` holds the division-averaged
    predictions collected along the course of a
    :class:`~hyperparameter_hunter.experiments.CVExperiment`: the individual model predictions
    were classification-like, but disagreement between divisions makes their average appear
    continuous

    Parameters
    ----------
    target: Array-like
        Values whose unique labels form the set of allowed outputs
    prediction: Array-like
        Numeric values, each of which is replaced by its closest label

    Returns
    -------
    numpy.array
        One label per element of `prediction`

    Notes
    -----
    Target types used by this function are defined by `sklearn.utils.multiclass.type_of_target`.

    If a `prediction` value is exactly between two `target` values, it will assume the lower of the
    two values. For example, given a single prediction of 1.5 and unique `labels` of [0, 1, 2, 3],
    the value of that prediction will be 1, rather than 2

    Examples
    --------
    >>> import numpy as np
    >>> classify_output(np.array([0, 3, 1, 2]), [0.5, 1.51, 0.66, 4.9])
    array([0, 2, 1, 3])
    >>> classify_output(np.array([0, 1, 2, 3]), [0.5, 1.51, 0.66, 4.9])
    array([0, 2, 1, 3])
    >>> # TODO: ... Add more examples, including binary classification
    """
    # MARK: Might be ignoring 1-dimensional, label encodings, like 2nd case in `test_get_clean_prediction`:
    #   ([1, 0, 1, 0], [0.9, 0.1, 0.8, 0.2], [1.0, 0.0, 1.0, 0.0])
    labels = unique_labels(target)
    # argmin returns the first minimum, so ties go to the earlier label.
    nearest_labels = [
        labels[np.abs(labels - value).argmin()] for value in prediction
    ]
    return np.array(nearest_labels)


##################################################
# Miscellaneous Utilities
##################################################

Source File: query_labels.py From ALiPy with BSD 3-Clause "New" or "Revised" License

4 votes

def __init__(self, X, y, mu=0.1, gamma=0.1, rho=1, lambda_init=0.1, lambda_pace=0.01, **kwargs):
        """Set up the SPAL query strategy and precompute its kernel matrix.

        Parameters
        ----------
        X, y :
            Forwarded to the parent constructor; X is also used to compute
            the kernel matrix.
        mu, gamma, rho, lambda_init, lambda_pace :
            Hyper-parameters stored on the instance; ``self._lambda`` starts
            at `lambda_init`.
        **kwargs :
            'kernel' ('rbf' by default, 'poly', 'linear' or a callable taking
            X and Y keyword arguments), plus 'gamma_ker', 'coef0' and
            'degree' for the kernels that use them.

        Raises
        ------
        ImportError
            If cvxpy cannot be imported.
        NotImplementedError
            If 'kernel' is neither a supported name nor a callable.
        TypeError
            If the computed kernel matrix is not an ndarray.
        ValueError
            If the kernel matrix is not of shape (len(X), len(X)).
        """
        try:
            import cvxpy
        except ImportError:
            raise ImportError("This method need cvxpy to solve the QP problem."
                              "Please refer to https://www.cvxpy.org/install/index.html "
                              "install cvxpy manually before using.")
        self._cvxpy = cvxpy

        # K: kernel matrix
        super(QueryInstanceSPAL, self).__init__(X, y)
        ul = unique_labels(self.y)
        # Non-binary labels only trigger a warning; construction continues.
        if len(ul) != 2:
            warnings.warn("This query strategy is implemented for binary classification only.",
                          category=FunctionWarning)
        if len(ul) == 2 and {1, -1} != set(ul):
            y_temp = np.array(copy.deepcopy(self.y))
            # Compute both masks before writing: assigning 1 first and then
            # matching on ul[1] would also remap the freshly written entries
            # whenever ul[1] == 1 (e.g. labels {0, 1} would all become -1).
            first_mask = y_temp == ul[0]
            second_mask = y_temp == ul[1]
            y_temp[first_mask] = 1
            y_temp[second_mask] = -1
            self.y = y_temp

        self._mu = mu
        self._gamma = gamma
        self._rho = rho
        self._lambda_init = lambda_init
        self._lambda_pace = lambda_pace
        self._lambda = lambda_init

        # calc kernel
        self._kernel = kwargs.pop('kernel', 'rbf')
        if self._kernel == 'rbf':
            self._K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma_ker', 1.))
        elif self._kernel == 'poly':
            self._K = polynomial_kernel(X=X,
                                        Y=X,
                                        coef0=kwargs.pop('coef0', 1),
                                        degree=kwargs.pop('degree', 3),
                                        gamma=kwargs.pop('gamma_ker', 1.))
        elif self._kernel == 'linear':
            self._K = linear_kernel(X=X, Y=X)
        elif hasattr(self._kernel, '__call__'):
            self._K = self._kernel(X=np.array(X), Y=np.array(X))
        else:
            raise NotImplementedError

        # A user-supplied kernel may return anything, so validate the result.
        if not isinstance(self._K, np.ndarray):
            raise TypeError('K should be an ndarray')
        if self._K.shape != (len(X), len(X)):
            raise ValueError(
                'kernel should have size (%d, %d)' % (len(X), len(X)))

Source File: query_labels.py From ALiPy with BSD 3-Clause "New" or "Revised" License

4 votes

def __init__(self, X, y, beta=1000, gamma=0.1, rho=1, **kwargs):
        """Set up the BMDR query strategy and precompute its kernel matrix.

        Parameters
        ----------
        X, y :
            Forwarded to the parent constructor; X is also used to compute
            the kernel matrix.
        beta, gamma, rho :
            Hyper-parameters stored on the instance.
        **kwargs :
            'kernel' ('rbf' by default, 'poly', 'linear' or a callable taking
            X and Y keyword arguments), plus 'gamma_ker', 'coef0' and
            'degree' for the kernels that use them.

        Raises
        ------
        ImportError
            If cvxpy cannot be imported.
        NotImplementedError
            If 'kernel' is neither a supported name nor a callable.
        TypeError
            If the computed kernel matrix is not an ndarray.
        ValueError
            If the kernel matrix is not of shape (len(X), len(X)).
        """
        try:
            import cvxpy
        except ImportError:
            raise ImportError("This method need cvxpy to solve the QP problem."
                              "Please refer to https://www.cvxpy.org/install/index.html "
                              "install cvxpy manually before using.")
        self._cvxpy = cvxpy

        # K: kernel matrix
        super(QueryInstanceBMDR, self).__init__(X, y)
        ul = unique_labels(self.y)
        # Non-binary labels only trigger a warning; construction continues.
        if len(ul) != 2:
            warnings.warn("This query strategy is implemented for binary classification only.",
                          category=FunctionWarning)
        if len(ul) == 2 and {1, -1} != set(ul):
            y_temp = np.array(copy.deepcopy(self.y))
            # Compute both masks before writing: assigning 1 first and then
            # matching on ul[1] would also remap the freshly written entries
            # whenever ul[1] == 1 (e.g. labels {0, 1} would all become -1).
            first_mask = y_temp == ul[0]
            second_mask = y_temp == ul[1]
            y_temp[first_mask] = 1
            y_temp[second_mask] = -1
            self.y = y_temp

        self._beta = beta
        self._gamma = gamma
        self._rho = rho

        # calc kernel
        self._kernel = kwargs.pop('kernel', 'rbf')
        if self._kernel == 'rbf':
            self._K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma_ker', 1.))
        elif self._kernel == 'poly':
            self._K = polynomial_kernel(X=X,
                                        Y=X,
                                        coef0=kwargs.pop('coef0', 1),
                                        degree=kwargs.pop('degree', 3),
                                        gamma=kwargs.pop('gamma_ker', 1.))
        elif self._kernel == 'linear':
            self._K = linear_kernel(X=X, Y=X)
        elif hasattr(self._kernel, '__call__'):
            self._K = self._kernel(X=np.array(X), Y=np.array(X))
        else:
            raise NotImplementedError

        # A user-supplied kernel may return anything, so validate the result.
        if not isinstance(self._K, np.ndarray):
            raise TypeError('K should be an ndarray')
        if self._K.shape != (len(X), len(X)):
            raise ValueError(
                'kernel should have size (%d, %d)' % (len(X), len(X)))

Source File: ukp_evaluation.py From acl2019-BERT-argument-classification-and-clustering with Apache License 2.0

4 votes

def analyze_predictions(filepath):
    """Print confusion counts and per-label scores for a predictions file.

    Each line of `filepath` is split on tabs; the first column is read as the
    gold label and the second as the predicted label.

    Returns
    -------
    tuple
        ``(mean_f1, prec_mapping, rec_mapping)``: the unweighted mean of the
        per-label F1 scores and two dicts mapping each label to its precision
        and recall.
    """
    total_sent = 0
    correct_sent = 0
    count = {}

    y_true = []
    y_pred = []

    # The context manager closes the file even if a line is malformed.
    with open(filepath, encoding='utf8') as prediction_file:
        for line in prediction_file:
            splits = line.strip().split("\t")
            gold = splits[0]
            pred = splits[1]

            total_sent += 1
            if gold == pred:
                correct_sent += 1

            # Nested counter: count[gold][pred] is the number of such pairs.
            pred_counts = count.setdefault(gold, {})
            pred_counts[pred] = pred_counts.get(pred, 0) + 1

            y_true.append(gold)
            y_pred.append(pred)

    print("gold - pred - Confusion Matrix")
    for gold_label in sorted(count.keys()):
        for pred_label in sorted(count[gold_label].keys()):
            print("%s - %s: %d" % (gold_label, pred_label, count[gold_label][pred_label]))


    print(":: BERT ::")
    print("Acc: %.2f%%" % (correct_sent/total_sent*100) )
    labels = unique_labels(y_true, y_pred)
    # average=None yields one score per label.
    # NOTE(review): assumes the scores come back in the same order as
    # `labels` -- confirm against the sklearn metrics documentation.
    prec = precision_score(y_true, y_pred, average=None)
    rec = recall_score(y_true, y_pred, average=None)
    f1 = f1_score(y_true, y_pred, average=None)

    for idx, label in enumerate(labels):
        print("\n:: F1 for "+label+" ::")
        print("Prec: %.2f%%" % (prec[idx]*100))
        print("Recall: %.2f%%" % (rec[idx]*100))
        print("F1: %.2f%%" % (f1[idx]*100))


    print("\n:: Macro Weighted for all  ::")
    print("F1: %.2f%%" % (np.mean(f1)*100))

    prec_mapping = {key:value for key, value in zip(labels, prec)}
    rec_mapping = {key:value for key, value in zip(labels, rec)}
    return np.mean(f1), prec_mapping, rec_mapping

Source File: plotting.py From U-Time with MIT License

4 votes

def plot_confusion_matrix(y_true, y_pred, n_classes,
                          normalize=False, id_=None,
                          cmap="Blues"):
    """
    Draw the confusion matrix of `y_true` against `y_pred` as a heat map.

    Adapted from sklearn 'plot_confusion_matrix.py'.

    With `normalize=True` every row of the matrix is divided by its row sum
    before plotting. `id_` is inserted into the figure title ("???" is used
    when it is falsy). Tick labels are looked up in
    `utime.defaults.class_int_to_stage_string` for those entries of
    `np.arange(n_classes)` selected by the labels found in the data.

    Returns the tuple `(fig, ax)`.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels

    if normalize:
        title_template = 'Normalized confusion matrix for identifier {}'
    else:
        title_template = ('Confusion matrix, without normalization '
                          'for identifier {}')
    title = title_template.format(id_ or "???")

    # Confusion matrix plus the subset of class ids present in the data
    all_classes = np.arange(n_classes)
    cm = confusion_matrix(y_true, y_pred)
    present_classes = all_classes[unique_labels(y_true, y_pred)]
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals

    # Translate integer class ids into their display strings
    from utime import defaults
    tick_names = [defaults.class_int_to_stage_string[class_id]
                  for class_id in present_classes]

    fig, ax = plt.subplots()
    image = ax.imshow(cm, interpolation='nearest', cmap=plt.get_cmap(cmap))
    ax.figure.colorbar(image, ax=ax)
    n_rows, n_cols = cm.shape[0], cm.shape[1]
    # One tick per matrix row/column, labelled with the display strings
    ax.set(xticks=np.arange(n_cols),
           yticks=np.arange(n_rows),
           xticklabels=tick_names, yticklabels=tick_names,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Slant the x tick labels so that long names do not collide.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Write every cell value into the plot; cells above half of the maximum
    # get white text, the others black.
    cell_format = '.3f' if normalize else 'd'
    half_max = cm.max() / 2.
    for row, col in np.ndindex(n_rows, n_cols):
        cell = cm[row, col]
        text_color = "white" if cell > half_max else "black"
        ax.text(col, row, format(cell, cell_format),
                ha="center", va="center",
                color=text_color)
    fig.tight_layout()
    return fig, ax

Source File: lib_plot.py From Realtime-Action-Recognition with MIT License

4 votes

def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues,
                          size=None):
    """ (Copied from sklearn website)
    Plot the confusion matrix of `y_true` against `y_pred` as a heat map.

    With `normalize=True` each row of the matrix is divided by its row sum.
    When `title` is falsy a default title matching the `normalize` setting
    is used. `classes` is indexed with the labels found in the data to get
    the tick labels. `size` is read as (width, height) in inches and
    defaults to (12, 8).

    Returns the tuple `(ax, cm)`.
    """
    if not title:
        title = ('Normalized confusion matrix' if normalize
                 else 'Confusion matrix, without normalization')

    cm = confusion_matrix(y_true, y_pred)
    # Keep only the entries of `classes` whose label occurs in the data
    classes = classes[unique_labels(y_true, y_pred)]
    if not normalize:
        print('Display confusion matrix without normalization ...')
    else:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        print("Display normalized confusion matrix ...")

    fig, ax = plt.subplots()
    if size is None:
        size = (12, 8)
    width, height = size[0], size[1]
    fig.set_size_inches(width, height)

    image = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(image, ax=ax)
    n_rows, n_cols = cm.shape[0], cm.shape[1]
    # One tick per matrix row/column, labelled with the class entries
    ax.set(xticks=np.arange(n_cols),
           yticks=np.arange(n_rows),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')
    ax.set_ylim([-0.5, len(classes)-0.5])

    # Slant the x tick labels so that long names do not collide.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Write every cell value into the plot; cells above half of the maximum
    # get white text, the others black.
    cell_format = '.2f' if normalize else 'd'
    half_max = cm.max() / 2.
    for row, col in np.ndindex(n_rows, n_cols):
        cell = cm[row, col]
        text_color = "white" if cell > half_max else "black"
        ax.text(col, row, format(cell, cell_format),
                ha="center", va="center",
                color=text_color)
    fig.tight_layout()
    return ax, cm


# Drawings ==============================================================

Source File: test.py From rasa-for-botfront with Apache License 2.0

4 votes

def plot_story_evaluation(
    test_y,
    predictions,
    report,
    precision,
    f1,
    accuracy,
    in_training_data_fraction,
    out_directory,
    disable_plotting,
):
    """Log the story evaluation table and, unless disabled, plot the results.

    The evaluation table is always logged. When `disable_plotting` is falsy,
    the confusion matrix of `test_y` against `predictions` is additionally
    plotted and saved as "story_confmat.pdf" inside `out_directory`.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    import matplotlib.pyplot as plt
    from rasa.nlu.test import plot_confusion_matrix

    log_evaluation_table(
        test_y,
        "ACTION",
        report,
        precision,
        f1,
        accuracy,
        in_training_data_fraction,
        include_report=True,
    )

    if not disable_plotting:
        matrix = confusion_matrix(test_y, predictions)
        action_labels = unique_labels(test_y, predictions)
        plot_confusion_matrix(
            matrix, classes=action_labels, title="Action Confusion matrix"
        )

        # plot_confusion_matrix is not handed a figure, so pick up the
        # current one to resize and save it.
        figure = plt.gcf()
        figure.set_size_inches(20, 20)
        target_path = os.path.join(out_directory, "story_confmat.pdf")
        figure.savefig(target_path, bbox_inches="tight")

Source File: classification_metrics_utils.py From sciwing with MIT License

4 votes

def get_confusion_matrix_and_labels(
        predicted_tag_indices: List[List[int]],
        true_tag_indices: List[List[int]],
        true_masked_label_indices: List[List[int]],
        pred_labels_mask: List[List[int]] = None,
    ) -> (np.array, List[int]):
        """ Builds the confusion matrix over the classes that are not masked out
        and returns it together with the list of those classes


        Parameters
        ----------
        predicted_tag_indices : List[List[int]]
            Predicted tag indices for a batch
        true_tag_indices : List[List[int]]
            True tag indices for a batch
        true_masked_label_indices : List[List[int]]
            Every integer is either a 0 or 1. Each class value found in
            `true_tag_indices` at a position marked with 1 is left out of
            the confusion matrix
        pred_labels_mask : List[List[int]]
            Optional mask applied in the same way to `predicted_tag_indices`;
            the class values it selects are left out as well

        Returns
        -------
        (np.array, List[int])
            The confusion matrix restricted to the remaining classes, and
            the list of those classes
        """
        # class values sitting under the true-label mask are excluded
        true_mask = torch.BoolTensor(true_masked_label_indices).cpu()
        true_tags = torch.tensor(true_tag_indices, dtype=torch.long)
        selected_true = torch.masked_select(true_tags, true_mask)
        masked_classes = list(set(selected_true.tolist()))

        # optionally exclude the class values under the prediction mask too
        if pred_labels_mask is not None:
            pred_mask = torch.BoolTensor(pred_labels_mask).cpu()
            pred_tags = torch.tensor(predicted_tag_indices, dtype=torch.long)
            selected_pred = torch.masked_select(pred_tags, pred_mask)
            masked_classes = masked_classes + list(set(selected_pred.tolist()))

        # flatten the per-instance lists into one array each
        predicted_tags_flat = np.array(
            list(itertools.chain.from_iterable(predicted_tag_indices))
        )
        labels_numpy = np.array(
            list(itertools.chain.from_iterable(true_tag_indices))
        )

        # every class seen in either array, minus the masked ones
        classes = [
            class_
            for class_ in unique_labels(labels_numpy, predicted_tags_flat)
            if class_ not in masked_classes
        ]

        confusion_mtrx = confusion_matrix(
            labels_numpy, predicted_tags_flat, labels=classes
        )
        return confusion_mtrx, classes

Source File: PlotClass.py From ldgcnn with MIT License

4 votes

def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
        """
        Print the confusion matrix of `y_true` against `y_pred` and plot it
        as a heat map.

        With `normalize=True` each row of the matrix is divided by its row
        sum. When `title` is falsy a default title matching the `normalize`
        setting is used. `classes` is indexed with the labels found in the
        data to get the tick labels.

        Returns the tuple `(cm, ax)`.
        """
        if not title:
            title = ('Normalized confusion matrix' if normalize
                     else 'Confusion matrix, without normalization')

        cm = confusion_matrix(y_true, y_pred)
        # Keep only the entries of `classes` whose label occurs in the data
        classes = classes[unique_labels(y_true, y_pred)]
        if not normalize:
            print('Confusion matrix, without normalization')
        else:
            row_totals = cm.sum(axis=1)[:, np.newaxis]
            cm = cm.astype('float') / row_totals
            print("Normalized confusion matrix")

        print(cm)
        fig = plt.figure(figsize=(14, 10.8))
        ax = fig.add_subplot(111)
        image = ax.imshow(cm, interpolation='nearest', cmap=cmap)
        ax.figure.colorbar(image, ax=ax)
        n_rows, n_cols = cm.shape[0], cm.shape[1]
        # One tick per matrix row/column, labelled with the class entries
        ax.set(xticks=np.arange(n_cols),
               yticks=np.arange(n_rows),
               xticklabels=classes, yticklabels=classes,
               title=title,
               ylabel='True label',
               xlabel='Predicted label')

        # Slant the x tick labels so that long names do not collide.
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
                 rotation_mode="anchor")

        # Per-cell text annotations are not drawn by this variant.
        fig.tight_layout()
        return cm, ax

Python sklearn.utils.multiclass.unique_labels() Examples