Python sklearn.utils.multiclass.unique_labels() Examples

The following are 30 code examples of sklearn.utils.multiclass.unique_labels(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils.multiclass, or try the search function.
Example #1
Source File: sssrm.py    From brainiak with Apache License 2.0 6 votes vote down vote up
def _init_classes(self, y):
        """Re-encode the labels of every subject as indices in [0,..,C-1]

        Parameters
        ----------

        y : list of arrays of int, each element has shape=[samples_i,]
            Labels of the samples for each subject


        Returns
        -------
        new_y : list of arrays of int, each element has shape=[samples_i,]
            Mapped labels of the samples for each subject

        Note
        ----
            The sorted set of classes used for the mapping is stored in the
            attribute classes_.
        """
        # Pool the labels of all subjects to obtain one shared class list.
        self.classes_ = unique_labels(utils.concatenate_not_none(y))
        # np.digitize yields 1-based bin positions for values found in
        # classes_, hence the shift down to 0-based class indices.
        return [np.digitize(subject_labels, self.classes_) - 1
                for subject_labels in y]
Example #2
Source File: query_labels.py    From ALiPy with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def __init__(self, X, y, mode='LAL_iterative', data_path='.', cls_est=50, train_slt=True, **kwargs):
        """Set up the LAL query strategy and optionally train its selector.

        Parameters
        ----------
        X, y :
            Forwarded unchanged to the parent initializer.
        mode : str
            Must be 'LAL_iterative' or 'LAL_independent' (asserted).
        data_path : str
            Existing directory; the paths of the two ``.npz`` data files are
            built relative to it.
        cls_est : int
            Number of trees of the random forest stored in ``self.model``.
        train_slt : bool
            When true, ``download_data()`` and ``train_selector_from_file()``
            are called at the end of initialization.
        **kwargs :
            Accepted but not used by this initializer.

        Raises
        ------
        ValueError
            If ``data_path`` is not a directory.
        AssertionError
            If ``mode`` is not one of the two supported values.
        """
        super(QueryInstanceLAL, self).__init__(X, y)

        # Only warn (do not fail) when the problem is not binary.
        n_classes = len(unique_labels(self.y))
        if n_classes != 2:
            warnings.warn("This query strategy is implemented for binary classification only.",
                          category=FunctionWarning)

        if not os.path.isdir(data_path):
            raise ValueError("Please pass the directory of the file.")
        iterative_file = 'LAL-iterativetree-simulatedunbalanced-big.npz'
        random_file = 'LAL-randomtree-simulatedunbalanced-big.npz'
        self._iter_path = os.path.join(data_path, iterative_file)
        self._rand_path = os.path.join(data_path, random_file)

        assert mode in ['LAL_iterative', 'LAL_independent']
        self._mode = mode
        self._selector = None
        self.model = RandomForestClassifier(n_estimators=cls_est, oob_score=True, n_jobs=8)

        if not train_slt:
            return
        self.download_data()
        self.train_selector_from_file()
Example #3
Source File: confusion_balancer.py    From scikit-lego with MIT License 6 votes vote down vote up
def fit(self, X, y):
        """
        Fit the wrapped estimator and estimate its smoothed confusion matrix.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples,) training labels.
        :return: Returns an instance of self.
        :raises ValueError: if the wrapped estimator is not a
            ``ProbabilisticClassifier``.
        """
        X, y = check_X_y(X, y, estimator=self.estimator, dtype=FLOAT_DTYPES)

        supports_proba = isinstance(self.estimator, ProbabilisticClassifier)
        if not supports_proba:
            raise ValueError(
                "The ConfusionBalancer meta model only works on classifcation models with .predict_proba."
            )

        self.estimator.fit(X, y)
        self.classes_ = unique_labels(y)

        # Transposed so that rows correspond to predicted labels; the
        # smoothing term is added before each row is normalized to sum to 1.
        predicted = self.estimator.predict(X)
        smoothed = confusion_matrix(y, predicted).T + self.cfm_smooth
        row_totals = smoothed.sum(axis=1).reshape(-1, 1)
        self.cfm_ = smoothed / row_totals
        return self
Example #4
Source File: shapelets.py    From tslearn with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def _preprocess_labels(self, y):
        """Encode labels ``y`` as the categorical target matrix.

        Stores the sorted unique labels in ``classes_`` and the
        label -> index lookup in ``label_to_ind_``. For a two-class problem
        only the column of the second class is returned.

        Raises ``ValueError`` when ``y`` contains a single class.
        """
        self.classes_ = unique_labels(y)
        n_labels = len(self.classes_)
        if n_labels == 1:
            raise ValueError("Classifier can't train when only one class "
                             "is present.")

        # int32/int64 labels are stored as plain Python ints in the lookup.
        cast_to_int = self.classes_.dtype in [numpy.int32, numpy.int64]
        lookup = {}
        for position, label in enumerate(self.classes_):
            key = int(label) if cast_to_int else label
            lookup[key] = position
        self.label_to_ind_ = lookup

        indices = numpy.array([self.label_to_ind_[label] for label in y])
        encoded = to_categorical(indices)
        # Keep only indicator of positive class in the binary case
        return encoded[:, 1:] if n_labels == 2 else encoded
Example #5
Source File: test.py    From rasa_core with Apache License 2.0 6 votes vote down vote up
def plot_story_evaluation(test_y, predictions,
                          report, precision, f1, accuracy,
                          in_training_data_fraction,
                          out_directory):
    """Log the evaluation table and save the action confusion matrix.

    The figure is written to ``story_confmat.pdf`` inside ``out_directory``.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    import matplotlib.pyplot as plt
    from rasa_nlu.test import plot_confusion_matrix

    log_evaluation_table(test_y, "ACTION",
                         report, precision, f1, accuracy,
                         in_training_data_fraction,
                         include_report=True)

    # Tick labels are the union of the labels seen in truth and prediction.
    matrix = confusion_matrix(test_y, predictions)
    class_names = unique_labels(test_y, predictions)
    plot_confusion_matrix(matrix,
                          classes=class_names,
                          title='Action Confusion matrix')

    # Enlarge the current figure before it is written to disk.
    figure = plt.gcf()
    figure.set_size_inches(20, 20)
    target_path = os.path.join(out_directory, "story_confmat.pdf")
    figure.savefig(target_path, bbox_inches='tight')
Example #6
Source File: labels.py    From pumpp with ISC License 6 votes vote down vote up
def fit(self, y):
        """Learn the set of classes present in ``y``.

        Parameters
        ----------
        y : array of shape [n_samples,] or [n_samples, n_classes]
            Target values. The 2-d matrix should only contain 0 and 1,
            represents multilabel classification.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        ValueError
            If the target type is multioutput or ``y`` has no samples.
        """
        target_kind = type_of_target(y)
        # Stored before validation, so it is set even when fit raises below.
        self.y_type_ = target_kind
        if 'multioutput' in target_kind:
            raise ValueError("Multioutput target data is not supported with "
                             "label binarization")

        n_samples = _num_samples(y)
        if n_samples == 0:
            raise ValueError('y has 0 samples: %r' % y)

        self.sparse_input_ = sp.issparse(y)
        self.classes_ = unique_labels(y)
        return self
Example #7
Source File: evaluate.py    From Rasa_NLU_Chi with Apache License 2.0 6 votes vote down vote up
def evaluate_intents(targets, predictions):  # pragma: no cover
    """Log summary statistics and show a confusion matrix for intents.

    Examples without a set intent are filtered out before evaluation."""
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    import matplotlib.pyplot as plt

    # Remember the unfiltered size so the log can report how many were kept.
    total_examples = len(targets)
    targets, predictions = remove_empty_intent_examples(targets, predictions)
    message = ("Intent Evaluation: Only considering those "
               "{} examples that have a defined intent out "
               "of {} examples")
    logger.info(message.format(targets.size, total_examples))
    log_evaluation_table(targets, predictions)

    matrix = confusion_matrix(targets, predictions)
    class_names = unique_labels(targets, predictions)
    plot_confusion_matrix(matrix,
                          classes=class_names,
                          title='Intent Confusion matrix')

    plt.show()
Example #8
Source File: _template.py    From project-template with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fit(self, X, y):
        """Reference ``fit`` for a classifier: validate and memorize the data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,)
            The target values. An array of int.

        Returns
        -------
        self : object
            Returns self.
        """
        # Validate the shapes of X and y; the validated copies are what
        # gets stored below.
        checked_X, checked_y = check_X_y(X, y)

        # Classes seen during fit
        self.classes_ = unique_labels(checked_y)

        # Keep the training data on the estimator
        self.X_, self.y_ = checked_X, checked_y
        return self
Example #9
Source File: test_multiclass.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_unique_labels_non_specific():
    """Run unique_labels over the collected examples of every target type."""
    # Smoke test: every supported format must be accepted without error.
    supported_types = ("binary", "multiclass", "multilabel-indicator")
    for target_type in supported_types:
        for y in EXAMPLES[target_type]:
            unique_labels(y)

    # Inputs that are not array-like are rejected.
    for bad_input in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises(ValueError, unique_labels, bad_input)

    # Target types that are not supported at the moment are rejected too.
    unsupported_types = ("unknown", "continuous", 'continuous-multioutput',
                         'multiclass-multioutput')
    for target_type in unsupported_types:
        for bad_input in EXAMPLES[target_type]:
            assert_raises(ValueError, unique_labels, bad_input)
Example #10
Source File: test_multiclass.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_unique_labels_non_specific():
    """Check unique_labels against the shared example collections."""
    accepted = ["binary", "multiclass", "multilabel-indicator"]
    rejected = ["unknown", "continuous", 'continuous-multioutput',
                'multiclass-multioutput']

    # Smoke test for all supported formats: the call must simply not raise.
    for kind in accepted:
        for sample in EXAMPLES[kind]:
            unique_labels(sample)

    # Non array-like inputs are not supported at the moment.
    for sample in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises(ValueError, unique_labels, sample)

    # Neither are these target types.
    for kind in rejected:
        for sample in EXAMPLES[kind]:
            assert_raises(ValueError, unique_labels, sample)
Example #11
Source File: evaluate.py    From rasa_wechat with Apache License 2.0 6 votes vote down vote up
def run_story_evaluation(story_file, policy_model_path, nlu_model_path,
                         out_file, max_stories):
    """Evaluate the stories and save the action confusion matrix plot.

    The figure is written to ``out_file``.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels

    test_y, preds = collect_story_predictions(story_file, policy_model_path,
                                              nlu_model_path, max_stories)

    log_evaluation_table(test_y, preds)

    # Tick labels are the union of the labels seen in truth and prediction.
    matrix = confusion_matrix(test_y, preds)
    class_names = unique_labels(test_y, preds)
    plot_confusion_matrix(matrix, classes=class_names,
                          title='Action Confusion matrix')

    # Enlarge the current figure before saving it.
    figure = plt.gcf()
    figure.set_size_inches(20, 20)
    figure.savefig(out_file, bbox_inches='tight')
Example #12
Source File: test_multiclass.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_unique_labels_mixed_types():
    """unique_labels must reject mixed target formats and mixed label types."""
    # Mix with binary or multiclass and multilabel, in both argument orders
    single_output = EXAMPLES["multiclass"] + EXAMPLES["binary"]
    for y_multilabel, y_multiclass in product(EXAMPLES["multilabel-indicator"],
                                              single_output):
        assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel)
        assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass)

    # Mixing string and integer labels is rejected as well.
    mixed_label_args = [
        ([[1, 2]], [["a", "d"]]),
        (["1", 2],),
        ([["1", 2], [1, 3]],),
        ([["1", "2"], [2, 3]],),
    ]
    for args in mixed_label_args:
        assert_raises(ValueError, unique_labels, *args)
Example #13
Source File: naive_bayes.py    From scikit-lego with MIT License 5 votes vote down vote up
def fit(self, X: np.array, y: np.array) -> "GaussianMixtureNB":
        """
        Fit one Gaussian mixture per class and per column of X.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples, ) training data.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        if X.ndim == 1:
            X = np.expand_dims(X, 1)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)
        n_columns = X.shape[1]
        self.num_fit_cols_ = n_columns

        # Every mixture is configured identically from the estimator settings.
        mixture_params = dict(
            n_components=self.n_components,
            covariance_type=self.covariance_type,
            tol=self.tol,
            reg_covar=self.reg_covar,
            max_iter=self.max_iter,
            n_init=self.n_init,
            init_params=self.init_params,
            weights_init=self.weights_init,
            means_init=self.means_init,
            precisions_init=self.precisions_init,
            random_state=self.random_state,
            warm_start=self.warm_start,
        )

        for label in self.classes_:
            in_class = y == label
            class_x, class_y = X[in_class], y[in_class]
            # One independent mixture per column, fitted on that column only.
            per_column = []
            for col in range(n_columns):
                column = class_x[:, col].reshape(-1, 1)
                per_column.append(
                    GaussianMixture(**mixture_params).fit(column, class_y)
                )
            self.gmms_[label] = per_column
        return self
Example #14
Source File: bayesian_gmm_classifier.py    From scikit-lego with MIT License 5 votes vote down vote up
def fit(self, X: np.array, y: np.array) -> "BayesianGMMClassifier":
        """
        Fit one Bayesian Gaussian mixture per class found in y.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples, ) training data.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        if X.ndim == 1:
            X = np.expand_dims(X, 1)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)

        # Every per-class mixture shares the estimator's own settings.
        mixture_params = dict(
            n_components=self.n_components,
            covariance_type=self.covariance_type,
            tol=self.tol,
            reg_covar=self.reg_covar,
            max_iter=self.max_iter,
            n_init=self.n_init,
            init_params=self.init_params,
            weight_concentration_prior_type=self.weight_concentration_prior_type,
            weight_concentration_prior=self.weight_concentration_prior,
            mean_precision_prior=self.mean_precision_prior,
            mean_prior=self.mean_prior,
            degrees_of_freedom_prior=self.degrees_of_freedom_prior,
            covariance_prior=self.covariance_prior,
            random_state=self.random_state,
            warm_start=self.warm_start,
            verbose=self.verbose,
            verbose_interval=self.verbose_interval,
        )

        for label in self.classes_:
            in_class = y == label
            class_model = BayesianGaussianMixture(**mixture_params)
            self.gmms_[label] = class_model.fit(X[in_class], y[in_class])
        return self
Example #15
Source File: gmm_classifier.py    From scikit-lego with MIT License 5 votes vote down vote up
def fit(self, X: np.array, y: np.array) -> "GMMClassifier":
        """
        Fit one Gaussian mixture per class found in y.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples, ) training data.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        if X.ndim == 1:
            X = np.expand_dims(X, 1)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)

        # Every per-class mixture shares the estimator's own settings.
        mixture_params = dict(
            n_components=self.n_components,
            covariance_type=self.covariance_type,
            tol=self.tol,
            reg_covar=self.reg_covar,
            max_iter=self.max_iter,
            n_init=self.n_init,
            init_params=self.init_params,
            weights_init=self.weights_init,
            means_init=self.means_init,
            precisions_init=self.precisions_init,
            random_state=self.random_state,
            warm_start=self.warm_start,
            verbose=self.verbose,
            verbose_interval=self.verbose_interval,
        )

        for label in self.classes_:
            in_class = y == label
            class_model = GaussianMixture(**mixture_params)
            self.gmms_[label] = class_model.fit(X[in_class], y[in_class])
        return self
Example #16
Source File: test_multiclass.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_unique_labels():
    """Check unique_labels on multiclass, multilabel and multi-argument input.

    Uses ``range`` rather than the Python-2-only ``xrange`` so the test runs
    unchanged on Python 2 and Python 3.
    """
    # Empty iterable
    assert_raises(ValueError, unique_labels)

    # Multiclass problem
    assert_array_equal(unique_labels(range(10)), np.arange(10))
    assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
    assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))

    # Multilabel indicator: one label per column, even for all-zero columns
    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [1, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))

    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))

    # Several arrays passed
    assert_array_equal(unique_labels([4, 0, 2], range(5)),
                       np.arange(5))
    assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)),
                       np.arange(3))

    # Border line case with binary indicator matrix
    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
    assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))),
                       np.arange(5))
Example #17
Source File: neighbors.py    From scikit-lego with MIT License 5 votes vote down vote up
def fit(self, X: np.ndarray, y: np.ndarray):
        """
        Fit one kernel density model and one log-prior per class.

        :param X: array-like, shape=(n_samples, n_features)
        :param y: array-like, shape=(n_samples)
        :return: Returns an instance of self
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)

        self.classes_ = unique_labels(y)
        self.models_, self.priors_logp_ = {}, {}

        # Every per-class density shares the estimator's own settings.
        kde_params = dict(
            bandwidth=self.bandwidth,
            kernel=self.kernel,
            algorithm=self.algorithm,
            metric=self.metric,
            atol=self.atol,
            rtol=self.rtol,
            breadth_first=self.breath_first,
            leaf_size=self.leaf_size,
            metric_params=self.metric_params,
        )
        n_total = len(X)

        for label in self.classes_:
            class_rows = X[y == label]

            # Density of the features given this class
            self.models_[label] = KernelDensity(**kde_params).fit(class_rows)

            # Log of the fraction of training rows belonging to this class
            self.priors_logp_[label] = np.log(len(class_rows) / n_total)

        return self
Example #18
Source File: test_split.py    From ALiPy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_split1_allclass():
    """Check ``split`` with ``all_class=True`` on the module-level fixtures.

    Verifies the number of splits, the test/label set sizes, that the
    partitions are consistent, and that every initial label set contains all
    classes.
    """
    train_idx, test_idx, label_idx, unlabel_idx = split(X=X,
                                                        y=y,
                                                        all_class=True, split_count=split_count,
                                                        test_ratio=0.3, initial_label_rate=0.05,
                                                        saving_path=None,
                                                        query_type='AllLabels')
    assert len(train_idx) == split_count
    assert len(test_idx) == split_count
    assert len(label_idx) == split_count
    assert len(unlabel_idx) == split_count

    for i in range(split_count):
        train = set(train_idx[i])
        test = set(test_idx[i])
        lab = set(label_idx[i])
        unl = set(unlabel_idx[i])

        assert len(test) == round(0.3 * instance_num)
        assert len(lab) == round(0.05 * len(train))

        # validity: train/test cover all instances, labeled/unlabeled cover train
        traintest = train.union(test)
        labun = lab.union(unl)
        assert traintest == set(range(instance_num))
        assert labun == train

        # is all-class: this comparison was previously evaluated and
        # discarded, so the property was never actually checked.
        assert len(unique_labels(y[label_idx[i]])) == label_num
Example #19
Source File: test_multiclass.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_unique_labels_mixed_types():
    """Mixed target formats or mixed label types must raise ValueError."""
    # Mix with binary or multiclass and multilabel
    non_multilabel = EXAMPLES["multiclass"] + EXAMPLES["binary"]
    pairs = product(EXAMPLES["multilabel-indicator"], non_multilabel)
    for indicator_y, plain_y in pairs:
        # The argument order must not matter.
        assert_raises(ValueError, unique_labels, plain_y, indicator_y)
        assert_raises(ValueError, unique_labels, indicator_y, plain_y)

    # String and integer labels mixed across or within arguments
    for bad_args in (
            ([[1, 2]], [["a", "d"]]),
            (["1", 2],),
            ([["1", 2], [1, 3]],),
            ([["1", "2"], [2, 3]],),
    ):
        assert_raises(ValueError, unique_labels, *bad_args)
Example #20
Source File: test_multiclass.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_unique_labels():
    """Check unique_labels on multiclass, multilabel and multi-argument input."""
    # Calling with no argument at all is an error
    assert_raises(ValueError, unique_labels)

    # Multiclass problem: result is the sorted set of labels
    for labels, expected in (
            (range(10), np.arange(10)),
            (np.arange(10), np.arange(10)),
            ([4, 0, 2], np.array([0, 2, 4])),
    ):
        assert_array_equal(unique_labels(labels), expected)

    # Multilabel indicator: one label per column, even for all-zero columns
    three_rows = np.array([[0, 0, 1],
                           [1, 0, 1],
                           [0, 0, 0]])
    two_rows = np.array([[0, 0, 1],
                         [0, 0, 0]])
    assert_array_equal(unique_labels(three_rows), np.arange(3))
    assert_array_equal(unique_labels(two_rows), np.arange(3))

    # Several arrays passed: labels are pooled
    pooled = unique_labels([4, 0, 2], range(5))
    assert_array_equal(pooled, np.arange(5))
    pooled = unique_labels((0, 1, 2), (0,), (2, 1))
    assert_array_equal(pooled, np.arange(3))

    # Border line case with binary indicator matrix
    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
    same_width = unique_labels(np.ones((4, 5)), np.ones((5, 5)))
    assert_array_equal(same_width, np.arange(5))
Example #21
Source File: robust_soft_learning_vector_quantization.py    From scikit-multiflow with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def partial_fit(self, X, y, classes=None, sample_weight=None):
        """Fit the LVQ model to the given training data and parameters using
        gradient ascent.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples in the number of samples and
            n_features is the number of features.
        y : numpy.ndarray of shape (n_samples, n_targets)
            An array-like with the class labels of all samples in X
        classes : numpy.ndarray, optional (default=None)
            Contains all possible/known class labels. Usage varies depending
            on the learning method.
        sample_weight : Not used.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If this is not the initial fit and ``y`` contains a label that is
            not in ``self.classes_``.
        """
        # The initial-fit flag is tested first, so the label comparison
        # against classes_ is skipped on the first call.
        if self.initial_fit is True or \
                set(unique_labels(y)).issubset(set(self.classes_)):
            X, y = self._validate_train_parms(X, y, classes=classes)
        else:
            # Adjacent literals are used instead of a backslash continuation
            # inside the string, which embedded the next line's indentation
            # in the message.
            raise ValueError('Class {} was not learned - please declare all '
                             'classes in first call of fit/partial_fit'
                             .format(y))

        self._optimize(X, y)
        return self
Example #22
Source File: metrics.py    From hyperparameter_hunter with MIT License 4 votes vote down vote up
def classify_output(target, prediction):
    """Snap each continuous `prediction` value to the nearest label of `target`.
    This is not an output/feature transformer akin to SKLearn's discretization transformers. It
    is meant for the specific case of a classification-like `target` ("binary", "multiclass",
    etc.) paired with a `prediction` that looks "continuous" even though it was made for
    `target`. The most common cause is that `prediction` holds the division-averaged predictions
    collected along the course of a :class:`~hyperparameter_hunter.experiments.CVExperiment`:
    the individual model predictions were classification-like, but disagreement between
    divisions makes their average appear continuous

    Parameters
    ----------
    target: Array-like
        # TODO: ...
    prediction: Array-like
        # TODO: ...

    Returns
    -------
    numpy.array
        # TODO: ...

    Notes
    -----
    Target types used by this function are defined by `sklearn.utils.multiclass.type_of_target`.

    If a `prediction` value is exactly between two `target` values, it will assume the lower of the
    two values. For example, given a single prediction of 1.5 and unique `labels` of [0, 1, 2, 3],
    the value of that prediction will be 1, rather than 2

    Examples
    --------
    >>> import numpy as np
    >>> classify_output(np.array([0, 3, 1, 2]), [0.5, 1.51, 0.66, 4.9])
    array([0, 2, 1, 3])
    >>> classify_output(np.array([0, 1, 2, 3]), [0.5, 1.51, 0.66, 4.9])
    array([0, 2, 1, 3])
    >>> # TODO: ... Add more examples, including binary classification
    """
    # MARK: Might be ignoring 1-dimensional, label encodings, like 2nd case in `test_get_clean_prediction`:
    #   ([1, 0, 1, 0], [0.9, 0.1, 0.8, 0.2], [1.0, 0.0, 1.0, 0.0])
    # FLAG: `unique_labels(*target)` was noted as an alternative to test.
    labels = unique_labels(target)

    def _nearest_label(value):
        # argmin returns the first minimum, so ties resolve to the lower label.
        distances = np.abs(labels - value)
        return labels[distances.argmin()]

    return np.array([_nearest_label(value) for value in prediction])


##################################################
# Miscellaneous Utilities
################################################## 
Example #23
Source File: query_labels.py    From ALiPy with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def __init__(self, X, y, mu=0.1, gamma=0.1, rho=1, lambda_init=0.1, lambda_pace=0.01, **kwargs):
        """Set up the SPAL query strategy and precompute the kernel matrix.

        Parameters
        ----------
        X, y :
            Forwarded to the parent initializer; ``X`` is also used to build
            the kernel matrix.
        mu, gamma, rho, lambda_init, lambda_pace :
            Stored on the instance as ``_mu``, ``_gamma``, ``_rho``,
            ``_lambda_init`` and ``_lambda_pace``; ``_lambda`` starts at
            ``lambda_init``.
        **kwargs :
            ``kernel`` ('rbf' (default), 'poly', 'linear' or a callable
            taking ``X`` and ``Y``), plus the kernel options ``gamma_ker``,
            ``coef0`` and ``degree``.

        Raises
        ------
        ImportError
            If cvxpy cannot be imported.
        NotImplementedError
            If ``kernel`` is neither a known name nor callable.
        TypeError
            If the kernel matrix is not an ndarray.
        ValueError
            If the kernel matrix is not of shape (len(X), len(X)).
        """
        try:
            import cvxpy
        except ImportError:
            # Only a failed import is translated; other errors propagate.
            raise ImportError("This method need cvxpy to solve the QP problem."
                              "Please refer to https://www.cvxpy.org/install/index.html "
                              "install cvxpy manually before using.")
        self._cvxpy = cvxpy

        # K: kernel matrix
        super(QueryInstanceSPAL, self).__init__(X, y)
        ul = unique_labels(self.y)
        if len(ul) != 2:
            warnings.warn("This query strategy is implemented for binary classification only.",
                          category=FunctionWarning)
        elif {1, -1} != set(ul):
            # Re-encode the two labels as +1 / -1. Both masks are computed
            # before any assignment: relabelling in sequence would turn
            # labels {0, 1} into all -1, because the entries just set to 1
            # would match the second label.
            y_temp = np.array(copy.deepcopy(self.y))
            is_first = y_temp == ul[0]
            is_second = y_temp == ul[1]
            y_temp[is_first] = 1
            y_temp[is_second] = -1
            self.y = y_temp

        self._mu = mu
        self._gamma = gamma
        self._rho = rho
        self._lambda_init = lambda_init
        self._lambda_pace = lambda_pace
        self._lambda = lambda_init

        # calc kernel
        self._kernel = kwargs.pop('kernel', 'rbf')
        if self._kernel == 'rbf':
            self._K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma_ker', 1.))
        elif self._kernel == 'poly':
            self._K = polynomial_kernel(X=X,
                                        Y=X,
                                        coef0=kwargs.pop('coef0', 1),
                                        degree=kwargs.pop('degree', 3),
                                        gamma=kwargs.pop('gamma_ker', 1.))
        elif self._kernel == 'linear':
            self._K = linear_kernel(X=X, Y=X)
        elif hasattr(self._kernel, '__call__'):
            self._K = self._kernel(X=np.array(X), Y=np.array(X))
        else:
            raise NotImplementedError

        # A user-supplied kernel may return anything, so validate the result.
        if not isinstance(self._K, np.ndarray):
            raise TypeError('K should be an ndarray')
        if self._K.shape != (len(X), len(X)):
            raise ValueError(
                'kernel should have size (%d, %d)' % (len(X), len(X)))
Example #24
Source File: query_labels.py    From ALiPy with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def __init__(self, X, y, beta=1000, gamma=0.1, rho=1, **kwargs):
        """Set up the BMDR query strategy and precompute the kernel matrix.

        Parameters
        ----------
        X, y :
            Forwarded to the parent initializer; ``X`` is also used to build
            the kernel matrix.
        beta, gamma, rho :
            Stored on the instance as ``_beta``, ``_gamma`` and ``_rho``.
        **kwargs :
            ``kernel`` ('rbf' (default), 'poly', 'linear' or a callable
            taking ``X`` and ``Y``), plus the kernel options ``gamma_ker``,
            ``coef0`` and ``degree``.

        Raises
        ------
        ImportError
            If cvxpy cannot be imported.
        NotImplementedError
            If ``kernel`` is neither a known name nor callable.
        TypeError
            If the kernel matrix is not an ndarray.
        ValueError
            If the kernel matrix is not of shape (len(X), len(X)).
        """
        try:
            import cvxpy
        except ImportError:
            # Only a failed import is translated; other errors propagate.
            raise ImportError("This method need cvxpy to solve the QP problem."
                              "Please refer to https://www.cvxpy.org/install/index.html "
                              "install cvxpy manually before using.")
        self._cvxpy = cvxpy

        # K: kernel matrix
        super(QueryInstanceBMDR, self).__init__(X, y)
        ul = unique_labels(self.y)
        if len(ul) != 2:
            warnings.warn("This query strategy is implemented for binary classification only.",
                          category=FunctionWarning)
        elif {1, -1} != set(ul):
            # Re-encode the two labels as +1 / -1. Both masks are computed
            # before any assignment: relabelling in sequence would turn
            # labels {0, 1} into all -1, because the entries just set to 1
            # would match the second label.
            y_temp = np.array(copy.deepcopy(self.y))
            is_first = y_temp == ul[0]
            is_second = y_temp == ul[1]
            y_temp[is_first] = 1
            y_temp[is_second] = -1
            self.y = y_temp

        self._beta = beta
        self._gamma = gamma
        self._rho = rho

        # calc kernel
        self._kernel = kwargs.pop('kernel', 'rbf')
        if self._kernel == 'rbf':
            self._K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma_ker', 1.))
        elif self._kernel == 'poly':
            self._K = polynomial_kernel(X=X,
                                        Y=X,
                                        coef0=kwargs.pop('coef0', 1),
                                        degree=kwargs.pop('degree', 3),
                                        gamma=kwargs.pop('gamma_ker', 1.))
        elif self._kernel == 'linear':
            self._K = linear_kernel(X=X, Y=X)
        elif hasattr(self._kernel, '__call__'):
            self._K = self._kernel(X=np.array(X), Y=np.array(X))
        else:
            raise NotImplementedError

        # A user-supplied kernel may return anything, so validate the result.
        if not isinstance(self._K, np.ndarray):
            raise TypeError('K should be an ndarray')
        if self._K.shape != (len(X), len(X)):
            raise ValueError(
                'kernel should have size (%d, %d)' % (len(X), len(X)))
Example #25
Source File: ukp_evaluation.py    From acl2019-BERT-argument-classification-and-clustering with Apache License 2.0 4 votes vote down vote up
def analyze_predictions(filepath):
    """Print and return classification metrics for a tab-separated file.

    Each line of ``filepath`` is split on tabs; the first field is the gold
    label and the second the predicted label. A gold/pred count table,
    accuracy and per-label precision/recall/F1 are printed.

    Parameters
    ----------
    filepath : str
        Path of the UTF-8 encoded prediction file.

    Returns
    -------
    tuple
        ``(mean_f1, prec_mapping, rec_mapping)``: the unweighted mean of the
        per-label F1 scores, and dicts mapping each label to its precision
        and recall.

    Raises
    ------
    IndexError
        If a line has fewer than two tab-separated fields.
    ZeroDivisionError
        If the file contains no lines.
    """
    total_sent = 0
    correct_sent = 0
    count = {}

    y_true = []
    y_pred = []

    # The context manager closes the file even if a malformed line raises.
    with open(filepath, encoding='utf8') as prediction_file:
        for line in prediction_file:
            splits = line.strip().split("\t")
            gold = splits[0]
            pred = splits[1]

            total_sent += 1
            if gold == pred:
                correct_sent += 1

            pred_counts = count.setdefault(gold, {})
            pred_counts[pred] = pred_counts.get(pred, 0) + 1

            y_true.append(gold)
            y_pred.append(pred)

    print("gold - pred - Confusion Matrix")
    for gold_label in sorted(count.keys()):
        for pred_label in sorted(count[gold_label].keys()):
            print("%s - %s: %d" % (gold_label, pred_label, count[gold_label][pred_label]))


    print(":: BERT ::")
    print("Acc: %.2f%%" % (correct_sent/total_sent*100) )
    # With average=None the scores come back one per label; they are paired
    # with `labels` by position below.
    labels = unique_labels(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average=None)
    rec = recall_score(y_true, y_pred, average=None)
    f1 = f1_score(y_true, y_pred, average=None)

    for idx, label in enumerate(labels):
        print("\n:: F1 for "+label+" ::")
        print("Prec: %.2f%%" % (prec[idx]*100))
        print("Recall: %.2f%%" % (rec[idx]*100))
        print("F1: %.2f%%" % (f1[idx]*100))


    print("\n:: Macro Weighted for all  ::")
    print("F1: %.2f%%" % (np.mean(f1)*100))

    prec_mapping = {key:value for key, value in zip(labels, prec)}
    rec_mapping = {key:value for key, value in zip(labels, rec)}
    return np.mean(f1), prec_mapping, rec_mapping
Example #26
Source File: plotting.py    From U-Time with MIT License 4 votes vote down vote up
def plot_confusion_matrix(y_true, y_pred, n_classes,
                          normalize=False, id_=None,
                          cmap="Blues"):
    """
    Adapted from sklearn 'plot_confusion_matrix.py'.

    Plot the confusion matrix of `y_true` against `y_pred` as an annotated
    heat map. Normalization can be applied by setting `normalize=True`.

    Args:
        y_true:     True labels. They are used to index
                    ``np.arange(n_classes)``, so they must be integer class
                    indices below ``n_classes``.
        y_pred:     Predicted labels, same requirements as ``y_true``.
        n_classes:  Total number of classes.
        normalize:  If True, every row is divided by its sum. Rows without
                    any true sample are left as zeros instead of NaN.
        id_:        Identifier inserted into the title ("???" if falsy).
        cmap:       Name of the matplotlib colormap passed to
                    ``plt.get_cmap``.

    Returns:
        The matplotlib figure and axes, as the tuple ``(fig, ax)``.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    from utime import defaults

    if normalize:
        title = 'Normalized confusion matrix for identifier {}'.format(id_ or "???")
    else:
        title = 'Confusion matrix, without normalization for identifier {}' \
                ''.format(id_ or "???")

    # Compute confusion matrix
    classes = np.arange(n_classes)
    cm = confusion_matrix(y_true, y_pred)
    # Only keep the classes that actually occur in the data
    classes = classes[unique_labels(y_true, y_pred)]
    if normalize:
        # A label that only occurs in y_pred has an all-zero row; dividing
        # by its sum would produce NaN cells (and a NaN colour threshold),
        # so such rows are kept at zero.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)

    # Get transformed labels
    labels = [defaults.class_int_to_stage_string[i] for i in classes]

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=plt.get_cmap(cmap))
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=labels, yticklabels=labels,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations; cells above
    # half of the maximum get white text so they stay readable.
    fmt = '.3f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return fig, ax
Example #27
Source File: lib_plot.py    From Realtime-Action-Recognition with MIT License 4 votes vote down vote up
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues,
                          size=None):
    """ (Copied from sklearn website)
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    Arguments:
        y_true: True labels. They are used to index `classes`, so they
            must be integer indices into it.
        y_pred: Predicted labels, same requirements as `y_true`.
        classes: Sequence of class names (list, tuple or numpy array).
        normalize: If True, every row is divided by its sum. Rows without
            any true sample are left as zeros instead of NaN.
        title: Plot title; a default one is chosen when falsy.
        cmap: Colormap passed to `imshow`.
        size: (width, height) of the figure in inches, default (12, 8).
    Return:
        ax: The matplotlib axes holding the plot.
        cm: The (optionally normalized) confusion matrix.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data. np.asarray makes the
    # fancy-indexing work for plain lists/tuples of names as well.
    classes = np.asarray(classes)[unique_labels(y_true, y_pred)]
    if normalize:
        # A label that only occurs in y_pred has an all-zero row; dividing
        # by its sum would produce NaN cells, so such rows are kept at zero.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Display normalized confusion matrix ...")
    else:
        print('Display confusion matrix without normalization ...')

    fig, ax = plt.subplots()
    if size is None:
        size = (12, 8)
    fig.set_size_inches(size[0], size[1])

    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')
    # NOTE(review): presumably a workaround for matplotlib versions that
    # clip the first/last row of imshow plots; it also places row 0 at the
    # bottom of the plot -- confirm this orientation is intended.
    ax.set_ylim([-0.5, len(classes)-0.5])

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations; cells above
    # half of the maximum get white text so they stay readable.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax, cm


# Drawings ============================================================== 
Example #28
Source File: test.py    From rasa-for-botfront with Apache License 2.0 4 votes vote down vote up
def plot_story_evaluation(
    test_y,
    predictions,
    report,
    precision,
    f1,
    accuracy,
    in_training_data_fraction,
    out_directory,
    disable_plotting,
):
    """Log the story evaluation table and optionally save a confusion matrix.

    The evaluation table is always logged through `log_evaluation_table`.
    Unless `disable_plotting` is truthy, the confusion matrix of `test_y`
    against `predictions` is plotted and the current pyplot figure is written
    to "story_confmat.pdf" inside `out_directory`.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    import matplotlib.pyplot as plt
    from rasa.nlu.test import plot_confusion_matrix

    log_evaluation_table(
        test_y, "ACTION", report, precision, f1, accuracy,
        in_training_data_fraction, include_report=True,
    )

    if not disable_plotting:
        action_matrix = confusion_matrix(test_y, predictions)
        action_labels = unique_labels(test_y, predictions)
        plot_confusion_matrix(
            action_matrix, classes=action_labels, title="Action Confusion matrix"
        )

        # The figure is fetched via pyplot's current-figure state after the
        # plotting helper ran, then resized and saved.
        figure = plt.gcf()
        figure.set_size_inches(20, 20)
        target_path = os.path.join(out_directory, "story_confmat.pdf")
        figure.savefig(target_path, bbox_inches="tight")
Example #29
Source File: classification_metrics_utils.py    From sciwing with MIT License 4 votes vote down vote up
def get_confusion_matrix_and_labels(
        predicted_tag_indices: List[List[int]],
        true_tag_indices: List[List[int]],
        true_masked_label_indices: List[List[int]],
        pred_labels_mask: List[List[int]] = None,
    ) -> (np.array, List[int]):
        """ Builds the confusion matrix over all classes that are not masked
        and returns it together with the list of those classes


        Parameters
        ----------
        predicted_tag_indices : List[List[int]]
            Predicted tag indices for a batch
        true_tag_indices : List[List[int]]
            True tag indices for a batch
        true_masked_label_indices : List[List[int]]
            Every integer is either a 0 or 1, where 1 will indicate that the
            label in `true_tag_indices` will be ignored
        pred_labels_mask : List[List[int]]
            Optional mask of 0s and 1s, where 1 will indicate that the
            label in `predicted_tag_indices` will be ignored

        Returns
        -------
        (np.array, List[int])
            The confusion matrix restricted to the remaining classes and
            the list of those classes
        """

        def _masked_values(tag_indices, mask):
            # distinct tag values found at the positions where mask is 1
            mask_tensor = torch.BoolTensor(mask).cpu()
            picked = torch.masked_select(
                torch.tensor(tag_indices, dtype=torch.long), mask_tensor
            )
            return set(picked.tolist())

        # classes hit by a mask are excluded from the metrics
        excluded_classes = _masked_values(true_tag_indices, true_masked_label_indices)
        if pred_labels_mask is not None:
            excluded_classes |= _masked_values(predicted_tag_indices, pred_labels_mask)

        # flatten the batch into 1-d arrays of predictions and gold labels
        flat_predictions = np.array(
            list(itertools.chain.from_iterable(predicted_tag_indices))
        )
        flat_labels = np.array(list(itertools.chain.from_iterable(true_tag_indices)))

        # every class seen in either array, minus the excluded ones
        kept_classes = [
            class_
            for class_ in unique_labels(flat_labels, flat_predictions)
            if class_ not in excluded_classes
        ]

        matrix = confusion_matrix(flat_labels, flat_predictions, labels=kept_classes)
        return matrix, kept_classes
Example #30
Source File: PlotClass.py    From ldgcnn with MIT License 4 votes vote down vote up
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    Args:
        y_true: True labels. They are used to index `classes`, so they
            must be integer indices into it.
        y_pred: Predicted labels, same requirements as `y_true`.
        classes: Sequence of class names (list, tuple or numpy array).
        normalize: If True, every row is divided by its sum. Rows without
            any true sample are left as zeros instead of NaN.
        title: Plot title; a default one is chosen when falsy.
        cmap: Colormap passed to `imshow`.

    Returns:
        The (optionally normalized) confusion matrix and the matplotlib
        axes, as the tuple ``(cm, ax)``.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data. np.asarray makes the
    # fancy-indexing work for plain lists/tuples of names as well.
    classes = np.asarray(classes)[unique_labels(y_true, y_pred)]
    if normalize:
        # A label that only occurs in y_pred has an all-zero row; dividing
        # by its sum would produce NaN cells, so such rows are kept at zero.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)
    fig = plt.figure(figsize=(14, 10.8))
    ax = fig.add_subplot(111)
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Per-cell text annotations are currently disabled:
    # fmt = '.2f' if normalize else 'd'
    # thresh = cm.max() / 2.
    # for i in range(cm.shape[0]):
    #     for j in range(cm.shape[1]):
    #         ax.text(j, i, format(cm[i, j], fmt),
    #                 ha="center", va="center",
    #                 color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return cm, ax