Python sklearn.metrics.check_scoring() Examples
The following are 9 code examples of sklearn.metrics.check_scoring(), taken from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions and classes of the module sklearn.metrics.
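Before the examples, here is a minimal sketch of the check_scoring() contract itself (not taken from any of the projects below): it resolves a scoring specification (a string name, a callable, or None) into a scorer callable that is invoked as scorer(estimator, X, y).

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring

X, y = make_classification(random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X, y)

# A string is resolved to a prebuilt scorer ...
scorer = check_scoring(clf, scoring="accuracy")
print(scorer(clf, X, y))  # equivalent to clf.score(X, y) here

# ... and scoring=None falls back to the estimator's own score method.
default_scorer = check_scoring(clf)
print(default_scorer(clf, X, y))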
Example #1
Source File: scorer.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def check_scoring(estimator, scoring=None, **kwargs):
    res = sklearn_check_scoring(estimator, scoring=scoring, **kwargs)
    if callable(scoring):
        # Heuristic to ensure user has not passed a metric
        module = getattr(scoring, "__module__", None)
        if (
            hasattr(module, "startswith")
            and module.startswith("dask_ml.metrics.")
            and not module.startswith("dask_ml.metrics.scorer")
            and not module.startswith("dask_ml.metrics.tests.")
        ):
            raise ValueError(
                "scoring value %r looks like it is a metric "
                "function rather than a scorer. A scorer should "
                "require an estimator as its first parameter. "
                "Please use `make_scorer` to convert a metric "
                "to a scorer." % scoring
            )
    if scoring in SCORERS.keys():
        func, kwargs = SCORERS[scoring]
        return make_scorer(func, **kwargs)
    return res
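The heuristic above exists because metrics and scorers have different signatures, and passing one where the other is expected fails in confusing ways. A small illustration using plain scikit-learn (the names metric and scorer are illustrative):

from sklearn.metrics import accuracy_score, make_scorer

metric = accuracy_score               # a metric: called as metric(y_true, y_pred)
scorer = make_scorer(accuracy_score)  # a scorer: called as scorer(estimator, X, y)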
Example #2
Source File: _incremental.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def _validate_parameters(self, X, y):
    if (self.max_iter is not None) and self.max_iter < 1:
        raise ValueError(
            "Received max_iter={}. max_iter < 1 is not supported".format(
                self.max_iter
            )
        )

    # Make sure dask arrays are passed so error on unknown chunk size is raised
    if isinstance(X, dd.DataFrame):
        X = X.to_dask_array()
    if isinstance(y, (dd.DataFrame, dd.Series)):
        y = y.to_dask_array()

    kwargs = dict(accept_unknown_chunks=False, accept_dask_dataframe=False)
    X = self._check_array(X, **kwargs)
    y = self._check_array(y, ensure_2d=False, **kwargs)
    scorer = check_scoring(self.estimator, scoring=self.scoring)
    return X, y, scorer
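For context, the to_dask_array() conversions above can produce arrays whose row chunk sizes are unknown, which is exactly what the strict _check_array call then rejects. A small sketch, assuming dask and pandas are installed (the names pdf and ddf are illustrative):

import dask.dataframe as dd
import pandas as pd

pdf = pd.DataFrame({"x": range(8)})
ddf = dd.from_pandas(pdf, npartitions=2)

arr = ddf.to_dask_array()                    # row chunk sizes are unknown (nan)
arr_known = ddf.to_dask_array(lengths=True)  # computes the length of each partition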
Example #3
Source File: search.py From sk-dist with Apache License 2.0
def _fit_one_fold(fit_set, models, X, y, scoring, fit_params):
    """
    Fits the given estimator on one fold of training data.
    Scores the fitted estimator against the test fold.
    """
    train = fit_set[0][0]
    test = fit_set[0][1]
    estimator_ = _clone(models[fit_set[1]["model_index"]][1])
    parameters = fit_set[1]["param_set"]
    X_train, y_train = _safe_split(estimator_, X, y, train)
    X_test, y_test = _safe_split(estimator_, X, y, test, train)
    if parameters is not None:
        estimator_.set_params(**parameters)
    estimator_.fit(X_train, y_train, **fit_params)
    scorer = check_scoring(estimator_, scoring=scoring)
    is_multimetric = not callable(scorer)
    out_dct = fit_set[1]
    out_dct["score"] = _score(
        estimator_, X_test, y_test, scorer, is_multimetric
    )
    return out_dct
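The clone-fit-score structure above is the standard per-fold pattern: clone the estimator so each fold starts unfitted, fit on the training indices, then score on the held-out indices. A minimal sketch with plain scikit-learn, using illustrative fold indices rather than sk-dist's internals:

import numpy as np
from sklearn.base import clone
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring

X, y = make_classification(random_state=0)
train, test = np.arange(0, 80), np.arange(80, 100)  # illustrative fold indices
est = LogisticRegression(max_iter=1000)

fold_est = clone(est)  # fresh, unfitted copy for this fold
fold_est.fit(X[train], y[train])
scorer = check_scoring(fold_est, scoring="accuracy")
fold_score = scorer(fold_est, X[test], y[test])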
Example #4
Source File: scoring.py From skorch with BSD 3-Clause "New" or "Revised" License
def _scoring(self, net, X_test, y_test):
    """Resolve scoring and apply it to data. Use cached prediction
    instead of running inference again, if available."""
    scorer = check_scoring(net, self.scoring_)
    return scorer(net, X_test, y_test)
Example #5
Source File: flofo_importance.py From lofo-importance with MIT License
def __init__(self, trained_model, validation_df, features, target,
             scoring, n_jobs=None):
    self.trained_model = trained_model
    self.df = validation_df.copy()
    self.features = features
    self.target = target
    self.n_jobs = n_jobs
    self.scorer = check_scoring(estimator=self.trained_model, scoring=scoring)

    # FLOFO defaults
    self.num_bins = 10
    self.shuffle_func = np.random.permutation
    self.feature_group_len = 2
    self.num_sampling = 10

    min_data_needed = 10 * (self.num_bins ** self.feature_group_len)
    if self.df.shape[0] < min_data_needed:
        raise Exception("Small validation set (<{})".format(min_data_needed))
    if len(self.features) <= self.feature_group_len:
        raise Exception("FLOFO needs more than {} features".format(
            self.feature_group_len))

    if self.n_jobs is not None and self.n_jobs > 1:
        warning_str = ("Warning: If your model is multithreaded, please "
                       "initialise the number of jobs of LOFO to be equal "
                       "to 1, otherwise you may experience performance "
                       "issues.")
        warnings.warn(warning_str)

    self._bin_features()
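With the defaults above, the validation-set floor works out to 1000 rows. A worked instance of the formula (not project code):

num_bins = 10
feature_group_len = 2
min_data_needed = 10 * (num_bins ** feature_group_len)  # 10 * 100 = 1000 rows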
Example #6
Source File: single_best.py From DESlib with BSD 3-Clause "New" or "Revised" License
def _estimate_performances(self, X, y):
    performances = np.zeros(self.n_classifiers_)
    for idx, clf in enumerate(self.pool_classifiers_):
        scorer = check_scoring(clf, self.scoring)
        performances[idx] = scorer(clf, X, y)
    return performances
Example #7
Source File: utils.py From verde with BSD 3-Clause "New" or "Revised" License
def score_estimator(scoring, estimator, coordinates, data, weights=None):
    """
    Score the given gridder against the given data using the given metric.

    If the data and predictions have more than 1 component, the scores of
    each component will be averaged.

    Parameters
    ----------
    scoring : str or callable
        A scoring specification known to scikit-learn. See
        :func:`sklearn.metrics.check_scoring`.
    estimator : a Verde gridder
        The gridder to score. Usually derived from
        :class:`verde.base.BaseGridder`.
    coordinates : tuple of arrays
        Arrays with the coordinates of each data point. Should be in the
        following order: (easting, northing, vertical, ...). For the
        specific definition of coordinate systems and what these names
        mean, see the class docstring.
    data : array or tuple of arrays
        The data values of each data point. If the data has more than one
        component, *data* must be a tuple of arrays (one for each
        component).
    weights : None or array or tuple of arrays
        If not None, then the weights assigned to each data point. If more
        than one data component is provided, you must provide a weights
        array for each data component (if not None).

    Returns
    -------
    score : float
        The score.

    """
    coordinates, data, weights = check_fit_input(
        coordinates, data, weights, unpack=False
    )
    predicted = check_data(estimator.predict(coordinates))
    scorer = check_scoring(DummyEstimator, scoring=scoring)
    result = np.mean(
        [
            scorer(
                DummyEstimator(pred.ravel()),
                coordinates,
                data[i].ravel(),
                sample_weight=weights[i],
            )
            for i, pred in enumerate(predicted)
        ]
    )
    return result
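The DummyEstimator trick above lets a scikit-learn scorer grade predictions that were computed elsewhere: the wrapper's predict() ignores its input and returns the stored values. A minimal sketch under that assumption (PredictionHolder is a hypothetical stand-in for verde's internal class):

import numpy as np
from sklearn.metrics import check_scoring

class PredictionHolder:
    """Hypothetical stand-in for verde's DummyEstimator."""

    def __init__(self, predictions):
        self.predictions = predictions

    def fit(self, *args, **kwargs):  # check_scoring requires a fit method
        return self

    def predict(self, X):  # ignores X, returns the stored predictions
        return self.predictions

y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.1, 1.9, 3.2])
scorer = check_scoring(PredictionHolder, scoring="r2")
print(scorer(PredictionHolder(y_pred), None, y_true))  # R^2 of the stored predictions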
Example #8
Source File: utils.py From sk-dist with Apache License 2.0
def _check_multimetric_scoring(estimator, scoring=None):
    """
    Check the scoring parameter in cases when multiple metrics are allowed
    """
    if callable(scoring) or scoring is None or isinstance(scoring, str):
        scorers = {"score": check_scoring(estimator, scoring=scoring)}
        return scorers, False
    else:
        err_msg_generic = ("scoring should either be a single string or "
                           "callable for single metric evaluation or a "
                           "list/tuple of strings or a dict of scorer name "
                           "mapped to the callable for multiple metric "
                           "evaluation. Got %s of type %s"
                           % (repr(scoring), type(scoring)))
        if isinstance(scoring, (list, tuple, set)):
            err_msg = ("The list/tuple elements must be unique "
                       "strings of predefined scorers. ")
            invalid = False
            try:
                keys = set(scoring)
            except TypeError:
                invalid = True
            if invalid:
                raise ValueError(err_msg)

            if len(keys) != len(scoring):
                raise ValueError(err_msg + "Duplicate elements were found in"
                                 " the given list. %r" % repr(scoring))
            elif len(keys) > 0:
                if not all(isinstance(k, str) for k in keys):
                    if any(callable(k) for k in keys):
                        raise ValueError(err_msg +
                                         "One or more of the elements were "
                                         "callables. Use a dict of score "
                                         "name mapped to the scorer "
                                         "callable. Got %r" % repr(scoring))
                    else:
                        raise ValueError(err_msg +
                                         "Non-string types were found in "
                                         "the given list. Got %r"
                                         % repr(scoring))
                scorers = {scorer: check_scoring(estimator, scoring=scorer)
                           for scorer in scoring}
            else:
                raise ValueError(err_msg +
                                 "Empty list was given. %r" % repr(scoring))
        elif isinstance(scoring, dict):
            keys = set(scoring)
            if not all(isinstance(k, str) for k in keys):
                raise ValueError("Non-string types were found in the keys "
                                 "of the given dict. scoring=%r"
                                 % repr(scoring))
            if len(keys) == 0:
                raise ValueError("An empty dict was passed. %r"
                                 % repr(scoring))
            scorers = {key: check_scoring(estimator, scoring=scorer)
                       for key, scorer in scoring.items()}
        else:
            raise ValueError(err_msg_generic)
        return scorers, True
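A hypothetical call pattern for the helper above, assuming it is in scope, showing the two return shapes (the second element of the tuple distinguishes single- from multi-metric mode):

from sklearn.linear_model import LogisticRegression

est = LogisticRegression()

scorers, is_multi = _check_multimetric_scoring(est, scoring="accuracy")
# scorers == {"score": <accuracy scorer>}, is_multi is False

scorers, is_multi = _check_multimetric_scoring(
    est, scoring={"acc": "accuracy", "f1": "f1_macro"}
)
# scorers == {"acc": <scorer>, "f1": <scorer>}, is_multi is True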
Example #9
Source File: static_selection.py From DESlib with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y):
    """Fit the static selection model by selecting an ensemble of
    classifiers containing the base classifiers with the highest
    accuracy on the given dataset.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Data used to fit the model.

    y : array of shape (n_samples)
        Class labels of each example in X.

    Returns
    -------
    self : object
        Returns self.
    """
    self._validate_parameters()
    X, y = check_X_y(X, y)
    super(StaticSelection, self).fit(X, y)

    self.n_classifiers_ensemble_ = int(
        self.n_classifiers_ * self.pct_classifiers)

    performances = np.zeros(self.n_classifiers_)

    if not self.base_already_encoded_:
        y_encoded = y
    else:
        y_encoded = self.enc_.transform(y)

    for clf_idx, clf in enumerate(self.pool_classifiers_):
        scorer = check_scoring(clf, self.scoring)
        performances[clf_idx] = scorer(clf, X, y_encoded)

    self.clf_indices_ = np.argsort(performances)[::-1][
        0:self.n_classifiers_ensemble_]
    self.ensemble_ = [self.pool_classifiers_[clf_idx]
                      for clf_idx in self.clf_indices_]

    return self
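The selection step relies on a common NumPy idiom: argsort ascending, reverse, then slice the first k indices. A small self-contained illustration with made-up scores:

import numpy as np

performances = np.array([0.7, 0.9, 0.8, 0.6])
k = 2
top_idx = np.argsort(performances)[::-1][:k]
print(top_idx)  # [1 2]: the indices of the two highest scores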