Python sklearn.metrics.get_scorer() Examples
The following are 14 code examples of sklearn.metrics.get_scorer(), collected from open-source projects. Each example notes the source file, project, and license it was taken from, so you can consult the original code for more context. You may also want to check out the other available functions and classes of the sklearn.metrics module.
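If you have not used get_scorer() before, the short sketch below (written against the public scikit-learn API, not taken from any of the projects listed here) shows what the function returns and how the resulting scorer is called. All of the examples that follow use the scorer in the same (estimator, X, y) form.

    # Minimal sketch: get_scorer() turns a metric name into a callable
    # scorer with the signature scorer(estimator, X, y).
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score, get_scorer
    from sklearn.model_selection import train_test_split

    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

    scorer = get_scorer('accuracy')        # look up a named scorer
    score = scorer(clf, X_test, y_test)    # scorers take (estimator, X, y)
    assert score == accuracy_score(y_test, clf.predict(X_test))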
Example #1
Source File: test_score_objects.py From Mastering-Elasticsearch-7.0 with MIT License
def test_regression_scorers():
    # Test regression scorers.
    diabetes = load_diabetes()
    X, y = diabetes.data, diabetes.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = Ridge()
    clf.fit(X_train, y_train)
    score1 = get_scorer('r2')(clf, X_test, y_test)
    score2 = r2_score(y_test, clf.predict(X_test))
    assert_almost_equal(score1, score2)
Example #2
Source File: test_score_objects.py From Mastering-Elasticsearch-7.0 with MIT License
def test_thresholded_scorers_multilabel_indicator_data():
    # Test that the scorer work with multilabel-indicator format
    # for multilabel and multi-output multi-class classifier
    X, y = make_multilabel_classification(allow_unlabeled=False,
                                          random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # Multi-output multi-class predict_proba
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    y_proba = clf.predict_proba(X_test)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, np.vstack([p[:, -1] for p in y_proba]).T)
    assert_almost_equal(score1, score2)

    # Multi-output multi-class decision_function
    # TODO Is there any yet?
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    clf._predict_proba = clf.predict_proba
    clf.predict_proba = None
    clf.decision_function = lambda X: [p[:, 1] for p in clf._predict_proba(X)]
    y_proba = clf.decision_function(X_test)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, np.vstack([p for p in y_proba]).T)
    assert_almost_equal(score1, score2)

    # Multilabel predict_proba
    clf = OneVsRestClassifier(DecisionTreeClassifier())
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test))
    assert_almost_equal(score1, score2)

    # Multilabel decision function
    clf = OneVsRestClassifier(LinearSVC(random_state=0))
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    assert_almost_equal(score1, score2)
Example #3
Source File: test_score_objects.py From Mastering-Elasticsearch-7.0 with MIT License
def test_supervised_cluster_scorers():
    # Test clustering scorers against gold standard labeling.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    km = KMeans(n_clusters=3)
    km.fit(X_train)
    for name in CLUSTER_SCORERS:
        score1 = get_scorer(name)(km, X_test, y_test)
        score2 = getattr(cluster_module, name)(y_test, km.predict(X_test))
        assert_almost_equal(score1, score2)
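CLUSTER_SCORERS and cluster_module are fixtures private to scikit-learn's test module. A standalone sketch of the same check for a single clustering metric, using only public scikit-learn APIs, might look like this:

    # Sketch: a supervised clustering scorer compares km.predict(X) against
    # the gold-standard labels, just as the test above asserts.
    from sklearn.cluster import KMeans
    from sklearn.datasets import make_blobs
    from sklearn.metrics import adjusted_rand_score, get_scorer
    from sklearn.model_selection import train_test_split

    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    km = KMeans(n_clusters=3).fit(X_train)

    s1 = get_scorer('adjusted_rand_score')(km, X_test, y_test)
    s2 = adjusted_rand_score(y_test, km.predict(X_test))
    assert abs(s1 - s2) < 1e-12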
Example #4
Source File: eval.py From palladium with Apache License 2.0
def test(dataset_loader_test, model_persister, scoring=None,
         model_version=None):
    with timer(logger.info, "Loading data"):
        X, y = dataset_loader_test()

    with timer(logger.info, "Reading model"):
        model = model_persister.read(version=model_version)
        logger.info(
            'Loaded model version {}'.format(model.__metadata__['version']))

    if not (hasattr(model, 'score') or scoring is not None):
        raise ValueError(
            "Your model doesn't seem to implement a 'score' method. You may "
            "want to define a 'scoring' option in the configuration."
            )

    with timer(logger.info, "Applying model"):
        scores = []
        if scoring is not None:
            if not isinstance(scoring, dict):
                scoring = {'score': scoring}
            for key, scorer in scoring.items():
                scorer = get_scorer(scorer)
                scores.append("{}: {}".format(key, scorer(model, X, y)))
        else:
            scores.append("score: {}".format(model.score(X, y)))

    logger.info("Score: {}.".format('\n '.join(scores)))
    return scores
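One detail worth noting here: get_scorer() accepts either a registered metric name or an existing scorer callable (callables are passed through unchanged), which is why palladium can feed its scoring configuration values straight into it. A minimal sketch of that behaviour, independent of palladium:

    # Sketch: get_scorer() looks up registered names and passes existing
    # scorer callables through untouched (current scikit-learn behaviour).
    from sklearn.metrics import fbeta_score, get_scorer, make_scorer

    by_name = get_scorer('f1_macro')            # resolved from the scorer registry
    custom = make_scorer(fbeta_score, beta=2)   # an already-built scorer callable
    assert get_scorer(custom) is custom         # callables are not re-wrapped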
Example #5
Source File: test_score_objects.py From twitter-stock-recommendation with MIT License
def test_regression_scorers():
    # Test regression scorers.
    diabetes = load_diabetes()
    X, y = diabetes.data, diabetes.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = Ridge()
    clf.fit(X_train, y_train)
    score1 = get_scorer('r2')(clf, X_test, y_test)
    score2 = r2_score(y_test, clf.predict(X_test))
    assert_almost_equal(score1, score2)
Example #6
Source File: test_score_objects.py From twitter-stock-recommendation with MIT License
def test_thresholded_scorers():
    # Test scorers that take thresholds.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    score3 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)
    assert_almost_equal(score1, score3)

    logscore = get_scorer('neg_log_loss')(clf, X_test, y_test)
    logloss = log_loss(y_test, clf.predict_proba(X_test))
    assert_almost_equal(-logscore, logloss)

    # same for an estimator without decision_function
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)

    # test with a regressor (no decision_function)
    reg = DecisionTreeRegressor()
    reg.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(reg, X_test, y_test)
    score2 = roc_auc_score(y_test, reg.predict(X_test))
    assert_almost_equal(score1, score2)

    # Test that an exception is raised on more than two classes
    X, y = make_blobs(random_state=0, centers=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf.fit(X_train, y_train)
    assert_raises(ValueError, get_scorer('roc_auc'), clf, X_test, y_test)
Example #7
Source File: test_score_objects.py From twitter-stock-recommendation with MIT License
def test_thresholded_scorers_multilabel_indicator_data():
    # Test that the scorer work with multilabel-indicator format
    # for multilabel and multi-output multi-class classifier
    X, y = make_multilabel_classification(allow_unlabeled=False,
                                          random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # Multi-output multi-class predict_proba
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    y_proba = clf.predict_proba(X_test)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    # np.vstack needs a sequence, not a generator, on recent NumPy versions
    score2 = roc_auc_score(y_test, np.vstack([p[:, -1] for p in y_proba]).T)
    assert_almost_equal(score1, score2)

    # Multi-output multi-class decision_function
    # TODO Is there any yet?
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    clf._predict_proba = clf.predict_proba
    clf.predict_proba = None
    clf.decision_function = lambda X: [p[:, 1] for p in clf._predict_proba(X)]
    y_proba = clf.decision_function(X_test)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, np.vstack([p for p in y_proba]).T)
    assert_almost_equal(score1, score2)

    # Multilabel predict_proba
    clf = OneVsRestClassifier(DecisionTreeClassifier())
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test))
    assert_almost_equal(score1, score2)

    # Multilabel decision function
    clf = OneVsRestClassifier(LinearSVC(random_state=0))
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    assert_almost_equal(score1, score2)
Example #8
Source File: test_score_objects.py From twitter-stock-recommendation with MIT License
def test_supervised_cluster_scorers():
    # Test clustering scorers against gold standard labeling.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    km = KMeans(n_clusters=3)
    km.fit(X_train)
    for name in CLUSTER_SCORERS:
        score1 = get_scorer(name)(km, X_test, y_test)
        score2 = getattr(cluster_module, name)(y_test, km.predict(X_test))
        assert_almost_equal(score1, score2)
Example #9
Source File: test_score_objects.py From Mastering-Elasticsearch-7.0 with MIT License
def test_classification_scores():
    # Test classification scorers.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LinearSVC(random_state=0)
    clf.fit(X_train, y_train)

    for prefix, metric in [('f1', f1_score), ('precision', precision_score),
                           ('recall', recall_score),
                           ('jaccard', jaccard_score)]:

        score1 = get_scorer('%s_weighted' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='weighted')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s_macro' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='macro')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s_micro' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='micro')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=1)
        assert_almost_equal(score1, score2)

    # test fbeta score that takes an argument
    scorer = make_scorer(fbeta_score, beta=2)
    score1 = scorer(clf, X_test, y_test)
    score2 = fbeta_score(y_test, clf.predict(X_test), beta=2)
    assert_almost_equal(score1, score2)

    # test that custom scorer can be pickled
    unpickled_scorer = pickle.loads(pickle.dumps(scorer))
    score3 = unpickled_scorer(clf, X_test, y_test)
    assert_almost_equal(score1, score3)

    # smoke test the repr:
    repr(fbeta_score)
Example #10
Source File: test_score_objects.py From Mastering-Elasticsearch-7.0 with MIT License
def test_thresholded_scorers():
    # Test scorers that take thresholds.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    score3 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)
    assert_almost_equal(score1, score3)

    logscore = get_scorer('neg_log_loss')(clf, X_test, y_test)
    logloss = log_loss(y_test, clf.predict_proba(X_test))
    assert_almost_equal(-logscore, logloss)

    # same for an estimator without decision_function
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)

    # test with a regressor (no decision_function)
    reg = DecisionTreeRegressor()
    reg.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(reg, X_test, y_test)
    score2 = roc_auc_score(y_test, reg.predict(X_test))
    assert_almost_equal(score1, score2)

    # Test that an exception is raised on more than two classes
    X, y = make_blobs(random_state=0, centers=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf.fit(X_train, y_train)
    with pytest.raises(ValueError, match="multiclass format is not supported"):
        get_scorer('roc_auc')(clf, X_test, y_test)

    # test error is raised with a single class present in model
    # (predict_proba shape is not suitable for binary auc)
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = DecisionTreeClassifier()
    clf.fit(X_train, np.zeros_like(y_train))
    with pytest.raises(ValueError, match="need classifier with two classes"):
        get_scorer('roc_auc')(clf, X_test, y_test)

    # for proba scorers
    with pytest.raises(ValueError, match="need classifier with two classes"):
        get_scorer('neg_log_loss')(clf, X_test, y_test)
Example #11
Source File: fit.py From palladium with Apache License 2.0
def fit(dataset_loader_train, model, model_persister, persist=True,
        activate=True, dataset_loader_test=None, evaluate=False,
        persist_if_better_than=None, scoring=None):
    if persist_if_better_than is not None:
        evaluate = True
        if dataset_loader_test is None:
            raise ValueError(
                "When using 'persist_if_better_than', make sure you also "
                "provide a 'dataset_loader_test'."
                )

    if evaluate and not (hasattr(model, 'score') or scoring is not None):
        raise ValueError(
            "Your model doesn't seem to implement a 'score' method. You may "
            "want to define a 'scoring' option in the configuration."
            )

    if scoring is not None:
        scorer = get_scorer(scoring)
    else:
        def scorer(model, X, y):
            return model.score(X, y)

    with timer(logger.info, "Loading data"):
        X, y = dataset_loader_train()

    with timer(logger.info, "Fitting model"):
        model.fit(X, y)

    if evaluate:
        with timer(logger.debug, "Evaluating model on train set"):
            score_train = scorer(model, X, y)
        annotate(model, {'score_train': score_train})
        logger.info("Train score: {}".format(score_train))

    X, y = None, None
    gc.collect()

    score_test = None
    if evaluate and dataset_loader_test is not None:
        with timer(logger.info, "Loading test data"):
            X_test, y_test = dataset_loader_test()
        with timer(logger.debug, "Evaluating model on test set"):
            score_test = scorer(model, X_test, y_test)
        annotate(model, {'score_test': score_test})
        logger.info("Test score: {}".format(score_test))

    if persist:
        if (persist_if_better_than is not None and
                score_test < persist_if_better_than):
            logger.info("Not persisting model that has a test score "
                        "{} < {}".format(score_test, persist_if_better_than))
        else:
            _persist_model(model, model_persister, activate=activate)

    return model
Example #12
Source File: sklearn_funcs.py From bayesmark with Apache License 2.0
def __init__(self, model, dataset, metric, shuffle_seed=0, data_root=None):
    """Build class that wraps sklearn classifier/regressor CV score for use as an objective function.

    Parameters
    ----------
    model : str
        Which classifier to use, must be key in `MODELS_CLF` or `MODELS_REG` dict depending on
        if dataset is classification or regression.
    dataset : str
        Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file.
    metric : str
        Which sklearn scoring metric to use, in `SCORERS_CLF` list or `SCORERS_REG` dict
        depending on if dataset is classification or regression.
    shuffle_seed : int
        Random seed to use when splitting the data into train and validation in the
        cross-validation splits. This is needed in order to keep the split constant across
        calls. Otherwise there would be extra noise in the objective function for varying splits.
    data_root : str
        Root directory to look for all custom csv files.
    """
    TestFunction.__init__(self)
    data, target, problem_type = load_data(dataset, data_root=data_root)
    assert problem_type in (ProblemType.clf, ProblemType.reg)
    self.is_classifier = problem_type == ProblemType.clf

    # Do some validation on loaded data
    assert isinstance(data, np.ndarray)
    assert isinstance(target, np.ndarray)
    assert data.ndim == 2 and target.ndim == 1
    assert data.shape[0] == target.shape[0]
    assert data.size > 0
    assert data.dtype == np.float_
    assert np.all(np.isfinite(data))  # also catch nan
    assert target.dtype == (np.int_ if self.is_classifier else np.float_)
    assert np.all(np.isfinite(target))  # also catch nan

    model_lookup = MODELS_CLF if self.is_classifier else MODELS_REG
    base_model, fixed_params, api_config = model_lookup[model]

    # New members for model
    self.base_model = base_model
    self.fixed_params = fixed_params
    self.api_config = api_config

    # Always shuffle your data to be safe. Use fixed seed for reprod.
    self.data_X, self.data_Xt, self.data_y, self.data_yt = train_test_split(
        data, target, test_size=0.2, random_state=shuffle_seed, shuffle=True
    )

    assert metric in METRICS, "Unknown metric %s" % metric
    assert metric in METRICS_LOOKUP[problem_type], "Incompatible metric %s with problem type %s" % (
        metric,
        problem_type,
    )
    self.scorer = get_scorer(SklearnModel._METRIC_MAP[metric])
Example #13
Source File: sklearn_funcs.py From bayesmark with Apache License 2.0
def evaluate(self, params):
    """Evaluate the sklearn CV objective at a particular parameter setting.

    Parameters
    ----------
    params : dict(str, object)
        The varying (non-fixed) parameter dict to the sklearn model.

    Returns
    -------
    cv_loss : float
        Average loss over CV splits for sklearn model when tested using the settings in params.
    """
    params = dict(params)  # copy to avoid modification of original
    params.update(self.fixed_params)  # add in fixed params

    # now build the skl object
    clf = self.base_model(**params)

    assert np.all(np.isfinite(self.data_X)), "all features must be finite"
    assert np.all(np.isfinite(self.data_y)), "all targets must be finite"

    # Do the x-val, ignore user warn since we expect BO to try weird stuff
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)
        S = cross_val_score(clf, self.data_X, self.data_y, scoring=self.scorer, cv=CV_SPLITS)
    # Take the mean score across all x-val splits
    cv_score = np.mean(S)

    # Now let's get the generalization error for same hypers
    clf = self.base_model(**params)
    clf.fit(self.data_X, self.data_y)
    generalization_score = self.scorer(clf, self.data_Xt, self.data_yt)

    # get_scorer makes everything a score not a loss, so we need to negate to get the loss back
    cv_loss = -cv_score
    assert np.isfinite(cv_loss), "loss not even finite"
    generalization_loss = -generalization_score
    assert np.isfinite(generalization_loss), "loss not even finite"

    # Unbox to basic float to keep it simple
    cv_loss = cv_loss.item()
    assert isinstance(cv_loss, float)
    generalization_loss = generalization_loss.item()
    assert isinstance(generalization_loss, float)

    # For now, score with same objective. We can later add generalization error
    return cv_loss, generalization_loss
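The negation at the end of this method reflects scikit-learn's convention that scorers are always "greater is better": error-type metrics are registered under neg_* names, so a loss is recovered by flipping the sign. A standalone sketch of that convention (not part of bayesmark):

    # Sketch of the sign convention: error metrics are exposed as negated
    # scores, e.g. 'neg_mean_squared_error', so negate to get the loss back.
    import numpy as np
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import Ridge
    from sklearn.metrics import get_scorer, mean_squared_error
    from sklearn.model_selection import train_test_split

    X, y = load_diabetes(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    reg = Ridge().fit(X_train, y_train)

    neg_mse = get_scorer('neg_mean_squared_error')(reg, X_test, y_test)
    mse = mean_squared_error(y_test, reg.predict(X_test))
    assert np.isclose(-neg_mse, mse)  # negate the score to recover the loss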
Example #14
Source File: test_score_objects.py From twitter-stock-recommendation with MIT License
def test_classification_scores():
    # Test classification scorers.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LinearSVC(random_state=0)
    clf.fit(X_train, y_train)

    for prefix, metric in [('f1', f1_score), ('precision', precision_score),
                           ('recall', recall_score)]:

        score1 = get_scorer('%s_weighted' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='weighted')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s_macro' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='macro')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s_micro' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='micro')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=1)
        assert_almost_equal(score1, score2)

    # test fbeta score that takes an argument
    scorer = make_scorer(fbeta_score, beta=2)
    score1 = scorer(clf, X_test, y_test)
    score2 = fbeta_score(y_test, clf.predict(X_test), beta=2)
    assert_almost_equal(score1, score2)

    # test that custom scorer can be pickled
    unpickled_scorer = pickle.loads(pickle.dumps(scorer))
    score3 = unpickled_scorer(clf, X_test, y_test)
    assert_almost_equal(score1, score3)

    # smoke test the repr:
    repr(fbeta_score)