Python sklearn.model_selection.cross_validate() Examples
The following are 25 code examples of sklearn.model_selection.cross_validate(), drawn from open source projects. The source file, originating project, and license are noted above each example.
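Before the project-specific examples, here is a minimal, self-contained sketch of the basic call using only the standard scikit-learn API (not tied to any of the projects below): cross_validate fits and scores the estimator on each fold and returns a dict of per-fold arrays.

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_validate

    X, y = load_iris(return_X_y=True)

    # Returns a dict with 'fit_time', 'score_time' and 'test_score' arrays,
    # one entry per fold.
    results = cross_validate(LogisticRegression(max_iter=1000), X, y,
                             cv=5, scoring='accuracy')
    print(np.mean(results['test_score']), np.std(results['test_score']))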
Example #1
Source File: encoding_examples.py From category_encoders with BSD 3-Clause "New" or "Revised" License | 6 votes |
def score_models(clf, X, y, encoder, runs=1):
    """
    Takes a classifier that supports multiclass classification, plus X and y,
    and returns cross-validation scores.
    """
    scores = []

    X_test = None
    for _ in range(runs):
        X_test = encoder().fit_transform(X, y)

        # Some models, like logistic regression, like normalized features; otherwise they
        # underperform and/or take a long time to converge.
        # To be rigorous, we should have trained the normalization on each fold individually
        # via pipelines. See grid_search_example to learn how to do it.
        X_test = StandardScaler().fit_transform(X_test)

        scores.append(cross_validate(clf, X_test, y, n_jobs=1, cv=5)['test_score'])
        gc.collect()

    # Flatten the list of per-run fold-score arrays into a single list of floats.
    scores = [score for fold_scores in scores for score in fold_scores]

    return float(np.mean(scores)), float(np.std(scores)), scores, X_test.shape[1]
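The comment above notes that, to be rigorous, the scaler should be fitted inside each fold rather than on the full dataset. Below is a hedged sketch of that pipeline-based variant; it is generic scikit-learn usage, not the project's own grid_search_example.

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_validate
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    X, y = make_classification(n_samples=300, n_features=10, random_state=0)

    # Putting the scaler inside the pipeline means it is re-fit on each
    # training split, so no information from the test fold leaks into scaling.
    pipe = Pipeline([('scaler', StandardScaler()),
                     ('clf', LogisticRegression(max_iter=1000))])

    scores = cross_validate(pipe, X, y, cv=5, n_jobs=1)['test_score']
    print(scores)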
Example #2
Source File: BaseDensityEstimator.py From Conditional_Density_Estimation with MIT License | 6 votes |
def eval_by_cv(self, X, Y, n_splits=5, verbose=True):
    """
    Fits the conditional density model with cross-validation by using the score
    function of the BaseDensityEstimator for scoring the various splits.

    Args:
        X: numpy array to be conditioned on - shape: (n_samples, n_dim_x)
        Y: numpy array of y targets - shape: (n_samples, n_dim_y)
        n_splits: number of cross-validation folds (positive integer)
        verbose: the verbosity level
    """
    X, Y = self._handle_input_dimensionality(X, Y, fitting=True)

    cv_results = cross_validate(self, X=X, y=Y, cv=n_splits,
                                return_estimator=True, verbose=verbose)

    test_scores = cv_results['test_score']
    test_scores_max_idx = np.nanargmax(test_scores)
    estimator = cv_results['estimator'][test_scores_max_idx]

    self.set_params(**estimator.get_params())
    self.fit(X, Y)
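The pattern above relies on return_estimator=True, which makes cross_validate keep the per-fold fitted models so the best one can be selected afterwards. A minimal sketch of that pattern with an ordinary scikit-learn estimator (illustrative, not the Conditional_Density_Estimation code):

    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import cross_validate

    X, y = make_regression(n_samples=200, n_features=6, random_state=0)

    cv_results = cross_validate(Ridge(), X, y, cv=5, return_estimator=True)

    # Each entry in 'estimator' is the model fitted on that fold's training split.
    best_idx = np.nanargmax(cv_results['test_score'])
    best_model = cv_results['estimator'][best_idx]
    print(best_model.coef_[:3])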
Example #3
Source File: regression_tests.py From drifter_ml with MIT License | 6 votes |
def tae_cv(self, cv):
    """
    This method performs cross-validation over the trimean absolute error.

    Parameters
    ----------
    * cv : integer
        The number of cross validation folds to perform

    Returns
    -------
    Returns the scores of the k-fold trimean absolute error.
    """
    tae = metrics.make_scorer(self.trimean_absolute_error)
    result = cross_validate(self.reg, self.X, self.y, cv=cv, scoring=tae)
    return self.get_test_score(result)
Example #4
Source File: regression_tests.py From drifter_ml with MIT License | 6 votes |
def tse_cv(self, cv):
    """
    This method performs cross-validation over the trimean squared error.

    Parameters
    ----------
    * cv : integer
        The number of cross validation folds to perform

    Returns
    -------
    Returns the scores of the k-fold trimean squared error.
    """
    tse = metrics.make_scorer(self.trimean_squared_error)
    result = cross_validate(self.reg, self.X, self.y, cv=cv, scoring=tse)
    return self.get_test_score(result)
Example #5
Source File: regression_tests.py From drifter_ml with MIT License | 6 votes |
def mse_cv(self, cv):
    """
    This method performs cross-validation over the mean squared error.

    Parameters
    ----------
    * cv : integer
        The number of cross validation folds to perform

    Returns
    -------
    Returns the scores of the k-fold mean squared error.
    """
    mse = metrics.make_scorer(metrics.mean_squared_error)
    result = cross_validate(self.reg, self.X, self.y, cv=cv, scoring=mse)
    return self.get_test_score(result)
Example #6
Source File: regression_tests.py From drifter_ml with MIT License | 6 votes |
def mae_cv(self, cv):
    """
    This method performs cross-validation over the median absolute error.

    Parameters
    ----------
    * cv : integer
        The number of cross validation folds to perform

    Returns
    -------
    Returns the scores of the k-fold median absolute error.
    """
    mae = metrics.make_scorer(metrics.median_absolute_error)
    result = cross_validate(self.reg, self.X, self.y, cv=cv, scoring=mae)
    return self.get_test_score(result)
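One caveat worth noting for the error-based scorers in the examples above: make_scorer assumes by default that higher values are better, so wrapping an error metric without greater_is_better=False produces a score that cross_validate treats as "larger is better". A minimal sketch of the sign-aware variant, using the standard scikit-learn API rather than drifter_ml's wrappers:

    import numpy as np
    from sklearn import metrics
    from sklearn.datasets import make_regression
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import cross_validate

    X, y = make_regression(n_samples=200, n_features=5, random_state=0)

    # greater_is_better=False makes the scorer negate the error, matching the
    # convention of scikit-learn's built-in 'neg_mean_squared_error'.
    neg_mse = metrics.make_scorer(metrics.mean_squared_error, greater_is_better=False)

    result = cross_validate(LinearRegression(), X, y, cv=5, scoring=neg_mse)
    print(np.mean(result['test_score']))  # negative values; closer to 0 is better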
Example #7
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_cross_validate_return_train_score_warn():
    # Test that warnings are raised. Will be removed in 0.21
    X, y = make_classification(random_state=0)
    estimator = MockClassifier()

    result = {}
    for val in [False, True, 'warn']:
        result[val] = assert_no_warnings(cross_validate, estimator, X, y,
                                         return_train_score=val)

    msg = (
        'You are accessing a training score ({!r}), '
        'which will not be available by default '
        'any more in 0.21. If you need training scores, '
        'please set return_train_score=True').format('train_score')

    train_score = assert_warns_message(FutureWarning, msg,
                                       result['warn'].get, 'train_score')
    assert np.allclose(train_score, result[True]['train_score'])
    assert 'train_score' not in result[False]
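As the test above hints, the deprecation completed in scikit-learn 0.21: return_train_score now defaults to False, so training scores have to be requested explicitly. A short sketch against the current API:

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_validate

    X, y = make_classification(random_state=0)

    # With the default return_train_score=False only test scores and timings come back.
    default_results = cross_validate(LogisticRegression(max_iter=1000), X, y, cv=5)
    assert 'train_score' not in default_results

    # Opting in adds the per-fold training scores.
    full_results = cross_validate(LogisticRegression(max_iter=1000), X, y, cv=5,
                                  return_train_score=True)
    print(sorted(full_results))  # ['fit_time', 'score_time', 'test_score', 'train_score']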
Example #8
Source File: test_anomaly_detectors.py From gordo with GNU Affero General Public License v3.0 | 6 votes |
def test_diff_detector_require_thresholds(require_threshold: bool):
    """
    Should fail if requiring thresholds, but not calling cross_validate
    """
    X = pd.DataFrame(np.random.random((100, 5)))
    y = pd.DataFrame(np.random.random((100, 2)))

    model = DiffBasedAnomalyDetector(
        base_estimator=MultiOutputRegressor(LinearRegression()),
        require_thresholds=require_threshold,
    )

    model.fit(X, y)

    if require_threshold:
        # FAIL: Forgot to call .cross_validate to calculate thresholds.
        with pytest.raises(AttributeError):
            model.anomaly(X, y)

        model.cross_validate(X=X, y=y)
        model.anomaly(X, y)
    else:
        # thresholds not required
        model.anomaly(X, y)
Example #9
Source File: test_anomaly_detectors.py From gordo with GNU Affero General Public License v3.0 | 6 votes |
def test_diff_detector_cross_validate(return_estimator: bool):
    """
    DiffBasedAnomalyDetector.cross_validate implementation should be the same
    as sklearn.model_selection.cross_validate if called the same.

    And it always will update `return_estimator` to True, as it requires
    the intermediate models to calculate the thresholds
    """
    X = np.random.random((100, 10))
    y = np.random.random((100, 1))

    model = DiffBasedAnomalyDetector(base_estimator=LinearRegression())

    cv = TimeSeriesSplit(n_splits=3)
    cv_results_da = model.cross_validate(
        X=X, y=y, cv=cv, return_estimator=return_estimator
    )
    cv_results_sk = cross_validate(model, X=X, y=y, cv=cv, return_estimator=True)

    assert cv_results_da.keys() == cv_results_sk.keys()
Example #10
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def check_cross_validate_single_metric(clf, X, y, scores):
    (train_mse_scores, test_mse_scores, train_r2_scores,
     test_r2_scores) = scores
    # Test single metric evaluation when scoring is string or singleton list
    for (return_train_score, dict_len) in ((True, 4), (False, 3)):
        # Single metric passed as a string
        if return_train_score:
            # It must be True by default
            mse_scores_dict = cross_validate(clf, X, y, cv=5,
                                             scoring='neg_mean_squared_error')
            assert_array_almost_equal(mse_scores_dict['train_score'],
                                      train_mse_scores)
        else:
            mse_scores_dict = cross_validate(clf, X, y, cv=5,
                                             scoring='neg_mean_squared_error',
                                             return_train_score=False)
        assert_true(isinstance(mse_scores_dict, dict))
        assert_equal(len(mse_scores_dict), dict_len)
        assert_array_almost_equal(mse_scores_dict['test_score'],
                                  test_mse_scores)

        # Single metric passed as a list
        if return_train_score:
            # It must be True by default
            r2_scores_dict = cross_validate(clf, X, y, cv=5, scoring=['r2'])
            assert_array_almost_equal(r2_scores_dict['train_r2'],
                                      train_r2_scores)
        else:
            r2_scores_dict = cross_validate(clf, X, y, cv=5, scoring=['r2'],
                                            return_train_score=False)
        assert_true(isinstance(r2_scores_dict, dict))
        assert_equal(len(r2_scores_dict), dict_len)
        assert_array_almost_equal(r2_scores_dict['test_r2'], test_r2_scores)
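The key-naming convention this test exercises is easy to trip over: a scoring string yields generic 'test_score'/'train_score' keys, while a list or dict of scorers yields 'test_<name>'/'train_<name>'. A small sketch with current scikit-learn defaults (return_train_score=False):

    from sklearn.datasets import make_regression
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import cross_validate

    X, y = make_regression(n_samples=150, n_features=4, random_state=0)

    # Single metric as a string -> generic 'test_score' key.
    single = cross_validate(Ridge(), X, y, cv=5, scoring='r2')
    print(sorted(single))  # ['fit_time', 'score_time', 'test_score']

    # Same metric in a list -> key is suffixed with the metric name.
    listed = cross_validate(Ridge(), X, y, cv=5, scoring=['r2'])
    print(sorted(listed))  # ['fit_time', 'score_time', 'test_r2']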
Example #11
Source File: tester.py From Text-Classification-Benchmark with MIT License | 5 votes |
def get_cv_scores(estimator, X, y, scoring, cv=5):
    return cross_validate(estimator, X, y,
                          cv=cv, n_jobs=1,
                          scoring=scoring,
                          return_train_score=False)
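A hedged usage sketch for a helper like this; the estimator and scorer names below are illustrative and not taken from the Text-Classification-Benchmark project:

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    X, y = make_classification(n_samples=300, random_state=0)

    # Any list of built-in scorer names works; per-metric test scores come back
    # under 'test_<name>' keys.
    scores = get_cv_scores(LogisticRegression(max_iter=1000), X, y,
                           scoring=['accuracy', 'f1'])
    print(np.mean(scores['test_accuracy']), np.mean(scores['test_f1']))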
Example #12
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_cross_validate_many_jobs():
    # Regression test for #12154: cv='warn' with n_jobs>1 triggered a copy of
    # the parameters, leading to a failure in check_cv because the check used
    # `cv is 'warn'` instead of `cv == 'warn'`.
    X, y = load_iris(return_X_y=True)
    clf = SVC(gamma='auto')
    grid = GridSearchCV(clf, param_grid={'C': [1, 10]})
    cross_validate(grid, X, y, n_jobs=2)
Example #13
Source File: diff.py From gordo with GNU Affero General Public License v3.0 | 5 votes |
def __init__(
    self,
    base_estimator: BaseEstimator = KerasAutoEncoder(kind="feedforward_hourglass"),
    scaler: TransformerMixin = RobustScaler(),
    require_thresholds: bool = True,
    window=None,
):
    """
    Classifier which wraps a ``base_estimator`` and provides a diff-error-based
    approach to anomaly detection.

    It trains a ``scaler`` to the target **after** training, purely for error
    calculations. The underlying ``base_estimator`` is trained with the
    original, unscaled, ``y``.

    Parameters
    ----------
    base_estimator: sklearn.base.BaseEstimator
        The model whose normal ``.fit`` and ``.predict`` methods will be used.
        Defaults to :py:class:`gordo.machine.model.models.KerasAutoEncoder`
        with ``kind='feedforward_hourglass'``.
    scaler: sklearn.base.TransformerMixin
        Defaults to ``sklearn.preprocessing.RobustScaler``.
        Used for transforming model output and the original ``y`` to calculate
        the difference/error between model output and expected values.
    require_thresholds: bool
        Requires calculating ``thresholds_`` via a call to
        :func:`~DiffBasedAnomalyDetector.cross_validate`.
        If this is set (default True) but
        :func:`~DiffBasedAnomalyDetector.cross_validate` was not called before
        calling :func:`~DiffBasedAnomalyDetector.anomaly`, an
        ``AttributeError`` will be raised.
    window: int
        Window size for smoothed thresholds
    """
    self.base_estimator = base_estimator
    self.scaler = scaler
    self.require_thresholds = require_thresholds
    self.window = window
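Putting this docstring together with the gordo tests shown elsewhere on this page (Examples #8, #9 and #25), the intended workflow appears to be fit, then the detector's own cross_validate to establish thresholds, then anomaly. A hedged sketch of that workflow; the import path is assumed from the project layout and may differ between gordo versions:

    import numpy as np
    import pandas as pd
    from sklearn.linear_model import LinearRegression
    from sklearn.multioutput import MultiOutputRegressor

    # Import path assumed for illustration; adjust to your gordo version.
    from gordo.machine.model.anomaly.diff import DiffBasedAnomalyDetector

    X = pd.DataFrame(np.random.random((100, 5)))
    y = pd.DataFrame(np.random.random((100, 2)))

    model = DiffBasedAnomalyDetector(
        base_estimator=MultiOutputRegressor(LinearRegression()),
        require_thresholds=True,
    )

    model.fit(X, y)
    model.cross_validate(X=X, y=y)   # populates the thresholds_ attributes
    anomalies = model.anomaly(X, y)  # would raise AttributeError without the step above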
Example #14
Source File: sklearn.py From optuna with MIT License | 5 votes |
def __call__(self, trial):
    # type: (trial_module.Trial) -> float

    estimator = clone(self.estimator)
    params = self._get_params(trial)

    estimator.set_params(**params)

    if self.enable_pruning:
        scores = self._cross_validate_with_pruning(trial, estimator)
    else:
        scores = cross_validate(
            estimator,
            self.X,
            self.y,
            cv=self.cv,
            error_score=self.error_score,
            fit_params=self.fit_params,
            groups=self.groups,
            return_train_score=self.return_train_score,
            scoring=self.scoring,
        )

    self._store_scores(trial, scores)

    return trial.user_attrs["mean_test_score"]
Example #15
Source File: pipelinecomponents.py From sia-cog with MIT License | 5 votes |
def model_evaluate(X, Y, pipeline):
    try:
        results = []
        if "scoring" in pipeline["options"]:
            if len(pipeline['options']['scoring']) > 0:
                scoring = pipeline['options']['scoring']
            else:
                scoring = "neg_mean_squared_error"
        else:
            scoring = "neg_mean_squared_error"

        kfold = 10
        if "kfold" in pipeline['options']:
            kfold = int(pipeline["options"]["kfold"])

        model = scikitlearn.getSKLearnModel(pipeline['options']['model_name'])
        valresult = cross_validate(model, X, Y, cv=kfold, scoring=scoring,
                                   return_train_score=True)
        model.fit(X, Y)

        for p in valresult:
            # .min()/.max() return scalar values that serialize cleanly to JSON.
            results.append({"param": p,
                            "values": valresult[p].tolist(),
                            "min": valresult[p].min(),
                            "max": valresult[p].max()})

        output = jsonpickle.encode(results, unpicklable=False)
        projectmgr.UpdateExecuteResult(jobid, output)

        picklefile = projectfolder + "/model.out"
        with open(picklefile, "wb") as f:
            pickle.dump(model, f)

        return output
    except Exception as e:
        raise Exception("model_evaluate: " + str(e))
Example #16
Source File: lofo_importance.py From lofo-importance with MIT License | 5 votes |
def _get_cv_score(self, feature_to_remove):
    X, fit_params = self.dataset.getX(feature_to_remove=feature_to_remove,
                                      fit_params=self.fit_params)
    y = self.dataset.y

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cv_results = cross_validate(self.model, X, y, cv=self.cv,
                                    scoring=self.scoring, fit_params=fit_params)
    return cv_results['test_score']
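The surrounding LOFO logic (not shown on this page) compares these per-fold scores with and without each feature. A rough, self-contained sketch of that idea using plain cross_validate; the variable names and model are illustrative, not lofo-importance's API:

    import numpy as np
    import pandas as pd
    from sklearn.datasets import make_regression
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import cross_validate

    X_arr, y = make_regression(n_samples=200, n_features=4, random_state=0)
    X = pd.DataFrame(X_arr, columns=["f0", "f1", "f2", "f3"])

    def cv_score(frame):
        # Mean test score over the folds for a fixed model and CV scheme.
        return np.mean(cross_validate(Ridge(), frame, y, cv=5,
                                      scoring="r2")["test_score"])

    baseline = cv_score(X)
    for col in X.columns:
        importance = baseline - cv_score(X.drop(columns=[col]))
        print(col, round(importance, 4))  # larger drop -> more important feature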
Example #17
Source File: coolscience.py From DataScience-webapp-with-flask with MIT License | 4 votes |
def model_process(dataset=dataset):
    algscore = request.form.get('model')
    res = request.form.get('response')
    kfold = request.form.get('kfold')
    alg, score = algscore.split('.')
    scaling = request.form.get('scaling')
    variables = request.form.getlist('variables')

    from sklearn.model_selection import cross_validate
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline

    df = loadDataset(dataset)
    y = df[str(res)]
    if variables != [] and '' not in variables:
        df = df[list(set(variables + [res]))]
    X = df.drop(str(res), axis=1)
    try:
        X = pd.get_dummies(X)
    except:
        pass

    predictors = X.columns
    if len(predictors) > 10:
        pred = str(len(predictors))
    else:
        pred = ', '.join(predictors)

    if score == 'Classification':
        from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, roc_curve, auc
        scoring = ['precision', 'recall', 'f1', 'accuracy', 'roc_auc']
        if scaling == 'Yes':
            clf = algorithms.classificationModels()[alg]
            mod = Pipeline([('scaler', StandardScaler()), ('clf', clf)])
        else:
            mod = algorithms.classificationModels()[alg]
        fig = plotfun.plot_ROC(X.values, y, mod, int(kfold))
    elif score == 'Regression':
        from sklearn.metrics import explained_variance_score, r2_score, mean_squared_error
        scoring = ['explained_variance', 'r2', 'mean_squared_error']
        if scaling == 'Yes':
            pr = algorithms.regressionModels()[alg]
            mod = Pipeline([('scaler', StandardScaler()), ('clf', pr)])
        else:
            mod = algorithms.regressionModels()[alg]
        fig = plotfun.plot_predVSreal(X, y, mod, int(kfold))

    scores = cross_validate(mod, X, y, cv=int(kfold), scoring=scoring)
    for s in scores:
        scores[s] = str(round(np.mean(scores[s]), 3))

    return render_template('scores.html', scores=scores, dataset=dataset, alg=alg,
                           res=res, kfold=kfold, score=score, predictors=pred,
                           response=str(fig, 'utf-8'))
Example #18
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 4 votes |
def check_cross_validate_multi_metric(clf, X, y, scores):
    # Test multimetric evaluation when scoring is a list / dict
    (train_mse_scores, test_mse_scores, train_r2_scores,
     test_r2_scores) = scores
    all_scoring = (('r2', 'neg_mean_squared_error'),
                   {'r2': make_scorer(r2_score),
                    'neg_mean_squared_error': 'neg_mean_squared_error'})

    keys_sans_train = set(('test_r2', 'test_neg_mean_squared_error',
                           'fit_time', 'score_time'))
    keys_with_train = keys_sans_train.union(
        set(('train_r2', 'train_neg_mean_squared_error')))

    for return_train_score in (True, False):
        for scoring in all_scoring:
            if return_train_score:
                # return_train_score must be True by default
                cv_results = cross_validate(clf, X, y, cv=5, scoring=scoring)
                assert_array_almost_equal(cv_results['train_r2'],
                                          train_r2_scores)
                assert_array_almost_equal(
                    cv_results['train_neg_mean_squared_error'],
                    train_mse_scores)
            else:
                cv_results = cross_validate(clf, X, y, cv=5, scoring=scoring,
                                            return_train_score=False)
            assert_true(isinstance(cv_results, dict))
            assert_equal(set(cv_results.keys()),
                         keys_with_train if return_train_score
                         else keys_sans_train)
            assert_array_almost_equal(cv_results['test_r2'], test_r2_scores)
            assert_array_almost_equal(
                cv_results['test_neg_mean_squared_error'], test_mse_scores)

            # Make sure all the arrays are of np.ndarray type
            assert type(cv_results['test_r2']) == np.ndarray
            assert (type(cv_results['test_neg_mean_squared_error']) ==
                    np.ndarray)
            assert type(cv_results['fit_time']) == np.ndarray
            assert type(cv_results['score_time']) == np.ndarray

            # Ensure all the times are within sane limits
            assert np.all(cv_results['fit_time'] >= 0)
            assert np.all(cv_results['fit_time'] < 10)
            assert np.all(cv_results['score_time'] >= 0)
            assert np.all(cv_results['score_time'] < 10)
Example #19
Source File: classification_tests.py From drifter_ml with MIT License | 4 votes |
def roc_auc_cv(self, cv, average="micro"): """ This method performs cross-validation over roc_auc. Parameters ---------- * cv : integer The number of cross validation folds to perform * average : string, [None, 'binary'(default), 'micro', 'macro', 'samples', 'weighted'] This parameter is required for multiclass/multilabel targets. If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data. 'binary': Only report results for the class specified by pos_label. This is applicable only if targets (y_{true, pred}) are binary. 'micro': Calculate metrics globally by counting the total true positives, false negatives and false positives. 'macro': Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. 'weighted': Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that isnot between precision and recall. 'samples': Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from accuracy_score). Returns ------- Returns a scores of the k-fold roc_auc. """ roc_auc_score = partial(self.roc_auc_score, average=average) roc_auc = metrics.make_scorer(roc_auc_score) result = cross_validate(self.clf, self.X, self.y, cv=cv, scoring=(roc_auc)) return self.get_test_score(result)
Example #20
Source File: classification_tests.py From drifter_ml with MIT License | 4 votes |
def f1_cv(self, cv, average='binary'):
    """
    This method performs cross-validation over the f1-score.

    Parameters
    ----------
    * cv : integer
        The number of cross validation folds to perform
    * average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', 'weighted']
        This parameter is required for multiclass/multilabel targets.
        If None, the scores for each class are returned. Otherwise, this
        determines the type of averaging performed on the data.

        'binary':
            Only report results for the class specified by pos_label.
            This is applicable only if targets (y_{true, pred}) are binary.
        'micro':
            Calculate metrics globally by counting the total true positives,
            false negatives and false positives.
        'macro':
            Calculate metrics for each label, and find their unweighted mean.
            This does not take label imbalance into account.
        'weighted':
            Calculate metrics for each label, and find their average weighted
            by support (the number of true instances for each label). This
            alters 'macro' to account for label imbalance; it can result in an
            F-score that is not between precision and recall.
        'samples':
            Calculate metrics for each instance, and find their average (only
            meaningful for multilabel classification where this differs from
            accuracy_score).

    Returns
    -------
    Returns the scores of the k-fold f1-score.
    """
    average = self.reset_average(average)
    f1_score = partial(self.f1_score, average=average)
    f1 = metrics.make_scorer(f1_score)
    result = cross_validate(self.clf, self.X, self.y, cv=cv, scoring=f1)
    return self.get_test_score(result)
Example #21
Source File: classification_tests.py From drifter_ml with MIT License | 4 votes |
def precision_cv(self, cv, average='binary'):
    """
    This method performs cross-validation over precision.

    Parameters
    ----------
    * cv : integer
        The number of cross validation folds to perform
    * average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', 'weighted']
        This parameter is required for multiclass/multilabel targets.
        If None, the scores for each class are returned. Otherwise, this
        determines the type of averaging performed on the data.

        'binary':
            Only report results for the class specified by pos_label.
            This is applicable only if targets (y_{true, pred}) are binary.
        'micro':
            Calculate metrics globally by counting the total true positives,
            false negatives and false positives.
        'macro':
            Calculate metrics for each label, and find their unweighted mean.
            This does not take label imbalance into account.
        'weighted':
            Calculate metrics for each label, and find their average weighted
            by support (the number of true instances for each label). This
            alters 'macro' to account for label imbalance; it can result in an
            F-score that is not between precision and recall.
        'samples':
            Calculate metrics for each instance, and find their average (only
            meaningful for multilabel classification where this differs from
            accuracy_score).

    Returns
    -------
    Returns the scores of the k-fold precision.
    """
    average = self.reset_average(average)
    precision_score = partial(self.precision_score, average=average)
    precision = metrics.make_scorer(precision_score)
    result = cross_validate(self.clf, self.X, self.y, cv=cv, scoring=precision)
    return self.get_test_score(result)
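The functools.partial step in the examples above binds the averaging strategy before wrapping the metric. With scikit-learn's own metrics this can also be done directly, since make_scorer forwards extra keyword arguments to the metric function. A short sketch using sklearn's precision_score rather than drifter_ml's wrapper:

    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import make_scorer, precision_score
    from sklearn.model_selection import cross_validate

    X, y = make_classification(n_samples=300, n_classes=3, n_informative=4,
                               random_state=0)

    # make_scorer forwards extra keyword arguments to the metric, so the
    # averaging strategy can be bound without functools.partial.
    macro_precision = make_scorer(precision_score, average='macro')

    result = cross_validate(RandomForestClassifier(random_state=0), X, y,
                            cv=5, scoring=macro_precision)
    print(result['test_score'])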
Example #22
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def test_fit_and_score_failing():
    # Create a failing classifier to deliberately fail
    failing_clf = FailingClassifier(FailingClassifier.FAILING_PARAMETER)
    # dummy X data
    X = np.arange(1, 10)
    y = np.ones(9)
    fit_and_score_args = [failing_clf, X, None, dict(), None, None, 0, None, None]
    # passing error score to trigger the warning message
    fit_and_score_kwargs = {'error_score': 0}
    # check if the warning message type is as expected
    assert_warns(FitFailedWarning, _fit_and_score, *fit_and_score_args,
                 **fit_and_score_kwargs)
    # since we're using FailingClassifier, our error will be the following
    error_message = "ValueError: Failing classifier failed as required"
    # the warning message we're expecting to see
    warning_message = ("Estimator fit failed. The score on this train-test "
                       "partition for these parameters will be set to %f. "
                       "Details: \n%s" % (fit_and_score_kwargs['error_score'],
                                          error_message))
    # check if the same warning is triggered
    assert_warns_message(FitFailedWarning, warning_message, _fit_and_score,
                         *fit_and_score_args, **fit_and_score_kwargs)

    # check if warning was raised, with default error_score argument
    warning_message = ("From version 0.22, errors during fit will result "
                       "in a cross validation score of NaN by default. Use "
                       "error_score='raise' if you want an exception "
                       "raised or error_score=np.nan to adopt the "
                       "behavior from version 0.22.")
    with pytest.raises(ValueError):
        assert_warns_message(FutureWarning, warning_message, _fit_and_score,
                             *fit_and_score_args)

    fit_and_score_kwargs = {'error_score': 'raise'}
    # check if exception was raised, with default error_score='raise'
    assert_raise_message(ValueError, "Failing classifier failed as required",
                         _fit_and_score, *fit_and_score_args,
                         **fit_and_score_kwargs)

    # check that functions upstream pass error_score param to _fit_and_score
    error_message = ("error_score must be the string 'raise' or a"
                     " numeric value. (Hint: if using 'raise', please"
                     " make sure that it has been spelled correctly.)")

    assert_raise_message(ValueError, error_message, cross_validate,
                         failing_clf, X, cv=3, error_score='unvalid-string')

    assert_raise_message(ValueError, error_message, cross_val_score,
                         failing_clf, X, cv=3, error_score='unvalid-string')

    assert_raise_message(ValueError, error_message, learning_curve,
                         failing_clf, X, y, cv=3, error_score='unvalid-string')

    assert_raise_message(ValueError, error_message, validation_curve,
                         failing_clf, X, y, 'parameter',
                         [FailingClassifier.FAILING_PARAMETER], cv=3,
                         error_score='unvalid-string')

    assert_equal(failing_clf.score(), 0.)  # FailingClassifier coverage
Example #23
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def check_cross_validate_multi_metric(clf, X, y, scores):
    # Test multimetric evaluation when scoring is a list / dict
    (train_mse_scores, test_mse_scores, train_r2_scores,
     test_r2_scores, fitted_estimators) = scores
    all_scoring = (('r2', 'neg_mean_squared_error'),
                   {'r2': make_scorer(r2_score),
                    'neg_mean_squared_error': 'neg_mean_squared_error'})

    keys_sans_train = {'test_r2', 'test_neg_mean_squared_error',
                       'fit_time', 'score_time'}
    keys_with_train = keys_sans_train.union(
        {'train_r2', 'train_neg_mean_squared_error'})

    for return_train_score in (True, False):
        for scoring in all_scoring:
            if return_train_score:
                # return_train_score must be True by default - deprecated
                cv_results = cross_validate(clf, X, y, cv=5, scoring=scoring,
                                            return_train_score=True)
                assert_array_almost_equal(cv_results['train_r2'],
                                          train_r2_scores)
                assert_array_almost_equal(
                    cv_results['train_neg_mean_squared_error'],
                    train_mse_scores)
            else:
                cv_results = cross_validate(clf, X, y, cv=5, scoring=scoring,
                                            return_train_score=False)
            assert isinstance(cv_results, dict)
            assert_equal(set(cv_results.keys()),
                         keys_with_train if return_train_score
                         else keys_sans_train)
            assert_array_almost_equal(cv_results['test_r2'], test_r2_scores)
            assert_array_almost_equal(
                cv_results['test_neg_mean_squared_error'], test_mse_scores)

            # Make sure all the arrays are of np.ndarray type
            assert type(cv_results['test_r2']) == np.ndarray
            assert (type(cv_results['test_neg_mean_squared_error']) ==
                    np.ndarray)
            assert type(cv_results['fit_time']) == np.ndarray
            assert type(cv_results['score_time']) == np.ndarray

            # Ensure all the times are within sane limits
            assert np.all(cv_results['fit_time'] >= 0)
            assert np.all(cv_results['fit_time'] < 10)
            assert np.all(cv_results['score_time'] >= 0)
            assert np.all(cv_results['score_time'] < 10)
Example #24
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def check_cross_validate_single_metric(clf, X, y, scores):
    (train_mse_scores, test_mse_scores, train_r2_scores,
     test_r2_scores, fitted_estimators) = scores
    # Test single metric evaluation when scoring is string or singleton list
    for (return_train_score, dict_len) in ((True, 4), (False, 3)):
        # Single metric passed as a string
        if return_train_score:
            mse_scores_dict = cross_validate(clf, X, y, cv=5,
                                             scoring='neg_mean_squared_error',
                                             return_train_score=True)
            assert_array_almost_equal(mse_scores_dict['train_score'],
                                      train_mse_scores)
        else:
            mse_scores_dict = cross_validate(clf, X, y, cv=5,
                                             scoring='neg_mean_squared_error',
                                             return_train_score=False)
        assert isinstance(mse_scores_dict, dict)
        assert_equal(len(mse_scores_dict), dict_len)
        assert_array_almost_equal(mse_scores_dict['test_score'],
                                  test_mse_scores)

        # Single metric passed as a list
        if return_train_score:
            # It must be True by default - deprecated
            r2_scores_dict = cross_validate(clf, X, y, cv=5, scoring=['r2'],
                                            return_train_score=True)
            assert_array_almost_equal(r2_scores_dict['train_r2'],
                                      train_r2_scores, True)
        else:
            r2_scores_dict = cross_validate(clf, X, y, cv=5, scoring=['r2'],
                                            return_train_score=False)
        assert isinstance(r2_scores_dict, dict)
        assert_equal(len(r2_scores_dict), dict_len)
        assert_array_almost_equal(r2_scores_dict['test_r2'], test_r2_scores)

    # Test return_estimator option
    mse_scores_dict = cross_validate(clf, X, y, cv=5,
                                     scoring='neg_mean_squared_error',
                                     return_estimator=True)
    for k, est in enumerate(mse_scores_dict['estimator']):
        assert_almost_equal(est.coef_, fitted_estimators[k].coef_)
        assert_almost_equal(est.intercept_, fitted_estimators[k].intercept_)
Example #25
Source File: test_anomaly_detectors.py From gordo with GNU Affero General Public License v3.0 | 4 votes |
def test_diff_detector_threshold(n_features_y: int, n_features_x: int):
    """
    Basic construction logic of thresholds_ attribute in the
    DiffBasedAnomalyDetector
    """
    X = np.random.random((100, n_features_x))
    y = np.random.random((100, n_features_y))

    model = DiffBasedAnomalyDetector(
        base_estimator=MultiOutputRegressor(estimator=LinearRegression())
    )

    # Model has own implementation of cross_validate
    assert hasattr(model, "cross_validate")

    # When initialized it should not have a threshold calculated.
    assert not hasattr(model, "feature_thresholds_")
    assert not hasattr(model, "aggregate_threshold_")
    assert not hasattr(model, "feature_thresholds_per_fold_")
    assert not hasattr(model, "aggregate_thresholds_per_fold_")

    model.fit(X, y)

    # Until it has done cross validation, it has no threshold.
    assert not hasattr(model, "feature_thresholds_")
    assert not hasattr(model, "aggregate_threshold_")
    assert not hasattr(model, "feature_thresholds_per_fold_")
    assert not hasattr(model, "aggregate_thresholds_per_fold_")

    # Calling cross validate should set the threshold for it.
    model.cross_validate(X=X, y=y)

    # Now we have calculated thresholds based on cross validation folds
    assert hasattr(model, "feature_thresholds_")
    assert hasattr(model, "aggregate_threshold_")
    assert hasattr(model, "feature_thresholds_per_fold_")
    assert hasattr(model, "aggregate_thresholds_per_fold_")
    assert isinstance(model.feature_thresholds_, pd.Series)
    assert len(model.feature_thresholds_) == y.shape[1]
    assert all(model.feature_thresholds_.notna())
    assert isinstance(model.feature_thresholds_per_fold_, pd.DataFrame)
    assert isinstance(model.aggregate_thresholds_per_fold_, dict)