Python sklearn.model_selection.check_cv() Examples
The following are 17 code examples of sklearn.model_selection.check_cv(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.model_selection, or try the search function.
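Before the project-specific examples, here is a minimal standalone sketch (not taken from any of the projects below) of the behaviour check_cv() provides; exact defaults and keyword handling vary slightly between scikit-learn versions:

import numpy as np
from sklearn.model_selection import check_cv

X = np.ones(6)
y = np.array([0, 1, 0, 1, 0, 1])

# An integer cv with a classification target resolves to StratifiedKFold;
# without classifier=True (or with a non-classification y) it resolves to KFold.
cv = check_cv(3, y=y, classifier=True)
print(type(cv).__name__)      # StratifiedKFold
print(cv.get_n_splits(X, y))  # 3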
Example #1
Source File: _search.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def check_cv(cv=3, y=None, classifier=False):
    """Dask aware version of ``sklearn.model_selection.check_cv``

    Same as the scikit-learn version, but works if ``y`` is a dask object.
    """
    if cv is None:
        cv = 3

    # If ``cv`` is not an integer, the scikit-learn implementation doesn't
    # touch the ``y`` object, so passing on a dask object is fine
    if not is_dask_collection(y) or not isinstance(cv, numbers.Integral):
        return model_selection.check_cv(cv, y, classifier=classifier)

    if classifier:
        # ``y`` is a dask object. We need to compute the target type
        target_type = delayed(type_of_target, pure=True)(y).compute()
        if target_type in ("binary", "multiclass"):
            return StratifiedKFold(cv)

    return KFold(cv)
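A hypothetical way to exercise this dask-aware variant, assuming dask is installed and the function above is in scope together with its module's imports (this is a sketch, not part of dask-ml's documented API):

import numpy as np
import dask.array as da

y_local = np.array([0, 1, 0, 1, 1, 0])
y_dask = da.from_array(y_local, chunks=3)

# Integer cv + dask target: the target type is computed via dask.delayed.
cv = check_cv(cv=3, y=y_dask, classifier=True)
print(type(cv).__name__)   # StratifiedKFold

# Non-integer cv: y is never touched, so the dask object passes straight
# through to sklearn.model_selection.check_cv.
cv = check_cv(cv=[(np.arange(4), np.arange(4, 6))], y=y_dask)
print(type(cv).__name__)   # _CVIterableWrapper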
Example #2
Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_cv_iterable_wrapper():
    kf_iter = KFold(n_splits=5).split(X, y)
    kf_iter_wrapped = check_cv(kf_iter)
    # Since the wrapped iterable is enlisted and stored,
    # split can be called any number of times to produce
    # consistent results.
    np.testing.assert_equal(list(kf_iter_wrapped.split(X, y)),
                            list(kf_iter_wrapped.split(X, y)))
    # If the splits are randomized, successive calls to split yields different
    # results
    kf_randomized_iter = KFold(n_splits=5, shuffle=True).split(X, y)
    kf_randomized_iter_wrapped = check_cv(kf_randomized_iter)
    # numpy's assert_array_equal properly compares nested lists
    np.testing.assert_equal(list(kf_randomized_iter_wrapped.split(X, y)),
                            list(kf_randomized_iter_wrapped.split(X, y)))

    try:
        np.testing.assert_equal(list(kf_iter_wrapped.split(X, y)),
                                list(kf_randomized_iter_wrapped.split(X, y)))
        splits_are_equal = True
    except AssertionError:
        splits_are_equal = False
    assert not splits_are_equal, (
        "If the splits are randomized, "
        "successive calls to split should yield different results")
Example #3
Source File: stacking.py From civisml-extensions with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _check_cv(self, y):
    """Overrides base class _check_cv
    """
    # Squeezed target should be 1-dimensional
    if len(y.shape) != 1:
        raise NotImplementedError("StackedClassifier does not currently "
                                  "support multi-column classification "
                                  "problems. If your target is a one-hot "
                                  "encoded multi-class problem, please "
                                  "recast it to a single column.")
    return check_cv(self.cv, y=y, classifier=True)
Example #4
Source File: dataset.py From skorch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __call__(self, dataset, y=None, groups=None):
    bad_y_error = ValueError(
        "Stratified CV requires explicitly passing a suitable y.")
    if (y is None) and self.stratified:
        raise bad_y_error

    cv = self.check_cv(y)
    if self.stratified and not self._is_stratified(cv):
        raise bad_y_error

    # pylint: disable=invalid-name
    len_dataset = get_len(dataset)
    if y is not None:
        len_y = get_len(y)
        if len_dataset != len_y:
            raise ValueError("Cannot perform a CV split if dataset and y "
                             "have different lengths.")

    args = (np.arange(len_dataset),)
    if self._is_stratified(cv):
        args = args + (to_numpy(y),)

    idx_train, idx_valid = next(iter(cv.split(*args, groups=groups)))
    dataset_train = torch.utils.data.Subset(dataset, idx_train)
    dataset_valid = torch.utils.data.Subset(dataset, idx_valid)
    return dataset_train, dataset_valid
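A possible usage sketch, assuming the __call__ above belongs to skorch's CVSplit class in skorch/dataset.py (renamed ValidSplit in later skorch releases); the class name and import path here are an assumption:

import numpy as np
import torch
from torch.utils.data import TensorDataset

# Assumption: the splitter class lives in skorch.dataset under one of these names.
try:
    from skorch.dataset import ValidSplit as Split
except ImportError:
    from skorch.dataset import CVSplit as Split

X = torch.arange(20, dtype=torch.float32).reshape(10, 2)
y = np.array([0, 1] * 5)
ds = TensorDataset(X, torch.as_tensor(y))

splitter = Split(cv=5, stratified=True)
ds_train, ds_valid = splitter(ds, y=y)
print(len(ds_train), len(ds_valid))   # 8 2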
Example #5
Source File: test_split.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_cv_iterable_wrapper():
    y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])

    with warnings.catch_warnings(record=True):
        from sklearn.cross_validation import StratifiedKFold as OldSKF

    cv = OldSKF(y_multiclass, n_folds=3)
    wrapped_old_skf = _CVIterableWrapper(cv)

    # Check if split works correctly
    np.testing.assert_equal(list(cv), list(wrapped_old_skf.split()))

    # Check if get_n_splits works correctly
    assert_equal(len(cv), wrapped_old_skf.get_n_splits())

    kf_iter = KFold(n_splits=5).split(X, y)
    kf_iter_wrapped = check_cv(kf_iter)
    # Since the wrapped iterable is enlisted and stored,
    # split can be called any number of times to produce
    # consistent results.
    np.testing.assert_equal(list(kf_iter_wrapped.split(X, y)),
                            list(kf_iter_wrapped.split(X, y)))
    # If the splits are randomized, successive calls to split yields different
    # results
    kf_randomized_iter = KFold(n_splits=5, shuffle=True).split(X, y)
    kf_randomized_iter_wrapped = check_cv(kf_randomized_iter)
    # numpy's assert_array_equal properly compares nested lists
    np.testing.assert_equal(list(kf_randomized_iter_wrapped.split(X, y)),
                            list(kf_randomized_iter_wrapped.split(X, y)))

    try:
        np.testing.assert_equal(list(kf_iter_wrapped.split(X, y)),
                                list(kf_randomized_iter_wrapped.split(X, y)))
        splits_are_equal = True
    except AssertionError:
        splits_are_equal = False
    assert_false(splits_are_equal, "If the splits are randomized, "
                 "successive calls to split should yield different results")
Example #6
Source File: search.py From sigopt-sklearn with MIT License | 5 votes |
def our_check_cv(cv, X, y, classifier):
    ret = base_check_cv(cv, X, y, classifier)
    return len(ret), list(iter(ret))
Example #7
Source File: search.py From sigopt-sklearn with MIT License | 5 votes |
def our_check_cv(cv, X, y, classifier):
    ret = base_check_cv(cv, y, classifier)
    return ret.n_splits, list(ret.split(X, y=y))
Example #8
Source File: stacking.py From civisml-extensions with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _check_cv(self, y):
    """Overrides base class _check_cv
    """
    return check_cv(self.cv, y=y, classifier=False)
Example #9
Source File: base.py From carl with BSD 3-Clause "New" or "Revised" License | 5 votes |
def check_cv(cv=3, X=None, y=None, classifier=False):
    """Input checker utility for building a cross-validator.

    Parameters
    ----------
    * `cv` [integer, cross-validation generator or an iterable, default=`3`]:
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:
          - integer, to specify the number of folds.
          - An object to be used as a cross-validation generator.
          - An iterable yielding train/test splits.

        For integer/None inputs, if classifier is True and `y` is either
        binary or multiclass, `StratifiedKFold` is used. In all other
        cases, `KFold` is used.

    * `y` [array-like, optional]:
        The target variable for supervised learning problems.

    * `classifier` [boolean, default=`False`]:
        Whether the task is a classification task, in which case
        stratified `KFold` will be used.

    Returns
    -------
    * `checked_cv` [a cross-validator instance]:
        The return value is a cross-validator which generates the
        train/test splits via the `split` method.

    Note
    ----
    This method is backported from scikit-learn 0.18.
    """
    return sklearn_check_cv(cv, y=y, classifier=classifier)
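To complement the docstring's third input kind, a small illustrative sketch (plain scikit-learn, independent of carl) of how an iterable yielding train/test splits is wrapped into a cross-validator whose split() method replays exactly those splits:

import numpy as np
from sklearn.model_selection import check_cv

X = np.ones(6)
custom_splits = [(np.array([0, 1, 2, 3]), np.array([4, 5])),
                 (np.array([2, 3, 4, 5]), np.array([0, 1]))]

# The list of (train, test) index pairs is wrapped, not interpreted.
cv = check_cv(custom_splits)
for train_idx, test_idx in cv.split(X):
    print(train_idx, test_idx)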
Example #10
Source File: fixes.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _set_cv(cv, X, y, classifier):
    """This method returns either a
    `sklearn.cross_validation._PartitionIterator` or
    `sklearn.model_selection.BaseCrossValidator` depending on whether
    sklearn-0.17 or sklearn-0.18 is being used.

    Parameters
    ----------
    cv : int, `_PartitionIterator` or `BaseCrossValidator`
        The CV object or int to check. If an int, will be converted
        into the appropriate class of crossvalidator.

    X : pd.DataFrame or np.ndarray, shape(n_samples, n_features)
        The dataframe or np.ndarray being fit in the grid search.

    y : np.ndarray, shape(n_samples,)
        The target being fit in the grid search.

    classifier : bool
        Whether the estimator being fit is a classifier

    Returns
    -------
    `_PartitionIterator` or `BaseCrossValidator`
    """
    return check_cv(cv, X, y, classifier) if not SK18 else check_cv(cv, y, classifier)
Example #11
Source File: dataset.py From skorch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def check_cv(self, y):
    """Resolve which cross validation strategy is used."""
    y_arr = None
    if self.stratified:
        # Try to convert y to numpy for sklearn's check_cv; if conversion
        # doesn't work, still try.
        try:
            y_arr = to_numpy(y)
        except (AttributeError, TypeError):
            y_arr = y

    if self._is_float(self.cv):
        return self._check_cv_float()
    return self._check_cv_non_float(y_arr)
Example #12
Source File: dataset.py From skorch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _check_cv_non_float(self, y):
    return check_cv(
        self.cv,
        y=y,
        classifier=self.stratified,
    )
Example #13
Source File: cross_validation.py From Pyspatialml with GNU General Public License v3.0 | 5 votes |
def fit(self, X, y=None, groups=None, **fit_params):
    """Run fit method with all sets of parameters

    Args
    ----
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features

    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning

    groups : array-like, shape = [n_samples], optional
        Training vector groups for cross-validation

    **fit_params : dict of string -> object
        Parameters passed to the ``fit`` method of the estimator
    """
    # check estimator and cv methods are valid
    self.cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))

    # check for binary response
    if len(np.unique(y)) > 2:
        raise ValueError('Only a binary response vector is currently supported')

    # check that scoring metric has been specified
    if self.scoring is None:
        raise ValueError('No score function is defined')

    # perform cross validation prediction
    self.y_pred_ = cross_val_predict(
        estimator=self.estimator, X=X, y=y, groups=groups, cv=self.cv,
        method='predict_proba', n_jobs=self.n_jobs, **fit_params)
    self.y_true = y

    # add fold id to the predictions
    self.test_idx_ = [indexes[1] for indexes in self.cv.split(X, y, groups)]
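The pattern used in fit() above, combining check_cv with cross_val_predict, can be reduced to a standalone sketch in plain scikit-learn (this is not Pyspatialml's class, just the same idea on a synthetic dataset):

import numpy as np
from sklearn.base import is_classifier
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import check_cv, cross_val_predict

X, y = make_classification(n_samples=60, n_features=5, random_state=0)
estimator = LogisticRegression()

# Resolve cv, collect out-of-fold probabilities, and keep each fold's test indices.
cv = check_cv(3, y=y, classifier=is_classifier(estimator))
y_pred = cross_val_predict(estimator, X, y, cv=cv, method='predict_proba')
test_idx = [test for _, test in cv.split(X, y)]

print(y_pred.shape)   # (60, 2)
print(len(test_idx))  # 3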
Example #14
Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_check_cv_default_warn():
    # Test that warnings are raised. Will be removed in 0.22
    assert_warns_message(FutureWarning, CV_WARNING, check_cv)
    assert_warns_message(FutureWarning, CV_WARNING, check_cv, None)
    assert_no_warnings(check_cv, cv=5)
Example #15
Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_check_cv():
    X = np.ones(9)
    cv = check_cv(3, classifier=False)
    # Use numpy.testing.assert_equal which recursively compares
    # lists of lists
    np.testing.assert_equal(list(KFold(3).split(X)), list(cv.split(X)))

    y_binary = np.array([0, 1, 0, 1, 0, 0, 1, 1, 1])
    cv = check_cv(3, y_binary, classifier=True)
    np.testing.assert_equal(list(StratifiedKFold(3).split(X, y_binary)),
                            list(cv.split(X, y_binary)))

    y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])
    cv = check_cv(3, y_multiclass, classifier=True)
    np.testing.assert_equal(list(StratifiedKFold(3).split(X, y_multiclass)),
                            list(cv.split(X, y_multiclass)))
    # also works with 2d multiclass
    y_multiclass_2d = y_multiclass.reshape(-1, 1)
    cv = check_cv(3, y_multiclass_2d, classifier=True)
    np.testing.assert_equal(list(StratifiedKFold(3).split(X, y_multiclass_2d)),
                            list(cv.split(X, y_multiclass_2d)))

    assert not np.all(
        next(StratifiedKFold(3).split(X, y_multiclass_2d))[0] ==
        next(KFold(3).split(X, y_multiclass_2d))[0])

    X = np.ones(5)
    y_multilabel = np.array([[0, 0, 0, 0], [0, 1, 1, 0], [0, 0, 0, 1],
                             [1, 1, 0, 1], [0, 0, 1, 0]])
    cv = check_cv(3, y_multilabel, classifier=True)
    np.testing.assert_equal(list(KFold(3).split(X)), list(cv.split(X)))

    y_multioutput = np.array([[1, 2], [0, 3], [0, 0], [3, 1], [2, 0]])
    cv = check_cv(3, y_multioutput, classifier=True)
    np.testing.assert_equal(list(KFold(3).split(X)), list(cv.split(X)))

    assert_raises(ValueError, check_cv, cv="lolo")
Example #16
Source File: split.py From nyaggle with MIT License | 5 votes |
def check_cv(cv: Union[int, Iterable, BaseCrossValidator] = 5,
             y: Optional[Union[pd.Series, np.ndarray]] = None,
             stratified: bool = False,
             random_state: int = 0):
    if cv is None:
        cv = 5

    if isinstance(cv, numbers.Integral):
        if stratified and (y is not None) and (type_of_target(y) in ('binary', 'multiclass')):
            return StratifiedKFold(cv, shuffle=True, random_state=random_state)
        else:
            return KFold(cv, shuffle=True, random_state=random_state)

    return model_selection.check_cv(cv, y, stratified)
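A possible call pattern for this wrapper, assuming the function above is in scope together with its module's imports; the shuffling and fixed random_state are what distinguish it from scikit-learn's own check_cv:

import numpy as np

y = np.array([0, 1, 0, 1, 1, 0, 1, 0])

cv = check_cv(4, y=y, stratified=True, random_state=42)
print(type(cv).__name__)   # StratifiedKFold (shuffled, seeded)

cv = check_cv(4, y=y, stratified=False)
print(type(cv).__name__)   # KFold (shuffled, seeded)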
Example #17
Source File: test_split.py From twitter-stock-recommendation with MIT License | 4 votes |
def test_check_cv():
    X = np.ones(9)
    cv = check_cv(3, classifier=False)
    # Use numpy.testing.assert_equal which recursively compares
    # lists of lists
    np.testing.assert_equal(list(KFold(3).split(X)), list(cv.split(X)))

    y_binary = np.array([0, 1, 0, 1, 0, 0, 1, 1, 1])
    cv = check_cv(3, y_binary, classifier=True)
    np.testing.assert_equal(list(StratifiedKFold(3).split(X, y_binary)),
                            list(cv.split(X, y_binary)))

    y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])
    cv = check_cv(3, y_multiclass, classifier=True)
    np.testing.assert_equal(list(StratifiedKFold(3).split(X, y_multiclass)),
                            list(cv.split(X, y_multiclass)))
    # also works with 2d multiclass
    y_multiclass_2d = y_multiclass.reshape(-1, 1)
    cv = check_cv(3, y_multiclass_2d, classifier=True)
    np.testing.assert_equal(list(StratifiedKFold(3).split(X, y_multiclass_2d)),
                            list(cv.split(X, y_multiclass_2d)))

    assert_false(np.all(
        next(StratifiedKFold(3).split(X, y_multiclass_2d))[0] ==
        next(KFold(3).split(X, y_multiclass_2d))[0]))

    X = np.ones(5)
    y_multilabel = np.array([[0, 0, 0, 0], [0, 1, 1, 0], [0, 0, 0, 1],
                             [1, 1, 0, 1], [0, 0, 1, 0]])
    cv = check_cv(3, y_multilabel, classifier=True)
    np.testing.assert_equal(list(KFold(3).split(X)), list(cv.split(X)))

    y_multioutput = np.array([[1, 2], [0, 3], [0, 0], [3, 1], [2, 0]])
    cv = check_cv(3, y_multioutput, classifier=True)
    np.testing.assert_equal(list(KFold(3).split(X)), list(cv.split(X)))

    # Check if the old style classes are wrapped to have a split method
    X = np.ones(9)
    y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])
    cv1 = check_cv(3, y_multiclass, classifier=True)

    with warnings.catch_warnings(record=True):
        from sklearn.cross_validation import StratifiedKFold as OldSKF

    cv2 = check_cv(OldSKF(y_multiclass, n_folds=3))
    np.testing.assert_equal(list(cv1.split(X, y_multiclass)),
                            list(cv2.split()))

    assert_raises(ValueError, check_cv, cv="lolo")