Python sklearn.model_selection.RepeatedKFold() Examples
The following are 14 code examples of sklearn.model_selection.RepeatedKFold().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.model_selection, or try the search function.

Example #1
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_2d_y():
    """Smoke test: run each splitter on 1d, 2d and multi-label targets.

    The 1d and 2d targets must be split without error.  A multi-label target
    may be rejected with ``ValueError``; when it is, the message must contain
    the expected "Supported target types" text.
    """
    n_samples = 30
    rng = np.random.RandomState(1)
    # Keep the draws in this order so the seeded stream yields the same arrays.
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))

    # Message fragment expected whenever a splitter refuses multi-label input.
    expected_fragment = "Supported target types are: {}. Got 'multilabel".format(
        ('binary', 'multiclass'))

    cv_objects = [
        LeaveOneOut(),
        LeavePOut(p=2),
        KFold(),
        StratifiedKFold(),
        RepeatedKFold(),
        RepeatedStratifiedKFold(),
        ShuffleSplit(),
        StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(),
        LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2),
        GroupKFold(),
        TimeSeriesSplit(),
        PredefinedSplit(test_fold=groups),
    ]

    for cv_obj in cv_objects:
        # Exhaust the generators so every fold is actually computed.
        for target in (y, y_2d):
            list(cv_obj.split(X, target, groups))
        try:
            list(cv_obj.split(X, y_multilabel, groups))
        except ValueError as err:
            assert expected_fragment in str(err)
Example #2
Source File: From autogluon with Apache License 2.0 | 6 votes |
def generate_kfold(X, y=None, n_splits=5, random_state=0, stratified=False, n_repeats=1):
    """Build k-fold train/test index pairs for ``X``.

    Parameters
    ----------
    X : data handed to the splitter's ``split``
    y : labels; stratified splitting is used only when ``stratified`` is
        truthy and ``y`` is not None
    n_splits : number of folds per repetition
    random_state : seed forwarded to the splitter
    stratified : request stratified folds (needs ``y``)
    n_repeats : a value > 1 selects the repeated splitter variants; otherwise
        a single shuffled k-fold is used

    Returns
    -------
    list of ``[train_index, test_index]`` pairs, one per generated fold
    """
    use_strata = stratified and (y is not None)
    repeat = n_repeats > 1

    if use_strata:
        split_args = (X, y)
        if repeat:
            splitter = RepeatedStratifiedKFold(
                n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)
        else:
            splitter = StratifiedKFold(
                n_splits=n_splits, shuffle=True, random_state=random_state)
    else:
        split_args = (X,)
        if repeat:
            splitter = RepeatedKFold(
                n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)
        else:
            splitter = KFold(
                n_splits=n_splits, shuffle=True, random_state=random_state)

    splitter.get_n_splits(*split_args)
    return [
        [train_index, test_index]
        for train_index, test_index in splitter.split(*split_args)
    ]
Example #3
Source File: From twitter-stock-recommendation with MIT License | 6 votes |
def test_2d_y():
    """Smoke test for 2d ``y`` and multi-label targets across the splitters.

    Splitting with a 1d or a column-shaped ``y`` has to succeed.  If a
    splitter raises ``ValueError`` on the multi-label target, the error text
    must mention the supported target types.
    """
    n_samples = 30
    rng = np.random.RandomState(1)
    # The sequence of draws is kept as-is; reordering would change the data.
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))

    allowed_target_types = ('binary', 'multiclass')
    # Text that must appear in the error raised for a multi-label target.
    wanted_text = "Supported target types are: {}. Got 'multilabel".format(
        allowed_target_types)

    all_cv = [
        LeaveOneOut(),
        LeavePOut(p=2),
        KFold(),
        StratifiedKFold(),
        RepeatedKFold(),
        RepeatedStratifiedKFold(),
        ShuffleSplit(),
        StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(),
        LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2),
        GroupKFold(),
        TimeSeriesSplit(),
        PredefinedSplit(test_fold=groups),
    ]

    for one_cv in all_cv:
        # list(...) forces the lazy split generators to run to completion.
        for labels in (y, y_2d):
            list(one_cv.split(X, labels, groups))
        try:
            list(one_cv.split(X, y_multilabel, groups))
        except ValueError as exc:
            assert wanted_text in str(exc)
Example #4
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_repeated_cv_value_errors():
    """Repeated splitters must raise ``ValueError`` for a bad ``n_repeats``.

    Covers the cases where ``n_repeats`` is not an integer or is <= 0.
    """
    invalid_repeats = (0, 1.5)
    for splitter_cls in (RepeatedKFold, RepeatedStratifiedKFold):
        for bad_value in invalid_repeats:
            assert_raises(ValueError, splitter_cls, n_repeats=bad_value)
Example #5
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_repeated_kfold_determinstic_split():
    """``RepeatedKFold.split`` must yield the same folds on every call when seeded."""
    X = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
    rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=258173307)

    # Expected (train, test) indices, in the order the folds are produced.
    expected_folds = [
        ([2, 4], [0, 1, 3]),
        ([0, 1, 3], [2, 4]),
        ([0, 1], [2, 3, 4]),
        ([2, 3, 4], [0, 1]),
    ]

    # split should produce same and deterministic splits on each call
    for _ in range(3):
        fold_iter = rkf.split(X)
        for want_train, want_test in expected_folds:
            got_train, got_test = next(fold_iter)
            assert_array_equal(got_train, want_train)
            assert_array_equal(got_test, want_test)
        # No further folds may follow the four listed above.
        assert_raises(StopIteration, next, fold_iter)
Example #6
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_get_n_splits_for_repeated_kfold():
    """``RepeatedKFold.get_n_splits`` must return ``n_splits * n_repeats``."""
    n_splits = 3
    n_repeats = 4
    # Pass by keyword: current scikit-learn declares these parameters
    # keyword-only, so the positional form raises TypeError there.
    rkf = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats)
    expected_n_splits = n_splits * n_repeats
    assert_equal(expected_n_splits, rkf.get_n_splits())
Example #7
Source File: From pwtools with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, points, values, rbf_kwds=None, cv_kwds=None):
    """
    Parameters
    ----------
    points, values : see :class:`Rbf`
    rbf_kwds : dict, optional
        for ``Rbf(points, values, **rbf_kwds)``; an empty dict when omitted
    cv_kwds : dict, optional
        cross-validation parameters: `ns` = `n_splits`, `nr` = `n_repeats`
        (see sklearn.model_selection.RepeatedKFold); ``dict(ns=5, nr=1)``
        when omitted
    """
    self.points = points
    self.values = values
    # Defaults are created per call: dict literals in the signature would be
    # evaluated once and shared (and mutated) across every instance.
    self.rbf_kwds = dict() if rbf_kwds is None else rbf_kwds
    self.cv_kwds = dict(ns=5, nr=1) if cv_kwds is None else cv_kwds
Example #8
Source File: From pwtools with BSD 3-Clause "New" or "Revised" License | 5 votes |
def cv(self, params):
    """Repeated K-fold cross-validation of the interpolator.

    The data (points, values) is split along axis 0 into K random folds,
    with K = ``ns`` from ``self.cv_kwds``.  Every fold serves once as the
    test set while the remaining folds form the training set.  With `ns=5`
    that means 5 fits, each trained on 4/5 of the data and tested on 1/5,
    giving 5 fit errors.  The whole procedure is repeated ``nr`` times with
    different random splits, so `nr` * `ns` fit errors are produced in total.

    For each split an Rbf interpolator is built with ``self.rbf_kwds`` and
    fitted, and its fit error (scalar sum of squares from
    :meth:`Rbf.fit_error`) on the test part is recorded.

    Parameters
    ----------
    params : seq length 1 or 2
        | params[0] = p
        | params[1] = r (optional)

    Returns
    -------
    errs : 1d array (nr*ns,)
        direct fit error from each fold
    """
    n_folds = self.cv_kwds['ns']
    n_rounds = self.cv_kwds['nr']
    splitter = RepeatedKFold(n_splits=n_folds, n_repeats=n_rounds)
    # One slot per (repeat, fold) combination, filled in generation order.
    errs = np.empty((n_folds * n_rounds,), dtype=float)
    for slot, (train_idx, test_idx) in enumerate(splitter.split(self.points)):
        interpolator = self._get_rbfi(
            params,
            self.points[train_idx, ...],
            self.values[train_idx, ...],
        )
        errs[slot] = interpolator.fit_error(
            self.points[test_idx, ...],
            self.values[test_idx, ...],
        )
    return errs
Example #9
Source File: From MAST-ML with MIT License | 5 votes |
def fit(self, X, y, savepath=None, refit=True, iid=True):
    """Run a ``GridSearchCV`` over the generated parameter grid.

    Parameters
    ----------
    X, y : data passed to the search object's ``fit``
    savepath : directory handed to ``self._save_output``; the current
        working directory when None
    refit : forwarded to ``GridSearchCV``
    iid : forwarded to ``GridSearchCV``

    Returns
    -------
    The ``best_estimator_`` of the fitted search object.
    """
    rst = dict()
    param_dict = self._get_grid_param_dict()

    if savepath is None:
        savepath = os.getcwd()

    estimator_name = self._estimator_name
    param_dict = self._search_space_generator(param_dict)

    # NOTE(review): the snippet was garbled here ("if is None: = ...");
    # ``self.cv`` / ``cv=self.cv`` / ``model.fit(X, y)`` are reconstructed
    # from the surrounding code -- confirm against the project source.
    if self.cv is None:
        self.cv = ms.RepeatedKFold()

    # NOTE(review): ``iid`` is not accepted by newer scikit-learn releases --
    # confirm the pinned version before relying on it.
    model = GridSearchCV(self.estimator, param_dict, scoring=self.scoring, cv=self.cv,
                         refit=refit, iid=iid, n_jobs=self.n_jobs, verbose=2)

    try:
        rst[estimator_name] = model.fit(X, y)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit are
        # not misreported as an optimization failure.
        log.error('Hyperparameter optimization failed, likely due to inappropriate domain of values to optimize'
                  ' one or more parameters over. Please check your input file and the sklearn docs for the mode'
                  ' you are optimizing for the domain of correct values')
        exit()

    best_estimator = rst[estimator_name].best_estimator_
    self._save_output(savepath, rst)
    return best_estimator
Example #10
Source File: From MAST-ML with MIT License | 5 votes |
def fit(self, X, y, savepath=None, refit=True):
    """Run a ``RandomizedSearchCV`` over the randomized parameter space.

    Parameters
    ----------
    X, y : data passed to the search object's ``fit``
    savepath : directory handed to ``self._save_output``; the current
        working directory when None
    refit : forwarded to ``RandomizedSearchCV``

    Returns
    -------
    The ``best_estimator_`` of the fitted search object.
    """
    rst = dict()
    param_dict = self._get_randomized_param_dict()

    if savepath is None:
        savepath = os.getcwd()

    estimator_name = self._estimator_name

    # NOTE(review): the snippet was garbled here ("if is None: = ...");
    # ``self.cv`` / ``cv=self.cv`` / ``model.fit(X, y)`` are reconstructed
    # from the surrounding code -- confirm against the project source.
    if self.cv is None:
        self.cv = ms.RepeatedKFold()

    model = RandomizedSearchCV(self.estimator, param_dict, n_iter=self.n_iter, scoring=self.scoring,
                               cv=self.cv, refit=refit, n_jobs=self.n_jobs, verbose=2)

    try:
        rst[estimator_name] = model.fit(X, y)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit are
        # not misreported as an optimization failure.
        log.error('Hyperparameter optimization failed, likely due to inappropriate domain of values to optimize'
                  ' one or more parameters over. Please check your input file and the sklearn docs for the mode'
                  ' you are optimizing for the domain of correct values')
        exit()

    best_estimator = rst[estimator_name].best_estimator_
    self._save_output(savepath, rst)
    return best_estimator
Example #11
Source File: From MAST-ML with MIT License | 5 votes |
def fit(self, X, y, savepath=None, refit=True):
    """Run a ``BayesSearchCV`` over the Bayesian search spaces.

    Parameters
    ----------
    X, y : data passed to the search object's ``fit``
    savepath : directory handed to ``self._save_output``; the current
        working directory when None
    refit : forwarded to ``BayesSearchCV``

    Returns
    -------
    The ``best_estimator_`` of the fitted search object.
    """
    rst = dict()
    param_dict = self._get_bayesian_param_dict()

    if savepath is None:
        savepath = os.getcwd()

    estimator_name = self._estimator_name

    # NOTE(review): the snippet was garbled here ("if is None: = ...");
    # ``self.cv`` / ``cv=self.cv`` / ``model.fit(X, y)`` are reconstructed
    # from the surrounding code -- confirm against the project source.
    if self.cv is None:
        self.cv = ms.RepeatedKFold()

    model = BayesSearchCV(estimator=self.estimator, search_spaces=param_dict, n_iter=self.n_iter,
                          scoring=self.scoring, cv=self.cv, refit=refit, n_jobs=self.n_jobs, verbose=2)

    try:
        rst[estimator_name] = model.fit(X, y)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit are
        # not misreported as an optimization failure.
        log.error('Hyperparameter optimization failed, likely due to inappropriate domain of values to optimize'
                  ' one or more parameters over. Please check your input file and the sklearn docs for the mode'
                  ' you are optimizing for the domain of correct values')
        exit()

    best_estimator = rst[estimator_name].best_estimator_
    self._save_output(savepath, rst)
    return best_estimator
Example #12
Source File: From twitter-stock-recommendation with MIT License | 5 votes |
def test_repeated_cv_value_errors(): # n_repeats is not integer or <= 0 for cv in (RepeatedKFold, RepeatedStratifiedKFold): assert_raises(ValueError, cv, n_repeats=0) assert_raises(ValueError, cv, n_repeats=1.5)
Example #13
Source File: From twitter-stock-recommendation with MIT License | 5 votes |
def test_repeated_kfold_determinstic_split(): X = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] random_state = 258173307 rkf = RepeatedKFold( n_splits=2, n_repeats=2, random_state=random_state) # split should produce same and deterministic splits on # each call for _ in range(3): splits = rkf.split(X) train, test = next(splits) assert_array_equal(train, [2, 4]) assert_array_equal(test, [0, 1, 3]) train, test = next(splits) assert_array_equal(train, [0, 1, 3]) assert_array_equal(test, [2, 4]) train, test = next(splits) assert_array_equal(train, [0, 1]) assert_array_equal(test, [2, 3, 4]) train, test = next(splits) assert_array_equal(train, [2, 3, 4]) assert_array_equal(test, [0, 1]) assert_raises(StopIteration, next, splits)
Example #14
Source File: From twitter-stock-recommendation with MIT License | 5 votes |
def test_get_n_splits_for_repeated_kfold(): n_splits = 3 n_repeats = 4 rkf = RepeatedKFold(n_splits, n_repeats) expected_n_splits = n_splits * n_repeats assert_equal(expected_n_splits, rkf.get_n_splits())