Python sklearn.model_selection.StratifiedShuffleSplit() Examples
The following are 30 code examples of sklearn.model_selection.StratifiedShuffleSplit().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.model_selection, or try the search function.
Example #1
Source File: hpc_svm.py From cwcf with MIT License | 8 votes |
def get_full_rbf_svm_clf(train_x, train_y, c_range=None, gamma_range=None):
    """Grid-search ``C`` and ``gamma`` for an SVC and return a fresh classifier.

    The search uses 2 stratified shuffle splits (20% held out, seed 42) and
    prints the best parameters, the mean test score table and both ranges.

    Parameters
    ----------
    train_x, train_y
        Training samples and labels handed to ``GridSearchCV.fit``.
    c_range, gamma_range : sized sequences
        Candidate values for ``C`` and ``gamma``. Both are required; the
        ``None`` defaults only exist for signature compatibility.

    Returns
    -------
    SVC
        A new, *unfitted* ``SVC`` configured with the best ``C`` and ``gamma``
        found (``verbose=True``).

    Raises
    ------
    ValueError
        If ``c_range`` or ``gamma_range`` is ``None``.
    """
    # Fail early with a clear message instead of an obscure error raised deep
    # inside the grid search (or by ``len(None)`` further down).
    if c_range is None or gamma_range is None:
        raise ValueError("c_range and gamma_range must both be provided")

    param_grid = dict(gamma=gamma_range, C=c_range)
    cv = StratifiedShuffleSplit(n_splits=2, test_size=0.2, random_state=42)
    grid = GridSearchCV(SVC(cache_size=1024), param_grid=param_grid, cv=cv,
                        n_jobs=14, verbose=10)
    grid.fit(train_x, train_y)

    print("The best parameters are %s with a score of %0.2f"
          % (grid.best_params_, grid.best_score_))

    # NOTE(review): this reshape assumes the results are ordered with C
    # varying slowest and gamma fastest -- confirm against ParameterGrid.
    scores = grid.cv_results_['mean_test_score'].reshape(len(c_range),
                                                         len(gamma_range))
    print("Scores:")
    print(scores)
    print("c_range:", c_range)
    print("gamma_range:", gamma_range)

    c_best = grid.best_params_['C']
    gamma_best = grid.best_params_['gamma']

    # The returned estimator is not fitted; the caller is expected to train it.
    clf = SVC(C=c_best, gamma=gamma_best, verbose=True)
    return clf

#----------------
Example #2
Source File: test_split.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_stratified_shuffle_split_multilabel_many_labels():
    # Regression test for the fix in PR #9922: with more than 1000 labels per
    # row, str(row) abbreviates the middle of the row with an ellipsis
    # (positions 4 through len(row) - 4), so the powerset transformation from
    # multilabel to multiclass did not tell rows apart correctly.
    edge = [1, 0, 1]
    mostly_zeros = edge + [0] * 1000 + edge
    mostly_ones = edge + [1] * 1000 + edge
    y = np.array([mostly_zeros] * 10 + [mostly_ones] * 100)
    X = np.ones_like(y)

    splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.5,
                                      random_state=0)
    train_idx, test_idx = next(splitter.split(X=X, y=y))

    # Whole rows must be stratified. By construction column 4 alone tells the
    # two row patterns apart, so its mean must match in both halves.
    target_ratio = np.mean(y[:, 4])
    for part in (train_idx, test_idx):
        assert_equal(target_ratio, np.mean(y[part][:, 4]))
Example #3
Source File: image_dataset.py From self-ensemble-visual-domain-adapt-photo with MIT License | 6 votes |
def subset_indices(d_source, d_target, subsetsize, subsetseed):
    """Pick random subsets of the source and target datasets.

    When ``subsetsize`` is not positive no subsetting happens: both index
    results are ``None`` and the counts are the lengths of the ``images``
    attributes. Otherwise the source subset is drawn with a stratified shuffle
    split on ``d_source.y``; the target subset is stratified on ``d_target.y``
    when ``d_target.has_ground_truth`` is true and drawn with a plain shuffle
    split over ``len(d_target.images)`` positions otherwise.

    A ``subsetseed`` of 0 means "use the global ``np.random`` state"; any other
    value seeds a dedicated ``RandomState`` shared by both draws.

    Returns ``(source_indices, target_indices, n_src, n_tgt)``.
    """
    if not subsetsize > 0:
        return None, None, len(d_source.images), len(d_target.images)

    rng = np.random.RandomState(subsetseed) if subsetseed != 0 else np.random

    stratified = StratifiedShuffleSplit(n_splits=1, test_size=subsetsize,
                                        random_state=rng)
    # The "test" side of each split is the subset that is kept.
    _, source_indices = next(stratified.split(d_source.y, d_source.y))

    if d_target.has_ground_truth:
        target_splits = stratified.split(d_target.y, d_target.y)
    else:
        unstratified = ShuffleSplit(n_splits=1, test_size=subsetsize,
                                    random_state=rng)
        target_splits = unstratified.split(np.arange(len(d_target.images)))
    _, target_indices = next(target_splits)

    return (source_indices, target_indices,
            source_indices.shape[0], target_indices.shape[0])
Example #4
Source File: test_split.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_2d_y():
    # Smoke test: every splitter must accept a 1d y and a column-vector y.
    # Multilabel y must either work or fail with the expected message.
    n_samples = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))

    splitters = [
        LeaveOneOut(),
        LeavePOut(p=2),
        KFold(),
        StratifiedKFold(),
        RepeatedKFold(),
        RepeatedStratifiedKFold(),
        ShuffleSplit(),
        StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(),
        LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2),
        GroupKFold(),
        TimeSeriesSplit(),
        PredefinedSplit(test_fold=groups),
    ]

    expected_msg = "Supported target types are: {}. Got 'multilabel".format(
        ('binary', 'multiclass'))

    for splitter in splitters:
        for target in (y, y_2d):
            list(splitter.split(X, target, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as err:
            assert expected_msg in str(err)
Example #5
Source File: test_search.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_grid_search_groups():
    # Group-based splitters must surface a ValueError through GridSearchCV
    # when groups is None, and must receive groups when it is supplied.
    rng = np.random.RandomState(0)
    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 15)

    estimator = LinearSVC(random_state=0)
    param_grid = {'C': [1]}

    needs_groups = (LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                    GroupShuffleSplit())
    for group_cv in needs_groups:
        search = GridSearchCV(estimator, param_grid, cv=group_cv)
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             search.fit, X, y)
        search.fit(X, y, groups=groups)

    ignores_groups = (StratifiedKFold(), StratifiedShuffleSplit())
    for plain_cv in ignores_groups:
        search = GridSearchCV(estimator, param_grid, cv=plain_cv)
        # Fitting without groups must not raise.
        search.fit(X, y)
Example #6
Source File: test_split.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_stratified_shuffle_split_multilabel():
    # Regression test for issue 9037.
    label_sets = (
        np.array([[0, 1], [1, 0], [1, 0], [0, 1]]),
        np.array([[0, 1], [1, 1], [1, 1], [0, 1]]),
    )
    for y in label_sets:
        X = np.ones_like(y)
        splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.5,
                                          random_state=0)
        train_idx, test_idx = next(splitter.split(X=X, y=y))

        # Train and test must be disjoint ...
        assert_array_equal(np.intersect1d(train_idx, test_idx), [])
        # ... and together cover every sample.
        assert_array_equal(np.union1d(train_idx, test_idx),
                           np.arange(len(y)))

        # Whole rows must be stratified. By construction column 0 alone
        # determines the row, so its mean must match in both halves.
        target_ratio = np.mean(y[:, 0])
        for part in (train_idx, test_idx):
            assert_equal(target_ratio, np.mean(y[part][:, 0]))
Example #7
Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_stratified_shuffle_split_multilabel_many_labels():
    # Regression test for the fix in PR #9922: for multilabel rows with more
    # than 1000 labels, str(row) replaces positions 4 through len(row) - 4
    # with an ellipsis, which broke the powerset-based conversion from a
    # multilabel problem to a multiclass one.
    n_filler = 1000
    border = [1, 0, 1]
    zeros_row = border + [0] * n_filler + border
    ones_row = border + [1] * n_filler + border
    y = np.array([zeros_row] * 10 + [ones_row] * 100)
    X = np.ones_like(y)

    shuffle_split = StratifiedShuffleSplit(n_splits=1, test_size=0.5,
                                           random_state=0)
    train_rows, test_rows = next(shuffle_split.split(X=X, y=y))

    # Stratification must act on entire rows. Column 4 uniquely identifies
    # the row pattern here, so its mean is compared across the halves.
    expected = np.mean(y[:, 4])
    assert_equal(expected, np.mean(y[train_rows][:, 4]))
    assert_equal(expected, np.mean(y[test_rows][:, 4]))
Example #8
Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_stratified_shuffle_split_multilabel():
    # Regression test for issue 9037.
    candidates = [
        np.array([[0, 1], [1, 0], [1, 0], [0, 1]]),
        np.array([[0, 1], [1, 1], [1, 1], [0, 1]]),
    ]
    for y in candidates:
        X = np.ones_like(y)
        shuffle_split = StratifiedShuffleSplit(n_splits=1, test_size=0.5,
                                               random_state=0)
        train_rows, test_rows = next(shuffle_split.split(X=X, y=y))

        # The two index sets share nothing ...
        assert_array_equal(np.intersect1d(train_rows, test_rows), [])
        # ... and form a complete partition of the samples.
        assert_array_equal(np.union1d(train_rows, test_rows),
                           np.arange(len(y)))

        # Entire rows are stratified: column 0 uniquely determines the row in
        # this data, so its mean must be preserved on both sides.
        expected = np.mean(y[:, 0])
        assert_equal(expected, np.mean(y[train_rows][:, 0]))
        assert_equal(expected, np.mean(y[test_rows][:, 0]))
Example #9
Source File: temporalnet_combined.py From Fall-Detection-with-CNNs-and-Optical-Flow with MIT License | 6 votes |
def divide_train_val(zeroes, ones, val_size):
    """Randomly split two arrays into training and validation parts.

    Each input is shuffled independently with ``np.random.permutation`` (the
    global NumPy random state). ``val_size // 2`` elements of each input go to
    validation and the remaining elements go to training.

    Parameters
    ----------
    zeroes, ones : array-like
        The two collections to split (presumably sample indices of class 0
        and class 1 -- verify against the caller). Lists are accepted and
        converted to arrays.
    val_size : int
        Total validation size; half of it is taken from each input.

    Returns
    -------
    tuple
        ``(train_indices_0, train_indices_1, val_indices_0, val_indices_1)``.
    """
    # Coerce to arrays so that index-array selection also works for lists.
    zeroes = np.asarray(zeroes)
    ones = np.asarray(ones)
    half = val_size // 2

    # Draw order (zeroes first, then ones) is kept so that seeded runs
    # reproduce the same split as before.
    rand0 = np.random.permutation(len(zeroes))
    train_indices_0 = zeroes[rand0[half:]]
    val_indices_0 = zeroes[rand0[:half]]

    rand1 = np.random.permutation(len(ones))
    train_indices_1 = ones[rand1[half:]]
    val_indices_1 = ones[rand1[:half]]

    return (train_indices_0, train_indices_1, val_indices_0, val_indices_1)
Example #10
Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_stratified_shuffle_split_init():
    def check_split_raises(X, y, **params):
        # The error is only raised once the first split is requested.
        splits = StratifiedShuffleSplit(**params).split(X, y)
        assert_raises(ValueError, next, splits)

    X = np.arange(7)
    y = np.asarray([0, 1, 1, 1, 2, 2, 2])
    # A class with a single sample cannot be stratified.
    check_split_raises(X, y, n_splits=3, test_size=0.2)
    # The test set is smaller than the number of classes.
    check_split_raises(X, y, n_splits=3, test_size=2)
    # The train set is smaller than the number of classes.
    check_split_raises(X, y, n_splits=3, test_size=3, train_size=2)

    X = np.arange(9)
    y = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2])
    # Train size or test size too small.
    check_split_raises(X, y, train_size=2)
    check_split_raises(X, y, test_size=2)
Example #11
Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_2d_y():
    # Smoke test for 2d y and multi-label targets across all splitters.
    n_samples = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))

    splitters = [
        LeaveOneOut(), LeavePOut(p=2),
        KFold(), StratifiedKFold(),
        RepeatedKFold(), RepeatedStratifiedKFold(),
        ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(), LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2), GroupKFold(),
        TimeSeriesSplit(), PredefinedSplit(test_fold=groups),
    ]

    allowed_target_types = ('binary', 'multiclass')
    multilabel_msg = (
        "Supported target types are: {}. Got 'multilabel".format(
            allowed_target_types))

    for cv in splitters:
        list(cv.split(X, y, groups))
        list(cv.split(X, y_2d, groups))
        try:
            list(cv.split(X, y_multilabel, groups))
        except ValueError as exc:
            # Splitters that reject multilabel input must say so explicitly.
            assert multilabel_msg in str(exc)
Example #12
Source File: helpers.py From neupy with MIT License | 5 votes |
def simple_classification(n_samples=100, n_features=10, random_state=33):
    """
    Build a small classification dataset and split it once.

    The data comes from ``datasets.make_classification``. A single stratified
    shuffle split then keeps 60% of the samples for training and 10% for
    testing.

    Parameters
    ----------
    n_samples : int
        Number of samples in the dataset.

    n_features : int
        Number of features for each sample.

    random_state : int
        Seed used for both data generation and the split, so results are
        reproducible.

    Returns
    -------
    tuple
        Four arrays, in this order: input train, input test, target train,
        target test.
    """
    features, labels = datasets.make_classification(
        n_samples=n_samples,
        n_features=n_features,
        random_state=random_state,
    )

    splitter = StratifiedShuffleSplit(
        n_splits=1,
        train_size=0.6,
        test_size=0.1,
        random_state=random_state,
    )
    train_idx, test_idx = next(splitter.split(features, labels))

    return (
        features[train_idx],
        features[test_idx],
        labels[train_idx],
        labels[test_idx],
    )
Example #13
Source File: split.py From gumpy with MIT License | 5 votes |
def stratified_shuffle_Split(features, labels, n_splits, test_size, random_state):
    """Split features and labels with a stratified shuffle split.

    ``n_splits`` splits are generated, but only the final one is returned;
    earlier splits are discarded.

    Parameters
    ----------
    features, labels
        Data and targets; both are indexed with the generated index arrays.
    n_splits : int
        Number of splits to generate (must yield at least one).
    test_size
        Size of the test part, forwarded to ``StratifiedShuffleSplit``.
    random_state
        Seed or random state forwarded to ``StratifiedShuffleSplit``.

    Returns
    -------
    tuple
        ``(X_train, X_test, Y_train, Y_test)`` for the last split.

    Raises
    ------
    ValueError
        If the splitter produces no split at all.
    """
    # test_size is keyword-only in current scikit-learn releases, so every
    # argument is passed by name.
    cv = StratifiedShuffleSplit(n_splits=n_splits, test_size=test_size,
                                random_state=random_state)

    last_split = None
    for last_split in cv.split(features, labels):
        pass
    if last_split is None:
        raise ValueError("n_splits must produce at least one split")

    train_index, test_index = last_split
    X_train = features[train_index]
    X_test = features[test_index]
    Y_train = labels[train_index]
    Y_test = labels[test_index]
    return X_train, X_test, Y_train, Y_test

#Random permutation cross-validator
Example #14
Source File: core.py From HungaBunga with MIT License | 5 votes |
def cv_clf(x, y, test_size = 0.2, n_splits = 5, random_state=None, doesUpsample = True):
    """Yield ``(train_indices, validation_indices)`` pairs for classification.

    Splits come from ``sss`` (presumably ``StratifiedShuffleSplit`` imported
    under an alias -- verify against the file's imports).

    Parameters
    ----------
    x, y
        Data and labels passed to the splitter; ``y`` is also indexed with
        the train indices when upsampling.
    test_size, n_splits, random_state
        Forwarded to the splitter.
    doesUpsample : bool
        When true, the train indices of every split are replaced by the
        result of ``upsample_indices_clf(train_inds, y[train_inds])``.
        When false, the splitter's pairs are yielded unchanged.

    Yields
    ------
    tuple
        ``(train_indices, validation_indices)`` for each split.
    """
    # Previously, doesUpsample=False yielded the splitter's generator object
    # as a single item and then went on to yield upsampled pairs anyway.
    # Every yielded item is now a proper (train, validation) pair.
    # Arguments are passed by name because test_size is keyword-only in
    # current scikit-learn releases.
    splits = sss(n_splits=n_splits, test_size=test_size,
                 random_state=random_state).split(x, y)
    for train_inds, valid_inds in splits:
        if doesUpsample:
            train_inds = upsample_indices_clf(train_inds, y[train_inds])
        yield (train_inds, valid_inds)
Example #15
Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    accessor_of = lambda: df.model_selection

    # Splitter Classes
    # NOTE: the identity check for StratifiedShuffleSplit is disabled
    # (commented out) in this test, so it is not listed here.
    splitter_classes = (
        'KFold', 'GroupKFold', 'StratifiedKFold',
        'LeaveOneGroupOut', 'LeavePGroupsOut',
        'LeaveOneOut', 'LeavePOut',
        'ShuffleSplit', 'GroupShuffleSplit',
        'PredefinedSplit', 'TimeSeriesSplit',
    )

    # Splitter Functions

    # Hyper-parameter optimizers
    optimizers = (
        'GridSearchCV', 'RandomizedSearchCV',
        'ParameterGrid', 'ParameterSampler',
    )

    # Each accessor attribute must be the very same object that ms exposes.
    for name in splitter_classes + optimizers:
        self.assertIs(getattr(accessor_of(), name), getattr(ms, name))

    # Model validation
Example #16
Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_objectmapper_abbr(self):
    df = pdml.ModelFrame([])

    # Splitter Classes
    # NOTE: the identity check for StratifiedShuffleSplit is disabled
    # (commented out) in this test, so it is not listed here.
    splitter_classes = [
        'KFold',
        'GroupKFold',
        'StratifiedKFold',
        'LeaveOneGroupOut',
        'LeavePGroupsOut',
        'LeaveOneOut',
        'LeavePOut',
        'ShuffleSplit',
        'GroupShuffleSplit',
        'PredefinedSplit',
        'TimeSeriesSplit',
    ]
    for name in splitter_classes:
        self.assertIs(getattr(df.ms, name), getattr(ms, name))

    # Splitter Functions

    # Hyper-parameter optimizers
    optimizers = [
        'GridSearchCV',
        'RandomizedSearchCV',
        'ParameterGrid',
        'ParameterSampler',
    ]
    for name in optimizers:
        self.assertIs(getattr(df.ms, name), getattr(ms, name))

    # Model validation
Example #17
Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_StratifiedShuffleSplit(self):
    # The accessor's splitter must produce the same splits as the plain
    # ms.StratifiedShuffleSplit built with the same random_state.
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    sf1 = df.model_selection.StratifiedShuffleSplit(random_state=self.random_state)
    sf2 = ms.StratifiedShuffleSplit(random_state=self.random_state)

    # consume generator
    ind1 = list(sf1.split(df.data.values, df.target.values))
    ind2 = list(sf2.split(iris.data, iris.target))

    for i1, i2 in zip(ind1, ind2):
        self.assertIsInstance(i1, tuple)
        self.assertEqual(len(i1), 2)
        self.assertIsInstance(i2, tuple)
        self.assertEqual(len(i2), 2)

        # Compare train indices against the reference split; this used to
        # compare i1[0] with itself and therefore could never fail.
        tm.assert_numpy_array_equal(i1[0], i2[0])
        tm.assert_numpy_array_equal(i1[1], i2[1])

    sf1 = df.model_selection.StratifiedShuffleSplit(random_state=self.random_state)
    # NOTE(review): the original layout was lost in this copy; the loop is
    # kept inside the warning context so the FutureWarning is also caught if
    # iterate() only warns once iteration starts -- confirm.
    with tm.assert_produces_warning(FutureWarning):
        gen = df.model_selection.iterate(sf1)
        # StratifiedShuffleSplit is not a subclass of BaseCrossValidator
        for train_df, test_df in gen:
            self.assertIsInstance(train_df, pdml.ModelFrame)
            self.assertIsInstance(test_df, pdml.ModelFrame)
            tm.assert_index_equal(df.columns, train_df.columns)
            tm.assert_index_equal(df.columns, test_df.columns)

            self.assertEqual(len(df), len(train_df) + len(test_df))
Example #18
Source File: evaluation.py From nonconformist with MIT License | 5 votes |
def fit(self, x, y):
    """Fit ``self.icp`` on one part of the data and calibrate it on the rest.

    A single stratified shuffle split on ``y`` holds out
    ``self.calibration_portion`` of the samples. ``self.icp.fit`` receives the
    remaining samples and ``self.icp.calibrate`` receives the held-out ones.
    """
    splitter = StratifiedShuffleSplit(n_splits=1,
                                      test_size=self.calibration_portion)
    # Only y drives the split, so a zero placeholder stands in for the
    # features.
    placeholder = np.zeros((y.size, 1))
    train_idx, cal_idx = next(splitter.split(placeholder, y))

    self.icp.fit(x[train_idx, :], y[train_idx])
    self.icp.calibrate(x[cal_idx, :], y[cal_idx])
Example #19
Source File: acp.py From nonconformist with MIT License | 5 votes |
def gen_samples(self, y, n_samples, problem_type):
    """Yield ``n_samples`` random ``(train, cal)`` index pairs.

    For ``problem_type == 'classification'`` the splits are stratified on
    ``y``; for any other value a plain shuffle split is used. In both cases
    ``self.cal_portion`` is the size of the second part of each pair.
    """
    stratify = problem_type == 'classification'
    splitter_cls = StratifiedShuffleSplit if stratify else ShuffleSplit
    splitter = splitter_cls(n_splits=n_samples, test_size=self.cal_portion)

    # Only the number of samples matters to the splitter, so a zero
    # placeholder stands in for the features.
    placeholder = np.zeros((y.size, 1))
    split_args = (placeholder, y) if stratify else (placeholder,)

    for train_idx, cal_idx in splitter.split(*split_args):
        yield train_idx, cal_idx


# -----------------------------------------------------------------------------
# Conformal ensemble
# -----------------------------------------------------------------------------
Example #20
Source File: data.py From keras-text with MIT License | 5 votes |
def update_test_indices(self, test_size=0.1):
    """Recomputes and stores the train and test indices.

    Multi-label data is split with `sampling.multi_label_train_test_split`;
    otherwise a single stratified shuffle split over `self.X` / `self.y` is
    used. The results are stored in `_train_indices` and `_test_indices`.

    Args:
        test_size: The test proportion in [0, 1] (Default value: 0.1)
    """
    if not self.is_multi_label:
        splitter = StratifiedShuffleSplit(n_splits=1, test_size=test_size)
        split = next(splitter.split(self.X, self.y))
    else:
        split = sampling.multi_label_train_test_split(self.y, test_size)

    self._train_indices, self._test_indices = split
Example #21
Source File: data.py From keras-text with MIT License | 5 votes |
def train_val_split(self, split_ratio=0.1):
    """Splits `self.X` and `self.y` into train and validation subsets.

    Multi-label data is split with `sampling.multi_label_train_test_split`;
    otherwise a single stratified shuffle split is used.

    Args:
        split_ratio: The split proportion in [0, 1] (Default value: 0.1)

    Returns:
        A tuple `(X_train, X_val, y_train, y_val)`.
    """
    if not self.is_multi_label:
        splitter = StratifiedShuffleSplit(n_splits=1, test_size=split_ratio)
        split = next(splitter.split(self.X, self.y))
    else:
        split = sampling.multi_label_train_test_split(self.y, split_ratio)

    train_indices, val_indices = split
    X_train, X_val = self.X[train_indices], self.X[val_indices]
    y_train, y_val = self.y[train_indices], self.y[val_indices]
    return X_train, X_val, y_train, y_val
Example #22
Source File: test_split.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_stratified_shuffle_split_init():
    def check_split_raises(X, y, **params):
        # The error is only raised once the first split is requested.
        splits = StratifiedShuffleSplit(**params).split(X, y)
        assert_raises(ValueError, next, splits)

    X = np.arange(7)
    y = np.asarray([0, 1, 1, 1, 2, 2, 2])
    # A class with a single sample cannot be stratified.
    check_split_raises(X, y, n_splits=3, test_size=0.2)
    # The test set is smaller than the number of classes.
    check_split_raises(X, y, n_splits=3, test_size=2)
    # The train set is smaller than the number of classes.
    check_split_raises(X, y, n_splits=3, test_size=3, train_size=2)

    X = np.arange(9)
    y = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2])
    # Not enough samples: the first case already fails in the constructor,
    # the other two fail when splitting.
    assert_raises(ValueError, StratifiedShuffleSplit,
                  n_splits=3, test_size=0.5, train_size=0.6)
    check_split_raises(X, y, n_splits=3, test_size=8, train_size=0.6)
    check_split_raises(X, y, n_splits=3, test_size=0.6, train_size=8)

    # Train size or test size too small.
    check_split_raises(X, y, train_size=2)
    check_split_raises(X, y, test_size=2)
Example #23
Source File: test_split.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_stratified_shuffle_split_respects_test_size():
    # Absolute train/test sizes must be honoured exactly in every split.
    y = np.array([0, 1, 2, 3] * 3 + [0, 1, 2])
    n_test, n_train = 5, 10

    splitter = StratifiedShuffleSplit(n_splits=6, test_size=n_test,
                                      train_size=n_train, random_state=0)
    for train_idx, test_idx in splitter.split(np.ones(len(y)), y):
        assert_equal(len(train_idx), n_train)
        assert_equal(len(test_idx), n_test)
Example #24
Source File: test_split.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_stratified_shuffle_split_iter():
    # Every split must contain all classes on both sides, keep the class
    # proportions, have the expected sizes and be disjoint.
    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] * 2),
          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
          np.array([-1] * 800 + [1] * 50),
          np.concatenate([[i] * (100 + i) for i in range(11)]),
          [1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3],
          ['1', '1', '1', '1', '2', '2', '2', '3', '3', '3', '3', '3'],
          ]

    for y in ys:
        # The splitter receives y in its original form (possibly a list).
        sss = StratifiedShuffleSplit(n_splits=6, test_size=0.33,
                                     random_state=0).split(np.ones(len(y)), y)
        y = np.asanyarray(y)  # To make it indexable for y[train]
        # this is how test-size is computed internally
        # in _validate_shuffle_split
        test_size = np.ceil(0.33 * len(y))
        train_size = len(y) - test_size
        for train, test in sss:
            assert_array_equal(np.unique(y[train]), np.unique(y[test]))
            # Checks if folds keep classes proportions
            p_train = (np.bincount(np.unique(y[train],
                                   return_inverse=True)[1]) /
                       float(len(y[train])))
            p_test = (np.bincount(np.unique(y[test],
                                  return_inverse=True)[1]) /
                      float(len(y[test])))
            assert_array_almost_equal(p_train, p_test, 1)
            assert_equal(len(train) + len(test), y.size)
            assert_equal(len(train), train_size)
            assert_equal(len(test), test_size)
            # Use the public np.intersect1d; np.lib.arraysetops is not part
            # of NumPy's public namespace in recent releases.
            assert_array_equal(np.intersect1d(train, test), [])
Example #25
Source File: test_split.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_stratifiedshufflesplit_list_input():
    # Labels given as a list (of strings or of floats) must yield the same
    # splits as the equivalent numpy array.
    splitter = StratifiedShuffleSplit(test_size=2, random_state=42)
    X = np.ones(7)

    y_str_list = ['1'] * 4 + ['0'] * 3
    y_array = np.hstack((np.ones(4), np.zeros(3)))
    y_float_list = y_array.tolist()

    for y_as_list in (y_str_list, y_float_list):
        np.testing.assert_equal(list(splitter.split(X, y_as_list)),
                                list(splitter.split(X, y_array)))
Example #26
Source File: test_split.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_nested_cv():
    # Nested cross validation must work for every inner/outer pairing of the
    # splitters below.
    rng = np.random.RandomState(0)
    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 5, 15)

    splitters = [
        LeaveOneGroupOut(),
        LeaveOneOut(),
        GroupKFold(),
        StratifiedKFold(),
        StratifiedShuffleSplit(n_splits=3, random_state=0),
    ]
    alpha_grid = {'alpha': [1, .1]}

    for inner_cv, outer_cv in combinations_with_replacement(splitters, 2):
        search = GridSearchCV(Ridge(), param_grid=alpha_grid, cv=inner_cv)
        # groups goes to the outer splitter directly and to the inner search
        # through fit_params.
        cross_val_score(search, X=X, y=y, groups=groups, cv=outer_cv,
                        fit_params={'groups': groups})
Example #27
Source File: test_split.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_train_test_default_warning():
    # Passing only train_size must emit a FutureWarning for each shuffle
    # splitter and for train_test_split.
    for splitter_cls in (ShuffleSplit, GroupShuffleSplit,
                         StratifiedShuffleSplit):
        assert_warns(FutureWarning, splitter_cls, train_size=0.75)

    assert_warns(FutureWarning, train_test_split, range(3), train_size=0.75)
Example #28
Source File: problem.py From ramp-workflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_cv(X, y):
    """Return the split generator: 2 stratified shuffle splits, 20% test, seed 57."""
    return StratifiedShuffleSplit(
        n_splits=2,
        test_size=0.2,
        random_state=57,
    ).split(X, y)
Example #29
Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_nested_cv():
    # Check that nested cross validation runs for all combinations of the
    # splitters listed below.
    rng = np.random.RandomState(0)
    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 5, 15)

    candidate_cvs = [
        LeaveOneGroupOut(),
        LeaveOneOut(),
        GroupKFold(),
        StratifiedKFold(),
        StratifiedShuffleSplit(n_splits=3, random_state=0),
    ]

    for inner_cv, outer_cv in combinations_with_replacement(candidate_cvs, 2):
        # error_score='raise' makes a failing inner fit surface as an error.
        search = GridSearchCV(
            Ridge(),
            param_grid={'alpha': [1, .1]},
            cv=inner_cv,
            error_score='raise',
            iid=False,
        )
        cross_val_score(
            search,
            X=X,
            y=y,
            groups=groups,
            cv=outer_cv,
            fit_params={'groups': groups},
        )
Example #30
Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_stratifiedshufflesplit_list_input():
    # y given as a plain list, or as a list of string labels, must be split
    # exactly like the equivalent numpy array.
    shuffle_split = StratifiedShuffleSplit(test_size=2, random_state=42)
    X = np.ones(7)

    labels_as_strings = ['1'] * 4 + ['0'] * 3
    labels_as_array = np.hstack((np.ones(4), np.zeros(3)))
    labels_as_floats = labels_as_array.tolist()

    np.testing.assert_equal(
        list(shuffle_split.split(X, labels_as_strings)),
        list(shuffle_split.split(X, labels_as_array)),
    )
    np.testing.assert_equal(
        list(shuffle_split.split(X, labels_as_floats)),
        list(shuffle_split.split(X, labels_as_array)),
    )