Python sklearn.model_selection.ShuffleSplit() Examples

The following are 30 code examples of sklearn.model_selection.ShuffleSplit(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.model_selection, or try the search function.
Example #1
Source File: TargetingSystem.py    From poeai with MIT License 7 votes vote down vote up
def Train(self, C, A, Y, SF):
        '''
        Train the classifier using the sample matrix A and target matrix Y,
        then print its score on the same training data.

        C:  estimator exposing fit(A, Y) and predict(A)
        A:  sample matrix; rows are selected with integer index arrays
        Y:  target matrix; predictions are stored in an array of Y's shape
        SF: scoring callable invoked as SF(Y, YH); its result is printed
        '''
        C.fit(A, Y)
        # np.object was only an alias of the builtin and was removed in
        # NumPy 1.24; the builtin gives the same dtype on every version.
        YH = np.zeros(Y.shape, dtype = object)
        for i in np.array_split(np.arange(A.shape[0]), 32):   #Split up verification into chunks to prevent out of memory
            YH[i] = C.predict(A[i])
        s1 = SF(Y, YH)
        print('All:{:8.6f}'.format(s1))
Example #2
Source File: test_split.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_2d_y():
    # Smoke test: every splitter must accept a 1d target and a 2d column
    # target; a multi-label target may only fail with the expected message.
    sample_count = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(sample_count, 2))
    y = rng.randint(0, 3, size=(sample_count,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(sample_count, 3))
    groups = rng.randint(0, 3, size=(sample_count,))
    cross_validators = [
        LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
        RepeatedKFold(), RepeatedStratifiedKFold(),
        ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(), LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
        PredefinedSplit(test_fold=groups),
    ]
    for cv in cross_validators:
        for target in (y, y_2d):
            list(cv.split(X, target, groups))
        try:
            list(cv.split(X, y_multilabel, groups))
        except ValueError as err:
            expected = "Supported target types are: {}. Got 'multilabel".format(
                ('binary', 'multiclass'))
            assert expected in str(err)
Example #3
Source File: expedia_dataset_reader.py    From cs-ranking with Apache License 2.0 6 votes vote down vote up
def get_single_train_test_split(self):
        """Split each entry of ``X_dict``/``Y_dict`` once and return the data.

        Populates ``X_train``, ``Y_train``, ``X_test`` and ``Y_test`` (dicts
        keyed like ``X_dict``), then sub-samples the training data, checks the
        dataset and standardizes the features.

        Returns the tuple ``(X, Y, X_test, Y_test)``.
        """
        cv_iter = ShuffleSplit(
            n_splits=1, random_state=self.random_state, test_size=0.80
        )
        index_pairs = {}
        for n_obj, arr in self.X_dict.items():
            # A single-row array cannot be split; row 0 serves both sides.
            if arr.shape[0] == 1:
                index_pairs[n_obj] = ([0], [0])
            else:
                index_pairs[n_obj] = next(cv_iter.split(arr))

        self.X_train, self.Y_train = dict(), dict()
        self.X_test, self.Y_test = dict(), dict()
        for n_obj, (train_idx, test_idx) in index_pairs.items():
            self.X_train[n_obj] = np.copy(self.X_dict[n_obj][train_idx])
            self.X_test[n_obj] = np.copy(self.X_dict[n_obj][test_idx])
            self.Y_train[n_obj] = np.copy(self.Y_dict[n_obj][train_idx])
            self.Y_test[n_obj] = np.copy(self.Y_dict[n_obj][test_idx])

        self.X, self.Y = self.sub_sampling_from_dictionary()
        self.__check_dataset_validity__()
        self.X, self.X_test = standardize_features(self.X, self.X_test)
        return self.X, self.Y, self.X_test, self.Y_test
Example #4
Source File: image_dataset.py    From self-ensemble-visual-domain-adapt-photo with MIT License 6 votes vote down vote up
def subset_indices(d_source, d_target, subsetsize, subsetseed):
    """Pick ``subsetsize`` sample indices from the source and target datasets.

    When ``subsetsize`` is not positive no subset is drawn: both index
    results are ``None`` and the counts are the full image counts.
    Otherwise the source subset is stratified on ``d_source.y``; the target
    subset is stratified on ``d_target.y`` when it has ground truth and is a
    plain shuffle over its images when it does not.

    A ``subsetseed`` of 0 uses the global NumPy random state.

    Returns ``(source_indices, target_indices, n_src, n_tgt)``.
    """
    if not subsetsize > 0:
        return None, None, len(d_source.images), len(d_target.images)

    rng = np.random.RandomState(subsetseed) if subsetseed != 0 else np.random
    stratified = StratifiedShuffleSplit(n_splits=1, test_size=subsetsize, random_state=rng)
    plain = ShuffleSplit(n_splits=1, test_size=subsetsize, random_state=rng)

    source_indices = next(stratified.split(d_source.y, d_source.y))[1]
    n_src = source_indices.shape[0]

    if d_target.has_ground_truth:
        target_split = stratified.split(d_target.y, d_target.y)
    else:
        target_split = plain.split(np.arange(len(d_target.images)))
    target_indices = next(target_split)[1]
    n_tgt = target_indices.shape[0]

    return source_indices, target_indices, n_src, n_tgt
Example #5
Source File: stockpredictor.py    From SyBrain with GNU General Public License v3.0 6 votes vote down vote up
def TestPerformance(self, df=None):
        """Print 3-fold R^2 cross-validation scores for the regressor ``self.R``.

        Scores are printed for the whole data set and for the test and train
        parts of a single shuffle split. ``df`` is transformed with
        ``self.S`` when given; otherwise the stored frame ``self.D`` is used.
        """
        D = self.D if df is None else self.S.transform(df.copy())
        A = self._ExtractFeat(D)
        # Only the target values are needed; the column names are discarded
        y, _ = self._ExtractTarg(D)
        splitter = ShuffleSplit(n_splits=1)
        for trn, tst in splitter.split(A):
            r2_scorer = make_scorer(r2_score)
            whole = cross_val_score(self.R, A, y, cv=3, scoring=r2_scorer)
            held_out = cross_val_score(self.R, A[tst], y[tst], cv=3, scoring=r2_scorer)
            fitted = cross_val_score(self.R, A[trn], y[trn], cv=3, scoring=r2_scorer)
            print('C-V:\t' + str(whole) + '\nTst:\t' + str(held_out) + '\nTrn:\t' + str(fitted))
Example #6
Source File: test_multiclass.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_safe_split_with_precomputed_kernel():
    # _safe_split must slice a precomputed Gram matrix consistently with the
    # way it slices the raw feature matrix for an ordinary estimator.
    plain_clf, kernel_clf = SVC(), SVC(kernel="precomputed")

    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    gram = np.dot(X, X.T)

    splitter = ShuffleSplit(test_size=0.25, random_state=0)
    train, test = next(splitter.split(X))

    # training part: square kernel block over the training rows
    X_train, y_train = _safe_split(plain_clf, X, y, train)
    K_train, y_train2 = _safe_split(kernel_clf, gram, y, train)
    assert_array_almost_equal(K_train, np.dot(X_train, X_train.T))
    assert_array_almost_equal(y_train, y_train2)

    # test part: kernel between test rows and training rows
    X_test, y_test = _safe_split(plain_clf, X, y, test, train)
    K_test, y_test2 = _safe_split(kernel_clf, gram, y, test, train)
    assert_array_almost_equal(K_test, np.dot(X_test, X_train.T))
    assert_array_almost_equal(y_test, y_test2)
Example #7
Source File: test_split.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_2d_y():
    # Smoke test for 1d, 2d-column and multi-label targets across all
    # splitters; multi-label may be rejected only with the expected error.
    n_rows = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(n_rows, 2))
    y = rng.randint(0, 3, size=(n_rows,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_rows, 3))
    groups = rng.randint(0, 3, size=(n_rows,))
    all_splitters = [
        LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
        RepeatedKFold(), RepeatedStratifiedKFold(),
        ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(), LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
        PredefinedSplit(test_fold=groups),
    ]
    for splitter in all_splitters:
        for target in (y, y_2d):
            list(splitter.split(X, target, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as exc:
            supported = ('binary', 'multiclass')
            wanted = "Supported target types are: {}. Got 'multilabel".format(
                supported)
            assert wanted in str(exc)
Example #8
Source File: test_multiclass.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_safe_split_with_precomputed_kernel():
    # Splitting a precomputed kernel with _safe_split has to agree with
    # splitting the raw features and recomputing the kernel afterwards.
    feature_clf = SVC()
    precomputed_clf = SVC(kernel="precomputed")

    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    K = np.dot(X, X.T)

    train, test = next(ShuffleSplit(test_size=0.25, random_state=0).split(X))

    # training rows only
    X_train, y_train = _safe_split(feature_clf, X, y, train)
    K_train, y_train_k = _safe_split(precomputed_clf, K, y, train)
    assert_array_almost_equal(K_train, np.dot(X_train, X_train.T))
    assert_array_almost_equal(y_train, y_train_k)

    # test rows against training rows
    X_test, y_test = _safe_split(feature_clf, X, y, test, train)
    K_test, y_test_k = _safe_split(precomputed_clf, K, y, test, train)
    assert_array_almost_equal(K_test, np.dot(X_test, X_train.T))
    assert_array_almost_equal(y_test, y_test_k)
Example #9
Source File: active.py    From chemml with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _update_train_test(self):
        """
        Handle the ``test_type`` parameter.

        Returns True without touching anything when the test type is
        'passive' or when queries already exist. Otherwise the current train
        and test indices/targets are pooled and re-split once at random
        (fixed seed), and nothing is returned.
        """
        if self.test_type == 'passive' or len(self._queries) > 0:
            return True

        # active test split: pool everything, then draw a new random split
        pooled_indices = np.concatenate([self.train_indices, self.test_indices], axis=0)
        pooled_y = np.concatenate([self._Y_train, self._Y_test], axis=0)
        splitter = ShuffleSplit(n_splits=1, test_size=self.test_size, train_size=None, random_state=90)
        train_pos, test_pos = next(splitter.split(pooled_indices))
        # test
        self._Y_test = pooled_y[test_pos]
        self.test_indices = pooled_indices[test_pos]
        # train
        self._Y_train = pooled_y[train_pos]
        self.train_indices = pooled_indices[train_pos]
Example #10
Source File: split.py    From gumpy with MIT License 5 votes vote down vote up
def stratified_shuffle_Split(features, labels, n_splits, test_size, random_state):
    """Split features and labels with a stratified ShuffleSplit cross-validator.

    All ``n_splits`` splits are generated, but only the last one is returned.

    Args:
        features: samples, indexable with an integer index array.
        labels: targets used for stratification, indexable the same way.
        n_splits: number of re-shuffling and splitting iterations.
        test_size: forwarded to ``StratifiedShuffleSplit``.
        random_state: forwarded to ``StratifiedShuffleSplit``.

    Returns:
        ``(X_train, X_test, Y_train, Y_test)`` of the last split.
    """
    # test_size is passed by keyword because scikit-learn >= 1.0 accepts
    # only n_splits positionally; keywords also work on older releases.
    cv = StratifiedShuffleSplit(n_splits=n_splits, test_size=test_size,
                                random_state=random_state)
    for train_index, test_index in cv.split(features, labels):
        X_train = features[train_index]
        X_test = features[test_index]
        Y_train = labels[train_index]
        Y_test = labels[test_index]
    return X_train, X_test, Y_train, Y_test


#Random permutation cross-validator 
Example #11
Source File: classifier_basetest.py    From scikit-multilearn with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def assertClassifierWorksWithCV(self, classifier):
        # Running the classifier through cross_val_score exercises the usual
        # estimator requirements (whether it is clonable, etc.) in one go.
        for X, y in self.get_multilabel_data_for_tests('dense'):
            expected_scores = 3
            shuffle_cv = model_selection.ShuffleSplit(
                n_splits=expected_scores, test_size=0.5, random_state=0)

            fold_scores = model_selection.cross_val_score(
                classifier, X, y=y, cv=shuffle_cv, scoring='accuracy')

            # one score per split
            self.assertEqual(len(fold_scores), expected_scores)
Example #12
Source File: test_model_selection.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_objectmapper(self):
        # Each listed attribute of the ``model_selection`` accessor must be
        # the very same object as the attribute of that name on ``ms``.
        df = pdml.ModelFrame([])

        mirrored_names = (
            # Splitter Classes
            'KFold', 'GroupKFold', 'StratifiedKFold',
            'LeaveOneGroupOut', 'LeavePGroupsOut', 'LeaveOneOut', 'LeavePOut',
            'ShuffleSplit', 'GroupShuffleSplit',
            # 'StratifiedShuffleSplit',
            'PredefinedSplit', 'TimeSeriesSplit',

            # Splitter Functions

            # Hyper-parameter optimizers
            'GridSearchCV', 'RandomizedSearchCV',
            'ParameterGrid', 'ParameterSampler',

            # Model validation
        )
        for name in mirrored_names:
            self.assertIs(getattr(df.model_selection, name), getattr(ms, name))
Example #13
Source File: test_model_selection.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_objectmapper_abbr(self):
        # Same identity checks as for the full accessor name, but through
        # the abbreviated ``df.ms`` accessor.
        df = pdml.ModelFrame([])

        mirrored_names = (
            # Splitter Classes
            'KFold', 'GroupKFold', 'StratifiedKFold',
            'LeaveOneGroupOut', 'LeavePGroupsOut', 'LeaveOneOut', 'LeavePOut',
            'ShuffleSplit', 'GroupShuffleSplit',
            # 'StratifiedShuffleSplit',
            'PredefinedSplit', 'TimeSeriesSplit',

            # Splitter Functions

            # Hyper-parameter optimizers
            'GridSearchCV', 'RandomizedSearchCV',
            'ParameterGrid', 'ParameterSampler',

            # Model validation
        )
        for name in mirrored_names:
            self.assertIs(getattr(df.ms, name), getattr(ms, name))
Example #14
Source File: split.py    From gumpy with MIT License 5 votes vote down vote up
def shuffle_Split(features, labels, n_splits, test_size, random_state):
    """Split features and labels with the ShuffleSplit random permutation
    cross-validator.

    All ``n_splits`` splits are generated, but only the last one is returned.

    Args:
        features: samples, indexable with an integer index array.
        labels: targets, indexable the same way.
        n_splits: number of re-shuffling and splitting iterations.
        test_size: forwarded to ``ShuffleSplit``.
        random_state: forwarded to ``ShuffleSplit``.

    Returns:
        ``(X_train, X_test, Y_train, Y_test)`` of the last split.
    """
    # test_size is passed by keyword because scikit-learn >= 1.0 accepts
    # only n_splits positionally; keywords also work on older releases.
    cv = ShuffleSplit(n_splits=n_splits, test_size=test_size,
                      random_state=random_state)
    for train_index, test_index in cv.split(features):
        X_train = features[train_index]
        X_test = features[test_index]
        Y_train = labels[train_index]
        Y_test = labels[test_index]
    return X_train, X_test, Y_train, Y_test
Example #15
Source File: DeepOCR.py    From pythonml with MIT License 5 votes vote down vote up
def FitModel(cnnc, A, Y, T, FN):
    """Fit ``cnnc`` on a random train split, report accuracy, then refit on all data.

    Predictions for every row of ``A`` are joined into strings and scored
    with ``SAcc`` against ``T`` on the train and test rows separately. A
    random sample of rows is printed together with its entry from ``FN``.

    Returns the refitted ``cnnc``.
    """
    print('Fitting model...')
    trn, tst = next(ShuffleSplit(n_splits = 1).split(A))
    #Fit the network
    cnnc.fit(A[trn], Y[trn])
    #Predict in 32 chunks; each prediction is a sequence of character indices
    row_chunks = np.array_split(np.arange(A.shape[0]), 32)
    YH = np.vstack([cnnc.predict(A[rows]) for rows in row_chunks])
    #Convert from sequence of char indices to strings
    PS = np.array([''.join(seq) for seq in YH])
    #Compute the accuracy
    train_acc = SAcc(PS[trn], T[trn])
    test_acc = SAcc(PS[tst], T[tst])
    print('Train: ' + str(train_acc))
    print('Test: ' + str(test_acc))
    for predicted, truth, fname in zip(PS, T, FN):
        #Randomly select rows to print
        if np.random.rand() > 0.99:
            print(fname + ': ' + truth + ' -> ' + predicted)
    print('Fitting with CV data...')
    #Fit remainder
    cnnc.SetMaxIter(4)
    cnnc.fit(A, Y)
    return cnnc
Example #16
Source File: predict.py    From NetMF with MIT License 5 votes vote down vote up
def predict_cv(X, y, train_ratio=0.2, n_splits=10, random_state=0, C=1.):
    """Score one-vs-rest logistic regression over repeated shuffle splits.

    For every split a fresh classifier is fitted on the training rows, the
    test-row probabilities are turned into label indicators with
    ``construct_indicator`` and the micro/macro F1 scores are logged. The
    averages over all splits are logged at the end; nothing is returned.
    """
    micro_scores, macro_scores = [], []
    splitter = ShuffleSplit(n_splits=n_splits, test_size=1-train_ratio,
            random_state=random_state)
    for train_index, test_index in splitter.split(X):
        print(train_index.shape, test_index.shape)
        # sanity checks: the two parts are disjoint and cover every row
        assert set(train_index).isdisjoint(set(test_index))
        assert len(train_index) + len(test_index) == X.shape[0]

        base_estimator = LogisticRegression(
                C=C,
                solver="liblinear",
                multi_class="ovr")
        clf = OneVsRestClassifier(base_estimator, n_jobs=-1)
        clf.fit(X[train_index], y[train_index])

        y_test = y[test_index]
        y_score = clf.predict_proba(X[test_index])
        y_pred = construct_indicator(y_score, y_test)

        mi = f1_score(y_test, y_pred, average="micro")
        ma = f1_score(y_test, y_pred, average="macro")
        logger.info("micro f1 %f macro f1 %f", mi, ma)
        micro_scores.append(mi)
        macro_scores.append(ma)
    logger.info("%d fold validation, training ratio %f", len(micro_scores), train_ratio)
    logger.info("Average micro %.2f, Average macro %.2f",
            np.mean(micro_scores) * 100,
            np.mean(macro_scores) * 100)
Example #17
Source File: acp.py    From nonconformist with MIT License 5 votes vote down vote up
def gen_samples(self, y, n_samples, problem_type):
		"""Yield ``n_samples`` (train, calibration) index pairs for ``y``.

		Classification problems are split with stratification on ``y``;
		every other problem type uses a plain shuffle split. The calibration
		share is ``self.cal_portion``.
		"""
		if problem_type == 'classification':
			splitter_cls, targets = StratifiedShuffleSplit, (y,)
		else:
			splitter_cls, targets = ShuffleSplit, ()

		splitter = splitter_cls(
			n_splits=n_samples,
			test_size=self.cal_portion
		)
		# only the number of rows matters, so a zero column stands in for X
		placeholder = np.zeros((y.size, 1))

		for train, cal in splitter.split(placeholder, *targets):
			yield train, cal


# -----------------------------------------------------------------------------
# Conformal ensemble
# ----------------------------------------------------------------------------- 
Example #18
Source File: Stacking.py    From Kaggle-Competition-Sberbank with MIT License 5 votes vote down vote up
def fit_predict(self, trainDf, testDf):
        """Fit the base models out-of-fold, fit the stacker and predict the test set.

        Each base model is fitted on every fold with sample weights ``w``.
        Its hold-out predictions fill one column of ``S_train`` and its
        test predictions, averaged over the folds, fill one column of
        ``S_test``. The stacker is then fitted on ``S_train`` and used to
        predict from ``S_test``.

        Args:
            trainDf: training frame with the feature columns plus
                'price_doc' (target) and 'w' (sample weight).
            testDf: test frame holding the feature columns.

        Returns:
            The stacker's predictions for ``testDf``.
        """
        features = trainDf.drop(['price_doc', 'w'], axis=1)
        X = features.values
        y = trainDf['price_doc'].values
        w = trainDf['w'].values
        T = testDf.values

        X_fillna = features.fillna(-999).values
        T_fillna = testDf.fillna(-999).values

        # NOTE(review): this is the legacy KFold(n, n_folds=...) call form;
        # confirm which KFold the file imports before changing it.
        folds = list(KFold(len(y), n_folds=self.n_folds, shuffle=True))
        S_train = np.zeros((X.shape[0], len(self.base_models)))
        S_test = np.zeros((T.shape[0], len(self.base_models)))
        for i, clf in enumerate(self.base_models):
            print('Training base model ' + str(i+1) + '...')
            # sklearn models cannot handle missing values, so they get the
            # filled matrices. The choice is made per model: rebinding X/T
            # (as before) also fed filled data to every later xgb/lgb model.
            if clf not in [xgb1, lgb1]:
                X_model, T_model = X_fillna, T_fillna
            else:
                X_model, T_model = X, T
            S_test_i = np.zeros((T.shape[0], len(folds)))
            for j, (train_idx, test_idx) in enumerate(folds):
                print('Training round ' + str(j+1) + '...')
                X_train = X_model[train_idx]
                y_train = y[train_idx]
                w_train = w[train_idx]
                X_holdout = X_model[test_idx]
                clf.fit(X_train, y_train, w_train)
                y_pred = clf.predict(X_holdout)
                S_train[test_idx, i] = y_pred
                S_test_i[:, j] = clf.predict(T_model)
            S_test[:, i] = S_test_i.mean(axis=1)
        self.S_train, self.S_test, self.y = S_train, S_test, y  # for diagnosis purpose
        # correlation of predictions by different models (axis by keyword:
        # pandas 2.x no longer accepts it positionally)
        self.corr = pd.concat([pd.DataFrame(S_train), trainDf['price_doc']], axis=1).corr()
        # cv_stack = ShuffleSplit(n_splits=6, test_size=0.2)
        # score_stacking = cross_val_score(self.stacker, S_train, y, cv=cv_stack, n_jobs=1, scoring='neg_mean_squared_error')
        # print(np.sqrt(-score_stacking.mean())) # CV result of stacking
        self.stacker.fit(S_train, y)
        y_pred = self.stacker.predict(S_test)
        return y_pred
Example #19
Source File: sushi_discrete_choice_dataset_reader.py    From cs-ranking with Apache License 2.0 5 votes vote down vote up
def get_single_train_test_split(self):
        """Return the first item ``self.splitter`` produces for one shuffle
        split of ``self.X`` (20% test share)."""
        shuffle = ShuffleSplit(
            n_splits=1, random_state=self.random_state, test_size=0.20
        )
        index_pairs = list(shuffle.split(self.X))
        partitions = list(self.splitter(index_pairs))
        return partitions[0]
Example #20
Source File: survey_dataset_reader.py    From cs-ranking with Apache License 2.0 5 votes vote down vote up
def get_single_train_test_split(self):
        """Shuffle-split ``self.X``/``self.Y`` once (30% test share).

        Returns ``(X_train, Y_train, X_test, Y_test)``.
        """
        shuffle = ShuffleSplit(
            n_splits=1, test_size=0.3, random_state=self.random_state
        )
        train_idx, test_idx = next(shuffle.split(self.X))
        X_train, Y_train = self.X[train_idx], self.Y[train_idx]
        X_test, Y_test = self.X[test_idx], self.Y[test_idx]
        return X_train, Y_train, X_test, Y_test
Example #21
Source File: intelligent_system_group_dataset_reader.py    From cs-ranking with Apache License 2.0 5 votes vote down vote up
def get_single_train_test_split(self, name="cold"):
        """Shuffle-split the ``name`` entry of ``self.X``/``self.Y`` once
        (30% test share).

        Returns ``(X_train, Y_train, X_test, Y_test)`` for that entry.
        """
        shuffle = ShuffleSplit(
            n_splits=1, test_size=0.3, random_state=self.random_state
        )
        train_idx, test_idx = next(shuffle.split(self.X[name]))
        X_train = self.X[name][train_idx]
        Y_train = self.Y[name][train_idx]
        X_test = self.X[name][test_idx]
        Y_test = self.Y[name][test_idx]
        return (X_train, Y_train, X_test, Y_test)
Example #22
Source File: sushi_dyad_ranking_dataset_reader.py    From cs-ranking with Apache License 2.0 5 votes vote down vote up
def get_single_train_test_split(self):
        """Return the first item ``self.splitter`` produces for one shuffle
        split of ``self.X`` (30% test share)."""
        shuffle = ShuffleSplit(
            n_splits=1, random_state=self.random_state, test_size=0.30
        )
        index_pairs = list(shuffle.split(self.X))
        partitions = list(self.splitter(index_pairs))
        return partitions[0]
Example #23
Source File: sushi_object_ranking_dataset_reader.py    From cs-ranking with Apache License 2.0 5 votes vote down vote up
def get_single_train_test_split(self):
        """Shuffle ``self.X`` once with a 30% test share, pass the resulting
        index pairs to ``self.splitter`` and return its first item."""
        single_shuffle = ShuffleSplit(
            n_splits=1, random_state=self.random_state, test_size=0.30
        )
        train_test_indices = list(single_shuffle.split(self.X))
        produced = list(self.splitter(train_test_indices))
        return produced[0]
Example #24
Source File: test_split.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_shuffle_split():
    # A float test fraction and the matching integer test count, whatever
    # its integer type, must yield identical train and test indices.
    def splits_for(test_size):
        return ShuffleSplit(test_size=test_size, random_state=0).split(X)

    ss1 = splits_for(0.2)
    ss2 = splits_for(2)
    ss3 = splits_for(np.int32(2))
    for typ in six.integer_types:
        ss4 = splits_for(typ(2))
    for t1, t2, t3, t4 in zip(ss1, ss2, ss3, ss4):
        # part 0 holds the train indices, part 1 the test indices
        for part in (0, 1):
            for left, right in ((t1, t2), (t2, t3), (t3, t4)):
                assert_array_equal(left[part], right[part])
Example #25
Source File: test_split.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_shufflesplit_errors():
    # When the {test|train}_size is a float/invalid, error is raised at init
    invalid_at_init = (
        dict(test_size=None, train_size=None),
        dict(test_size=2.0),
        dict(test_size=1.0),
        dict(test_size=0.1, train_size=0.95),
        dict(train_size=1j),
    )
    for kwargs in invalid_at_init:
        assert_raises(ValueError, ShuffleSplit, **kwargs)

    # When the {test|train}_size is an int, validation is based on the input X
    # and happens at split(...)
    invalid_at_split = (
        dict(test_size=11),
        dict(test_size=10),
        dict(test_size=8, train_size=3),
    )
    for kwargs in invalid_at_split:
        assert_raises(ValueError, next, ShuffleSplit(**kwargs).split(X))
Example #26
Source File: test_split.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_train_test_default_warning():
    # Passing only train_size must trigger a FutureWarning for each of the
    # shuffle-based splitters and for train_test_split.
    for splitter_cls in (ShuffleSplit, GroupShuffleSplit, StratifiedShuffleSplit):
        assert_warns(FutureWarning, splitter_cls, train_size=0.75)
    assert_warns(FutureWarning, train_test_split, range(3), train_size=0.75)
Example #27
Source File: estimator_utils.py    From EDeN with MIT License 5 votes vote down vote up
def estimate_predictive_performance(x_y, estimator=None, n_splits=10,
                                    random_state=1):
    """Return average-precision scores of ``estimator`` over shuffle splits.

    ``x_y`` is unpacked into features and targets. ``n_splits`` shuffle
    splits with a 30% test share are scored through ``cross_val_score``.
    """
    features, targets = x_y
    shuffle_cv = ShuffleSplit(
        n_splits=n_splits, test_size=0.3, random_state=random_state)
    ap_scorer = make_scorer(average_precision_score)
    return cross_val_score(
        estimator, features, targets, cv=shuffle_cv, scoring=ap_scorer)
Example #28
Source File: base.py    From deep_pipe with MIT License 5 votes vote down vote up
def train_test_split_groups(X, *, val_size, groups=None, **kwargs):
    """Split ``X`` once into train and validation parts.

    Uses ``GroupShuffleSplit`` when ``groups`` is given and ``ShuffleSplit``
    otherwise; ``kwargs`` are forwarded to the chosen splitter.

    Returns ``(X[train], X[val])``.
    """
    if groups is None:
        splitter_cls = ShuffleSplit
    else:
        splitter_cls = GroupShuffleSplit
    splitter = splitter_cls(test_size=val_size, **kwargs)
    train_idx, val_idx = next(splitter.split(X=X, groups=groups))
    return X[train_idx], X[val_idx]
Example #29
Source File: core.py    From HungaBunga with MIT License 5 votes vote down vote up
def cv_reg(x, test_size = 0.2, n_splits = 5, random_state=None):
    """Return the split generator of a shuffle splitter built for ``x``."""
    # NOTE(review): `ss` is presumably the file's alias for sklearn's
    # ShuffleSplit - confirm against the imports. Keywords are used because
    # scikit-learn >= 1.0 accepts only n_splits positionally.
    return ss(n_splits=n_splits, test_size=test_size, random_state=random_state).split(x)
Example #30
Source File: test_model_selection.py    From verde with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_cross_val_score_client(trend):
    "Test the deprecated dask Client interface"
    coords, data = trend[:2]
    model = Trend(degree=1)
    nsplits = 5
    cross_validator = ShuffleSplit(n_splits=nsplits, random_state=0)
    client = Client(processes=False)
    try:
        futures = cross_val_score(model, coords, data, cv=cross_validator, client=client)
        scores = [future.result() for future in futures]
    finally:
        # Close the client even when scoring or a future raises, so a
        # failing run does not leave it open.
        client.close()
    assert len(scores) == nsplits
    npt.assert_allclose(scores, 1)