Python sklearn.model_selection.cross_val_score() Examples
The following are 29 code examples of sklearn.model_selection.cross_val_score(), collected from open-source projects. Each example lists its original project, source file, and license. You may also want to check out the other available functions and classes of the sklearn.model_selection module.
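Before the project examples, here is a minimal, self-contained sketch of the basic call pattern; the dataset and classifier are chosen purely for illustration:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=1000)

# cross_val_score clones the estimator, fits one clone per training split,
# and returns one score per fold (stratified k-fold is used for classifiers).
scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
print(scores.mean(), scores.std())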
Example #1
Source File: mmbot.py From MaliciousMacroBot with MIT License | 8 votes |
def mmb_evaluate_model(self):
    """
    Returns scores from cross validation evaluation on the malicious / benign classifier
    """
    predictive_features = self.features['predictive_features']

    self.clf_X = self.modeldata[predictive_features].values
    self.clf_y = np.array(self.modeldata['label'])

    X_train, X_test, y_train, y_test = train_test_split(self.clf_X, self.clf_y, test_size=0.2, random_state=0)
    lb = LabelBinarizer()
    y_train = np.array([number[0] for number in lb.fit_transform(y_train)])
    eval_cls = RandomForestClassifier(n_estimators=100, max_features=.2)
    eval_cls.fit(X_train, y_train)

    recall = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='recall')
    precision = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='precision')
    accuracy = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='accuracy')
    f1_score = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='f1_macro')

    return {'accuracy': accuracy, 'f1': f1_score, 'precision': precision, 'recall': recall}
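A note on this pattern: each cross_val_score call runs a full 5-fold fit, so the four metrics cost four rounds of training (the eval_cls.fit call is also not used by the scoring, since cross_val_score clones and refits the estimator per fold). If the repeated fitting matters, cross_validate accepts several scorers in one pass; a minimal sketch using the names defined above:

from sklearn.model_selection import cross_validate

# one pass over the 5 folds, scoring each fitted clone four ways
results = cross_validate(eval_cls, X_train, y_train, cv=5,
                         scoring=['accuracy', 'precision', 'recall', 'f1_macro'])
# results is a dict: results['test_accuracy'], results['test_f1_macro'], ...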
Example #2
Source File: test_score_objects.py From Mastering-Elasticsearch-7.0 with MIT License | 8 votes |
def test_check_scoring_gridsearchcv():
    # test that check_scoring works on GridSearchCV and pipeline.
    # slightly redundant non-regression test.

    grid = GridSearchCV(LinearSVC(), param_grid={'C': [.1, 1]})
    scorer = check_scoring(grid, "f1")
    assert isinstance(scorer, _PredictScorer)

    pipe = make_pipeline(LinearSVC())
    scorer = check_scoring(pipe, "f1")
    assert isinstance(scorer, _PredictScorer)

    # check that cross_val_score definitely calls the scorer
    # and doesn't make any assumptions about the estimator apart from having a
    # fit.
    scores = cross_val_score(EstimatorWithFit(), [[1], [2], [3]], [1, 0, 1],
                             scoring=DummyScorer())
    assert_array_equal(scores, 1)
Example #3
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 7 votes |
def test_cross_val_score_precomputed():
    # test for svm with precomputed kernel
    svm = SVC(kernel="precomputed")
    iris = load_iris()
    X, y = iris.data, iris.target
    linear_kernel = np.dot(X, X.T)
    score_precomputed = cross_val_score(svm, linear_kernel, y)
    svm = SVC(kernel="linear")
    score_linear = cross_val_score(svm, X, y)
    assert_array_almost_equal(score_precomputed, score_linear)

    # test with callable
    svm = SVC(gamma='scale', kernel=lambda x, y: np.dot(x, y.T))
    score_callable = cross_val_score(svm, X, y)
    assert_array_almost_equal(score_precomputed, score_callable)

    # Error raised for non-square X
    svm = SVC(kernel="precomputed")
    assert_raises(ValueError, cross_val_score, svm, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cross_val_score, svm,
                  linear_kernel.tolist(), y)
Example #4
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 7 votes |
def test_cross_val_score_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        # 3-fold cross-validation is used, so we need at least 3 samples per class
        X_df, y_ser = InputFeatureType(X), TargetType(y2)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cross_val_score(clf, X_df, y_ser)
Example #5
Source File: grid_search_cv.py From text-classifier with Apache License 2.0 | 7 votes |
def search_cv(x_train, y_train, x_test, y_test, model=SVC()):
    # grid search to find the best hyperparameters
    # (the grid below tunes SVC hyperparameters: kernel, C and gamma,
    # so the model argument must be an SVC-style estimator)
    parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 2, 4], 'gamma': [0.125, 0.25, 0.5, 1, 2, 4]}
    clf = GridSearchCV(model, param_grid=parameters)
    grid_search = clf.fit(x_train, y_train)

    # report the results
    print("Best score: %0.3f" % grid_search.best_score_)
    print(grid_search.best_estimator_)

    # best params
    print('best params:', clf.best_params_)

    print('-----grid search end------------')
    print('on all train set')
    scores = cross_val_score(grid_search.best_estimator_, x_train, y_train, cv=3, scoring='accuracy')
    print(scores.mean(), scores)
    print('on test set')
    scores = cross_val_score(grid_search.best_estimator_, x_test, y_test, cv=3, scoring='accuracy')
    print(scores.mean(), scores)
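A caveat on the final step: cross_val_score(grid_search.best_estimator_, x_test, y_test, ...) refits the tuned model on folds of the test set, so the test set is no longer strictly held out. A common alternative for an unbiased estimate of the tuned model is nested cross-validation, where the whole search acts as the estimator; a minimal sketch reusing the parameters grid above:

from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.svm import SVC

# inner loop tunes kernel/C/gamma; outer loop estimates generalization
nested_scores = cross_val_score(GridSearchCV(SVC(), param_grid=parameters, cv=3),
                                x_train, y_train, cv=3, scoring='accuracy')
print(nested_scores.mean(), nested_scores)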
Example #6
Source File: test_nfpc.py From fylearn with MIT License | 7 votes |
def test_build_meowa_factory():
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)

    l = nfpc.FuzzyPatternClassifier(membership_factory=t_factory,
                                    aggregation_factory=nfpc.MEOWAFactory())

    from sklearn.model_selection import cross_val_score
    scores = cross_val_score(l, X, y, cv=10)
    mean = np.mean(scores)

    assert 0.80 < mean
Example #7
Source File: test_fpcga.py From fylearn with MIT License | 7 votes |
def test_classifier_iris():
    iris = load_iris()
    X = iris.data
    y = iris.target

    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)

    l = fpcga.FuzzyPatternClassifierGA(iterations=100, random_state=1)

    from sklearn.model_selection import cross_val_score
    scores = cross_val_score(l, X, y, cv=10)

    assert len(scores) == 10
    assert np.mean(scores) > 0.6

    mean = np.mean(scores)
    print("mean", mean)

    assert 0.92 == pytest.approx(mean, 0.01)
Example #8
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_score_memmap():
    # Ensure a scalar score of memmap type is accepted
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()
    tf = tempfile.NamedTemporaryFile(mode='wb', delete=False)
    tf.write(b'Hello world!!!!!')
    tf.close()
    scores = np.memmap(tf.name, dtype=np.float64)
    score = np.memmap(tf.name, shape=(), mode='r', dtype=np.float64)
    try:
        cross_val_score(clf, X, y, scoring=lambda est, X, y: score)
        # non-scalar should still fail
        assert_raises(ValueError, cross_val_score, clf, X, y,
                      scoring=lambda est, X, y: scores)
    finally:
        # Best effort to release the mmap file handles before deleting the
        # backing file under Windows
        scores, score = None, None
        for _ in range(3):
            try:
                os.unlink(tf.name)
                break
            except WindowsError:
                sleep(1.)
Example #9
Source File: test_pyglmnet.py From pyglmnet with MIT License | 6 votes |
def test_cv():
    """Simple CV check."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()
    cv = KFold(n_splits=5)

    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)
    # check that it returns 5 scores
    scores = cross_val_score(glm_normal, X, y, cv=cv)
    assert len(scores) == 5

    param_grid = [{'alpha': np.linspace(0.01, 0.99, 2)},
                  {'reg_lambda': np.logspace(np.log(0.5), np.log(0.01), 10,
                                             base=np.exp(1))}]
    glmcv = GridSearchCV(glm_normal, param_grid, cv=cv)
    glmcv.fit(X, y)
Example #10
Source File: utilities.py From Python-Machine-Learning-Cookbook-Second-Edition with MIT License | 6 votes |
def print_accuracy_report(classifier, X, y, num_validations=5):
    accuracy = model_selection.cross_val_score(classifier, X, y,
                                               scoring='accuracy', cv=num_validations)
    print("Accuracy: " + str(round(100*accuracy.mean(), 2)) + "%")

    f1 = model_selection.cross_val_score(classifier, X, y,
                                         scoring='f1_weighted', cv=num_validations)
    print("F1: " + str(round(100*f1.mean(), 2)) + "%")

    precision = model_selection.cross_val_score(classifier, X, y,
                                                scoring='precision_weighted', cv=num_validations)
    print("Precision: " + str(round(100*precision.mean(), 2)) + "%")

    recall = model_selection.cross_val_score(classifier, X, y,
                                             scoring='recall_weighted', cv=num_validations)
    print("Recall: " + str(round(100*recall.mean(), 2)) + "%")
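A hypothetical call, just to show the expected inputs (the dataset and classifier here are illustrative, not from the book):

from sklearn import model_selection  # module alias the function body relies on
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier

X, y = load_breast_cancer(return_X_y=True)
print_accuracy_report(RandomForestClassifier(n_estimators=100), X, y, num_validations=5)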
Example #11
Source File: test_nfpc.py From fylearn with MIT License | 6 votes |
def test_build_ps_owa_factory():
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)

    l = nfpc.FuzzyPatternClassifier(
        membership_factory=t_factory,
        aggregation_factory=nfpc.GAOWAFactory(optimizer=nfpc.ps_owa_optimizer())
    )

    from sklearn.model_selection import cross_val_score
    scores = cross_val_score(l, X, y, cv=10)
    mean = np.mean(scores)
    print("mean", mean)

    assert 0.92 < mean
Example #12
Source File: test_crossvalidation.py From pysaliency with MIT License | 6 votes |
def test_image_crossvalidation(stimuli, fixation_trains):
    gsmm = GaussianSaliencyModel()
    cv = ScikitLearnImageCrossValidationGenerator(stimuli, fixation_trains)
    assert unpack_crossval(cv) == [
        ([False, False, False, False, False, True, True, True, True],
         [True, True, True, True, True, False, False, False, False]),
        ([True, True, True, True, True, False, False, False, True],
         [False, False, False, False, False, True, True, True, False]),
        ([True, True, True, True, True, True, True, True, False],
         [False, False, False, False, False, False, False, False, True])
    ]

    X = fixations_to_scikit_learn(fixation_trains, normalize=stimuli, add_shape=True)

    assert cross_val_score(
        RegularizedKernelDensityEstimator(bandwidth=0.1, regularization=0.1),
        X,
        cv=cv,
        verbose=0).sum()
Example #13
Source File: classification.py From brainiak with Apache License 2.0 | 6 votes |
def example_of_cross_validation_using_model_selection(raw_data, labels, num_subjects, num_epochs_per_subj):
    # NOTE: this method does not work for sklearn.svm.SVC with precomputed kernel
    # when the kernel matrix is computed in portions; also, this method only works
    # for self-correlation, i.e. correlation between the same data matrix.

    # no shrinking, set C=1
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    #logit_clf = LogisticRegression()
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    # doing leave-one-subject-out cross validation
    # no shuffling in cv
    skf = model_selection.StratifiedKFold(n_splits=num_subjects,
                                          shuffle=False)
    scores = model_selection.cross_val_score(clf, list(zip(raw_data, raw_data)),
                                             y=labels,
                                             cv=skf)
    print(scores)
    logger.info(
        'the overall cross validation accuracy is %.2f' %
        np.mean(scores)
    )
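A side note: StratifiedKFold(n_splits=num_subjects, shuffle=False) only realizes leave-one-subject-out here because the epochs arrive ordered by subject with balanced labels. sklearn can state the intent directly with LeaveOneGroupOut and a groups array; a sketch, assuming num_epochs_per_subj consecutive epochs per subject:

import numpy as np
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score

# one group id per epoch, subject by subject
groups = np.repeat(np.arange(num_subjects), num_epochs_per_subj)
scores = cross_val_score(clf, list(zip(raw_data, raw_data)), y=labels,
                         groups=groups, cv=LeaveOneGroupOut())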
Example #14
Source File: site_stability.py From CatLearn with GNU General Public License v3.0 | 6 votes |
def get_chromosome_score(self, X_chromosome):
    """ Computes fitness using the subset of data in X_chromosome.

    :param X_chromosome: subset of full data set, containing only a selection of the features.
    :return: mean R2 or keras history last column entry.
    """
    np.random.seed(self.random_state)

    # Use either cross validation
    if self.scoring == 'cv':
        scores = cross_val_score(self.clf, X_chromosome, np.array(self.y), cv=self.n_cv)
        return np.mean(scores)

    # Or keras history in the case of neural networks (based on keras/tensorflow)
    else:
        try:
            history = self.clf.fit(X_chromosome, np.array(self.y))
            return history.history[self.scoring][-1]
        except Exception:
            raise ValueError('Use either "cv" or keras history metrics.')
Example #15
Source File: gaFeatureSelection.py From GeneticAlgorithmForFeatureSelection with MIT License | 6 votes |
def getFitness(individual, X, y):
    """
    Feature subset fitness function
    """

    if individual.count(0) != len(individual):
        # get index with value 0
        cols = [index for index in range(
            len(individual)) if individual[index] == 0]

        # get features subset
        X_parsed = X.drop(X.columns[cols], axis=1)
        X_subset = pd.get_dummies(X_parsed)

        # apply classification algorithm
        clf = LogisticRegression()

        return (avg(cross_val_score(clf, X_subset, y, cv=5)),)
    else:
        return (0,)
Example #16
Source File: mvpa_voxelselector.py From brainiak with Apache License 2.0 | 6 votes |
def _sfn(data, mask, myrad, bcast_var):
    """Score classifier on searchlight data using cross-validation.

    The classifier is in `bcast_var[2]`. The labels are in `bcast_var[0]`.
    The number of cross-validation folds is in `bcast_var[1]`.
    """
    clf = bcast_var[2]
    masked_data = data[0][mask, :].T
    # print(l[0].shape, mask.shape, data.shape)
    skf = model_selection.StratifiedKFold(n_splits=bcast_var[1],
                                          shuffle=False)
    accuracy = np.mean(model_selection.cross_val_score(clf, masked_data,
                                                       y=bcast_var[0],
                                                       cv=skf, n_jobs=1))
    return accuracy
Example #17
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_cross_val_score_predict_groups():
    # Check if ValueError (when groups is None) propagates to cross_val_score
    # and cross_val_predict
    # And also check if groups is correctly passed to the cv object
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)

    clf = SVC(kernel="linear")

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_score, estimator=clf, X=X, y=y, cv=cv)
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_predict, estimator=clf, X=X, y=y, cv=cv)
Example #18
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_cross_val_score_with_score_func_classification():
    iris = load_iris()
    clf = SVC(kernel='linear')

    # Default score (should be the accuracy score)
    scores = cross_val_score(clf, iris.data, iris.target, cv=5)
    assert_array_almost_equal(scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # Correct classification score (aka. zero / one score) - should be the
    # same as the default estimator score
    zo_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="accuracy", cv=5)
    assert_array_almost_equal(zo_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # F1 score (classes are balanced, so f1_score should equal the zero/one
    # score)
    f1_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="f1_weighted", cv=5)
    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2)
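The scoring strings above ('accuracy', 'f1_weighted') resolve to built-in scorer objects; a custom metric can be wrapped the same way with make_scorer. A minimal sketch, reusing clf and iris from the test:

from sklearn.metrics import make_scorer, f1_score

# make_scorer turns a metric function into a scorer callable as
# scorer(estimator, X, y), which is what cross_val_score expects
macro_f1 = make_scorer(f1_score, average='macro')
scores = cross_val_score(clf, iris.data, iris.target, scoring=macro_f1, cv=5)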
Example #19
Source File: test_crossvalidation.py From pysaliency with MIT License | 6 votes |
def test_image_subject_crossvalidation(stimuli, fixation_trains):
    gsmm = GaussianSaliencyModel()
    cv = ScikitLearnImageSubjectCrossValidationGenerator(stimuli, fixation_trains)
    assert unpack_crossval(cv) == [
        ([False, False, False, True, True, False, False, False, False],
         [True, True, True, False, False, False, False, False, False]),
        ([True, True, True, False, False, False, False, False, False],
         [False, False, False, True, True, False, False, False, False])
    ]

    X = fixations_to_scikit_learn(fixation_trains, normalize=stimuli, add_shape=True)

    assert cross_val_score(
        RegularizedKernelDensityEstimator(bandwidth=0.1, regularization=0.1),
        X,
        cv=cv,
        verbose=0).sum()
Example #20
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_precomputed_cross_validation():
    # Ensure array is split correctly
    rng = np.random.RandomState(0)
    X = rng.rand(20, 2)
    D = pairwise_distances(X, metric='euclidean')
    y = rng.randint(3, size=20)
    for Est in (neighbors.KNeighborsClassifier,
                neighbors.RadiusNeighborsClassifier,
                neighbors.KNeighborsRegressor,
                neighbors.RadiusNeighborsRegressor):
        metric_score = cross_val_score(Est(), X, y)
        precomp_score = cross_val_score(Est(metric='precomputed'), D, y)
        assert_array_equal(metric_score, precomp_score)
Example #21
Source File: test_naive_bayes.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_check_accuracy_on_digits():
    # Non regression test to make sure that any further refactoring / optim
    # of the NB models do not harm the performance on a slightly non-linearly
    # separable dataset
    digits = load_digits()
    X, y = digits.data, digits.target
    binary_3v8 = np.logical_or(digits.target == 3, digits.target == 8)
    X_3v8, y_3v8 = X[binary_3v8], y[binary_3v8]

    # Multinomial NB
    scores = cross_val_score(MultinomialNB(alpha=10), X, y, cv=10)
    assert_greater(scores.mean(), 0.86)

    scores = cross_val_score(MultinomialNB(alpha=10), X_3v8, y_3v8, cv=10)
    assert_greater(scores.mean(), 0.94)

    # Bernoulli NB
    scores = cross_val_score(BernoulliNB(alpha=10), X > 4, y, cv=10)
    assert_greater(scores.mean(), 0.83)

    scores = cross_val_score(BernoulliNB(alpha=10), X_3v8 > 4, y_3v8, cv=10)
    assert_greater(scores.mean(), 0.92)

    # Gaussian NB
    scores = cross_val_score(GaussianNB(), X, y, cv=10)
    assert_greater(scores.mean(), 0.77)

    scores = cross_val_score(GaussianNB(var_smoothing=0.1), X, y, cv=10)
    assert_greater(scores.mean(), 0.89)

    scores = cross_val_score(GaussianNB(), X_3v8, y_3v8, cv=10)
    assert_greater(scores.mean(), 0.86)
Example #22
Source File: test_search.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_pandas_input():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((DataFrame, Series))
    except ImportError:
        pass

    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)

    for InputFeatureType, TargetType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)

        def check_df(x):
            return isinstance(x, InputFeatureType)

        def check_series(x):
            return isinstance(x, TargetType)

        clf = CheckingClassifier(check_X=check_df, check_y=check_series)

        grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]})
        grid_search.fit(X_df, y_ser).score(X_df, y_ser)
        grid_search.predict(X_df)
        assert hasattr(grid_search, "cv_results_")
Example #23
Source File: tests.py From scikit-rebate with MIT License | 5 votes |
def test_multisurf_pipeline_parallel():
    """Check: Data (Binary Endpoint, Discrete Features): MultiSURF works in a sklearn pipeline when MultiSURF is parallelized"""
    np.random.seed(320931)

    clf = make_pipeline(MultiSURF(n_features_to_select=2, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3)) > 0.7
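Why the pipeline matters here: cross_val_score clones and refits the entire pipeline per fold, so MultiSURF's feature selection is re-run on each training split and never sees the held-out fold. The same no-leakage pattern with stock sklearn pieces (SelectKBest standing in for MultiSURF, synthetic data standing in for the test fixtures):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

X, y = make_classification(n_samples=200, n_features=20, random_state=0)
clf = make_pipeline(SelectKBest(f_classif, k=5),
                    RandomForestClassifier(n_estimators=100))
# the selector is fit inside each training split, so nothing leaks
print(cross_val_score(clf, X, y, cv=3).mean())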
Example #24
Source File: eval_train_test.py From fanci with GNU General Public License v3.0 | 5 votes |
def leave_one_group_out_deprecated(clf, data_set: GroupedDataSet, n_jobs=8):
    log.info('Starting leave one group out cv.')
    logo = LeaveOneGroupOut()
    domains, labels, groups = data_set.expand()
    log.info('Set dimensions: {!s} x {!s} x {!s}'.format(len(domains), len(labels), len(groups)))
    log.info('Starting feature extraction.')
    feature_matrix = extract_all_features(domains)

    if isinstance(clf, SVC):
        std_scale = preprocessing.StandardScaler()
        feature_matrix = std_scale.fit_transform(feature_matrix)

    log.info('Feature extraction finished.')
    scores = cross_val_score(clf, feature_matrix, labels, groups=groups, cv=logo,
                             scoring=stats_metrics.multi_scorer_gridsearch,
                             n_jobs=n_jobs, verbose=2)
    return scores
Example #25
Source File: tests.py From scikit-rebate with MIT License | 5 votes |
def test_multisurf_pipeline():
    """Check: Data (Binary Endpoint, Discrete Features): MultiSURF works in a sklearn pipeline"""
    np.random.seed(320931)

    clf = make_pipeline(MultiSURF(n_features_to_select=2),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3, n_jobs=-1)) > 0.7
Example #26
Source File: tests.py From scikit-rebate with MIT License | 5 votes |
def test_multisurfstar_pipeline_parallel():
    """Check: Data (Binary Endpoint, Discrete Features): MultiSURF* works in a sklearn pipeline when MultiSURF* is parallelized"""
    np.random.seed(320931)

    clf = make_pipeline(MultiSURFstar(n_features_to_select=2, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3)) > 0.7
Example #27
Source File: tests.py From scikit-rebate with MIT License | 5 votes |
def test_multisurfstar_pipeline():
    """Check: Data (Binary Endpoint, Discrete Features): MultiSURF* works in a sklearn pipeline"""
    np.random.seed(320931)

    clf = make_pipeline(MultiSURFstar(n_features_to_select=2),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3, n_jobs=-1)) > 0.7
Example #28
Source File: tests.py From scikit-rebate with MIT License | 5 votes |
def test_turf_pipeline_parallel():
    """Check: Data (Binary Endpoint, Discrete Features): TuRF with ReliefF works in a sklearn pipeline when TuRF is parallelized"""
    np.random.seed(49082)

    clf = make_pipeline(TuRF(core_algorithm="ReliefF", n_features_to_select=2,
                             pct=0.5, n_neighbors=100, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels,
                                   fit_params={'turf__headers': headers},
                                   cv=3)) > 0.7
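A note on fit_params here: with a pipeline, keys follow the step-name prefix convention, so 'turf__headers' routes the headers keyword to the fit method of the step named 'turf' (make_pipeline derives step names by lowercasing the class name). Inside each fold this amounts to the following, where turf_step is a hypothetical name for that pipeline step, shown only to illustrate the routing:

# what the 'turf__' prefix routes to within the pipeline's fit
turf_step.fit(X_fold, y_fold, headers=headers)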
Example #29
Source File: tests.py From scikit-rebate with MIT License | 5 votes |
def test_surfstar_pipeline_parallel():
    """Check: Data (Binary Endpoint, Discrete Features): SURF* works in a sklearn pipeline when SURF* is parallelized"""
    np.random.seed(9238745)

    clf = make_pipeline(SURFstar(n_features_to_select=2, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3)) > 0.7