Python sklearn.model_selection.GridSearchCV() Examples
The following are 30 code examples of sklearn.model_selection.GridSearchCV().
Example #1
Source File: From cwcf with MIT License | 8 votes |
def get_full_rbf_svm_clf(train_x, train_y, c_range=None, gamma_range=None):
    """Grid-search ``C`` and ``gamma`` for an SVC and return a new, unfitted SVC.

    :param train_x: training samples handed to ``GridSearchCV.fit``.
    :param train_y: training labels handed to ``GridSearchCV.fit``.
    :param c_range: candidate values for ``C``. Although it defaults to
        ``None``, ``len(c_range)`` is taken below, so a sequence is required.
    :param gamma_range: candidate values for ``gamma``; same remark as for
        ``c_range``.
    :return: ``SVC(C=best_C, gamma=best_gamma, verbose=True)``; the caller is
        expected to fit it.
    """
    param_grid = dict(gamma=gamma_range, C=c_range)
    cv = StratifiedShuffleSplit(n_splits=2, test_size=0.2, random_state=42)
    grid = GridSearchCV(SVC(cache_size=1024), param_grid=param_grid, cv=cv,
                        n_jobs=14, verbose=10)
    grid.fit(train_x, train_y)

    print("The best parameters are %s with a score of %0.2f"
          % (grid.best_params_, grid.best_score_))

    # NOTE(review): the reshape assumes candidates are enumerated with C
    # varying slowest and gamma fastest -- confirm against ParameterGrid.
    scores = grid.cv_results_['mean_test_score'].reshape(len(c_range),
                                                         len(gamma_range))
    print("Scores:")
    print(scores)
    print("c_range:", c_range)
    print("gamma_range:", gamma_range)

    c_best = grid.best_params_['C']
    gamma_best = grid.best_params_['gamma']

    clf = SVC(C=c_best, gamma=gamma_best, verbose=True)
    return clf

#----------------
Example #2
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 8 votes |
def test_check_scoring_gridsearchcv():
    """check_scoring returns a _PredictScorer for GridSearchCV and pipelines."""
    # test that check_scoring works on GridSearchCV and pipeline.
    # slightly redundant non-regression test.

    grid = GridSearchCV(LinearSVC(), param_grid={'C': [.1, 1]})
    scorer = check_scoring(grid, "f1")
    assert isinstance(scorer, _PredictScorer)

    pipe = make_pipeline(LinearSVC())
    scorer = check_scoring(pipe, "f1")
    assert isinstance(scorer, _PredictScorer)

    # check that cross_val_score definitely calls the scorer
    # and doesn't make any assumptions about the estimator apart from having a
    # fit.
    scores = cross_val_score(EstimatorWithFit(), [[1], [2], [3]], [1, 0, 1],
                             scoring=DummyScorer())
    assert_array_equal(scores, 1)
Example #3
Source File: From text-classifier with Apache License 2.0 | 7 votes |
def search_cv(x_train, y_train, x_test, y_test, model=GradientBoostingClassifier(n_estimators=30)):
    """Grid-search ``model`` on the training set and print cross-validated accuracy.

    Prints the best score, estimator and parameters found by the search, then
    the 3-fold accuracy of the best estimator on the training data and,
    separately, on the test data. Returns ``None``.

    NOTE(review): the grid uses ``kernel`` / ``C`` / ``gamma``, which look like
    SVC parameters rather than parameters of the default
    ``GradientBoostingClassifier`` -- confirm callers always pass a compatible
    ``model``.
    """
    # grid search to find the best parameters
    parameters = {'kernel': ('linear', 'rbf'),
                  'C': [1, 2, 4],
                  'gamma': [0.125, 0.25, 0.5, 1, 2, 4]}
    clf = GridSearchCV(model, param_grid=parameters)
    grid_search = clf.fit(x_train, y_train)

    # score the results
    print("Best score: %0.3f" % grid_search.best_score_)
    print(grid_search.best_estimator_)
    # best params
    print('best prarams:', clf.best_params_)
    print('-----grid search end------------')

    print('on all train set')
    scores = cross_val_score(grid_search.best_estimator_, x_train, y_train,
                             cv=3, scoring='accuracy')
    print(scores.mean(), scores)

    print('on test set')
    scores = cross_val_score(grid_search.best_estimator_, x_test, y_test,
                             cv=3, scoring='accuracy')
    print(scores.mean(), scores)
Example #4
Source File: From pyglmnet with MIT License | 6 votes |
def test_cv():
    """Simple CV check."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()
    cv = KFold(n_splits=5)

    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)
    # check that it returns 5 scores
    scores = cross_val_score(glm_normal, X, y, cv=cv)
    assert len(scores) == 5

    # Two independent sub-grids: one over alpha, one over reg_lambda.
    param_grid = [{'alpha': np.linspace(0.01, 0.99, 2)},
                  {'reg_lambda': np.logspace(np.log(0.5), np.log(0.01), 10,
                                             base=np.exp(1))}]
    glmcv = GridSearchCV(glm_normal, param_grid, cv=cv)
    glmcv.fit(X, y)
Example #5
Source File: From Speech_Signal_Processing_and_Classification with MIT License | 6 votes |
def paramTuning(features_train, labels_train, nfolds):
    """Grid-search ``C`` and ``gamma`` for an RBF-kernel SVC and print the best pair.

    :param features_train: training feature matrix passed to ``fit``.
    :param labels_train: training labels passed to ``fit``.
    :param nfolds: value forwarded as ``cv`` to ``GridSearchCV``.
    :return: ``None``; the best parameters are printed.
    """
    # candidate values for C
    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
    # candidate values for gamma
    # NOTE(review): 0.00000001 is listed twice -- possibly a typo; kept as-is.
    gammas = [0.00000001, 0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001,
              0.001, 0.01, 0.1, 1, 10, 100]
    # build the parameter grid
    param_grid = {'C': Cs, 'gamma': gammas}
    # search over every combination in the grid
    grid_search = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=nfolds)
    # fit on the training data
    grid_search.fit(features_train, labels_train)
    # show the best pair of parameters (call form works on Python 2 and 3)
    print(grid_search.best_params_)
Example #6
Source File: From Speech_Signal_Processing_and_Classification with MIT License | 6 votes |
def paramTuning(features_train, labels_train, nfolds):
    """Grid-search ``C`` and ``gamma`` for a polynomial-kernel SVC and print the best pair.

    :param features_train: training feature matrix passed to ``fit``.
    :param labels_train: training labels passed to ``fit``.
    :param nfolds: value forwarded as ``cv`` to ``GridSearchCV``.
    :return: ``None``; the best parameters are printed.
    """
    # candidate values for C
    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
    # candidate values for gamma
    # NOTE(review): 0.00000001 is listed twice -- possibly a typo; kept as-is.
    gammas = [0.00000001, 0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001,
              0.001, 0.01, 0.1, 1, 10, 100, 1000]
    # build the parameter grid
    param_grid = {'C': Cs, 'gamma': gammas}
    # search over every combination in the grid
    grid_search = GridSearchCV(SVC(kernel='poly'), param_grid, cv=nfolds)
    # fit on the training data
    grid_search.fit(features_train, labels_train)
    # show the best pair of parameters (call form works on Python 2 and 3)
    print(grid_search.best_params_)
Example #7
Source File: From fireTS with MIT License | 6 votes |
def grid_search(self, X, y, para_grid, **params):
    """
    Perform grid search on the base_estimator. The function first generates
    the lag features and predicting targets, and then calls ``GridSearchCV``
    in scikit-learn package. The best parameters found are applied to this
    object through ``set_params``.

    :param array-like X: exogenous input time series, shape = (n_samples,
                         n_exog_inputs)
    :param array-like y: target time series to predict, shape = (n_samples)
    :param dict para_grid: use the same format in ``GridSearchCV`` in
                           scikit-learn package.
    :param dict params: other keyword arguments that can be passed into
                        ``GridSearchCV`` in scikit-learn package.
    """
    grid = GridSearchCV(self.base_estimator, para_grid, **params)
    X, y = self._check_and_preprocess_X_y(X, y)
    features, target = self._preprocess_data(X, y)
    # The search runs on the derived features/targets, not on raw X, y.
    grid.fit(features, target)
    self.set_params(**grid.best_params_)
Example #8
Source File: From scikit-gstat with MIT License | 6 votes |
def test_find_best_model_future_cv(self):
    """
    cv parameter will change to 5 in sklearn 0.22
    This will change the result, though
    """
    parameters = dict(
        model=('spherical', 'gaussian', 'exponential', 'matern')
    )
    gs = GridSearchCV(
        VariogramEstimator(n_lags=15, normalize=False),
        parameters,
        cv=5
    )

    # NOTE(review): the first argument of fit() was lost when this listing
    # was extracted; ``self.c`` is assumed to be the coordinate fixture that
    # pairs with ``self.v`` -- confirm against the test class.
    gs = gs.fit(self.c, self.v)

    self.assertEqual(gs.best_params_['model'], 'matern')
Example #9
Source File: From scikit-gstat with MIT License | 6 votes |
def test_find_best_model(self):
    """
    Use GridSearchCV to find the best model for the given data
    which should be the spherical model
    """
    parameters = dict(
        model=('spherical', 'gaussian', 'exponential', 'matern')
    )
    gs = GridSearchCV(
        VariogramEstimator(n_lags=15, normalize=False),
        parameters,
        cv=3
    )

    # NOTE(review): the first argument of fit() was lost when this listing
    # was extracted; ``self.c`` is assumed to be the coordinate fixture that
    # pairs with ``self.v`` -- confirm against the test class.
    gs = gs.fit(self.c, self.v)

    self.assertEqual(gs.best_params_['model'], 'spherical')
Example #10
Source File: From Speech_Signal_Processing_and_Classification with MIT License | 6 votes |
def paramTuning(features_train, labels_train, nfolds):
    """Grid-search ``C`` and ``gamma`` for an RBF-kernel SVC and print the best pair.

    :param features_train: training feature matrix passed to ``fit``.
    :param labels_train: training labels passed to ``fit``.
    :param nfolds: value forwarded as ``cv`` to ``GridSearchCV``.
    :return: ``None``; the best parameters are printed.
    """
    # candidate values for C
    Cs = [1, 10, 100, 1000, 10000]
    # candidate values for gamma
    # NOTE(review): 0.00000001 is listed twice -- possibly a typo; kept as-is.
    gammas = [0.00000001, 0.00000001, 0.0000001, 0.000001, 0.00001]
    # build the parameter grid
    param_grid = {'C': Cs, 'gamma': gammas}
    # search over every combination in the grid
    grid_search = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=nfolds)
    # fit on the training data
    grid_search.fit(features_train, labels_train)
    # show the best pair of parameters (call form works on Python 2 and 3)
    print(grid_search.best_params_)
Example #11
Source File: From skorch with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_grid_search_with_slds_X_and_slds_y(
        self, slds, slds_y, classifier_module):
    """GridSearchCV.fit accepts the ``slds`` / ``slds_y`` fixtures without raising."""
    from sklearn.model_selection import GridSearchCV
    from skorch import NeuralNetClassifier

    net = NeuralNetClassifier(
        classifier_module,
        train_split=False,
        verbose=0,
    )
    params = {
        'lr': [0.01, 0.02],
        'max_epochs': [10, 20],
    }
    # NOTE(review): ``iid`` has been removed from GridSearchCV in newer
    # scikit-learn releases -- confirm the targeted version.
    gs = GridSearchCV(net, params, refit=False, cv=3, scoring='accuracy',
                      iid=True)
    gs.fit(slds, slds_y)  # does not raise
Example #12
Source File: From malss with MIT License | 6 votes |
def __tune_parameters(self):
    """Grid-search every algorithm in ``self.algorithms`` and record the results.

    For each algorithm, the best estimator, best score, best parameters and
    the list of ``(params, mean_test_score, std_test_score)`` tuples are
    written back to the algorithm object. Afterwards the best algorithm is
    selected through ``__search_best_algorithm``.
    """
    for algorithm in self.algorithms:
        if self.verbose:
            print(' %s' % algorithm.name)
        estimator = algorithm.estimator
        parameters = algorithm.parameters
        # NOTE(review): one keyword argument between ``parameters`` and
        # ``scoring`` was lost when this listing was extracted; ``cv=self.cv``
        # is assumed -- confirm against the original project.
        # NOTE(review): ``iid`` has been removed from GridSearchCV in newer
        # scikit-learn releases -- confirm the targeted version.
        clf = GridSearchCV(
            estimator, parameters, cv=self.cv, scoring=self.scoring,
            iid=False, n_jobs=self.n_jobs)
        # NOTE(review): the fit() call was lost as well; the attributes used
        # for the training data (``self.data.X`` / ``self.data.y``) are an
        # assumption -- confirm.
        clf.fit(self.data.X, self.data.y)

        results = clf.cv_results_
        grid_scores = list(zip(results['params'],
                               results['mean_test_score'],
                               results['std_test_score']))

        algorithm.estimator = clf.best_estimator_
        algorithm.best_score = clf.best_score_
        algorithm.best_params = clf.best_params_
        algorithm.grid_scores = grid_scores

    # NOTE(review): assumed to run once after all algorithms are tuned; the
    # nesting was not recoverable from the collapsed listing.
    self.__search_best_algorithm()
Example #13
Source File: From scVI with MIT License | 6 votes |
def compute_accuracy_svc(
    data_train,
    labels_train,
    data_test,
    labels_test,
    param_grid=None,
    verbose=0,
    max_iter=-1,
):
    """Build a 3-fold grid search over an SVC and evaluate it.

    :param data_train: training data forwarded to ``compute_accuracy_classifier``.
    :param labels_train: training labels forwarded likewise.
    :param data_test: test data forwarded likewise.
    :param labels_test: test labels forwarded likewise.
    :param param_grid: grid for ``GridSearchCV``; when ``None`` a default grid
        over linear and RBF kernels is used.
    :param verbose: verbosity forwarded to ``GridSearchCV``.
    :param max_iter: ``max_iter`` forwarded to ``SVC``.
    :return: whatever ``compute_accuracy_classifier`` returns for the search
        object and the given data.
    """
    if param_grid is None:
        param_grid = [
            {"C": [1, 10, 100, 1000], "kernel": ["linear"]},
            {"C": [1, 10, 100, 1000], "gamma": [0.001, 0.0001], "kernel": ["rbf"]},
        ]
    svc = SVC(max_iter=max_iter)
    clf = GridSearchCV(svc, param_grid, verbose=verbose, cv=3)
    return compute_accuracy_classifier(
        clf, data_train, labels_train, data_test, labels_test
    )
Example #14
Source File: From skorch with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_grid_search_with_slds_works(
        self, slds, y, classifier_module):
    """GridSearchCV.fit accepts the ``slds`` fixture as X without raising."""
    from sklearn.model_selection import GridSearchCV
    from skorch import NeuralNetClassifier

    net = NeuralNetClassifier(
        classifier_module,
        train_split=False,
        verbose=0,
    )
    params = {
        'lr': [0.01, 0.02],
        'max_epochs': [10, 20],
    }
    # NOTE(review): ``iid`` has been removed from GridSearchCV in newer
    # scikit-learn releases -- confirm the targeted version.
    gs = GridSearchCV(net, params, refit=False, cv=3, scoring='accuracy',
                      iid=True)
    gs.fit(slds, y)  # does not raise
Example #15
Source File: From TextSentimentClassification with MIT License | 6 votes |
def train(self, trainPath=general_config.data_dir+"/training_label_new.txt", num_cv=5):
    """Tune ``C`` by grid search on averaged sentence embeddings and save the model.

    :param trainPath: path of the labelled training file read by ``readNewFile``.
    :param num_cv: value forwarded as ``cv`` to ``GridSearchCV``.

    ``self.model`` is replaced by the fitted ``GridSearchCV`` wrapper, which is
    then dumped to ``<self.save_dir>/model.pkl``.
    """
    indices, sentences, labels = readNewFile(
        file=trainPath,
        vocab2intPath=general_config.global_static_v2i_path)
    # One vector per sentence: mean over axis 0 of the selected embeddings.
    # (presumably each sentence is a sequence of token ids -- TODO confirm)
    sentences_ = [self.embeddings[sentence].mean(axis=0)
                  for sentence in sentences]
    parameters = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}  # Inverse of regularization strength
    self.model = GridSearchCV(self.model, parameters, cv=num_cv, refit=True)
    # NOTE(review): the start of the fit() call was lost when this listing was
    # extracted; ``X=sentences_`` is inferred from the score() call below.
    self.model.fit(X=sentences_, y=labels)
    # NOTE(review): the callee of this message was lost as well;
    # ``self.logger.info`` is an assumption -- confirm.
    self.logger.info("Training Accuracy: %s" % self.model.score(X=sentences_, y=labels))
    save_path = self.save_dir + "/model.pkl"
    joblib.dump(self.model, save_path)
Example #16
Source File: From skorch with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_grid_search_with_dict_works(
        self, sldict_cls, data, classifier_module):
    """GridSearchCV can be fitted when X is wrapped by ``sldict_cls``."""
    from sklearn.model_selection import GridSearchCV
    from skorch import NeuralNetClassifier

    net = NeuralNetClassifier(classifier_module)
    X, y = data
    X = sldict_cls(X=X)
    params = {
        'lr': [0.01, 0.02],
        'max_epochs': [10, 20],
    }
    # NOTE(review): ``iid`` has been removed from GridSearchCV in newer
    # scikit-learn releases -- confirm the targeted version.
    gs = GridSearchCV(net, params, refit=True, cv=3, scoring='accuracy',
                      iid=True)
    gs.fit(X, y)
    print(gs.best_score_, gs.best_params_)
Example #17
Source File: From skutil with BSD 3-Clause "New" or "Revised" License | 6 votes |
def fit(self, X, y=None, groups=None):
    """Run fit with all sets of parameters.

    ``y`` is passed through ``_as_numpy`` before being handed to the parent
    class's ``fit``.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape=(n_samples,) or (n_samples, n_output), optional (default=None)
        Target relative to X for classification or regression;
        None for unsupervised learning.

    groups : array-like, shape=(n_samples,), optional (default=None)
        Group labels for the samples used while splitting the dataset into
        train/test set.

    Returns
    -------
    The value returned by the parent class's ``fit``.
    """
    return super(GridSearchCV, self).fit(X, _as_numpy(y), groups)
Example #18
Source File: From Speech_Signal_Processing_and_Classification with MIT License | 6 votes |
def paramTuning(features_train, labels_train, nfolds):
    """Grid-search ``C`` and ``gamma`` for an RBF-kernel SVC and return the best pair.

    :param features_train: training feature matrix passed to ``fit``.
    :param labels_train: training labels passed to ``fit``.
    :param nfolds: value forwarded as ``cv`` to ``GridSearchCV``.
    :return: ``grid_search.best_params_`` (dict with keys ``'C'`` and ``'gamma'``).
    """
    # candidate values for C
    Cs = [1, 10, 100, 1000, 10000]
    # candidate values for gamma
    # NOTE(review): 0.00000001 is listed twice -- possibly a typo; kept as-is.
    gammas = [0.00000001, 0.00000001, 0.0000001, 0.000001, 0.00001]
    # build the parameter grid
    param_grid = {'C': Cs, 'gamma': gammas}
    # search over every combination in the grid
    grid_search = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=nfolds)
    # fit on the training data
    grid_search.fit(features_train, labels_train)
    # hand back the best pair of parameters
    return grid_search.best_params_
Example #19
Source File: graph_spectral_analysis& From Speech_Signal_Processing_and_Classification with MIT License | 6 votes |
def paramTuning(features_train, labels_train, nfolds):
    """Grid-search ``C`` and ``gamma`` for a polynomial-kernel SVC and print the best pair.

    :param features_train: training feature matrix passed to ``fit``.
    :param labels_train: training labels passed to ``fit``.
    :param nfolds: value forwarded as ``cv`` to ``GridSearchCV``.
    :return: ``None``; the best parameters are printed.
    """
    # candidate values for C
    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
    # candidate values for gamma
    # NOTE(review): 0.00000001 is listed twice -- possibly a typo; kept as-is.
    gammas = [0.00000001, 0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001,
              0.001, 0.01, 0.1, 1, 10, 100, 1000]
    # build the parameter grid
    param_grid = {'C': Cs, 'gamma': gammas}
    # search over every combination in the grid
    grid_search = GridSearchCV(SVC(kernel='poly'), param_grid, cv=nfolds)
    # fit on the training data
    grid_search.fit(features_train, labels_train)
    # show the best pair of parameters (call form works on Python 2 and 3)
    print(grid_search.best_params_)
Example #20
Source File: From Speech_Signal_Processing_and_Classification with MIT License | 6 votes |
def paramTuning(features_train, labels_train, nfolds):
    """Grid-search ``C`` and ``gamma`` for an RBF-kernel SVC and return the best pair.

    :param features_train: training feature matrix passed to ``fit``.
    :param labels_train: training labels passed to ``fit``.
    :param nfolds: value forwarded as ``cv`` to ``GridSearchCV``.
    :return: ``grid_search.best_params_`` (dict with keys ``'C'`` and ``'gamma'``).
    """
    # candidate values for C
    # NOTE(review): 10000 is listed twice; 100000 may have been intended for
    # the third entry -- kept as-is, confirm with the author.
    Cs = [1000, 10000, 10000, 1000000]
    # candidate values for gamma
    # NOTE(review): 0.00000001 is listed twice -- possibly a typo; kept as-is.
    gammas = [0.00000001, 0.00000001, 0.0000001, 0.000001, 0.00001]
    # build the parameter grid
    param_grid = {'C': Cs, 'gamma': gammas}
    # search over every combination in the grid
    grid_search = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=nfolds)
    # fit on the training data
    grid_search.fit(features_train, labels_train)
    # hand back the best pair of parameters
    return grid_search.best_params_
Example #21
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_gridsearch():
    """AdaBoost estimators with tree base estimators can be grid-searched."""
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    # NOTE(review): both fit() calls were lost entirely when this listing was
    # extracted; the module-level ``iris`` / ``boston`` datasets are assumed
    # -- confirm against the original test module.
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target)
Example #22
Source File: From pylift with BSD 2-Clause "Simplified" License | 6 votes |
def grid_search(self, **kwargs):
    """Grid search using sklearn.model_selection.GridSearchCV.

    Any parameters typically associated with GridSearchCV (see
    sklearn documentation) can be passed as keyword arguments to this
    function.

    The final dictionary used for the grid search is saved to
    `self.grid_search_params`. This is updated with any parameters that are
    passed.

    Returns the fitted ``GridSearchCV`` object, also stored as
    ``self.grid_search_``.

    Examples
    --------
    # Passing kwargs.
    self.grid_search(param_grid={'max_depth':[2,3,5,10]}, refit=True)

    """
    self.grid_search_params.update(kwargs)
    self.grid_search_ = GridSearchCV(**self.grid_search_params)
    # NOTE(review): the receiver and first argument of fit() were lost when
    # this listing was extracted; ``self.x_train`` is assumed to pair with
    # ``self.transformed_y_train`` -- confirm against the class.
    self.grid_search_.fit(self.x_train, self.transformed_y_train)
    return self.grid_search_
Example #23
Source File: From pylift with BSD 2-Clause "Simplified" License | 6 votes |
def grid_search(self, **kwargs):
    """Grid search using sklearn.model_selection.GridSearchCV.

    Any parameters typically associated with GridSearchCV (see
    sklearn documentation) can be passed as keyword arguments to this
    function.

    The final dictionary used for the grid search is saved to
    `self.grid_search_params`. This is updated with any parameters that are
    passed.

    Returns the fitted ``GridSearchCV`` object, also stored as
    ``self.grid_search_``.

    Examples
    --------
    # Passing kwargs.
    self.grid_search(param_grid={'max_depth':[2,3,5,10]}, refit=True)

    """
    self.grid_search_params.update(kwargs)
    self.grid_search_ = GridSearchCV(**self.grid_search_params)
    # NOTE(review): the receiver and first argument of fit() were lost when
    # this listing was extracted; ``self.x_train`` is assumed to pair with
    # ``self.transformed_y_train`` -- confirm against the class.
    self.grid_search_.fit(self.x_train, self.transformed_y_train)
    return self.grid_search_
Example #24
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_imputation_pipeline_grid_search():
    """An imputer inside a pipeline can be tuned with GridSearchCV."""
    # Test imputation within a pipeline + gridsearch.
    X = sparse_random_matrix(100, 100, density=0.10)
    # NOTE(review): the object being indexed was lost when this listing was
    # extracted; ``X.data[0]`` (first stored value of the sparse matrix) is
    # assumed -- confirm against the original test.
    missing_values = X.data[0]

    pipeline = Pipeline([('imputer',
                          SimpleImputer(missing_values=missing_values)),
                         ('tree',
                          tree.DecisionTreeRegressor(random_state=0))])

    parameters = {
        'imputer__strategy': ["mean", "median", "most_frequent"]
    }

    Y = sparse_random_matrix(100, 1, density=0.10).toarray()
    gs = GridSearchCV(pipeline, parameters)
    gs.fit(X, Y)
Example #25
Source File: From ml-parameter-optimization with MIT License | 6 votes |
def apply_gridsearch(self,model):
    """
    apply grid search on ml algorithm to specified parameters
    returns updated best score and parameters

    :param model: estimator handed to ``GridSearchCV``.
    :return: ``self``, with ``_params`` and ``_temp_score`` updated, and
        ``best_model`` / ``best_score`` replaced when the search improved on
        the stored score.
    """
    # check if custom evaluation function is specified
    if callable(self.params_cv['scoring']):
        scoring = make_scorer(self.params_cv['scoring'],
                              greater_is_better=self._greater_is_better)
    else:
        scoring = self.params_cv['scoring']

    # NOTE(review): ``iid`` has been removed from GridSearchCV in newer
    # scikit-learn releases -- confirm the targeted version.
    gsearch = GridSearchCV(estimator=model,
                           param_grid=self.get_params_tune(),
                           scoring=scoring,
                           iid=self.params_cv['iid'],
                           cv=self.params_cv['cv_folds'],
                           n_jobs=self.params_cv['n_jobs'])
    # NOTE(review): the first argument of fit() was lost when this listing
    # was extracted; ``self.X`` is assumed to pair with ``self.y`` -- confirm.
    gsearch.fit(self.X, self.y)

    # update best model if best_score is improved; multiplying both sides by
    # _score_mult lets one comparison serve either optimisation direction
    # (presumably +1 / -1 -- TODO confirm)
    if (gsearch.best_score_ * self._score_mult) > (self.best_score * self._score_mult):
        self.best_model = clone(gsearch.best_estimator_)
        self.best_score = gsearch.best_score_

    # update tuned parameters with optimal values
    # NOTE(review): assumed to run unconditionally; the nesting was not
    # recoverable from the collapsed listing -- confirm.
    for key, value in gsearch.best_params_.items():
        self._params[key] = value
    self._temp_score = gsearch.best_score_
    return self
Example #26
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_set_params_passes_all_parameters():
    """Nested set_params receives all of its keyword arguments in one call."""
    # Make sure all parameters are passed together to set_params
    # of nested estimator. Regression test for #9944

    class TestDecisionTree(DecisionTreeClassifier):
        def set_params(self, **kwargs):
            super().set_params(**kwargs)
            # expected_kwargs is in test scope
            assert kwargs == expected_kwargs
            return self

    expected_kwargs = {'max_depth': 5, 'min_samples_leaf': 2}
    for est in [Pipeline([('estimator', TestDecisionTree())]),
                GridSearchCV(TestDecisionTree(), {})]:
        est.set_params(estimator__max_depth=5,
                       estimator__min_samples_leaf=2)
Example #27
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_gridsearch_pipeline_precomputed():
    """Grid search works on a pipeline fed with a precomputed kernel matrix."""
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model using a precomputed kernel.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="precomputed", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca),
                         ("Perceptron", Perceptron(max_iter=5))])
    param_grid = dict(Perceptron__max_iter=np.arange(1, 5))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    # The kernel matrix, not the raw samples, is what the pipeline is fit on.
    X_kernel = rbf_kernel(X, gamma=2.)
    grid_search.fit(X_kernel, y)
    assert_equal(grid_search.best_score_, 1)

# 0.23. warning about tol not having its correct default value.
Example #28
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_multi_output_predict_proba():
    """MultiOutputClassifier.predict_proba works only when the base estimator supports it."""
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5, tol=1e-3)
    param = {'loss': ('hinge', 'log', 'modified_huber')}

    # inner function for custom scoring
    def custom_scorer(estimator, X, y):
        if hasattr(estimator, "predict_proba"):
            return 1.0
        else:
            return 0.0

    grid_clf = GridSearchCV(sgd_linear_clf, param_grid=param,
                            scoring=custom_scorer, cv=3, error_score=np.nan)
    multi_target_linear = MultiOutputClassifier(grid_clf)
    # X and y are not defined in this function; they are expected to exist at
    # module level in the original test file.
    multi_target_linear.fit(X, y)

    multi_target_linear.predict_proba(X)

    # SGDClassifier defaults to loss='hinge' which is not a probabilistic
    # loss function; therefore it does not expose a predict_proba method
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5, tol=1e-3)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)
    multi_target_linear.fit(X, y)
    err_msg = "The base estimator should implement predict_proba method"
    with pytest.raises(ValueError, match=err_msg):
        multi_target_linear.predict_proba(X)

# 0.23. warning about tol not having its correct default value.
Example #29
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_elastic_net_vs_l1_l2(C):
    """Elastic-net tuned over l1_ratio scores at least as well as pure l1 or l2."""
    # Make sure that elasticnet with grid search on l1_ratio gives same or
    # better results than just l1 or just l2.

    X, y = make_classification(500, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    param_grid = {'l1_ratio': np.linspace(0, 1, 5)}

    enet_clf = LogisticRegression(penalty='elasticnet', C=C, solver='saga',
                                  random_state=0)
    # NOTE(review): ``iid`` has been removed from GridSearchCV in newer
    # scikit-learn releases -- confirm the targeted version.
    gs = GridSearchCV(enet_clf, param_grid, cv=5, iid=False, refit=True)

    l1_clf = LogisticRegression(penalty='l1', C=C, solver='saga',
                                random_state=0)
    l2_clf = LogisticRegression(penalty='l2', C=C, solver='saga',
                                random_state=0)

    for clf in (gs, l1_clf, l2_clf):
        clf.fit(X_train, y_train)

    assert gs.score(X_test, y_test) >= l1_clf.score(X_test, y_test)
    assert gs.score(X_test, y_test) >= l2_clf.score(X_test, y_test)
Example #30
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_ridgecv_sample_weight():
    """RidgeCV with sample weights agrees with GridSearchCV over Ridge."""
    rng = np.random.RandomState(0)
    alphas = (0.1, 1.0, 10.0)

    # There are different algorithms for n_samples > n_features
    # and the opposite, so test them both.
    for n_samples, n_features in ((6, 5), (5, 10)):
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        cv = KFold(5)
        ridgecv = RidgeCV(alphas=alphas, cv=cv)
        ridgecv.fit(X, y, sample_weight=sample_weight)

        # Check using GridSearchCV directly
        parameters = {'alpha': alphas}
        gs = GridSearchCV(Ridge(), parameters, cv=cv)
        gs.fit(X, y, sample_weight=sample_weight)

        assert ridgecv.alpha_ == gs.best_estimator_.alpha
        assert_array_almost_equal(ridgecv.coef_, gs.best_estimator_.coef_)