Python sklearn.grid_search.GridSearchCV() Examples
The following are 30 code examples of sklearn.grid_search.GridSearchCV(). The project, source file, and license for each example are listed above its code, so you can consult the original source for full context. You may also want to check out the other functions and classes available in the sklearn.grid_search module.
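Note that the sklearn.grid_search module used throughout these examples was deprecated in scikit-learn 0.18 and removed in 0.20; the same estimator now lives in sklearn.model_selection. As a quick orientation before the examples, here is a minimal sketch of the typical fit-and-inspect cycle with the current import path (the dataset and parameter values are illustrative only):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV  # sklearn.grid_search.GridSearchCV in older releases
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)

# Candidate hyper-parameters; every combination is evaluated with 5-fold CV.
param_grid = {'kernel': ['linear', 'rbf'], 'C': [0.1, 1, 10]}

search = GridSearchCV(SVC(), param_grid, cv=5)
search.fit(X, y)

print(search.best_params_)   # best combination found
print(search.best_score_)    # its mean cross-validated score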
Example #1
Source File: test_grid_search.py From sparkit-learn with Apache License 2.0 | 6 votes |
def test_same_result(self):
    X, y, Z = self.make_classification(2, 40000, nonnegative=True)

    parameters = {'alpha': [0.1, 1, 10]}
    fit_params = {'classes': np.unique(y)}

    local_estimator = MultinomialNB()
    local_grid = GridSearchCV(estimator=local_estimator,
                              param_grid=parameters)
    estimator = SparkMultinomialNB()
    grid = SparkGridSearchCV(estimator=estimator,
                             param_grid=parameters,
                             fit_params=fit_params)

    local_grid.fit(X, y)
    grid.fit(Z)

    locscores = [r.mean_validation_score for r in local_grid.grid_scores_]
    scores = [r.mean_validation_score for r in grid.grid_scores_]

    assert_array_almost_equal(locscores, scores, decimal=2)
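A note on this and many of the following examples: the grid_scores_ attribute (and its mean_validation_score field) exists only in old scikit-learn releases. In current versions the same information is exposed through cv_results_; a rough equivalent of the score extraction above, assuming a fitted modern GridSearchCV instance named local_grid:

mean_scores = local_grid.cv_results_['mean_test_score']  # one entry per parameter combination
params = local_grid.cv_results_['params']                # the matching parameter dicts
for p, s in zip(params, mean_scores):
    print(p, s)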
Example #2
Source File: RegressionDecisionTree.py From AirTicketPredicting with MIT License | 6 votes |
def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'max_features': ['sqrt', 'log2', None],
                         'max_depth': range(2, 1000),
                         }
                        ]

    reg = GridSearchCV(DecisionTreeRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
    reg.fit(self.X_train, self.y_train)

    print "Best parameters set found on development set:\n"
    print reg.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in reg.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print "MSE for test data set:\n"
    y_true, y_pred = self.y_test, reg.predict(self.X_test)
    print mean_squared_error(y_true, y_pred)
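The 'mean_squared_error' scoring string used in this and the next few regression examples is also version-dependent: newer scikit-learn releases expect 'neg_mean_squared_error', where the value is negated so that larger is always better. A one-line adjustment, keeping the rest of the example unchanged:

reg = GridSearchCV(DecisionTreeRegressor(), tuned_parameters, cv=5,
                   scoring='neg_mean_squared_error')  # reported mean_test_score values are negative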
Example #3
Source File: RegressionRidgeReg.py From AirTicketPredicting with MIT License | 6 votes |
def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'alpha': np.logspace(-5, 5)
                         }
                        ]

    reg = GridSearchCV(linear_model.Ridge(alpha=0.5), tuned_parameters, cv=5, scoring='mean_squared_error')
    reg.fit(self.X_train, self.y_train)

    print "Best parameters set found on development set:\n"
    print reg.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in reg.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print reg.scorer_

    print "MSE for test data set:"
    y_true, y_pred = self.y_test, reg.predict(self.X_test)
    print mean_squared_error(y_pred, y_true)
Example #4
Source File: RegressionKNN.py From AirTicketPredicting with MIT License | 6 votes |
def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'weights': ['uniform', 'distance'],
                         'n_neighbors': range(2, 100)
                         }
                        ]

    reg = GridSearchCV(neighbors.KNeighborsRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
    reg.fit(self.X_train, self.y_train)

    print "Best parameters set found on development set:\n"
    print reg.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in reg.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print reg.scorer_

    print "MSE for test data set:"
    y_true, y_pred = self.y_test, reg.predict(self.X_test)
    print mean_squared_error(y_pred, y_true)
Example #5
Source File: RegressionRandomForest.py From AirTicketPredicting with MIT License | 6 votes |
def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'max_depth': range(20, 60),
                         'n_estimators': range(10, 40),
                         'max_features': ['sqrt', 'log2', None]
                         }
                        ]

    clf = GridSearchCV(RandomForestRegressor(n_estimators=30), tuned_parameters, cv=5, scoring='mean_squared_error')
    clf.fit(self.X_train, self.y_train.ravel())

    print "Best parameters set found on development set:\n"
    print clf.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in clf.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print "MSE for test data set:\n"
    y_true, y_pred = self.y_test, clf.predict(self.X_test)
    print mean_squared_error(y_true, y_pred)
Example #6
Source File: ClassificationSVM.py From AirTicketPredicting with MIT License | 6 votes |
def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'kernel': ['rbf'], 'gamma': np.logspace(-4, 3, 30),
                         'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]},
                        {'kernel': ['poly'], 'degree': [1, 2, 3, 4],
                         'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
                         'coef0': np.logspace(-4, 3, 30)},
                        {'kernel': ['linear'],
                         'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]}]

    clf = GridSearchCV(svm.SVC(C=1), tuned_parameters, cv=5, scoring='precision_weighted')
    clf.fit(self.X_train, self.y_train.ravel())

    print "Best parameters set found on development set:\n"
    print clf.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in clf.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print "Detailed classification report:\n"
    y_true, y_pred = self.y_test, clf.predict(self.X_test)
    print classification_report(y_true, y_pred)
Example #7
Source File: ClassificationLogReg.py From AirTicketPredicting with MIT License | 6 votes |
def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'penalty': ['l1'],
                         'C': np.logspace(-5, 5)},
                        {'penalty': ['l2'],
                         'C': np.logspace(-5, 5)}]

    clf = GridSearchCV(linear_model.LogisticRegression(tol=1e-6), tuned_parameters, cv=5, scoring='precision_weighted')
    clf.fit(self.X_train, self.y_train.ravel())

    print "Best parameters set found on development set:\n"
    print clf.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in clf.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print "Detailed classification report:\n"
    y_true, y_pred = self.y_test, clf.predict(self.X_test)
    print classification_report(y_true, y_pred)
Example #8
Source File: ClassificationKNN.py From AirTicketPredicting with MIT License | 6 votes |
def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'weights': ['uniform', 'distance'],
                         'n_neighbors': range(2, 60)
                         }
                        ]

    clf = GridSearchCV(neighbors.KNeighborsClassifier(), tuned_parameters, cv=5, scoring='precision_weighted')
    clf.fit(self.X_train, self.y_train.ravel())

    print "Best parameters set found on development set:\n"
    print clf.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in clf.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print "Detailed classification report:\n"
    y_true, y_pred = self.y_test, clf.predict(self.X_test)
    print classification_report(y_true, y_pred)
Example #9
Source File: ClassificationDecisionTree.py From AirTicketPredicting with MIT License | 6 votes |
def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'max_depth': range(2, 60),
                         'max_features': ['sqrt', 'log2', None]
                         }
                        ]

    clf = GridSearchCV(DecisionTreeClassifier(max_depth=5), tuned_parameters, cv=5, scoring='precision_weighted')
    clf.fit(self.X_train, self.y_train.ravel())

    print "Best parameters set found on development set:\n"
    print clf.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in clf.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print "Detailed classification report:\n"
    y_true, y_pred = self.y_test, clf.predict(self.X_test)
    print classification_report(y_true, y_pred)
Example #10
Source File: GC_script.py From ClimateVegetationDynamics_GrangerCausality with GNU General Public License v3.0 | 6 votes |
def nestedCrossValidation(X, y, cvFolds, estimator):
    kf = KFold(len(X), n_folds=cvFolds, shuffle=True, random_state=30)
    cv_j = 0
    param_grid = {'alpha': [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01,
                            0.1, 1, 10, 100, 1000, 10000, 100000, 1000000,
                            10000000, 1000000000]}
    r2 = np.zeros((cvFolds, 1))
    for train_index, test_index in kf:
        train_X = X[train_index, :]
        test_X = X[test_index, :]
        train_y = y[train_index]
        test_y = y[test_index]
        grid = GridSearchCV(estimator, param_grid=param_grid, verbose=0,
                            cv=cvFolds, scoring='mean_squared_error')
        grid.fit(train_X, train_y)
        y_true, y_pred = test_y, grid.best_estimator_.predict(test_X)
        r2[cv_j] = r2_score(y_true, y_pred)
        cv_j = cv_j + 1
    return r2

#%% main script
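Example #10 builds nested cross-validation by hand: an outer KFold loop with a GridSearchCV fitted inside every fold. With the current API the same pattern is usually expressed by passing a GridSearchCV instance to cross_val_score; a minimal sketch with an illustrative estimator and dataset, not taken from the project above:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score

X, y = make_regression(n_samples=200, n_features=10, random_state=0)

inner_cv = KFold(n_splits=5, shuffle=True, random_state=30)
outer_cv = KFold(n_splits=5, shuffle=True, random_state=30)

# Inner loop: pick alpha by grid search; outer loop: estimate generalization performance.
grid = GridSearchCV(Ridge(), {'alpha': np.logspace(-7, 9, 17)}, cv=inner_cv)
outer_scores = cross_val_score(grid, X, y, cv=outer_cv)  # default scoring for Ridge is R^2
print(outer_scores.mean())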
Example #11
Source File: classif_and_ktst.py From jstsp2015 with MIT License | 6 votes |
def compute_svm_score_nestedCV(K, y, n_folds,
                               scoring=balanced_accuracy_scoring,
                               random_state=None,
                               param_grid=[{'C': np.logspace(-5, 5, 25)}]):
    """Compute cross-validated score of SVM using precomputed kernel.
    """
    cv = StratifiedKFold(y, n_folds=n_folds, shuffle=True,
                         random_state=random_state)
    scores = np.zeros(n_folds)
    for i, (train, test) in enumerate(cv):
        cvclf = SVC(kernel='precomputed')
        y_train = y[train]
        cvcv = StratifiedKFold(y_train, n_folds=n_folds,
                               shuffle=True,
                               random_state=random_state)
        clf = GridSearchCV(cvclf,
                           param_grid=param_grid,
                           scoring=scoring,
                           cv=cvcv,
                           n_jobs=1)
        clf.fit(K[train, :][:, train], y_train)
        # print clf.best_params_
        scores[i] = clf.score(K[test, :][:, train], y[test])

    return scores.mean()
Example #12
Source File: test_search_2.py From spark-sklearn with Apache License 2.0 | 6 votes |
def test_cv_pipeline(self):
    pipeline = SKL_Pipeline([
        ('vect', SKL_HashingVectorizer(n_features=20)),
        ('tfidf', SKL_TfidfTransformer(use_idf=False)),
        ('lasso', SKL_Lasso())
    ])
    parameters = {
        'lasso__alpha': (0.001, 0.005, 0.01)
    }
    grid_search = GridSearchCV(self.sc, pipeline, parameters)
    data = [('hi there', 0.0),
            ('what is up', 1.0),
            ('huh', 1.0),
            ('now is the time', 5.0),
            ('for what', 0.0),
            ('the spark was there', 5.0),
            ('and so', 3.0),
            ('were many socks', 0.0),
            ('really', 1.0),
            ('too cool', 2.0)]
    df = self.sql.createDataFrame(data, ["review", "rating"]).toPandas()
    skl_gs = grid_search.fit(df.review.values, df.rating.values)
    assert len(skl_gs.cv_results_['params']) == len(parameters['lasso__alpha'])
Example #13
Source File: 04_sent.py From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License | 6 votes |
def __grid_search_model(clf_factory, X, Y):
    cv = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    param_grid = dict(vect__ngram_range=[(1, 1), (1, 2), (1, 3)],
                      vect__min_df=[1, 2],
                      vect__smooth_idf=[False, True],
                      vect__use_idf=[False, True],
                      vect__sublinear_tf=[False, True],
                      vect__binary=[False, True],
                      clf__alpha=[0, 0.01, 0.05, 0.1, 0.5, 1],
                      )

    grid_search = GridSearchCV(clf_factory(),
                               param_grid=param_grid,
                               cv=cv,
                               score_func=f1_score,
                               verbose=10)
    grid_search.fit(X, Y)
    clf = grid_search.best_estimator_
    print clf

    return clf
Example #14
Source File: 02_tuning.py From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License | 6 votes |
def grid_search_model(clf_factory, X, Y):
    cv = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    param_grid = dict(vect__ngram_range=[(1, 1), (1, 2), (1, 3)],
                      vect__min_df=[1, 2],
                      vect__stop_words=[None, "english"],
                      vect__smooth_idf=[False, True],
                      vect__use_idf=[False, True],
                      vect__sublinear_tf=[False, True],
                      vect__binary=[False, True],
                      clf__alpha=[0, 0.01, 0.05, 0.1, 0.5, 1],
                      )

    grid_search = GridSearchCV(clf_factory(),
                               param_grid=param_grid,
                               cv=cv,
                               score_func=f1_score,
                               verbose=10)
    grid_search.fit(X, Y)
    clf = grid_search.best_estimator_
    print clf

    return clf
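Examples #13 and #14 depend on two long-gone interfaces: ShuffleSplit constructed with n/n_iter/indices arguments, and GridSearchCV's score_func parameter, which was replaced by scoring. The sketch below shows the same kind of search against a concrete TF-IDF plus Naive Bayes pipeline; the original clf_factory is not shown in the excerpt, so this pipeline and the reduced grid are assumptions for illustration only:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

pipeline = Pipeline([('vect', TfidfVectorizer()), ('clf', MultinomialNB())])

# Step-name__parameter syntax routes each value to the right pipeline step.
param_grid = {
    'vect__ngram_range': [(1, 1), (1, 2), (1, 3)],
    'vect__min_df': [1, 2],
    'clf__alpha': [0.01, 0.05, 0.1, 0.5, 1],
}

cv = ShuffleSplit(n_splits=10, test_size=0.3, random_state=0)
grid_search = GridSearchCV(pipeline, param_grid=param_grid, cv=cv,
                           scoring='f1', verbose=10)
# grid_search.fit(X, Y)  # X: list of raw documents, Y: binary labels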
Example #15
Source File: test_sklearn.py From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_GridLayerParams(self):
    clf = GridSearchCV(
        self.__estimator__(layers=[L("Rectifier", units=12), L(self.__output__)], n_iter=1),
        param_grid={'hidden0__units': [4, 8, 12]})
    clf.fit(self.a_in, self.a_out)
Example #16
Source File: GetMLPara.py From dr_droid with Apache License 2.0 | 5 votes |
def selection_parameters_for_classfier(X, y):

    from sklearn import grid_search

    #paras={ 'n_neighbors':[1,10], 'weights':['uniform', 'distance'], 'algorithm':['auto', 'ball_tree','kd_tree', 'brute'], 'leaf_size':[20,50]}
    #knn = KNeighborsClassifier()

    #naive_bayes
    #nbg = GaussianNB()
    #nbm = MultinomialNB()
    #nbb = BernoulliNB()

    #decision tree
    #paras={ 'criterion':['gini','entropy'], 'splitter':['random', 'best'], 'max_features':[None, 'auto','sqrt', 'log2'], 'min_samples_split':[1,10]}
    #dtree = DecisionTreeClassifier()

    #random forest
    #rforest = RandomForestClassifier()
    #paras={ 'n_estimators':[2,15], 'criterion':['gini','entropy'], 'max_features': ['auto','sqrt', 'log2'], 'min_samples_split':[1,10]}

    #svm
    svmm = svm.SVC()
    paras = {'kernel': ['rbf', 'linear', 'poly']}

    clt = grid_search.GridSearchCV(svmm, paras, cv=5)
    clt.fit(X, y)

    print (clt)
    #print (clt.get_params())
    print (clt.set_params())
    print (clt.score(X, y))

    #scores = cross_val_score(clt,X,y,cv=10)
    #print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

#this is to get score using cross_validation
Example #17
Source File: model_selection.py From StrepHit with GNU General Public License v3.0 | 5 votes |
def fit(self, training_sets):
    """ Searches for the best estimator and its arguments as well as the best
        training set amongst those specified.

        :param generator training_sets: Training set to use. Should be a sequence
         of tuples (x, y, metadata) where x is the training set, y is the correct
         answer for each chunk and metadata contains additional data that will
         be returned back
        :return: the metadata of the training set which yielded the best score,
         the best score obtained by the model, parameters of the model and
         fitted model itself
        :rtype: tuple
    """
    best_training, best_score, best_params, best_model = None, None, None, None
    for i, (metadata, extractor) in enumerate(training_sets):
        for model, grid in self.models:
            assert isclass(model)

            x, y = extractor.get_features(refit=True)
            grid['model_cls'] = [model]
            grid['selector_column'] = [None, extractor.lu_column()]

            search = GridSearchCV(
                FeatureSelectedClassifier(model), param_grid=grid, **self.kwargs
            )
            search.fit(x, y)

            score, params, model = search.best_score_, search.best_params_, search.best_estimator_
            logger.debug('%s with parameters %s and training meta %s has score %s',
                         type(model), params, metadata, score)

            if best_score is None or score > best_score:
                best_training, best_score, best_params, best_model = (x, y, metadata), score, params, model

    return best_training, best_score, best_params, best_model

# needs to be pickleable and callable
Example #18
Source File: model_selection.py From StrepHit with GNU General Public License v3.0 | 5 votes |
def __init__(self, *models, **kwargs):
    """ Initializes the grid search

        :param list models: List of models to use. Each one should be a tuple
         with a model instance or class and a dictionary for the search space.
        :param kwargs: addition initialization arguments
         for `sklearn.grid_search.GridSearchCV`
    """
    self.models = filter(None, models)
    kwargs['refit'] = True
    self.kwargs = kwargs
Example #19
Source File: test_sklearn.py From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_GridGlobalParams(self):
    clf = GridSearchCV(
        self.__estimator__(layers=[L(self.__output__)], n_iter=1),
        param_grid={'learning_rate': [0.01, 0.001]})
    clf.fit(self.a_in, self.a_out)
Example #20
Source File: model.py From crypto_predictor with MIT License | 5 votes |
def get_best_model(model, parameters, X_train, y_train):
    clf = GridSearchCV(model, parameters, cv=4, n_jobs=-1)
    clf.fit(X_train, y_train)
    # print(clf.best_params_)
    return clf.best_estimator_
Example #21
Source File: test_search_2.py From spark-sklearn with Apache License 2.0 | 5 votes |
def test_example(self):
    # The classic example from the sklearn documentation
    iris = datasets.load_iris()
    parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
    svr = svm.SVC(gamma='auto')
    clf = grid_search.GridSearchCV(svr, parameters)
    clf.fit(iris.data, iris.target)

    clf2 = GridSearchCV(self.sc, svr, parameters)
    clf2.fit(iris.data, iris.target)

    b1 = clf.estimator
    b2 = clf2.estimator
    self.assertEqual(b1.get_params(), b2.get_params())
Example #22
Source File: test_search_2.py From spark-sklearn with Apache License 2.0 | 5 votes |
def test_cv_linreg(self):
    pipeline = SKL_Pipeline([
        ('lasso', SKL_Lasso())
    ])
    parameters = {
        'lasso__alpha': (0.001, 0.005, 0.01)
    }
    grid_search = GridSearchCV(self.sc, pipeline, parameters)
    X = scipy.sparse.vstack(map(lambda x: self.list2csr([x, x + 1.0]), range(0, 100)))
    y = np.array(list(range(0, 100))).reshape((100, 1))
    skl_gs = grid_search.fit(X, y)
    assert len(skl_gs.cv_results_['params']) == len(parameters['lasso__alpha'])
Example #23
Source File: test_search_2.py From spark-sklearn with Apache License 2.0 | 5 votes |
def test_cv_lasso_with_mllib_featurization(self):
    data = [('hi there', 0.0),
            ('what is up', 1.0),
            ('huh', 1.0),
            ('now is the time', 5.0),
            ('for what', 0.0),
            ('the spark was there', 5.0),
            ('and so', 3.0),
            ('were many socks', 0.0),
            ('really', 1.0),
            ('too cool', 2.0)]
    data = self.sql.createDataFrame(data, ["review", "rating"])

    # Feature extraction using MLlib
    tokenizer = Tokenizer(inputCol="review", outputCol="words")
    hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20000)
    pipeline = Pipeline(stages=[tokenizer, hashingTF])
    data = pipeline.fit(data).transform(data)
    df = self.converter.toPandas(data.select(data.features.alias("review"), "rating"))

    pipeline = SKL_Pipeline([
        ('lasso', SKL_Lasso())
    ])
    parameters = {
        'lasso__alpha': (0.001, 0.005, 0.01)
    }

    grid_search = GridSearchCV(self.sc, pipeline, parameters)
    skl_gs = grid_search.fit(df.review.values, df.rating.values)
    assert len(skl_gs.cv_results_['params']) == len(parameters['lasso__alpha'])
Example #24
Source File: classifier.py From TextDetector with GNU General Public License v3.0 | 5 votes |
def __init__(self, mode='adaboost'):
    if mode == 'adaboost':
        clf = GradientBoostingRegressor(
            learning_rate=1,
            n_estimators=1000,
            max_depth=3,
            random_state=0)
    elif mode == 'randomforest':
        clf = RandomForestRegressor(
            n_estimators=10,
            max_depth=None,
            n_jobs=-1)
    elif mode == 'SVM':
        clf = SVC(C=10.0,
                  kernel='linear')
    elif mode == 'vjcascade':
        clf = vjcascade(n_stage=30,
                        n_esti=1,
                        l_rate=1)
    elif mode == 'gridSearch':
        param_grid = [
            {'max_depth': [1, 2, 3], 'loss': ['ls', 'lad']},
        ]
        gbr = GradientBoostingRegressor()
        clf = grid_search.GridSearchCV(gbr, param_grid, n_jobs=-1)
    else:
        raise Exception('no mode named: ' + mode + ' found!')
    self.classifier = clf
    self.mode = mode
Example #25
Source File: functions.py From topicModelling with GNU General Public License v3.0 | 5 votes |
def perform_class(X, y, iterations=1):
    scores = []
    for i in range(iterations):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                            random_state=42 + iterations)
        parameters = {'C': [0.01, 0.1, 1, 10, 100]}
        clf_acc = GridSearchCV(svm.LinearSVC(), parameters, n_jobs=3, cv=3,
                               refit=True, scoring='accuracy')
        clf_acc.fit(X_train, y_train)
        scores.append([metrics.accuracy_score(y_test, clf_acc.predict(X_test)),
                       metrics.f1_score(y_test, clf_acc.predict(X_test), average='micro')])
    acc = np.mean([x[0] for x in scores]), np.std([x[0] for x in scores])
    mif = np.mean([x[1] for x in scores]), np.std([x[1] for x in scores])
    return acc, mif
Example #26
Source File: functions.py From topicModelling with GNU General Public License v3.0 | 5 votes |
def perform_class(X, y, iterations=1):
    scores = []
    for i in range(iterations):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                            random_state=42 + iterations)
        parameters = {'C': [0.01, 0.1, 1, 10, 100]}
        clf_acc = GridSearchCV(svm.LinearSVC(), parameters, n_jobs=3, cv=3,
                               refit=True, scoring='accuracy')
        clf_acc.fit(X_train, y_train)
        scores.append([metrics.accuracy_score(y_test, clf_acc.predict(X_test)),
                       metrics.f1_score(y_test, clf_acc.predict(X_test), average='micro')])
    acc = np.mean([x[0] for x in scores]), np.std([x[0] for x in scores])
    mif = np.mean([x[1] for x in scores]), np.std([x[1] for x in scores])
    return acc, mif
Example #27
Source File: classification.py From pyImSegm with BSD 3-Clause "New" or "Revised" License | 5 votes |
def create_classif_search(name_clf, clf_pipeline, nb_labels, search_type='random',
                          cross_val=10, eval_metric='f1', nb_iter=250, nb_workers=5):
    """ create sklearn search depending on spec. random or grid

    :param int nb_labels: number of labels
    :param str search_type: hyper-params search type
    :param str eval_metric: evaluation metric
    :param int nb_iter: for random number of tries
    :param str name_clf: name of classif.
    :param obj clf_pipeline: object
    :param obj cross_val: obj specific CV for fix train-test
    :param int nb_workers: number jobs running in parallel
    :return:
    """
    score_weight = 'weighted' if nb_labels > 2 else 'binary'
    scoring = metrics.make_scorer(DICT_SCORING[eval_metric.lower()],
                                  average=score_weight)
    if search_type == 'grid':
        clf_parameters = create_clf_param_search_grid(name_clf)
        logging.info('init Grid search...')
        clf_search = GridSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, verbose=1, refit=True)
    else:
        clf_parameters = create_clf_param_search_distrib(name_clf)
        nb_iter = search_params_cut_down_max_nb_iter(clf_parameters, nb_iter)
        logging.info('init Randomized search...')
        clf_search = RandomizedSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, n_iter=nb_iter, verbose=1, refit=True)
    return clf_search
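Example #27 switches between GridSearchCV and RandomizedSearchCV: the former evaluates every combination in the grid, while the latter samples a fixed number of settings from the supplied lists or distributions. A small standalone sketch of that difference, with an illustrative estimator and search spaces rather than the project's own create_clf_param_search_* helpers:

import scipy.stats as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

estimator = RandomForestClassifier(random_state=0)

# Grid search: every combination is tried (here 3 x 3 = 9 candidates per CV split).
grid = GridSearchCV(estimator,
                    {'n_estimators': [50, 100, 200], 'max_depth': [3, 5, None]},
                    cv=5)

# Randomized search: n_iter settings are drawn from the given lists/distributions.
rand = RandomizedSearchCV(estimator,
                          {'n_estimators': st.randint(50, 300),
                           'max_depth': [3, 5, None]},
                          n_iter=20, cv=5, random_state=0)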
Example #28
Source File: reduction.py From aggregation with Apache License 2.0 | 5 votes |
def shrunk_cov_score(X):
    shrinkages = np.logspace(-2, 0, 30)
    cv = GridSearchCV(ShrunkCovariance(), {'shrinkage': shrinkages})
    return np.mean(cross_val_score(cv.fit(X).best_estimator_, X))
Example #29
Source File: ensembles.py From Azimuth with BSD 3-Clause "New" or "Revised" License | 5 votes |
def SVM_stacking(y_train, X_train, X_test):
    parameters = {'kernel': ('linear', 'rbf'),
                  'C': np.linspace(1, 10, 10),
                  'gamma': np.linspace(1e-3, 1., 10)}
    svr = svm.SVR()
    clf = GridSearchCV(svr, parameters, n_jobs=3, verbose=1, cv=10, scoring=spearman_scoring)
    clf.fit(X_train, y_train.flatten())
    return clf.predict(X_test)
Example #30
Source File: ensembles.py From Azimuth with BSD 3-Clause "New" or "Revised" License | 5 votes |
def GBR_stacking(y_train, X_train, X_test):
    param_grid = {'learning_rate': [0.1, 0.05, 0.01],
                  'max_depth': [2, 3, 4, 5],  # [2, 3, 4, 6],
                  'min_samples_leaf': [1, 2, 3],  # ,5, 7],
                  'max_features': [1.0, 0.5, 0.3, 0.1]}

    est = en.GradientBoostingRegressor(loss='ls', n_estimators=100)
    clf = GridSearchCV(est, param_grid, n_jobs=3, verbose=1, cv=20, scoring=spearman_scoring).fit(X_train, y_train.flatten())
    # clf.fit(X_train, y_train.flatten())
    return clf.predict(X_test)
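Both Azimuth examples pass a spearman_scoring object that is defined elsewhere in the project. The excerpt does not show that definition, but a scorer of this kind can be built with sklearn.metrics.make_scorer; a hypothetical sketch, not the project's actual implementation:

import scipy.stats as st
from sklearn.metrics import make_scorer

def spearman_score(y_true, y_pred):
    # Rank correlation between predictions and targets; higher is better.
    return st.spearmanr(y_true, y_pred)[0]

spearman_scoring = make_scorer(spearman_score, greater_is_better=True)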