Python sklearn.grid_search.RandomizedSearchCV() Examples
The following are 12 code examples of sklearn.grid_search.RandomizedSearchCV(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.grid_search, or try the search function.
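Before the project examples, a minimal sketch of the basic call pattern may help (this snippet is ours, not taken from any of the projects below). Note that sklearn.grid_search was deprecated in scikit-learn 0.18 and removed in 0.20; in current releases the same class is available from sklearn.model_selection, so the import below falls back accordingly.

# Minimal usage sketch (illustrative only, not from the projects below).
from scipy.stats import randint
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

try:
    from sklearn.grid_search import RandomizedSearchCV      # legacy module (pre-0.18)
except ImportError:
    from sklearn.model_selection import RandomizedSearchCV  # scikit-learn >= 0.18

iris = load_iris()
X, y = iris.data, iris.target

# Sample 10 parameter settings at random from these distributions.
param_dist = {'n_estimators': randint(10, 100), 'max_depth': randint(2, 8)}
search = RandomizedSearchCV(RandomForestClassifier(random_state=0),
                            param_distributions=param_dist,
                            n_iter=10, cv=3, random_state=0)
search.fit(X, y)
print(search.best_params_)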
Example #1
Source File: Model_Parameters_CV.py From ProFET with GNU General Public License v3.0 | 6 votes |
def GridParamSearch(param_dist, clf, X, y, n_iter_search=15):
    '''
    Searches for the best model parameters using randomized CV search;
    different parameters are searched depending on the model type.
    http://nbviewer.ipython.org/github/treycausey/thespread/blob/master/notebooks/basic_random_forest_wp_model.ipynb?create=1
    @param clf: estimator/predictor used.
    @param param_dist: grid of parameter ranges to tune for the predictor,
        using randomized CV search.
    '''
    print("Starting grid parameter search")
    random_search = RandomizedSearchCV(clf, param_distributions=param_dist,
                                       n_iter=n_iter_search, n_jobs=-1)
    start = time()
    # random_search.fit(features, target)
    random_search.fit(X, y)
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % ((time() - start), n_iter_search))
    report(random_search.grid_scores_)
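Note that report(random_search.grid_scores_) above relies on the legacy grid_scores_ attribute, which exists only in the deprecated sklearn.grid_search implementation. As a rough equivalent for the modern sklearn.model_selection version, a small helper over cv_results_ might look like the following sketch (the name report_cv_results is ours, not ProFET's):

import numpy as np

def report_cv_results(cv_results, n_top=3):
    """Print the n_top best parameter settings from a fitted search's cv_results_ dict."""
    for rank in range(1, n_top + 1):
        for i in np.flatnonzero(cv_results['rank_test_score'] == rank):
            print("Model rank: {0}".format(rank))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                cv_results['mean_test_score'][i],
                cv_results['std_test_score'][i]))
            print("Parameters: {0}".format(cv_results['params'][i]))

After fitting, it would be called as report_cv_results(random_search.cv_results_).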
Example #2
Source File: test_search_2.py From spark-sklearn with Apache License 2.0 | 6 votes |
def test_example_randomized_search(self):
    # The classic example from the sklearn documentation
    iris = datasets.load_iris()
    parameters = {'kernel': ('linear', 'rbf'), 'C': range(1, 10)}
    svr = svm.SVC()
    clf = grid_search.RandomizedSearchCV(svr, parameters, random_state=4)
    clf.fit(iris.data, iris.target)
    clf2 = RandomizedSearchCV(self.sc, svr, parameters, random_state=4)
    clf2.fit(iris.data, iris.target)
    b1 = clf.estimator
    b2 = clf2.estimator
    self.assertEqual(b1.get_params(), b2.get_params())
Example #3
Source File: winfault.py From wt-fdd with GNU General Public License v3.0 | 5 votes |
def svm_class_and_score(
        X_train, y_train, X_test, y_test, labels,
        search_type=RandomizedSearchCV,
        parameter_space={
            'kernel': ['linear', 'rbf', 'poly'],
            'gamma': ['auto', 1e-3, 1e-4],
            'C': [0.01, .1, 1, 10, 100, 1000],
            'class_weight': [
                {0: 0.01}, {1: 1}, {1: 2}, {1: 10}, {1: 50}, 'balanced']},
        score='recall_weighted', iid=True, bagged=False, svm_results=True):
    """Build an SVM and return its scoring metrics"""
    print("# Tuning hyper-parameters for %s" % score)
    print()

    # Find the Hyperparameters
    clf = search_type(SVC(C=1), parameter_space, cv=10,
                      scoring=score, iid=iid)

    # Build the SVM
    clf.fit(X_train, y_train)
    print("Hyperparameters found:")
    print(clf.best_params_)

    # Make the predictions
    y_pred = clf.predict(X_test)

    print()
    print()
    print("Results for basic SVM")
    clf_scoring(y_test, y_pred, labels)

    if bagged is True:
        bgg = BaggingClassifier(base_estimator=clf)
        bgg.fit(X_train, y_train)
        y_pred = bgg.predict(X_test)
        print()
        print()
        print("Results for bagging:")
        clf_scoring(y_test, y_pred, labels)
        return clf, bgg
    else:
        return clf
Example #4
Source File: tunemodels.py From Supply-demand-forecasting with MIT License | 5 votes |
def runGridSearch(self, model):
    logging.debug("run grid search on model: {}".format(model.__class__.__name__))
    logging.debug("cross validation strategy: {}".format(model.holdout_split))
    logging.debug("used features: {}".format(model.usedFeatures))
    logging.debug("tuned parameters: {}".format(model.getTunedParamterOptions()))

    features, labels, cv = model.getFeaturesLabel()

    # do grid search
    if self.do_random_gridsearch:
        estimator = RandomizedSearchCV(
            model.clf, model.getTunedParamterOptions(), cv=cv,
            n_jobs=self.n_jobs,
            scoring=mean_absolute_percentage_error_scoring,
            verbose=500, n_iter=self.n_iter_randomsearch)
    else:
        estimator = GridSearchCV(
            model.clf, model.getTunedParamterOptions(), cv=cv,
            n_jobs=-self.n_jobs, fit_params=model.get_fit_params(),
            scoring=mean_absolute_percentage_error_scoring, verbose=500)

    estimator.fit(features, labels)
    model.clf = estimator.best_estimator_
    model.save_final_model = True
    model.save_model()

    # model.dispFeatureImportance()
    logging.debug('estimator parameters: {}'.format(estimator.get_params))
    logging.debug('Best parameters: {}'.format(estimator.best_params_))
    logging.debug('Best Scores: {}'.format(-estimator.best_score_))
    logging.debug('Score grid: {}'.format(estimator.grid_scores_))
    for i in estimator.grid_scores_:
        logging.debug('parameters: {}'.format(i.parameters))
        logging.debug('mean_validation_score: {}'.format(np.absolute(i.mean_validation_score)))
        logging.debug('cv_validation_scores: {}'.format(np.absolute(i.cv_validation_scores)))
    return
Example #5
Source File: classification.py From pyImSegm with BSD 3-Clause "New" or "Revised" License | 5 votes |
def create_classif_search(name_clf, clf_pipeline, nb_labels,
                          search_type='random', cross_val=10,
                          eval_metric='f1', nb_iter=250, nb_workers=5):
    """ create sklearn search depending on spec. random or grid

    :param int nb_labels: number of labels
    :param str search_type: hyper-params search type
    :param str eval_metric: evaluation metric
    :param int nb_iter: for random number of tries
    :param str name_clf: name of classif.
    :param obj clf_pipeline: object
    :param obj cross_val: obj specific CV for fix train-test
    :param int nb_workers: number jobs running in parallel
    :return:
    """
    score_weight = 'weighted' if nb_labels > 2 else 'binary'
    scoring = metrics.make_scorer(DICT_SCORING[eval_metric.lower()],
                                  average=score_weight)
    if search_type == 'grid':
        clf_parameters = create_clf_param_search_grid(name_clf)
        logging.info('init Grid search...')
        clf_search = GridSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, verbose=1, refit=True)
    else:
        clf_parameters = create_clf_param_search_distrib(name_clf)
        nb_iter = search_params_cut_down_max_nb_iter(clf_parameters, nb_iter)
        logging.info('init Randomized search...')
        clf_search = RandomizedSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, n_iter=nb_iter, verbose=1, refit=True)
    return clf_search
Example #6
Source File: test_sklearn.py From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_RandomGlobalParams(self):
    clf = RandomizedSearchCV(
        self.__estimator__(layers=[L("Sigmoid")], n_iter=1),
        param_distributions={'learning_rate': uniform(0.001, 0.01)},
        n_iter=2)
    clf.fit(self.a_in, self.a_out)
Example #7
Source File: test_sklearn.py From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_RandomLayerParams(self):
    clf = RandomizedSearchCV(
        self.__estimator__(layers=[L("Rectifier", units=12), L(self.__output__)], n_iter=1),
        param_distributions={'hidden0__units': randint(4, 12)},
        n_iter=2)
    clf.fit(self.a_in, self.a_out)
Example #8
Source File: test_sklearn.py From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_RandomMultipleJobs(self):
    clf = RandomizedSearchCV(
        self.__estimator__(layers=[L("Sigmoid", units=12), L(self.__output__)], n_iter=1),
        param_distributions={'hidden0__units': randint(4, 12)},
        n_iter=4, n_jobs=4)
    clf.fit(self.a_in, self.a_out)
Example #9
Source File: test_search_2.py From spark-sklearn with Apache License 2.0 | 5 votes |
def test_cv_linreg(self):
    pipeline = SKL_Pipeline([
        ('lasso', SKL_Lasso(max_iter=1))
    ])
    parameters = {
        'lasso__alpha': np.linspace(0.001, 0.01, 1000)
    }
    n_iter = 10
    grid_search = RandomizedSearchCV(self.sc, pipeline, parameters, n_iter=n_iter)
    X = scipy.sparse.vstack(map(lambda x: self.list2csr([x, x + 1.0]), range(0, 100)))
    y = np.array(list(range(0, 100))).reshape((100, 1))
    skl_gs = grid_search.fit(X, y)
    assert len(skl_gs.cv_results_['params']) == n_iter
Example #10
Source File: ml.py From EDeN with MIT License | 4 votes |
def fit_estimator(estimator, positive_data_matrix=None,
                  negative_data_matrix=None, target=None,
                  cv=10, n_jobs=-1, n_iter_search=40, random_state=1):
    """fit_estimator."""
    # hyperparameter optimization
    param_dist = {"n_iter": randint(5, 100),
                  "power_t": uniform(0.1),
                  "alpha": uniform(1e-08, 1e-03),
                  "eta0": uniform(1e-03, 1),
                  "penalty": ["l1", "l2", "elasticnet"],
                  "learning_rate": ["invscaling", "constant", "optimal"]}
    scoring = 'roc_auc'
    n_iter_search = n_iter_search
    random_search = RandomizedSearchCV(estimator,
                                       param_distributions=param_dist,
                                       n_iter=n_iter_search,
                                       cv=cv,
                                       scoring=scoring,
                                       n_jobs=n_jobs,
                                       random_state=random_state,
                                       refit=True)
    X, y = make_data_matrix(positive_data_matrix=positive_data_matrix,
                            negative_data_matrix=negative_data_matrix,
                            target=target)
    random_search.fit(X, y)

    logger.debug('\nClassifier:')
    logger.debug('%s' % random_search.best_estimator_)
    logger.debug('\nPredictive performance:')
    # assess the generalization capacity of the model via a 10-fold cross
    # validation
    scoring_strings = ['accuracy', 'precision', 'recall', 'f1',
                       'average_precision', 'roc_auc']
    for scoring in scoring_strings:
        scores = cross_validation.cross_val_score(
            random_search.best_estimator_, X, y,
            cv=cv, scoring=scoring, n_jobs=n_jobs)
        logger.debug('%20s: %.3f +- %.3f' % (scoring, np.mean(scores), np.std(scores)))

    return random_search.best_estimator_
Example #11
Source File: test_big.py From skutil with BSD 3-Clause "New" or "Revised" License | 4 votes |
def test_large_grid():
    """In this test, we purposely overfit a RandomForest to completely random data
    in order to assert that the test error will far exceed the train error.
    """
    if not SK18:
        custom_cv = KFold(n=y_train.shape[0], n_folds=3, shuffle=True, random_state=42)
    else:
        custom_cv = KFold(n_splits=3, shuffle=True, random_state=42)

    # define the pipe
    pipe = Pipeline([
        ('scaler', SelectiveScaler()),
        ('pca', SelectivePCA(weight=True)),
        ('rf', RandomForestClassifier(random_state=42))
    ])

    # define hyper parameters
    hp = {
        'scaler__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
        'pca__whiten': [True, False],
        'pca__weight': [True, False],
        'pca__n_components': uniform(0.75, 0.15),
        'rf__n_estimators': randint(5, 10),
        'rf__max_depth': randint(5, 15)
    }

    # define the grid
    grid = RandomizedSearchCV(pipe, hp, n_iter=2, scoring='accuracy',
                              n_jobs=1, cv=custom_cv, random_state=42)

    # this will fail because we haven't fit yet
    assert_fails(grid.score, (ValueError, AttributeError), X_train, y_train)

    # fit the grid
    grid.fit(X_train, y_train)

    # score for coverage -- this might warn...
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        grid.score(X_train, y_train)

    # coverage:
    assert grid._estimator_type == 'classifier'

    # get predictions
    tr_pred, te_pred = grid.predict(X_train), grid.predict(X_test)

    # evaluate score (SHOULD be better than random...)
    accuracy_score(y_train, tr_pred), accuracy_score(y_test, te_pred)

    # grid score reports:
    # assert fails for bad percentile
    assert_fails(report_grid_score_detail, ValueError,
                 **{'random_search': grid, 'percentile': 0.0})
    assert_fails(report_grid_score_detail, ValueError,
                 **{'random_search': grid, 'percentile': 1.0})

    # assert fails for bad y_axis
    assert_fails(report_grid_score_detail, ValueError,
                 **{'random_search': grid, 'y_axis': 'bad_axis'})

    # assert passes otherwise
    report_grid_score_detail(grid, charts=True, percentile=0.95)  # just ensure percentile works
Example #12
Source File: mnist_parallel.py From mHTM with MIT License | 4 votes |
def main_local(log_dir, ntrain=800, ntest=200, niter=5, nsplits=3,
               global_inhibition=True, ncores=4, seed=None):
    """
    Perform CV on a subset of the MNIST dataset. Performs parallelizations on
    a local machine.

    @param log_dir: The directory to store the results in.
    @param ntrain: The number of training samples to use.
    @param ntest: The number of testing samples to use.
    @param niter: The number of parameter iterations to use.
    @param nsplits: The number of splits of the data to use.
    @param global_inhibition: If True use global inhibition; otherwise, use
        local inhibition.
    @param ncores: The number of cores to use.
    @param seed: The seed for the random number generators.
    """
    # Run the initialization
    x, y, kargs, params, cv = main(log_dir, ntrain, ntest, niter, nsplits, seed)

    # Build the classifier for doing CV
    clf = RandomizedSearchCV(
        estimator=SPRegion(**kargs),
        param_distributions=params,
        n_iter=niter,              # Total runs
        n_jobs=ncores,             # Use this many cores
        pre_dispatch=1 * ncores,   # Dispatch one job per core at a time
        iid=True,                  # Data is iid across folds
        cv=cv,                     # The CV split for the data
        refit=False,               # Disable fitting best estimator on full dataset
        random_state=seed          # Force same SP across runs
    )

    # Fit the models
    clf.fit(x, y)

    # Extract the CV results
    parameter_names = sorted(clf.grid_scores_[0].parameters.keys())
    parameter_names.pop(parameter_names.index('log_dir'))
    parameter_values = np.zeros((niter, len(parameter_names)))
    results = np.zeros((niter, nsplits))
    for i, score in enumerate(clf.grid_scores_):
        parameter_values[i] = np.array([score.parameters[k] for k in parameter_names])
        results[i] = score.cv_validation_scores

    # Save the CV results
    with open(os.path.join(log_dir, 'cv_results.pkl'), 'wb') as f:
        cPickle.dump((parameter_names, parameter_values, results), f,
                     cPickle.HIGHEST_PROTOCOL)
    with open(os.path.join(log_dir, 'cv_clf.pkl'), 'wb') as f:
        cPickle.dump((clf.grid_scores_, clf.best_score_, clf.best_params_), f,
                     cPickle.HIGHEST_PROTOCOL)