Python sklearn.ensemble.BaggingRegressor() Examples
The following are 25 code examples of sklearn.ensemble.BaggingRegressor(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.ensemble, or try the search function.
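Before the project-specific examples, a minimal, self-contained sketch of the basic fit/predict workflow may help; the synthetic dataset and every parameter value below are illustrative choices, not taken from any of the examples.

from sklearn.datasets import make_regression
from sklearn.ensemble import BaggingRegressor
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Synthetic regression data (illustrative only).
X, y = make_regression(n_samples=200, n_features=10, noise=0.5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Bag 50 decision trees, each fit on a bootstrap resample of X_train.
# Note: newer scikit-learn (>= 1.2) renames base_estimator to estimator;
# the examples on this page use the older spelling.
reg = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                       n_estimators=50, random_state=0)
reg.fit(X_train, y_train)
print(reg.score(X_test, y_test))  # R^2 on the held-out split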
Example #1
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 9 votes |
def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR(gamma='scale')]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test)
Example #2
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_bootstrap_samples():
    # Test that bootstrapping samples generate non-perfect base estimators.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    base_estimator = DecisionTreeRegressor().fit(X_train, y_train)

    # without bootstrap, all trees are perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=False,
                                random_state=rng).fit(X_train, y_train)

    assert_equal(base_estimator.score(X_train, y_train),
                 ensemble.score(X_train, y_train))

    # with bootstrap, trees are no longer perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=True,
                                random_state=rng).fit(X_train, y_train)

    assert_greater(base_estimator.score(X_train, y_train),
                   ensemble.score(X_train, y_train))

    # check that each sampling corresponds to a complete bootstrap resample.
    # the size of each bootstrap should be the same as the input data but
    # the data should be different (checked using the hash of the data).
    ensemble = BaggingRegressor(base_estimator=DummySizeEstimator(),
                                bootstrap=True).fit(X_train, y_train)
    training_hash = []
    for estimator in ensemble.estimators_:
        assert estimator.training_size_ == X_train.shape[0]
        training_hash.append(estimator.training_hash_)
    assert len(set(training_hash)) == len(training_hash)
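The DummySizeEstimator referenced above is a helper defined elsewhere in sklearn's test module; it simply records the size and a hash of the training set passed to each base estimator. For readers who want to run the snippet in isolation, a minimal stand-in might look like the following (this is an assumption, not the library's definition):

import joblib
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin

class DummySizeEstimator(BaseEstimator, RegressorMixin):
    """Hypothetical stand-in: records the size and hash of each bootstrap sample."""

    def fit(self, X, y):
        self.training_size_ = X.shape[0]
        self.training_hash_ = joblib.hash(X)  # distinct resamples hash differently
        return self

    def predict(self, X):
        return np.ones(X.shape[0])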
Example #3
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_bootstrap_features():
    # Test that bootstrapping features may generate duplicate features.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=False,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_equal(boston.data.shape[1], np.unique(features).shape[0])

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=True,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_greater(boston.data.shape[1], np.unique(features).shape[0])
Example #4
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_bootstrap_features():
    # Test that bootstrapping features may generate duplicate features.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=False,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_equal(boston.data.shape[1], np.unique(features).shape[0])

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=True,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_greater(boston.data.shape[1], np.unique(features).shape[0])
Example #5
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_parallel_regression():
    # Check parallel regression.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=1,
                                random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y3)
Example #6
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_parallel_regression():
    # Check parallel regression.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=1,
                                random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y3)
Example #7
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR()]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test)
Example #8
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_bootstrap_samples():
    # Test that bootstrapping samples generate non-perfect base estimators.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    base_estimator = DecisionTreeRegressor().fit(X_train, y_train)

    # without bootstrap, all trees are perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=False,
                                random_state=rng).fit(X_train, y_train)

    assert_equal(base_estimator.score(X_train, y_train),
                 ensemble.score(X_train, y_train))

    # with bootstrap, trees are no longer perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=True,
                                random_state=rng).fit(X_train, y_train)

    assert_greater(base_estimator.score(X_train, y_train),
                   ensemble.score(X_train, y_train))
Example #9
Source File: ABuMLCreater.py From abu with GNU General Public License v3.0 | 5 votes |
def bagging_regressor_best(self, x, y, param_grid=None, assign=True, n_jobs=-1, show=True):
    """
    Search for the best parameters for the BaggingRegressor constructor.

    The wrapping AbuML class exposes bagging_regressor_best, which calls this
    method directly with the x, y data held by AbuML, e.g.:

        Calling bagging_regressor_best without a param_grid argument:

        from abupy import AbuML, ml
        ttn_abu = AbuML.create_test_more_fiter()
        ttn_abu.bagging_regressor_best()

        Calling bagging_regressor_best with a param_grid argument:

        param_grid = {'max_samples': np.arange(1, 5),
                      'n_estimators': np.arange(100, 300, 50)}
        ttn_abu.bagging_regressor_best(param_grid=param_grid, n_jobs=-1)

        out: BaggingRegressor(max_samples=4, n_estimators=250)

    :param x: training-set x matrix, a numpy matrix
    :param y: training-set y sequence, a numpy sequence
    :param param_grid: dict of keyword arguments to search over, e.g.
                       param_grid = {'max_samples': np.arange(1, 5),
                                     'n_estimators': np.arange(100, 300, 50)}
    :param assign: whether to keep the estimator instantiated with the best
                   parameters, default True
    :param n_jobs: number of parallel worker processes, default -1
                   (one process per CPU core)
    :param show: whether to visualize the parameter-search results
    :return: a BaggingRegressor built with the best parameters found
    """
    return self._estimators_prarms_best(self.bagging_regressor, x, y, param_grid,
                                        assign, n_jobs, show)
Example #10
Source File: ABuMLCreater.py From abu with GNU General Public License v3.0 | 5 votes |
def bagging_regressor(self, assign=True, base_estimator=DecisionTreeRegressor(), **kwargs):
    """
    Supervised regression: instantiate a BaggingRegressor, by default as

        BaggingRegressor(base_estimator=base_estimator, n_estimators=200,
                         bootstrap=True, oob_score=True, random_state=1)

    Keyword arguments are passed straight through to BaggingRegressor,
    i.e. BaggingRegressor(**kwargs).

    :param base_estimator: defaults to DecisionTreeRegressor()
    :param assign: whether to keep the instantiated BaggingRegressor,
                   default True, i.e. self.reg = reg
    :param kwargs: if given, initialize with BaggingRegressor(**kwargs);
                   otherwise initialize with
                   BaggingRegressor(base_estimator=base_estimator,
                                    n_estimators=200, bootstrap=True,
                                    oob_score=True, random_state=1)
    :return: the instantiated BaggingRegressor
    """
    if kwargs is not None and len(kwargs) > 0:
        if 'base_estimator' not in kwargs:
            kwargs['base_estimator'] = base_estimator
        reg = BaggingRegressor(**kwargs)
    else:
        reg = BaggingRegressor(base_estimator=base_estimator, n_estimators=200,
                               bootstrap=True, oob_score=True, random_state=1)
    if assign:
        self.reg = reg
    return reg
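The kwargs-merging logic above — inject the default base_estimator only when the caller did not supply one, otherwise fall back to a fully specified default constructor — is a reusable pattern. Here is a standalone sketch of the same idea; the factory function name make_bagging_regressor is a hypothetical example, not part of abu:

from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor

def make_bagging_regressor(base_estimator=None, **kwargs):
    # Hypothetical helper mirroring the passthrough pattern above.
    if base_estimator is None:
        base_estimator = DecisionTreeRegressor()
    if kwargs:
        # Caller-supplied kwargs win; only fill in base_estimator if absent.
        kwargs.setdefault('base_estimator', base_estimator)
        return BaggingRegressor(**kwargs)
    # No kwargs: fall back to the fixed default configuration.
    return BaggingRegressor(base_estimator=base_estimator, n_estimators=200,
                            bootstrap=True, oob_score=True, random_state=1)

reg = make_bagging_regressor(n_estimators=100)  # takes the kwargs path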
Example #11
Source File: test_ensemble.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.ensemble.AdaBoostClassifier, ensemble.AdaBoostClassifier)
    self.assertIs(df.ensemble.AdaBoostRegressor, ensemble.AdaBoostRegressor)
    self.assertIs(df.ensemble.BaggingClassifier, ensemble.BaggingClassifier)
    self.assertIs(df.ensemble.BaggingRegressor, ensemble.BaggingRegressor)
    self.assertIs(df.ensemble.ExtraTreesClassifier, ensemble.ExtraTreesClassifier)
    self.assertIs(df.ensemble.ExtraTreesRegressor, ensemble.ExtraTreesRegressor)
    self.assertIs(df.ensemble.GradientBoostingClassifier,
                  ensemble.GradientBoostingClassifier)
    self.assertIs(df.ensemble.GradientBoostingRegressor,
                  ensemble.GradientBoostingRegressor)
    self.assertIs(df.ensemble.IsolationForest, ensemble.IsolationForest)
    self.assertIs(df.ensemble.RandomForestClassifier,
                  ensemble.RandomForestClassifier)
    self.assertIs(df.ensemble.RandomTreesEmbedding, ensemble.RandomTreesEmbedding)
    self.assertIs(df.ensemble.RandomForestRegressor,
                  ensemble.RandomForestRegressor)
    self.assertIs(df.ensemble.VotingClassifier, ensemble.VotingClassifier)
Example #12
Source File: baselines.py From AirBnbPricePrediction with MIT License | 5 votes |
def get_ensemble_models():
    rf = RandomForestRegressor(n_estimators=51, min_samples_leaf=5,
                               min_samples_split=3, random_state=42,
                               n_jobs=int(0.8 * n_cores))
    bag = BaggingRegressor(n_estimators=51, random_state=42,
                           n_jobs=int(0.8 * n_cores))
    extra = ExtraTreesRegressor(n_estimators=71, random_state=42,
                                n_jobs=int(0.8 * n_cores))
    ada = AdaBoostRegressor(random_state=42)
    grad = GradientBoostingRegressor(n_estimators=101, random_state=42)
    classifier_list = [rf, bag, extra, ada, grad]
    classifier_name_list = ['Random Forests', 'Bagging', 'Extra Trees',
                            'AdaBoost', 'Gradient Boost']
    return classifier_list, classifier_name_list
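A typical way to consume the two returned lists is to zip them together and score each model with cross-validation. The driver loop below is a sketch, not part of the project; X, y, and n_cores are assumed to be defined by the surrounding script:

from sklearn.model_selection import cross_val_score

# Hypothetical driver loop; X, y, and n_cores come from the caller.
classifier_list, classifier_name_list = get_ensemble_models()
for model, name in zip(classifier_list, classifier_name_list):
    scores = cross_val_score(model, X, y, cv=5, scoring='r2')
    print('%s: mean R^2 = %.3f' % (name, scores.mean()))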
Example #13
Source File: impute.py From skoot with MIT License | 5 votes |
def __init__(self, cols=None, predictors=None, base_estimator=None,
             n_estimators=10, max_samples=1.0, max_features=1.0,
             bootstrap=True, bootstrap_features=False, n_jobs=1,
             random_state=None, verbose=0, tmp_fill=-999., as_df=True):

    super(BaggedRegressorImputer, self).__init__(
        imputer_class=BaggingRegressor, cols=cols,
        predictors=predictors, base_estimator=base_estimator,
        n_estimators=n_estimators, max_samples=max_samples,
        max_features=max_features, bootstrap=bootstrap,
        bootstrap_features=bootstrap_features, n_jobs=n_jobs,
        random_state=random_state, verbose=verbose,
        tmp_fill=tmp_fill, as_df=as_df)
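BaggedRegressorImputer is a scikit-learn-style transformer, so usage presumably follows the usual fit/transform convention. The sketch below is an assumption about typical use, not taken from skoot's documentation; the import path, DataFrame, column names, and parameter values are all illustrative:

import numpy as np
import pandas as pd
from skoot.impute import BaggedRegressorImputer  # assumed import path

# Illustrative frame: column 'b' has NaNs to impute from 'a' and 'c'.
df = pd.DataFrame({'a': [1.0, 2.0, 3.0, 4.0],
                   'b': [2.0, np.nan, 6.0, np.nan],
                   'c': [0.5, 1.5, 2.5, 3.5]})

imputer = BaggedRegressorImputer(cols=['b'], n_estimators=25, random_state=42)
imputed = imputer.fit(df).transform(df)  # NaNs in 'b' filled by bagged regressors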
Example #14
Source File: BaggingRegressor.py From mltk-algo-contrib with Apache License 2.0 | 5 votes |
def __init__(self, options):
    self.handle_options(options)
    params = options.get('params', {})
    out_params = convert_params(
        params,
        floats=['max_samples', 'max_features'],
        bools=['bootstrap', 'bootstrap_features', 'oob_score', 'warm_start'],
        ints=['n_estimators'],
    )
    self.estimator = _BaggingRegressor(**out_params)
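convert_params is a utility from the MLTK codebase that coerces the string-valued options arriving from Splunk search commands into the types the estimator expects. A minimal stand-in illustrating the idea, written here as an assumption rather than the library's definition:

def convert_params(params, floats=(), bools=(), ints=()):
    # Hypothetical minimal stand-in: coerce string options to typed values.
    out = {}
    for name, value in params.items():
        if name in floats:
            out[name] = float(value)
        elif name in ints:
            out[name] = int(value)
        elif name in bools:
            out[name] = str(value).lower() in ('1', 'true', 't', 'yes')
        else:
            out[name] = value
    return out

print(convert_params({'n_estimators': '25', 'bootstrap': 'true'},
                     bools=['bootstrap'], ints=['n_estimators']))
# {'n_estimators': 25, 'bootstrap': True}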
Example #15
Source File: baggingmodel.py From Supply-demand-forecasting with MIT License | 5 votes |
def setClf(self):
    # min_samples_split = 3
    self.clf = BaggingRegressor(n_estimators=100,
                                max_samples=0.5,
                                max_features=0.5,
                                verbose=100)
    return
Example #16
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_oob_score_regression():
    # Check that oob prediction is a good estimation of the generalization
    # error.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    clf = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                           n_estimators=50,
                           bootstrap=True,
                           oob_score=True,
                           random_state=rng).fit(X_train, y_train)

    test_score = clf.score(X_test, y_test)

    assert_less(abs(test_score - clf.oob_score_), 0.1)

    # Test with few estimators
    assert_warns(UserWarning,
                 BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                  n_estimators=1,
                                  bootstrap=True,
                                  oob_score=True,
                                  random_state=rng).fit,
                 X_train, y_train)
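Outside of a test, the same out-of-bag mechanism gives a free generalization estimate without holding out a validation split. A minimal sketch on synthetic data (all values illustrative):

from sklearn.datasets import make_regression
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor

X, y = make_regression(n_samples=300, n_features=8, noise=1.0, random_state=0)

# oob_score=True requires bootstrap=True: each tree's OOB samples act as
# its private validation set.
reg = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                       n_estimators=50, bootstrap=True, oob_score=True,
                       random_state=0).fit(X, y)
print(reg.oob_score_)  # R^2 estimated from out-of-bag samples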
Example #17
Source File: models.py From automl-phase-2 with MIT License | 5 votes |
def __init__(self, info, verbose=True, debug_mode=False):
    self.label_num = info['label_num']
    self.target_num = info['target_num']
    self.task = info['task']
    self.metric = info['metric']
    self.postprocessor = None
    # self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=True)  # To calibrate proba
    self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False)  # To calibrate proba
    if debug_mode >= 2:
        self.name = "RandomPredictor"
        self.model = RandomPredictor(self.target_num)
        self.predict_method = self.model.predict_proba
        return
    if info['task'] == 'regression':
        if info['is_sparse'] == True:
            self.name = "BaggingRidgeRegressor"
            self.model = BaggingRegressor(base_estimator=Ridge(),
                                          n_estimators=1,
                                          verbose=verbose)  # unfortunately, no warm start...
        else:
            self.name = "GradientBoostingRegressor"
            self.model = GradientBoostingRegressor(n_estimators=1,
                                                   verbose=verbose,
                                                   warm_start=True)
        self.predict_method = self.model.predict  # Always predict probabilities
    else:
        if info['has_categorical']:
            # Out of laziness, we do not convert categorical variables...
            self.name = "RandomForestClassifier"
            self.model = RandomForestClassifier(n_estimators=1,
                                                verbose=verbose)  # unfortunately, no warm start...
        elif info['is_sparse']:
            self.name = "BaggingNBClassifier"
            self.model = BaggingClassifier(base_estimator=BernoulliNB(),
                                           n_estimators=1,
                                           verbose=verbose)  # unfortunately, no warm start...
        else:
            self.name = "GradientBoostingClassifier"
            self.model = eval(self.name + "(n_estimators=1, verbose=" + str(verbose) +
                              ", min_samples_split=10, random_state=1, warm_start = True)")
        if info['task'] == 'multilabel.classification':
            self.model = MultiLabelEnsemble(self.model)
        self.predict_method = self.model.predict_proba
Example #18
Source File: ml_regressor.py From rampy with GNU General Public License v2.0 | 5 votes |
def fit(self):
    """Scale data and train the model with the indicated algorithm.

    Do not forget to tune the hyperparameters.

    Parameters
    ----------
    algorithm : str
        "KernelRidge", "SVM", "LinearRegression", "Lasso", "ElasticNet",
        "NeuralNet" or "BaggingNeuralNet", default = "SVM"
    """
    self.X_scaler.fit(self.X_train)
    self.Y_scaler.fit(self.y_train)

    # scaling the data in all cases, it may not be used during the fit later
    self.X_train_sc = self.X_scaler.transform(self.X_train)
    self.y_train_sc = self.Y_scaler.transform(self.y_train)
    self.X_test_sc = self.X_scaler.transform(self.X_test)
    self.y_test_sc = self.Y_scaler.transform(self.y_test)

    if self.algorithm == "KernelRidge":
        clf_kr = KernelRidge(kernel=self.user_kernel)
        self.model = sklearn.model_selection.GridSearchCV(
            clf_kr, cv=5, param_grid=self.param_kr)
    elif self.algorithm == "SVM":
        clf_svm = SVR(kernel=self.user_kernel)
        self.model = sklearn.model_selection.GridSearchCV(
            clf_svm, cv=5, param_grid=self.param_svm)
    elif self.algorithm == "Lasso":
        clf_lasso = sklearn.linear_model.Lasso(alpha=0.1,
                                               random_state=self.rand_state)
        self.model = sklearn.model_selection.GridSearchCV(
            clf_lasso, cv=5, param_grid=dict(alpha=np.logspace(-5, 5, 30)))
    elif self.algorithm == "ElasticNet":
        clf_ElasticNet = sklearn.linear_model.ElasticNet(
            alpha=0.1, l1_ratio=0.5, random_state=self.rand_state)
        self.model = sklearn.model_selection.GridSearchCV(
            clf_ElasticNet, cv=5, param_grid=dict(alpha=np.logspace(-5, 5, 30)))
    elif self.algorithm == "LinearRegression":
        self.model = sklearn.linear_model.LinearRegression()
    elif self.algorithm == "NeuralNet":
        self.model = MLPRegressor(**self.param_neurons)
    elif self.algorithm == "BaggingNeuralNet":
        nn_m = MLPRegressor(**self.param_neurons)
        self.model = BaggingRegressor(base_estimator=nn_m, **self.param_bag)

    if self.scaling == True:
        self.model.fit(self.X_train_sc, self.y_train_sc.reshape(-1,))
        predict_train_sc = self.model.predict(self.X_train_sc)
        self.prediction_train = self.Y_scaler.inverse_transform(
            predict_train_sc.reshape(-1, 1))
        predict_test_sc = self.model.predict(self.X_test_sc)
        self.prediction_test = self.Y_scaler.inverse_transform(
            predict_test_sc.reshape(-1, 1))
    else:
        self.model.fit(self.X_train, self.y_train.reshape(-1,))
        self.prediction_train = self.model.predict(self.X_train)
        self.prediction_test = self.model.predict(self.X_test)
Example #19
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_bagging_regressor_with_missing_inputs():
    # Check that BaggingRegressor can accept X with missing/infinite data
    X = np.array([
        [1, 3, 5],
        [2, None, 6],
        [2, np.nan, 6],
        [2, np.inf, 6],
        [2, np.NINF, 6],
    ])
    y_values = [
        np.array([2, 3, 3, 3, 3]),
        np.array([
            [2, 1, 9],
            [3, 6, 8],
            [3, 6, 8],
            [3, 6, 8],
            [3, 6, 8],
        ])
    ]
    for y in y_values:
        regressor = DecisionTreeRegressor()
        pipeline = make_pipeline(
            FunctionTransformer(replace, validate=False), regressor
        )
        pipeline.fit(X, y).predict(X)
        bagging_regressor = BaggingRegressor(pipeline)
        y_hat = bagging_regressor.fit(X, y).predict(X)
        assert_equal(y.shape, y_hat.shape)

    # Verify that exceptions can be raised by wrapper regressor
    regressor = DecisionTreeRegressor()
    pipeline = make_pipeline(regressor)
    assert_raises(ValueError, pipeline.fit, X, y)
    bagging_regressor = BaggingRegressor(pipeline)
    assert_raises(ValueError, bagging_regressor.fit, X, y)
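The replace helper wrapped in FunctionTransformer above is defined elsewhere in sklearn's test module; it substitutes a finite sentinel for missing and infinite entries so the downstream tree can fit. For readers running the snippet standalone, a minimal stand-in might be (an assumption, not the library's definition):

import numpy as np

def replace(X):
    # Hypothetical stand-in: map None/NaN/±inf entries to a sentinel value.
    X = np.array(X, dtype=float)   # None becomes nan under the float cast
    X[~np.isfinite(X)] = -1.0      # catches nan, inf, and -inf
    return X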
Example #20
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_oob_score_regression():
    # Check that oob prediction is a good estimation of the generalization
    # error.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    clf = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                           n_estimators=50,
                           bootstrap=True,
                           oob_score=True,
                           random_state=rng).fit(X_train, y_train)

    test_score = clf.score(X_test, y_test)

    assert_less(abs(test_score - clf.oob_score_), 0.1)

    # Test with few estimators
    assert_warns(UserWarning,
                 BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                  n_estimators=1,
                                  bootstrap=True,
                                  oob_score=True,
                                  random_state=rng).fit,
                 X_train, y_train)
Example #21
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 4 votes |
def test_base_estimator():
    # Check base_estimator and its default values.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(None,
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeClassifier))

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeClassifier))

    ensemble = BaggingClassifier(Perceptron(tol=1e-3),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, Perceptron))

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(None,
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeRegressor))

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeRegressor))

    ensemble = BaggingRegressor(SVR(),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, SVR))
Example #22
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 4 votes |
def test_sparse_regression():
    # Check regression for various parameter settings on sparse input.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)

    class CustomSVR(SVR):
        """SVC variant that records the nature of the training set"""

        def fit(self, X, y):
            super(CustomSVR, self).fit(X, y)
            self.data_type_ = type(X)
            return self

    parameter_sets = [
        {"max_samples": 0.5,
         "max_features": 2,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_samples": 1.0,
         "max_features": 4,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_features": 2,
         "bootstrap": False,
         "bootstrap_features": True},
        {"max_samples": 0.5,
         "bootstrap": True,
         "bootstrap_features": False},
    ]

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)
        for params in parameter_sets:
            # Trained on sparse format
            sparse_classifier = BaggingRegressor(
                base_estimator=CustomSVR(),
                random_state=1,
                **params
            ).fit(X_train_sparse, y_train)
            sparse_results = sparse_classifier.predict(X_test_sparse)

            # Trained on dense format
            dense_results = BaggingRegressor(
                base_estimator=CustomSVR(),
                random_state=1,
                **params
            ).fit(X_train, y_train).predict(X_test)

            sparse_type = type(X_train_sparse)
            types = [i.data_type_ for i in sparse_classifier.estimators_]

            assert_array_equal(sparse_results, dense_results)
            assert all([t == sparse_type for t in types])
            assert_array_equal(sparse_results, dense_results)
Example #23
Source File: pca_regression.py From AmusingPythonCodes with MIT License | 4 votes |
def lets_try(train, labels):
    results = {}

    def test_model(clf):
        cv = KFold(n_splits=5, shuffle=True, random_state=45)
        r2 = make_scorer(r2_score)
        r2_val_score = cross_val_score(clf, train, labels, cv=cv, scoring=r2)
        scores = [r2_val_score.mean()]
        return scores

    clf = linear_model.LinearRegression()
    results["Linear"] = test_model(clf)

    clf = linear_model.Ridge()
    results["Ridge"] = test_model(clf)

    clf = linear_model.BayesianRidge()
    results["Bayesian Ridge"] = test_model(clf)

    clf = linear_model.HuberRegressor()
    results["Hubber"] = test_model(clf)

    clf = linear_model.Lasso(alpha=1e-4)
    results["Lasso"] = test_model(clf)

    clf = BaggingRegressor()
    results["Bagging"] = test_model(clf)

    clf = RandomForestRegressor()
    results["RandomForest"] = test_model(clf)

    clf = AdaBoostRegressor()
    results["AdaBoost"] = test_model(clf)

    clf = svm.SVR()
    results["SVM RBF"] = test_model(clf)

    clf = svm.SVR(kernel="linear")
    results["SVM Linear"] = test_model(clf)

    results = pd.DataFrame.from_dict(results, orient='index')
    results.columns = ["R Square Score"]
    # results = results.sort(columns=["R Square Score"], ascending=False)
    results.plot(kind="bar", title="Model Scores")
    axes = plt.gca()
    axes.set_ylim([0.5, 1])
    return results
Example #24
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def test_base_estimator():
    # Check base_estimator and its default values.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(None,
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    assert isinstance(ensemble.base_estimator_, DecisionTreeClassifier)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    assert isinstance(ensemble.base_estimator_, DecisionTreeClassifier)

    ensemble = BaggingClassifier(Perceptron(tol=1e-3),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    assert isinstance(ensemble.base_estimator_, Perceptron)

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(None,
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    assert isinstance(ensemble.base_estimator_, DecisionTreeRegressor)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    assert isinstance(ensemble.base_estimator_, DecisionTreeRegressor)

    ensemble = BaggingRegressor(SVR(gamma='scale'),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    assert isinstance(ensemble.base_estimator_, SVR)
Example #25
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def test_sparse_regression():
    # Check regression for various parameter settings on sparse input.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)

    class CustomSVR(SVR):
        """SVC variant that records the nature of the training set"""

        def fit(self, X, y):
            super().fit(X, y)
            self.data_type_ = type(X)
            return self

    parameter_sets = [
        {"max_samples": 0.5,
         "max_features": 2,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_samples": 1.0,
         "max_features": 4,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_features": 2,
         "bootstrap": False,
         "bootstrap_features": True},
        {"max_samples": 0.5,
         "bootstrap": True,
         "bootstrap_features": False},
    ]

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)
        for params in parameter_sets:
            # Trained on sparse format
            sparse_classifier = BaggingRegressor(
                base_estimator=CustomSVR(gamma='scale'),
                random_state=1,
                **params
            ).fit(X_train_sparse, y_train)
            sparse_results = sparse_classifier.predict(X_test_sparse)

            # Trained on dense format
            dense_results = BaggingRegressor(
                base_estimator=CustomSVR(gamma='scale'),
                random_state=1,
                **params
            ).fit(X_train, y_train).predict(X_test)

            sparse_type = type(X_train_sparse)
            types = [i.data_type_ for i in sparse_classifier.estimators_]

            assert_array_almost_equal(sparse_results, dense_results)
            assert all([t == sparse_type for t in types])
            assert_array_almost_equal(sparse_results, dense_results)
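As this test demonstrates, BaggingRegressor accepts scipy sparse matrices directly, provided the base estimator supports sparse input (SVR does). A minimal usage sketch with synthetic data and illustrative parameters:

from scipy.sparse import csr_matrix
from sklearn.datasets import make_regression
from sklearn.ensemble import BaggingRegressor
from sklearn.svm import SVR

X, y = make_regression(n_samples=100, n_features=6, random_state=0)
X_sparse = csr_matrix(X)  # same data, CSR sparse format

reg = BaggingRegressor(base_estimator=SVR(gamma='scale'), random_state=1)
reg.fit(X_sparse, y)           # fit directly on the sparse matrix
preds = reg.predict(X_sparse)  # predictions match the dense-input model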