Python sklearn.ensemble.GradientBoostingRegressor() Examples
The following are 30 code examples of sklearn.ensemble.GradientBoostingRegressor(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.ensemble, or try the search function.
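Before the project-specific examples, here is a minimal, self-contained sketch of the typical fit/predict workflow. It uses synthetic data from make_friedman1, and the parameter values are illustrative only, not taken from any example below.

# Minimal usage sketch: fit a GradientBoostingRegressor on synthetic data
# and score it on a held-out split. Parameter values are illustrative only.
from sklearn.datasets import make_friedman1
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

X, y = make_friedman1(n_samples=1000, noise=1.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

reg = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1,
                                max_depth=3, random_state=0)
reg.fit(X_train, y_train)
print("R^2 on held-out data:", reg.score(X_test, y_test))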
Example #1
Source File: friedman_scores.py From mlens with MIT License | 7 votes |
def build_ensemble(**kwargs):
    """Generate ensemble."""
    ens = SuperLearner(**kwargs)
    prep = {'Standard Scaling': [StandardScaler()],
            'Min Max Scaling': [MinMaxScaler()],
            'No Preprocessing': []}

    est = {'Standard Scaling': [ElasticNet(), Lasso(), KNeighborsRegressor()],
           'Min Max Scaling': [SVR()],
           'No Preprocessing': [RandomForestRegressor(random_state=SEED),
                                GradientBoostingRegressor()]}

    ens.add(est, prep)
    ens.add(GradientBoostingRegressor(), meta=True)
    return ens
Example #2
Source File: test_tree.py From bartpy with MIT License | 6 votes |
def test_same_prediction(self):
    from sklearn.ensemble import GradientBoostingRegressor
    params = {'n_estimators': 1, 'max_depth': 2, 'min_samples_split': 2,
              'learning_rate': 0.8, 'loss': 'ls'}
    sklearn_model = GradientBoostingRegressor(**params)
    sklearn_model.fit(self.data.X.values, self.data.y.values)

    sklearn_tree = sklearn_model.estimators_[0][0].tree_
    bartpy_tree = Tree([LeafNode(Split(self.data))])

    map_sklearn_tree_into_bartpy(bartpy_tree, sklearn_tree)

    sklearn_predictions = sklearn_tree.predict(self.data.X.values.astype(np.float32))
    sklearn_predictions = [round(x, 2) for x in sklearn_predictions.reshape(-1)]

    bartpy_tree.cache_up_to_date = False
    bartpy_tree_predictions = bartpy_tree.predict(self.data.X.values)
    bartpy_tree_predictions = [round(x, 2) for x in bartpy_tree_predictions]

    self.assertListEqual(sklearn_predictions, bartpy_tree_predictions)
Example #3
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_regressor_parameter_checks():
    # Check input parameter validation for GradientBoostingRegressor
    assert_raise_message(ValueError, "alpha must be in (0.0, 1.0) but was 1.2",
                         GradientBoostingRegressor(loss='huber', alpha=1.2).fit,
                         X, y)
    assert_raise_message(ValueError, "alpha must be in (0.0, 1.0) but was 1.2",
                         GradientBoostingRegressor(loss='quantile', alpha=1.2).fit,
                         X, y)
    assert_raise_message(ValueError, "Invalid value for max_features: 'invalid'. "
                         "Allowed string values are 'auto', 'sqrt' or 'log2'.",
                         GradientBoostingRegressor(max_features='invalid').fit,
                         X, y)
    assert_raise_message(ValueError, "n_iter_no_change should either be None"
                         " or an integer. 'invalid' was passed",
                         GradientBoostingRegressor(n_iter_no_change='invalid').fit,
                         X, y)
    allowed_presort = ('auto', True, False)
    assert_raise_message(ValueError,
                         "'presort' should be in {}. "
                         "Got 'invalid' instead.".format(allowed_presort),
                         GradientBoostingRegressor(presort='invalid').fit,
                         X, y)
Example #4
Source File: test_ensemble.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_GradientBoostingRegression(self):
    boston = datasets.load_boston()
    df = pdml.ModelFrame(boston)

    params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 0.9,
              'learning_rate': 0.01, 'loss': 'ls',
              'random_state': self.random_state}
    clf1 = ensemble.GradientBoostingRegressor(**params)
    clf2 = df.ensemble.GradientBoostingRegressor(**params)

    clf1.fit(boston.data, boston.target)
    df.fit(clf2)

    expected = clf1.predict(boston.data)
    predicted = df.predict(clf2)
    self.assertIsInstance(predicted, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(predicted.values, expected)

    self.assertAlmostEqual(df.metrics.mean_squared_error(),
                           metrics.mean_squared_error(boston.target, expected))
Example #5
Source File: test_sklearn_glm_regressor_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_ransac_regressor_tree(self):
    model, X = fit_regression_model(
        linear_model.RANSACRegressor(
            base_estimator=GradientBoostingRegressor()))
    model_onnx = convert_sklearn(
        model, "ransac regressor",
        [("input", FloatTensorType([None, X.shape[1]]))])
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X,
        model,
        model_onnx,
        verbose=False,
        basename="SklearnRANSACRegressorTree-Dec3",
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
Example #6
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_staged_predict():
    # Test whether staged decision function eventually gives
    # the same prediction.
    X, y = datasets.make_friedman1(n_samples=1200,
                                   random_state=1,
                                   noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test = X[200:]
    clf = GradientBoostingRegressor()
    # test raise ValueError if not fitted
    assert_raises(ValueError, lambda X: np.fromiter(
        clf.staged_predict(X), dtype=np.float64), X_test)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # test if prediction for last stage equals ``predict``
    for y in clf.staged_predict(X_test):
        assert_equal(y.shape, y_pred.shape)

    assert_array_almost_equal(y_pred, y)
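Outside the test suite, staged_predict is also handy for choosing the number of boosting stages on a validation set. The following is an illustrative sketch (not part of the sklearn tests); the data and parameter values are arbitrary.

# Sketch: track validation error after each boosting stage with
# staged_predict and pick the best number of estimators.
import numpy as np
from sklearn.datasets import make_friedman1
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

X, y = make_friedman1(n_samples=1200, noise=1.0, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=1)

reg = GradientBoostingRegressor(n_estimators=500, random_state=1)
reg.fit(X_train, y_train)

val_errors = [mean_squared_error(y_val, y_pred)
              for y_pred in reg.staged_predict(X_val)]
best_n = int(np.argmin(val_errors)) + 1
print("best n_estimators:", best_n)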
Example #7
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_check_inputs_predict():
    # X has wrong shape
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
    clf.fit(X, y)

    x = np.array([1.0, 2.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)

    x = np.array([[]])
    assert_raises(ValueError, clf.predict, x)

    x = np.array([1.0, 2.0, 3.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)

    clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
    clf.fit(X, rng.rand(len(X)))

    x = np.array([1.0, 2.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)

    x = np.array([[]])
    assert_raises(ValueError, clf.predict, x)

    x = np.array([1.0, 2.0, 3.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)
Example #8
Source File: test_partial_dependence.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_partial_dependence_sample_weight():
    # Test near perfect correlation between partial dependence and diagonal
    # when sample weights emphasize y = x predictions
    N = 1000
    rng = np.random.RandomState(123456)

    mask = rng.randint(2, size=N, dtype=bool)
    x = rng.rand(N)
    # set y = x on mask and y = -x outside
    y = x.copy()
    y[~mask] = -y[~mask]
    X = np.c_[mask, x]
    # sample weights to emphasize data points where y = x
    sample_weight = np.ones(N)
    sample_weight[mask] = 1000.

    clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
    clf.fit(X, y, sample_weight=sample_weight)

    grid = np.arange(0, 1, 0.01)
    pdp = partial_dependence(clf, [1], grid=grid)

    assert np.corrcoef(np.ravel(pdp[0]), grid)[0, 1] > 0.99
Example #9
Source File: friedman.py From Hands-On-Genetic-Algorithms-with-Python with MIT License | 6 votes |
def __init__(self, numFeatures, numSamples, randomSeed):
    """
    :param numFeatures: total number of features to be used (at least 5)
    :param numSamples: number of samples in dataset
    :param randomSeed: random seed value used for reproducible results
    """

    self.numFeatures = numFeatures
    self.numSamples = numSamples
    self.randomSeed = randomSeed

    # generate test data:
    self.X, self.y = datasets.make_friedman1(n_samples=self.numSamples,
                                             n_features=self.numFeatures,
                                             noise=self.NOISE,
                                             random_state=self.randomSeed)

    # divide the data to a training set and a validation set:
    self.X_train, self.X_validation, self.y_train, self.y_validation = \
        model_selection.train_test_split(self.X, self.y,
                                         test_size=self.VALIDATION_SIZE,
                                         random_state=self.randomSeed)

    self.regressor = GradientBoostingRegressor(random_state=self.randomSeed)
Example #10
Source File: test_standardization.py From causallib with Apache License 2.0 | 6 votes |
def ensure_many_models(self):
    from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
    from sklearn.neural_network import MLPRegressor
    from sklearn.linear_model import ElasticNet, RANSACRegressor, HuberRegressor, PassiveAggressiveRegressor
    from sklearn.neighbors import KNeighborsRegressor
    from sklearn.svm import SVR, LinearSVR

    import warnings
    from sklearn.exceptions import ConvergenceWarning
    warnings.filterwarnings('ignore', category=ConvergenceWarning)

    for learner in [GradientBoostingRegressor, RandomForestRegressor, MLPRegressor,
                    ElasticNet, RANSACRegressor, HuberRegressor, PassiveAggressiveRegressor,
                    KNeighborsRegressor, SVR, LinearSVR]:
        learner = learner()
        learner_name = str(learner).split("(", maxsplit=1)[0]
        with self.subTest("Test fit using {learner}".format(learner=learner_name)):
            model = self.estimator.__class__(learner)
            model.fit(self.data_lin["X"], self.data_lin["a"], self.data_lin["y"])
            self.assertTrue(True)  # Fit did not crash
Example #11
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_warm_start(Cls):
    # Test if warm start equals fit.
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    est = Cls(n_estimators=200, max_depth=1)
    est.fit(X, y)

    est_ws = Cls(n_estimators=100, max_depth=1, warm_start=True)
    est_ws.fit(X, y)
    est_ws.set_params(n_estimators=200)
    est_ws.fit(X, y)

    if Cls is GradientBoostingRegressor:
        assert_array_almost_equal(est_ws.predict(X), est.predict(X))
    else:
        # Random state is preserved and hence predict_proba must also be
        # same
        assert_array_equal(est_ws.predict(X), est.predict(X))
        assert_array_almost_equal(est_ws.predict_proba(X),
                                  est.predict_proba(X))
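For readers unfamiliar with warm_start, here is a brief sketch of what the test above exercises, outside the test harness (illustrative only; data and parameters are arbitrary).

# Sketch: grow a boosting ensemble in two steps with warm_start,
# reusing the already-fitted trees instead of refitting from scratch.
from sklearn.datasets import make_friedman1
from sklearn.ensemble import GradientBoostingRegressor

X, y = make_friedman1(n_samples=500, random_state=1)

est = GradientBoostingRegressor(n_estimators=100, warm_start=True, random_state=1)
est.fit(X, y)                      # fits the first 100 trees

est.set_params(n_estimators=200)   # request 100 more trees
est.fit(X, y)                      # only the additional trees are fitted
print(len(est.estimators_))        # 200 boosting stages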
Example #12
Source File: GradientBoostingRegressor.py From Splunking-Crime with GNU Affero General Public License v3.0 | 6 votes |
def __init__(self, options):
    self.handle_options(options)
    params = options.get('params', {})
    out_params = convert_params(
        params,
        strs=['loss', 'max_features'],
        floats=['learning_rate', 'min_weight_fraction_leaf', 'alpha', 'subsample'],
        ints=['n_estimators', 'max_depth', 'min_samples_split',
              'min_samples_leaf', 'max_leaf_nodes', 'random_state'],
    )

    valid_loss = ['ls', 'lad', 'huber', 'quantile']
    if 'loss' in out_params:
        if out_params['loss'] not in valid_loss:
            msg = "loss must be one of: {}".format(', '.join(valid_loss))
            raise RuntimeError(msg)

    if 'max_features' in out_params:
        out_params['max_features'] = handle_max_features(out_params['max_features'])

    self.estimator = _GradientBoostingRegressor(**out_params)
Example #13
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_gradient_boosting_with_init(gb, dataset_maker, init_estimator):
    # Check that GradientBoostingRegressor works when init is a sklearn
    # estimator.
    # Check that an error is raised if trying to fit with sample weight but
    # initial estimator does not support sample weight
    X, y = dataset_maker()
    sample_weight = np.random.RandomState(42).rand(100)

    # init supports sample weights
    init_est = init_estimator()
    gb(init=init_est).fit(X, y, sample_weight=sample_weight)

    # init does not support sample weights
    init_est = _NoSampleWeightWrapper(init_estimator())
    gb(init=init_est).fit(X, y)  # ok no sample weights
    with pytest.raises(ValueError,
                       match="estimator.*does not support sample weights"):
        gb(init=init_est).fit(X, y, sample_weight=sample_weight)
Example #14
Source File: test_full_pipelines.py From python-sasctl with Apache License 2.0 | 6 votes |
def test_register_model(self, boston_dataset):
    pytest.importorskip('sklearn')
    from sasctl import register_model
    from sklearn.ensemble import GradientBoostingRegressor

    TARGET = 'Price'

    X = boston_dataset.drop(TARGET, axis=1)
    y = boston_dataset[TARGET]

    model = GradientBoostingRegressor()
    model.fit(X, y)

    model = register_model(model, self.MODEL_NAME, self.PROJECT_NAME,
                           input=X, force=True)

    assert model.name == self.MODEL_NAME
    assert model.projectName == self.PROJECT_NAME
    assert model.function.lower() == 'prediction'
    assert model.algorithm.lower() == 'gradient boosting'
    assert model.targetLevel.lower() == 'interval'
    assert model.tool.lower().startswith('python')
Example #15
Source File: model.py From bitpredict with MIT License | 6 votes |
def grid_search(X, y, split, learn=[.01], samples_leaf=[250, 350, 500],
                depth=[10, 15]):
    '''
    Runs a grid search for GBM on split data
    '''
    for l in learn:
        for s in samples_leaf:
            for d in depth:
                model = GradientBoostingRegressor(n_estimators=250,
                                                  learning_rate=l,
                                                  min_samples_leaf=s,
                                                  max_depth=d,
                                                  random_state=42)
                model.fit(X.values[:split], y.values[:split])
                in_score = model.score(X.values[:split], y.values[:split])
                out_score = model.score(X.values[split:], y.values[split:])
                print('learning_rate: {}, min_samples_leaf: {}, max_depth: {}'.
                      format(l, s, d))
                print('in-sample score:', in_score)
                print('out-sample score:', out_score)
                print('')
Example #16
Source File: classifier.py From Semantic-Texual-Similarity-Toolkits with MIT License | 6 votes |
def train_model(self, train_file_path, model_path):
    print("==> Load the data ...")
    X_train, Y_train = self.load_file(train_file_path)
    print(train_file_path, shape(X_train))

    print("==> Train the model ...")
    min_max_scaler = preprocessing.MaxAbsScaler()
    X_train_minmax = min_max_scaler.fit_transform(X_train)
    clf = GradientBoostingRegressor(n_estimators=self.n_estimators)
    clf.fit(X_train_minmax.toarray(), Y_train)

    print("==> Save the model ...")
    pickle.dump(clf, open(model_path, 'wb'))
    scaler_path = model_path.replace('.pkl', '.scaler.pkl')
    pickle.dump(min_max_scaler, open(scaler_path, 'wb'))
    return clf
Example #17
Source File: model.py From CryptoBot with Apache License 2.0 | 6 votes |
def grid_search(X, y, split, learn=[.01], samples_leaf=[250, 350, 500],
                depth=[10, 15]):
    '''
    Runs a grid search for GBM on split data
    '''
    for l in learn:
        for s in samples_leaf:
            for d in depth:
                model = GradientBoostingRegressor(n_estimators=250,
                                                  learning_rate=l,
                                                  min_samples_leaf=s,
                                                  max_depth=d,
                                                  random_state=42)
                model.fit(X.values[:split], y.values[:split])
                in_score = model.score(X.values[:split], y.values[:split])
                out_score = model.score(X.values[split:], y.values[split:])
                print('learning_rate: {}, min_samples_leaf: {}, max_depth: {}'.
                      format(l, s, d))
                print('in-sample score:', in_score)
                print('out-sample score:', out_score)
                print('')
Example #18
Source File: test_composite_pipelines.py From coremltools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_boston_OHE_plus_trees(self):
    data = load_boston()

    pl = Pipeline(
        [
            ("OHE", OneHotEncoder(categorical_features=[8], sparse=False)),
            ("Trees", GradientBoostingRegressor(random_state=1)),
        ]
    )

    pl.fit(data.data, data.target)

    # Convert the model
    spec = convert(pl, data.feature_names, "target")

    if _is_macos() and _macos_version() >= (10, 13):
        # Get predictions
        df = pd.DataFrame(data.data, columns=data.feature_names)
        df["prediction"] = pl.predict(data.data)

        # Evaluate it
        result = evaluate_regressor(spec, df, "target", verbose=False)

        assert result["max_error"] < 0.0001
Example #19
Source File: BoostedTrees.py From pyGPGO with MIT License | 6 votes |
def fit(self, X, y):
    """
    Fit a GBM model to data `X` and targets `y`.

    Parameters
    ----------
    X : array-like
        Input values.
    y: array-like
        Target values.
    """
    self.X = X
    self.y = y
    self.n = self.X.shape[0]
    self.modq1 = GradientBoostingRegressor(loss='quantile', alpha=self.q1, **self.params)
    self.modq2 = GradientBoostingRegressor(loss='quantile', alpha=self.q2, **self.params)
    self.mod = GradientBoostingRegressor(loss='ls', **self.params)
    self.modq1.fit(self.X, self.y)
    self.modq2.fit(self.X, self.y)
    self.mod.fit(self.X, self.y)
Example #20
Source File: BoostedTrees.py From pyGPGO with MIT License | 6 votes |
def __init__(self, q1=.16, q2=.84, **params):
    """
    Gradient boosted trees as surrogate model for Bayesian Optimization.
    Uses quantile regression for an estimate of the 'posterior' variance.
    In practice, the std is computed as (`q2` - `q1`) / 2.
    Relies on `sklearn.ensemble.GradientBoostingRegressor`

    Parameters
    ----------
    q1: float
        First quantile.
    q2: float
        Second quantile
    params: tuple
        Extra parameters to pass to `GradientBoostingRegressor`
    """
    self.params = params
    self.q1 = q1
    self.q2 = q2
    self.eps = 1e-1
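To illustrate the (`q2` - `q1`) / 2 rule described in the docstring above, a rough prediction helper built on the three regressors fitted in Example #19 might look like the sketch below. The function name predict_with_uncertainty is hypothetical and this is not the pyGPGO implementation; in particular, using eps as a lower bound on the std is an assumption.

# Illustrative sketch only (not the pyGPGO source): mean prediction from the
# 'ls' model, rough std from the two quantile models via (q2 - q1) / 2.
import numpy as np

def predict_with_uncertainty(surrogate, X_new):
    mean = surrogate.mod.predict(X_new)        # central estimate ('ls' loss)
    lower = surrogate.modq1.predict(X_new)     # lower quantile, e.g. q1 = 0.16
    upper = surrogate.modq2.predict(X_new)     # upper quantile, e.g. q2 = 0.84
    # assumption: clamp the std to eps so it stays strictly positive
    std = np.clip((upper - lower) / 2.0, surrogate.eps, None)
    return mean, std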
Example #21
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_multi_target_sample_weights():
    # weighted regressor
    Xw = [[1, 2, 3], [4, 5, 6]]
    yw = [[3.141, 2.718], [2.718, 3.141]]
    w = [2., 1.]
    rgr_w = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr_w.fit(Xw, yw, w)

    # unweighted, but with repeated samples
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [3.141, 2.718], [2.718, 3.141]]
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X, y)

    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]
    assert_almost_equal(rgr.predict(X_test), rgr_w.predict(X_test))


# Import the data
Example #22
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_multi_target_regression():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    for n in range(3):
        rgr = GradientBoostingRegressor(random_state=0)
        rgr.fit(X_train, y_train[:, n])
        references[:, n] = rgr.predict(X_test)

    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X_train, y_train)
    y_pred = rgr.predict(X_test)

    assert_almost_equal(references, y_pred)


# 0.23. warning about tol not having its correct default value.
Example #23
Source File: para_classifier.py From TextDetector with GNU General Public License v3.0 | 6 votes |
def para_adaboost(data):
    '''para_adaboost(data)
    kernel function for parallel computing adaboost classifier

    data: training data containing features and labels in a tuple

    Return: adaboost classifier model
    '''
    model = GradientBoostingRegressor(learning_rate=1,
                                      n_estimators=1000,
                                      max_depth=1,
                                      random_state=0)
    patch, label = data
    model = model.fit(patch, label)
    return model
# -----------------END: functions used for parallel computation--------------------------#
Example #24
Source File: ensembles.py From Azimuth with BSD 3-Clause "New" or "Revised" License | 5 votes |
def GBR_stacking(y_train, X_train, X_test):
    param_grid = {'learning_rate': [0.1, 0.05, 0.01],
                  'max_depth': [2, 3, 4, 5],       # [2, 3, 4, 6],
                  'min_samples_leaf': [1, 2, 3],   # , 5, 7],
                  'max_features': [1.0, 0.5, 0.3, 0.1]}

    est = en.GradientBoostingRegressor(loss='ls', n_estimators=100)
    clf = GridSearchCV(est, param_grid, n_jobs=3, verbose=1, cv=20,
                       scoring=spearman_scoring).fit(X_train, y_train.flatten())
    # clf.fit(X_train, y_train.flatten())
    return clf.predict(X_test)
Example #25
Source File: utils_scoring.py From auto_ml with MIT License | 5 votes |
def score(self, estimator, X, y, took_log_of_y=False, advanced_scoring=False,
          verbose=2, name=None):
    X, y = utils.drop_missing_y_vals(X, y, output_column=None)

    if isinstance(estimator, GradientBoostingRegressor):
        X = X.toarray()

    predictions = estimator.predict(X)

    if took_log_of_y:
        for idx, val in enumerate(predictions):
            predictions[idx] = math.exp(val)

    try:
        score = self.scoring_func(y, predictions)
    except ValueError:
        bad_val_indices = []
        for idx, val in enumerate(y):
            if str(val) in bad_vals_as_strings or str(predictions[idx]) in bad_vals_as_strings:
                bad_val_indices.append(idx)

        predictions = [val for idx, val in enumerate(predictions) if idx not in bad_val_indices]
        y = [val for idx, val in enumerate(y) if idx not in bad_val_indices]

        print('Found ' + str(len(bad_val_indices)) + ' null or infinity values in the predicted or y values. We will ignore these, and report the score on the rest of the dataset')
        score = self.scoring_func(y, predictions)

    if advanced_scoring == True:
        if hasattr(estimator, 'name'):
            print(estimator.name)
        advanced_scoring_regressors(predictions, y, verbose=verbose, name=name)
    return -1 * score
Example #26
Source File: gradientboostingmodel.py From Supply-demand-forecasting with MIT License | 5 votes |
def setClf(self):
    self.clf = GradientBoostingRegressor(n_estimators=100, verbose=100)
    # self.clf = GradientBoostingRegressor(loss='ls', verbose=300, n_estimators=70,
    #                                      learning_rate=0.1, subsample=1.0, max_features=1.0)
    return
Example #27
Source File: test_ensemble.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_plot_partial_dependence(self):
    df = pdml.ModelFrame(datasets.load_iris())
    clf = df.ensemble.GradientBoostingRegressor(n_estimators=10)
    df.fit(clf)

    """
    # ToDo: Check how to perform plotting test on travis, locally passed.

    fig, axes = df.ensemble.partial_dependence.plot_partial_dependence(clf, [0, (0, 1)])

    import matplotlib
    self.assertIsInstance(fig, matplotlib.figure.Figure)
    self.assertTrue(len(axes), 2)
    self.assertIsInstance(axes[0], matplotlib.axes.Axes)
    """
Example #28
Source File: model.py From bitpredict with MIT License | 5 votes |
def fit_boosting(X, y, window=100000, estimators=250, learning=.01,
                 samples_leaf=500, depth=20, validate=False):
    '''
    Fits Gradient Boosting
    '''
    model = GradientBoostingRegressor(n_estimators=estimators,
                                      learning_rate=learning,
                                      min_samples_leaf=samples_leaf,
                                      max_depth=depth,
                                      random_state=42)
    if validate:
        return cross_validate(X, y, model, window)
    return model.fit(X, y)
Example #29
Source File: test_partial_dependence.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_partial_dependence_regressor():
    # Test partial dependence for regressor
    clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
    clf.fit(boston.data, boston.target)

    grid_resolution = 25
    pdp, axes = partial_dependence(
        clf, [0], X=boston.data, grid_resolution=grid_resolution)

    assert pdp.shape == (1, grid_resolution)
    assert axes[0].shape[0] == grid_resolution
Example #30
Source File: ABuMLCreater.py From abu with GNU General Public License v3.0 | 5 votes |
def xgb_regressor(self, assign=True, **kwargs):
    """
    Supervised learning regressor; by default uses: GBR(n_estimators=100)

    Keyword arguments are passed straight through via **kwargs, i.e.:
    GBR(**kwargs)

    Note the import used:
        try:
            from xgboost.sklearn import XGBRegressor as GBR
        except ImportError:
            from sklearn.ensemble import GradientBoostingRegressor as GBR

    :param assign: whether to store the instantiated regressor, default True, self.reg = reg
    :param kwargs: with keyword arguments, initialize as: GBR(**kwargs)
                   without keyword arguments, initialize as: GBR(n_estimators=100)
    :return: the instantiated GBR object
    """
    if kwargs is not None and len(kwargs) > 0:
        reg = GBR(**kwargs)
    else:
        reg = GBR(n_estimators=100)
    if assign:
        self.reg = reg
    return reg