Python sklearn.ensemble.ExtraTreesRegressor() Examples
The following are 19 code examples of sklearn.ensemble.ExtraTreesRegressor(). You can go to the original project or source file by following the link above each example, or browse the other available functions and classes of the sklearn.ensemble module.
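As a baseline before the project-specific examples, here is a minimal, self-contained sketch of the usual fit/predict workflow for ExtraTreesRegressor on synthetic data. It is not drawn from any of the projects below, and the hyperparameters are illustrative only:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split

# Synthetic regression data, used only for illustration
X, y = make_regression(n_samples=500, n_features=10, noise=0.1, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit an extremely randomized trees ensemble and score it on held-out data
model = ExtraTreesRegressor(n_estimators=100, random_state=0, n_jobs=-1)
model.fit(X_train, y_train)
print("R^2 on test set:", model.score(X_test, y_test))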
Example #1
Source File: RandomForest.py From pyGPGO with MIT License | 6 votes |
def fit(self, X, y):
    """
    Fit a Random Forest model to data `X` and targets `y`.

    Parameters
    ----------
    X : array-like
        Input values.
    y: array-like
        Target values.
    """
    self.X = X
    self.y = y
    self.n = self.X.shape[0]
    self.model = ExtraTreesRegressor(**self.params)
    self.model.fit(X, y)
Example #2
Source File: extra_trees.py From mljar-supervised with MIT License | 6 votes |
def __init__(self, params):
    super(ExtraTreesRegressorAlgorithm, self).__init__(params)
    logger.debug("ExtraTreesRegressorAlgorithm.__init__")

    self.library_version = sklearn.__version__
    self.trees_in_step = regression_additional.get("trees_in_step", 100)
    self.max_steps = regression_additional.get("max_steps", 50)
    self.early_stopping_rounds = regression_additional.get(
        "early_stopping_rounds", 50
    )
    self.model = ExtraTreesRegressor(
        n_estimators=self.trees_in_step,
        criterion=params.get("criterion", "mse"),
        max_features=params.get("max_features", 0.8),
        min_samples_split=params.get("min_samples_split", 4),
        warm_start=True,
        n_jobs=-1,
        random_state=params.get("seed", 1),
    )
Example #3
Source File: machine_learning_setup.py From OpenOA with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, algorithm, params=None):
    '''
    Initialize the class with a list of possible algorithms and recommended hyperparameter ranges
    '''
    if algorithm == 'etr':  # Extra trees regressor
        from sklearn.ensemble import ExtraTreesRegressor
        self.hyper_range = {"max_depth": [4, 8, 12, 16, 20],
                            "min_samples_split": np.arange(2, 11),
                            "min_samples_leaf": np.arange(1, 11),
                            "n_estimators": np.arange(10, 801, 40)}
        self.algorithm = ExtraTreesRegressor()

    elif algorithm == 'gbm':  # Gradient boosting model
        from sklearn.ensemble import GradientBoostingRegressor
        self.hyper_range = {"max_depth": [4, 8, 12, 16, 20],
                            "min_samples_split": np.arange(2, 11),
                            "min_samples_leaf": np.arange(1, 11),
                            "n_estimators": np.arange(10, 801, 40)}
        self.algorithm = GradientBoostingRegressor()

    elif algorithm == 'gam':  # Generalized additive model
        from pygam import GAM
        self.hyper_range = {'n_splines': np.arange(5, 40)}
        self.algorithm = GAM()

    # Set scorer as R2
    self.my_scorer = make_scorer(r2_score, greater_is_better=True)
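The hyper_range dictionaries and R2 scorer defined above are meant to drive a hyperparameter search. Below is a hedged sketch of how such a setup could be plugged into scikit-learn's RandomizedSearchCV; the wiring is an assumption for illustration, not OpenOA's actual search code:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import make_scorer, r2_score
from sklearn.model_selection import RandomizedSearchCV

X, y = make_regression(n_samples=300, n_features=8, random_state=0)

# Same ranges as the 'etr' branch above
hyper_range = {"max_depth": [4, 8, 12, 16, 20],
               "min_samples_split": np.arange(2, 11),
               "min_samples_leaf": np.arange(1, 11),
               "n_estimators": np.arange(10, 801, 40)}
my_scorer = make_scorer(r2_score, greater_is_better=True)

# Sample 20 random hyperparameter combinations and keep the best by R^2
search = RandomizedSearchCV(ExtraTreesRegressor(), hyper_range, n_iter=20,
                            scoring=my_scorer, cv=5, random_state=0)
search.fit(X, y)
print(search.best_params_)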
Example #4
Source File: test_forest.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_min_impurity_decrease():
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_decrease=0.1)
        est.fit(X, y)
        for tree in est.estimators_:
            # Simply check if the parameter is passed on correctly. Tree tests
            # will suffice for the actual working of this param
            assert_equal(tree.min_impurity_decrease, 0.1)
Example #5
Source File: test_forest.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_min_impurity_split():
    # Test if min_impurity_split of base estimators is set
    # Regression test for #8006
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_split=0.1)
        est = assert_warns_message(DeprecationWarning, "min_impurity_decrease",
                                   est.fit, X, y)
        for tree in est.estimators_:
            assert_equal(tree.min_impurity_split, 0.1)
Example #6
Source File: test_ensemble.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.ensemble.AdaBoostClassifier, ensemble.AdaBoostClassifier)
    self.assertIs(df.ensemble.AdaBoostRegressor, ensemble.AdaBoostRegressor)
    self.assertIs(df.ensemble.BaggingClassifier, ensemble.BaggingClassifier)
    self.assertIs(df.ensemble.BaggingRegressor, ensemble.BaggingRegressor)
    self.assertIs(df.ensemble.ExtraTreesClassifier, ensemble.ExtraTreesClassifier)
    self.assertIs(df.ensemble.ExtraTreesRegressor, ensemble.ExtraTreesRegressor)
    self.assertIs(df.ensemble.GradientBoostingClassifier, ensemble.GradientBoostingClassifier)
    self.assertIs(df.ensemble.GradientBoostingRegressor, ensemble.GradientBoostingRegressor)
    self.assertIs(df.ensemble.IsolationForest, ensemble.IsolationForest)
    self.assertIs(df.ensemble.RandomForestClassifier, ensemble.RandomForestClassifier)
    self.assertIs(df.ensemble.RandomTreesEmbedding, ensemble.RandomTreesEmbedding)
    self.assertIs(df.ensemble.RandomForestRegressor, ensemble.RandomForestRegressor)
    self.assertIs(df.ensemble.VotingClassifier, ensemble.VotingClassifier)
Example #7
Source File: model.py From numerox with GNU General Public License v3.0 | 5 votes |
def fit_predict(self, dfit, dpre, tournament):
    clf = ETC(criterion='mse',
              max_features=self.p['nfeatures'],
              max_depth=self.p['depth'],
              n_estimators=self.p['ntrees'],
              random_state=self.p['seed'],
              n_jobs=-1)
    clf.fit(dfit.x, dfit.y[tournament])
    yhat = clf.predict(dpre.x)
    return dpre.ids, yhat
Example #8
Source File: baselines.py From AirBnbPricePrediction with MIT License | 5 votes |
def get_ensemble_models():
    rf = RandomForestRegressor(
        n_estimators=51, min_samples_leaf=5, min_samples_split=3,
        random_state=42, n_jobs=int(0.8*n_cores))
    bag = BaggingRegressor(n_estimators=51, random_state=42,
                           n_jobs=int(0.8*n_cores))
    extra = ExtraTreesRegressor(n_estimators=71, random_state=42,
                                n_jobs=int(0.8*n_cores))
    ada = AdaBoostRegressor(random_state=42)
    grad = GradientBoostingRegressor(n_estimators=101, random_state=42)
    classifier_list = [rf, bag, extra, ada, grad]
    classifier_name_list = ['Random Forests', 'Bagging', 'Extra Trees',
                            'AdaBoost', 'Gradient Boost']
    return classifier_list, classifier_name_list
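One way to consume the (models, names) pair returned above is to cross-validate each regressor and compare scores. The sketch below assumes the imports and the module-level n_cores variable from baselines.py are in scope; it is an illustration, not part of the original file:

from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=400, n_features=12, noise=0.2, random_state=0)

# get_ensemble_models() as defined above; requires n_cores to be set
models, names = get_ensemble_models()
for model, name in zip(models, names):
    scores = cross_val_score(model, X, y, cv=5, scoring='r2')
    print("%s: mean R^2 = %.3f" % (name, scores.mean()))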
Example #9
Source File: export_tests.py From tpot with GNU Lesser General Public License v3.0 | 5 votes |
def test_export_pipeline_5():
    """Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline with SelectFromModel."""
    pipeline_string = (
        'DecisionTreeRegressor(SelectFromModel(input_matrix, '
        'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
        'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
        'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj_reg._pset)
    expected_code = """import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeRegressor

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=None)

exported_pipeline = make_pipeline(
    SelectFromModel(estimator=ExtraTreesRegressor(max_features=0.05, n_estimators=100), threshold=0.05),
    DecisionTreeRegressor(max_depth=8, min_samples_leaf=5, min_samples_split=5)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    assert expected_code == export_pipeline(pipeline, tpot_obj_reg.operators, tpot_obj_reg._pset)
Example #10
Source File: ExtraTreesRegressor.py From mltk-algo-contrib with Apache License 2.0 | 5 votes |
def __init__(self, options):
    self.handle_options(options)
    params = options.get('params', {})
    out_params = convert_params(
        params,
        floats=['max_samples', 'min_samples_split', 'min_samples_leaf',
                'min_weight_fraction_leaf', 'max_features', 'min_impurity_split'],
        bools=['bootstrap', 'oob_score', 'warm_start'],
        ints=['n_estimators', 'max_depth', 'max_leaf_nodes', 'min_impurity_decrease'],
        strs=['criterion'],
    )

    self.estimator = _ExtraTreesRegressor(**out_params)
Example #11
Source File: RandomForest.py From pyGPGO with MIT License | 5 votes |
def __init__(self, **params):
    """
    Wrapper around sklearn's ExtraTreesRegressor implementation for pyGPGO.
    Random Forests can also be used for surrogate models in Bayesian Optimization.
    An estimate of 'posterior' variance can be obtained by using the `impurity`
    criterion value in each subtree.

    Parameters
    ----------
    params: tuple, optional
        Any parameters to pass to `RandomForestRegressor`. Defaults to sklearn's.
    """
    self.params = params
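The docstring above mentions deriving a 'posterior' variance estimate from the forest. One common way to obtain such an uncertainty estimate (not necessarily the exact scheme pyGPGO implements) is to take the mean and standard deviation of the individual tree predictions exposed through estimators_:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import ExtraTreesRegressor

X, y = make_regression(n_samples=200, n_features=5, noise=0.3, random_state=0)
forest = ExtraTreesRegressor(n_estimators=200, random_state=0).fit(X, y)

# Stack per-tree predictions for a few query points: shape (n_trees, n_samples)
per_tree = np.stack([tree.predict(X[:5]) for tree in forest.estimators_])
mean = per_tree.mean(axis=0)   # matches forest.predict(X[:5]) for a regressor
std = per_tree.std(axis=0)     # crude 'posterior' spread across trees
print(mean, std)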
Example #12
Source File: extra_trees.py From driverlessai-recipes with Apache License 2.0 | 5 votes |
def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
    orig_cols = list(X.names)
    if self.num_classes >= 2:
        lb = LabelEncoder()
        lb.fit(self.labels)
        y = lb.transform(y)
        model = ExtraTreesClassifier(**self.params)
    else:
        model = ExtraTreesRegressor(**self.params)

    # Replace missing values with a value smaller than all observed values
    self.min = dict()
    for col in X.names:
        XX = X[:, col]
        self.min[col] = XX.min1()
        if self.min[col] is None or np.isnan(self.min[col]):
            self.min[col] = -1e10
        else:
            self.min[col] -= 1
        XX.replace(None, self.min[col])
        X[:, col] = XX
        assert X[dt.isna(dt.f[col]), col].nrows == 0
    X = X.to_numpy()

    model.fit(X, y)
    importances = np.array(model.feature_importances_)
    self.set_model_properties(model=model,
                              features=orig_cols,
                              importances=importances.tolist(),
                              iterations=self.params['n_estimators'])
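The snippet above relies on datatable, but the "impute below the observed minimum" trick its comment describes can be written in plain NumPy. This is a hedged re-implementation for illustration, not code from the driverlessai recipe:

import numpy as np

def impute_below_min(X, fallback=-1e10):
    # Replace NaNs in each column with (column minimum - 1); if a column is
    # entirely missing, use the fallback constant instead.
    X = X.copy()
    for j in range(X.shape[1]):
        mask = np.isnan(X[:, j])
        if mask.all():
            X[mask, j] = fallback
        elif mask.any():
            X[mask, j] = np.nanmin(X[:, j]) - 1
    return X

X = np.array([[1.0, np.nan],
              [2.0, 5.0],
              [np.nan, 7.0]])
print(impute_below_min(X))
# [[ 1.  4.]
#  [ 2.  5.]
#  [ 0.  7.]]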
Example #13
Source File: test_forest.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_min_impurity_decrease():
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_decrease=0.1)
        est.fit(X, y)
        for tree in est.estimators_:
            # Simply check if the parameter is passed on correctly. Tree tests
            # will suffice for the actual working of this param
            assert_equal(tree.min_impurity_decrease, 0.1)
Example #14
Source File: test_forest.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_min_impurity_split():
    # Test if min_impurity_split of base estimators is set
    # Regression test for #8006
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_split=0.1)
        est = assert_warns_message(DeprecationWarning, "min_impurity_decrease",
                                   est.fit, X, y)
        for tree in est.estimators_:
            assert_equal(tree.min_impurity_split, 0.1)
Example #15
Source File: task.py From kaggle-HomeDepot with MIT License | 4 votes |
def _get_learner(self):
    # xgboost
    if self.learner_name in ["reg_xgb_linear", "reg_xgb_tree", "reg_xgb_tree_best_single_model"]:
        return XGBRegressor(**self.param_dict)
    if self.learner_name in ["clf_xgb_linear", "clf_xgb_tree"]:
        return XGBClassifier(**self.param_dict)
    # sklearn
    if self.learner_name == "reg_skl_lasso":
        return Lasso(**self.param_dict)
    if self.learner_name == "reg_skl_ridge":
        return Ridge(**self.param_dict)
    if self.learner_name == "reg_skl_random_ridge":
        return RandomRidge(**self.param_dict)
    if self.learner_name == "reg_skl_bayesian_ridge":
        return BayesianRidge(**self.param_dict)
    if self.learner_name == "reg_skl_svr":
        return SVR(**self.param_dict)
    if self.learner_name == "reg_skl_lsvr":
        return LinearSVR(**self.param_dict)
    if self.learner_name == "reg_skl_knn":
        return KNNRegressor(**self.param_dict)
    if self.learner_name == "reg_skl_etr":
        return ExtraTreesRegressor(**self.param_dict)
    if self.learner_name == "reg_skl_rf":
        return RandomForestRegressor(**self.param_dict)
    if self.learner_name == "reg_skl_gbm":
        return GradientBoostingRegressor(**self.param_dict)
    if self.learner_name == "reg_skl_adaboost":
        return AdaBoostRegressor(**self.param_dict)
    # keras
    if self.learner_name == "reg_keras_dnn":
        try:
            return KerasDNNRegressor(**self.param_dict)
        except:
            return None
    # rgf
    if self.learner_name == "reg_rgf":
        return RGFRegressor(**self.param_dict)
    # ensemble
    if self.learner_name == "reg_ensemble":
        return EnsembleLearner(**self.param_dict)

    return None
Example #16
Source File: treeinterpreter.py From treeinterpreter with BSD 3-Clause "New" or "Revised" License | 4 votes |
def predict(model, X, joint_contribution=False):
    """ Returns a triple (prediction, bias, feature_contributions), such
    that prediction ≈ bias + feature_contributions.

    Parameters
    ----------
    model : DecisionTreeRegressor, DecisionTreeClassifier,
        ExtraTreeRegressor, ExtraTreeClassifier,
        RandomForestRegressor, RandomForestClassifier,
        ExtraTreesRegressor, ExtraTreesClassifier
        Scikit-learn model on which the prediction should be decomposed.

    X : array-like, shape = (n_samples, n_features)
        Test samples.

    joint_contribution : boolean
        Specifies if contributions are given individually from each feature,
        or jointly over them

    Returns
    -------
    decomposed prediction : triple of
    * prediction, shape = (n_samples) for regression and (n_samples, n_classes)
        for classification
    * bias, shape = (n_samples) for regression and (n_samples, n_classes) for
        classification
    * contributions, If joint_contribution is False then returns and array of
        shape = (n_samples, n_features) for regression or
        shape = (n_samples, n_features, n_classes) for classification, denoting
        contribution from each feature.
        If joint_contribution is True, then shape is array of size n_samples,
        where each array element is a dict from a tuple of feature indices to
        to a value denoting the contribution from that feature tuple.
    """
    # Only single out response variable supported,
    if model.n_outputs_ > 1:
        raise ValueError("Multilabel classification trees not supported")

    if (isinstance(model, DecisionTreeClassifier) or
            isinstance(model, DecisionTreeRegressor)):
        return _predict_tree(model, X, joint_contribution=joint_contribution)
    elif (isinstance(model, RandomForestClassifier) or
          isinstance(model, ExtraTreesClassifier) or
          isinstance(model, RandomForestRegressor) or
          isinstance(model, ExtraTreesRegressor)):
        return _predict_forest(model, X, joint_contribution=joint_contribution)
    else:
        raise ValueError("Wrong model type. Base learner needs to be a "
                         "DecisionTreeClassifier or DecisionTreeRegressor.")
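The decomposition promised by this docstring, prediction ≈ bias + feature_contributions, can be checked numerically on a regression forest. The sketch below uses the public treeinterpreter entry point on synthetic data; np.ravel is used defensively in case an output carries a trailing singleton axis:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import ExtraTreesRegressor
from treeinterpreter import treeinterpreter as ti

X, y = make_regression(n_samples=200, n_features=6, noise=0.1, random_state=0)
model = ExtraTreesRegressor(n_estimators=50, random_state=0).fit(X, y)

prediction, bias, contributions = ti.predict(model, X[:10])

# For regression, contributions has shape (n_samples, n_features);
# summing over features and adding the bias should recover the prediction.
reconstructed = np.ravel(bias) + contributions.sum(axis=1)
print(np.allclose(np.ravel(prediction), reconstructed))  # expected: True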
Example #17
Source File: treeinterpreter.py From treeinterpreter with BSD 3-Clause "New" or "Revised" License | 4 votes |
def _predict_forest(model, X, joint_contribution=False):
    """
    For a given RandomForestRegressor, RandomForestClassifier,
    ExtraTreesRegressor, or ExtraTreesClassifier returns a triple of
    [prediction, bias and feature_contributions], such that prediction ≈ bias +
    feature_contributions.
    """

    if joint_contribution:
        biases = []
        contributions = []
        predictions = []

        for tree in model.estimators_:
            pred, bias, contribution = _predict_tree(tree, X, joint_contribution=joint_contribution)

            biases.append(bias)
            contributions.append(contribution)
            predictions.append(pred)

        total_contributions = []

        for i in range(len(X)):
            contr = {}
            for j, dct in enumerate(contributions):
                for k in set(dct[i]).union(set(contr.keys())):
                    contr[k] = (contr.get(k, 0) * j + dct[i].get(k, 0)) / (j + 1)

            total_contributions.append(contr)

        for i, item in enumerate(contribution):
            total_contributions[i]
            sm = sum([v for v in contribution[i].values()])

        return (np.mean(predictions, axis=0), np.mean(biases, axis=0),
                total_contributions)
    else:
        mean_pred = None
        mean_bias = None
        mean_contribution = None

        for i, tree in enumerate(model.estimators_):
            pred, bias, contribution = _predict_tree(tree, X)
            if i < 1:  # first iteration
                mean_bias = bias
                mean_contribution = contribution
                mean_pred = pred
            else:
                mean_bias = _iterative_mean(i, mean_bias, bias)
                mean_contribution = _iterative_mean(i, mean_contribution, contribution)
                mean_pred = _iterative_mean(i, mean_pred, pred)

        return mean_pred, mean_bias, mean_contribution
Example #18
Source File: test_forest.py From twitter-stock-recommendation with MIT License | 4 votes |
def test_distribution():
    rng = check_random_state(12321)

    # Single variable with 4 values
    X = rng.randint(0, 4, size=(1000, 1))
    y = rng.rand(1000)
    n_trees = 500

    clf = ExtraTreesRegressor(n_estimators=n_trees, random_state=42).fit(X, y)

    uniques = defaultdict(int)
    for tree in clf.estimators_:
        tree = "".join(("%d,%d/" % (f, int(t)) if f >= 0 else "-")
                       for f, t in zip(tree.tree_.feature,
                                       tree.tree_.threshold))

        uniques[tree] += 1

    uniques = sorted([(1. * count / n_trees, tree)
                      for tree, count in uniques.items()])

    # On a single variable problem where X_0 has 4 equiprobable values, there
    # are 5 ways to build a random tree. The more compact (0,1/0,0/--0,2/--) of
    # them has probability 1/3 while the 4 others have probability 1/6.
    assert_equal(len(uniques), 5)
    assert_greater(0.20, uniques[0][0])  # Rough approximation of 1/6.
    assert_greater(0.20, uniques[1][0])
    assert_greater(0.20, uniques[2][0])
    assert_greater(0.20, uniques[3][0])

    assert_greater(uniques[4][0], 0.3)
    assert_equal(uniques[4][1], "0,1/0,0/--0,2/--")

    # Two variables, one with 2 values, one with 3 values
    X = np.empty((1000, 2))
    X[:, 0] = np.random.randint(0, 2, 1000)
    X[:, 1] = np.random.randint(0, 3, 1000)
    y = rng.rand(1000)

    clf = ExtraTreesRegressor(n_estimators=100, max_features=1,
                              random_state=1).fit(X, y)

    uniques = defaultdict(int)
    for tree in clf.estimators_:
        tree = "".join(("%d,%d/" % (f, int(t)) if f >= 0 else "-")
                       for f, t in zip(tree.tree_.feature,
                                       tree.tree_.threshold))

        uniques[tree] += 1

    uniques = [(count, tree) for tree, count in uniques.items()]
    assert_equal(len(uniques), 8)
Example #19
Source File: test_forest.py From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def test_distribution():
    rng = check_random_state(12321)

    # Single variable with 4 values
    X = rng.randint(0, 4, size=(1000, 1))
    y = rng.rand(1000)
    n_trees = 500

    clf = ExtraTreesRegressor(n_estimators=n_trees, random_state=42).fit(X, y)

    uniques = defaultdict(int)
    for tree in clf.estimators_:
        tree = "".join(("%d,%d/" % (f, int(t)) if f >= 0 else "-")
                       for f, t in zip(tree.tree_.feature,
                                       tree.tree_.threshold))

        uniques[tree] += 1

    uniques = sorted([(1. * count / n_trees, tree)
                      for tree, count in uniques.items()])

    # On a single variable problem where X_0 has 4 equiprobable values, there
    # are 5 ways to build a random tree. The more compact (0,1/0,0/--0,2/--) of
    # them has probability 1/3 while the 4 others have probability 1/6.
    assert_equal(len(uniques), 5)
    assert_greater(0.20, uniques[0][0])  # Rough approximation of 1/6.
    assert_greater(0.20, uniques[1][0])
    assert_greater(0.20, uniques[2][0])
    assert_greater(0.20, uniques[3][0])

    assert_greater(uniques[4][0], 0.3)
    assert_equal(uniques[4][1], "0,1/0,0/--0,2/--")

    # Two variables, one with 2 values, one with 3 values
    X = np.empty((1000, 2))
    X[:, 0] = np.random.randint(0, 2, 1000)
    X[:, 1] = np.random.randint(0, 3, 1000)
    y = rng.rand(1000)

    clf = ExtraTreesRegressor(n_estimators=100, max_features=1,
                              random_state=1).fit(X, y)

    uniques = defaultdict(int)
    for tree in clf.estimators_:
        tree = "".join(("%d,%d/" % (f, int(t)) if f >= 0 else "-")
                       for f, t in zip(tree.tree_.feature,
                                       tree.tree_.threshold))

        uniques[tree] += 1

    uniques = [(count, tree) for tree, count in uniques.items()]
    assert_equal(len(uniques), 8)