Python sklearn.ensemble.RandomForestRegressor() Examples
The following are 30 code examples of sklearn.ensemble.RandomForestRegressor(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.ensemble, or try the search function.
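Before diving into the project examples, here is a minimal, self-contained sketch of the estimator's basic fit/predict cycle. The synthetic data and hyperparameter values below are illustrative only, not drawn from any project on this page:

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Synthetic regression data: y is a noisy linear function of X
rng = np.random.RandomState(0)
X = rng.rand(200, 4)
y = X @ np.array([1.0, 2.0, -1.0, 0.5]) + 0.1 * rng.randn(200)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# n_estimators and max_depth are illustrative, not tuned
model = RandomForestRegressor(n_estimators=100, max_depth=5, random_state=0)
model.fit(X_train, y_train)
print("test MSE:", mean_squared_error(y_test, model.predict(X_test)))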
Example #1
Source File: friedman_scores.py From mlens with MIT License | 7 votes |
def build_ensemble(**kwargs):
    """Generate ensemble."""
    ens = SuperLearner(**kwargs)
    prep = {'Standard Scaling': [StandardScaler()],
            'Min Max Scaling': [MinMaxScaler()],
            'No Preprocessing': []}
    est = {'Standard Scaling': [ElasticNet(), Lasso(), KNeighborsRegressor()],
           'Min Max Scaling': [SVR()],
           'No Preprocessing': [RandomForestRegressor(random_state=SEED),
                                GradientBoostingRegressor()]}

    ens.add(est, prep)
    ens.add(GradientBoostingRegressor(), meta=True)
    return ens
Example #2
Source File: test_prediction.py From Pyspatialml with GNU General Public License v3.0 | 7 votes |
def test_regression(self):
    training_pt = gpd.read_file(ms.meuse)
    training = self.stack_meuse.extract_vector(gdf=training_pt)
    training["zinc"] = training_pt["zinc"]
    training["cadmium"] = training_pt["cadmium"]
    training["copper"] = training_pt["copper"]
    training["lead"] = training_pt["lead"]
    training = training.dropna()

    # single target regression
    regr = RandomForestRegressor(n_estimators=50)
    X = training.loc[:, self.stack_meuse.names]
    y = training["zinc"]
    regr.fit(X, y)

    single_regr = self.stack_meuse.predict(regr)
    self.assertIsInstance(single_regr, Raster)
    self.assertEqual(single_regr.count, 1)

    # multi-target regression
    y = training.loc[:, ["zinc", "cadmium", "copper", "lead"]]
    regr.fit(X, y)
    multi_regr = self.stack_meuse.predict(regr)
    self.assertIsInstance(multi_regr, Raster)
    self.assertEqual(multi_regr.count, 4)
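The second half of this test leans on the fact that RandomForestRegressor supports multi-target regression natively whenever y is two-dimensional, with no meta-estimator wrapper. A minimal standalone sketch of that behaviour on synthetic data (all values here are illustrative):

import numpy as np
from sklearn.ensemble import RandomForestRegressor

rng = np.random.RandomState(0)
X = rng.rand(100, 3)
# Two targets stacked column-wise: forests accept 2-D y directly
y = np.column_stack([X.sum(axis=1), X[:, 0] - X[:, 1]])

regr = RandomForestRegressor(n_estimators=50, random_state=0)
regr.fit(X, y)
print(regr.predict(X[:2]).shape)  # (2, 2): one column per target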
Example #3
Source File: pm25_RF_Regression.py From Machine-Learning-for-Beginner-by-Python3 with MIT License | 6 votes |
def Train(data, treecount, tezh, yanzhgdata):
    model = RF(n_estimators=treecount, max_features=tezh)
    model.fit(data[:, :-1], data[:, -1])

    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the training MSE
    train_mse = mse(data[:, -1], train_out)

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the validation MSE
    add_mse = mse(yanzhgdata[:, -1], add_yan)

    print(train_mse, add_mse)
    return train_mse, add_mse

# Function for finally determining the best parameter combination
Example #4
Source File: models.py From jh-kaggle-util with Apache License 2.0 | 6 votes |
def run_sklearn():
    n_trees = 100
    n_folds = 3

    # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
    alg_list = [
        ['lreg', LinearRegression()],
        ['rforest', RandomForestRegressor(n_estimators=1000, n_jobs=-1, max_depth=3)],
        ['extree', ExtraTreesClassifier(n_estimators=1000, max_depth=2)],
        ['adaboost', AdaBoostRegressor(base_estimator=None, n_estimators=600, learning_rate=1.0)],
        ['knn', sklearn.neighbors.KNeighborsRegressor(n_neighbors=5)]
    ]

    start_time = time.time()
    for name, alg in alg_list:
        train = jhkaggle.train_sklearn.TrainSKLearn("1", name, alg, False)
        train.run()
        train = None

    elapsed_time = time.time() - start_time
    print("Elapsed time: {}".format(jhkaggle.util.hms_string(elapsed_time)))
Example #5
Source File: ensemble_glm.py From jh-kaggle-util with Apache License 2.0 | 6 votes |
def fit_ensemble(x, y):
    fit_type = jhkaggle.jhkaggle_config['FIT_TYPE']
    if 1:
        if fit_type == jhkaggle.const.FIT_TYPE_BINARY_CLASSIFICATION:
            blend = SGDClassifier(loss="log", penalty="elasticnet")  # LogisticRegression()
        else:
            # blend = SGDRegressor()
            # blend = LinearRegression()
            # blend = RandomForestRegressor(n_estimators=10, n_jobs=-1, max_depth=5, criterion='mae')
            blend = LassoLarsCV(normalize=True)
            # blend = ElasticNetCV(normalize=True)
            # blend = LinearRegression(normalize=True)
        blend.fit(x, y)
    else:
        blend = LogisticRegression()
        blend.fit(x, y)
    return blend
Example #6
Source File: classifier.py From Semantic-Texual-Similarity-Toolkits with MIT License | 6 votes |
def train_model(self, train_file_path, model_path):
    print("==> Load the data ...")
    X_train, Y_train = self.load_file(train_file_path)
    print(train_file_path, shape(X_train))

    print("==> Train the model ...")
    min_max_scaler = preprocessing.MaxAbsScaler()
    X_train_minmax = min_max_scaler.fit_transform(X_train)
    clf = RandomForestRegressor(n_estimators=self.n_estimators)
    clf.fit(X_train_minmax.toarray(), Y_train)

    print("==> Save the model ...")
    pickle.dump(clf, open(model_path, 'wb'))
    scaler_path = model_path.replace('.pkl', '.scaler.pkl')
    pickle.dump(min_max_scaler, open(scaler_path, 'wb'))
    return clf
Example #7
Source File: strategy.py From bitpredict with MIT License | 6 votes |
def fit_and_trade(data, cols, split, threshold):
    '''
    Fits and backtests a theoretical trading strategy
    '''
    data = data[data.width > 0]
    X = data[cols]
    y = data.mid30
    X_train = X.iloc[:split]
    X_test = X.iloc[split:]
    y_train = y.iloc[:split]
    y_test = y.iloc[split:]
    regressor = RandomForestRegressor(n_estimators=100,
                                      min_samples_leaf=500,
                                      random_state=42,
                                      n_jobs=-1)
    regressor.fit(X_train.values, y_train.values)
    trade(X_test.values, y_test.values, regressor, threshold)
Example #8
Source File: scorer.py From scan with GNU Affero General Public License v3.0 | 6 votes |
def __init__(self, text, scores):
    self.text = text
    self.scores = scores
    self.feature_generator = FeatureGenerator()
    self.classifier = RandomForestRegressor(
        n_estimators=100, min_samples_split=4, min_samples_leaf=3, random_state=1
    )
    unique_scores = set(scores)
    if len(unique_scores) <= self.classification_max:
        self.classifier = RandomForestClassifier(
            n_estimators=100, min_samples_split=4, min_samples_leaf=3, random_state=1
        )
    self.fit_feats()
    self.fit_done = False
Example #9
Source File: random_forest.py From mljar-supervised with MIT License | 6 votes |
def __init__(self, params):
    super(RandomForestRegressorAlgorithm, self).__init__(params)
    logger.debug("RandomForestRegressorAlgorithm.__init__")

    self.library_version = sklearn.__version__
    self.trees_in_step = regression_additional.get("trees_in_step", 5)
    self.max_steps = regression_additional.get("max_steps", 3)
    self.early_stopping_rounds = regression_additional.get(
        "early_stopping_rounds", 50
    )
    self.model = RandomForestRegressor(
        n_estimators=self.trees_in_step,
        criterion=params.get("criterion", "mse"),
        max_features=params.get("max_features", 0.8),
        min_samples_split=params.get("min_samples_split", 4),
        warm_start=True,
        n_jobs=-1,
        random_state=params.get("seed", 1),
    )
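The warm_start=True flag above is what allows mljar to grow the forest a few trees per step instead of refitting from scratch. A standalone sketch of that pattern, with illustrative data and step sizes:

import numpy as np
from sklearn.ensemble import RandomForestRegressor

rng = np.random.RandomState(1)
X, y = rng.rand(100, 5), rng.rand(100)

model = RandomForestRegressor(n_estimators=5, warm_start=True, random_state=1)
model.fit(X, y)  # trains the first 5 trees

model.n_estimators += 5  # request 5 more trees
model.fit(X, y)          # only the new trees are fit; earlier ones are kept
print(len(model.estimators_))  # 10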
Example #10
Source File: test_stacking.py From civisml-extensions with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_integration_regression(regression_test_data, n_jobs):
    """Construct, fit, and predict on realistic problem.

    Compare goodness of fit of stacked model vs. individual base estimators.
    """
    xtrain = regression_test_data['x']
    ytrain = regression_test_data['y']
    xtest = regression_test_data['xtest']
    ytest = regression_test_data['ytest']

    sr = StackedRegressor([('rf', RandomForestRegressor(random_state=7,
                                                        n_estimators=10)),
                           ('lr', LinearRegression()),
                           ('metalr', NonNegativeLinearRegression())],
                          n_jobs=n_jobs)
    rf = RandomForestRegressor(random_state=7, n_estimators=10)
    lr = LinearRegression()

    sr_mse = fit_predict_measure_reg(sr, xtrain, ytrain, xtest, ytest)
    rf_mse = fit_predict_measure_reg(rf, xtrain, ytrain, xtest, ytest)
    lr_mse = fit_predict_measure_reg(lr, xtrain, ytrain, xtest, ytest)

    # Stacked regressor should perform better than its base estimators on this
    # data.
    assert sr_mse < rf_mse
    assert sr_mse < lr_mse
    assert sr_mse < 1.5  # Sanity check
Example #11
Source File: vanilla_model.py From OpenChem with MIT License | 6 votes |
def __init__(self, model_type='classifier', feature_type='fingerprints',
             n_estimators=100, n_ensemble=5):
    super(RandomForestQSAR, self).__init__()
    self.n_estimators = n_estimators
    self.n_ensemble = n_ensemble
    self.model = []
    self.model_type = model_type
    if self.model_type == 'classifier':
        for i in range(n_ensemble):
            self.model.append(RFC(n_estimators=n_estimators))
    elif self.model_type == 'regressor':
        for i in range(n_ensemble):
            self.model.append(RFR(n_estimators=n_estimators))
    else:
        raise ValueError('invalid value for argument')
    self.feature_type = feature_type
    if self.feature_type == 'descriptors':
        self.calc = Calculator(descriptors, ignore_3D=True)
        self.desc_mean = [0] * self.n_ensemble
Example #12
Source File: test_stacking.py From civisml-extensions with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_smoke_multiout_regression_methods(n_jobs):
    """Construct, fit, and predict on realistic problem.
    """
    X, y = make_regression(random_state=7, n_samples=100, n_features=10,
                           n_informative=4, n_targets=2)
    rng = np.random.RandomState(17)
    est_list = [('lr', LinearRegression()),
                ('rf', RandomForestRegressor(random_state=rng,
                                             n_estimators=10)),
                ('metalr', LinearRegression())]
    sm = StackedRegressor(est_list, n_jobs=n_jobs)
    sm.fit(X, y)
    sm.predict(X)
    sm.score(X, y)
    with pytest.raises(AttributeError):
        sm.predict_proba(X)
Example #13
Source File: test_overfit.py From deepchem with MIT License | 6 votes |
def test_sklearn_regression_overfit(self):
    """Test that sklearn models can overfit simple regression datasets."""
    n_samples = 10
    n_features = 3
    n_tasks = 1

    # Generate dummy dataset
    np.random.seed(123)
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.random.rand(n_samples, n_tasks)
    w = np.ones((n_samples, n_tasks))
    dataset = dc.data.NumpyDataset(X, y, w, ids)

    regression_metric = dc.metrics.Metric(dc.metrics.r2_score)
    sklearn_model = RandomForestRegressor()
    model = dc.models.SklearnModel(sklearn_model)

    # Fit trained model
    model.fit(dataset)
    model.save()

    # Eval model on train
    scores = model.evaluate(dataset, [regression_metric])
    assert scores[regression_metric.name] > .7
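The assertion above leans on a forest's capacity to memorise a ten-sample dataset. The same behaviour can be checked with scikit-learn alone; a minimal sketch on synthetic data (the exact training R² will vary run to run, but it typically lands well above the 0.7 threshold used in the test):

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

np.random.seed(123)
X = np.random.rand(10, 3)
y = np.random.rand(10)

# Training-set R^2 of a default forest on ten random points
model = RandomForestRegressor().fit(X, y)
print(r2_score(y, model.predict(X)))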
Example #14
Source File: test_stacking.py From civisml-extensions with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_smoke_regression_methods(regression_test_data, n_jobs):
    """Construct, fit, and predict on realistic problem.
    """
    xtrain = regression_test_data['x']
    ytrain = regression_test_data['y']
    rng = np.random.RandomState(17)
    est_list = [('lr', LinearRegression()),
                ('rf', RandomForestRegressor(random_state=rng,
                                             n_estimators=10)),
                ('nnls', NonNegativeLinearRegression())]
    sm = StackedRegressor(est_list, n_jobs=n_jobs)
    sm.fit(xtrain, ytrain)
    sm.predict(xtrain)
    sm.score(xtrain, ytrain)
    with pytest.raises(AttributeError):
        sm.predict_proba(xtrain)
Example #15
Source File: test_random_forest_regression_numeric.py From coremltools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _train_convert_evaluate_assert(self, **scikit_params):
    """
    Train a scikit-learn model, convert it and then evaluate it with CoreML
    """
    scikit_model = RandomForestRegressor(random_state=1, **scikit_params)
    scikit_model.fit(self.X, self.target)

    # Convert the model
    spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

    if _is_macos() and _macos_version() >= (10, 13):
        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df["prediction"] = scikit_model.predict(self.X)

        # Evaluate it
        metrics = evaluate_regressor(spec, df, verbose=False)
        self._check_metrics(metrics, scikit_params)
Example #16
Source File: test_ensemble.py From m2cgen with MIT License | 6 votes |
def test_single_condition():
    estimator = ensemble.RandomForestRegressor(n_estimators=2, random_state=1)
    estimator.fit([[1], [2]], [1, 2])

    assembler = assemblers.RandomForestModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.BinNumExpr(
        ast.BinNumExpr(
            ast.NumVal(1.0),
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(0),
                    ast.NumVal(1.5),
                    ast.CompOpType.LTE),
                ast.NumVal(1.0),
                ast.NumVal(2.0)),
            ast.BinNumOpType.ADD),
        ast.NumVal(0.5),
        ast.BinNumOpType.MUL)

    assert utils.cmp_exprs(actual, expected)
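The expected AST above encodes the two-tree forest as (tree1 + tree2) * 0.5, i.e. the forest prediction is the unweighted mean of the individual tree predictions. That identity is easy to verify directly against scikit-learn; a minimal sketch:

import numpy as np
from sklearn.ensemble import RandomForestRegressor

est = RandomForestRegressor(n_estimators=2, random_state=1)
est.fit([[1], [2]], [1, 2])

x = np.array([[1.5]])
per_tree = [tree.predict(x)[0] for tree in est.estimators_]
# The ensemble prediction is the unweighted mean of the per-tree predictions
assert np.isclose(est.predict(x)[0], np.mean(per_tree))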
Example #17
Source File: regression_randomForest.py From practicalDataAnalysisCookbook with GNU General Public License v2.0 | 6 votes |
def regression_rf(x, y):
    '''
    Estimate a random forest regressor
    '''
    # create the regressor object
    random_forest = en.RandomForestRegressor(
        min_samples_split=80,
        random_state=666,
        max_depth=5,
        n_estimators=10)

    # estimate the model
    random_forest.fit(x, y)

    # return the object
    return random_forest

# the file name of the dataset
Example #18
Source File: test_io_types.py From coremltools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_random_forest_regressor(self):
    for dtype in self.number_data_type.keys():
        scikit_model = RandomForestRegressor(random_state=1)
        data = self.scikit_data["data"].astype(dtype)
        target = self.scikit_data["target"].astype(dtype)
        scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target)
        test_data = data[0].reshape(1, -1)
        self._check_tree_model(spec, "multiArrayType", "doubleType", 1)
        coreml_model = create_model(spec)
        try:
            self.assertEqual(
                scikit_model.predict(test_data)[0].dtype,
                type(coreml_model.predict({"data": test_data})["target"]),
            )
            self.assertAlmostEqual(
                scikit_model.predict(test_data)[0],
                coreml_model.predict({"data": test_data})["target"],
                msg="{} != {} for Dtype: {}".format(
                    scikit_model.predict(test_data)[0],
                    coreml_model.predict({"data": test_data})["target"],
                    dtype,
                ),
            )
        except RuntimeError:
            print("{} not supported. ".format(dtype))
Example #19
Source File: arif.py From HpBandSter with BSD 3-Clause "New" or "Revised" License | 6 votes |
def extend_partial(self, obs_losses, num_steps, config=None):
    # TODO: add variance predictions
    if config is None:
        config = []

    d_losses = self.apply_differencing(obs_losses)

    for t in range(num_steps):
        x = np.hstack([d_losses[-self.order:], config])
        y = self.rfr.predict([x])
        d_losses = np.hstack([d_losses, y])

    prediction = self.invert_differencing(obs_losses, d_losses[-num_steps:])
    return(prediction)
Example #20
Source File: arif.py From HpBandSter with BSD 3-Clause "New" or "Revised" License | 6 votes |
def fit(self, losses, configs=None):
    if configs is None:
        configs = [[]] * len(losses)  # one empty config per learning curve

    # convert learning curves into X and y data
    X = []
    y = []
    for l, c in zip(losses, configs):
        l = self.apply_differencing(l)
        for i in range(self.order, len(l)):
            X.append(np.hstack([l[i - self.order:i], c]))
            y.append(l[i])

    self.X = np.array(X)
    self.y = np.array(y)

    self.rfr = rfr().fit(self.X, self.y)
Example #21
Source File: test_regression_tests.py From drifter_ml with MIT License | 6 votes |
def generate_regression_data_and_models():
    df = pd.DataFrame()
    for _ in range(1000):
        a = np.random.normal(0, 1)
        b = np.random.normal(0, 3)
        c = np.random.normal(12, 4)
        target = a + b + c
        df = df.append({
            "A": a,
            "B": b,
            "C": c,
            "target": target
        }, ignore_index=True)
    reg1 = tree.DecisionTreeRegressor()
    reg2 = ensemble.RandomForestRegressor()
    column_names = ["A", "B", "C"]
    target_name = "target"
    X = df[column_names]
    reg1.fit(X, df[target_name])
    reg2.fit(X, df[target_name])
    return df, column_names, target_name, reg1, reg2
Example #22
Source File: RandomForest.py From pyGPGO with MIT License | 6 votes |
def fit(self, X, y):
    """
    Fit a Random Forest model to data `X` and targets `y`.

    Parameters
    ----------
    X : array-like
        Input values.
    y: array-like
        Target values.
    """
    self.X = X
    self.y = y
    self.n = self.X.shape[0]
    self.model = RandomForestRegressor(**self.params)
    self.model.fit(X, y)
Example #23
Source File: ranking.py From news-popularity-prediction with Apache License 2.0 | 6 votes |
def get_regressor_fitted(file_path, X_train, X_test, y_train, y_test):
    if os.path.exists(file_path):
        try:
            regressor_fitted = load_sklearn_model(file_path)
        except EOFError as e:
            print(file_path)
            raise e
    else:
        regressor = RandomForestRegressor(n_estimators=50,
                                          criterion="mse",
                                          max_features="auto",
                                          n_jobs=get_threads_number())
        regressor_fitted = regressor.fit(X_train, y_train)
        store_sklearn_model(file_path, regressor_fitted)

    return regressor_fitted
Example #24
Source File: random_forest_regressor.py From Python with MIT License | 5 votes |
def main():
    """
    Random Forest Regressor Example using sklearn function.
    Boston house price dataset is used to demonstrate the algorithm.
    """

    # Load Boston house price dataset
    boston = load_boston()
    print(boston.keys())

    # Split dataset into train and test data
    X = boston["data"]  # features
    Y = boston["target"]
    x_train, x_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.3, random_state=1
    )

    # Random Forest Regressor
    rand_for = RandomForestRegressor(random_state=42, n_estimators=300)
    rand_for.fit(x_train, y_train)

    # Predict target for test data
    predictions = rand_for.predict(x_test)
    predictions = predictions.reshape(len(predictions), 1)

    # Error printing
    print(f"Mean Absolute Error:\t {mean_absolute_error(y_test, predictions)}")
    print(f"Mean Square Error :\t {mean_squared_error(y_test, predictions)}")
Example #25
Source File: model_wrapper.py From AMPL with MIT License | 5 votes |
def __init__(self, params, featurizer, ds_client):
    """Initializes DCRFModelWrapper object.

    Args:
        params (Namespace object): contains all parameter information.

        featurizer (Featurization): Object managing the featurization of compounds

        ds_client: datastore client.
    """
    super().__init__(params, featurizer, ds_client)
    self.best_model_dir = os.path.join(self.output_dir, 'best_model')
    self.model_dir = self.best_model_dir
    self.baseline_model_dir = self.best_model_dir
    os.makedirs(self.best_model_dir, exist_ok=True)

    if self.params.prediction_type == 'regression':
        rf_model = RandomForestRegressor(n_estimators=self.params.rf_estimators,
                                         max_features=self.params.rf_max_features,
                                         max_depth=self.params.rf_max_depth,
                                         n_jobs=-1)
    else:
        rf_model = RandomForestClassifier(n_estimators=self.params.rf_estimators,
                                          max_features=self.params.rf_max_features,
                                          max_depth=self.params.rf_max_depth,
                                          n_jobs=-1)

    self.model = dc.models.sklearn_models.SklearnModel(rf_model, model_dir=self.best_model_dir)

# ****************************************************************************************
Example #26
Source File: random_forest.py From driverlessai-recipes with Apache License 2.0 | 5 votes |
def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
    orig_cols = list(X.names)
    if self.num_classes >= 2:
        lb = LabelEncoder()
        lb.fit(self.labels)
        y = lb.transform(y)
        model = RandomForestClassifier(**self.params)
    else:
        model = RandomForestRegressor(**self.params)

    # Replace missing values with a value smaller than all observed values
    self.min = dict()
    for col in X.names:
        XX = X[:, col]
        self.min[col] = XX.min1()
        if self.min[col] is None or np.isnan(self.min[col]):
            self.min[col] = -1e10
        else:
            self.min[col] -= 1
        XX.replace(None, self.min[col])
        X[:, col] = XX
        assert X[dt.isna(dt.f[col]), col].nrows == 0
    X = X.to_numpy()

    model.fit(X, y)
    importances = np.array(model.feature_importances_)
    self.set_model_properties(model=model,
                              features=orig_cols,
                              importances=importances.tolist(),
                              iterations=self.params['n_estimators'])
Example #27
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_has_fit_parameter():
    assert not has_fit_parameter(KNeighborsClassifier, "sample_weight")
    assert has_fit_parameter(RandomForestRegressor, "sample_weight")
    assert has_fit_parameter(SVR, "sample_weight")
    assert has_fit_parameter(SVR(), "sample_weight")

    class TestClassWithDeprecatedFitMethod:
        @deprecated("Deprecated for the purpose of testing has_fit_parameter")
        def fit(self, X, y, sample_weight=None):
            pass

    assert has_fit_parameter(TestClassWithDeprecatedFitMethod,
                             "sample_weight"), \
        "has_fit_parameter fails for class with deprecated fit method."
Example #28
Source File: model_wrapper.py From AMPL with MIT License | 5 votes |
def reload_model(self, reload_dir):
    """Loads a saved random forest model from the specified directory. Also loads any transformers
    that were saved with it.

    Args:
        reload_dir (str): Directory where saved model is located.

        model_dataset (ModelDataset Object): contains the current full dataset

    Side effects:
        Resets the value of model, transformers, and transformers_x
    """
    if self.params.prediction_type == 'regression':
        rf_model = RandomForestRegressor(n_estimators=self.params.rf_estimators,
                                         max_features=self.params.rf_max_features,
                                         max_depth=self.params.rf_max_depth,
                                         n_jobs=-1)
        if self.params.transformers:
            self.log.info("Reloading transformers from file %s" % self.params.transformer_key)
            if self.params.datastore:
                self.transformers, self.transformers_x = dsf.retrieve_dataset_by_datasetkey(
                    dataset_key=self.params.transformer_key,
                    bucket=self.params.transformer_bucket,
                    client=self.ds_client)
            else:
                self.transformers, self.transformers_x = pickle.load(
                    open(self.params.transformer_key, 'rb'))
            # TODO: We shouldn't be reloading the transformers here - that should only happen when we load
            # TODO: a previously trained model to run predictions on a new dataset.
    else:
        rf_model = RandomForestClassifier(n_estimators=self.params.rf_estimators,
                                          max_features=self.params.rf_max_features,
                                          max_depth=self.params.rf_max_depth,
                                          n_jobs=-1)

    self.model = dc.models.sklearn_models.SklearnModel(rf_model, model_dir=reload_dir)
    self.model.reload()

# ****************************************************************************************
Example #29
Source File: features.py From lumin with Apache License 2.0 | 5 votes |
def get_rf_feat_importance(rf:Union[RandomForestRegressor,RandomForestClassifier], inputs:pd.DataFrame,
                           targets:np.ndarray, weights:Optional[np.ndarray]=None) -> pd.DataFrame:
    r'''
    Compute feature importance for a Random Forest model using rfpimp.

    Arguments:
        rf: trained Random Forest model
        inputs: input data as Pandas DataFrame
        targets: target data as Numpy array
        weights: Optional data weights as Numpy array
    '''

    return importances(rf, inputs, targets, features=inputs.columns, sample_weights=weights).reset_index()
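The function above delegates permutation importance to the third-party rfpimp package. If you only need permutation importances and want to avoid the extra dependency, scikit-learn ships a comparable utility in sklearn.inspection; a rough sketch on synthetic data (feature layout and values are illustrative):

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance

rng = np.random.RandomState(0)
X = rng.rand(200, 4)
y = 3 * X[:, 0] + 0.1 * rng.randn(200)  # only feature 0 is informative

rf = RandomForestRegressor(n_estimators=100, random_state=0).fit(X, y)
result = permutation_importance(rf, X, y, n_repeats=10, random_state=0)
print(result.importances_mean)  # feature 0 should dominate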
Example #30
Source File: test_forest.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_min_impurity_decrease():
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_decrease=0.1)
        est.fit(X, y)
        for tree in est.estimators_:
            # Simply check if the parameter is passed on correctly. Tree tests
            # will suffice for the actual working of this param
            assert_equal(tree.min_impurity_decrease, 0.1)