Python sklearn.ensemble.ExtraTreesClassifier() Examples
The following are 30 code examples of sklearn.ensemble.ExtraTreesClassifier(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the sklearn.ensemble module, or try the search function.
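Before the project examples, here is a minimal, self-contained sketch of the typical workflow: fit an ExtraTreesClassifier on a synthetic dataset, score it on a held-out split, and read the per-feature importances. The dataset and parameter values below are illustrative assumptions only and are not taken from any of the projects that follow.

# Minimal illustrative sketch; all values are arbitrary, not from the examples below.
from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split

# Synthetic binary classification problem with a few informative features.
X, y = make_classification(n_samples=500, n_features=10, n_informative=3, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Fit an extremely randomized trees ensemble and evaluate it on the held-out split.
clf = ExtraTreesClassifier(n_estimators=100, random_state=0)
clf.fit(X_train, y_train)
print("Test accuracy:", clf.score(X_test, y_test))

# Per-feature importances, as used by several feature-selection examples below.
print("Feature importances:", clf.feature_importances_)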
Example #1
Source File: ExtraTreesClassifier.py From mltk-algo-contrib with Apache License 2.0 | 6 votes |
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        ints=['random_state', 'n_estimators', 'max_depth', 'min_samples_split', 'max_leaf_nodes'],
        strs=['max_features', 'criterion'],
    )

    if 'max_depth' not in out_params:
        out_params.setdefault('max_leaf_nodes', 2000)

    if 'max_features' in out_params:
        out_params['max_features'] = handle_max_features(out_params['max_features'])

    self.estimator = _ExtraTreesClassifier(class_weight='balanced', **out_params)
Example #2
Source File: classification.py From pyeo with GNU General Public License v3.0 | 6 votes |
def create_model_from_signatures(sig_csv_path, model_out, sig_datatype=np.int32):
    """
    Takes a .csv file containing class signatures - produced by extract_features_to_csv -
    and uses it to train and pickle a scikit-learn model.

    Parameters
    ----------
    sig_csv_path
        The path to the signatures file
    model_out
        The location to save the pickled model to.
    sig_datatype
        The datatype to read the csv as. Defaults to int32.

    Notes
    -----
    At present, the model is an ExtraTreesClassifier arrived at by tpot:
    model = ens.ExtraTreesClassifier(bootstrap=False, criterion="gini", max_features=0.55,
                                     min_samples_leaf=2, min_samples_split=16,
                                     n_estimators=100, n_jobs=4, class_weight='balanced')
    """
    model = ens.ExtraTreesClassifier(bootstrap=False, criterion="gini", max_features=0.55,
                                     min_samples_leaf=2, min_samples_split=16,
                                     n_estimators=100, n_jobs=4, class_weight='balanced')
    features, labels = load_signatures(sig_csv_path, sig_datatype)
    model.fit(features, labels)
    joblib.dump(model, model_out)
Example #3
Source File: sklearn_test.py From nni with MIT License | 6 votes |
def test():
    url_zip_train = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2'
    urllib.request.urlretrieve(url_zip_train, filename='train.bz2')

    f_svm = open('train.svm', 'wt')
    with bz2.open('train.bz2', 'rb') as f_zip:
        data = f_zip.read()
        f_svm.write(data.decode('utf-8'))
    f_svm.close()

    X, y = load_svmlight_file('train.svm')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    pipeline = make_pipeline(FeatureGradientSelector(n_epochs=1, n_features=10), LogisticRegression())
    # pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())
    pipeline.fit(X_train, y_train)

    print("Pipeline Score: ", pipeline.score(X_train, y_train))
Example #4
Source File: benchmark_test.py From nni with MIT License | 6 votes |
def test_time(pipeline_name, name, path):
    if pipeline_name == "LR":
        pipeline = make_pipeline(LogisticRegression())

    if pipeline_name == "FGS":
        pipeline = make_pipeline(FeatureGradientSelector(), LogisticRegression())

    if pipeline_name == "Tree":
        pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())

    test_benchmark = Benchmark()
    print("Dataset:\t", name)
    print("Pipeline:\t", pipeline_name)
    starttime = datetime.datetime.now()
    test_benchmark.run_test(pipeline, name, path)
    endtime = datetime.datetime.now()
    print("Used time: ", (endtime - starttime).microseconds / 1000)
    print("")
Example #5
Source File: models_classification.py From easyML with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __init__(
        self, data_block, predictors=[], cv_folds=10,
        scoring_metric='accuracy', additional_display_metrics=[]):

    base_classification.__init__(
        self, alg=ExtraTreesClassifier(), data_block=data_block,
        predictors=predictors, cv_folds=cv_folds,
        scoring_metric=scoring_metric,
        additional_display_metrics=additional_display_metrics)

    self.model_output = pd.Series(self.default_parameters)
    self.model_output['Feature_Importance'] = "-"
    self.model_output['OOB_Score'] = "-"

    # Set parameters to default values:
    self.set_parameters(set_default=True)
Example #6
Source File: PipeTasks.py From ProFET with GNU General Public License v3.0 | 6 votes |
def GetKFeatures(filename, method='RFE', kbest=30, alpha=0.01, reduceMatrix=True):
    '''
    Gets best features using chosen method
    (K-best, RFE, RFECV, 'L1' (RandomizedLogisticRegression), 'Tree' (ExtraTreesClassifier), mrmr),
    then prints top K features' names (from featNames).
    If reduceMatrix = True, then also returns X reduced to the K best features.
    Available methods' names are: 'RFE', 'RFECV', 'RandomizedLogisticRegression',
    'K-best', 'ExtraTreesClassifier'.
    Note that, effectively, any scikit-learn method could be used, if correctly imported.
    '''
    # est = method()
    '''
    Gets the K-best features (filtered by FDR, then select best ranked by t-test;
    more advanced options can be implemented).
    Save the data/matrix with the resulting/kept features to a new output file, "REDUCED_Feat.csv"
    '''
    features, labels, lb_encoder, featureNames = load_data(filename)
    X, y = features, labels

    # change the names as ints back to strings
    class_names = lb_encoder.inverse_transform(y)
    print("Data and labels imported. PreFilter Feature matrix shape:")
    print(X.shape)

    selectK = SelectKBest(k=kbest)
    selectK.fit(X, y)
    selectK_mask = selectK.get_support()
    K_featnames = featureNames[selectK_mask]
    print('X After K filter:', X.shape)
    print("K_featnames: %s" % (K_featnames))

    if reduceMatrix == True:
        Reduced_df = pd.read_csv(filename, index_col=0)
        Reduced_df = Reduced_df[Reduced_df.columns[selectK_mask]]
        Reduced_df.to_csv('REDUCED_Feat.csv')
        print('Saved to REDUCED_Feat.csv')
        return Reduced_df

# WORKS! But unreadable with too many features!
Example #7
Source File: models.py From jh-kaggle-util with Apache License 2.0 | 6 votes |
def run_sklearn():
    n_trees = 100
    n_folds = 3

    # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
    alg_list = [
        ['lreg', LinearRegression()],
        ['rforest', RandomForestRegressor(n_estimators=1000, n_jobs=-1, max_depth=3)],
        ['extree', ExtraTreesClassifier(n_estimators=1000, max_depth=2)],
        ['adaboost', AdaBoostRegressor(base_estimator=None, n_estimators=600, learning_rate=1.0)],
        ['knn', sklearn.neighbors.KNeighborsRegressor(n_neighbors=5)]
    ]

    start_time = time.time()
    for name, alg in alg_list:
        train = jhkaggle.train_sklearn.TrainSKLearn("1", name, alg, False)
        train.run()
        train = None
    elapsed_time = time.time() - start_time
    print("Elapsed time: {}".format(jhkaggle.util.hms_string(elapsed_time)))
Example #8
Source File: extra_trees.py From mljar-supervised with MIT License | 6 votes |
def __init__(self, params):
    super(ExtraTreesAlgorithm, self).__init__(params)
    logger.debug("ExtraTreesAlgorithm.__init__")

    self.library_version = sklearn.__version__
    self.trees_in_step = additional.get("trees_in_step", 100)
    self.max_steps = additional.get("max_steps", 50)
    self.early_stopping_rounds = additional.get("early_stopping_rounds", 50)
    self.model = ExtraTreesClassifier(
        n_estimators=self.trees_in_step,
        criterion=params.get("criterion", "gini"),
        max_features=params.get("max_features", 0.6),
        min_samples_split=params.get("min_samples_split", 30),
        warm_start=True,
        n_jobs=-1,
        random_state=params.get("seed", 1),
    )
Example #9
Source File: GetMLPara.py From dr_droid with Apache License 2.0 | 5 votes |
def feature_importances(X, y):
    # the output is not stable because of the randomness

    # Build a classification task using 3 informative features
    # X, y = make_classification(n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
    #                            n_repeated=0, n_classes=2, random_state=0, shuffle=False)

    # Build a forest and compute the feature importances
    from sklearn.ensemble import ExtraTreesClassifier
    forest = ExtraTreesClassifier(n_estimators=25, criterion='entropy', random_state=None)

    forest.fit(X, y)
    importances = forest.feature_importances_
    std = np.std([tree.feature_importances_ for tree in forest.estimators_], axis=0)
    indices = np.argsort(importances)[::-1]
    # print(indices)

    # Print the feature ranking
    print("Feature ranking:")
    sum1 = 0.0
    for f in range(80):
        print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))
        sum1 = sum1 + importances[indices[f]]
    print(sum1)

    # Plot the feature importances of the forest
    # width = 0.5
    x_len = range(len(importances))
    plt.figure()
    plt.title("Feature importances")
    plt.bar(x_len, importances[indices], color="r", yerr=std[indices], align="center")
    plt.xticks(x_len, indices)
    plt.xlim([-1, max(x_len) + 1])
    plt.show()

# #################################### READ DATA ####################################
Example #10
Source File: binary.py From stacking with MIT License | 5 votes |
def build_model(self):
    return ExtraTreesClassifier(**self.params)
Example #11
Source File: multiclass.py From stacking with MIT License | 5 votes |
def build_model(self):
    return ExtraTreesClassifier(**self.params)
Example #12
Source File: plot.py From speedml with MIT License | 5 votes |
def importance(self):
    """
    Plot importance of features based on ExtraTreesClassifier.
    """
    Base.data_n()
    X = Base.train_n
    y = X[Base.target].copy()
    X = X.drop([Base.target], axis=1)
    model = ExtraTreesClassifier()
    model.fit(X, y)
    self._plot_importance(X.columns, model.feature_importances_)
Example #13
Source File: one-classifier.py From quantopian-ensemble-methods with MIT License | 5 votes |
def initialize(context):
    set_symbol_lookup_date('2012-01-01')

    # Parameters to be changed
    context.model = ExtraTreesClassifier(n_estimators=300)
    context.lookback = 14
    context.history_range = 1000
    context.beta_coefficient = 0.0
    context.percentage_change = 0.025
    context.maximum_leverage = 2.0
    context.number_of_stocks = 150
    context.maximum_pe_ratio = 8
    context.maximum_market_cap = 0.1e9
    context.starting_probability = 0.5
    # End of parameters

    schedule_function(create_model, date_rules.month_start(), time_rules.market_open())
    schedule_function(rebalance, date_rules.month_start(), time_rules.market_open())
    schedule_function(trade, date_rules.every_day(), time_rules.market_open())

    context.algorithm_returns = []
    context.longs = []
    context.shorts = []
    context.training_stocks = symbols('SPY')
    context.trading_stocks = []
    context.beta = 1.0
    context.beta_list = []
    context.completed = False
Example #14
Source File: test_forest.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_min_impurity_decrease():
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_decrease=0.1)
        est.fit(X, y)
        for tree in est.estimators_:
            # Simply check if the parameter is passed on correctly. Tree tests
            # will suffice for the actual working of this param
            assert_equal(tree.min_impurity_decrease, 0.1)
Example #15
Source File: utilanalisis.py From pghumor with Apache License 2.0 | 5 votes |
def tree_based_feature_selection(features, clases, nombres_features_ordenadas):
    print("Realizando tree-based feature selection")
    clf = ExtraTreesClassifier(n_estimators=1000)
    clf.fit(features, clases)
    imprimir_importancias(clf.feature_importances_, "Tree-based feature selection",
                          nombres_features_ordenadas)
Example #16
Source File: test_ensemble.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.ensemble.AdaBoostClassifier, ensemble.AdaBoostClassifier)
    self.assertIs(df.ensemble.AdaBoostRegressor, ensemble.AdaBoostRegressor)
    self.assertIs(df.ensemble.BaggingClassifier, ensemble.BaggingClassifier)
    self.assertIs(df.ensemble.BaggingRegressor, ensemble.BaggingRegressor)
    self.assertIs(df.ensemble.ExtraTreesClassifier, ensemble.ExtraTreesClassifier)
    self.assertIs(df.ensemble.ExtraTreesRegressor, ensemble.ExtraTreesRegressor)
    self.assertIs(df.ensemble.GradientBoostingClassifier, ensemble.GradientBoostingClassifier)
    self.assertIs(df.ensemble.GradientBoostingRegressor, ensemble.GradientBoostingRegressor)
    self.assertIs(df.ensemble.IsolationForest, ensemble.IsolationForest)
    self.assertIs(df.ensemble.RandomForestClassifier, ensemble.RandomForestClassifier)
    self.assertIs(df.ensemble.RandomTreesEmbedding, ensemble.RandomTreesEmbedding)
    self.assertIs(df.ensemble.RandomForestRegressor, ensemble.RandomForestRegressor)
    self.assertIs(df.ensemble.VotingClassifier, ensemble.VotingClassifier)
Example #17
Source File: imgPred_training.py From python-urbanPlanning with MIT License | 5 votes |
def __init__(self, X, label_words):
    self.le = preprocessing.LabelEncoder()
    # http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html
    self.clf = ExtraTreesClassifier(n_estimators=100, max_depth=16, random_state=0)
    y = self.encode_labels(label_words)
    self.clf.fit(np.asarray(X), y)
    with open('clf.pkl', 'wb') as f:  # save the trained image classifier model
        pickle.dump(self.clf, f)
Example #18
Source File: baselines.py From Auto-PyTorch with Apache License 2.0 | 5 votes |
def fit(self, X_train, y_train, X_val, y_val):
    results = dict()

    self.all_nan = np.all(np.isnan(X_train), axis=0)
    X_train = X_train[:, ~self.all_nan]
    X_val = X_val[:, ~self.all_nan]

    X_train = np.nan_to_num(X_train)
    X_val = np.nan_to_num(X_val)

    self.config["warm_start"] = False
    self.num_classes = len(np.unique(y_train))
    if self.num_classes > 2:
        print("==> Using warmstarting for multiclass")
        final_n_estimators = self.config["n_estimators"]
        self.config["n_estimators"] = 8
        self.config["warm_start"] = True

    self.model = ExtraTreesClassifier(**self.config)

    self.model.fit(X_train, y_train)
    if self.config["warm_start"]:
        self.model.n_estimators = final_n_estimators
        self.model.fit(X_train, y_train)

    pred_val_probas = self.model.predict_proba(X_val)
    pred_train = self.model.predict(X_train)
    pred_val = self.model.predict(X_val)

    results["train_acc"] = metrics.accuracy_score(y_train, pred_train)
    results["train_balanced_acc"] = metrics.balanced_accuracy_score(y_train, pred_train)
    results["val_acc"] = metrics.accuracy_score(y_val, pred_val)
    results["val_balanced_acc"] = metrics.balanced_accuracy_score(y_val, pred_val)
    results["val_preds"] = pred_val_probas.tolist()
    results["labels"] = y_val.tolist()

    return results
Example #19
Source File: test_forest.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_min_impurity_split():
    # Test if min_impurity_split of base estimators is set
    # Regression test for #8006
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_split=0.1)
        est = assert_warns_message(DeprecationWarning, "min_impurity_decrease",
                                   est.fit, X, y)
        for tree in est.estimators_:
            assert_equal(tree.min_impurity_split, 0.1)
Example #20
Source File: model_loop.py From fake-news-detection with MIT License | 5 votes |
def define_clfs_params(self):
    '''
    Defines all relevant parameters and classes for classifier objects.
    Edit these if you wish to change parameters.
    '''
    # These are the classifiers
    self.clfs = {
        'RF': RandomForestClassifier(n_estimators=50, n_jobs=-1),
        'ET': ExtraTreesClassifier(n_estimators=10, n_jobs=-1, criterion='entropy'),
        'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth=[1, 5, 10, 15]), algorithm="SAMME", n_estimators=200),
        'LR': LogisticRegression(penalty='l1', C=1e5),
        'SVM': svm.SVC(kernel='linear', probability=True, random_state=0),
        'GB': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
        'NB': GaussianNB(),
        'DT': DecisionTreeClassifier(),
        'SGD': SGDClassifier(loss='log', penalty='l2'),
        'KNN': KNeighborsClassifier(n_neighbors=3)
        }
    # These are the parameters which will be run through
    self.params = {
        'RF': {'n_estimators': [1,10,100,1000], 'max_depth': [10,15,20,30,40,50,60,70,100], 'max_features': ['sqrt','log2'], 'min_samples_split': [2,5,10], 'random_state': [1]},
        'LR': {'penalty': ['l1','l2'], 'C': [0.00001,0.0001,0.001,0.01,0.1,1,10], 'random_state': [1]},
        'SGD': {'loss': ['log'], 'penalty': ['l2','l1','elasticnet'], 'random_state': [1]},
        'ET': {'n_estimators': [1,10,100,1000], 'criterion': ['gini', 'entropy'], 'max_depth': [1,3,5,10,15], 'max_features': ['sqrt','log2'], 'min_samples_split': [2,5,10], 'random_state': [1]},
        'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1,10,100,1000], 'random_state': [1]},
        'GB': {'n_estimators': [1,10,100,1000], 'learning_rate': [0.001,0.01,0.05,0.1,0.5], 'subsample': [0.1,0.5,1.0], 'max_depth': [1,3,5,10,20,50,100], 'random_state': [1]},
        'NB': {},
        'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1,2,15,20,30,40,50], 'max_features': ['sqrt','log2'], 'min_samples_split': [2,5,10], 'random_state': [1]},
        'SVM': {'C': [0.00001,0.0001,0.001,0.01,0.1,1,10], 'kernel': ['linear'], 'random_state': [1]},
        'KNN': {'n_neighbors': [1,5,10,25,50,100], 'weights': ['uniform','distance'], 'algorithm': ['auto','ball_tree','kd_tree']}
        }
Example #21
Source File: test_autoai_libs.py From lale with Apache License 2.0 | 5 votes |
def test_FS2(self):
    from sklearn.ensemble import ExtraTreesClassifier
    trainable = lale.lib.autoai_libs.FS2(
        cols_ids_must_keep=[1],
        additional_col_count_to_keep=3,
        ptype='classification',
        eval_algo=ExtraTreesClassifier,
    )
    self.doTest(trainable, **self._iris)
Example #22
Source File: benchmark_test.py From nni with MIT License | 5 votes |
def test_memory(pipeline_name, name, path):
    if pipeline_name == "LR":
        pipeline = make_pipeline(LogisticRegression())

    if pipeline_name == "FGS":
        pipeline = make_pipeline(FeatureGradientSelector(), LogisticRegression())

    if pipeline_name == "Tree":
        pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())

    test_benchmark = Benchmark()
    print("Dataset:\t", name)
    print("Pipeline:\t", pipeline_name)
    test_benchmark.run_test(pipeline, name, path)
    print("")
Example #23
Source File: models.py From jh-kaggle-util with Apache License 2.0 | 5 votes |
def run_sklearn():
    n_trees = 100
    n_folds = 3

    # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
    alg_list = [
        ['rforest', RandomForestClassifier(n_estimators=1000, n_jobs=-1, verbose=1, max_depth=3)],
        ['extree', ExtraTreesClassifier(n_estimators=1000, max_depth=3, n_jobs=-1)],
        ['adaboost', AdaBoostClassifier(base_estimator=None, n_estimators=600, learning_rate=1.0)],
        ['knn', sklearn.neighbors.KNeighborsClassifier(n_neighbors=5, n_jobs=-1)]
    ]

    start_time = time.time()
    for name, alg in alg_list:
        train = jhkaggle.train_sklearn.TrainSKLearn("1", name, alg, False)
        train.run()
        train = None
Example #24
Source File: extra_trees.py From DataMiningCompetitionFirstPrize with MIT License | 5 votes |
def learn(x, y, test_x):
    cw = {"0": variables.weight_0_rf, "1000": variables.weight_1000_rf,
          "1500": variables.weight_1500_rf, "2000": variables.weight_2000_rf}
    clf = ExtraTreesClassifier(n_jobs=-1,
                               n_estimators=variables.n_estimators_et,
                               max_depth=variables.max_depth_et,
                               random_state=0,
                               min_samples_split=variables.min_samples_split_et,
                               min_samples_leaf=variables.min_samples_leaf_et,
                               max_features=variables.max_feature_et,
                               max_leaf_nodes=variables.max_leaf_nodes_et,
                               criterion=variables.criterion_et,
                               min_impurity_split=variables.min_impurity_split_et,
                               class_weight=variables.cw_et).fit(x, y)

    print "n_estimators=", variables.n_estimators_et,
    print "max_depth=", variables.max_depth_et,
    print "min_samples_split=", variables.min_samples_split_et,
    print "min_samples_leaf=", variables.min_samples_leaf_et,
    print "max_features=", variables.max_feature_et,
    print "max_leaf_nodes=", variables.max_leaf_nodes_et,
    print "criterion=", variables.criterion_et,
    print "min_impurity_split=", variables.min_impurity_split_et,
    print "class_weight=", variables.cw_et

    prediction_list = clf.predict(test_x)
    prediction_list_prob = clf.predict_proba(test_x)

    return prediction_list, prediction_list_prob
Example #25
Source File: trainer.py From Python-Machine-Learning-Cookbook-Second-Edition with MIT License | 5 votes |
def __init__(self, X, label_words):
    self.le = preprocessing.LabelEncoder()
    self.clf = ExtraTreesClassifier(n_estimators=100, max_depth=16, random_state=0)
    y = self.encode_labels(label_words)
    self.clf.fit(np.asarray(X), y)
Example #26
Source File: sentiment_analysis_ml.py From Sentiment_Analysis_cnn_lstm_cnnlstm_textcnn_bilstm with Apache License 2.0 | 5 votes |
def extract_tree(train_vecs, y_train, test_vecs, y_test):
    clf = ExtraTreesClassifier(n_estimators=10, max_depth=10, min_samples_split=2, n_jobs=1, random_state=0)
    clf.fit(train_vecs, y_train)
    joblib.dump(clf, storedpaths + 'model_extracttree.pkl')
    test_scores = clf.score(test_vecs, y_test)
    return test_scores

# Train the GBDT classification algorithm
Example #27
Source File: sentiment_analysis_ml.py From Sentiment_Analysis_cnn_lstm_cnnlstm_textcnn_bilstm with Apache License 2.0 | 5 votes |
def random_forest(train_vecs, y_train, test_vecs, y_test):
    clf = RandomForestClassifier(n_estimators=10, max_depth=10, min_samples_split=2, n_jobs=1, random_state=0)
    clf.fit(train_vecs, y_train)
    joblib.dump(clf, storedpaths + 'model_randomforest.pkl')
    test_scores = clf.score(test_vecs, y_test)
    return test_scores

# Train the ExtraTreesClassifier classification algorithm
Example #28
Source File: extra_trees.py From driverlessai-recipes with Apache License 2.0 | 5 votes |
def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
    orig_cols = list(X.names)

    if self.num_classes >= 2:
        lb = LabelEncoder()
        lb.fit(self.labels)
        y = lb.transform(y)
        model = ExtraTreesClassifier(**self.params)
    else:
        model = ExtraTreesRegressor(**self.params)

    # Replace missing values with a value smaller than all observed values
    self.min = dict()
    for col in X.names:
        XX = X[:, col]
        self.min[col] = XX.min1()
        if self.min[col] is None or np.isnan(self.min[col]):
            self.min[col] = -1e10
        else:
            self.min[col] -= 1
        XX.replace(None, self.min[col])
        X[:, col] = XX
        assert X[dt.isna(dt.f[col]), col].nrows == 0
    X = X.to_numpy()

    model.fit(X, y)
    importances = np.array(model.feature_importances_)
    self.set_model_properties(model=model,
                              features=orig_cols,
                              importances=importances.tolist(),
                              iterations=self.params['n_estimators'])
Example #29
Source File: test_forest.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_min_impurity_decrease():
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_decrease=0.1)
        est.fit(X, y)
        for tree in est.estimators_:
            # Simply check if the parameter is passed on correctly. Tree tests
            # will suffice for the actual working of this param
            assert_equal(tree.min_impurity_decrease, 0.1)
Example #30
Source File: test_forest.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_min_impurity_split():
    # Test if min_impurity_split of base estimators is set
    # Regression test for #8006
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_split=0.1)
        est = assert_warns_message(DeprecationWarning, "min_impurity_decrease",
                                   est.fit, X, y)
        for tree in est.estimators_:
            assert_equal(tree.min_impurity_split, 0.1)