Python sklearn.tree.DecisionTreeClassifier() Examples
The following are 30 code examples of sklearn.tree.DecisionTreeClassifier().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.tree, or try the search function.
Example #1
Source File: test_weight_boosting.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_gridsearch():
    """Check that the base trees of AdaBoost ensembles can be grid-searched."""
    # AdaBoost classification: search ensemble-level and nested tree options.
    classifier_grid = {
        'n_estimators': (1, 2),
        'base_estimator__max_depth': (1, 2),
        'algorithm': ('SAMME', 'SAMME.R'),
    }
    classifier = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    search = GridSearchCV(classifier, classifier_grid)
    search.fit(iris.data, iris.target)

    # AdaBoost regression: same idea with a regression tree as base learner.
    regressor_grid = {
        'n_estimators': (1, 2),
        'base_estimator__max_depth': (1, 2),
    }
    regressor = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                                  random_state=0)
    search = GridSearchCV(regressor, regressor_grid)
    search.fit(boston.data, boston.target)
Example #2
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_huge_allocations():
    """Check that absurdly large ``max_leaf_nodes`` values raise on fit."""
    pointer_bits = 8 * struct.calcsize("P")

    X = np.random.randn(10, 2)
    y = np.random.randint(0, 2, 10)

    # Sanity check: we cannot request more memory than the size of the
    # address space. Currently raises OverflowError.
    too_many_leaves = 2 ** (pointer_bits + 1)
    tree = DecisionTreeClassifier(splitter='best',
                                  max_leaf_nodes=too_many_leaves)
    assert_raises(Exception, tree.fit, X, y)

    # Non-regression test: MemoryError used to be dropped by Cython
    # because of missing "except *".
    too_many_leaves = 2 ** (pointer_bits - 1) - 1
    tree = DecisionTreeClassifier(splitter='best',
                                  max_leaf_nodes=too_many_leaves)
    assert_raises(MemoryError, tree.fit, X, y)
Example #3
Source File: learn_pp.py From scikit-multiflow with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __init__(self, base_estimator=DecisionTreeClassifier(),
             error_threshold=0.5, n_estimators=30, n_ensembles=10,
             window_size=100, random_state=None):
    """Store the hyper-parameters and set up empty ensemble/batch state.

    ``random_state`` is kept as given and also converted with
    ``check_random_state`` into ``self.random``.
    """
    super().__init__()

    # Hyper-parameters, stored exactly as passed in.
    self.base_estimator = base_estimator
    self.n_estimators = n_estimators

    # Ensemble bookkeeping; starts empty.
    self.ensembles = []
    self.ensemble_weights = []
    self.classes = None
    self.n_ensembles = n_ensembles

    # Randomness: the derived generator plus the original seed/spec.
    self.random = check_random_state(random_state)
    self.random_state = random_state

    self.error_threshold = error_threshold

    # Buffers for incoming samples.
    self.X_batch = []
    self.y_batch = []
    self.window_size = window_size
Example #4
Source File: learn_nse.py From scikit-multiflow with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __init__(self, base_estimator=DecisionTreeClassifier(),
             window_size=250, slope=0.5, crossing_point=10,
             n_estimators=15, pruning=None):
    """Store the hyper-parameters and initialise empty internal state.

    The base estimator is deep-copied, so the instance does not share the
    object that was passed in (or the default instance).
    """
    super().__init__()

    # Ensemble members and the bookkeeping lists kept alongside them.
    self.ensemble = []
    self.ensemble_weights = []
    self.bkts = []
    self.wkts = []
    self.buffer = []

    # Hyper-parameters, stored as given.
    self.window_size = window_size
    self.slope = slope
    self.crossing_point = crossing_point
    self.n_estimators = n_estimators
    self.pruning = pruning

    # Sample buffers and per-instance weights.
    self.X_batch = []
    self.y_batch = []
    self.instance_weights = []

    self.base_estimator = cp.deepcopy(base_estimator)
    self.classes = None
Example #5
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_set_params_passes_all_parameters():
    """Check nested ``set_params`` receives all parameters in one call.

    Regression test for #9944.
    """

    class TestDecisionTree(DecisionTreeClassifier):
        def set_params(self, **kwargs):
            super().set_params(**kwargs)
            # expected_kwargs is in test scope
            assert kwargs == expected_kwargs
            return self

    expected_kwargs = {'max_depth': 5, 'min_samples_leaf': 2}

    pipeline = Pipeline([('estimator', TestDecisionTree())])
    grid_search = GridSearchCV(TestDecisionTree(), {})
    for wrapper in (pipeline, grid_search):
        wrapper.set_params(estimator__max_depth=5,
                           estimator__min_samples_leaf=2)
Example #6
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_sample_weight_invalid():
    """Check that malformed ``sample_weight`` arrays make ``fit`` raise."""
    X = np.arange(100)[:, np.newaxis]
    y = np.ones(100)
    y[:50] = 0.0

    clf = DecisionTreeClassifier(random_state=0)

    invalid_weights = (
        np.random.rand(100, 1),  # two-dimensional
        np.array(0),             # zero-dimensional
        np.ones(101),            # one entry too many
        np.ones(99),             # one entry too few
    )
    for sample_weight in invalid_weights:
        assert_raises(ValueError, clf.fit, X, y,
                      sample_weight=sample_weight)
Example #7
Source File: AdaBoost_Classify.py From Machine-Learning-for-Beginner-by-Python3 with MIT License | 6 votes |
def recspre(estrs, predata, datadict, zhe):
    """Fit an AdaBoost model described by ``estrs`` and score ``predata``.

    ``estrs`` is a string of the form ``"<n_estimators>-<max_depth>"``.
    The model is trained on ``datadict[zhe]['train']``, where the last
    column is the target and the remaining columns are the features.
    The confusion matrix for ``predata`` is printed and the result of
    ``fmse`` on the true/predicted labels is returned.
    """
    count_text, depth_text = estrs.split('-')

    base_tree = DecisionTreeClassifier(max_depth=int(depth_text))
    model = AdaBoostClassifier(base_tree,
                               algorithm="SAMME",
                               n_estimators=int(count_text),
                               learning_rate=0.8)

    train = datadict[zhe]['train']
    model.fit(train[:, :-1], train[:, -1])

    # Predict
    predicted = model.predict(predata[:, :-1])
    actual = predata[:, -1]

    # Compute the confusion matrix
    print(ConfuseMatrix(actual, predicted))

    return fmse(predata[:, -1], predicted)

# Main function
Example #8
Source File: AdaBoost_Classify.py From Machine-Learning-for-Beginner-by-Python3 with MIT License | 6 votes |
def Train(data, modelcount, censhu, yanzhgdata):
    """Fit AdaBoost on ``data`` and score it on training and validation data.

    In both arrays the last column is the target and the other columns are
    the features. ``modelcount`` is the number of estimators and ``censhu``
    the maximum depth of each base tree. The first element of ``fmse(...)``
    is taken as the score for each data set; both scores are printed and
    returned as ``(train_score, validation_score)``.
    """
    base_tree = DecisionTreeClassifier(max_depth=censhu)
    model = AdaBoostClassifier(base_tree,
                               algorithm="SAMME",
                               n_estimators=modelcount,
                               learning_rate=0.8)

    features, target = data[:, :-1], data[:, -1]
    model.fit(features, target)

    # Predictions for the training data
    train_pred = model.predict(data[:, :-1])
    # Score on the training data (original note: "compute MSE")
    train_score = fmse(data[:, -1], train_pred)[0]

    # Predictions for the validation data
    valid_pred = model.predict(yanzhgdata[:, :-1])
    # Score on the validation data (original note: "compute F1 measure")
    valid_score = fmse(yanzhgdata[:, -1], valid_pred)[0]

    print(train_score, valid_score)
    return train_score, valid_score

# Function that finally determines the combination
Example #9
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_score_sample_weight():
    """Check that ``score`` changes when sample weights are supplied."""
    rng = np.random.RandomState(0)

    # test both ClassifierMixin and RegressorMixin
    estimators = [DecisionTreeClassifier(max_depth=2),
                  DecisionTreeRegressor(max_depth=2)]
    sets = [datasets.load_iris(), datasets.load_boston()]

    for model, dataset in zip(estimators, sets):
        model.fit(dataset.data, dataset.target)

        # generate random sample weights
        weights = rng.randint(1, 10, size=len(dataset.target))

        # check that the score with and without sample weights are different
        plain_score = model.score(dataset.data, dataset.target)
        weighted_score = model.score(dataset.data, dataset.target,
                                     sample_weight=weights)
        assert_not_equal(plain_score, weighted_score,
                         msg="Unweighted and weighted scores "
                             "are unexpectedly equal")
Example #10
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_importances_gini_equal_mse():
    """Check that gini is equivalent to mse for a binary output variable."""
    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=0)

    # The gini index and the mean square error (variance) might differ due
    # to numerical instability. Since those instabilities mainly occurs at
    # high tree depth, we restrict this maximal depth.
    clf = DecisionTreeClassifier(criterion="gini", max_depth=5,
                                 random_state=0)
    clf = clf.fit(X, y)
    reg = DecisionTreeRegressor(criterion="mse", max_depth=5,
                                random_state=0)
    reg = reg.fit(X, y)

    assert_almost_equal(clf.feature_importances_, reg.feature_importances_)

    # The two fitted trees must have identical structure.
    for attr in ('feature', 'children_left', 'children_right',
                 'n_node_samples'):
        assert_array_equal(getattr(clf.tree_, attr),
                           getattr(reg.tree_, attr))
Example #11
Source File: sasma.py From unmixing with MIT License | 6 votes |
def predict(self, fit=None, features=None, probabilities=False):
    '''
    Predict the class labels (e.g., endmember types) based on an existing
    tree fit and new predictive features. Arguments:
        fit             The result of tree.DecisionTreeClassifier.fit();
                        uses the last fit model if None.
        features        The new X array/ new predictive features to use;
                        should be (p x n), n samples with p features.
        probabilities   If True, return the per-class probabilities from
                        fit.predict_proba() reshaped to
                        (n_labels, shp[1], shp[2]), where shp is the
                        shape of self.y_raster, instead of the predicted
                        labels reshaped to self.y_raster.shape.
    '''
    if fit is None:
        fit = self.last_fit

    if features is None:
        features = self.x_features_array

    if probabilities:
        shp = self.y_raster.shape
        # predict_proba() yields (n samples x n classes); transpose so the
        # class axis comes first before reshaping. Calling predict() here
        # would give one label per sample, which cannot fill the
        # (n_labels, shp[1], shp[2]) target shape.
        class_probs = fit.predict_proba(features.T)
        return class_probs.T.reshape((self.n_labels, shp[1], shp[2]))

    return fit.predict(features.T).reshape(self.y_raster.shape)
Example #12
Source File: utils.py From m2cgen with MIT License | 6 votes |
def __call__(self, estimator):
    """Fit ``estimator`` on the training split and predict the test split.

    The prediction method depends on the estimator type: decision
    function, class probabilities, or plain ``predict`` as the fallback.
    Returns ``(X_test, y_pred, fitted_estimator)``.
    """
    fitted_estimator = estimator.fit(self.X_train, self.y_train)

    if isinstance(estimator, (LinearClassifierMixin, SVC, NuSVC,
                              LightBaseClassifier)):
        y_pred = estimator.decision_function(self.X_test)
    elif isinstance(estimator, DecisionTreeClassifier):
        # Single trees are fed float32 input.
        test_float32 = self.X_test.astype(np.float32)
        y_pred = estimator.predict_proba(test_float32)
    elif isinstance(estimator, (ForestClassifier, XGBClassifier,
                                LGBMClassifier)):
        y_pred = estimator.predict_proba(self.X_test)
    else:
        y_pred = estimator.predict(self.X_test)

    return self.X_test, y_pred, fitted_estimator
Example #13
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_classification():
    """Check bagging classification for various parameter settings."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    base_estimators = [None,
                       DummyClassifier(),
                       Perceptron(tol=1e-3),
                       DecisionTreeClassifier(),
                       KNeighborsClassifier(),
                       SVC(gamma="scale")]

    # Smoke test: every base estimator / parameter combination must be able
    # to fit and predict.
    for base_estimator in base_estimators:
        for params in grid:
            bagging = BaggingClassifier(base_estimator=base_estimator,
                                        random_state=rng,
                                        **params)
            bagging.fit(X_train, y_train).predict(X_test)
Example #14
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_probability():
    """Check the probability predictions of every tree in ``CLF_TREES``."""
    for name, Tree in CLF_TREES.items():
        failure_note = "Failed with {0}".format(name)

        clf = Tree(max_depth=1, max_features=1, random_state=42)
        clf.fit(iris.data, iris.target)
        prob_predict = clf.predict_proba(iris.data)

        # Each row of probabilities sums to one.
        assert_array_almost_equal(np.sum(prob_predict, 1),
                                  np.ones(iris.data.shape[0]),
                                  err_msg=failure_note)
        # The most probable class is the predicted class.
        assert_array_equal(np.argmax(prob_predict, 1),
                           clf.predict(iris.data),
                           err_msg=failure_note)
        # predict_log_proba is the log of predict_proba.
        assert_almost_equal(clf.predict_proba(iris.data),
                            np.exp(clf.predict_log_proba(iris.data)), 8,
                            err_msg=failure_note)
Example #15
Source File: test_export.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_plot_tree(pyplot):
    """Mostly a smoke test of ``plot_tree`` on a small fitted tree."""
    # Check correctness of export_graphviz
    clf = DecisionTreeClassifier(max_depth=3,
                                 min_samples_split=2,
                                 criterion="gini",
                                 random_state=2)
    clf.fit(X, y)

    # Test export code
    feature_names = ['first feat', 'sepal_width']
    nodes = plot_tree(clf, feature_names=feature_names)
    assert len(nodes) == 3

    root, first_leaf, second_leaf = nodes[0], nodes[1], nodes[2]
    assert root.get_text() == ("first feat <= 0.0\nentropy = 0.5\n"
                               "samples = 6\nvalue = [3, 3]")
    assert first_leaf.get_text() == "entropy = 0.0\nsamples = 3\nvalue = [3, 0]"
    assert second_leaf.get_text() == "entropy = 0.0\nsamples = 3\nvalue = [0, 3]"
Example #16
Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0 | 6 votes |
def test_17_decisiontreeclassifier(self):
    """Compare server-side PMML scoring with local pipeline predictions.

    Multi-class data, decision tree preceded by a ``Binarizer`` step.
    """
    print("\ntest 17 (decision tree classifier with preprocessing) [multi-class]\n")
    X, X_test, y, features, target, test_file = \
        self.data_utility.get_data_for_multi_class_classification()

    model = DecisionTreeClassifier()
    steps = [("scaler", Binarizer()), ("model", model)]
    pipeline_obj = Pipeline(steps)
    pipeline_obj.fit(X, y)

    # Export to PMML, upload it and score the test file remotely.
    file_name = 'test17sklearn.pmml'
    skl_to_pmml(pipeline_obj, features, target, file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(
        model_name, test_file)

    # Local reference results from the same pipeline.
    model_pred = pipeline_obj.predict(X_test)
    model_prob = pipeline_obj.predict_proba(X_test)

    same_predictions = self.adapa_utility.compare_predictions(
        predictions, model_pred)
    self.assertEqual(same_predictions, True)
    same_probabilities = self.adapa_utility.compare_probability(
        probabilities, model_prob)
    self.assertEqual(same_probabilities, True)
Example #17
Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0 | 6 votes |
def test_18_decisiontreeclassifier(self):
    """Compare server-side PMML scoring with local pipeline predictions.

    Binary-class data, decision tree preceded by a ``Binarizer`` step.
    """
    print("\ntest 18 (decision tree classifier with preprocessing) [binary-class]\n")
    X, X_test, y, features, target, test_file = \
        self.data_utility.get_data_for_binary_classification()

    model = DecisionTreeClassifier()
    steps = [("scaler", Binarizer()), ("model", model)]
    pipeline_obj = Pipeline(steps)
    pipeline_obj.fit(X, y)

    # Export to PMML, upload it and score the test file remotely.
    file_name = 'test18sklearn.pmml'
    skl_to_pmml(pipeline_obj, features, target, file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(
        model_name, test_file)

    # Local reference results from the same pipeline.
    model_pred = pipeline_obj.predict(X_test)
    model_prob = pipeline_obj.predict_proba(X_test)

    same_predictions = self.adapa_utility.compare_predictions(
        predictions, model_pred)
    self.assertEqual(same_predictions, True)
    same_probabilities = self.adapa_utility.compare_probability(
        probabilities, model_prob)
    self.assertEqual(same_probabilities, True)
Example #18
Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0 | 6 votes |
def test_19_decisiontreeclassifier(self):
    """Compare server-side PMML scoring with local pipeline predictions.

    Multi-class data, pipeline containing only the decision tree.
    """
    print("\ntest 19 (decision tree classifier without preprocessing) [multi-class]\n")
    X, X_test, y, features, target, test_file = \
        self.data_utility.get_data_for_multi_class_classification()

    model = DecisionTreeClassifier()
    steps = [("model", model)]
    pipeline_obj = Pipeline(steps)
    pipeline_obj.fit(X, y)

    # Export to PMML, upload it and score the test file remotely.
    file_name = 'test19sklearn.pmml'
    skl_to_pmml(pipeline_obj, features, target, file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(
        model_name, test_file)

    # Local reference results from the same pipeline.
    model_pred = pipeline_obj.predict(X_test)
    model_prob = pipeline_obj.predict_proba(X_test)

    same_predictions = self.adapa_utility.compare_predictions(
        predictions, model_pred)
    self.assertEqual(same_predictions, True)
    same_probabilities = self.adapa_utility.compare_probability(
        probabilities, model_prob)
    self.assertEqual(same_probabilities, True)
Example #19
Source File: test_pdpbox.py From docker-python with Apache License 2.0 | 6 votes |
def test_simple_pdp(self):
    """Smoke-test pdpbox: isolate and plot one feature of a small tree."""
    # set up data
    data = pd.read_csv("/input/tests/data/fifa_2018_stats.csv")
    y = (data['Man of the Match'] == "Yes")
    # Only the int64 columns are used as model features.
    feature_names = [column for column in data.columns
                     if data[column].dtype in [np.int64]]
    X = data[feature_names]
    train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)

    # Build simple model
    tree_model = DecisionTreeClassifier(random_state=0, max_depth=3)
    tree_model = tree_model.fit(train_X, train_y)

    # Set up pdp as table
    pdp_goals = pdp.pdp_isolate(model=tree_model,
                                dataset=val_X,
                                model_features=feature_names,
                                feature='Goal Scored')

    # make plot
    pdp.pdp_plot(pdp_goals, 'Goal Scored')
Example #20
Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0 | 6 votes |
def test_20_decisiontreeclassifier(self):
    """Compare server-side PMML scoring with local pipeline predictions.

    Binary-class data, pipeline containing only the decision tree.
    """
    print("\ntest 20 (decision tree classifier without preprocessing) [binary-class]\n")
    X, X_test, y, features, target, test_file = \
        self.data_utility.get_data_for_binary_classification()

    model = DecisionTreeClassifier()
    steps = [("model", model)]
    pipeline_obj = Pipeline(steps)
    pipeline_obj.fit(X, y)

    # Export to PMML, upload it and score the test file remotely.
    file_name = 'test20sklearn.pmml'
    skl_to_pmml(pipeline_obj, features, target, file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(
        model_name, test_file)

    # Local reference results from the same pipeline.
    model_pred = pipeline_obj.predict(X_test)
    model_prob = pipeline_obj.predict_proba(X_test)

    same_predictions = self.adapa_utility.compare_predictions(
        predictions, model_pred)
    self.assertEqual(same_predictions, True)
    same_probabilities = self.adapa_utility.compare_probability(
        probabilities, model_prob)
    self.assertEqual(same_probabilities, True)
Example #21
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_arrays_persist():
    """Ensure property arrays' memory stays alive when the tree disappears.

    Non-regression test for #2726.
    """
    tree_attributes = ['n_classes', 'value', 'children_left',
                       'children_right', 'threshold', 'impurity',
                       'feature', 'n_node_samples']
    for attr in tree_attributes:
        # The estimator is a temporary: only the array keeps a reference.
        fitted_tree = DecisionTreeClassifier().fit([[0], [1]], [0, 1]).tree_
        value = getattr(fitted_tree, attr)
        # if pointing to freed memory, contents may be arbitrary
        assert -3 <= value.flat[0] < 3, \
            'Array points to arbitrary memory'
Example #22
Source File: feed.py From quantified-self with MIT License | 5 votes |
def __init__(self):
    """Set up the logger and fit a decision tree on the ``FeedData`` set."""
    self.logger = Logger().get_logger()

    # Each attribute is read from its own freshly constructed FeedData.
    features = FeedData().train_X
    labels = FeedData().train_y
    self.category_ids = FeedData().category_ids

    self.clf = tree.DecisionTreeClassifier()
    self.clf = self.clf.fit(features, labels)
Example #23
Source File: forest_embedding.py From RandomForestClustering with MIT License | 5 votes |
def __init__(self, n_estimators=10, criterion='gini', max_depth=5,
             min_samples_split=2, min_samples_leaf=1,
             min_weight_fraction_leaf=0., max_features='auto',
             max_leaf_nodes=None, bootstrap=True, sparse_output=True,
             n_jobs=1, random_state=None, verbose=0, warm_start=False):
    """Configure the forest with ``DecisionTreeClassifier`` base trees.

    Ensemble-level options go to the parent constructor; the tree-level
    options listed in ``estimator_params`` are stored as attributes of
    this instance. ``oob_score`` is always passed as ``False``.
    """
    # Names of the attributes the parent is told to treat as
    # base-estimator parameters.
    tree_param_names = ("criterion", "max_depth", "min_samples_split",
                        "min_samples_leaf", "min_weight_fraction_leaf",
                        "max_features", "max_leaf_nodes", "random_state")

    super(RandomForestEmbedding, self).__init__(
        base_estimator=DecisionTreeClassifier(),
        n_estimators=n_estimators,
        estimator_params=tree_param_names,
        bootstrap=bootstrap,
        oob_score=False,
        n_jobs=n_jobs,
        random_state=random_state,
        verbose=verbose,
        warm_start=warm_start)

    self.criterion = criterion
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf
    self.min_weight_fraction_leaf = min_weight_fraction_leaf
    self.max_features = max_features
    self.max_leaf_nodes = max_leaf_nodes
    self.sparse_output = sparse_output
Example #24
Source File: sklearn_tune.py From ml-parameter-optimization with MIT License | 5 votes |
def tune_params(self):
    """Tune the specified (and default) parameters via grid search.

    Builds a decision tree from the merged tree parameters, installs it
    as the AdaBoost ``base_estimator``, runs the grid search and prints
    progress. Returns ``self``.
    """
    self._start_time = time.time()
    self.default_params()  # set default parameters
    self.score_init()  # set initial score

    # Merge the user-supplied tree parameters with the tree defaults.
    self._params_ada_tree = self.set_default(self.params_tree,
                                             self._params_default_ada_tree)

    # define tree classifier
    base_tree = DecisionTreeClassifier(**self._params_ada_tree)
    self._params['base_estimator'] = base_tree

    booster = AdaBoostClassifier(**self._params)
    self.apply_gridsearch(booster)
    self.print_progress(self._start_time)
    return self
Example #25
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_presort_sparse():
    """Run ``check_presort_sparse`` for each estimator / sparse format pair."""
    ests = (DecisionTreeClassifier(presort=True),
            DecisionTreeRegressor(presort=True))
    sparse_matrices = (csr_matrix, csc_matrix, coo_matrix)

    # The two return values are deliberately bound as (y, X), exactly as
    # the test has always done.
    y, X = datasets.make_multilabel_classification(random_state=0,
                                                   n_samples=50,
                                                   n_features=1,
                                                   n_classes=20)
    y = y[:, 0]

    for est in ests:
        for to_sparse in sparse_matrices:
            check_presort_sparse(est, to_sparse(X), y)
Example #26
Source File: test_search.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_search_cv_results_none_param():
    """Check that a ``None`` grid value is reported in ``cv_results_``."""
    X = [[1], [2], [3], [4], [5]]
    y = [0, 0, 0, 0, 1]
    estimators = (DecisionTreeRegressor(), DecisionTreeClassifier())
    est_parameters = {"random_state": [0, None]}
    cv = KFold(random_state=0)

    for est in estimators:
        grid_search = GridSearchCV(est, est_parameters, cv=cv)
        grid_search = grid_search.fit(X, y)
        searched_states = grid_search.cv_results_['param_random_state']
        assert_array_equal(searched_states, [0, None])
Example #27
Source File: test_export.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_graphviz_errors():
    """Check the errors raised by ``export_graphviz`` on invalid input."""
    clf = DecisionTreeClassifier(max_depth=3, min_samples_split=2)

    # Check not-fitted decision tree error
    out = StringIO()
    assert_raises(NotFittedError, export_graphviz, clf, out)

    clf.fit(X, y)

    # Check if it errors when length of feature_names
    # mismatches with number of features
    mismatch_cases = (
        (["a"],
         ("Length of feature_names, "
          "1 does not match number of features, 2")),
        (["a", "b", "c"],
         ("Length of feature_names, "
          "3 does not match number of features, 2")),
    )
    for names, message in mismatch_cases:
        assert_raise_message(ValueError, message, export_graphviz, clf,
                             None, feature_names=names)

    # Check error when argument is not an estimator
    message = "is not an estimator instance"
    assert_raise_message(TypeError, message,
                         export_graphviz, clf.fit(X, y).tree_)

    # Check class_names error
    out = StringIO()
    assert_raises(IndexError, export_graphviz, clf, out, class_names=[])

    # Check precision error
    out = StringIO()
    precision_cases = (
        ("should be greater or equal", -1),
        ("should be an integer", "1"),
    )
    for pattern, bad_precision in precision_cases:
        assert_raises_regex(ValueError, pattern,
                            export_graphviz, clf, out,
                            precision=bad_precision)
Example #28
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_big_input():
    """Test if the warning for too large inputs is appropriate.

    If ``fit`` raises ``ValueError``, its message must mention "float32".
    Nothing is asserted when no error is raised.
    """
    huge_values = np.repeat(10 ** 40., 4).astype(np.float64)
    X = huge_values.reshape(-1, 1)
    clf = DecisionTreeClassifier()
    try:
        clf.fit(X, [0, 1, 0, 1])
    except ValueError as error:
        assert_in("float32", str(error))
Example #29
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_importances():
    """Check variable importances."""
    X, y = datasets.make_classification(n_samples=5000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=0)

    for name, Tree in CLF_TREES.items():
        failure_note = "Failed with {0}".format(name)

        clf = Tree(random_state=0)
        clf.fit(X, y)
        importances = clf.feature_importances_
        n_important = np.sum(importances > 0.1)

        # One importance per feature, and exactly three above the 0.1
        # threshold.
        assert_equal(importances.shape[0], 10, failure_note)
        assert_equal(n_important, 3, failure_note)

    # Check on iris that importances are the same for all builders
    default_tree = DecisionTreeClassifier(random_state=0)
    default_tree.fit(iris.data, iris.target)
    capped_tree = DecisionTreeClassifier(random_state=0,
                                         max_leaf_nodes=len(iris.data))
    capped_tree.fit(iris.data, iris.target)

    assert_array_equal(default_tree.feature_importances_,
                       capped_tree.feature_importances_)
Example #30
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_importances_raises():
    """Check that reading variable importances before fit raises ValueError."""
    unfitted = DecisionTreeClassifier()
    assert_raises(ValueError, getattr, unfitted, 'feature_importances_')