Python sklearn.tree.DecisionTreeRegressor() Examples
The following are 30 code examples of sklearn.tree.DecisionTreeRegressor(), collected from open source projects. You can go to the original project or source file by following the link above each example. You may also want to check out the other available functions and classes of the sklearn.tree module.
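Before the project-specific examples below, here is a minimal standalone sketch of the typical fit/predict workflow for DecisionTreeRegressor. The synthetic dataset and hyperparameter values are illustrative assumptions only; they are not taken from any of the projects listed on this page.

# Minimal sketch: fit a DecisionTreeRegressor on synthetic data and inspect it.
# Dataset and hyperparameters are illustrative assumptions, not from the examples below.
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

rng = np.random.RandomState(0)
X = rng.uniform(-3, 3, size=(200, 1))              # single noisy feature
y = np.sin(X).ravel() + rng.normal(0, 0.1, 200)    # noisy sine target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

reg = DecisionTreeRegressor(max_depth=4, random_state=0)  # shallow tree to limit overfitting
reg.fit(X_train, y_train)

y_pred = reg.predict(X_test)
print("test MSE:", mean_squared_error(y_test, y_pred))
print("feature importances:", reg.feature_importances_)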
Example #1
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 9 votes |
def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR(gamma='scale')]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test)
Example #2
Source File: regression_cart.py From practicalDataAnalysisCookbook with GNU General Public License v2.0 | 7 votes |
def regression_cart(x, y):
    '''
        Estimate a CART regressor
    '''
    # create the regressor object
    cart = sk.DecisionTreeRegressor(min_samples_split=80,
                                    max_features="auto",
                                    random_state=66666,
                                    max_depth=5)

    # estimate the model
    cart.fit(x, y)

    # return the object
    return cart

# the file name of the dataset
Example #3
Source File: FSRegression.py From CausalDiscoveryToolbox with MIT License | 6 votes |
def predict_features(self, df_features, df_target, idx=0, **kwargs):
    """For one variable, predict its neighbouring nodes.

    Args:
        df_features (pandas.DataFrame):
        df_target (pandas.Series):
        idx (int): (optional) for printing purposes
        kwargs (dict): additional options for algorithms

    Returns:
        list: scores of each feature relatively to the target
    """
    X = df_features.values
    y = df_target.values
    regressor = DecisionTreeRegressor()
    regressor.fit(X, y)

    return regressor.feature_importances_
Example #4
Source File: test_tree.py From m2cgen with MIT License | 6 votes |
def test_two_conditions():
    estimator = tree.DecisionTreeRegressor()
    estimator.fit([[1], [2], [3]], [1, 2, 3])

    assembler = assemblers.TreeModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.IfExpr(
        ast.CompExpr(
            ast.FeatureRef(0),
            ast.NumVal(1.5),
            ast.CompOpType.LTE),
        ast.NumVal(1.0),
        ast.IfExpr(
            ast.CompExpr(
                ast.FeatureRef(0),
                ast.NumVal(2.5),
                ast.CompOpType.LTE),
            ast.NumVal(2.0),
            ast.NumVal(3.0)))

    assert utils.cmp_exprs(actual, expected)
Example #5
Source File: test_meta.py From m2cgen with MIT License | 6 votes |
def test_ransac_custom_base_estimator():
    base_estimator = DecisionTreeRegressor()
    estimator = linear_model.RANSACRegressor(base_estimator=base_estimator,
                                             random_state=1)
    estimator.fit([[1], [2], [3]], [1, 2, 3])

    assembler = assemblers.RANSACModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.IfExpr(
        ast.CompExpr(
            ast.FeatureRef(0),
            ast.NumVal(2.5),
            ast.CompOpType.LTE),
        ast.NumVal(2.0),
        ast.NumVal(3.0))

    assert utils.cmp_exprs(actual, expected)
Example #6
Source File: AdaBoost_Regression.py From Machine-Learning-for-Beginner-by-Python3 with MIT License | 6 votes |
def Train(data, modelcount, censhu, yanzhgdata):
    model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=censhu),
                              n_estimators=modelcount, learning_rate=0.8)

    model.fit(data[:, :-1], data[:, -1])
    # predictions on the training data
    train_out = model.predict(data[:, :-1])
    # compute the training MSE
    train_mse = mse(data[:, -1], train_out)

    # predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # compute the validation MSE
    add_mse = mse(yanzhgdata[:, -1], add_yan)
    print(train_mse, add_mse)
    return train_mse, add_mse

# function that determines the final combination
Example #7
Source File: test_tree.py From m2cgen with MIT License | 6 votes |
def test_single_condition():
    estimator = tree.DecisionTreeRegressor()
    estimator.fit([[1], [2]], [1, 2])

    assembler = assemblers.TreeModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.IfExpr(
        ast.CompExpr(
            ast.FeatureRef(0),
            ast.NumVal(1.5),
            ast.CompOpType.LTE),
        ast.NumVal(1.0),
        ast.NumVal(2.0))

    assert utils.cmp_exprs(actual, expected)
Example #8
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_bootstrap_samples():
    # Test that bootstrapping samples generate non-perfect base estimators.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    base_estimator = DecisionTreeRegressor().fit(X_train, y_train)

    # without bootstrap, all trees are perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=False,
                                random_state=rng).fit(X_train, y_train)

    assert_equal(base_estimator.score(X_train, y_train),
                 ensemble.score(X_train, y_train))

    # with bootstrap, trees are no longer perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=True,
                                random_state=rng).fit(X_train, y_train)

    assert_greater(base_estimator.score(X_train, y_train),
                   ensemble.score(X_train, y_train))

    # check that each sampling correspond to a complete bootstrap resample.
    # the size of each bootstrap should be the same as the input data but
    # the data should be different (checked using the hash of the data).
    ensemble = BaggingRegressor(base_estimator=DummySizeEstimator(),
                                bootstrap=True).fit(X_train, y_train)
    training_hash = []
    for estimator in ensemble.estimators_:
        assert estimator.training_size_ == X_train.shape[0]
        training_hash.append(estimator.training_hash_)
    assert len(set(training_hash)) == len(training_hash)
Example #9
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_bootstrap_features():
    # Test that bootstrapping features may generate duplicate features.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=False,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_equal(boston.data.shape[1], np.unique(features).shape[0])

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=True,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_greater(boston.data.shape[1], np.unique(features).shape[0])
Example #10
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_parallel_regression():
    # Check parallel regression.
    rng = check_random_state(0)

    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=1,
                                random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y3)
Example #11
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_importances_gini_equal_mse():
    # Check that gini is equivalent to mse for binary output variable

    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=0)

    # The gini index and the mean square error (variance) might differ due
    # to numerical instability. Since those instabilities mainly occurs at
    # high tree depth, we restrict this maximal depth.
    clf = DecisionTreeClassifier(criterion="gini", max_depth=5,
                                 random_state=0).fit(X, y)
    reg = DecisionTreeRegressor(criterion="mse", max_depth=5,
                                random_state=0).fit(X, y)

    assert_almost_equal(clf.feature_importances_, reg.feature_importances_)
    assert_array_equal(clf.tree_.feature, reg.tree_.feature)
    assert_array_equal(clf.tree_.children_left, reg.tree_.children_left)
    assert_array_equal(clf.tree_.children_right, reg.tree_.children_right)
    assert_array_equal(clf.tree_.n_node_samples, reg.tree_.n_node_samples)
Example #12
Source File: BaggedTrees.py From LearningX with MIT License | 6 votes |
def fit(self, X, y):
    """
    Method:
        1) Create n_estimator tree estimators with greedy splitting
        2) For each estimator i, sample (X, y) randomly N times with
           replacement and train the estimator on this sampled
           dataset (X_i, y_i)
        3) Predict by taking the mean predictions of each estimator
    """
    self.models = []
    N = len(X)
    for i in range(self.n_estimators):
        # Create tree with greedy splits
        model = DecisionTreeRegressor(max_depth=self.max_depth)
        # Bagging procedure
        idx_sample = np.random.choice(N, N)
        model.fit(X[idx_sample], y[idx_sample])
        self.models.append(model)
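Step 3 of the docstring (averaging the estimators' predictions) is not shown above. A minimal companion predict sketch, assuming the same self.models list built by fit and numpy imported as np, might look like this; it is an illustrative sketch, not the verbatim LearningX implementation.

def predict(self, X):
    # Average the predictions of all bagged trees (step 3 of the docstring).
    # Assumes fit() has already populated self.models.
    predictions = np.array([model.predict(X) for model in self.models])
    return predictions.mean(axis=0)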
Example #13
Source File: ewa.py From pycobra with MIT License | 6 votes |
def load_default(self, machine_list=['lasso', 'tree', 'ridge', 'random_forest', 'svm']):
    """
    Loads 4 different scikit-learn regressors by default.

    Parameters
    ----------
    machine_list: optional, list of strings
        List of default machine names to be loaded.

    """
    for machine in machine_list:
        try:
            if machine == 'lasso':
                self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'tree':
                self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'ridge':
                self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
            if machine == 'random_forest':
                self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'svm':
                self.estimators_['svm'] = SVR().fit(self.X_k_, self.y_k_)
        except ValueError:
            continue
Example #14
Source File: export_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_set_param_recursive_2():
    """Assert that set_param_recursive sets \"random_state\" to 42 in nested estimator in SelectFromModel."""
    pipeline_string = (
        'DecisionTreeRegressor(SelectFromModel(input_matrix, '
        'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
        'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
        'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
    )
    tpot_obj = TPOTRegressor()
    tpot_obj._fit_init()
    deap_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
    set_param_recursive(sklearn_pipeline.steps, 'random_state', 42)

    assert getattr(getattr(sklearn_pipeline.steps[0][1], 'estimator'), 'random_state') == 42
    assert getattr(sklearn_pipeline.steps[1][1], 'random_state') == 42
Example #15
Source File: MalGAN__v3.py From Malware-GAN with GNU General Public License v3.0 | 6 votes |
def build_blackbox_detector(self):
    # Note: string comparison uses '==' here; the original source used 'is',
    # which relies on string interning and is not a reliable equality check.
    if self.blackbox == 'RF':
        blackbox_detector = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=1)
    elif self.blackbox == 'SVM':
        blackbox_detector = svm.SVC()
    elif self.blackbox == 'LR':
        blackbox_detector = linear_model.LogisticRegression()
    elif self.blackbox == 'DT':
        blackbox_detector = tree.DecisionTreeRegressor()
    elif self.blackbox == 'MLP':
        blackbox_detector = MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, alpha=1e-4,
                                          solver='sgd', verbose=0, tol=1e-4, random_state=1,
                                          learning_rate_init=.1)
    elif self.blackbox == 'VOTE':
        blackbox_detector = VOTEClassifier()

    return blackbox_detector
Example #16
Source File: test_io_types.py From coremltools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_tree_regressor(self):
    for dtype in self.number_data_type.keys():
        scikit_model = DecisionTreeRegressor(random_state=1)
        data = self.scikit_data["data"].astype(dtype)
        target = self.scikit_data["target"].astype(dtype)
        scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target)
        test_data = data[0].reshape(1, -1)
        self._check_tree_model(spec, "multiArrayType", "doubleType", 1)
        coreml_model = create_model(spec)
        try:
            self.assertEqual(
                scikit_model.predict(test_data)[0].dtype,
                type(coreml_model.predict({"data": test_data})["target"]),
            )
            self.assertEqual(
                scikit_model.predict(test_data)[0],
                coreml_model.predict({"data": test_data})["target"],
                msg="{} != {} for Dtype: {}".format(
                    scikit_model.predict(test_data)[0],
                    coreml_model.predict({"data": test_data})["target"],
                    dtype,
                ),
            )
        except RuntimeError:
            print("{} not supported. ".format(dtype))
Example #17
Source File: test_regression_tests.py From drifter_ml with MIT License | 6 votes |
def generate_regression_data_and_models():
    df = pd.DataFrame()
    for _ in range(1000):
        a = np.random.normal(0, 1)
        b = np.random.normal(0, 3)
        c = np.random.normal(12, 4)
        target = a + b + c
        df = df.append({
            "A": a,
            "B": b,
            "C": c,
            "target": target
        }, ignore_index=True)

    reg1 = tree.DecisionTreeRegressor()
    reg2 = ensemble.RandomForestRegressor()
    column_names = ["A", "B", "C"]
    target_name = "target"
    X = df[column_names]
    reg1.fit(X, df[target_name])
    reg2.fit(X, df[target_name])
    return df, column_names, target_name, reg1, reg2
Example #18
Source File: test_multiclass.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_ovr_ovo_regressor():
    # test that ovr and ovo work on regressors which don't have a decision_
    # function
    ovr = OneVsRestClassifier(DecisionTreeRegressor())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert_equal(len(ovr.estimators_), n_classes)
    assert_array_equal(np.unique(pred), [0, 1, 2])

    # we are doing something sensible
    assert_greater(np.mean(pred == iris.target), .9)

    ovr = OneVsOneClassifier(DecisionTreeRegressor())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert_equal(len(ovr.estimators_), n_classes * (n_classes - 1) / 2)
    assert_array_equal(np.unique(pred), [0, 1, 2])

    # we are doing something sensible
    assert_greater(np.mean(pred == iris.target), .9)
Example #19
Source File: test_impute.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_imputation_pipeline_grid_search():
    # Test imputation within a pipeline + gridsearch.
    X = sparse_random_matrix(100, 100, density=0.10)
    missing_values = X.data[0]

    pipeline = Pipeline([('imputer',
                          SimpleImputer(missing_values=missing_values)),
                         ('tree',
                          tree.DecisionTreeRegressor(random_state=0))])

    parameters = {
        'imputer__strategy': ["mean", "median", "most_frequent"]
    }

    Y = sparse_random_matrix(100, 1, density=0.10).toarray()
    gs = GridSearchCV(pipeline, parameters)
    gs.fit(X, Y)
Example #20
Source File: test_weight_boosting.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target)
Example #21
Source File: test_sklearn.py From delira with GNU Affero General Public License v3.0 | 5 votes |
def test_load_save(self):
    from delira.io.sklearn import load_checkpoint, save_checkpoint
    from delira.models import SklearnEstimator
    from sklearn.tree import DecisionTreeRegressor
    import numpy as np

    net = SklearnEstimator(DecisionTreeRegressor())
    net.fit(X=np.random.rand(2, 32), y=np.random.rand(2))
    save_checkpoint("./model_sklearn.pkl", model=net)
    self.assertTrue(load_checkpoint("./model_sklearn.pkl"))
Example #22
Source File: kernelcobra.py From pycobra with MIT License | 5 votes |
def load_default(self, machine_list='basic'):
    """
    Loads 4 different scikit-learn regressors by default. The advanced list adds more machines.

    Parameters
    ----------
    machine_list: optional, list of strings
        List of default machine names to be loaded.
        Default is basic.

    Returns
    -------
    self : returns an instance of self.
    """
    if machine_list == 'basic':
        machine_list = ['tree', 'ridge', 'random_forest', 'svm']
    if machine_list == 'advanced':
        machine_list = ['lasso', 'tree', 'ridge', 'random_forest', 'svm', 'bayesian_ridge', 'sgd']

    self.estimators_ = {}
    for machine in machine_list:
        try:
            if machine == 'lasso':
                self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'tree':
                self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'ridge':
                self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
            if machine == 'random_forest':
                self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'svm':
                self.estimators_['svm'] = SVR().fit(self.X_k_, self.y_k_)
            if machine == 'sgd':
                self.estimators_['sgd'] = linear_model.SGDRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'bayesian_ridge':
                self.estimators_['bayesian_ridge'] = linear_model.BayesianRidge().fit(self.X_k_, self.y_k_)
        except ValueError:
            continue
    return self
Example #23
Source File: cobra.py From pycobra with MIT License | 5 votes |
def load_default(self, machine_list='basic'):
    """
    Loads 4 different scikit-learn regressors by default. The advanced list adds more machines.

    Parameters
    ----------
    machine_list: optional, list of strings
        List of default machine names to be loaded.

    Returns
    -------
    self : returns an instance of self.
    """
    if machine_list == 'basic':
        machine_list = ['tree', 'ridge', 'random_forest', 'svm']
    if machine_list == 'advanced':
        machine_list = ['lasso', 'tree', 'ridge', 'random_forest', 'svm', 'bayesian_ridge', 'sgd']

    self.estimators_ = {}
    for machine in machine_list:
        try:
            if machine == 'lasso':
                self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'tree':
                self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'ridge':
                self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
            if machine == 'random_forest':
                self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'svm':
                self.estimators_['svm'] = LinearSVR(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'sgd':
                self.estimators_['sgd'] = linear_model.SGDRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'bayesian_ridge':
                self.estimators_['bayesian_ridge'] = linear_model.BayesianRidge().fit(self.X_k_, self.y_k_)
        except ValueError:
            continue
    return self
Example #24
Source File: GradientBoostedTree.py From LearningX with MIT License | 5 votes |
def fit(self, X, y):
    """
    Method:
        1) Train tree with greedy splitting on dataset (X, y)
        2) Recursively (n_estimator times) compute the residual between the
           truth and prediction values (res = y - y_pred), and use the residual
           as the next estimator's training y (keep X the same)
        3) The prediction at each estimator is the trained prediction plus the
           previous trained prediction, making the full prediction of the final
           model the sum of the predictions of each model.
    """
    self.models = []
    y_i = y
    y_pred_i = np.zeros(y.shape)
    for i in range(self.n_estimators):
        # Create tree with greedy splits
        model = DecisionTreeRegressor(max_depth=self.max_depth,
                                      min_samples_split=self.min_samples_split,
                                      min_samples_leaf=self.min_samples_leaf)
        model.fit(X, y_i)

        # Boosting procedure
        y_pred = model.predict(X) + y_pred_i  # add previous prediction
        res = y - y_pred                      # compute residual
        y_i = res                             # set training label as residual
        y_pred_i = y_pred                     # update prediction value

        self.models.append(model)
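The docstring's step 3 implies that the full-model prediction is the sum of the per-stage predictions, but only fit is shown above. A hedged sketch of the matching predict step follows, assuming self.models built by fit and numpy imported as np; this is an illustrative companion, not the verbatim LearningX code.

def predict(self, X):
    # Sum the stage-wise predictions: each tree was fit on the running
    # residual, so the final prediction is the sum over all stages.
    # Assumes fit() has already populated self.models.
    y_pred = np.zeros(X.shape[0])
    for model in self.models:
        y_pred += model.predict(X)
    return y_pred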
Example #25
Source File: RandomForest.py From LearningX with MIT License | 5 votes |
def fit(self, X, y):
    """
    Method:
        1) Create n_estimator tree estimators with random splitting on
           d/3 max features
        2) For each estimator i, sample (X, y) randomly N times with
           replacement and train the estimator on this sampled
           dataset (X_i, y_i)
        3) Predict by taking the mean predictions of each estimator
    """
    self.models = []
    N, d = X.shape
    for i in range(self.n_estimators):
        # Create tree with random splitting on d/3 max features
        model = DecisionTreeRegressor(max_depth=self.max_depth,
                                      min_samples_split=self.min_samples_split,
                                      min_samples_leaf=self.min_samples_leaf,
                                      splitter="random",
                                      max_features=d/3)
        # Bagging procedure
        idx_sample = np.random.choice(N, N)      # random sampling of length N
        model.fit(X[idx_sample], y[idx_sample])  # fit on random sampling
        self.models.append(model)
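As with the bagged-trees example above, step 3 (averaging) is only described in the docstring. A minimal companion predict sketch under the same assumptions (self.models built by fit, numpy as np) could be:

def predict(self, X):
    # Average the predictions of the randomized trees (step 3 of the docstring).
    # Illustrative sketch, not the verbatim LearningX implementation.
    predictions = np.array([model.predict(X) for model in self.models])
    return predictions.mean(axis=0)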
Example #26
Source File: export_tests.py From tpot with GNU Lesser General Public License v3.0 | 5 votes |
def test_export_pipeline_5():
    """Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline with SelectFromModel."""
    pipeline_string = (
        'DecisionTreeRegressor(SelectFromModel(input_matrix, '
        'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
        'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
        'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj_reg._pset)
    expected_code = """import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeRegressor

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=None)

exported_pipeline = make_pipeline(
    SelectFromModel(estimator=ExtraTreesRegressor(max_features=0.05, n_estimators=100), threshold=0.05),
    DecisionTreeRegressor(max_depth=8, min_samples_leaf=5, min_samples_split=5)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    assert expected_code == export_pipeline(pipeline, tpot_obj_reg.operators, tpot_obj_reg._pset)
Example #27
Source File: test_logitboost.py From skboost with MIT License | 5 votes |
def test_logitboost_musk_fitting():
    c = LogitBoostClassifier(
        base_estimator=DecisionTreeRegressor(max_depth=1),
        n_estimators=30,
        learning_rate=1.0
    )
    data = MUSK1()
    c.fit(data.data, np.sign(data.labels))
    assert_array_less(c.estimator_errors_, 0.6)
    assert zero_one_loss(np.sign(data.labels), c.predict(data.data)) < 0.05
Example #28
Source File: test_logitboost.py From skboost with MIT License | 5 votes |
def test_logitboost_hastie_fitting():
    c = LogitBoostClassifier(
        base_estimator=DecisionTreeRegressor(max_depth=1),
        n_estimators=30,
        learning_rate=1.0
    )
    data = Hastie_10_2()
    c.fit(data.data, np.sign(data.labels))
    assert_array_less(c.estimator_errors_, 0.5)
    assert zero_one_loss(np.sign(data.labels), c.predict(data.data)) < 0.2
Example #29
Source File: VOTEClassifier.py From Malware-GAN with GNU General Public License v3.0 | 5 votes |
def __init__(self):
    self.RF = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=1)
    self.SVM = svm.SVC()
    self.LR = linear_model.LogisticRegression()
    self.DT = tree.DecisionTreeRegressor()
    self.MLP = MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, alpha=1e-4,
                             solver='sgd', verbose=0, tol=1e-4, random_state=1,
                             learning_rate_init=.1)
    self.alpha = 0.2 * np.ones((5, ))
Example #30
Source File: test_gentleboost.py From skboost with MIT License | 5 votes |
def test_gentleboost_hastie_fitting():
    c = GentleBoostClassifier(
        base_estimator=DecisionTreeRegressor(max_depth=1),
        n_estimators=30,
        learning_rate=1.0
    )
    data = Hastie_10_2()
    c.fit(data.data, np.sign(data.labels))
    assert_array_less(c.estimator_errors_, 0.5)
    assert zero_one_loss(np.sign(data.labels), c.predict(data.data)) < 0.2