Python xgboost.XGBRegressor() Examples
The following are 30 code examples of xgboost.XGBRegressor(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the xgboost module, or try the search function.
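Before the project-specific examples, here is a minimal, self-contained sketch of the typical XGBRegressor workflow (train on a synthetic regression dataset, then predict). The dataset and hyperparameter values are illustrative and not taken from any example below.

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

# Synthetic data, purely for illustration
X, y = make_regression(n_samples=500, n_features=10, noise=0.1, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Illustrative hyperparameters; tune them for real data
model = XGBRegressor(n_estimators=100, max_depth=4, learning_rate=0.1)
model.fit(X_train, y_train)
predictions = model.predict(X_test)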
Example #1
Source File: XGBoost_Regression_pm25.py From Machine-Learning-for-Beginner-by-Python3 with MIT License | 13 votes |
def Train(data, modelcount, censhu, yanzhgdata):
    model = xgb.XGBRegressor(max_depth=censhu, learning_rate=0.1, n_estimators=modelcount,
                             silent=True, objective='reg:gamma')
    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the training MSE
    train_mse = mse(data[:, -1], train_out)

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the validation MSE
    add_mse = mse(yanzhgdata[:, -1], add_yan)
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
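A hypothetical call to Train might look like the sketch below. The synthetic arrays, the mse import, and the parameter values are illustrative assumptions; only the convention that the target sits in the last column comes from the function above.

import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error as mse  # assumed to be the `mse` used above

rng = np.random.RandomState(0)
train_data = rng.rand(200, 6)   # columns 0-4 are features, column 5 is the (positive) target
valid_data = rng.rand(50, 6)

# 100 trees of depth 4 -- purely illustrative values
train_mse, valid_mse = Train(train_data, modelcount=100, censhu=4, yanzhgdata=valid_data)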
Example #2
Source File: run.py From nyaggle with MIT License | 10 votes |
def _dispatch_gbdt_class(algorithm_type: str, type_of_target: str):
    is_regression = type_of_target == 'continuous'

    if algorithm_type == 'lgbm':
        requires_lightgbm()
        from lightgbm import LGBMClassifier, LGBMRegressor
        return LGBMRegressor if is_regression else LGBMClassifier
    elif algorithm_type == 'cat':
        requires_catboost()
        from catboost import CatBoostClassifier, CatBoostRegressor
        return CatBoostRegressor if is_regression else CatBoostClassifier
    else:
        requires_xgboost()
        assert algorithm_type == 'xgb'
        from xgboost import XGBClassifier, XGBRegressor
        return XGBRegressor if is_regression else XGBClassifier
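For context, a hypothetical call like the following would resolve to XGBRegressor, because the target type is 'continuous' and the algorithm key is 'xgb' (the n_estimators value is illustrative):

model_class = _dispatch_gbdt_class('xgb', 'continuous')  # returns xgboost.XGBRegressor
model = model_class(n_estimators=100)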
Example #3
Source File: test_xgboost_converters.py From onnxmltools with MIT License | 6 votes |
def test_xgb_regressor(self):
    iris = load_diabetes()
    x = iris.data
    y = iris.target
    x_train, x_test, y_train, _ = train_test_split(x, y, test_size=0.5, random_state=42)

    xgb = XGBRegressor()
    xgb.fit(x_train, y_train)
    conv_model = convert_xgboost(
        xgb, initial_types=[('input', FloatTensorType(shape=['None', 'None']))])
    self.assertTrue(conv_model is not None)
    dump_data_and_model(
        x_test.astype("float32"),
        xgb,
        conv_model,
        basename="SklearnXGBRegressor-Dec3",
        allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
    )
Example #4
Source File: test_saved_engineer_step.py From hyperparameter_hunter with MIT License | 6 votes |
def opt_pro(optimization_protocol):
    opt = optimization_protocol(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=XGBRegressor,
        model_init_params=dict(
            max_depth=Integer(2, 10),
            n_estimators=Integer(50, 300),
            learning_rate=Real(0.1, 0.9),
            subsample=0.5,
            booster=Categorical(["gbtree", "gblinear"]),
        ),
        model_extra_params=dict(fit=dict(eval_metric=Categorical(["rmse", "mae"]))),
        feature_engineer=FeatureEngineer([Categorical([nothing_transform], optional=True)]),
    )
    opt.go()
    return opt

##################################################
# Feature Engineering Steps
##################################################
Example #5
Source File: test_imported_ml_model_pytest.py From eland with Apache License 2.0 | 6 votes |
def test_predict_single_feature_vector(self):
    # Train model
    training_data = datasets.make_regression(n_features=1)
    regressor = XGBRegressor()
    regressor.fit(training_data[0], training_data[1])

    # Get some test results
    test_data = [[0.1]]
    test_results = regressor.predict(np.asarray(test_data))

    # Serialise the models to Elasticsearch
    feature_names = ["f0"]
    model_id = "test_xgb_regressor"

    es_model = ImportedMLModel(
        ES_TEST_CLIENT, model_id, regressor, feature_names, overwrite=True
    )

    # Single feature
    es_results = es_model.predict(test_data[0])

    np.testing.assert_almost_equal(test_results, es_results, decimal=2)

    # Clean up
    es_model.delete_model()
Example #6
Source File: test_imported_ml_model_pytest.py From eland with Apache License 2.0 | 6 votes |
def test_xgb_regressor(self):
    # Train model
    training_data = datasets.make_regression(n_features=5)
    regressor = XGBRegressor()
    regressor.fit(training_data[0], training_data[1])

    # Get some test results
    test_data = [[0.1, 0.2, 0.3, -0.5, 1.0], [1.6, 2.1, -10, 50, -1.0]]
    test_results = regressor.predict(np.asarray(test_data))

    # Serialise the models to Elasticsearch
    feature_names = ["f0", "f1", "f2", "f3", "f4"]
    model_id = "test_xgb_regressor"

    es_model = ImportedMLModel(
        ES_TEST_CLIENT, model_id, regressor, feature_names, overwrite=True
    )

    es_results = es_model.predict(test_data)

    np.testing.assert_almost_equal(test_results, es_results, decimal=2)

    # Clean up
    es_model.delete_model()
Example #7
Source File: test_boosted_trees_regression_numeric.py From coremltools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _train_convert_evaluate_assert(self, bt_params={}, allowed_error={}, **params):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    # Train a model
    xgb_model = xgboost.XGBRegressor(**params)
    xgb_model.fit(self.X, self.target)

    # Convert the model (feature_names can't be given because of XGBoost)
    spec = xgb_converter.convert(
        xgb_model, self.feature_names, self.output_name, force_32bit_float=False
    )

    if _is_macos() and _macos_version() >= (10, 13):
        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df["prediction"] = xgb_model.predict(self.X)

        # Evaluate it
        metrics = evaluate_regressor(spec, df, target="target", verbose=False)
        self._check_metrics(metrics, bt_params, allowed_error)
Example #8
Source File: test_saved_engineer_step.py From hyperparameter_hunter with MIT License | 6 votes |
def test_optional_step_matching(env_boston, feature_engineer):
    """Tests that a Space containing `optional` `Categorical` Feature Engineering steps matches
    with the expected saved Experiments. This regression test is focused on issues that arise
    when `EngineerStep`s other than the last one in the `FeatureEngineer` are `optional`.

    The simplified version of this test below, :func:`test_limited_optional_step_matching`,
    demonstrates that result matching works properly when only the final `EngineerStep` is
    `optional`"""
    opt_0 = DummyOptPro(iterations=20, random_state=32)
    opt_0.forge_experiment(XGBRegressor, feature_engineer=feature_engineer)
    opt_0.go()

    opt_1 = ExtraTreesOptPro(iterations=20, random_state=32)
    opt_1.forge_experiment(XGBRegressor, feature_engineer=feature_engineer)
    opt_1.get_ready()

    # Assert `opt_1` matched with all Experiments executed by `opt_0`
    assert len(opt_1.similar_experiments) == opt_0.successful_iterations
Example #9
Source File: test_saved_engineer_step.py From hyperparameter_hunter with MIT License | 6 votes |
def test_optional_step_matching_by_exp(env_boston, es_0, es_1, es_2):
    """Test that the result of an Experiment is correctly matched by an OptPro with
    all-`optional` `EngineerStep` dimensions"""
    feature_engineer = [_ for _ in [es_0, es_1, es_2] if _ is not None]
    exp_0 = CVExperiment(XGBRegressor, feature_engineer=feature_engineer)

    opt_0 = ExtraTreesOptPro(iterations=1, random_state=32)
    opt_0.forge_experiment(
        XGBRegressor,
        feature_engineer=[
            Categorical([es_a], optional=True),
            Categorical([es_b, es_c], optional=True),
            Categorical([es_d, es_e], optional=True),
        ],
    )
    opt_0.get_ready()

    # Assert `opt_0` matched with `exp_0`
    assert len(opt_0.similar_experiments) == 1
Example #10
Source File: base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def plot_tree(self, num_trees=0, rankdir='UT', ax=None, **kwargs):
    """Plot specified tree.

    Parameters
    ----------
    booster : Booster, XGBModel
        Booster or XGBModel instance
    num_trees : int, default 0
        Specify the ordinal number of target tree
    rankdir : str, default "UT"
        Passed to graphviz via graph_attr
    ax : matplotlib Axes, default None
        Target axes instance. If None, new figure and axes will be created.
    kwargs :
        Other keywords passed to to_graphviz

    Returns
    -------
    ax : matplotlib Axes
    """
    import xgboost as xgb

    if not isinstance(self._df.estimator, xgb.XGBModel):
        raise ValueError('estimator must be XGBRegressor or XGBClassifier')
    return xgb.plot_tree(self._df.estimator, num_trees=num_trees,
                         rankdir=rankdir, **kwargs)
Example #11
Source File: hockey_front_to_back.py From kaggle-code with MIT License | 5 votes |
def fit(self, X, y):
    """load the data in, initiate the models"""
    self.X = X
    self.y = y
    self.opt_XGBoost_reg = xgb.XGBRegressor(**self.opt_xgb_params)
    self.opt_forest_reg = RandomForestRegressor(**self.opt_rf_params)
    self.opt_svm_reg = SVR(**self.opt_svm_params)

    """ fit the models """
    self.opt_XGBoost_reg.fit(self.X, self.y)
    self.opt_forest_reg.fit(self.X, self.y)
    self.opt_svm_reg.fit(self.X, self.y)
Example #12
Source File: xgb_regressor.py From lale with Apache License 2.0 | 5 votes |
def fit(self, X, y, **fit_params):
    result = XGBRegressorImpl(
        self.max_depth, self.learning_rate, self.n_estimators, self.verbosity,
        self.silent, self.objective, self.booster, self.tree_method, self.n_jobs,
        self.nthread, self.gamma, self.min_child_weight, self.max_delta_step,
        self.subsample, self.colsample_bytree, self.colsample_bylevel,
        self.colsample_bynode, self.reg_alpha, self.reg_lambda,
        self.scale_pos_weight, self.base_score, self.random_state, self.seed,
        self.missing, self.importance_type)
    result._wrapped_model = XGBoostRegressor(**self.get_params())
    if fit_params is None:
        result._wrapped_model.fit(X, y)
    else:
        result._wrapped_model.fit(X, y, **fit_params)
    return result
Example #13
Source File: test_saved_engineer_step.py From hyperparameter_hunter with MIT License | 5 votes |
def test_limited_optional_step_matching(env_boston, feature_engineer):
    """Simplified counterpart to above :func:`test_optional_step_matching`. Tests that a Space
    containing `Categorical` Feature Engineering steps -- of which only the last ones may be
    `optional` -- matches with the expected saved Experiments.

    These test cases do not demonstrate the same bug being regression-tested by
    `test_optional_step_matching`. Instead, this test function exists to ensure that the areas
    close to the above bug are behaving properly and to help define the bug being tested by
    `test_optional_step_matching`. This function demonstrates that `optional` is not problematic
    when used only in the final `EngineerStep`"""
    opt_0 = DummyOptPro(iterations=20, random_state=32)
    opt_0.forge_experiment(XGBRegressor, feature_engineer=feature_engineer)
    opt_0.go()

    opt_1 = ExtraTreesOptPro(iterations=20, random_state=32)
    opt_1.forge_experiment(XGBRegressor, feature_engineer=feature_engineer)
    opt_1.get_ready()

    # Assert `opt_1` matched with all Experiments executed by `opt_0`
    assert len(opt_1.similar_experiments) == opt_0.successful_iterations

##################################################
# Exhaustive Experiment Matching Tests
##################################################
# The tests in this section are still related to the regression tests above, but these are
# conducted using a group of one-off Experiments, comprising all `FeatureEngineer` permutations
# that should fit within the `feature_engineer` space of `opt_0`:
# ```
# [
#     Categorical([es_a], optional=True),
#     Categorical([es_b, es_c], optional=True),
#     Categorical([es_d, es_e], optional=True),
# ]
# ```
Example #14
Source File: test_xgboost_converters.py From sklearn-onnx with MIT License | 5 votes |
def setUpClass(self):
    def custom_parser(scope, model, inputs, custom_parsers=None):
        if custom_parsers is not None and model in custom_parsers:
            return custom_parsers[model](
                scope, model, inputs, custom_parsers=custom_parsers)
        if not all(isinstance(i, (numbers.Real, bool, np.bool_))
                   for i in model.classes_):
            raise NotImplementedError(
                "Current converter does not support string labels.")
        return _parse_sklearn_classifier(scope, model, inputs)

    update_registered_converter(
        XGBClassifier, 'XGBClassifier',
        calculate_linear_classifier_output_shapes,
        convert_xgboost, parser=custom_parser,
        options={'zipmap': [True, False], 'nocl': [True, False]})
    update_registered_converter(
        XGBRegressor, 'XGBRegressor',
        calculate_linear_regressor_output_shapes,
        convert_xgboost,
        options={'zipmap': [True, False], 'nocl': [True, False]})
Example #15
Source File: test_xgboost_converters.py From sklearn-onnx with MIT License | 5 votes |
def test_xgb_regressor(self):
    iris = load_iris()
    X = iris.data[:, :2]
    y = iris.target

    xgb = XGBRegressor()
    xgb.fit(X, y)
    conv_model = convert_sklearn(
        xgb, initial_types=[
            ('input', FloatTensorType(shape=[None, X.shape[1]]))])
    self.assertTrue(conv_model is not None)
    dump_single_regression(xgb, suffix="-Dec4")
Example #16
Source File: base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def XGBRegressor(self):
    import xgboost as xgb
    return xgb.XGBRegressor
Example #17
Source File: base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def plot_importance(self, ax=None, height=0.2,
                    xlim=None, title='Feature importance',
                    xlabel='F score', ylabel='Features',
                    grid=True, **kwargs):
    """Plot importance based on fitted trees.

    Parameters
    ----------
    ax : matplotlib Axes, default None
        Target axes instance. If None, new figure and axes will be created.
    height : float, default 0.2
        Bar height, passed to ax.barh()
    xlim : tuple, default None
        Tuple passed to axes.xlim()
    title : str, default "Feature importance"
        Axes title. To disable, pass None.
    xlabel : str, default "F score"
        X axis title label. To disable, pass None.
    ylabel : str, default "Features"
        Y axis title label. To disable, pass None.
    kwargs :
        Other keywords passed to ax.barh()

    Returns
    -------
    ax : matplotlib Axes
    """
    import xgboost as xgb

    if not isinstance(self._df.estimator, xgb.XGBModel):
        raise ValueError('estimator must be XGBRegressor or XGBClassifier')
    # print(type(self._df.estimator.booster), self._df.estimator.booster)
    return xgb.plot_importance(self._df.estimator,
                               ax=ax, height=height, xlim=xlim, title=title,
                               xlabel=xlabel, ylabel=ylabel, grid=grid, **kwargs)
Example #18
Source File: base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def to_graphviz(self, num_trees=0, rankdir='UT',
                yes_color='#0000FF', no_color='#FF0000', **kwargs):
    """Convert specified tree to graphviz instance. IPython can automatically plot the
    returned graphviz instance. Otherwise, you should call .render() method
    of the returned graphviz instance.

    Parameters
    ----------
    num_trees : int, default 0
        Specify the ordinal number of target tree
    rankdir : str, default "UT"
        Passed to graphviz via graph_attr
    yes_color : str, default '#0000FF'
        Edge color when meets the node condition.
    no_color : str, default '#FF0000'
        Edge color when doesn't meet the node condition.
    kwargs :
        Other keywords passed to graphviz graph_attr

    Returns
    -------
    ax : matplotlib Axes
    """
    import xgboost as xgb

    if not isinstance(self._df.estimator, xgb.XGBModel):
        raise ValueError('estimator must be XGBRegressor or XGBClassifier')
    return xgb.to_graphviz(self._df.estimator,
                           num_trees=num_trees, rankdir=rankdir,
                           yes_color=yes_color, no_color=no_color, **kwargs)
Example #19
Source File: xgboost_sklearnmodel.py From Supply-demand-forecasting with MIT License | 5 votes |
def setClf(self):
    self.clf = XGBRegressor(max_depth=7, learning_rate=0.01, n_estimators=100)
    return
Example #20
Source File: Scikit_Models.py From pyaf with BSD 3-Clause "New" or "Revised" License | 5 votes |
def build_Scikit_Model(self):
    import xgboost as xgb
    import sklearn.svm as svm

    lXGBOptions = self.mOptions.mXGBOptions
    if lXGBOptions is None:
        lXGBOptions = self.get_default_xgb_options()
    self.mScikitModel = xgb.XGBRegressor(**lXGBOptions)
Example #21
Source File: test_causalml_estimator.py From dowhy with MIT License | 5 votes |
def test_causalml_XLearner(self, init_data):
    # Defined a linear dataset with a given set of properties
    data = init_data

    # Create a model that captures the same
    model = CausalModel(
        data=data['df'],
        treatment=data['treatment_name'],
        outcome=data['outcome_name'],
        effect_modifiers=data['effect_modifier_names'],
        graph=data['gml_graph']
    )

    # Identify the effects within the model
    identified_estimand = model.identify_effect(
        proceed_when_unidentifiable=True
    )

    xl_estimate = model.estimate_effect(
        identified_estimand,
        method_name="backdoor.causalml.inference.meta.BaseXRegressor",
        method_params={"init_params": {'learner': XGBRegressor()}}
    )

    print("The X Learner estimate obtained:")
    print(xl_estimate)
Example #22
Source File: test_causalml_estimator.py From dowhy with MIT License | 5 votes |
def test_causalml_RLearner(self, init_data):
    # Defined a linear dataset with a given set of properties
    data = init_data

    # Create a model that captures the same
    model = CausalModel(
        data=data['df'],
        treatment=data['treatment_name'],
        outcome=data['outcome_name'],
        effect_modifiers=data['effect_modifier_names'],
        graph=data['gml_graph']
    )

    # Identify the effects within the model
    identified_estimand = model.identify_effect(
        proceed_when_unidentifiable=True
    )

    rl_estimate = None
    try:
        rl_estimate = model.estimate_effect(
            identified_estimand,
            method_name="backdoor.causalml.inference.meta.BaseRRegressor",
            method_params={"init_params": {'learner': XGBRegressor()}}
        )
    except ValueError:
        print("Error with respect to the number of samples")

    print("The R Learner estimate obtained:")
    print(rl_estimate)
Example #23
Source File: _parse.py From onnxmltools with MIT License | 5 votes |
def __init__(self, booster):
    self.booster_ = booster
    self.kwargs = _get_attributes(booster)

    if self.kwargs['num_class'] > 0:
        self.classes_ = self._generate_classes(self.kwargs)
        self.operator_name = 'XGBClassifier'
    else:
        self.operator_name = 'XGBRegressor'
Example #24
Source File: test_cml_TreeEnsembleRegressorConverterXGBoost.py From onnxmltools with MIT License | 5 votes |
def test_tree_ensemble_regressor_xgboost(self):
    this = os.path.dirname(__file__)
    data_train = pandas.read_csv(
        os.path.join(this, "xgboost.model.xgb.n4.d3.train.txt"), header=None)

    X = data_train.iloc[:, 1:].values
    y = data_train.iloc[:, 0].values

    params = dict(n_estimator=4, max_depth=3)
    model = XGBRegressor(**params).fit(X, y)
    # See https://github.com/apple/coremltools/issues/51.
    model.booster = model.get_booster

    model_coreml = convert_xgb_to_coreml(model)
    model_onnx = convert_cml(model_coreml)
    assert model_onnx is not None
    if sys.version_info[0] >= 3:
        # python 2.7 returns TypeError: can't pickle instancemethod objects
        dump_data_and_model(
            X.astype(numpy.float32), model, model_onnx,
            basename="CmlXGBoostRegressor-OneOff-Reshape",
            allow_failure=True)
Example #25
Source File: test_xgboost_converters.py From onnxmltools with MIT License | 5 votes |
def test_xgboost_10(self):
    this = os.path.abspath(os.path.dirname(__file__))
    train = os.path.join(this, "input_fail_train.csv")
    test = os.path.join(this, "input_fail_test.csv")

    param_distributions = {
        "colsample_bytree": 0.5,
        "gamma": 0.2,
        'learning_rate': 0.3,
        'max_depth': 2,
        'min_child_weight': 1.,
        'n_estimators': 1,
        'missing': np.nan,
    }

    train_df = pandas.read_csv(train)
    X_train, y_train = train_df.drop('label', axis=1).values, train_df['label'].values
    test_df = pandas.read_csv(test)
    X_test, y_test = test_df.drop('label', axis=1).values, test_df['label'].values

    regressor = XGBRegressor(verbose=0, objective='reg:squarederror',
                             **param_distributions)
    regressor.fit(X_train, y_train)

    model_onnx = convert_xgboost(
        regressor, 'bug',
        [('input', FloatTensorType([None, X_train.shape[1]]))])

    dump_data_and_model(
        X_test.astype(np.float32),
        regressor, model_onnx,
        allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
        basename="XGBBoosterRegBug")
Example #26
Source File: test_xgboost.py From m2cgen with MIT License | 5 votes |
def test_regression():
    base_score = 0.6
    estimator = xgboost.XGBRegressor(n_estimators=2, random_state=1,
                                     max_depth=1, base_score=base_score)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.XGBoostModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    expected = ast.BinNumExpr(
        ast.NumVal(base_score),
        ast.BinNumExpr(
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(12),
                    ast.NumVal(9.725),
                    ast.CompOpType.GTE),
                ast.NumVal(4.98425627),
                ast.NumVal(8.75091362)),
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(5),
                    ast.NumVal(6.941),
                    ast.CompOpType.GTE),
                ast.NumVal(8.34557438),
                ast.NumVal(3.9141891)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
Example #27
Source File: __init__.py From deepchem with MIT License | 5 votes |
def fit(self, dataset, **kwargs):
    """
    Fits XGBoost model to data.
    """
    X = dataset.X
    y = np.squeeze(dataset.y)
    w = np.squeeze(dataset.w)
    seed = self.model_instance.random_state
    import xgboost as xgb
    if isinstance(self.model_instance, xgb.XGBClassifier):
        xgb_metric = "auc"
        sklearn_metric = "roc_auc"
        stratify = y
    elif isinstance(self.model_instance, xgb.XGBRegressor):
        xgb_metric = "mae"
        sklearn_metric = "neg_mean_absolute_error"
        stratify = None
    best_param = self._search_param(sklearn_metric, X, y)
    # Update model with best param
    self.model_instance = self.model_class(**best_param)

    # Find optimal n_estimators based on original learning_rate
    # and early_stopping_rounds
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=stratify)
    self.model_instance.fit(
        X_train,
        y_train,
        early_stopping_rounds=self.early_stopping_rounds,
        eval_metric=xgb_metric,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        verbose=self.verbose)
    # Since test size is 20%, when retraining the model on the whole data,
    # expect n_estimators to grow by 1/0.8 = 1.25 times.
    estimated_best_round = np.round(self.model_instance.best_ntree_limit * 1.25)
    self.model_instance.n_estimators = np.int64(estimated_best_round)
    self.model_instance.fit(X, y, eval_metric=xgb_metric, verbose=self.verbose)
Example #28
Source File: test_generalize.py From deepchem with MIT License | 5 votes |
def test_xgboost_regression(self):
    import xgboost
    np.random.seed(123)

    dataset = sklearn.datasets.load_diabetes()
    X, y = dataset.data, dataset.target
    frac_train = .7
    n_samples = len(X)
    n_train = int(frac_train * n_samples)
    X_train, y_train = X[:n_train], y[:n_train]
    X_test, y_test = X[n_train:], y[n_train:]
    train_dataset = dc.data.NumpyDataset(X_train, y_train)
    test_dataset = dc.data.NumpyDataset(X_test, y_test)

    regression_metric = dc.metrics.Metric(dc.metrics.mae_score)
    # Set early stopping round = n_estimators so that esr won't work
    esr = {'early_stopping_rounds': 50}

    xgb_model = xgboost.XGBRegressor(n_estimators=50, random_state=123)
    model = dc.models.XGBoostModel(xgb_model, verbose=False, **esr)

    # Fit trained model
    model.fit(train_dataset)
    model.save()

    # Eval model on test
    scores = model.evaluate(test_dataset, [regression_metric])
    assert scores[regression_metric.name] < 55
Example #29
Source File: test_generalize.py From deepchem with MIT License | 5 votes |
def test_xgboost_multitask_regression(self):
    import xgboost
    np.random.seed(123)
    n_tasks = 4
    tasks = range(n_tasks)

    dataset = sklearn.datasets.load_diabetes()
    X, y = dataset.data, dataset.target
    y = np.reshape(y, (len(y), 1))
    y = np.hstack([y] * n_tasks)

    frac_train = .7
    n_samples = len(X)
    n_train = int(frac_train * n_samples)
    X_train, y_train = X[:n_train], y[:n_train]
    X_test, y_test = X[n_train:], y[n_train:]
    train_dataset = dc.data.DiskDataset.from_numpy(X_train, y_train)
    test_dataset = dc.data.DiskDataset.from_numpy(X_test, y_test)

    regression_metric = dc.metrics.Metric(dc.metrics.mae_score)
    esr = {'early_stopping_rounds': 50}

    def model_builder(model_dir):
        xgb_model = xgboost.XGBRegressor(n_estimators=50, seed=123)
        return dc.models.XGBoostModel(xgb_model, model_dir, verbose=False, **esr)

    model = dc.models.SingletaskToMultitask(tasks, model_builder)

    # Fit trained model
    model.fit(train_dataset)
    model.save()

    # Eval model on test
    scores = model.evaluate(test_dataset, [regression_metric])
    for score in scores[regression_metric.name]:
        assert score < 50
Example #30
Source File: spark_tools.py From go-ml-transpiler with Apache License 2.0 | 5 votes |
def load_spark_model(model_path, metadata_path):
    import xgboost as xgb
    import json
    import numpy as np

    if not isinstance(model_path, str) or not isinstance(metadata_path, str):
        raise ValueError("model and metadata paths must be str, not {0} and {1}".format(
            type(model_path), type(metadata_path)))

    with open(metadata_path) as f:
        metadata = json.loads(f.read().strip())

    xgb_class = metadata.get("class")
    if xgb_class == "ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel":
        clf = xgb.XGBClassifier()
        setattr(clf, "base_score", metadata["paramMap"]["baseScore"])
    elif xgb_class == "ml.dmlc.xgboost4j.scala.spark.XGBoostRegressionModel":
        clf = xgb.XGBRegressor()
    else:
        raise ValueError("Unsupported model.")

    setattr(clf, "objective", metadata["paramMap"]["objective"])
    setattr(clf, "missing",
            np.nan if metadata["paramMap"]["missing"] in ["NaN", "nan", "null", "None"]
            else metadata["paramMap"]["missing"])
    setattr(clf, "booster", metadata["paramMap"].get("booster", "gbtree"))
    setattr(clf, "n_estimators", metadata["paramMap"].get("numRound", 1))

    booster = xgb.Booster()
    booster.load_model(model_path)
    clf._Booster = booster

    return clf
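A hypothetical call to load_spark_model, assuming a booster file and metadata JSON exported from XGBoost4J-Spark; the paths and the feature count below are placeholders, not part of the original project:

import numpy as np

# Placeholder paths to an exported XGBoost4J-Spark model and its metadata
regressor = load_spark_model("exported/xgbModel", "exported/metadata/part-00000")

# The column count must match the features the Spark model was trained on
sample = np.random.rand(5, 10).astype(np.float32)
print(regressor.predict(sample))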