Python xgboost.XGBRegressor() Examples
The following are 30 code examples of xgboost.XGBRegressor(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the xgboost module, or try the search function.
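Before the project-specific examples, here is a minimal, self-contained sketch of the typical XGBRegressor workflow (train on a synthetic regression dataset, then predict). The dataset and hyperparameter values are illustrative and not taken from any example below.

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

# Synthetic data, purely for illustration
X, y = make_regression(n_samples=500, n_features=10, noise=0.1, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Illustrative hyperparameters; tune them for real data
model = XGBRegressor(n_estimators=100, max_depth=4, learning_rate=0.1)
model.fit(X_train, y_train)
predictions = model.predict(X_test)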
Example #1
Source File: XGBoost_Regression_pm25.py From Machine-Learning-for-Beginner-by-Python3 with MIT License | 13 votes |
def Train(data, modelcount, censhu, yanzhgdata):
    model = xgb.XGBRegressor(max_depth=censhu, learning_rate=0.1, n_estimators=modelcount,
                             silent=True, objective='reg:gamma')
    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the training MSE
    train_mse = mse(data[:, -1], train_out)

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the validation MSE
    add_mse = mse(yanzhgdata[:, -1], add_yan)
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
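A hypothetical call to Train might look like the sketch below. The synthetic arrays, the mse import, and the parameter values are illustrative assumptions; only the convention that the target sits in the last column comes from the function above.

import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error as mse  # assumed to be the `mse` used above

rng = np.random.RandomState(0)
train_data = rng.rand(200, 6)   # columns 0-4 are features, column 5 is the (positive) target
valid_data = rng.rand(50, 6)

# 100 trees of depth 4 -- purely illustrative values
train_mse, valid_mse = Train(train_data, modelcount=100, censhu=4, yanzhgdata=valid_data)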
Example #2
Source File: run.py From nyaggle with MIT License | 10 votes |
def _dispatch_gbdt_class(algorithm_type: str, type_of_target: str):
    is_regression = type_of_target == 'continuous'

    if algorithm_type == 'lgbm':
        requires_lightgbm()
        from lightgbm import LGBMClassifier, LGBMRegressor
        return LGBMRegressor if is_regression else LGBMClassifier
    elif algorithm_type == 'cat':
        requires_catboost()
        from catboost import CatBoostClassifier, CatBoostRegressor
        return CatBoostRegressor if is_regression else CatBoostClassifier
    else:
        requires_xgboost()
        assert algorithm_type == 'xgb'
        from xgboost import XGBClassifier, XGBRegressor
        return XGBRegressor if is_regression else XGBClassifier
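For context, a hypothetical call like the following would resolve to XGBRegressor, because the target type is 'continuous' and the algorithm key is 'xgb' (the n_estimators value is illustrative):

model_class = _dispatch_gbdt_class('xgb', 'continuous')  # returns xgboost.XGBRegressor
model = model_class(n_estimators=100)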
Example #3
Source File: test_xgboost_converters.py From onnxmltools with MIT License | 6 votes |
def test_xgb_regressor(self):
    iris = load_diabetes()
    x = iris.data
    y = iris.target
    x_train, x_test, y_train, _ = train_test_split(x, y, test_size=0.5, random_state=42)

    xgb = XGBRegressor()
    xgb.fit(x_train, y_train)
    conv_model = convert_xgboost(
        xgb, initial_types=[('input', FloatTensorType(shape=['None', 'None']))])
    self.assertTrue(conv_model is not None)
    dump_data_and_model(
        x_test.astype("float32"),
        xgb,
        conv_model,
        basename="SklearnXGBRegressor-Dec3",
        allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
    )
Example #4
Source File: test_saved_engineer_step.py From hyperparameter_hunter with MIT License | 6 votes |
def opt_pro(optimization_protocol):
    opt = optimization_protocol(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=XGBRegressor,
        model_init_params=dict(
            max_depth=Integer(2, 10),
            n_estimators=Integer(50, 300),
            learning_rate=Real(0.1, 0.9),
            subsample=0.5,
            booster=Categorical(["gbtree", "gblinear"]),
        ),
        model_extra_params=dict(fit=dict(eval_metric=Categorical(["rmse", "mae"]))),
        feature_engineer=FeatureEngineer([Categorical([nothing_transform], optional=True)]),
    )
    opt.go()
    return opt

##################################################
# Feature Engineering Steps
##################################################
Example #5
Source File: test_imported_ml_model_pytest.py From eland with Apache License 2.0 | 6 votes |
def test_predict_single_feature_vector(self):
    # Train model
    training_data = datasets.make_regression(n_features=1)
    regressor = XGBRegressor()
    regressor.fit(training_data[0], training_data[1])

    # Get some test results
    test_data = [[0.1]]
    test_results = regressor.predict(np.asarray(test_data))

    # Serialise the models to Elasticsearch
    feature_names = ["f0"]
    model_id = "test_xgb_regressor"

    es_model = ImportedMLModel(
        ES_TEST_CLIENT, model_id, regressor, feature_names, overwrite=True
    )

    # Single feature
    es_results = es_model.predict(test_data[0])

    np.testing.assert_almost_equal(test_results, es_results, decimal=2)

    # Clean up
    es_model.delete_model()
Example #6
Source File: test_imported_ml_model_pytest.py From eland with Apache License 2.0 | 6 votes |
def test_xgb_regressor(self):
    # Train model
    training_data = datasets.make_regression(n_features=5)
    regressor = XGBRegressor()
    regressor.fit(training_data[0], training_data[1])

    # Get some test results
    test_data = [[0.1, 0.2, 0.3, -0.5, 1.0], [1.6, 2.1, -10, 50, -1.0]]
    test_results = regressor.predict(np.asarray(test_data))

    # Serialise the models to Elasticsearch
    feature_names = ["f0", "f1", "f2", "f3", "f4"]
    model_id = "test_xgb_regressor"

    es_model = ImportedMLModel(
        ES_TEST_CLIENT, model_id, regressor, feature_names, overwrite=True
    )

    es_results = es_model.predict(test_data)

    np.testing.assert_almost_equal(test_results, es_results, decimal=2)

    # Clean up
    es_model.delete_model()
Example #7
Source File: test_boosted_trees_regression_numeric.py From coremltools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _train_convert_evaluate_assert(self, bt_params={}, allowed_error={}, **params):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    # Train a model
    xgb_model = xgboost.XGBRegressor(**params)
    xgb_model.fit(self.X, self.target)

    # Convert the model (feature_names can't be given because of XGBoost)
    spec = xgb_converter.convert(
        xgb_model, self.feature_names, self.output_name, force_32bit_float=False
    )

    if _is_macos() and _macos_version() >= (10, 13):
        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df["prediction"] = xgb_model.predict(self.X)

        # Evaluate it
        metrics = evaluate_regressor(spec, df, target="target", verbose=False)
        self._check_metrics(metrics, bt_params, allowed_error)
Example #8
Source File: test_saved_engineer_step.py From hyperparameter_hunter with MIT License | 6 votes |
def test_optional_step_matching(env_boston, feature_engineer):
    """Tests that a Space containing `optional` `Categorical` Feature Engineering steps matches
    with the expected saved Experiments. This regression test is focused on issues that arise
    when `EngineerStep`s other than the last one in the `FeatureEngineer` are `optional`.

    The simplified version of this test below, :func:`test_limited_optional_step_matching`,
    demonstrates that result matching works properly when only the final `EngineerStep` is
    `optional`"""
    opt_0 = DummyOptPro(iterations=20, random_state=32)
    opt_0.forge_experiment(XGBRegressor, feature_engineer=feature_engineer)
    opt_0.go()

    opt_1 = ExtraTreesOptPro(iterations=20, random_state=32)
    opt_1.forge_experiment(XGBRegressor, feature_engineer=feature_engineer)
    opt_1.get_ready()

    # Assert `opt_1` matched with all Experiments executed by `opt_0`
    assert len(opt_1.similar_experiments) == opt_0.successful_iterations
Example #9
Source File: test_saved_engineer_step.py From hyperparameter_hunter with MIT License | 6 votes |
def test_optional_step_matching_by_exp(env_boston, es_0, es_1, es_2):
    """Test that the result of an Experiment is correctly matched by an OptPro with
    all-`optional` `EngineerStep` dimensions"""
    feature_engineer = [_ for _ in [es_0, es_1, es_2] if _ is not None]
    exp_0 = CVExperiment(XGBRegressor, feature_engineer=feature_engineer)

    opt_0 = ExtraTreesOptPro(iterations=1, random_state=32)
    opt_0.forge_experiment(
        XGBRegressor,
        feature_engineer=[
            Categorical([es_a], optional=True),
            Categorical([es_b, es_c], optional=True),
            Categorical([es_d, es_e], optional=True),
        ],
    )
    opt_0.get_ready()

    # Assert `opt_0` matched with `exp_0`
    assert len(opt_0.similar_experiments) == 1
Example #10
Source File: base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def plot_tree(self, num_trees=0, rankdir='UT', ax=None, **kwargs):
    """Plot specified tree.

    Parameters
    ----------
    booster : Booster, XGBModel
        Booster or XGBModel instance
    num_trees : int, default 0
        Specify the ordinal number of target tree
    rankdir : str, default "UT"
        Passed to graphviz via graph_attr
    ax : matplotlib Axes, default None
        Target axes instance. If None, new figure and axes will be created.
    kwargs :
        Other keywords passed to to_graphviz

    Returns
    -------
    ax : matplotlib Axes
    """
    import xgboost as xgb

    if not isinstance(self._df.estimator, xgb.XGBModel):
        raise ValueError('estimator must be XGBRegressor or XGBClassifier')
    return xgb.plot_tree(self._df.estimator, num_trees=num_trees,
                         rankdir=rankdir, **kwargs)
Example #11
Source File: hockey_front_to_back.py From kaggle-code with MIT License | 5 votes |
def fit(self, X, y):
    """load the data in, initiate the models"""
    self.X = X
    self.y = y
    self.opt_XGBoost_reg = xgb.XGBRegressor(**self.opt_xgb_params)
    self.opt_forest_reg = RandomForestRegressor(**self.opt_rf_params)
    self.opt_svm_reg = SVR(**self.opt_svm_params)

    """ fit the models """
    self.opt_XGBoost_reg.fit(self.X, self.y)
    self.opt_forest_reg.fit(self.X, self.y)
    self.opt_svm_reg.fit(self.X, self.y)
Example #12
Source File: xgb_regressor.py From lale with Apache License 2.0 | 5 votes |
def fit(self, X, y, **fit_params):
    result = XGBRegressorImpl(
        self.max_depth, self.learning_rate, self.n_estimators, self.verbosity,
        self.silent, self.objective, self.booster, self.tree_method, self.n_jobs,
        self.nthread, self.gamma, self.min_child_weight, self.max_delta_step,
        self.subsample, self.colsample_bytree, self.colsample_bylevel,
        self.colsample_bynode, self.reg_alpha, self.reg_lambda,
        self.scale_pos_weight, self.base_score, self.random_state, self.seed,
        self.missing, self.importance_type)
    result._wrapped_model = XGBoostRegressor(**self.get_params())
    if fit_params is None:
        result._wrapped_model.fit(X, y)
    else:
        result._wrapped_model.fit(X, y, **fit_params)
    return result
Example #13
Source File: test_saved_engineer_step.py From hyperparameter_hunter with MIT License | 5 votes |
def test_limited_optional_step_matching(env_boston, feature_engineer):
    """Simplified counterpart to above :func:`test_optional_step_matching`. Tests that a Space
    containing `Categorical` Feature Engineering steps -- of which only the last ones may be
    `optional` -- matches with the expected saved Experiments.

    These test cases do not demonstrate the same bug being regression-tested by
    `test_optional_step_matching`. Instead, this test function exists to ensure that the areas
    close to the above bug are behaving properly and to help define the bug being tested by
    `test_optional_step_matching`. This function demonstrates that `optional` is not problematic
    when used only in the final `EngineerStep`"""
    opt_0 = DummyOptPro(iterations=20, random_state=32)
    opt_0.forge_experiment(XGBRegressor, feature_engineer=feature_engineer)
    opt_0.go()

    opt_1 = ExtraTreesOptPro(iterations=20, random_state=32)
    opt_1.forge_experiment(XGBRegressor, feature_engineer=feature_engineer)
    opt_1.get_ready()

    # Assert `opt_1` matched with all Experiments executed by `opt_0`
    assert len(opt_1.similar_experiments) == opt_0.successful_iterations

##################################################
# Exhaustive Experiment Matching Tests
##################################################
# The tests in this section are still related to the regression tests above, but these are
# conducted using a group of one-off Experiments, comprising all `FeatureEngineer` permutations
# that should fit within the `feature_engineer` space of `opt_0`:
# ```
# [
#     Categorical([es_a], optional=True),
#     Categorical([es_b, es_c], optional=True),
#     Categorical([es_d, es_e], optional=True),
# ]
# ```
Example #14
Source File: test_xgboost_converters.py From sklearn-onnx with MIT License | 5 votes |
def setUpClass(self):
    def custom_parser(scope, model, inputs, custom_parsers=None):
        if custom_parsers is not None and model in custom_parsers:
            return custom_parsers[model](
                scope, model, inputs, custom_parsers=custom_parsers)
        if not all(isinstance(i, (numbers.Real, bool, np.bool_))
                   for i in model.classes_):
            raise NotImplementedError(
                "Current converter does not support string labels.")
        return _parse_sklearn_classifier(scope, model, inputs)

    update_registered_converter(
        XGBClassifier, 'XGBClassifier',
        calculate_linear_classifier_output_shapes,
        convert_xgboost, parser=custom_parser,
        options={'zipmap': [True, False], 'nocl': [True, False]})
    update_registered_converter(
        XGBRegressor, 'XGBRegressor',
        calculate_linear_regressor_output_shapes,
        convert_xgboost,
        options={'zipmap': [True, False], 'nocl': [True, False]})
Example #15
Source File: test_xgboost_converters.py From sklearn-onnx with MIT License | 5 votes |
def test_xgb_regressor(self):
    iris = load_iris()
    X = iris.data[:, :2]
    y = iris.target

    xgb = XGBRegressor()
    xgb.fit(X, y)
    conv_model = convert_sklearn(
        xgb, initial_types=[
            ('input', FloatTensorType(shape=[None, X.shape[1]]))])
    self.assertTrue(conv_model is not None)
    dump_single_regression(xgb, suffix="-Dec4")
Example #16
Source File: base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def XGBRegressor(self):
    import xgboost as xgb
    return xgb.XGBRegressor
Example #17
Source File: base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def plot_importance(self, ax=None, height=0.2,
                    xlim=None, title='Feature importance',
                    xlabel='F score', ylabel='Features',
                    grid=True, **kwargs):
    """Plot importance based on fitted trees.

    Parameters
    ----------
    ax : matplotlib Axes, default None
        Target axes instance. If None, new figure and axes will be created.
    height : float, default 0.2
        Bar height, passed to ax.barh()
    xlim : tuple, default None
        Tuple passed to axes.xlim()
    title : str, default "Feature importance"
        Axes title. To disable, pass None.
    xlabel : str, default "F score"
        X axis title label. To disable, pass None.
    ylabel : str, default "Features"
        Y axis title label. To disable, pass None.
    kwargs :
        Other keywords passed to ax.barh()

    Returns
    -------
    ax : matplotlib Axes
    """
    import xgboost as xgb

    if not isinstance(self._df.estimator, xgb.XGBModel):
        raise ValueError('estimator must be XGBRegressor or XGBClassifier')
    # print(type(self._df.estimator.booster), self._df.estimator.booster)
    return xgb.plot_importance(self._df.estimator,
                               ax=ax, height=height, xlim=xlim, title=title,
                               xlabel=xlabel, ylabel=ylabel, grid=grid, **kwargs)
Example #18
Source File: base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def to_graphviz(self, num_trees=0, rankdir='UT',
                yes_color='#0000FF', no_color='#FF0000', **kwargs):
    """Convert specified tree to graphviz instance. IPython can automatically plot the
    returned graphviz instance. Otherwise, you should call .render() method
    of the returned graphviz instance.

    Parameters
    ----------
    num_trees : int, default 0
        Specify the ordinal number of target tree
    rankdir : str, default "UT"
        Passed to graphviz via graph_attr
    yes_color : str, default '#0000FF'
        Edge color when meets the node condition.
    no_color : str, default '#FF0000'
        Edge color when doesn't meet the node condition.
    kwargs :
        Other keywords passed to graphviz graph_attr

    Returns
    -------
    ax : matplotlib Axes
    """
    import xgboost as xgb

    if not isinstance(self._df.estimator, xgb.XGBModel):
        raise ValueError('estimator must be XGBRegressor or XGBClassifier')
    return xgb.to_graphviz(self._df.estimator,
                           num_trees=num_trees, rankdir=rankdir,
                           yes_color=yes_color, no_color=no_color, **kwargs)
Example #19
Source File: xgboost_sklearnmodel.py From Supply-demand-forecasting with MIT License | 5 votes |
def setClf(self):
    self.clf = XGBRegressor(max_depth=7, learning_rate=0.01, n_estimators=100)
    return
Example #20
Source File: Scikit_Models.py From pyaf with BSD 3-Clause "New" or "Revised" License | 5 votes |
def build_Scikit_Model(self):
    import xgboost as xgb
    import sklearn.svm as svm

    lXGBOptions = self.mOptions.mXGBOptions
    if lXGBOptions is None:
        lXGBOptions = self.get_default_xgb_options()
    self.mScikitModel = xgb.XGBRegressor(**lXGBOptions)
Example #21
Source File: test_causalml_estimator.py From dowhy with MIT License | 5 votes |
def test_causalml_XLearner(self, init_data):
    # Defined a linear dataset with a given set of properties
    data = init_data

    # Create a model that captures the same
    model = CausalModel(
        data=data['df'],
        treatment=data['treatment_name'],
        outcome=data['outcome_name'],
        effect_modifiers=data['effect_modifier_names'],
        graph=data['gml_graph']
    )

    # Identify the effects within the model
    identified_estimand = model.identify_effect(
        proceed_when_unidentifiable=True
    )

    xl_estimate = model.estimate_effect(
        identified_estimand,
        method_name="backdoor.causalml.inference.meta.BaseXRegressor",
        method_params={"init_params": {'learner': XGBRegressor()}}
    )

    print("The X Learner estimate obtained:")
    print(xl_estimate)
Example #22
Source File: test_causalml_estimator.py From dowhy with MIT License | 5 votes |
def test_causalml_RLearner(self, init_data):
    # Defined a linear dataset with a given set of properties
    data = init_data

    # Create a model that captures the same
    model = CausalModel(
        data=data['df'],
        treatment=data['treatment_name'],
        outcome=data['outcome_name'],
        effect_modifiers=data['effect_modifier_names'],
        graph=data['gml_graph']
    )

    # Identify the effects within the model
    identified_estimand = model.identify_effect(
        proceed_when_unidentifiable=True
    )

    rl_estimate = None
    try:
        rl_estimate = model.estimate_effect(
            identified_estimand,
            method_name="backdoor.causalml.inference.meta.BaseRRegressor",
            method_params={"init_params": {'learner': XGBRegressor()}}
        )
    except ValueError:
        print("Error with respect to the number of samples")

    print("The R Learner estimate obtained:")
    print(rl_estimate)
Example #23
Source File: _parse.py From onnxmltools with MIT License | 5 votes |
def __init__(self, booster):
    self.booster_ = booster
    self.kwargs = _get_attributes(booster)

    if self.kwargs['num_class'] > 0:
        self.classes_ = self._generate_classes(self.kwargs)
        self.operator_name = 'XGBClassifier'
    else:
        self.operator_name = 'XGBRegressor'
Example #24
Source File: test_cml_TreeEnsembleRegressorConverterXGBoost.py From onnxmltools with MIT License | 5 votes |
def test_tree_ensemble_regressor_xgboost(self):
    this = os.path.dirname(__file__)
    data_train = pandas.read_csv(
        os.path.join(this, "xgboost.model.xgb.n4.d3.train.txt"), header=None)

    X = data_train.iloc[:, 1:].values
    y = data_train.iloc[:, 0].values

    params = dict(n_estimator=4, max_depth=3)
    model = XGBRegressor(**params).fit(X, y)
    # See https://github.com/apple/coremltools/issues/51.
    model.booster = model.get_booster

    model_coreml = convert_xgb_to_coreml(model)
    model_onnx = convert_cml(model_coreml)
    assert model_onnx is not None
    if sys.version_info[0] >= 3:
        # python 2.7 returns TypeError: can't pickle instancemethod objects
        dump_data_and_model(
            X.astype(numpy.float32), model, model_onnx,
            basename="CmlXGBoostRegressor-OneOff-Reshape",
            allow_failure=True)
Example #25
Source File: test_xgboost_converters.py From onnxmltools with MIT License | 5 votes |
def test_xgboost_10(self):
    this = os.path.abspath(os.path.dirname(__file__))
    train = os.path.join(this, "input_fail_train.csv")
    test = os.path.join(this, "input_fail_test.csv")

    param_distributions = {
        "colsample_bytree": 0.5,
        "gamma": 0.2,
        'learning_rate': 0.3,
        'max_depth': 2,
        'min_child_weight': 1.,
        'n_estimators': 1,
        'missing': np.nan,
    }

    train_df = pandas.read_csv(train)
    X_train, y_train = train_df.drop('label', axis=1).values, train_df['label'].values
    test_df = pandas.read_csv(test)
    X_test, y_test = test_df.drop('label', axis=1).values, test_df['label'].values

    regressor = XGBRegressor(verbose=0, objective='reg:squarederror',
                             **param_distributions)
    regressor.fit(X_train, y_train)

    model_onnx = convert_xgboost(
        regressor, 'bug',
        [('input', FloatTensorType([None, X_train.shape[1]]))])

    dump_data_and_model(
        X_test.astype(np.float32),
        regressor, model_onnx,
        allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
        basename="XGBBoosterRegBug")
Example #26
Source File: test_xgboost.py From m2cgen with MIT License | 5 votes |
def test_regression():
    base_score = 0.6
    estimator = xgboost.XGBRegressor(n_estimators=2, random_state=1,
                                     max_depth=1, base_score=base_score)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.XGBoostModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    expected = ast.BinNumExpr(
        ast.NumVal(base_score),
        ast.BinNumExpr(
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(12),
                    ast.NumVal(9.725),
                    ast.CompOpType.GTE),
                ast.NumVal(4.98425627),
                ast.NumVal(8.75091362)),
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(5),
                    ast.NumVal(6.941),
                    ast.CompOpType.GTE),
                ast.NumVal(8.34557438),
                ast.NumVal(3.9141891)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
Example #27
Source File: __init__.py From deepchem with MIT License | 5 votes |
def fit(self, dataset, **kwargs):
    """
    Fits XGBoost model to data.
    """
    X = dataset.X
    y = np.squeeze(dataset.y)
    w = np.squeeze(dataset.w)
    seed = self.model_instance.random_state
    import xgboost as xgb
    if isinstance(self.model_instance, xgb.XGBClassifier):
        xgb_metric = "auc"
        sklearn_metric = "roc_auc"
        stratify = y
    elif isinstance(self.model_instance, xgb.XGBRegressor):
        xgb_metric = "mae"
        sklearn_metric = "neg_mean_absolute_error"
        stratify = None
    best_param = self._search_param(sklearn_metric, X, y)
    # Update model with best param
    self.model_instance = self.model_class(**best_param)

    # Find optimal n_estimators based on original learning_rate
    # and early_stopping_rounds
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=stratify)
    self.model_instance.fit(
        X_train,
        y_train,
        early_stopping_rounds=self.early_stopping_rounds,
        eval_metric=xgb_metric,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        verbose=self.verbose)
    # Since test size is 20%, when retraining the model on the whole data,
    # expect n_estimators to grow by 1/0.8 = 1.25 times.
    estimated_best_round = np.round(self.model_instance.best_ntree_limit * 1.25)
    self.model_instance.n_estimators = np.int64(estimated_best_round)
    self.model_instance.fit(X, y, eval_metric=xgb_metric, verbose=self.verbose)
Example #28
Source File: test_generalize.py From deepchem with MIT License | 5 votes |
def test_xgboost_regression(self):
    import xgboost
    np.random.seed(123)

    dataset = sklearn.datasets.load_diabetes()
    X, y = dataset.data, dataset.target
    frac_train = .7
    n_samples = len(X)
    n_train = int(frac_train * n_samples)
    X_train, y_train = X[:n_train], y[:n_train]
    X_test, y_test = X[n_train:], y[n_train:]
    train_dataset = dc.data.NumpyDataset(X_train, y_train)
    test_dataset = dc.data.NumpyDataset(X_test, y_test)

    regression_metric = dc.metrics.Metric(dc.metrics.mae_score)
    # Set early stopping round = n_estimators so that esr won't work
    esr = {'early_stopping_rounds': 50}

    xgb_model = xgboost.XGBRegressor(n_estimators=50, random_state=123)
    model = dc.models.XGBoostModel(xgb_model, verbose=False, **esr)

    # Fit trained model
    model.fit(train_dataset)
    model.save()

    # Eval model on test
    scores = model.evaluate(test_dataset, [regression_metric])
    assert scores[regression_metric.name] < 55
Example #29
Source File: test_generalize.py From deepchem with MIT License | 5 votes |
def test_xgboost_multitask_regression(self):
    import xgboost
    np.random.seed(123)
    n_tasks = 4
    tasks = range(n_tasks)

    dataset = sklearn.datasets.load_diabetes()
    X, y = dataset.data, dataset.target
    y = np.reshape(y, (len(y), 1))
    y = np.hstack([y] * n_tasks)

    frac_train = .7
    n_samples = len(X)
    n_train = int(frac_train * n_samples)
    X_train, y_train = X[:n_train], y[:n_train]
    X_test, y_test = X[n_train:], y[n_train:]
    train_dataset = dc.data.DiskDataset.from_numpy(X_train, y_train)
    test_dataset = dc.data.DiskDataset.from_numpy(X_test, y_test)

    regression_metric = dc.metrics.Metric(dc.metrics.mae_score)
    esr = {'early_stopping_rounds': 50}

    def model_builder(model_dir):
        xgb_model = xgboost.XGBRegressor(n_estimators=50, seed=123)
        return dc.models.XGBoostModel(xgb_model, model_dir, verbose=False, **esr)

    model = dc.models.SingletaskToMultitask(tasks, model_builder)

    # Fit trained model
    model.fit(train_dataset)
    model.save()

    # Eval model on test
    scores = model.evaluate(test_dataset, [regression_metric])
    for score in scores[regression_metric.name]:
        assert score < 50
Example #30
Source File: spark_tools.py From go-ml-transpiler with Apache License 2.0 | 5 votes |
def load_spark_model(model_path, metadata_path):
    import xgboost as xgb
    import json
    import numpy as np

    if not isinstance(model_path, str) or not isinstance(metadata_path, str):
        raise ValueError("model and metadata paths must be str, not {0} and {1}".format(
            type(model_path), type(metadata_path)))

    with open(metadata_path) as f:
        metadata = json.loads(f.read().strip())

    xgb_class = metadata.get("class")
    if xgb_class == "ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel":
        clf = xgb.XGBClassifier()
        setattr(clf, "base_score", metadata["paramMap"]["baseScore"])
    elif xgb_class == "ml.dmlc.xgboost4j.scala.spark.XGBoostRegressionModel":
        clf = xgb.XGBRegressor()
    else:
        raise ValueError("Unsupported model.")

    setattr(clf, "objective", metadata["paramMap"]["objective"])
    setattr(clf, "missing",
            np.nan if metadata["paramMap"]["missing"] in ["NaN", "nan", "null", "None"]
            else metadata["paramMap"]["missing"])
    setattr(clf, "booster", metadata["paramMap"].get("booster", "gbtree"))
    setattr(clf, "n_estimators", metadata["paramMap"].get("numRound", 1))

    booster = xgb.Booster()
    booster.load_model(model_path)
    clf._Booster = booster

    return clf
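A hypothetical call to load_spark_model, assuming a booster file and metadata JSON exported from XGBoost4J-Spark; the paths and the feature count below are placeholders, not part of the original project:

import numpy as np

# Placeholder paths to an exported XGBoost4J-Spark model and its metadata
regressor = load_spark_model("exported/xgbModel", "exported/metadata/part-00000")

# The column count must match the features the Spark model was trained on
sample = np.random.rand(5, 10).astype(np.float32)
print(regressor.predict(sample))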