Python xgboost.DMatrix() Examples
The following are 30 code examples of xgboost.DMatrix().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module xgboost, or try the search function.
Example #1
Source File: runner.py From ai-platform with MIT License | 8 votes |
def fit(self):
    """
    Fetch the input data, preprocess it with prepare_data(), train a booster
    with the best parameters found by grid_search() and save it under "model".

    Returns the test frame produced by prepare_data().
    """
    raw = self.get_input()
    df_train, df_test = self.prepare_data(raw)

    # 'Value' is used as the label; every other column is a feature.
    features = df_train.drop(['Value'], axis=1)
    target = df_train['Value'].values
    train_matrix = xgb.DMatrix(features, target)

    search = self.grid_search(features, target)
    booster = xgb.train(dtrain=train_matrix, params=search.best_params_)

    # save model to file
    mlflow.sklearn.save_model(booster, "model")
    return df_test
Example #2
Source File: models.py From steppy-toolkit with MIT License | 7 votes |
def fit(self, X, y, X_valid, y_valid):
    """Train a booster on (X, y) while evaluating on (X_valid, y_valid).

    Logs the shapes of all four inputs, builds the two DMatrix objects with
    ``self.dmatrix_parameters`` and stores the trained booster in
    ``self.estimator``.

    Returns:
        self, so calls can be chained.
    """
    shape_reports = (
        ('XGBoost, train data shape {}', X),
        ('XGBoost, validation data shape {}', X_valid),
        ('XGBoost, train labels shape {}', y),
        ('XGBoost, validation labels shape {}', y_valid),
    )
    for template, array in shape_reports:
        logger.info(template.format(array.shape))

    dtrain = xgb.DMatrix(data=X, label=y, **self.dmatrix_parameters)
    dvalid = xgb.DMatrix(data=X_valid, label=y_valid,
                         **self.dmatrix_parameters)

    watchlist = [(dtrain, 'train'), (dvalid, 'valid')]
    self.estimator = xgb.train(params=self.booster_parameters,
                               dtrain=dtrain,
                               evals=watchlist,
                               **self.training_parameters)
    return self
Example #3
Source File: test_transform.py From pydatalab with Apache License 2.0 | 6 votes |
def test_local_csv_transform(self):
    """Test transform from local csv files."""
    # Hand the arguments over as an argv list (no shell) so that paths with
    # spaces or shell metacharacters reach transform.py unchanged.
    cmd = ['python',
           os.path.join(CODE_PATH, 'transform.py'),
           '--csv=' + self.csv_input_filepath,
           '--analysis=' + self.analysis_dir,
           '--prefix=features',
           '--output=' + self.output_dir]
    print('cmd ', ' '.join(cmd))
    subprocess.check_call(cmd)

    # Verify transformed file.
    libsvm_filepath = os.path.join(self.output_dir,
                                   'features-00000-of-00001.libsvm')
    dtrain = xgb.DMatrix(libsvm_filepath)
    # assertTrue(expected, actual) never fails for a truthy first argument
    # (the second argument is only the failure message); compare for real.
    self.assertEqual(2056, dtrain.num_col())
    self.assertEqual(3, dtrain.num_row())

    # Verify featuremap file.
    featuremap_filepath = os.path.join(self.output_dir,
                                       'featuremap-00000-of-00001.txt')
    df = pd.read_csv(featuremap_filepath, names=['index', 'description'])
    pd.util.testing.assert_series_equal(
        pd.Series(range(1, 2056), name='index'), df['index'])
    expected_descriptions = ['cat_col=Sunday',
                             'cat_col=Monday',
                             'img_col image feature 1000',
                             'num_col',
                             'text_col has "blue"']
    self.assertTrue(all(x in df['description'].values
                        for x in expected_descriptions))
Example #4
Source File: runner.py From ai-platform with MIT License | 6 votes |
def predict(self, df_test):
    """
    Makes prediction for the next 7 days electricity consumption.

    Loads the model stored under "model", predicts on every column of
    df_test except 'Value', prints and logs the evaluation metrics to mlflow,
    plots the result and hands the predictions to save_output().
    """
    # load model from file
    model = mlflow.sklearn.load_model("model")

    # make predictions for test data
    features = df_test.drop(['Value'], axis=1)
    actual = df_test['Value'].values
    predicted = model.predict(xgb.DMatrix(features))
    prediction = pd.DataFrame({'Prediction': predicted})

    mape, rmse, mae, r2 = ForecastRunner.evaluation_metrics(actual, predicted)
    reported = (("MAPE", mape), ("RMSE", rmse), ("R2", r2), ("MAE", mae))

    # Print everything first, then log, in the same metric order.
    for label, score in reported:
        print('{}: {}'.format(label, score))
    for label, score in reported:
        mlflow.log_metric(label, score)

    ForecastRunner.plot_result(actual, predicted)
    self.save_output(df_test, prediction)
Example #5
Source File: predict.py From mars with Apache License 2.0 | 6 votes |
def execute(cls, ctx, op):
    """Run the model's prediction for one chunk and store it in ``ctx``.

    The input is looked up in ``ctx`` under ``op.data.key``. A tuple input is
    converted through ``ToDMatrix.get_xgb_dmatrix``; anything else is wrapped
    in a ``DMatrix`` directly. The prediction is wrapped in a DataFrame or
    Series (reusing the input's index) when the output chunk type asks for
    it, and is written to ``ctx`` under the first output's key.
    """
    from xgboost import DMatrix

    raw_data = ctx[op.data.key]
    if isinstance(raw_data, tuple):
        dmatrix = ToDMatrix.get_xgb_dmatrix(raw_data)
    else:
        dmatrix = DMatrix(raw_data)

    predictions = op.model.predict(dmatrix)

    output = op.outputs[0]
    if isinstance(output, DATAFRAME_CHUNK_TYPE):
        predictions = pd.DataFrame(predictions, index=raw_data.index)
    elif isinstance(output, SERIES_CHUNK_TYPE):
        predictions = pd.Series(predictions, index=raw_data.index,
                                name='predictions')

    ctx[output.key] = predictions
Example #6
Source File: test_boosted_trees_classifier.py From coremltools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    from sklearn.datasets import load_boston
    import numpy as np

    scikit_data = load_boston()

    # Turn the continuous target into integer class labels: bin it with the
    # edges np.histogram computes and shift the bin numbers to start at 0.
    values = scikit_data.target
    _, bin_edges = np.histogram(values)
    target = np.digitize(values, bin_edges) - 1

    dtrain = xgboost.DMatrix(
        scikit_data.data,
        label=target,
        feature_names=scikit_data.feature_names,
    )

    # Save the data and the model
    self.xgb_model = xgboost.train({}, dtrain)
    self.target = target
    self.scikit_data = scikit_data
    self.n_classes = len(np.unique(self.target))
Example #7
Source File: ch06-06-wrapper.py From kagglebook with BSD 3-Clause "New" or "Revised" License | 6 votes |
def evaluate(features):
    """Return the validation log loss of a short XGBoost run on ``features``.

    ``features`` selects the columns of the module-level ``tr_x`` / ``va_x``
    frames that the model is trained and validated on.
    """
    dtrain = xgb.DMatrix(tr_x[features], label=tr_y)
    dvalid = xgb.DMatrix(va_x[features], label=va_y)

    params = {'objective': 'binary:logistic', 'silent': 1, 'random_state': 71}
    num_round = 10  # in practice many more rounds are needed
    early_stopping_rounds = 3

    model = xgb.train(
        params,
        dtrain,
        num_round,
        evals=[(dtrain, 'train'), (dvalid, 'eval')],
        early_stopping_rounds=early_stopping_rounds,
        verbose_eval=0,
    )
    return log_loss(va_y, model.predict(dvalid))


# ---------------------------------
# Greedy Forward Selection
# ----------------------------------
Example #8
Source File: test_boosted_trees_regression.py From coremltools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    # Nothing to prepare unless both optional dependencies are available.
    if not (_HAS_XGBOOST and _HAS_SKLEARN):
        return

    boston = load_boston()
    names = boston.feature_names
    dtrain = xgboost.DMatrix(boston.data, label=boston.target,
                             feature_names=names)
    # A single boosting round is requested.
    booster = xgboost.train({}, dtrain, 1)

    # Save the data and the model
    self.scikit_data = boston
    self.xgb_model = booster
    self.feature_names = self.scikit_data.feature_names
Example #9
Source File: model_xgb.py From kagglebook with BSD 3-Clause "New" or "Revised" License | 6 votes |
def train(self, tr_x, tr_y, va_x=None, va_y=None):
    """Train an XGBoost model and store it in ``self.model``.

    When ``va_x`` is given, the validation set is added to the watch list and
    early stopping is enabled using the ``early_stopping_rounds`` entry of
    ``self.params``. ``num_round`` is always taken from ``self.params``.
    """
    # Set up the data
    has_validation = va_x is not None
    dtrain = xgb.DMatrix(tr_x, label=tr_y)
    if has_validation:
        dvalid = xgb.DMatrix(va_x, label=va_y)

    # Set up the hyperparameters (work on a copy so self.params is untouched)
    params = dict(self.params)
    num_round = params.pop('num_round')

    # Training
    watchlist = [(dtrain, 'train')]
    extra_args = {}
    if has_validation:
        extra_args['early_stopping_rounds'] = params.pop('early_stopping_rounds')
        watchlist.append((dvalid, 'eval'))

    self.model = xgb.train(params, dtrain, num_round, evals=watchlist,
                           **extra_args)
Example #10
Source File: encoder.py From sagemaker-xgboost-container with Apache License 2.0 | 6 votes |
def libsvm_to_dmatrix(string_like):  # type: (bytes) -> xgb.DMatrix
    """Build a DMatrix from LIBSVM-formatted bytes.

    The payload is written to a temporary file, loaded from that path, and
    the file is removed afterwards whether or not loading succeeded.

    Args:
        string_like (bytes): LIBSVM string.

    Returns:
        (xgb.DMatrix): XGBoost DataMatrix
    """
    temp_path = None
    try:
        # delete=False: the file must outlive the ``with`` block so it can be
        # loaded by path after it has been closed.
        with tempfile.NamedTemporaryFile(delete=False) as handle:
            temp_path = handle.name
            handle.write(string_like)

        return xgb.DMatrix(temp_path)
    finally:
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
Example #11
Source File: level2.py From kaggle-kuzushiji-2019 with MIT License | 6 votes |
def train_xgb(train_features, train_y, valid_features, valid_y, *,
              eta, num_boost_round):
    """Train a binary-logistic booster with early stopping on the validation set.

    The parameter dict is printed before training. Returns whatever
    ``xgb.train`` returns.
    """
    dtrain = xgb.DMatrix(train_features, label=train_y)
    dvalid = xgb.DMatrix(valid_features, label=valid_y)

    params = dict(
        eta=eta,
        objective='binary:logistic',
        gamma=0.01,
        max_depth=8,
    )
    print(params)

    # Only the validation set is watched.
    watchlist = [(dvalid, 'eval')]
    booster = xgb.train(
        params, dtrain, num_boost_round, watchlist,
        early_stopping_rounds=20,
        verbose_eval=10,
    )
    return booster
Example #12
Source File: wrap_xgb.py From gestalt with MIT License | 6 votes |
def fit(self, X, y, x_val=None, y_val=None):
    """Train a booster on (X, y) and store it in ``self.xgb``.

    When ``x_val`` is provided, both the training and validation sets are
    passed as ``evals``; otherwise no evaluation sets are passed.
    """
    dtrain = xgb.DMatrix(X, label=y)

    watchlist = None
    if x_val is not None:
        dvalid = xgb.DMatrix(x_val, label=y_val)
        watchlist = [(dtrain, 'train'), (dvalid, 'validation')]

    train_args = dict(
        params=self.params,
        dtrain=dtrain,
        num_boost_round=self.num_round,
        early_stopping_rounds=self.early_stopping_rounds,
        verbose_eval=self.verbose,
    )
    # Only pass ``evals`` when there is a validation set, so the no-validation
    # call keeps using xgb.train's own default.
    if watchlist is not None:
        train_args['evals'] = watchlist

    self.xgb = xgb.train(**train_args)
Example #13
Source File: data_utils.py From sagemaker-xgboost-container with Apache License 2.0 | 6 votes |
def _get_parquet_dmatrix_file_mode(files_path):
    """Get Data Matrix from parquet data in file mode.

    Column 0 of the loaded data is used as the label and the remaining
    columns as features.

    :param files_path: File path where parquet formatted training data resides, either directory or file
    :return: xgb.DMatrix
    :raises exc.UserError: if anything goes wrong while loading or converting
    """
    try:
        # Chaining drops the pyarrow table as soon as it has been converted.
        loaded = pq.read_table(files_path).to_pandas()

        if type(loaded) is pd.DataFrame:
            # pyarrow.Table.to_pandas may produce NumPy array or pandas DataFrame
            loaded = loaded.to_numpy()

        features, labels = loaded[:, 1:], loaded[:, 0]
        # Drop the extra reference to the full array before building the DMatrix.
        del loaded
        return xgb.DMatrix(features, label=labels)
    except Exception as e:
        raise exc.UserError("Failed to load parquet data with exception:\n{}".format(e))
Example #14
Source File: data_utils.py From sagemaker-xgboost-container with Apache License 2.0 | 6 votes |
def get_libsvm_dmatrix(files_path, is_pipe=False):
    """Load a DMatrix from LIBSVM data on disk.

    Pipe mode not currently supported for libsvm.

    :param files_path: File path where LIBSVM formatted training data resides, either directory or file
    :param is_pipe: Boolean to indicate if data is being read in pipe mode
    :return: xgb.DMatrix
    :raises exc.UserError: in pipe mode, or when loading fails
    """
    if is_pipe:
        raise exc.UserError("Pipe mode not supported for LibSVM.")

    try:
        return xgb.DMatrix(files_path)
    except Exception as e:
        raise exc.UserError("Failed to load libsvm data with exception:\n{}".format(e))
Example #15
Source File: test_boosted_trees_regression_numeric.py From coremltools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def setUpClass(self):
    """Load the Boston data and prepare the training matrix for the tests."""
    if not (_HAS_XGBOOST and _HAS_SKLEARN):
        return

    # Load data and train model
    boston = load_boston()
    names = boston.feature_names

    # self.X is cast to float32 and back to float64; note that the DMatrix
    # below is built from the original, un-cast data.
    self.X = boston.data.astype("f").astype("d")
    self.dtrain = xgboost.DMatrix(boston.data, label=boston.target,
                                  feature_names=names)
    self.feature_names = boston.feature_names
    self.output_name = "target"
Example #16
Source File: train_xgboost.py From jh-kaggle-util with Apache License 2.0 | 6 votes |
def train_model(self, x_train, y_train, x_val, y_val):
    """Train an XGBoost model, with early stopping when validation labels exist.

    Without ``y_val`` only the training set is watched. With ``y_val`` the
    validation set is watched too and early stopping is enabled; an
    ``early_stop`` of 0 means "use the full number of rounds" as patience.
    The model's ``best_iteration`` is recorded in ``self.steps``.

    Returns the trained model.
    """
    print("Will train XGB for {} rounds, RandomSeed: {}".format(self.rounds, self.params['seed']))

    xg_train = xgb.DMatrix(x_train, label=y_train)
    watchlist = [(xg_train, 'train')]
    extra_args = {}

    if y_val is not None:
        patience = self.early_stop if self.early_stop != 0 else self.rounds
        extra_args['early_stopping_rounds'] = patience
        watchlist.append((xgb.DMatrix(x_val, label=y_val), 'eval'))

    model = xgb.train(self.params, xg_train, self.rounds, watchlist,
                      **extra_args)

    self.steps = model.best_iteration
    return model
Example #17
Source File: wrap_xgb.py From gestalt with MIT License | 6 votes |
def fit(self, X, y, x_val=None, y_val=None):
    """Train a booster on (X, y) and store it in ``self.clf``.

    With a validation set, both sets are passed as ``evals`` and
    ``verbose_eval`` follows ``self.verbose``; without one, neither argument
    is passed.
    """
    dtrain = xgb.DMatrix(X, label=y)

    validation_args = {}
    if x_val is not None:
        dvalid = xgb.DMatrix(x_val, label=y_val)
        validation_args = {
            'evals': [(dtrain, 'train'), (dvalid, 'validation')],
            'verbose_eval': self.verbose,
        }

    self.clf = xgb.train(params=self.params,
                         dtrain=dtrain,
                         num_boost_round=self.num_round,
                         early_stopping_rounds=self.early_stopping_rounds,
                         **validation_args)
Example #18
Source File: dataset.py From ebonite with Apache License 2.0 | 5 votes |
def deserialize(self, obj: list) -> xgboost.DMatrix:
    """
    Build an xgboost.DMatrix from the given payload.

    :param obj: data to wrap in a DMatrix
    :return: the resulting :class:`xgboost.DMatrix`
    :raises DeserializationError: if xgboost rejects the payload with
        a ValueError or TypeError
    """
    try:
        matrix = xgboost.DMatrix(obj)
    except (ValueError, TypeError):
        raise DeserializationError(f'given object: {obj} could not be converted to xgboost matrix')
    return matrix
Example #19
Source File: xgbranker.py From xgboostExtension with Apache License 2.0 | 5 votes |
def predict(self, X, output_margin=False, ntree_limit=0):
    """Return the booster's predictions for the rows of ``X``.

    ``X`` is validated with ``check_array`` (sparse input allowed) and its
    first column is dropped before prediction.
    """
    checked = check_array(X, accept_sparse=True)

    # NOTE(review): column 0 is skipped -- presumably it carries the group /
    # query id rather than a feature; confirm against fit().
    features = checked[:, 1:]
    dmatrix = DMatrix(features, missing=self.missing)

    booster = self.get_booster()
    return booster.predict(dmatrix,
                           output_margin=output_margin,
                           ntree_limit=ntree_limit)
Example #20
Source File: ch05-01-validation.py From kagglebook with BSD 3-Clause "New" or "Revised" License | 5 votes |
def predict(self, x):
    """Return ``self.model``'s predictions for ``x``."""
    return self.model.predict(xgb.DMatrix(x))


# -----------------------------------
# Hold-out method
# -----------------------------------
# Splitting off the validation data with the hold-out method
Example #21
Source File: xgboost_models.py From gentun with Apache License 2.0 | 5 votes |
def cross_validate(self):
    """Train model using k-fold cross validation and
    return mean value of validation metric.

    The value returned is the mean test metric of the last boosting round
    that ``xgb.cv`` reports.
    """
    d_train = xgb.DMatrix(self.x_train, label=self.y_train)
    # xgb calls its k-fold cross-validation parameter 'nfold'.
    # as_pandas=False keeps the history as plain lists: with pandas installed
    # xgb.cv would otherwise return a DataFrame, and ``series[-1]`` on its
    # integer index is a label lookup that raises KeyError instead of
    # returning the last round.
    cv_result = xgb.cv(
        self.params, d_train, num_boost_round=self.num_boost_round,
        early_stopping_rounds=self.early_stopping_rounds, nfold=self.kfold,
        as_pandas=False
    )
    return cv_result['test-{}-mean'.format(self.eval_metric)][-1]
Example #22
Source File: ch05-01-validation.py From kagglebook with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit(self, tr_x, tr_y, va_x, va_y):
    """Train for 10 rounds on the training set and store the model.

    Default parameters are overridden by the entries of ``self.params``.
    Both the training and validation sets are watched during training.
    """
    params = dict(objective='binary:logistic', silent=1, random_state=71)
    params.update(self.params)
    num_round = 10

    dtrain = xgb.DMatrix(tr_x, label=tr_y)
    dvalid = xgb.DMatrix(va_x, label=va_y)

    self.model = xgb.train(
        params,
        dtrain,
        num_round,
        evals=[(dtrain, 'train'), (dvalid, 'eval')],
    )
Example #23
Source File: dataset.py From ebonite with Apache License 2.0 | 5 votes |
def process(self, obj: xgboost.DMatrix, **kwargs) -> DatasetType:
    """
    Describe the given DMatrix as a dataset type.

    :param obj: matrix to analyze
    :param kwargs: accepted but not used here
    :return: result of :meth:`DMatrixDatasetType.from_dmatrix`
    """
    dataset_type = DMatrixDatasetType.from_dmatrix(obj)
    return dataset_type
Example #24
Source File: test_dataset.py From ebonite with Apache License 2.0 | 5 votes |
def test_deserialize__np(dtype_np, np_payload):
    """Deserializing the numpy payload must yield an xgboost.DMatrix."""
    result = dtype_np.deserialize(np_payload)
    assert isinstance(result, xgboost.DMatrix)
Example #25
Source File: dataset.py From ebonite with Apache License 2.0 | 5 votes |
def from_dmatrix(cls, dmatrix: xgboost.DMatrix):
    """
    Factory method to extract :class:`~.DatasetType` from actual xgboost.DMatrix

    The matrix counts as "from list" when its feature names are exactly
    ``f0``, ``f1``, ... for each of its columns.

    :param dmatrix: obj to create :class:`~.DatasetType` from
    :return: :class:`DMatrixDatasetType`
    """
    names = dmatrix.feature_names
    generated_names = [f'f{idx}' for idx in range(dmatrix.num_col())]
    is_from_list = names == generated_names
    return DMatrixDatasetType(is_from_list, dmatrix.feature_types,
                              dmatrix.feature_names)
Example #26
Source File: dataset.py From ebonite with Apache License 2.0 | 5 votes |
def serialize(self, instance: xgboost.DMatrix) -> list:
    """
    Always fails: there is no way to extract original data from DMatrix.

    :param instance: matrix that was asked to be serialized (not used)
    :raises SerializationError: on every call
    """
    message = 'xgboost matrix does not support serialization'
    raise SerializationError(message)
Example #27
Source File: model.py From ebonite with Apache License 2.0 | 5 votes |
def _predict(self, data):
    """Predict with ``self.model``, wrapping ``data`` in a DMatrix if needed."""
    if isinstance(data, xgboost.DMatrix):
        matrix = data
    else:
        matrix = xgboost.DMatrix(data)
    return self.model.predict(matrix)
Example #28
Source File: xgboost.py From mljar-supervised with MIT License | 5 votes |
def predict(self, X):
    """Return the model's predictions for ``X``.

    NaN entries in ``X`` are treated as missing values. Prediction is limited
    to ``self.best_ntree_limit`` trees.

    Raises:
        XgbAlgorithmException: if no model has been set.
    """
    if self.model is None:
        raise XgbAlgorithmException("Xgboost model is None")
    # np.nan rather than np.NaN: the capitalised alias was removed in
    # NumPy 2.0, while np.nan exists in every NumPy version.
    dtrain = xgb.DMatrix(X, missing=np.nan)
    a = self.model.predict(dtrain, ntree_limit=self.best_ntree_limit)
    return a
Example #29
Source File: wrap_xgb.py From gestalt with MIT License | 5 votes |
def predict_proba(self, X):
    """Return the output of ``self.clf.predict`` for ``X``."""
    return self.clf.predict(xgb.DMatrix(X))


# Regressor Wrapper Class
Example #30
Source File: wrap_xgb.py From gestalt with MIT License | 5 votes |
def predict(self, X):
    """Return the output of ``self.xgb.predict`` for ``X``."""
    return self.xgb.predict(xgb.DMatrix(X))