Python lightgbm.Dataset() Examples
The following are 30 code examples of lightgbm.Dataset().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module lightgbm, or try the search function.
Example #1
Source File: avito2.py From MachineLearning with Apache License 2.0 | 9 votes |
def run_lgb(train_X, train_y, val_X, val_y, test_X):
    """Train a LightGBM regressor with early stopping and predict on test data.

    Args:
        train_X: Training features.
        train_y: Training targets.
        val_X: Validation features, passed to ``lgb.train`` as the only
            validation set.
        val_y: Validation targets.
        test_X: Features to predict on.

    Returns:
        A tuple ``(pred_test_y, model, evals_result)``: predictions for
        ``test_X`` made at ``model.best_iteration``, the trained booster, and
        the dict that was handed to ``lgb.train`` as ``evals_result``.
    """
    params = {
        "objective": "regression",
        "metric": "rmse",
        "num_leaves": 30,
        "learning_rate": 0.1,
        "bagging_fraction": 0.7,
        "feature_fraction": 0.7,
        # LightGBM names this setting "bagging_freq". The former key
        # "bagging_frequency" is not a recognised parameter or alias, so it
        # was ignored and bagging_fraction never took effect.
        "bagging_freq": 5,
        "bagging_seed": 2018,
        "verbosity": -1,
    }

    lgtrain = lgb.Dataset(train_X, label=train_y)
    lgval = lgb.Dataset(val_X, label=val_y)
    evals_result = {}
    model = lgb.train(
        params,
        lgtrain,
        10000,
        valid_sets=[lgval],
        early_stopping_rounds=100,
        verbose_eval=20,
        evals_result=evals_result,
    )

    pred_test_y = model.predict(test_X, num_iteration=model.best_iteration)
    return pred_test_y, model, evals_result


# Splitting the data for model training#
Example #2
Source File: automl.py From kddcup2019-automl with MIT License | 8 votes |
def hyperopt_lightgbm(X: pd.DataFrame, y: pd.Series, params: Dict, config: Config):
    """Tune LightGBM hyperparameters with hyperopt (TPE, 150 evaluations).

    The data is split in half into a training and a validation part; every
    candidate is trained for at most 300 rounds with early stopping and
    scored by ``params["metric"]`` on the validation part.

    Args:
        X: Feature frame.
        y: Target series.
        params: Fixed LightGBM parameters; must contain the key ``"metric"``.
        config: Not used by this function.

    Returns:
        The best hyperparameter assignment found, as resolved by
        ``space_eval``.
    """
    X_train, X_val, y_train, y_val = data_split(X, y, test_size=0.5)
    dtrain = lgb.Dataset(X_train, label=y_train)
    dvalid = lgb.Dataset(X_val, label=y_val)

    space = {
        "max_depth": hp.choice("max_depth", np.arange(2, 10, 1, dtype=int)),
        # smaller than 2^(max_depth)
        "num_leaves": hp.choice("num_leaves", np.arange(4, 200, 4, dtype=int)),
        "feature_fraction": hp.quniform("feature_fraction", 0.2, 0.8, 0.1),
        # "bagging_fraction": hp.quniform("bagging_fraction", 0.2, 0.8, 0.1),
        # "bagging_freq": hp.choice("bagging_freq", np.linspace(0, 10, 2, dtype=int)),
        # "scale_pos_weight":hp.uniform('scale_pos_weight',1.0, 10.0),
        # "colsample_by_tree":hp.uniform("colsample_bytree",0.5,1.0),
        "min_child_weight": hp.quniform('min_child_weight', 2, 50, 2),
        "reg_alpha": hp.uniform("reg_alpha", 2.0, 8.0),
        "reg_lambda": hp.uniform("reg_lambda", 2.0, 8.0),
        "learning_rate": hp.quniform("learning_rate", 0.05, 0.4, 0.01),
        # "learning_rate": hp.loguniform("learning_rate", np.log(0.04), np.log(0.5)),
        # "min_data_in_leaf": hp.choice('min_data_in_leaf', np.arange(200, 2000, 100, dtype=int)),
        # "is_unbalance": hp.choice("is_unbalance", [True])
    }

    def objective(hyperparams):
        booster = lgb.train(
            {**params, **hyperparams},
            dtrain,
            300,
            dvalid,
            early_stopping_rounds=45,
            verbose_eval=0,
        )
        validation_score = booster.best_score["valid_0"][params["metric"]]
        # fmin minimises the loss, so the score is negated before returning.
        return {'loss': -validation_score, 'status': STATUS_OK}

    trials = Trials()
    best = hyperopt.fmin(
        fn=objective,
        space=space,
        trials=trials,
        algo=tpe.suggest,
        max_evals=150,
        verbose=1,
        rstate=np.random.RandomState(1),
    )

    hyperparams = space_eval(space, best)
    log(f"auc = {-trials.best_trial['result']['loss']:0.4f} {hyperparams}")
    return hyperparams
Example #3
Source File: automl.py From Kaggler with MIT License | 7 votes |
def optimize_hyperparam(self, X, y, test_size=.2, n_eval=100):
    """Search ``self.space`` for LightGBM hyperparameters with hyperopt TPE.

    Args:
        X: Features.
        y: Targets.
        test_size: Share of the data held out for validation.
        n_eval: Maximum number of hyperopt evaluations.

    Returns:
        A tuple ``(hyperparams, trials)``: the best assignment resolved by
        ``space_eval`` and the hyperopt ``Trials`` object of the search.
    """
    X_trn, X_val, y_trn, y_val = train_test_split(
        X, y, test_size=test_size, shuffle=self.shuffle
    )
    dtrain = lgb.Dataset(X_trn, label=y_trn)
    dvalid = lgb.Dataset(X_val, label=y_val)

    def objective(hyperparams):
        merged_params = {**self.params, **hyperparams}
        booster = lgb.train(
            merged_params,
            dtrain,
            self.n_est,
            dvalid,
            early_stopping_rounds=self.n_stop,
            verbose_eval=0,
        )
        # loss_sign turns the validation metric into a quantity to minimise.
        loss = booster.best_score["valid_0"][self.metric] * self.loss_sign
        return {'loss': loss, 'status': STATUS_OK, 'model': booster}

    trials = Trials()
    best = hyperopt.fmin(
        fn=objective,
        space=self.space,
        trials=trials,
        algo=tpe.suggest,
        max_evals=n_eval,
        verbose=1,
        rstate=self.random_state,
    )

    return space_eval(self.space, best), trials
Example #4
Source File: level2.py From kaggle-kuzushiji-2019 with MIT License | 7 votes |
def train_lgb(train_features, train_y, valid_features, valid_y, *,
              lr, num_boost_round):
    """Train a binary LightGBM classifier with early stopping.

    Args:
        train_features: Training feature matrix.
        train_y: Training labels.
        valid_features: Validation feature matrix.
        valid_y: Validation labels.
        lr: Learning rate (keyword-only).
        num_boost_round: Maximum number of boosting rounds (keyword-only).

    Returns:
        The booster returned by ``lgb.train``.
    """
    dtrain = lgb.Dataset(train_features, train_y)
    dvalid = lgb.Dataset(valid_features, valid_y, reference=dtrain)

    params = dict([
        ('objective', 'binary'),
        ('metric', 'binary_logloss'),
        ('learning_rate', lr),
        ('bagging_fraction', 0.8),
        ('bagging_freq', 5),
        ('feature_fraction', 0.9),
        ('min_data_in_leaf', 20),
        ('num_leaves', 41),
        ('scale_pos_weight', 1.2),
        ('lambda_l2', 1),
    ])
    print(params)

    booster = lgb.train(
        params=params,
        train_set=dtrain,
        num_boost_round=num_boost_round,
        early_stopping_rounds=20,
        valid_sets=[dvalid],
        verbose_eval=10,
    )
    return booster
Example #5
Source File: optimize.py From optuna with MIT License | 6 votes |
def _get_booster_best_score(self, booster: "lgb.Booster") -> float:
    """Return the booster's best score on the last validation set.

    The validation-set name is resolved in this order:

    1. ``lgbm_kwargs["valid_names"]`` when given: the string itself, or the
       last element of a list/tuple.
    2. ``"valid_0"`` when ``valid_sets`` is a single ``lgb.Dataset``.
    3. ``"valid_<n-1>"`` when ``valid_sets`` is a non-empty list/tuple of
       ``n`` entries.

    ``isinstance`` is used for all checks so that subclasses of ``str``,
    ``list``, ``tuple`` and ``lgb.Dataset`` are accepted as well.

    Args:
        booster: Trained booster whose ``best_score`` mapping is read.

    Returns:
        ``booster.best_score[valid_name][metric]`` for the resolved name and
        the metric reported by ``self._get_metric_for_objective()``.

    Raises:
        NotImplementedError: If ``valid_names`` has an unsupported type, or
            if no validation set can be determined.
    """
    metric = self._get_metric_for_objective()
    valid_sets = self.lgbm_kwargs.get("valid_sets")  # type: Optional[VALID_SET_TYPE]
    valid_names = self.lgbm_kwargs.get("valid_names")

    if valid_names is not None:
        if isinstance(valid_names, str):
            valid_name = valid_names
        elif isinstance(valid_names, (list, tuple)):
            valid_name = valid_names[-1]
        else:
            raise NotImplementedError(
                "Unsupported type for valid_names: {}".format(type(valid_names).__name__)
            )
    elif isinstance(valid_sets, (list, tuple)):
        # Checked before touching ``lgb`` so plain sequences never need it.
        if len(valid_sets) == 0:
            raise NotImplementedError("valid_sets must not be empty")
        valid_name = "valid_{}".format(len(valid_sets) - 1)
    elif isinstance(valid_sets, lgb.Dataset):
        valid_name = "valid_0"
    else:
        raise NotImplementedError(
            "Cannot determine the validation set name from valid_sets"
        )

    return booster.best_score[valid_name][metric]
Example #6
Source File: test_lightgbm.py From docker-python with Apache License 2.0 | 6 votes |
def test_gpu(self):
    """Train one boosting round on the GPU device and check it is the best one."""
    train_set = lgb.Dataset('/input/tests/data/lgb_train.bin')
    eval_set = lgb.Dataset('/input/tests/data/lgb_test.bin', reference=train_set)

    params = dict(
        boosting_type='gbdt',
        objective='regression',
        metric='auc',
        num_leaves=31,
        learning_rate=0.05,
        feature_fraction=0.9,
        bagging_fraction=0.8,
        bagging_freq=5,
        verbose=1,
        device='gpu',
    )

    # A single round keeps the test fast.
    booster = lgb.train(
        params,
        train_set,
        num_boost_round=1,
        valid_sets=eval_set,
        early_stopping_rounds=1,
    )

    self.assertEqual(1, booster.best_iteration)
Example #7
Source File: lightgbm.py From talkingdata-adtracking-fraud-detection with MIT License | 6 votes |
def train_and_predict(self, train, valid, weight, categorical_features: List[str],
                      target: str, params: dict) -> Tuple[Booster, dict]:
    """Train a LightGBM model on ``train`` while evaluating on ``train`` and ``valid``.

    Args:
        train: Training frame containing the feature columns and ``target``.
        valid: Validation frame with exactly the same column list as ``train``.
        weight: Optional per-row weights for the training set, or ``None``.
        categorical_features: Names of the categorical feature columns.
        target: Name of the label column.
        params: Dict with the keys ``'model_params'`` (LightGBM parameters)
            and ``'train_params'`` (extra keyword arguments for ``lgb.train``).

    Returns:
        A tuple ``(model, eval_results)``: the trained booster and the dict
        handed to ``lgb.train`` as ``evals_result``.

    Raises:
        ValueError: If either input is not a pandas DataFrame, or if the two
            frames do not share the same column list.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # DataFrame subclasses.
    if not (isinstance(train, pd.DataFrame) and isinstance(valid, pd.DataFrame)):
        raise ValueError('Parameter train and valid must be pandas.DataFrame')

    if list(train.columns) != list(valid.columns):
        raise ValueError('Train and valid must have a same column list')

    predictors = train.columns.drop(target)

    if weight is not None:
        print(weight)
    # weight=None is the Dataset default, so one call covers both cases.
    d_train = lgb.Dataset(train[predictors], label=train[target].values, weight=weight)
    d_valid = lgb.Dataset(valid[predictors], label=valid[target].values)

    eval_results = {}
    model: Booster = lgb.train(
        params['model_params'],
        d_train,
        categorical_feature=categorical_features,
        valid_sets=[d_train, d_valid],
        valid_names=['train', 'valid'],
        evals_result=eval_results,
        **params['train_params'],
    )
    return model, eval_results
Example #8
Source File: lgb_tune.py From ml-parameter-optimization with MIT License | 6 votes |
def get_n_estimators(self):
    """
    Set ``self._params['n_estimators']`` from a cross-validated run.

    ``lgb.cv`` is run with early stopping on the training data; the number of
    recorded rounds is then divided by ``1 - 1/cv_folds`` and stored as the
    new ``n_estimators``.

    A callable ``params_cv['feval']`` is used as the evaluation function;
    otherwise ``params_cv['metrics']`` is used. The original code raised
    ``KeyError`` after cross-validation whenever ``feval`` was callable,
    because it looked the result up under the unset ``metrics`` name.

    Returns
    -------
    self
    """
    # Translate parameter names through _dict_map where a mapping exists.
    lgb_param = {}
    for key, value in self._params.items():
        if key in self._dict_map:
            lgb_param[self._dict_map[key]] = value
        else:
            lgb_param[key] = value

    if self.balance_class:
        lgb_train = lgb.Dataset(self.X, label=self.y, weight=self.get_label_weights())
    else:
        lgb_train = lgb.Dataset(self.X, label=self.y)

    kwargs_cv = {
        'num_boost_round': self.params['n_estimators'],
        'nfold': self.params_cv['cv_folds'],
        'early_stopping_rounds': self.params_cv['early_stopping_rounds'],
        'stratified': self.params_cv['stratified'],
    }

    # Prefer a custom evaluation function; fall back to the named metric when
    # it is missing or not callable.
    try:
        feval = self.params_cv['feval']
    except KeyError:
        feval = None
    if callable(feval):
        kwargs_cv['feval'] = feval
    else:
        kwargs_cv['metrics'] = self.params_cv['metrics']

    if isinstance(self.categorical_feature, list):
        kwargs_cv['categorical_feature'] = self.categorical_feature
    else:
        kwargs_cv['categorical_feature'] = 'auto'

    cvresult = lgb.cv(lgb_param, lgb_train, **kwargs_cv)

    if 'metrics' in kwargs_cv:
        history = cvresult[kwargs_cv['metrics'] + '-mean']
    else:
        # With a custom feval the result key is not known here; any recorded
        # series is used for the round count.
        # NOTE(review): assumes all series in cvresult have equal length - confirm.
        history = next(iter(cvresult.values()))

    self._params['n_estimators'] = int(
        len(history) / (1 - 1 / self.params_cv['cv_folds'])
    )
    return self
Example #9
Source File: models.py From steppy-toolkit with MIT License | 6 votes |
def fit(self, X, y, X_valid, y_valid):
    """Validate and format the targets, then train ``self.estimator``.

    Both the training and the validation data are passed to ``lgb.train`` as
    evaluation sets, named ``'data_train'`` and ``'data_valid'``.

    Returns:
        self
    """
    self._check_target_shape_and_type(y, 'y')
    self._check_target_shape_and_type(y_valid, 'y_valid')
    y = self._format_target(y)
    y_valid = self._format_target(y_valid)

    for message, array in (
        ('LightGBM transformer, train data shape {}', X),
        ('LightGBM transformer, validation data shape {}', X_valid),
        ('LightGBM transformer, train labels shape {}', y),
        ('LightGBM transformer, validation labels shape {}', y_valid),
    ):
        logger.info(message.format(array.shape))

    train_set = lgb.Dataset(data=X, label=y, **self.dataset_parameters)
    valid_set = lgb.Dataset(data=X_valid, label=y_valid, **self.dataset_parameters)

    self.estimator = lgb.train(
        params=self.booster_parameters,
        train_set=train_set,
        valid_sets=[train_set, valid_set],
        valid_names=['data_train', 'data_valid'],
        **self.training_parameters
    )
    return self
Example #10
Source File: test_lightgbm.py From docker-python with Apache License 2.0 | 6 votes |
def test_cpu(self):
    """Train one boosting round on the CPU and check it is the best one."""
    train_set = lgb.Dataset('/input/tests/data/lgb_train.bin')
    eval_set = lgb.Dataset('/input/tests/data/lgb_test.bin', reference=train_set)

    params = dict(
        task='train',
        boosting_type='gbdt',
        objective='regression',
        metric={'l2', 'auc'},
        num_leaves=31,
        learning_rate=0.05,
        feature_fraction=0.9,
        bagging_fraction=0.8,
        bagging_freq=5,
        verbose=0,
    )

    # A single round keeps the test fast.
    booster = lgb.train(
        params,
        train_set,
        num_boost_round=1,
        valid_sets=eval_set,
        early_stopping_rounds=1,
    )

    self.assertEqual(1, booster.best_iteration)
Example #11
Source File: lgb_utils.py From autogluon with Apache License 2.0 | 6 votes |
def construct_dataset(x: DataFrame, y: Series, location=None, reference=None,
                      params=None, save=False, weight=None):
    """Build a ``lgb.Dataset`` and optionally save it in binary form.

    Args:
        x: Feature frame.
        y: Label series.
        location: Path prefix for the binary file; ``'.bin'`` is appended.
            Required when ``save`` is true.
        reference: Forwarded to ``lgb.Dataset`` as ``reference``.
        params: Forwarded to ``lgb.Dataset`` as ``params``.
        save: When true, write the dataset to ``f'{location}.bin'``,
            replacing any existing file at that path.
        weight: Forwarded to ``lgb.Dataset`` as ``weight``.

    Returns:
        The constructed ``lgb.Dataset`` (created with ``free_raw_data=True``).
    """
    try_import_lightgbm()
    import lightgbm as lgb

    dataset = lgb.Dataset(data=x, label=y, reference=reference, free_raw_data=True,
                          params=params, weight=weight)

    if save:
        assert location is not None, 'location must be given when save=True'
        saving_path = f'{location}.bin'
        if os.path.exists(saving_path):
            os.remove(saving_path)

        # A bare file name has an empty dirname, and os.makedirs('') raises,
        # so the directory is only created when there is one.
        saving_dir = os.path.dirname(saving_path)
        if saving_dir:
            os.makedirs(saving_dir, exist_ok=True)
        dataset.save_binary(saving_path)
        # dataset_binary = lgb.Dataset(location + '.bin', reference=reference, free_raw_data=False)# .construct()

    return dataset
Example #12
Source File: test_LightGbmTreeEnsembleConverters.py From onnxmltools with MIT License | 6 votes |
def test_lightgbm_booster_multi_classifier(self):
    """Convert a 3-class booster to ONNX and check the session's output names."""
    X = numpy.array(
        [[0, 1], [1, 1], [2, 0], [1, 2], [-1, 2], [1, -2]],
        dtype=numpy.float32,
    )
    y = [0, 1, 0, 1, 2, 2]

    train_params = {
        'boosting_type': 'gbdt',
        'objective': 'multiclass',
        'n_estimators': 3,
        'min_child_samples': 1,
        'num_class': 3,
    }
    model = lightgbm.train(train_params, lightgbm.Dataset(X, label=y))

    model_onnx, prefix = convert_model(
        model, 'tree-based classifier', [('input', FloatTensorType([None, 2]))]
    )
    dump_data_and_model(
        X, model, model_onnx,
        allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
        basename=prefix + "BoosterBin" + model.__class__.__name__,
    )

    try:
        from onnxruntime import InferenceSession
    except ImportError:
        # onnxruntime not installed (python 2.7)
        return

    session = InferenceSession(model_onnx.SerializeToString())
    output_names = [output.name for output in session.get_outputs()]
    assert output_names == ['label', 'probabilities']
Example #13
Source File: test_lightgbm.py From h2o4gpu with Apache License 2.0 | 6 votes |
def test_lightgbm_cpu(booster):
    """Run ``lgb.cv`` with the given boosting type on synthetic regression data."""
    import numpy as np
    import pandas as pd
    from h2o4gpu.util.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgb

    # Two features with values 0..9, each repeated 1000 times; X2 is shuffled.
    X1 = np.repeat(np.arange(10), 1000)
    X2 = np.repeat(np.arange(10), 1000)
    np.random.shuffle(X2)

    # Target is the product of the two noise-perturbed features.
    noisy_x1 = X1 + np.random.randn(10000)
    noisy_x2 = X2 + np.random.randn(10000)
    y = noisy_x1 * noisy_x2

    data = pd.DataFrame({'y': y, 'X1': X1, 'X2': X2})

    lgb_params = dict([
        ('learning_rate', 0.1),
        ('boosting', booster),
        ('objective', 'regression'),
        ('metric', 'rmse'),
        ('feature_fraction', 0.9),
        ('bagging_fraction', 0.75),
        ('num_leaves', 31),
        ('bagging_freq', 1),
        ('min_data_per_leaf', 250),
    ])

    lgb_train = lgb.Dataset(data=data[['X1', 'X2']], label=data.y)
    cv = lgb.cv(
        lgb_params,
        lgb_train,
        num_boost_round=100,
        early_stopping_rounds=15,
        stratified=False,
        verbose_eval=50,
    )
Example #14
Source File: test_lightgbm.py From h2o4gpu with Apache License 2.0 | 6 votes |
def test_lightgbm_gpu(booster):
    """Run ``lgb.cv`` on GPU device 0 with the given boosting type on synthetic data."""
    import numpy as np
    import pandas as pd
    from h2o4gpu.util.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgb

    # Two features with values 0..9, each repeated 1000 times; X2 is shuffled.
    X1 = np.repeat(np.arange(10), 1000)
    X2 = np.repeat(np.arange(10), 1000)
    np.random.shuffle(X2)

    # Target is the product of the two noise-perturbed features.
    noisy_x1 = X1 + np.random.randn(10000)
    noisy_x2 = X2 + np.random.randn(10000)
    y = noisy_x1 * noisy_x2

    data = pd.DataFrame({'y': y, 'X1': X1, 'X2': X2})

    lgb_params = dict([
        ('learning_rate', 0.1),
        ('boosting', booster),
        ('objective', 'regression'),
        ('metric', 'rmse'),
        ('feature_fraction', 0.9),
        ('bagging_fraction', 0.75),
        ('num_leaves', 31),
        ('bagging_freq', 1),
        ('min_data_per_leaf', 250),
        ('device_type', 'gpu'),
        ('gpu_device_id', 0),
    ])

    lgb_train = lgb.Dataset(data=data[['X1', 'X2']], label=data.y)
    cv = lgb.cv(
        lgb_params,
        lgb_train,
        num_boost_round=100,
        early_stopping_rounds=15,
        stratified=False,
        verbose_eval=50,
    )
Example #15
Source File: lgb.py From kaggle-plasticc with MIT License | 6 votes |
def train_and_predict(train_df, test_df, features, params):
    """Train one LightGBM model per stratified fold and collect predictions.

    Args:
        train_df: Training frame with the ``features`` columns plus
            ``"target"`` and ``"sample_weight"``.
        test_df: Test frame with the ``features`` columns.
        features: Feature column names.
        params: LightGBM parameters; ``params["num_class"]`` sets the width
            of the prediction arrays.

    Returns:
        A tuple ``(oof_preds, test_preds)``: out-of-fold predictions for
        every training row, and test predictions averaged over the
        ``NUM_FOLDS`` models.
    """
    oof_preds = np.zeros((len(train_df), params["num_class"]))
    test_preds = np.zeros((len(test_df), params["num_class"]))

    # random_state is deliberately not passed: without shuffle=True it never
    # influenced the splits, and current scikit-learn raises ValueError for
    # that combination. The resulting folds are unchanged.
    skf = StratifiedKFold(NUM_FOLDS)

    for train_index, val_index in skf.split(train_df, train_df["target"]):
        dev_df, val_df = train_df.iloc[train_index], train_df.iloc[val_index]
        lgb_train = lgb.Dataset(dev_df[features], dev_df["target"],
                                weight=dev_df["sample_weight"])
        lgb_val = lgb.Dataset(val_df[features], val_df["target"],
                              weight=val_df["sample_weight"])

        model = lgb.train(
            params,
            lgb_train,
            num_boost_round=200,
            valid_sets=[lgb_train, lgb_val],
            early_stopping_rounds=10,
            verbose_eval=50,
        )

        oof_preds[val_index, :] = model.predict(val_df[features])
        # Each fold contributes an equal share to the test prediction.
        test_preds += model.predict(test_df[features]) / NUM_FOLDS

    return oof_preds, test_preds
Example #16
Source File: optimize.py From optuna with MIT License | 6 votes |
def _create_objective(
    self,
    target_param_names: List[str],
    train_set: "lgb.Dataset",
    step_name: str,
    pbar: tqdm.tqdm,
) -> _OptunaObjective:
    """Build the ``_OptunaObjective`` for one tuning step.

    The tuner's own ``lgbm_params``, ``lgbm_kwargs``, ``best_score`` and
    ``_model_dir`` are combined with the arguments given here.
    """
    objective = _OptunaObjective(
        target_param_names,
        self.lgbm_params,
        train_set,
        self.lgbm_kwargs,
        self.best_score,
        step_name=step_name,
        model_dir=self._model_dir,
        pbar=pbar,
    )
    return objective
Example #17
Source File: models.py From open-solution-mapping-challenge with MIT License | 6 votes |
def fit(self, X, y, X_valid, y_valid, feature_names, categorical_features, **kwargs):
    """Train ``self.estimator`` with LightGBM, evaluating on train and valid data.

    Args:
        X: Training features.
        y: Training labels.
        X_valid: Validation features.
        y_valid: Validation labels.
        feature_names: Feature names passed to both datasets.
        categorical_features: Categorical features passed to both datasets.
        **kwargs: Accepted but not used.

    Returns:
        self
    """
    dataset_options = dict(feature_name=feature_names,
                           categorical_feature=categorical_features)
    train = lgb.Dataset(X, label=y, **dataset_options)
    valid = lgb.Dataset(X_valid, label=y_valid, **dataset_options)

    evaluation_results = {}
    training = self.training_params
    self.estimator = lgb.train(
        self.model_params,
        train,
        valid_sets=[train, valid],
        valid_names=['train', 'valid'],
        evals_result=evaluation_results,
        num_boost_round=training.number_boosting_rounds,
        early_stopping_rounds=training.early_stopping_rounds,
        verbose_eval=10,
        feval=self.evaluation_function,
    )
    return self
Example #18
Source File: test_lightgbm.py From optuna with MIT License | 6 votes |
def objective(
    trial, metric="binary_error", valid_name="valid_0", force_default_valid_names=False, cv=False
):
    # type: (optuna.trial.Trial, str, str, bool, bool) -> float
    """Run one round of ``lgb.cv`` or ``lgb.train`` with a pruning callback.

    Always returns ``1.0``.
    """
    dtrain = lgb.Dataset([[1.0], [2.0], [3.0]], label=[1.0, 0.0, 1.0])
    dtest = lgb.Dataset([[1.0]], label=[1.0])

    # None makes lgb.train use its default validation-set names.
    valid_names = None if force_default_valid_names else [valid_name]

    pruning_callback = LightGBMPruningCallback(trial, metric, valid_name=valid_name)
    if not cv:
        lgb.train(
            {"objective": "binary", "metric": ["auc", "binary_error"]},
            dtrain,
            1,
            valid_sets=[dtest],
            valid_names=valid_names,
            verbose_eval=False,
            callbacks=[pruning_callback],
        )
    else:
        lgb.cv(
            {"objective": "binary", "metric": ["auc", "binary_error"]},
            dtrain,
            1,
            verbose_eval=False,
            nfold=2,
            callbacks=[pruning_callback],
        )
    return 1.0
Example #19
Source File: lightgbm_simple.py From optuna with MIT License | 6 votes |
def objective(trial):
    """Train LightGBM with trial-suggested parameters; return validation accuracy.

    The breast-cancer dataset is split 75/25 into training and validation
    parts; predictions are rounded to the nearest integer label before
    scoring.
    """
    data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
    train_x, valid_x, train_y, valid_y = train_test_split(data, target, test_size=0.25)
    dtrain = lgb.Dataset(train_x, label=train_y)

    # Fixed settings first, then the values sampled from the trial.
    param = {
        "objective": "binary",
        "metric": "binary_logloss",
        "verbosity": -1,
        "boosting_type": "gbdt",
    }
    param["lambda_l1"] = trial.suggest_loguniform("lambda_l1", 1e-8, 10.0)
    param["lambda_l2"] = trial.suggest_loguniform("lambda_l2", 1e-8, 10.0)
    param["num_leaves"] = trial.suggest_int("num_leaves", 2, 256)
    param["feature_fraction"] = trial.suggest_uniform("feature_fraction", 0.4, 1.0)
    param["bagging_fraction"] = trial.suggest_uniform("bagging_fraction", 0.4, 1.0)
    param["bagging_freq"] = trial.suggest_int("bagging_freq", 1, 7)
    param["min_child_samples"] = trial.suggest_int("min_child_samples", 5, 100)

    booster = lgb.train(param, dtrain)
    predicted_labels = np.rint(booster.predict(valid_x))
    return sklearn.metrics.accuracy_score(valid_y, predicted_labels)
Example #20
Source File: test_lightgbm_autolog.py From mlflow with Apache License 2.0 | 6 votes |
def test_lgb_autolog_logs_metrics_with_multi_validation_data_and_metrics(bst_params, train_set):
    """Autolog must record every (validation set, metric) pair for all 10 rounds."""
    mlflow.lightgbm.autolog()
    evals_result = {}
    params = {'metric': ['multi_error', 'multi_logloss']}
    params.update(bst_params)
    valid_names = ['train', 'valid']
    lgb.train(
        params,
        train_set,
        num_boost_round=10,
        valid_sets=[train_set, lgb.Dataset(train_set.data)],
        valid_names=valid_names,
        evals_result=evals_result,
    )

    run = get_latest_run()
    logged = run.data
    client = mlflow.tracking.MlflowClient()
    for valid_name in valid_names:
        for metric_name in params['metric']:
            metric_key = '{}-{}'.format(valid_name, metric_name)
            history = client.get_metric_history(run.info.run_id, metric_key)
            metric_history = [entry.value for entry in history]
            assert metric_key in logged.metrics
            assert len(metric_history) == 10
            assert metric_history == evals_result[valid_name][metric_name]
Example #21
Source File: test_lightgbm_autolog.py From mlflow with Apache License 2.0 | 6 votes |
def test_lgb_autolog_logs_metrics_with_multi_validation_data(bst_params, train_set):
    """Autolog must record ``multi_logloss`` for both validation sets over 10 rounds."""
    mlflow.lightgbm.autolog()
    evals_result = {}
    # If we use [train_set, train_set] here, LightGBM ignores the first dataset.
    # To avoid that, create a new Dataset object.
    second_set = lgb.Dataset(train_set.data)
    valid_names = ['train', 'valid']
    lgb.train(
        bst_params,
        train_set,
        num_boost_round=10,
        valid_sets=[train_set, second_set],
        valid_names=valid_names,
        evals_result=evals_result,
    )

    run = get_latest_run()
    logged = run.data
    client = mlflow.tracking.MlflowClient()
    for valid_name in valid_names:
        metric_key = '{}-multi_logloss'.format(valid_name)
        history = client.get_metric_history(run.info.run_id, metric_key)
        metric_history = [entry.value for entry in history]
        assert metric_key in logged.metrics
        assert len(metric_history) == 10
        assert metric_history == evals_result[valid_name]['multi_logloss']
Example #22
Source File: model_v1.py From Quora with MIT License | 6 votes |
def get_dataset(self, X, y, free_raw_data=True):
    """
    Wrap features and labels in a ``lightgbm.Dataset``.

    The feature names and categorical features come from
    ``self.feature_name`` and ``self.categorical_feature``.

    Parameters
    ----------
    X: string, numpy array, pandas DataFrame, scipy.sparse or
        list of numpy arrays

    y: list, numpy 1-D array, pandas Series / one-column DataFrame
        or None, optional (default=None)

    free_raw_data: bool, optional (default=True)

    Return
    ------
    lightgbm dataset
    """
    dataset_kwargs = dict(
        data=X,
        label=y,
        feature_name=self.feature_name,
        categorical_feature=self.categorical_feature,
        free_raw_data=free_raw_data,
    )
    return lightgbm.Dataset(**dataset_kwargs)
Example #23
Source File: models.py From open-solution-data-science-bowl-2018 with MIT License | 6 votes |
def fit(self, X, y, X_valid, y_valid, feature_names, categorical_features, **kwargs):
    """Train ``self.estimator`` with LightGBM, evaluating on train and valid data.

    Args:
        X: Training features.
        y: Training labels.
        X_valid: Validation features.
        y_valid: Validation labels.
        feature_names: Feature names passed to both datasets.
        categorical_features: Categorical features passed to both datasets.
        **kwargs: Accepted but not used.

    Returns:
        self
    """
    dataset_options = dict(feature_name=feature_names,
                           categorical_feature=categorical_features)
    train = lgb.Dataset(X, label=y, **dataset_options)
    valid = lgb.Dataset(X_valid, label=y_valid, **dataset_options)

    evaluation_results = {}
    training = self.training_params
    self.estimator = lgb.train(
        self.model_params,
        train,
        valid_sets=[train, valid],
        valid_names=['train', 'valid'],
        evals_result=evaluation_results,
        num_boost_round=training.number_boosting_rounds,
        early_stopping_rounds=training.early_stopping_rounds,
        verbose_eval=10,
        feval=self.evaluation_function,
    )
    return self
Example #24
Source File: misc.py From open-solution-data-science-bowl-2018 with MIT License | 6 votes |
def fit(self, X, y, X_valid, y_valid, feature_names, categorical_features, **kwargs):
    """Train ``self.estimator`` with LightGBM, evaluating on train and valid data.

    Model settings come from ``self.model_config``, including its ``verbose``
    attribute; round limits come from ``self.training_config``.

    Args:
        X: Training features.
        y: Training labels.
        X_valid: Validation features.
        y_valid: Validation labels.
        feature_names: Feature names passed to both datasets.
        categorical_features: Categorical features passed to both datasets.
        **kwargs: Accepted but not used.

    Returns:
        self
    """
    dataset_options = dict(feature_name=feature_names,
                           categorical_feature=categorical_features)
    train = lgb.Dataset(X, label=y, **dataset_options)
    valid = lgb.Dataset(X_valid, label=y_valid, **dataset_options)

    evaluation_results = {}
    training = self.training_config
    self.estimator = lgb.train(
        self.model_config,
        train,
        valid_sets=[train, valid],
        valid_names=['train', 'valid'],
        evals_result=evaluation_results,
        num_boost_round=training.number_boosting_rounds,
        early_stopping_rounds=training.early_stopping_rounds,
        verbose_eval=self.model_config.verbose,
        feval=self.evaluation_function,
    )
    return self
Example #25
Source File: main.py From nni with MIT License | 6 votes |
def load_data(train_path='./data/regression.train', test_path='./data/regression.test'):
    '''
    Read the tab-separated train/test files and build LightGBM datasets.

    Column 0 holds the target. The first 90% of the training rows become the
    training set and the remaining rows the evaluation set.

    Returns (lgb_train, lgb_eval, X_test, y_test).
    '''
    print('Load data...')
    train_frame = pd.read_csv(train_path, header=None, sep='\t')
    test_frame = pd.read_csv(test_path, header=None, sep='\t')

    split_num = int(0.9 * len(train_frame))

    all_labels = train_frame[0].values
    all_features = train_frame.drop(0, axis=1).values
    y_test = test_frame[0].values
    X_test = test_frame.drop(0, axis=1).values

    X_train, X_eval = all_features[:split_num, :], all_features[split_num:, :]
    y_train, y_eval = all_labels[:split_num], all_labels[split_num:]

    # create dataset for lightgbm
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_eval, y_eval, reference=lgb_train)

    return lgb_train, lgb_eval, X_test, y_test
Example #26
Source File: lightgbm_example.py From ray with Apache License 2.0 | 6 votes |
def train_breast_cancer(config):
    """Train LightGBM with ``config`` and report test accuracy via ``tune.report``.

    The breast-cancer dataset is split 75/25; predictions are rounded to the
    nearest integer label before scoring.
    """
    data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
    train_x, test_x, train_y, test_y = train_test_split(
        data, target, test_size=0.25)

    dtrain = lgb.Dataset(train_x, label=train_y)
    dtest = lgb.Dataset(test_x, label=test_y)
    booster = lgb.train(
        config,
        dtrain,
        valid_sets=[dtest],
        verbose_eval=False,
        callbacks=[LightGBMCallback])

    predicted_labels = np.rint(booster.predict(test_x))
    accuracy = sklearn.metrics.accuracy_score(test_y, predicted_labels)
    tune.report(mean_accuracy=accuracy, done=True)
Example #27
Source File: misc.py From open-solution-mapping-challenge with MIT License | 6 votes |
def fit(self, X, y, X_valid, y_valid, feature_names, categorical_features, **kwargs):
    """Train ``self.estimator`` with LightGBM, evaluating on train and valid data.

    Model settings come from ``self.model_config``, including its ``verbose``
    attribute; round limits come from ``self.training_config``.

    Args:
        X: Training features.
        y: Training labels.
        X_valid: Validation features.
        y_valid: Validation labels.
        feature_names: Feature names passed to both datasets.
        categorical_features: Categorical features passed to both datasets.
        **kwargs: Accepted but not used.

    Returns:
        self
    """
    dataset_options = dict(feature_name=feature_names,
                           categorical_feature=categorical_features)
    train = lgb.Dataset(X, label=y, **dataset_options)
    valid = lgb.Dataset(X_valid, label=y_valid, **dataset_options)

    evaluation_results = {}
    training = self.training_config
    self.estimator = lgb.train(
        self.model_config,
        train,
        valid_sets=[train, valid],
        valid_names=['train', 'valid'],
        evals_result=evaluation_results,
        num_boost_round=training.number_boosting_rounds,
        early_stopping_rounds=training.early_stopping_rounds,
        verbose_eval=self.model_config.verbose,
        feval=self.evaluation_function,
    )
    return self
Example #28
Source File: test_LightGbmTreeEnsembleConverters.py From onnxmltools with MIT License | 5 votes |
def test_lightgbm_booster_classifier(self):
    """Convert a binary booster to ONNX and dump the data and both models."""
    X = numpy.array([[0, 1], [1, 1], [2, 0], [1, 2]], dtype=numpy.float32)
    y = [0, 1, 0, 1]

    train_params = {
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'n_estimators': 3,
        'min_child_samples': 1,
    }
    model = lightgbm.train(train_params, lightgbm.Dataset(X, label=y))

    model_onnx, prefix = convert_model(
        model, 'tree-based classifier', [('input', FloatTensorType([None, 2]))]
    )
    dump_data_and_model(
        X, model, model_onnx,
        allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
        basename=prefix + "BoosterBin" + model.__class__.__name__,
    )
Example #29
Source File: test_LightGbmTreeEnsembleConverters.py From onnxmltools with MIT License | 5 votes |
def test_lightgbm_booster_classifier_zipmap(self):
    """Convert a binary booster to ONNX and check the graph mentions ``zipmap``."""
    X = numpy.array([[0, 1], [1, 1], [2, 0], [1, 2]], dtype=numpy.float32)
    y = [0, 1, 0, 1]

    train_params = {
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'n_estimators': 3,
        'min_child_samples': 1,
    }
    model = lightgbm.train(train_params, lightgbm.Dataset(X, label=y))

    model_onnx, prefix = convert_model(
        model, 'tree-based classifier', [('input', FloatTensorType([None, 2]))]
    )
    serialized_text = str(model_onnx).lower()
    assert "zipmap" in serialized_text

    dump_data_and_model(
        X, model, model_onnx,
        allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
        basename=prefix + "BoosterBin" + model.__class__.__name__,
    )
Example #30
Source File: lgb_trial.py From autogluon with Apache License 2.0 | 5 votes |
def lgb_trial(args, reporter):
    """Training script for hyperparameter evaluation of Gradient Boosting model.

    Loads the pre-built train/validation datasets from ``util_args.directory``
    and hands them to ``model_trial.fit_and_save_model``. Any exception
    terminates the reporter; exceptions other than ``TimeLimitExceeded`` are
    logged first.
    """
    try:
        model, args, util_args = model_trial.prepare_inputs(args=args)

        try_import_lightgbm()
        import lightgbm as lgb

        base_dir = util_args.directory
        dataset_train = lgb.Dataset(base_dir + util_args.dataset_train_filename)
        dataset_val = lgb.Dataset(base_dir + util_args.dataset_val_filename)
        X_val, y_val = load_pkl.load(base_dir + util_args.dataset_val_pkl_filename)

        fit_model_args = {'dataset_train': dataset_train, 'dataset_val': dataset_val}
        predict_proba_args = {'X': X_val}
        model_trial.fit_and_save_model(
            model=model,
            params=args,
            fit_args=fit_model_args,
            predict_proba_args=predict_proba_args,
            y_test=y_val,
            time_start=util_args.time_start,
            time_limit=util_args.get('time_limit', None),
            reporter=reporter,
        )
    except Exception as e:
        # NOTE(review): terminate() is assumed to run for every caught
        # exception, not only for the logged ones - confirm against the
        # original file's indentation.
        if not isinstance(e, TimeLimitExceeded):
            logger.exception(e, exc_info=True)
        reporter.terminate()

    # FIXME: If stopping metric and eval metric differ, the previous reported scores will not align as they will be evaluated with stopping_metric, whereas this is evaluated with eval_metric
    #  This should only impact if the reporter data is used
    # FIXME: If stopping metric score > eval metric score, stopping metric score will be recorded as best score, this is a defect!
    # FIXME: It might be the case that if a reporter has been recorded and the model crash, AutoGluon will try to access the invalid model and fail.
    # reporter(epoch=model.params_trained['num_boost_round'] + 1, validation_performance=score)