Python lightgbm.cv() Examples
The following are 11 code examples of lightgbm.cv().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module lightgbm, or try the search function.
Example #1
Source File: optimize.py From optuna with MIT License | 8 votes |
def __call__(self, trial: optuna.trial.Trial) -> float:
    """Score one trial with ``lgb.cv`` and return the last CV score.

    The trial is handed to ``self._preprocess`` first. ``lgb.cv`` is then
    run with ``self.lgbm_params``, ``self.train_set`` and
    ``self.lgbm_kwargs``; its output is converted to a score sequence by
    ``self._get_cv_scores`` and the final entry is used as the trial value.
    ``self.best_score`` is replaced when ``self.compare_validation_metrics``
    accepts the new value. The elapsed wall-clock seconds and the elapsed
    time divided by the number of scores are forwarded to
    ``self._postprocess``.
    """
    self._preprocess(trial)

    started_at = time.time()
    cv_output = lgb.cv(self.lgbm_params, self.train_set, **self.lgbm_kwargs)
    scores = self._get_cv_scores(cv_output)
    final_score = scores[-1]
    elapsed_secs = time.time() - started_at

    # One score per recorded iteration, so this is the mean time per entry.
    secs_per_iteration = elapsed_secs / len(scores)

    improved = self.compare_validation_metrics(final_score, self.best_score)
    if improved:
        self.best_score = final_score

    self._postprocess(trial, elapsed_secs, secs_per_iteration)
    return final_score
Example #2
Source File: lgb_tune.py From ml-parameter-optimization with MIT License | 6 votes |
def get_n_estimators(self):
    """
    Estimate the number of estimators with cross-validation on the training set.

    Runs ``lgb.cv`` on ``self.X`` / ``self.y`` and stores the resulting
    estimate in ``self._params['n_estimators']``.

    Returns:
        self, so calls can be chained.

    Raises:
        KeyError: if neither a callable ``feval`` nor ``metrics`` is present
            in ``self.params_cv``, or if ``lgb.cv`` returns no ``*-mean``
            series.
    """
    # Translate parameter names through self._dict_map where a mapping
    # exists; pass every other name through unchanged.
    lgb_param = {}
    for key, value in self._params.items():
        mapped_key = self._dict_map[key] if key in self._dict_map else key
        lgb_param[mapped_key] = value

    if self.balance_class:
        lgb_train = lgb.Dataset(self.X, label=self.y, weight=self.get_label_weights())
    else:
        lgb_train = lgb.Dataset(self.X, label=self.y)

    # NOTE(review): this reads self.params while the mapping above reads
    # self._params -- presumably one is a property over the other; confirm.
    kwargs_cv = {
        'num_boost_round': self.params['n_estimators'],
        'nfold': self.params_cv['cv_folds'],
        'early_stopping_rounds': self.params_cv['early_stopping_rounds'],
        'stratified': self.params_cv['stratified'],
    }

    # Prefer a custom evaluation function when one is configured; otherwise
    # fall back to the configured metrics. A present-but-not-callable
    # 'feval' used to leave both unset and crash after training.
    try:
        feval = self.params_cv['feval']
    except KeyError:
        feval = None
    if callable(feval):
        kwargs_cv['feval'] = feval
    else:
        kwargs_cv['metrics'] = self.params_cv['metrics']

    if isinstance(self.categorical_feature, list):
        kwargs_cv['categorical_feature'] = self.categorical_feature
    else:
        kwargs_cv['categorical_feature'] = 'auto'

    cvresult = lgb.cv(lgb_param, lgb_train, **kwargs_cv)

    # Read the round count from whichever '<metric>-mean' series was
    # returned. Building the key from kwargs_cv['metrics'] raised KeyError
    # whenever feval was used (no 'metrics' entry) and TypeError when
    # metrics was a list.
    mean_keys = [key for key in cvresult if str(key).endswith('-mean')]
    if not mean_keys:
        raise KeyError("lgb.cv returned no '*-mean' series")
    n_rounds = len(cvresult[mean_keys[0]])

    # Divide by (1 - 1/cv_folds); presumably this scales the round count up
    # because each fold trains on only part of the data -- confirm intent.
    self._params['n_estimators'] = int(n_rounds / (1 - 1 / self.params_cv['cv_folds']))
    return self
Example #3
Source File: test_lightgbm.py From optuna with MIT License | 6 votes |
def test_lightgbm_pruning_callback_call(cv):
    # type: (bool) -> None
    """Call ``LightGBMPruningCallback`` directly on a hand-built callback env.

    With a pruner built as ``DeterministicPruner(False)`` the callback must
    return normally; with ``DeterministicPruner(True)`` it must raise
    ``optuna.TrialPruned``. ``cv`` selects which evaluation-result tuple
    shape is placed in the env.
    """
    make_env = partial(
        lgb.callback.CallbackEnv,
        model="test",
        params={},
        begin_iteration=0,
        end_iteration=1,
        iteration=1,
    )

    # The cv-style entry is a 5-tuple; the plain entry is a 4-tuple.
    if cv:
        results = [("cv_agg", "binary_error", 1.0, False, 1.0)]
    else:
        results = [("validation", "binary_error", 1.0, False)]
    env = make_env(evaluation_result_list=results)

    # The pruner is deactivated: the callback must not raise.
    inactive_study = optuna.create_study(pruner=DeterministicPruner(False))
    inactive_trial = create_running_trial(inactive_study, 1.0)
    callback = LightGBMPruningCallback(inactive_trial, "binary_error", valid_name="validation")
    callback(env)

    # The pruner is activated: the callback must signal pruning.
    active_study = optuna.create_study(pruner=DeterministicPruner(True))
    active_trial = create_running_trial(active_study, 1.0)
    callback = LightGBMPruningCallback(active_trial, "binary_error", valid_name="validation")
    with pytest.raises(optuna.TrialPruned):
        callback(env)
Example #4
Source File: test_lightgbm.py From optuna with MIT License | 6 votes |
def test_lightgbm_pruning_callback(cv):
    # type: (bool) -> None
    """Check trial states produced by ``objective`` under each pruner setting.

    Each scenario runs a single trial and inspects ``study.trials[0]``:
    an always-pruning pruner must yield ``PRUNED``; a never-pruning pruner
    must yield ``COMPLETE`` with value ``1.0``. This is repeated for the
    default setup, a custom validation name, and the "maximize" direction
    with the ``auc`` metric.
    """

    def run_single_trial(prune, target, study_kwargs=None, **optimize_kwargs):
        # Build a study with a deterministic pruner, run exactly one trial,
        # and hand back that trial for inspection.
        study = optuna.create_study(
            pruner=DeterministicPruner(prune), **(study_kwargs or {})
        )
        study.optimize(target, n_trials=1, **optimize_kwargs)
        return study.trials[0]

    pruned_state = optuna.trial.TrialState.PRUNED
    complete_state = optuna.trial.TrialState.COMPLETE

    # Default metric and validation name.
    default_target = partial(objective, cv=cv)
    assert run_single_trial(True, default_target).state == pruned_state

    finished = run_single_trial(False, default_target)
    assert finished.state == complete_state
    assert finished.value == 1.0

    # Use non default validation name.
    custom_valid_name = "my_validation"
    named_target = partial(objective, valid_name=custom_valid_name, cv=cv)
    finished = run_single_trial(False, named_target)
    assert finished.state == complete_state
    assert finished.value == 1.0

    # Check "maximize" direction.
    maximize = {"direction": "maximize"}
    auc_target = partial(objective, metric="auc", cv=cv)
    assert run_single_trial(True, auc_target, maximize, catch=()).state == pruned_state

    finished = run_single_trial(False, auc_target, maximize, catch=())
    assert finished.state == complete_state
    assert finished.value == 1.0
Example #5
Source File: test_lightgbm.py From optuna with MIT License | 6 votes |
def test_lightgbm_pruning_callback_errors(cv):
    # type: (bool) -> None
    """Check that misconfigured callbacks surface ``ValueError``.

    Covered cases: an unknown metric name, an unknown validation name
    (only when ``cv`` is false), and a metric paired with a study
    direction for which a ``ValueError`` is expected.
    """

    def expect_value_error(target, **study_kwargs):
        # The study is created outside the raises-context so that only the
        # optimize() call is expected to raise.
        study = optuna.create_study(pruner=DeterministicPruner(False), **study_kwargs)
        with pytest.raises(ValueError):
            study.optimize(target, n_trials=1, catch=())

    # Unknown metric
    expect_value_error(partial(objective, metric="foo_metric", cv=cv))

    if not cv:
        # Unknown validation name
        expect_value_error(
            partial(objective, valid_name="valid_1", force_default_valid_names=True)
        )

    # Check consistency of study direction.
    expect_value_error(partial(objective, metric="auc", cv=cv))
    expect_value_error(
        partial(objective, metric="binary_error", cv=cv), direction="maximize"
    )
Example #6
Source File: test_lightgbm.py From optuna with MIT License | 6 votes |
def objective(
    trial, metric="binary_error", valid_name="valid_0", force_default_valid_names=False, cv=False
):
    # type: (optuna.trial.Trial, str, str, bool, bool) -> float
    """Run one tiny LightGBM job with a pruning callback attached.

    Builds a three-row training set and a one-row test set, attaches a
    ``LightGBMPruningCallback`` for ``metric`` / ``valid_name``, and runs a
    single boosting round through ``lgb.cv`` (when ``cv`` is true) or
    ``lgb.train`` (otherwise). When ``force_default_valid_names`` is true,
    ``valid_names=None`` is passed to ``lgb.train`` instead of
    ``[valid_name]``.

    Always returns ``1.0``.
    """
    dtrain = lgb.Dataset([[1.0], [2.0], [3.0]], label=[1.0, 0.0, 1.0])
    dtest = lgb.Dataset([[1.0]], label=[1.0])

    valid_names = None if force_default_valid_names else [valid_name]

    pruning_callback = LightGBMPruningCallback(trial, metric, valid_name=valid_name)
    booster_params = {"objective": "binary", "metric": ["auc", "binary_error"]}

    if not cv:
        lgb.train(
            booster_params,
            dtrain,
            1,
            valid_sets=[dtest],
            valid_names=valid_names,
            verbose_eval=False,
            callbacks=[pruning_callback],
        )
        return 1.0

    lgb.cv(
        booster_params,
        dtrain,
        1,
        verbose_eval=False,
        nfold=2,
        callbacks=[pruning_callback],
    )
    return 1.0
Example #7
Source File: test_lightgbm.py From h2o4gpu with Apache License 2.0 | 6 votes |
def test_lightgbm_gpu(booster):
    """Smoke-run ``lgb.cv`` on synthetic regression data with GPU settings.

    Two integer features (values 0-9, 1000 repeats each; the second one
    shuffled) are combined with Gaussian noise into the target. ``booster``
    is passed through as the ``boosting`` parameter, and the parameters
    request ``device_type='gpu'`` on device 0. The test makes no assertion
    on the CV result.
    """
    import numpy as np
    import pandas as pd
    from h2o4gpu.util.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgb

    feature_a = np.repeat(np.arange(10), 1000)
    feature_b = np.repeat(np.arange(10), 1000)
    np.random.shuffle(feature_b)

    # Draw the noise terms in the same order as the product's operands.
    noisy_a = feature_a + np.random.randn(10000)
    noisy_b = feature_b + np.random.randn(10000)
    target = noisy_a * noisy_b

    frame = pd.DataFrame({'y': target, 'X1': feature_a, 'X2': feature_b})

    lgb_params = {
        'learning_rate': 0.1,
        'boosting': booster,
        'objective': 'regression',
        'metric': 'rmse',
        'feature_fraction': 0.9,
        'bagging_fraction': 0.75,
        'num_leaves': 31,
        'bagging_freq': 1,
        'min_data_per_leaf': 250,
        'device_type': 'gpu',
        'gpu_device_id': 0,
    }

    train_set = lgb.Dataset(data=frame[['X1', 'X2']], label=frame.y)
    cv_result = lgb.cv(
        lgb_params,
        train_set,
        num_boost_round=100,
        early_stopping_rounds=15,
        stratified=False,
        verbose_eval=50,
    )
Example #8
Source File: test_lightgbm.py From h2o4gpu with Apache License 2.0 | 6 votes |
def test_lightgbm_cpu(booster):
    """Smoke-run ``lgb.cv`` on synthetic regression data without device settings.

    Two integer features (values 0-9, 1000 repeats each; the second one
    shuffled) are combined with Gaussian noise into the target. ``booster``
    is passed through as the ``boosting`` parameter. The test makes no
    assertion on the CV result.
    """
    import numpy as np
    import pandas as pd
    from h2o4gpu.util.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgb

    feature_a = np.repeat(np.arange(10), 1000)
    feature_b = np.repeat(np.arange(10), 1000)
    np.random.shuffle(feature_b)

    # Draw the noise terms in the same order as the product's operands.
    noisy_a = feature_a + np.random.randn(10000)
    noisy_b = feature_b + np.random.randn(10000)
    target = noisy_a * noisy_b

    frame = pd.DataFrame({'y': target, 'X1': feature_a, 'X2': feature_b})

    lgb_params = {
        'learning_rate': 0.1,
        'boosting': booster,
        'objective': 'regression',
        'metric': 'rmse',
        'feature_fraction': 0.9,
        'bagging_fraction': 0.75,
        'num_leaves': 31,
        'bagging_freq': 1,
        'min_data_per_leaf': 250,
    }

    train_set = lgb.Dataset(data=frame[['X1', 'X2']], label=frame.y)
    cv_result = lgb.cv(
        lgb_params,
        train_set,
        num_boost_round=100,
        early_stopping_rounds=15,
        stratified=False,
        verbose_eval=50,
    )
Example #9
Source File: test_lightgbm.py From h2o4gpu with Apache License 2.0 | 5 votes |
def test_lightgbm_cpu_airlines_full(booster):
    """Smoke-run ``lgb.cv`` on the all-years airlines CSV archive.

    Reads ``./open_data/allyears.1987.2013.zip`` with explicit column
    dtypes, uses the category codes of ``IsArrDelayed`` as the label, and
    cross-validates a binary-objective model on a fixed list of feature
    columns. ``booster`` is passed through as the ``boosting`` parameter.
    The test makes no assertion on the CV result.
    """
    import numpy as np
    import pandas as pd
    from h2o4gpu.util.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgb

    # Column dtypes grouped by kind; the mapping handed to read_csv is the
    # union of the three groups.
    categorical_columns = [
        'UniqueCarrier', 'Origin', 'Dest', 'TailNum', 'CancellationCode',
        'IsArrDelayed', 'IsDepDelayed', 'Cancelled',
    ]
    float_columns = [
        'DepTime', 'CRSDepTime', 'ArrTime', 'CRSArrTime',
        'ActualElapsedTime', 'CRSElapsedTime', 'AirTime', 'ArrDelay',
        'DepDelay', 'Distance', 'TaxiIn', 'TaxiOut', 'Diverted',
        'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay',
        'LateAircraftDelay',
    ]
    int_columns = ['Year', 'Month', 'DayOfWeek', 'DayofMonth']

    column_types = {}
    column_types.update(dict.fromkeys(categorical_columns, 'category'))
    column_types.update(dict.fromkeys(float_columns, np.float32))
    column_types.update(dict.fromkeys(int_columns, np.int32))

    frame = pd.read_csv('./open_data/allyears.1987.2013.zip', dtype=column_types)

    labels = frame["IsArrDelayed"].cat.codes

    feature_columns = [
        'UniqueCarrier', 'Origin', 'Dest', 'IsDepDelayed', 'Year', 'Month',
        'DayofMonth', 'DayOfWeek', 'DepTime', 'CRSDepTime', 'ArrTime',
        'CRSArrTime', 'FlightNum', 'TailNum', 'ActualElapsedTime',
        'CRSElapsedTime', 'AirTime', 'ArrDelay', 'DepDelay', 'Distance',
        'TaxiIn', 'TaxiOut', 'Cancelled', 'CancellationCode', 'Diverted',
        'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay',
        'LateAircraftDelay',
    ]
    features = frame[feature_columns]

    lgb_params = {
        'learning_rate': 0.1,
        'boosting': booster,
        'objective': 'binary',
        'metric': 'rmse',
        'feature_fraction': 0.9,
        'bagging_fraction': 0.75,
        'num_leaves': 31,
        'bagging_freq': 1,
        'min_data_per_leaf': 250,
    }

    train_set = lgb.Dataset(data=features, label=labels)
    cv_result = lgb.cv(
        lgb_params,
        train_set,
        num_boost_round=50,
        early_stopping_rounds=5,
        stratified=False,
        verbose_eval=10,
    )
Example #10
Source File: test_lightgbm.py From h2o4gpu with Apache License 2.0 | 5 votes |
def test_lightgbm_cpu_airlines_year(booster, year):
    """Smoke-run ``lgb.cv`` on a single-year airlines CSV archive.

    Reads ``./open_data/airlines/year<year>.zip`` with explicit column
    dtypes, uses the category codes of ``IsArrDelayed`` as the label, and
    cross-validates a binary-objective model on a fixed list of feature
    columns. ``booster`` is passed through as the ``boosting`` parameter.
    The test makes no assertion on the CV result.
    """
    import numpy as np
    import pandas as pd
    from h2o4gpu.util.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgb

    # Column dtypes grouped by kind; the mapping handed to read_csv is the
    # union of the three groups.
    categorical_columns = [
        'UniqueCarrier', 'Origin', 'Dest', 'TailNum', 'CancellationCode',
        'IsArrDelayed', 'IsDepDelayed', 'Cancelled',
    ]
    float_columns = [
        'DepTime', 'CRSDepTime', 'ArrTime', 'CRSArrTime',
        'ActualElapsedTime', 'CRSElapsedTime', 'AirTime', 'ArrDelay',
        'DepDelay', 'Distance', 'TaxiIn', 'TaxiOut', 'Diverted',
        'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay',
        'LateAircraftDelay',
    ]
    int_columns = ['Year', 'Month', 'DayOfWeek', 'DayofMonth']

    column_types = {}
    column_types.update(dict.fromkeys(categorical_columns, 'category'))
    column_types.update(dict.fromkeys(float_columns, np.float32))
    column_types.update(dict.fromkeys(int_columns, np.int32))

    archive_path = './open_data/airlines/year{0}.zip'.format(year)
    frame = pd.read_csv(archive_path, dtype=column_types)

    labels = frame["IsArrDelayed"].cat.codes

    feature_columns = [
        'UniqueCarrier', 'Origin', 'Dest', 'IsDepDelayed', 'Year', 'Month',
        'DayofMonth', 'DayOfWeek', 'DepTime', 'CRSDepTime', 'ArrTime',
        'CRSArrTime', 'FlightNum', 'TailNum', 'ActualElapsedTime',
        'CRSElapsedTime', 'AirTime', 'ArrDelay', 'DepDelay', 'Distance',
        'TaxiIn', 'TaxiOut', 'Cancelled', 'CancellationCode', 'Diverted',
        'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay',
        'LateAircraftDelay',
    ]
    features = frame[feature_columns]

    lgb_params = {
        'learning_rate': 0.1,
        'boosting': booster,
        'objective': 'binary',
        'metric': 'rmse',
        'feature_fraction': 0.9,
        'bagging_fraction': 0.75,
        'num_leaves': 31,
        'bagging_freq': 1,
        'min_data_per_leaf': 250,
    }

    train_set = lgb.Dataset(data=features, label=labels)
    cv_result = lgb.cv(
        lgb_params,
        train_set,
        num_boost_round=50,
        early_stopping_rounds=5,
        stratified=False,
        verbose_eval=10,
    )
Example #11
Source File: test_lightgbm.py From h2o4gpu with Apache License 2.0 | 5 votes |
def test_lightgbm_gpu_airlines_year(booster, year):
    """Smoke-run ``lgb.cv`` with GPU settings on a single-year airlines archive.

    Reads ``./open_data/airlines/year<year>.zip`` with explicit column
    dtypes, uses the category codes of ``IsArrDelayed`` as the label, and
    cross-validates a binary-objective model on a fixed list of feature
    columns. ``booster`` is passed through as the ``boosting`` parameter,
    and the parameters request ``device_type='gpu'`` on device 0. The test
    makes no assertion on the CV result.
    """
    import numpy as np
    import pandas as pd
    from h2o4gpu.util.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgb

    # Column dtypes grouped by kind; the mapping handed to read_csv is the
    # union of the three groups.
    categorical_columns = [
        'UniqueCarrier', 'Origin', 'Dest', 'TailNum', 'CancellationCode',
        'IsArrDelayed', 'IsDepDelayed', 'Cancelled',
    ]
    float_columns = [
        'DepTime', 'CRSDepTime', 'ArrTime', 'CRSArrTime',
        'ActualElapsedTime', 'CRSElapsedTime', 'AirTime', 'ArrDelay',
        'DepDelay', 'Distance', 'TaxiIn', 'TaxiOut', 'Diverted',
        'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay',
        'LateAircraftDelay',
    ]
    int_columns = ['Year', 'Month', 'DayOfWeek', 'DayofMonth']

    column_types = {}
    column_types.update(dict.fromkeys(categorical_columns, 'category'))
    column_types.update(dict.fromkeys(float_columns, np.float32))
    column_types.update(dict.fromkeys(int_columns, np.int32))

    archive_path = './open_data/airlines/year{0}.zip'.format(year)
    frame = pd.read_csv(archive_path, dtype=column_types)

    labels = frame["IsArrDelayed"].cat.codes

    feature_columns = [
        'UniqueCarrier', 'Origin', 'Dest', 'IsDepDelayed', 'Year', 'Month',
        'DayofMonth', 'DayOfWeek', 'DepTime', 'CRSDepTime', 'ArrTime',
        'CRSArrTime', 'FlightNum', 'TailNum', 'ActualElapsedTime',
        'CRSElapsedTime', 'AirTime', 'ArrDelay', 'DepDelay', 'Distance',
        'TaxiIn', 'TaxiOut', 'Cancelled', 'CancellationCode', 'Diverted',
        'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay',
        'LateAircraftDelay',
    ]
    features = frame[feature_columns]

    lgb_params = {
        'learning_rate': 0.1,
        'boosting': booster,
        'objective': 'binary',
        'metric': 'rmse',
        'feature_fraction': 0.9,
        'bagging_fraction': 0.75,
        'num_leaves': 31,
        'bagging_freq': 1,
        'min_data_per_leaf': 250,
        'device_type': 'gpu',
        'gpu_device_id': 0,
    }

    train_set = lgb.Dataset(data=features, label=labels)
    cv_result = lgb.cv(
        lgb_params,
        train_set,
        num_boost_round=50,
        early_stopping_rounds=5,
        stratified=False,
        verbose_eval=10,
    )