Python lightgbm.cv() Examples

The following are 11 code examples of lightgbm.cv(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module lightgbm , or try the search function .
Example #1
Source File: optimize.py    From optuna with MIT License 8 votes vote down vote up
def __call__(self, trial: optuna.trial.Trial) -> float:

        self._preprocess(trial)

        start_time = time.time()
        cv_results = lgb.cv(self.lgbm_params, self.train_set, **self.lgbm_kwargs)

        val_scores = self._get_cv_scores(cv_results)
        val_score = val_scores[-1]
        elapsed_secs = time.time() - start_time
        average_iteration_time = elapsed_secs / len(val_scores)

        if self.compare_validation_metrics(val_score, self.best_score):
            self.best_score = val_score

        self._postprocess(trial, elapsed_secs, average_iteration_time)

        return val_score 
Example #2
Source File: lgb_tune.py    From ml-parameter-optimization with MIT License 6 votes vote down vote up
def get_n_estimators(self):
        """
        returns optimal number of estimators using CV on training set
        """
        lgb_param = {}
        for _params_key,_params_value in self._params.items():
            if _params_key in self._dict_map.keys():
                lgb_param[self._dict_map[_params_key]] = _params_value
            else:
                lgb_param[_params_key] = _params_value
        
        if self.balance_class:
            lgb_train = lgb.Dataset(self.X, label=self.y, weight=self.get_label_weights())
        else:
            lgb_train = lgb.Dataset(self.X, label=self.y)
        
        kwargs_cv = {'num_boost_round':self.params['n_estimators'],
                     'nfold':self.params_cv['cv_folds'],
                     'early_stopping_rounds':self.params_cv['early_stopping_rounds'],
                     'stratified':self.params_cv['stratified']}
        
        try: # check if custom evalution function is specified
            if callable(self.params_cv['feval']):
                kwargs_cv['feval'] = self.params_cv['feval']
        except KeyError:
            kwargs_cv['metrics'] = self.params_cv['metrics']
        
        if type(self.categorical_feature)==list:
            kwargs_cv['categorical_feature'] = self.categorical_feature
        else:
            kwargs_cv['categorical_feature'] = 'auto'
        
        cvresult = lgb.cv(lgb_param,lgb_train,**kwargs_cv)
        self._params['n_estimators'] = int(len(cvresult[kwargs_cv['metrics'] + \
                                            '-mean'])/(1-1/self.params_cv['cv_folds']))
        return self 
Example #3
Source File: test_lightgbm.py    From optuna with MIT License 6 votes vote down vote up
def test_lightgbm_pruning_callback_call(cv):
    # type: (bool) -> None

    callback_env = partial(
        lgb.callback.CallbackEnv,
        model="test",
        params={},
        begin_iteration=0,
        end_iteration=1,
        iteration=1,
    )

    if cv:
        env = callback_env(evaluation_result_list=[(("cv_agg", "binary_error", 1.0, False, 1.0))])
    else:
        env = callback_env(evaluation_result_list=[("validation", "binary_error", 1.0, False)])

    # The pruner is deactivated.
    study = optuna.create_study(pruner=DeterministicPruner(False))
    trial = create_running_trial(study, 1.0)
    pruning_callback = LightGBMPruningCallback(trial, "binary_error", valid_name="validation")
    pruning_callback(env)

    # The pruner is activated.
    study = optuna.create_study(pruner=DeterministicPruner(True))
    trial = create_running_trial(study, 1.0)
    pruning_callback = LightGBMPruningCallback(trial, "binary_error", valid_name="validation")
    with pytest.raises(optuna.TrialPruned):
        pruning_callback(env) 
Example #4
Source File: test_lightgbm.py    From optuna with MIT License 6 votes vote down vote up
def test_lightgbm_pruning_callback(cv):
    # type: (bool) -> None

    study = optuna.create_study(pruner=DeterministicPruner(True))
    study.optimize(partial(objective, cv=cv), n_trials=1)
    assert study.trials[0].state == optuna.trial.TrialState.PRUNED

    study = optuna.create_study(pruner=DeterministicPruner(False))
    study.optimize(partial(objective, cv=cv), n_trials=1)
    assert study.trials[0].state == optuna.trial.TrialState.COMPLETE
    assert study.trials[0].value == 1.0

    # Use non default validation name.
    custom_valid_name = "my_validation"
    study = optuna.create_study(pruner=DeterministicPruner(False))
    study.optimize(lambda trial: objective(trial, valid_name=custom_valid_name, cv=cv), n_trials=1)
    assert study.trials[0].state == optuna.trial.TrialState.COMPLETE
    assert study.trials[0].value == 1.0

    # Check "maximize" direction.
    study = optuna.create_study(pruner=DeterministicPruner(True), direction="maximize")
    study.optimize(lambda trial: objective(trial, metric="auc", cv=cv), n_trials=1, catch=())
    assert study.trials[0].state == optuna.trial.TrialState.PRUNED

    study = optuna.create_study(pruner=DeterministicPruner(False), direction="maximize")
    study.optimize(lambda trial: objective(trial, metric="auc", cv=cv), n_trials=1, catch=())
    assert study.trials[0].state == optuna.trial.TrialState.COMPLETE
    assert study.trials[0].value == 1.0 
Example #5
Source File: test_lightgbm.py    From optuna with MIT License 6 votes vote down vote up
def test_lightgbm_pruning_callback_errors(cv):
    # type: (bool) -> None

    # Unknown metric
    study = optuna.create_study(pruner=DeterministicPruner(False))
    with pytest.raises(ValueError):
        study.optimize(
            lambda trial: objective(trial, metric="foo_metric", cv=cv), n_trials=1, catch=()
        )

    if not cv:
        # Unknown validation name
        study = optuna.create_study(pruner=DeterministicPruner(False))
        with pytest.raises(ValueError):
            study.optimize(
                lambda trial: objective(
                    trial, valid_name="valid_1", force_default_valid_names=True
                ),
                n_trials=1,
                catch=(),
            )

    # Check consistency of study direction.
    study = optuna.create_study(pruner=DeterministicPruner(False))
    with pytest.raises(ValueError):
        study.optimize(lambda trial: objective(trial, metric="auc", cv=cv), n_trials=1, catch=())

    study = optuna.create_study(pruner=DeterministicPruner(False), direction="maximize")
    with pytest.raises(ValueError):
        study.optimize(
            lambda trial: objective(trial, metric="binary_error", cv=cv), n_trials=1, catch=()
        ) 
Example #6
Source File: test_lightgbm.py    From optuna with MIT License 6 votes vote down vote up
def objective(
    trial, metric="binary_error", valid_name="valid_0", force_default_valid_names=False, cv=False
):
    # type: (optuna.trial.Trial, str, str, bool, bool) -> float

    dtrain = lgb.Dataset([[1.0], [2.0], [3.0]], label=[1.0, 0.0, 1.0])
    dtest = lgb.Dataset([[1.0]], label=[1.0])

    if force_default_valid_names:
        valid_names = None
    else:
        valid_names = [valid_name]

    pruning_callback = LightGBMPruningCallback(trial, metric, valid_name=valid_name)
    if cv:
        lgb.cv(
            {"objective": "binary", "metric": ["auc", "binary_error"]},
            dtrain,
            1,
            verbose_eval=False,
            nfold=2,
            callbacks=[pruning_callback],
        )
    else:
        lgb.train(
            {"objective": "binary", "metric": ["auc", "binary_error"]},
            dtrain,
            1,
            valid_sets=[dtest],
            valid_names=valid_names,
            verbose_eval=False,
            callbacks=[pruning_callback],
        )
    return 1.0 
Example #7
Source File: test_lightgbm.py    From h2o4gpu with Apache License 2.0 6 votes vote down vote up
def test_lightgbm_gpu(booster):
    import numpy as np
    import pandas as pd
    from h2o4gpu.util.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgb

    X1 = np.repeat(np.arange(10), 1000)
    X2 = np.repeat(np.arange(10), 1000)
    np.random.shuffle(X2)

    y = (X1 + np.random.randn(10000)) * (X2 + np.random.randn(10000))
    data = pd.DataFrame({'y': y, 'X1': X1, 'X2': X2})

    lgb_params = {'learning_rate': 0.1,
                  'boosting': booster,
                  'objective': 'regression',
                  'metric': 'rmse',
                  'feature_fraction': 0.9,
                  'bagging_fraction': 0.75,
                  'num_leaves': 31,
                  'bagging_freq': 1,
                  'min_data_per_leaf': 250, 'device_type': 'gpu', 'gpu_device_id': 0}
    lgb_train = lgb.Dataset(data=data[['X1', 'X2']], label=data.y)
    cv = lgb.cv(lgb_params,
                lgb_train,
                num_boost_round=100,
                early_stopping_rounds=15,
                stratified=False,
                verbose_eval=50) 
Example #8
Source File: test_lightgbm.py    From h2o4gpu with Apache License 2.0 6 votes vote down vote up
def test_lightgbm_cpu(booster):
    import numpy as np
    import pandas as pd
    from h2o4gpu.util.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgb

    X1 = np.repeat(np.arange(10), 1000)
    X2 = np.repeat(np.arange(10), 1000)
    np.random.shuffle(X2)

    y = (X1 + np.random.randn(10000)) * (X2 + np.random.randn(10000))
    data = pd.DataFrame({'y': y, 'X1': X1, 'X2': X2})

    lgb_params = {'learning_rate': 0.1,
                  'boosting': booster,
                  'objective': 'regression',
                  'metric': 'rmse',
                  'feature_fraction': 0.9,
                  'bagging_fraction': 0.75,
                  'num_leaves': 31,
                  'bagging_freq': 1,
                  'min_data_per_leaf': 250}
    lgb_train = lgb.Dataset(data=data[['X1', 'X2']], label=data.y)
    cv = lgb.cv(lgb_params,
                lgb_train,
                num_boost_round=100,
                early_stopping_rounds=15,
                stratified=False,
                verbose_eval=50) 
Example #9
Source File: test_lightgbm.py    From h2o4gpu with Apache License 2.0 5 votes vote down vote up
def test_lightgbm_cpu_airlines_full(booster):
    import numpy as np
    import pandas as pd
    from h2o4gpu.util.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgb

    data = pd.read_csv('./open_data/allyears.1987.2013.zip',
                       dtype={'UniqueCarrier': 'category', 'Origin': 'category', 'Dest': 'category',
                              'TailNum': 'category', 'CancellationCode': 'category',
                              'IsArrDelayed': 'category', 'IsDepDelayed': 'category',
                              'DepTime': np.float32, 'CRSDepTime': np.float32, 'ArrTime': np.float32,
                              'CRSArrTime': np.float32, 'ActualElapsedTime': np.float32,
                              'CRSElapsedTime': np.float32, 'AirTime': np.float32,
                              'ArrDelay': np.float32, 'DepDelay': np.float32, 'Distance': np.float32,
                              'TaxiIn': np.float32, 'TaxiOut': np.float32, 'Diverted': np.float32,
                              'Year': np.int32, 'Month': np.int32, 'DayOfWeek': np.int32,
                              'DayofMonth': np.int32, 'Cancelled': 'category',
                              'CarrierDelay': np.float32, 'WeatherDelay': np.float32,
                              'NASDelay': np.float32, 'SecurityDelay': np.float32,
                              'LateAircraftDelay': np.float32})

    y = data["IsArrDelayed"].cat.codes
    data = data[['UniqueCarrier', 'Origin', 'Dest', 'IsDepDelayed', 'Year', 'Month',
                 'DayofMonth', 'DayOfWeek', 'DepTime', 'CRSDepTime',
                 'ArrTime', 'CRSArrTime', 'FlightNum', 'TailNum',
                 'ActualElapsedTime', 'CRSElapsedTime', 'AirTime', 'ArrDelay',
                 'DepDelay', 'Distance', 'TaxiIn', 'TaxiOut',
                 'Cancelled', 'CancellationCode', 'Diverted', 'CarrierDelay',
                 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay']]

    lgb_params = {'learning_rate': 0.1,
                  'boosting': booster,
                  'objective': 'binary',
                  'metric': 'rmse',
                  'feature_fraction': 0.9,
                  'bagging_fraction': 0.75,
                  'num_leaves': 31,
                  'bagging_freq': 1,
                  'min_data_per_leaf': 250}
    lgb_train = lgb.Dataset(data=data, label=y)
    cv = lgb.cv(lgb_params,
                lgb_train,
                num_boost_round=50,
                early_stopping_rounds=5,
                stratified=False,
                verbose_eval=10) 
Example #10
Source File: test_lightgbm.py    From h2o4gpu with Apache License 2.0 5 votes vote down vote up
def test_lightgbm_cpu_airlines_year(booster, year):
    import numpy as np
    import pandas as pd
    from h2o4gpu.util.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgb

    data = pd.read_csv('./open_data/airlines/year{0}.zip'.format(year),
                       dtype={'UniqueCarrier': 'category', 'Origin': 'category', 'Dest': 'category',
                              'TailNum': 'category', 'CancellationCode': 'category',
                              'IsArrDelayed': 'category', 'IsDepDelayed': 'category',
                              'DepTime': np.float32, 'CRSDepTime': np.float32, 'ArrTime': np.float32,
                              'CRSArrTime': np.float32, 'ActualElapsedTime': np.float32,
                              'CRSElapsedTime': np.float32, 'AirTime': np.float32,
                              'ArrDelay': np.float32, 'DepDelay': np.float32, 'Distance': np.float32,
                              'TaxiIn': np.float32, 'TaxiOut': np.float32, 'Diverted': np.float32,
                              'Year': np.int32, 'Month': np.int32, 'DayOfWeek': np.int32,
                              'DayofMonth': np.int32, 'Cancelled': 'category',
                              'CarrierDelay': np.float32, 'WeatherDelay': np.float32,
                              'NASDelay': np.float32, 'SecurityDelay': np.float32,
                              'LateAircraftDelay': np.float32})

    y = data["IsArrDelayed"].cat.codes
    data = data[['UniqueCarrier', 'Origin', 'Dest', 'IsDepDelayed', 'Year', 'Month',
                 'DayofMonth', 'DayOfWeek', 'DepTime', 'CRSDepTime',
                 'ArrTime', 'CRSArrTime', 'FlightNum', 'TailNum',
                 'ActualElapsedTime', 'CRSElapsedTime', 'AirTime', 'ArrDelay',
                 'DepDelay', 'Distance', 'TaxiIn', 'TaxiOut',
                 'Cancelled', 'CancellationCode', 'Diverted', 'CarrierDelay',
                 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay']]

    lgb_params = {'learning_rate': 0.1,
                  'boosting': booster,
                  'objective': 'binary',
                  'metric': 'rmse',
                  'feature_fraction': 0.9,
                  'bagging_fraction': 0.75,
                  'num_leaves': 31,
                  'bagging_freq': 1,
                  'min_data_per_leaf': 250}
    lgb_train = lgb.Dataset(data=data, label=y)
    cv = lgb.cv(lgb_params,
                lgb_train,
                num_boost_round=50,
                early_stopping_rounds=5,
                stratified=False,
                verbose_eval=10) 
Example #11
Source File: test_lightgbm.py    From h2o4gpu with Apache License 2.0 5 votes vote down vote up
def test_lightgbm_gpu_airlines_year(booster, year):
    import numpy as np
    import pandas as pd
    from h2o4gpu.util.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgb

    data = pd.read_csv('./open_data/airlines/year{0}.zip'.format(year),
                       dtype={'UniqueCarrier': 'category', 'Origin': 'category', 'Dest': 'category',
                              'TailNum': 'category', 'CancellationCode': 'category',
                              'IsArrDelayed': 'category', 'IsDepDelayed': 'category',
                              'DepTime': np.float32, 'CRSDepTime': np.float32, 'ArrTime': np.float32,
                              'CRSArrTime': np.float32, 'ActualElapsedTime': np.float32,
                              'CRSElapsedTime': np.float32, 'AirTime': np.float32,
                              'ArrDelay': np.float32, 'DepDelay': np.float32, 'Distance': np.float32,
                              'TaxiIn': np.float32, 'TaxiOut': np.float32, 'Diverted': np.float32,
                              'Year': np.int32, 'Month': np.int32, 'DayOfWeek': np.int32,
                              'DayofMonth': np.int32, 'Cancelled': 'category',
                              'CarrierDelay': np.float32, 'WeatherDelay': np.float32,
                              'NASDelay': np.float32, 'SecurityDelay': np.float32,
                              'LateAircraftDelay': np.float32})

    y = data["IsArrDelayed"].cat.codes
    data = data[['UniqueCarrier', 'Origin', 'Dest', 'IsDepDelayed', 'Year', 'Month',
                 'DayofMonth', 'DayOfWeek', 'DepTime', 'CRSDepTime',
                 'ArrTime', 'CRSArrTime', 'FlightNum', 'TailNum',
                 'ActualElapsedTime', 'CRSElapsedTime', 'AirTime', 'ArrDelay',
                 'DepDelay', 'Distance', 'TaxiIn', 'TaxiOut',
                 'Cancelled', 'CancellationCode', 'Diverted', 'CarrierDelay',
                 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay']]

    lgb_params = {'learning_rate': 0.1,
                  'boosting': booster,
                  'objective': 'binary',
                  'metric': 'rmse',
                  'feature_fraction': 0.9,
                  'bagging_fraction': 0.75,
                  'num_leaves': 31,
                  'bagging_freq': 1,
                  'min_data_per_leaf': 250,
                  'device_type': 'gpu',
                  'gpu_device_id': 0}
    lgb_train = lgb.Dataset(data=data, label=y)
    cv = lgb.cv(lgb_params,
                lgb_train,
                num_boost_round=50,
                early_stopping_rounds=5,
                stratified=False,
                verbose_eval=10)