Python lightgbm.LGBMClassifier() Examples
The following are 30 code examples of lightgbm.LGBMClassifier(), collected from open-source projects. Each example notes its original project, source file, and license. You may also want to check out the other available functions and classes of the lightgbm module.
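Before the project-specific examples, here is a minimal, self-contained sketch of the estimator's scikit-learn-style fit/predict interface. The data is synthetic and the parameter values are illustrative assumptions, not recommendations:

import numpy as np
from lightgbm import LGBMClassifier

# Illustrative synthetic binary-classification data (assumption: any
# numeric feature matrix with an integer label vector works here).
X = np.random.rand(200, 5)
y = np.random.randint(0, 2, size=200)

clf = LGBMClassifier(n_estimators=50, learning_rate=0.1)
clf.fit(X, y)
labels = clf.predict(X)        # predicted class labels
proba = clf.predict_proba(X)   # per-class probabilities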
Example #1
Source File: LightGBM_Classify_adult.py From Machine-Learning-for-Beginner-by-Python3 with MIT License
def Train(data, modelcount, censhu, yanzhgdata):
    model = lgbm.LGBMClassifier(boosting_type='gbdt', objective='binary', num_leaves=50,
                                learning_rate=0.1, n_estimators=modelcount, max_depth=censhu,
                                bagging_fraction=0.9, feature_fraction=0.9, reg_lambda=0.2)

    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the F1 measure
    train_mse = fmse(data[:, -1], train_out)[0]

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the F1 measure
    add_mse = fmse(yanzhgdata[:, -1], add_yan)[0]
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that finalizes the chosen combination
Example #2
Source File: test_cross_validate.py From nyaggle with MIT License
def test_fit_params_callback():
    X, y = make_classification(n_samples=1024, n_features=20, class_sep=0.98, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    models = [LGBMClassifier(n_estimators=300) for _ in range(5)]

    sample_weights = np.random.randint(1, 10, size=len(X_train))
    sample_weights = sample_weights / sample_weights.sum()

    def fit_params(n: int, train_index: List[int], valid_index: List[int]):
        return {
            'early_stopping_rounds': 100,
            'sample_weight': list(sample_weights[train_index]),
            'eval_sample_weight': [list(sample_weights[valid_index])]
        }

    result_w_weight = cross_validate(models, X_train, y_train, X_test, cv=5,
                                     eval_func=roc_auc_score, fit_params=fit_params)

    result_wo_weight = cross_validate(models, X_train, y_train, X_test, cv=5,
                                      eval_func=roc_auc_score,
                                      fit_params={'early_stopping_rounds': 50})

    assert result_w_weight.scores[-1] != result_wo_weight.scores[-1]
Example #3
Source File: prediction_model_factory.py From redshells with MIT License
def __init__(self):
    self._models = dict()

    try:
        import sklearn.ensemble
        self._models['RandomForestClassifier'] = sklearn.ensemble.RandomForestClassifier
    except ImportError:
        pass

    try:
        import xgboost
        self._models['XGBClassifier'] = xgboost.XGBClassifier
    except ImportError:
        pass

    try:
        import lightgbm
        self._models['LGBMClassifier'] = lightgbm.LGBMClassifier
    except ImportError:
        pass

    try:
        import catboost
        self._models['CatBoostClassifier'] = catboost.CatBoostClassifier
    except ImportError:
        pass
Example #4
Source File: testScoreWithAdapaLgbm.py From nyoka with Apache License 2.0
def test_01_lgbm_classifier(self):
    print("\ntest 01 (lgbm classifier with preprocessing) [binary-class]\n")
    model = LGBMClassifier()
    pipeline_obj = Pipeline([
        ('scaler', MinMaxScaler()),
        ("model", model)
    ])
    pipeline_obj.fit(self.X, self.Y_bin)
    file_name = "test01lgbm.pmml"
    lgb_to_pmml(pipeline_obj, self.features, 'Species', file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
    model_pred = pipeline_obj.predict(self.X)
    model_prob = pipeline_obj.predict_proba(self.X)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example #5
Source File: test_lightgbm.py From hyperparameter_hunter with MIT License
def exp_lgb_0():
    return CVExperiment(
        model_initializer=LGBMClassifier,
        model_init_params=dict(
            boosting_type="gbdt",
            num_leaves=5,
            n_estimators=10,
            max_depth=5,
            min_child_samples=1,
            subsample=0.5,
            verbose=-1,
        ),
    )

##################################################
# Optimization Protocol Fixtures
##################################################
Example #6
Source File: test_lightgbm.py From hyperparameter_hunter with MIT License
def opt_lgb_0(request):
    optimizer = BayesianOptPro(target_metric=request.param, iterations=2, random_state=32)
    optimizer.forge_experiment(
        model_initializer=LGBMClassifier,
        model_init_params=dict(
            boosting_type=Categorical(["gbdt", "dart"]),
            num_leaves=Integer(2, 8),
            n_estimators=10,
            max_depth=5,
            min_child_samples=1,
            subsample=Real(0.4, 0.7),
            verbose=-1,
        ),
    )
    optimizer.go()
    yield optimizer

    assert optimizer.target_metric == ("oof", (request.param or "roc_auc"))

    # lb = pd.read_csv(
    #     # Path(assets_dir) / "HyperparameterHunterAssets" / "Leaderboards" / "GlobalLeaderboard.csv",
    #     Path(assets_dir) / "Leaderboards" / "GlobalLeaderboard.csv",
    # )
    # assert lb.columns[0] == f"oof_{request.param}"
Example #7
Source File: testScoreWithAdapaLgbm.py From nyoka with Apache License 2.0
def test_02_lgbm_classifier(self):
    print("\ntest 02 (lgbm classifier with preprocessing) [multi-class]\n")
    model = LGBMClassifier()
    pipeline_obj = Pipeline([
        ('scaler', MaxAbsScaler()),
        ("model", model)
    ])
    pipeline_obj.fit(self.X, self.Y)
    file_name = "test02lgbm.pmml"
    lgb_to_pmml(pipeline_obj, self.features, 'Species', file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
    model_pred = pipeline_obj.predict(self.X)
    model_prob = pipeline_obj.predict_proba(self.X)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example #8
Source File: test_flofo_importance.py From lofo-importance with MIT License
def test_flofo_importance():
    df = generate_test_data(100000)
    df.loc[df["A"] < df["A"].median(), "A"] = None
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=0)
    val_df_checkpoint = val_df.copy()
    features = ["A", "B", "C", "D"]

    lgbm = LGBMClassifier(random_state=0, n_jobs=1)
    lgbm.fit(train_df[features], train_df["binary_target"])

    flofo = FLOFOImportance(lgbm, df, features, 'binary_target', scoring='roc_auc')
    flofo_parallel = FLOFOImportance(lgbm, df, features, 'binary_target',
                                     scoring='roc_auc', n_jobs=3)

    importance_df = flofo.get_importance()
    importance_df_parallel = flofo_parallel.get_importance()
    is_feature_order_same = importance_df["feature"].values == importance_df_parallel["feature"].values

    plot_importance(importance_df)

    assert is_feature_order_same.sum() == len(features), "Parallel FLOFO returned different result!"
    assert val_df.equals(val_df_checkpoint), "LOFOImportance mutated the dataframe!"
    assert len(features) == importance_df.shape[0], "Missing importance value for some features!"
    assert importance_df["feature"].values[0] == "B", "Most important feature is different than B!"
Example #9
Source File: test_lightgbm.py From m2cgen with MIT License
def test_multi_class():
    estimator = lightgbm.LGBMClassifier(n_estimators=1, random_state=1, max_depth=1)
    estimator.fit(np.array([[1], [2], [3]]), np.array([1, 2, 3]))

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    exponent = ast.ExpExpr(
        ast.NumVal(-1.0986122886681098),
        to_reuse=True)

    exponent_sum = ast.BinNumExpr(
        ast.BinNumExpr(exponent, exponent, ast.BinNumOpType.ADD),
        exponent,
        ast.BinNumOpType.ADD,
        to_reuse=True)

    softmax = ast.BinNumExpr(exponent, exponent_sum, ast.BinNumOpType.DIV)

    expected = ast.VectorVal([softmax] * 3)

    assert utils.cmp_exprs(actual, expected)
Example #10
Source File: test_json_pretty_viz.py From lale with Apache License 2.0
def test_autoai_libs_t_no_op(self):
    from lale.lib.autoai_libs import TNoOp
    from lightgbm import LGBMClassifier
    from lale.operators import make_pipeline
    t_no_op = TNoOp(name='no_action', datatypes='x', feat_constraints=[])
    lgbm_classifier = LGBMClassifier(class_weight='balanced', learning_rate=0.18)
    pipeline = make_pipeline(t_no_op, lgbm_classifier)
    expected = \
"""from lale.lib.autoai_libs import TNoOp
from lightgbm import LGBMClassifier
from lale.operators import make_pipeline
t_no_op = TNoOp(name='no_action', datatypes='x', feat_constraints=[])
lgbm_classifier = LGBMClassifier(class_weight='balanced', learning_rate=0.18)
pipeline = make_pipeline(t_no_op, lgbm_classifier)"""
    self._roundtrip(expected, lale.pretty_print.to_string(pipeline, combinators=False))
Example #11
Source File: utils.py From m2cgen with MIT License
def __call__(self, estimator):
    fitted_estimator = estimator.fit(self.X_train, self.y_train)

    if isinstance(estimator, (LinearClassifierMixin, SVC, NuSVC, LightBaseClassifier)):
        y_pred = estimator.decision_function(self.X_test)
    elif isinstance(estimator, DecisionTreeClassifier):
        y_pred = estimator.predict_proba(self.X_test.astype(np.float32))
    elif isinstance(
            estimator,
            (ForestClassifier, XGBClassifier, LGBMClassifier)):
        y_pred = estimator.predict_proba(self.X_test)
    else:
        y_pred = estimator.predict(self.X_test)

    return self.X_test, y_pred, fitted_estimator
Example #12
Source File: test_cross_validate.py From nyaggle with MIT License
def test_cv_lgbm_df():
    X, y = make_classification_df(n_samples=1024, n_num_features=20, n_cat_features=1,
                                  class_sep=0.98, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    models = [LGBMClassifier(n_estimators=300) for _ in range(5)]

    pred_oof, pred_test, scores, importance = cross_validate(models, X_train, y_train, X_test,
                                                             cv=5, eval_func=roc_auc_score)

    print(scores)
    assert len(scores) == 5 + 1
    assert scores[-1] >= 0.85  # overall roc_auc
    assert roc_auc_score(y_train, pred_oof) == scores[-1]
    assert roc_auc_score(y_test, pred_test) >= 0.85  # test roc_auc
    assert roc_auc_score(y_test, models[0].predict_proba(X_test)[:, 1]) >= 0.85  # make sure models are trained
    assert len(importance) == 5
    assert list(importance[0].columns) == ['feature', 'importance']
    assert len(importance[0]) == 20 + 1
    assert models[0].booster_.num_trees() < 300  # making sure early stopping worked
Example #13
Source File: test_cross_validate.py From nyaggle with MIT License
def test_cv_lgbm():
    X, y = make_classification(n_samples=1024, n_features=20, class_sep=0.98, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    models = [LGBMClassifier(n_estimators=300) for _ in range(5)]

    pred_oof, pred_test, scores, importance = cross_validate(models, X_train, y_train, X_test,
                                                             cv=5, eval_func=roc_auc_score,
                                                             fit_params={'early_stopping_rounds': 200})

    print(scores)
    assert len(scores) == 5 + 1
    assert scores[-1] >= 0.85  # overall roc_auc
    assert roc_auc_score(y_train, pred_oof) == scores[-1]
    assert roc_auc_score(y_test, pred_test) >= 0.85  # test roc_auc
    assert roc_auc_score(y, models[0].predict_proba(X)[:, 1]) >= 0.85  # make sure models are trained
    assert len(importance) == 5
    assert list(importance[0].columns) == ['feature', 'importance']
    assert len(importance[0]) == 20
Example #14
Source File: LightGBM_Classify_adult.py From Machine-Learning-for-Beginner-by-Python3 with MIT License
def recspre(estrs, predata, datadict, zhe):
    mo, ze = estrs.split('-')
    model = lgbm.LGBMClassifier(boosting_type='gbdt', objective='binary', num_leaves=50,
                                learning_rate=0.1, n_estimators=int(mo), max_depth=int(ze),
                                bagging_fraction=0.9, feature_fraction=0.9, reg_lambda=0.2)

    model.fit(datadict[zhe]['train'][:, :-1], datadict[zhe]['train'][:, -1])

    # Predict
    yucede = model.predict(predata[:, :-1])
    # Compute the confusion matrix
    print(ConfuseMatrix(predata[:, -1], yucede))
    return fmse(predata[:, -1], yucede)

# Main function
Example #15
Source File: test_core.py From dask-lightgbm with BSD 3-Clause "New" or "Revised" License
def test_classifier(output, centers, client, listen_port):  # noqa
    X, y, w, dX, dy, dw = _create_data('classification', output=output, centers=centers)

    a = dlgbm.LGBMClassifier(time_out=5, local_listen_port=listen_port)
    a = a.fit(dX, dy, sample_weight=dw, client=client)
    p1 = a.predict(dX, client=client)
    s1 = accuracy_score(dy, p1)
    p1 = p1.compute()

    b = lightgbm.LGBMClassifier()
    b.fit(X, y, sample_weight=w)
    p2 = b.predict(X)
    s2 = b.score(X, y)
    print(confusion_matrix(y, p1))
    print(confusion_matrix(y, p2))

    assert_eq(s1, s2)
    print(s1)

    assert_eq(p1, p2)
    assert_eq(y, p1)
    assert_eq(y, p2)
Example #16
Source File: test_lofo_importance.py From lofo-importance with MIT License
def test_feature_groups():
    df = generate_test_data(1000, text=True)
    features = ["A", "B", "C", "D"]

    cv = CountVectorizer(ngram_range=(3, 3), analyzer="char")
    feature_groups = dict()
    feature_groups["names"] = cv.fit_transform(df["T"])
    feature_groups["interactions"] = df[["A", "B"]].values * df[["C", "D"]].values

    dataset = Dataset(df=df, target="binary_target", features=features,
                      feature_groups=feature_groups)

    lgbm = LGBMClassifier(random_state=0, n_jobs=4)
    lofo = LOFOImportance(dataset, model=lgbm, cv=4, scoring='roc_auc')

    importance_df = lofo.get_importance()

    assert len(features) + len(feature_groups) == importance_df.shape[0], "Missing importance value for some features!"
    assert importance_df["feature"].values[0] == "names", "Most important feature is different than 'names'!"
Example #17
Source File: test_json_pretty_viz.py From lale with Apache License 2.0
def test_autoai_libs_tam_2(self):
    from lale.lib.autoai_libs import TAM
    import numpy as np
    from lightgbm import LGBMClassifier
    from sklearn.decomposition import PCA
    from lale.operators import make_pipeline
    pca = PCA(copy=False)
    tam = TAM(tans_class=pca, name='pca', col_names=['a', 'b', 'c'],
              col_dtypes=[np.dtype('float32'), np.dtype('float32'), np.dtype('float32')])
    lgbm_classifier = LGBMClassifier(class_weight='balanced', learning_rate=0.18)
    pipeline = make_pipeline(tam, lgbm_classifier)
    expected = \
"""from lale.lib.autoai_libs import TAM
import sklearn.decomposition.pca
import numpy as np
from lightgbm import LGBMClassifier
from lale.operators import make_pipeline
tam = TAM(tans_class=sklearn.decomposition.pca.PCA(copy=False, iterated_power='auto', n_components=None, random_state=None, svd_solver='auto', tol=0.0, whiten=False), name='pca', col_names=['a', 'b', 'c'], col_dtypes=[np.dtype('float32'), np.dtype('float32'), np.dtype('float32')])
lgbm_classifier = LGBMClassifier(class_weight='balanced', learning_rate=0.18)
pipeline = make_pipeline(tam, lgbm_classifier)"""
    self._roundtrip(expected, lale.pretty_print.to_string(pipeline, combinators=False))
Example #18
Source File: demo_IJCAI_2018.py From Feature-Selection with MIT License
def main():
    sf = ss.Select(Sequence=True, Random=True, Cross=False)  # select the search strategy you want
    sf.ImportDF(prepareData(), label='is_trade')
    sf.ImportLossFunction(modelscore, direction='descend')
    sf.ImportCrossMethod(CrossMethod)
    sf.InitialNonTrainableFeatures(['used', 'instance_id', 'item_property_list', 'context_id',
                                    'context_timestamp', 'predict_category_property', 'is_trade'])
    sf.InitialFeatures(['item_category_list', 'item_price_level', 'item_sales_level',
                        'item_collected_level', 'item_pv_level', 'day'])
    sf.GenerateCol(key='mean', selectstep=2)
    sf.SetSample(0.1, samplemode=0, samplestate=0)
    # sf.SetFeaturesLimit(5)
    sf.SetTimeLimit(1)
    sf.clf = lgbm.LGBMClassifier(random_state=1, num_leaves=6, n_estimators=5000,
                                 max_depth=3, learning_rate=0.05, n_jobs=8)
    sf.SetLogFile('recordml.log')
    sf.run(validation)
Example #19
Source File: test_lightgbm.py From sklearn-onnx with MIT License
def setUpClass(self):
    update_registered_converter(
        LGBMClassifier, 'LightGbmLGBMClassifier',
        calculate_linear_classifier_output_shapes,
        convert_lightgbm,
        options={'zipmap': [True, False], 'nocl': [True, False]})
    update_registered_converter(
        LGBMRegressor, 'LgbmRegressor',
        calculate_linear_regressor_output_shapes,
        convert_lightgbm)
Example #20
Source File: sklearn.py From optuna with MIT License
def __init__(self, *args, **kwargs):
    # type: (List[Any], Dict[str, Any]) -> None

    warnings.warn(
        "LightGBMTuner doesn't support sklearn API. "
        "Use `train()` or `LightGBMTuner` for hyperparameter tuning."
    )
    super(LGBMClassifier, self).__init__(*args, **kwargs)
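The warning above steers users away from the sklearn wrapper and toward LightGBM's native training entry point. For context, here is a minimal sketch of that train()-style API; the synthetic data and parameter values are illustrative assumptions:

import numpy as np
import lightgbm as lgb

# Illustrative synthetic data for the sketch.
X = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)

dtrain = lgb.Dataset(X, label=y)
params = {'objective': 'binary', 'verbosity': -1}
booster = lgb.train(params, dtrain, num_boost_round=10)
pred = booster.predict(X)  # probabilities for the positive class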
Example #21
Source File: tester.py From Text-Classification-Benchmark with MIT License
def init_estimators():
    return [
        {'classifier': 'NB', 'model': MultinomialNB()},
        {'classifier': 'LR', 'model': LogisticRegression(random_state=42)},
        {'classifier': 'L-SVM', 'model': LinearSVC(max_iter=1000, random_state=42)},
        {'classifier': 'RBF-SVM', 'model': SVC(max_iter=1000, random_state=42)},
        {'classifier': 'RF', 'model': RandomForestClassifier(n_estimators=100, random_state=42)},
        {'classifier': 'XGB', 'model': XGBClassifier(n_estimators=100, random_state=42)},
        {'classifier': 'LGBM', 'model': LGBMClassifier(n_estimators=100, random_state=42)},
    ]
Example #22
Source File: S1elect.py From Feature-Selection with MIT License
def main():
    sf = ss.Select(Sequence=True, Random=False, Cross=False)  # Initialize the selector and choose the workflow you need
    sf.ImportDF(prepareData(), label='buy')  # Import the dataset and the target label
    sf.ImportLossFunction(score1, direction='ascend')  # Import the scoring function and the optimization direction
    # sf.ImportCrossMethod(CrossMethod)
    sf.InitialNonTrainableFeatures(['buy', 'nextbuy', 'o_date', 'a_date', 'PredictDays', 'user_id'])  # Initialize the features that must not be used
    sf.InitialFeatures(['age_x', 'sex_x', 'user_lv_cd_x', 'buycnt', 'daybeforelastbuy_o_ave'])  # Initialize the starting feature combination
    sf.GenerateCol()  # Generate the feature pool (see the readme in the root directory for this function's parameters)
    sf.SetSample(1, samplemode=1)  # Initialize the sampling ratio and the random process
    sf.SetTimeLimit(100)  # Set the maximum running time of the algorithm, in minutes
    sf.clf = lgbm.LGBMClassifier(random_state=1, num_leaves=6, n_estimators=1000,
                                 max_depth=3, learning_rate=0.2, n_jobs=8)  # Set the model
    sf.SetLogFile('record.log')  # Initialize the log file
    sf.run(validate)  # Pass in the validation function and start running
Example #23
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0
def _checkGBDTClassifier(gbdt):
    if hasattr(gbdt, "apply"):
        return gbdt
    else:
        try:
            from lightgbm import LGBMClassifier
            if isinstance(gbdt, LGBMClassifier):
                return gbdt
        except ImportError:
            pass
    raise ValueError("GBDT class " + _class_name(gbdt) + " is not supported")
Example #24
Source File: test_lightgbm.py From sklearn-onnx with MIT License
def test_lightgbm_classifier(self):
    model = LGBMClassifier(n_estimators=3, min_child_samples=1)
    dump_binary_classification(
        model, allow_failure="StrictVersion(onnx.__version__) < "
                             "StrictVersion('1.3.0')")
    dump_multiple_classification(
        model, allow_failure="StrictVersion(onnx.__version__) < "
                             "StrictVersion('1.3.0')")
Example #25
Source File: test_LightGbmTreeEnsembleConverters.py From onnxmltools with MIT License
def test_lightgbm_classifier_zipmap(self):
    X = [[0, 1], [1, 1], [2, 0], [1, 2]]
    X = numpy.array(X, dtype=numpy.float32)
    y = [0, 1, 0, 1]
    model = LGBMClassifier(n_estimators=3, min_child_samples=1)
    model.fit(X, y)
    onx = convert_model(
        model, 'dummy',
        input_types=[('X', FloatTensorType([None, X.shape[1]]))])
    assert "zipmap" in str(onx).lower()
Example #26
Source File: lightgbm_model.py From interpret-community with MIT License
def model(self):
    """Retrieve the underlying model.

    :return: The lightgbm model, either classifier or regressor.
    :rtype: Union[LGBMClassifier, LGBMRegressor]
    """
    return self._lgbm
Example #27
Source File: common_utils.py From interpret-community with MIT License
def create_lightgbm_classifier(X, y):
    lgbm = LGBMClassifier(boosting_type='gbdt', learning_rate=0.1,
                          max_depth=5, n_estimators=200, n_jobs=1, random_state=777)
    model = lgbm.fit(X, y)
    return model
Example #28
Source File: test_misc_explainers.py From interpret-community with MIT License
def _get_create_model(self, classification):
    if classification:
        model = LGBMClassifier()
    else:
        model = LGBMRegressor()

    def create_model(x, y):
        return model.fit(x, y)

    return create_model
Example #29
Source File: test_onnxmltools.py From onnxconverter-common with MIT License
def test_lightgbm(self):
    X = [[0, 1], [1, 1], [2, 0], [1, 2]]
    X = np.array(X, dtype=np.float32)
    y = [0, 1, 0, 1]
    model = lightgbm.LGBMClassifier(n_estimators=3, min_child_samples=1)
    model.fit(X, y)
    onx = xmlt.convert(
        model, 'dummy',
        initial_types=[('X', FloatTensorType([None, X.shape[1]]))],
        target_opset=9)
    assert "ir_version: 4" in str(onx).lower()
Example #30
Source File: _parse.py From onnxmltools with MIT License
def _parse_lightgbm(scope, model, inputs):
    '''
    This is a delegate function. It does nothing but invoke the correct
    parsing function according to the input model's type.
    :param scope: Scope object
    :param model: A lightgbm object
    :param inputs: A list of variables
    :return: The output variables produced by the input model
    '''
    if isinstance(model, LGBMClassifier):
        return _parse_sklearn_classifier(scope, model, inputs)
    if (isinstance(model, WrappedBooster) and
            model.operator_name == 'LgbmClassifier'):
        return _parse_sklearn_classifier(scope, model, inputs)
    return _parse_lightgbm_simple_model(scope, model, inputs)