Python lightgbm.LGBMRegressor() Examples

The following are 30 code examples of lightgbm.LGBMRegressor(), collected from open-source projects. The source file, project, and license for each example are listed above it. You may also want to check out the other available functions and classes of the lightgbm module.
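Before diving in, here is a minimal, self-contained sketch of the estimator's scikit-learn-style API. The synthetic data and hyperparameter values are illustrative only:

import numpy as np
import lightgbm

# Toy regression data for illustration
rng = np.random.RandomState(0)
X = rng.normal(size=(200, 5))
y = 2.0 * X[:, 0] + rng.normal(scale=0.1, size=200)

# Fit and predict with the scikit-learn-compatible wrapper
model = lightgbm.LGBMRegressor(n_estimators=50, random_state=0)
model.fit(X, y)
preds = model.predict(X)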
Example #1
Source File: run.py    From nyaggle with MIT License
def _dispatch_gbdt_class(algorithm_type: str, type_of_target: str):
    is_regression = type_of_target == 'continuous'

    if algorithm_type == 'lgbm':
        requires_lightgbm()
        from lightgbm import LGBMClassifier, LGBMRegressor
        return LGBMRegressor if is_regression else LGBMClassifier
    elif algorithm_type == 'cat':
        requires_catboost()
        from catboost import CatBoostClassifier, CatBoostRegressor
        return CatBoostRegressor if is_regression else CatBoostClassifier
    else:
        requires_xgboost()
        assert algorithm_type == 'xgb'
        from xgboost import XGBClassifier, XGBRegressor
        return XGBRegressor if is_regression else XGBClassifier 
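A hedged usage sketch for the dispatcher above (the call is hypothetical; requires_lightgbm() and friends are nyaggle-internal dependency checks):

model_class = _dispatch_gbdt_class('lgbm', 'continuous')  # -> LGBMRegressor
model = model_class(n_estimators=100)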
Example #2
Source File: LightGBM_Regression_pm25.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def Train(data, modelcount, censhu, yanzhgdata):
    model = lgbm.LGBMRegressor(boosting_type='gbdt', objective='regression', num_leaves=1200,
                                learning_rate=0.17, n_estimators=modelcount, max_depth=censhu,
                                metric='rmse', bagging_fraction=0.8, feature_fraction=0.8, reg_lambda=0.9)

    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the MSE
    train_mse = mse(data[:, -1], train_out)

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the MSE
    add_mse = mse(yanzhgdata[:, -1], add_yan)
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that finalizes the combination
Example #3
Source File: test_core.py    From dask-lightgbm with BSD 3-Clause "New" or "Revised" License
def test_regressor_quantile(output, client, listen_port, alpha):  # noqa
    X, y, w, dX, dy, dw = _create_data('regression', output=output)

    a = dlgbm.LGBMRegressor(local_listen_port=listen_port, seed=42, objective='quantile', alpha=alpha)
    a = a.fit(dX, dy, client=client, sample_weight=dw)
    p1 = a.predict(dX, client=client).compute()
    q1 = np.count_nonzero(y < p1) / y.shape[0]

    b = lightgbm.LGBMRegressor(seed=42, objective='quantile', alpha=alpha)
    b.fit(X, y, sample_weight=w)
    p2 = b.predict(X)
    q2 = np.count_nonzero(y < p2) / y.shape[0]

    # The empirical quantiles should be close to alpha
    assert np.isclose(q1, alpha, atol=.1)
    assert np.isclose(q2, alpha, atol=.1)
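The test relies on a property of quantile regression: with objective='quantile', roughly a fraction alpha of the targets should fall below the predictions. A minimal non-distributed sketch of the same check, on synthetic data chosen purely for illustration:

import numpy as np
import lightgbm

rng = np.random.RandomState(42)
X = rng.normal(size=(500, 5))
y = X[:, 0] + rng.normal(size=500)

# Fit the 0.9 conditional quantile; about 90% of targets should sit below the predictions
model = lightgbm.LGBMRegressor(objective='quantile', alpha=0.9, seed=42)
model.fit(X, y)
frac_below = np.count_nonzero(y < model.predict(X)) / y.shape[0]
assert np.isclose(frac_below, 0.9, atol=0.1)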
Example #4
Source File: test_core.py    From dask-lightgbm with BSD 3-Clause "New" or "Revised" License
def test_regressor(output, client, listen_port):  # noqa
    X, y, w, dX, dy, dw = _create_data('regression', output=output)

    a = dlgbm.LGBMRegressor(time_out=5, local_listen_port=listen_port, seed=42)
    a = a.fit(dX, dy, client=client, sample_weight=dw)
    p1 = a.predict(dX, client=client)
    if output != 'dataframe':
        s1 = r2_score(dy, p1)
    p1 = p1.compute()

    b = lightgbm.LGBMRegressor(seed=42)
    b.fit(X, y, sample_weight=w)
    s2 = b.score(X, y)
    p2 = b.predict(X)

    # Scores should be the same
    if output != 'dataframe':
        assert_eq(s1, s2, atol=.01)

    # Predictions should be roughly the same
    assert_eq(y, p1, rtol=1., atol=50.)
    assert_eq(y, p2, rtol=1., atol=50.) 
Example #5
Source File: testScoreWithAdapaLgbm.py    From nyoka with Apache License 2.0
def test_04_lgbm_regressor(self):
        print("\ntest 04 (lgbm regressor with preprocessing)\n")
        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        X = auto.drop(['mpg'], axis=1)
        y = auto['mpg']

        feature_names = [name for name in auto.columns if name not in ('mpg',)]
        target_name = 'mpg'
        x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=101)
        pd.DataFrame(data=x_test, columns=feature_names).to_csv("test.csv",index=False)
        pipeline_obj = Pipeline([
            ('mapper', DataFrameMapper([
                ('car name', CountVectorizer()),
                (['displacement'],[StandardScaler()]) 
            ])),
            ('lgbmr',LGBMRegressor())
        ])
        pipeline_obj.fit(x_train,y_train)
        file_name = "test04lgbm.pmml"
        lgb_to_pmml(pipeline_obj, feature_names, 'mpg', file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, _ = self.adapa_utility.score_in_zserver(model_name, "test.csv")
        predictions = numpy.array(predictions)
        model_pred = pipeline_obj.predict(x_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True) 
Example #6
Source File: test_lightgbm.py    From sklearn-onnx with MIT License
def test_lightgbm_regressor2(self):
        model = LGBMRegressor(n_estimators=2, max_depth=1, min_child_samples=1)
        dump_single_regression(model, suffix="2") 
Example #7
Source File: S2elect.py    From Feature-Selection with MIT License
def main():
    sf = ss.Select(Sequence = True, Random = False, Cross = False) # Initialize the selector and choose the workflow you need
    sf.ImportDF(df,label = 'nextbuy') # Import the dataset and the target label
    sf.ImportLossFunction(score, direction = 'ascend') # Import the scoring function and the optimization direction
    sf.InitialNonTrainableFeatures(['buy','nextbuy','o_date','a_date','PredictDays','user_id']) # Initialize the features that must not be used
    sf.InitialFeatures(['age_x', 'sex_x', 'user_lv_cd_x', 'buycnt', 'daybeforelastbuy_o_ave']) # Initialize the starting feature combination
    sf.GenerateCol() # Generate the feature pool (see the readme in the root directory for this function's parameters)
    sf.SetSample(1, samplemode = 1) # Initialize the sampling ratio and the random process
    sf.SetTimeLimit(100) # Set the maximum running time of the algorithm, in minutes
    sf.clf = lgbm.LGBMRegressor(random_state=1, num_leaves =6, n_estimators=1000, max_depth=3, learning_rate = 0.2, n_jobs=8) # Set the regression model
    sf.SetLogFile('record.log') # Initialize the log file
    sf.run(validate) # Pass in the validation function and start running
Example #8
Source File: sklearn.py    From optuna with MIT License
def __init__(self, *args, **kwargs):
        # type: (List[Any], Dict[str, Any]) -> None

        warnings.warn(
            "LightGBMTuner doesn't support sklearn API. "
            "Use `train()` or `LightGBMTuner` for hyperparameter tuning."
        )
        super(LGBMRegressor, self).__init__(*args, **kwargs) 
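As the warning suggests, tuning goes through the train-style API rather than this sklearn wrapper. A rough, hedged sketch of what that looks like; the import path is an assumption for the optuna version at hand, and the data and parameters are illustrative:

import numpy as np
import lightgbm as lgb
import optuna.integration.lightgbm as lgb_tuner  # assumed location of the stepwise tuner

rng = np.random.RandomState(0)
X, y = rng.normal(size=(300, 5)), rng.normal(size=300)
dtrain = lgb.Dataset(X[:200], label=y[:200])
dval = lgb.Dataset(X[200:], label=y[200:], reference=dtrain)

params = {'objective': 'regression', 'metric': 'rmse', 'verbosity': -1}
booster = lgb_tuner.train(params, dtrain, valid_sets=[dval])  # tunes key parameters stepwise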
Example #9
Source File: test_lightgbm.py    From sklearn-onnx with MIT License
def setUpClass(self):

        update_registered_converter(
            LGBMClassifier, 'LightGbmLGBMClassifier',
            calculate_linear_classifier_output_shapes,
            convert_lightgbm, options={
                'zipmap': [True, False], 'nocl': [True, False]})

        update_registered_converter(
            LGBMRegressor, 'LgbmRegressor',
            calculate_linear_regressor_output_shapes,
            convert_lightgbm) 
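Once the converters are registered as above, a fitted LGBMRegressor converts like any other scikit-learn estimator. A hedged sketch (input name, shape, and file name are illustrative; it assumes the update_registered_converter call above has already run):

import numpy as np
from lightgbm import LGBMRegressor
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

X = np.random.RandomState(0).normal(size=(100, 4)).astype(np.float32)
y = X[:, 0]
model = LGBMRegressor(n_estimators=3, min_child_samples=1).fit(X, y)

onx = convert_sklearn(model, initial_types=[('input', FloatTensorType([None, 4]))])
with open('lgbm_regressor.onnx', 'wb') as f:
    f.write(onx.SerializeToString())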
Example #10
Source File: test_lightgbm.py    From sklearn-onnx with MIT License
def test_lightgbm_regressor(self):
        model = LGBMRegressor(n_estimators=3, min_child_samples=1)
        dump_single_regression(model) 
Example #11
Source File: test_lightgbm.py    From sklearn-onnx with MIT License
def test_lightgbm_regressor1(self):
        model = LGBMRegressor(n_estimators=1, min_child_samples=1)
        dump_single_regression(model, suffix="1") 
Example #12
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0
def _checkGBDTRegressor(gbdt):
	if hasattr(gbdt, "apply"):
		return gbdt
	else:
		try:
			from lightgbm import LGBMRegressor
			if isinstance(gbdt, LGBMRegressor):
				return gbdt
		except ImportError:
			pass
	raise ValueError("GBDT class " + _class_name(gbdt) + " is not supported") 
Example #13
Source File: lightgbm_model.py    From interpret-community with MIT License
def model(self):
        """Retrieve the underlying model.

        :return: The lightgbm model, either classifier or regressor.
        :rtype: Union[LGBMClassifier, LGBMRegressor]
        """
        return self._lgbm 
Example #14
Source File: test_misc_explainers.py    From interpret-community with MIT License
def _get_create_model(self, classification):
        if classification:
            model = LGBMClassifier()
        else:
            model = LGBMRegressor()

        def create_model(x, y):
            return model.fit(x, y)

        return create_model 
Example #15
Source File: test_LightGbmTreeEnsembleConverters.py    From onnxmltools with MIT License
def test_lightgbm_regressor(self):
        model = LGBMRegressor(n_estimators=3, min_child_samples=1)
        dump_single_regression(model) 
Example #16
Source File: test_LightGbmTreeEnsembleConverters.py    From onnxmltools with MIT License
def test_lightgbm_regressor1(self):
        model = LGBMRegressor(n_estimators=1, min_child_samples=1)
        dump_single_regression(model, suffix="1") 
Example #17
Source File: test_LightGbmTreeEnsembleConverters.py    From onnxmltools with MIT License
def test_lightgbm_regressor2(self):
        model = LGBMRegressor(n_estimators=2, max_depth=1, min_child_samples=1)
        dump_single_regression(model, suffix="2") 
Example #18
Source File: Blending_Regression_pm25.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def LightGBM_First(self, data, max_depth=9, n_estimators=380):
        model = lgbm.LGBMRegressor(boosting_type='gbdt', objective='regression', num_leaves=1200,
                                   learning_rate=0.17, n_estimators=n_estimators, max_depth=max_depth,
                                   metric='rmse', bagging_fraction=0.8, feature_fraction=0.8, reg_lambda=0.9)
        model.fit(data['train'][:, :-1], data['train'][:, -1])
        # Note the difference between storing validation-set results and prediction-set results
        # Predictions on the training set
        xul = model.predict(data['train'][:, :-1])
        # Predictions on the validation set
        yanre = model.predict(data['test'][:, :-1])
        # Predictions on the prediction set
        prer = model.predict(data['predict'][:, :-1])
        # Store them
        self.yanzhneg_pr.append(yanre)
        self.predi.append(prer)
        # Compute the training, validation, and prediction errors separately
        # After each fold, compute the errors on the training, validation, and prediction data
        xx = self.RMSE(xul, data['train'][:, -1])
        yy = self.RMSE(yanre, data['test'][:, -1])
        pp = self.RMSE(prer, data['predict'][:, -1])
        # Store the errors
        self.error_dict['LightGBM'] = [xx, yy, pp]
        # Ground-truth outputs of the validation set
        self.yanzhneg_real = data['test'][:, -1]

        # Ground-truth outputs of the prediction set
        self.preal = data['predict'][:, -1]
        return print('LightGBM in layer 1 finished running')

    # XGBoost 
Example #19
Source File: test_core.py    From dask-lightgbm with BSD 3-Clause "New" or "Revised" License
def test_regressor_local_predict(client, listen_port):  # noqa
    X, y, w, dX, dy, dw = _create_data('regression', output='array')

    a = dlgbm.LGBMRegressor(local_listen_port=listen_port, seed=42)
    a = a.fit(dX, dy, sample_weight=dw, client=client)
    p1 = a.predict(dX)
    p2 = a.to_local().predict(X)
    s1 = r2_score(dy, p1)
    p1 = p1.compute()
    s2 = a.to_local().score(X, y)
    print(s1)

    # Predictions and scores should be the same
    assert_eq(p1, p2)
    assert np.isclose(s1, s2)
Example #20
Source File: core.py    From dask-lightgbm with BSD 3-Clause "New" or "Revised" License
def to_local(self):
        model = lightgbm.LGBMRegressor(**self.get_params())
        self._copy_extra_params(self, model)
        return model 
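A brief usage note: after distributed training, to_local() yields a plain lightgbm.LGBMRegressor that predicts without a Dask cluster. The names below are hypothetical:

local_model = dask_model.to_local()   # dask_model: a fitted dask-lightgbm LGBMRegressor
preds = local_model.predict(X_numpy)  # X_numpy: an in-memory array, no client needed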
Example #21
Source File: core.py    From dask-lightgbm with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None, sample_weight=None, client=None, **kwargs):
        if client is None:
            client = default_client()

        model_factory = lightgbm.LGBMRegressor
        params = self.get_params(True)
        model = train(client, X, y, params, model_factory, sample_weight, **kwargs)

        self.set_params(**model.get_params())
        self._copy_extra_params(model, self)

        return self 
Example #22
Source File: testScoreWithAdapaLgbm.py    From nyoka with Apache License 2.0
def test_03_lgbm_regressor(self):
        print("\ntest 03 (lgbm regressor without preprocessing)\n")
        model = LGBMRegressor()
        pipeline_obj = Pipeline([
            ("model", model)
        ])
        pipeline_obj.fit(self.X,self.Y)
        file_name = "test03lgbm.pmml"
        lgb_to_pmml(pipeline_obj, self.features, 'Species', file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, _ = self.adapa_utility.score_in_zserver(model_name, self.test_file)
        model_pred = pipeline_obj.predict(self.X)
        predictions = numpy.array(predictions)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True) 
Example #23
Source File: feature_selection.py    From driverlessai-recipes with Apache License 2.0
def get_feature_importances(data, shuffle, cats=[], seed=None):
    # Gather real features
    train_features = [f for f in data if f not in [target] + cols2ignore]

    # Shuffle target if required
    y = data[target].copy()
    if shuffle:
        y = data[target].copy().sample(frac=1.0, random_state=seed + 4)
    from h2oaicore.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgbm
    if is_regression:
        model = lgbm.LGBMRegressor(random_state=seed, importance_type=importance, **lgbm_params)
    else:
        model = lgbm.LGBMClassifier(random_state=seed, importance_type=importance, **lgbm_params)
        y = LabelEncoder().fit_transform(y)
    # Fit LightGBM in RF mode, yes it's quicker than sklearn RandomForest
    model.fit(data[train_features], y, categorical_feature=cats)
    # Get feature importances
    imp_df = pd.DataFrame()
    imp_df["feature"] = list(train_features)
    imp_df["importance"] = model.feature_importances_

    return imp_df 
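The shuffle flag enables the classic null-importances scheme: importances computed against a shuffled target form a baseline, and a real feature should outscore its own null distribution. A hedged sketch of how the two calls might be combined; the aggregation metric is an illustrative assumption, and the module-level names the function reads (target, cols2ignore, lgbm_params, ...) must already be defined:

import numpy as np
import pandas as pd

actual_imp = get_feature_importances(data, shuffle=False)
null_imps = pd.concat([get_feature_importances(data, shuffle=True, seed=i) for i in range(20)])

# Score each feature against the upper tail of its own null distribution
scores = {}
for feature in actual_imp['feature']:
    null_dist = null_imps.loc[null_imps['feature'] == feature, 'importance']
    actual = actual_imp.loc[actual_imp['feature'] == feature, 'importance'].iloc[0]
    scores[feature] = actual / (np.percentile(null_dist, 75) + 1e-10)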
Example #24
Source File: test_lightgbm.py    From m2cgen with MIT License
def test_regression_random_forest():
    estimator = lightgbm.LGBMRegressor(boosting_type="rf", n_estimators=2,
                                       random_state=1, max_depth=1,
                                       subsample=0.7, subsample_freq=1)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.BinNumExpr(
        ast.BinNumExpr(
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(5),
                    ast.NumVal(6.954000000000001),
                    ast.CompOpType.GT),
                ast.NumVal(37.24347877367631),
                ast.NumVal(19.936999995530854)),
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(5),
                    ast.NumVal(6.971500000000001),
                    ast.CompOpType.GT),
                ast.NumVal(38.48600037864964),
                ast.NumVal(20.183783757300255)),
            ast.BinNumOpType.ADD),
        ast.NumVal(0.5),
        ast.BinNumOpType.MUL)

    assert utils.cmp_exprs(actual, expected) 
Example #25
Source File: test_lightgbm.py    From m2cgen with MIT License
def test_regression():
    estimator = lightgbm.LGBMRegressor(n_estimators=2, random_state=1,
                                       max_depth=1)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.BinNumExpr(
        ast.IfExpr(
            ast.CompExpr(
                ast.FeatureRef(5),
                ast.NumVal(6.918),
                ast.CompOpType.GT),
            ast.NumVal(24.011454621684155),
            ast.NumVal(22.289277544391084)),
        ast.IfExpr(
            ast.CompExpr(
                ast.FeatureRef(12),
                ast.NumVal(9.63),
                ast.CompOpType.GT),
            ast.NumVal(-0.49461212269771115),
            ast.NumVal(0.7174324413014594)),
        ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected) 
Example #26
Source File: LightGBM_Regression_pm25.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def recspre(exstr, predata, datadict, zhe, count=100):
    tree, te = exstr.split('-')
    model = lgbm.LGBMRegressor(objective='regression', learning_rate=0.15, num_leaves=1200,
                               n_estimators=int(tree), max_depth=int(te),
                               metric='rmse', bagging_fraction=0.8, feature_fraction=0.8, reg_lambda=0.9)
    model.fit(datadict[zhe]['train'][:, :-1], datadict[zhe]['train'][:, -1])

    # Predict
    yucede = model.predict(predata[:, :-1])
    # For readability, select `count` (default 100) samples to display
    zongleng = np.arange(len(yucede))
    randomnum = np.random.choice(zongleng, count, replace=False)

    yucede_se = list(np.array(yucede)[randomnum])

    yuce_re = list(np.array(predata[:, -1])[randomnum])

    # Comparison plot
    plt.figure(figsize=(17, 9))
    plt.subplot(2, 1, 1)
    plt.plot(list(range(len(yucede_se))), yucede_se, 'r--', label='Predicted', lw=2)
    plt.scatter(list(range(len(yuce_re))), yuce_re, c='b', marker='.', label='Actual', lw=2)
    plt.xlim(-1, count + 1)
    plt.legend()
    plt.title('Predicted vs. actual values [max number of trees %d]' % int(tree))

    plt.subplot(2, 1, 2)
    plt.plot(list(range(len(yucede_se))), np.array(yuce_re) - np.array(yucede_se), 'k--', marker='s', label='Actual - Predicted', lw=2)
    plt.legend()
    plt.title('Difference between actual and predicted values')

    plt.savefig(r'C:\Users\GWT9\Desktop\duibi_lightgbm.jpg')
    return 'Prediction vs. actual comparison finished'

# The final main function
Example #27
Source File: Stacking_Regression_pm25.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def LightGBM_First(self, data, max_depth=9, n_estimators=380):
        # Store the predictions on the validation set for each fold
        yanzhenglist = []
        # Store the ground-truth values of the validation set for each fold
        yanzhenglist_real = []
        # Store the predictions on the prediction set for each fold
        prelist = []

        # Store the training, validation, and prediction errors
        errorlsit = []
        # Start training on each fold
        for zhe in [zheshu for zheshu in data.keys() if zheshu != 'predict']:
            model = lgbm.LGBMRegressor(boosting_type='gbdt', objective='regression', num_leaves=1200,
                                       learning_rate=0.17, n_estimators=n_estimators, max_depth=max_depth,
                                       metric='rmse', bagging_fraction=0.8, feature_fraction=0.8, reg_lambda=0.9)

            model.fit(data[zhe]['train'][:, :-1], data[zhe]['train'][:, -1])
            # Note the difference between storing validation-set results and prediction-set results
            # Predictions on the training set
            xul = model.predict(data[zhe]['train'][:, :-1])
            # Predictions on the validation set
            yanre = model.predict(data[zhe]['test'][:, :-1])
            # Predictions on the prediction set
            prer = model.predict(data['predict'][:, :-1])
            yanzhenglist += list(yanre)
            yanzhenglist_real += list(data[zhe]['test'][:, -1])
            prelist.append(prer)
            # After each fold, compute the errors on the training, validation, and prediction data
            xx = self.RMSE(xul, data[zhe]['train'][:, -1])
            yy = self.RMSE(yanre, data[zhe]['test'][:, -1])
            pp = self.RMSE(prer, data['predict'][:, -1])
            errorlsit.append([xx, yy, pp])
        # Average the predictions on the prediction set over the folds
        meanPre = np.mean(np.array(prelist), axis=0)
        # Start combining
        self.yanzhneg_pr.append(yanzhenglist)
        self.yanzhneg_real = yanzhenglist_real
        self.predi.append(meanPre)
        self.preal = data['predict'][:, -1]
        # Store the errors
        self.error_dict['LightGBM'] = np.mean(np.array(errorlsit), axis=0)

        return print('LightGBM in layer 1 finished running')

    # XGBoost 
Example #28
Source File: test_lgbm_to_pmml_UnitTest.py    From nyoka with Apache License 2.0
def test_lgbm_02(self):

        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        feature_names = [name for name in auto.columns if name not in ('mpg', 'car name')]
        target_name = 'mpg'
        f_name = "lgbmr_pmml.pmml"
        model = LGBMRegressor()
        pipeline_obj = Pipeline([
            ('lgbmr', model)
        ])

        pipeline_obj.fit(auto[feature_names], auto[target_name])

        lgb_to_pmml(pipeline_obj, feature_names, target_name, f_name)

        pmml_obj = pml.parse(f_name, True)

        pmml_value_list = []
        model_value_list = []

        pmml_score_list = []
        model_score_list = []

        seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
        for seg in seg_tab:
            for node in seg.TreeModel.Node.Node:
                varlen = node.get_Node().__len__()
                if varlen > 0:
                    pmml_value_list.append(node.SimplePredicate.value)
                    self.extractValues(node, pmml_value_list, pmml_score_list)
                else:
                    pmml_value_list.append(node.SimplePredicate.value)
                    pmml_score_list.append(node.score)

        main_key_value = []
        lgb_dump = model.booster_.dump_model()
        for i in range(len(lgb_dump['tree_info'])):
            tree = lgb_dump['tree_info'][i]['tree_structure']
            main_key_value.append(tree)

        for i in range(len(main_key_value)):
            list_score_temp = []
            list_val_temp = []
            node_list = main_key_value[i]
            self.create_node(node_list, list_score_temp, list_val_temp)
            model_score_list = model_score_list + list_score_temp
            model_value_list = model_value_list + list_val_temp
            list_val_temp.clear()
            list_score_temp.clear()

        # Check 1: leaf scores in the PMML match the booster's leaf scores
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        # Check 2: split threshold values match
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        # Check 3: the PMML file was written
        self.assertEqual(os.path.isfile(f_name), True)
Example #29
Source File: skwrapper.py    From Benchmarks with MIT License
def get_model(model_or_name, threads=-1, classify=False, seed=0):
    regression_models = {
        'xgboost': (XGBRegressor(max_depth=6, n_jobs=threads, random_state=seed), 'XGBRegressor'),
        'lightgbm': (LGBMRegressor(n_jobs=threads, random_state=seed, verbose=-1), 'LGBMRegressor'),
        'randomforest': (RandomForestRegressor(n_estimators=100, n_jobs=threads), 'RandomForestRegressor'),
        'adaboost': (AdaBoostRegressor(), 'AdaBoostRegressor'),
        'linear': (LinearRegression(), 'LinearRegression'),
        'elasticnet': (ElasticNetCV(positive=True), 'ElasticNetCV'),
        'lasso': (LassoCV(positive=True), 'LassoCV'),
        'ridge': (Ridge(), 'Ridge'),

        'xgb.1k': (XGBRegressor(max_depth=6, n_estimators=1000, n_jobs=threads, random_state=seed), 'XGBRegressor.1K'),
        'xgb.10k': (XGBRegressor(max_depth=6, n_estimators=10000, n_jobs=threads, random_state=seed), 'XGBRegressor.10K'),
        'lgbm.1k': (LGBMRegressor(n_estimators=1000, n_jobs=threads, random_state=seed, verbose=-1), 'LGBMRegressor.1K'),
        'lgbm.10k': (LGBMRegressor(n_estimators=10000, n_jobs=threads, random_state=seed, verbose=-1), 'LGBMRegressor.10K'),
        'rf.1k': (RandomForestRegressor(n_estimators=1000, n_jobs=threads), 'RandomForestRegressor.1K'),
        'rf.10k': (RandomForestRegressor(n_estimators=10000, n_jobs=threads), 'RandomForestRegressor.10K')
    }

    classification_models = {
        'xgboost': (XGBClassifier(max_depth=6, n_jobs=threads, random_state=seed), 'XGBClassifier'),
        'lightgbm': (LGBMClassifier(n_jobs=threads, random_state=seed, verbose=-1), 'LGBMClassifier'),
        'randomforest': (RandomForestClassifier(n_estimators=100, n_jobs=threads), 'RandomForestClassifier'),
        'adaboost': (AdaBoostClassifier(), 'AdaBoostClassifier'),
        'logistic': (LogisticRegression(), 'LogisticRegression'),
        'gaussian': (GaussianProcessClassifier(), 'GaussianProcessClassifier'),
        'knn': (KNeighborsClassifier(), 'KNeighborsClassifier'),
        'bayes': (GaussianNB(), 'GaussianNB'),
        'svm': (SVC(), 'SVC'),

        'xgb.1k': (XGBClassifier(max_depth=6, n_estimators=1000, n_jobs=threads, random_state=seed), 'XGBClassifier.1K'),
        'xgb.10k': (XGBClassifier(max_depth=6, n_estimators=10000, n_jobs=threads, random_state=seed), 'XGBClassifier.10K'),
        'lgbm.1k': (LGBMClassifier(n_estimators=1000, n_jobs=threads, random_state=seed, verbose=-1), 'LGBMClassifier.1K'),
        'lgbm.10k': (LGBMClassifier(n_estimators=10000, n_jobs=threads, random_state=seed, verbose=-1), 'LGBMClassifier.10K'),
        'rf.1k': (RandomForestClassifier(n_estimators=1000, n_jobs=threads), 'RandomForestClassifier.1K'),
        'rf.10k': (RandomForestClassifier(n_estimators=10000, n_jobs=threads), 'RandomForestClassifier.10K')
    }

    if isinstance(model_or_name, str):
        if classify:
            model_and_name = classification_models.get(model_or_name.lower())
        else:
            model_and_name = regression_models.get(model_or_name.lower())
        if not model_and_name:
            raise Exception("unrecognized model: '{}'".format(model_or_name))
        else:
            model, name = model_and_name
    else:
        model = model_or_name
        name = re.search(r"\w+", str(model)).group(0)

    return model, name 
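Usage then reduces to a lookup; for example (calls hypothetical, data not shown):

model, name = get_model('lightgbm', threads=4)       # -> LGBMRegressor, 'LGBMRegressor'
clf, clf_name = get_model('lgbm.1k', classify=True)  # -> LGBMClassifier with 1000 trees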
Example #30
Source File: lightgbm_model.py    From interpret-community with MIT License
def __init__(self, multiclass=False, random_state=DEFAULT_RANDOM_STATE,
                 shap_values_output=ShapValuesOutput.DEFAULT, classification=True, **kwargs):
        """Initialize the LightGBM Model.

        Additional arguments to LightGBMClassifier and LightGBMRegressor can be passed through kwargs.

        :param multiclass: Set to true to generate a multiclass model.
        :type multiclass: bool
        :param random_state: Int to seed the model.
        :type random_state: int
        :param shap_values_output: The type of the output from explain_local when using TreeExplainer.
            Currently only types 'default', 'probability' and 'teacher_probability' are supported.  If
            'probability' is specified, then we approximately scale the raw log-odds values from the
            TreeExplainer to probabilities.
        :type shap_values_output: interpret_community.common.constants.ShapValuesOutput
        :param classification: Indicates if this is a classification or regression explanation.
        :type classification: bool
        """
        self.multiclass = multiclass
        initializer_args = _get_initializer_args(kwargs)
        if self.multiclass:
            initializer = LGBMClassifier
        else:
            initializer = LGBMRegressor
        self._lgbm = initializer(random_state=random_state, **initializer_args)
        super(LGBMExplainableModel, self).__init__(**kwargs)
        self._logger.debug('Initializing LGBMExplainableModel')
        self._method = 'lightgbm'
        self._tree_explainer = None
        self._shap_values_output = shap_values_output
        self._classification = classification