Python lightgbm.LGBMRegressor() Examples

The following are 30 code examples of lightgbm.LGBMRegressor(), collected from open-source projects. The source file, project, and license for each example are listed above it. You may also want to check out the other available functions and classes of the lightgbm module.
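Before diving in, here is a minimal, self-contained sketch of the estimator's scikit-learn-style API. The synthetic data and hyperparameter values are illustrative only:

import numpy as np
import lightgbm

# Toy regression data for illustration
rng = np.random.RandomState(0)
X = rng.normal(size=(200, 5))
y = 2.0 * X[:, 0] + rng.normal(scale=0.1, size=200)

# Fit and predict with the scikit-learn-compatible wrapper
model = lightgbm.LGBMRegressor(n_estimators=50, random_state=0)
model.fit(X, y)
preds = model.predict(X)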
Example #1
Source File: run.py    From nyaggle with MIT License
def _dispatch_gbdt_class(algorithm_type: str, type_of_target: str):
    is_regression = type_of_target == 'continuous'

    if algorithm_type == 'lgbm':
        requires_lightgbm()
        from lightgbm import LGBMClassifier, LGBMRegressor
        return LGBMRegressor if is_regression else LGBMClassifier
    elif algorithm_type == 'cat':
        requires_catboost()
        from catboost import CatBoostClassifier, CatBoostRegressor
        return CatBoostRegressor if is_regression else CatBoostClassifier
    else:
        requires_xgboost()
        assert algorithm_type == 'xgb'
        from xgboost import XGBClassifier, XGBRegressor
        return XGBRegressor if is_regression else XGBClassifier 
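A hedged usage sketch for the dispatcher above (the call is hypothetical; requires_lightgbm() and friends are nyaggle-internal dependency checks):

model_class = _dispatch_gbdt_class('lgbm', 'continuous')  # -> LGBMRegressor
model = model_class(n_estimators=100)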
Example #2
Source File: LightGBM_Regression_pm25.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def Train(data, modelcount, censhu, yanzhgdata):
    model = lgbm.LGBMRegressor(boosting_type='gbdt', objective='regression', num_leaves=1200,
                                learning_rate=0.17, n_estimators=modelcount, max_depth=censhu,
                                metric='rmse', bagging_fraction=0.8, feature_fraction=0.8, reg_lambda=0.9)

    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the MSE
    train_mse = mse(data[:, -1], train_out)

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the MSE
    add_mse = mse(yanzhgdata[:, -1], add_yan)
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that finalizes the combination
Example #3
Source File: test_core.py    From dask-lightgbm with BSD 3-Clause "New" or "Revised" License
def test_regressor_quantile(output, client, listen_port, alpha):  # noqa
    X, y, w, dX, dy, dw = _create_data('regression', output=output)

    a = dlgbm.LGBMRegressor(local_listen_port=listen_port, seed=42, objective='quantile', alpha=alpha)
    a = a.fit(dX, dy, client=client, sample_weight=dw)
    p1 = a.predict(dX, client=client).compute()
    q1 = np.count_nonzero(y < p1) / y.shape[0]

    b = lightgbm.LGBMRegressor(seed=42, objective='quantile', alpha=alpha)
    b.fit(X, y, sample_weight=w)
    p2 = b.predict(X)
    q2 = np.count_nonzero(y < p2) / y.shape[0]

    # The empirical quantiles should be close to alpha
    assert np.isclose(q1, alpha, atol=.1)
    assert np.isclose(q2, alpha, atol=.1)
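The test relies on a property of quantile regression: with objective='quantile', roughly a fraction alpha of the targets should fall below the predictions. A minimal non-distributed sketch of the same check, on synthetic data chosen purely for illustration:

import numpy as np
import lightgbm

rng = np.random.RandomState(42)
X = rng.normal(size=(500, 5))
y = X[:, 0] + rng.normal(size=500)

# Fit the 0.9 conditional quantile; about 90% of targets should sit below the predictions
model = lightgbm.LGBMRegressor(objective='quantile', alpha=0.9, seed=42)
model.fit(X, y)
frac_below = np.count_nonzero(y < model.predict(X)) / y.shape[0]
assert np.isclose(frac_below, 0.9, atol=0.1)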
Example #4
Source File: test_core.py    From dask-lightgbm with BSD 3-Clause "New" or "Revised" License
def test_regressor(output, client, listen_port):  # noqa
    X, y, w, dX, dy, dw = _create_data('regression', output=output)

    a = dlgbm.LGBMRegressor(time_out=5, local_listen_port=listen_port, seed=42)
    a = a.fit(dX, dy, client=client, sample_weight=dw)
    p1 = a.predict(dX, client=client)
    if output != 'dataframe':
        s1 = r2_score(dy, p1)
    p1 = p1.compute()

    b = lightgbm.LGBMRegressor(seed=42)
    b.fit(X, y, sample_weight=w)
    s2 = b.score(X, y)
    p2 = b.predict(X)

    # Scores should be the same
    if output != 'dataframe':
        assert_eq(s1, s2, atol=.01)

    # Predictions should be roughly the same
    assert_eq(y, p1, rtol=1., atol=50.)
    assert_eq(y, p2, rtol=1., atol=50.) 
Example #5
Source File: testScoreWithAdapaLgbm.py    From nyoka with Apache License 2.0
def test_04_lgbm_regressor(self):
        print("\ntest 04 (lgbm regressor with preprocessing)\n")
        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        X = auto.drop(['mpg'], axis=1)
        y = auto['mpg']

        feature_names = [name for name in auto.columns if name not in ('mpg',)]
        target_name = 'mpg'
        x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=101)
        pd.DataFrame(data=x_test, columns=feature_names).to_csv("test.csv",index=False)
        pipeline_obj = Pipeline([
            ('mapper', DataFrameMapper([
                ('car name', CountVectorizer()),
                (['displacement'],[StandardScaler()]) 
            ])),
            ('lgbmr',LGBMRegressor())
        ])
        pipeline_obj.fit(x_train,y_train)
        file_name = "test04lgbm.pmml"
        lgb_to_pmml(pipeline_obj, feature_names, 'mpg', file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, _ = self.adapa_utility.score_in_zserver(model_name, "test.csv")
        predictions = numpy.array(predictions)
        model_pred = pipeline_obj.predict(x_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True) 
Example #6
Source File: test_lightgbm.py    From sklearn-onnx with MIT License
def test_lightgbm_regressor2(self):
        model = LGBMRegressor(n_estimators=2, max_depth=1, min_child_samples=1)
        dump_single_regression(model, suffix="2") 
Example #7
Source File: S2elect.py    From Feature-Selection with MIT License
def main():
    sf = ss.Select(Sequence = True, Random = False, Cross = False) # Initialize the selector and choose the workflow you need
    sf.ImportDF(df,label = 'nextbuy') # Import the dataset and the target label
    sf.ImportLossFunction(score, direction = 'ascend') # Import the scoring function and the optimization direction
    sf.InitialNonTrainableFeatures(['buy','nextbuy','o_date','a_date','PredictDays','user_id']) # Initialize the features that must not be used
    sf.InitialFeatures(['age_x', 'sex_x', 'user_lv_cd_x', 'buycnt', 'daybeforelastbuy_o_ave']) # Initialize the starting feature combination
    sf.GenerateCol() # Generate the feature pool (see the readme in the root directory for this function's parameters)
    sf.SetSample(1, samplemode = 1) # Initialize the sampling ratio and the random process
    sf.SetTimeLimit(100) # Set the maximum running time of the algorithm, in minutes
    sf.clf = lgbm.LGBMRegressor(random_state=1, num_leaves =6, n_estimators=1000, max_depth=3, learning_rate = 0.2, n_jobs=8) # Set the regression model
    sf.SetLogFile('record.log') # Initialize the log file
    sf.run(validate) # Pass in the validation function and start running
Example #8
Source File: sklearn.py    From optuna with MIT License
def __init__(self, *args, **kwargs):
        # type: (List[Any], Dict[str, Any]) -> None

        warnings.warn(
            "LightGBMTuner doesn't support sklearn API. "
            "Use `train()` or `LightGBMTuner` for hyperparameter tuning."
        )
        super(LGBMRegressor, self).__init__(*args, **kwargs) 
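As the warning suggests, tuning goes through the train-style API rather than this sklearn wrapper. A rough, hedged sketch of what that looks like; the import path is an assumption for the optuna version at hand, and the data and parameters are illustrative:

import numpy as np
import lightgbm as lgb
import optuna.integration.lightgbm as lgb_tuner  # assumed location of the stepwise tuner

rng = np.random.RandomState(0)
X, y = rng.normal(size=(300, 5)), rng.normal(size=300)
dtrain = lgb.Dataset(X[:200], label=y[:200])
dval = lgb.Dataset(X[200:], label=y[200:], reference=dtrain)

params = {'objective': 'regression', 'metric': 'rmse', 'verbosity': -1}
booster = lgb_tuner.train(params, dtrain, valid_sets=[dval])  # tunes key parameters stepwise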
Example #9
Source File: test_lightgbm.py    From sklearn-onnx with MIT License
def setUpClass(self):

        update_registered_converter(
            LGBMClassifier, 'LightGbmLGBMClassifier',
            calculate_linear_classifier_output_shapes,
            convert_lightgbm, options={
                'zipmap': [True, False], 'nocl': [True, False]})

        update_registered_converter(
            LGBMRegressor, 'LgbmRegressor',
            calculate_linear_regressor_output_shapes,
            convert_lightgbm) 
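Once the converters are registered as above, a fitted LGBMRegressor converts like any other scikit-learn estimator. A hedged sketch (input name, shape, and file name are illustrative; it assumes the update_registered_converter call above has already run):

import numpy as np
from lightgbm import LGBMRegressor
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

X = np.random.RandomState(0).normal(size=(100, 4)).astype(np.float32)
y = X[:, 0]
model = LGBMRegressor(n_estimators=3, min_child_samples=1).fit(X, y)

onx = convert_sklearn(model, initial_types=[('input', FloatTensorType([None, 4]))])
with open('lgbm_regressor.onnx', 'wb') as f:
    f.write(onx.SerializeToString())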
Example #10
Source File: test_lightgbm.py    From sklearn-onnx with MIT License
def test_lightgbm_regressor(self):
        model = LGBMRegressor(n_estimators=3, min_child_samples=1)
        dump_single_regression(model) 
Example #11
Source File: test_lightgbm.py    From sklearn-onnx with MIT License
def test_lightgbm_regressor1(self):
        model = LGBMRegressor(n_estimators=1, min_child_samples=1)
        dump_single_regression(model, suffix="1") 
Example #12
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0
def _checkGBDTRegressor(gbdt):
	if hasattr(gbdt, "apply"):
		return gbdt
	else:
		try:
			from lightgbm import LGBMRegressor
			if isinstance(gbdt, LGBMRegressor):
				return gbdt
		except ImportError:
			pass
	raise ValueError("GBDT class " + _class_name(gbdt) + " is not supported") 
Example #13
Source File: lightgbm_model.py    From interpret-community with MIT License
def model(self):
        """Retrieve the underlying model.

        :return: The lightgbm model, either classifier or regressor.
        :rtype: Union[LGBMClassifier, LGBMRegressor]
        """
        return self._lgbm 
Example #14
Source File: test_misc_explainers.py    From interpret-community with MIT License
def _get_create_model(self, classification):
        if classification:
            model = LGBMClassifier()
        else:
            model = LGBMRegressor()

        def create_model(x, y):
            return model.fit(x, y)

        return create_model 
Example #15
Source File: test_LightGbmTreeEnsembleConverters.py    From onnxmltools with MIT License
def test_lightgbm_regressor(self):
        model = LGBMRegressor(n_estimators=3, min_child_samples=1)
        dump_single_regression(model) 
Example #16
Source File: test_LightGbmTreeEnsembleConverters.py    From onnxmltools with MIT License
def test_lightgbm_regressor1(self):
        model = LGBMRegressor(n_estimators=1, min_child_samples=1)
        dump_single_regression(model, suffix="1") 
Example #17
Source File: test_LightGbmTreeEnsembleConverters.py    From onnxmltools with MIT License
def test_lightgbm_regressor2(self):
        model = LGBMRegressor(n_estimators=2, max_depth=1, min_child_samples=1)
        dump_single_regression(model, suffix="2") 
Example #18
Source File: Blending_Regression_pm25.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def LightGBM_First(self, data, max_depth=9, n_estimators=380):
        model = lgbm.LGBMRegressor(boosting_type='gbdt', objective='regression', num_leaves=1200,
                                   learning_rate=0.17, n_estimators=n_estimators, max_depth=max_depth,
                                   metric='rmse', bagging_fraction=0.8, feature_fraction=0.8, reg_lambda=0.9)
        model.fit(data['train'][:, :-1], data['train'][:, -1])
        # Note the difference between storing validation-set results and prediction-set results
        # Predictions on the training set
        xul = model.predict(data['train'][:, :-1])
        # Predictions on the validation set
        yanre = model.predict(data['test'][:, :-1])
        # Predictions on the prediction set
        prer = model.predict(data['predict'][:, :-1])
        # Store them
        self.yanzhneg_pr.append(yanre)
        self.predi.append(prer)
        # Compute the training, validation, and prediction errors separately
        # After each fold, compute the errors on the training, validation, and prediction data
        xx = self.RMSE(xul, data['train'][:, -1])
        yy = self.RMSE(yanre, data['test'][:, -1])
        pp = self.RMSE(prer, data['predict'][:, -1])
        # Store the errors
        self.error_dict['LightGBM'] = [xx, yy, pp]
        # Ground-truth outputs of the validation set
        self.yanzhneg_real = data['test'][:, -1]

        # Ground-truth outputs of the prediction set
        self.preal = data['predict'][:, -1]
        return print('LightGBM in layer 1 finished running')

    # XGBoost 
Example #19
Source File: test_core.py    From dask-lightgbm with BSD 3-Clause "New" or "Revised" License
def test_regressor_local_predict(client, listen_port):  # noqa
    X, y, w, dX, dy, dw = _create_data('regression', output='array')

    a = dlgbm.LGBMRegressor(local_listen_port=listen_port, seed=42)
    a = a.fit(dX, dy, sample_weight=dw, client=client)
    p1 = a.predict(dX)
    p2 = a.to_local().predict(X)
    s1 = r2_score(dy, p1)
    p1 = p1.compute()
    s2 = a.to_local().score(X, y)
    print(s1)

    # Predictions and scores should be the same
    assert_eq(p1, p2)
    assert np.isclose(s1, s2)
Example #20
Source File: core.py    From dask-lightgbm with BSD 3-Clause "New" or "Revised" License
def to_local(self):
        model = lightgbm.LGBMRegressor(**self.get_params())
        self._copy_extra_params(self, model)
        return model 
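A brief usage note: after distributed training, to_local() yields a plain lightgbm.LGBMRegressor that predicts without a Dask cluster. The names below are hypothetical:

local_model = dask_model.to_local()   # dask_model: a fitted dask-lightgbm LGBMRegressor
preds = local_model.predict(X_numpy)  # X_numpy: an in-memory array, no client needed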
Example #21
Source File: core.py    From dask-lightgbm with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None, sample_weight=None, client=None, **kwargs):
        if client is None:
            client = default_client()

        model_factory = lightgbm.LGBMRegressor
        params = self.get_params(True)
        model = train(client, X, y, params, model_factory, sample_weight, **kwargs)

        self.set_params(**model.get_params())
        self._copy_extra_params(model, self)

        return self 
Example #22
Source File: testScoreWithAdapaLgbm.py    From nyoka with Apache License 2.0
def test_03_lgbm_regressor(self):
        print("\ntest 03 (lgbm regressor without preprocessing)\n")
        model = LGBMRegressor()
        pipeline_obj = Pipeline([
            ("model", model)
        ])
        pipeline_obj.fit(self.X,self.Y)
        file_name = "test03lgbm.pmml"
        lgb_to_pmml(pipeline_obj, self.features, 'Species', file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, _ = self.adapa_utility.score_in_zserver(model_name, self.test_file)
        model_pred = pipeline_obj.predict(self.X)
        predictions = numpy.array(predictions)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True) 
Example #23
Source File: feature_selection.py    From driverlessai-recipes with Apache License 2.0
def get_feature_importances(data, shuffle, cats=[], seed=None):
    # Gather real features
    train_features = [f for f in data if f not in [target] + cols2ignore]

    # Shuffle target if required
    y = data[target].copy()
    if shuffle:
        y = data[target].copy().sample(frac=1.0, random_state=seed + 4)
    from h2oaicore.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgbm
    if is_regression:
        model = lgbm.LGBMRegressor(random_state=seed, importance_type=importance, **lgbm_params)
    else:
        model = lgbm.LGBMClassifier(random_state=seed, importance_type=importance, **lgbm_params)
        y = LabelEncoder().fit_transform(y)
    # Fit LightGBM in RF mode, yes it's quicker than sklearn RandomForest
    model.fit(data[train_features], y, categorical_feature=cats)
    # Get feature importances
    imp_df = pd.DataFrame()
    imp_df["feature"] = list(train_features)
    imp_df["importance"] = model.feature_importances_

    return imp_df 
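The shuffle flag enables the classic null-importances scheme: importances computed against a shuffled target form a baseline, and a real feature should outscore its own null distribution. A hedged sketch of how the two calls might be combined; the aggregation metric is an illustrative assumption, and the module-level names the function reads (target, cols2ignore, lgbm_params, ...) must already be defined:

import numpy as np
import pandas as pd

actual_imp = get_feature_importances(data, shuffle=False)
null_imps = pd.concat([get_feature_importances(data, shuffle=True, seed=i) for i in range(20)])

# Score each feature against the upper tail of its own null distribution
scores = {}
for feature in actual_imp['feature']:
    null_dist = null_imps.loc[null_imps['feature'] == feature, 'importance']
    actual = actual_imp.loc[actual_imp['feature'] == feature, 'importance'].iloc[0]
    scores[feature] = actual / (np.percentile(null_dist, 75) + 1e-10)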
Example #24
Source File: test_lightgbm.py    From m2cgen with MIT License
def test_regression_random_forest():
    estimator = lightgbm.LGBMRegressor(boosting_type="rf", n_estimators=2,
                                       random_state=1, max_depth=1,
                                       subsample=0.7, subsample_freq=1)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.BinNumExpr(
        ast.BinNumExpr(
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(5),
                    ast.NumVal(6.954000000000001),
                    ast.CompOpType.GT),
                ast.NumVal(37.24347877367631),
                ast.NumVal(19.936999995530854)),
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(5),
                    ast.NumVal(6.971500000000001),
                    ast.CompOpType.GT),
                ast.NumVal(38.48600037864964),
                ast.NumVal(20.183783757300255)),
            ast.BinNumOpType.ADD),
        ast.NumVal(0.5),
        ast.BinNumOpType.MUL)

    assert utils.cmp_exprs(actual, expected) 
Example #25
Source File: test_lightgbm.py    From m2cgen with MIT License
def test_regression():
    estimator = lightgbm.LGBMRegressor(n_estimators=2, random_state=1,
                                       max_depth=1)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.BinNumExpr(
        ast.IfExpr(
            ast.CompExpr(
                ast.FeatureRef(5),
                ast.NumVal(6.918),
                ast.CompOpType.GT),
            ast.NumVal(24.011454621684155),
            ast.NumVal(22.289277544391084)),
        ast.IfExpr(
            ast.CompExpr(
                ast.FeatureRef(12),
                ast.NumVal(9.63),
                ast.CompOpType.GT),
            ast.NumVal(-0.49461212269771115),
            ast.NumVal(0.7174324413014594)),
        ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected) 
Example #26
Source File: LightGBM_Regression_pm25.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def recspre(exstr, predata, datadict, zhe, count=100):
    tree, te = exstr.split('-')
    model = lgbm.LGBMRegressor(objective='regression', learning_rate=0.15, num_leaves=1200,
                               n_estimators=int(tree), max_depth=int(te),
                               metric='rmse', bagging_fraction=0.8, feature_fraction=0.8, reg_lambda=0.9)
    model.fit(datadict[zhe]['train'][:, :-1], datadict[zhe]['train'][:, -1])

    # Predict
    yucede = model.predict(predata[:, :-1])
    # For readability, select `count` (default 100) samples to display
    zongleng = np.arange(len(yucede))
    randomnum = np.random.choice(zongleng, count, replace=False)

    yucede_se = list(np.array(yucede)[randomnum])

    yuce_re = list(np.array(predata[:, -1])[randomnum])

    # Comparison plot
    plt.figure(figsize=(17, 9))
    plt.subplot(2, 1, 1)
    plt.plot(list(range(len(yucede_se))), yucede_se, 'r--', label='Predicted', lw=2)
    plt.scatter(list(range(len(yuce_re))), yuce_re, c='b', marker='.', label='Actual', lw=2)
    plt.xlim(-1, count + 1)
    plt.legend()
    plt.title('Predicted vs. actual values [max number of trees %d]' % int(tree))

    plt.subplot(2, 1, 2)
    plt.plot(list(range(len(yucede_se))), np.array(yuce_re) - np.array(yucede_se), 'k--', marker='s', label='Actual - Predicted', lw=2)
    plt.legend()
    plt.title('Difference between actual and predicted values')

    plt.savefig(r'C:\Users\GWT9\Desktop\duibi_lightgbm.jpg')
    return 'Prediction vs. actual comparison finished'

# The final main function
Example #27
Source File: Stacking_Regression_pm25.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def LightGBM_First(self, data, max_depth=9, n_estimators=380):
        # Store the predictions on the validation set for each fold
        yanzhenglist = []
        # Store the ground-truth values of the validation set for each fold
        yanzhenglist_real = []
        # Store the predictions on the prediction set for each fold
        prelist = []

        # Store the training, validation, and prediction errors
        errorlsit = []
        # Start training on each fold
        for zhe in [zheshu for zheshu in data.keys() if zheshu != 'predict']:
            model = lgbm.LGBMRegressor(boosting_type='gbdt', objective='regression', num_leaves=1200,
                                       learning_rate=0.17, n_estimators=n_estimators, max_depth=max_depth,
                                       metric='rmse', bagging_fraction=0.8, feature_fraction=0.8, reg_lambda=0.9)

            model.fit(data[zhe]['train'][:, :-1], data[zhe]['train'][:, -1])
            # Note the difference between storing validation-set results and prediction-set results
            # Predictions on the training set
            xul = model.predict(data[zhe]['train'][:, :-1])
            # Predictions on the validation set
            yanre = model.predict(data[zhe]['test'][:, :-1])
            # Predictions on the prediction set
            prer = model.predict(data['predict'][:, :-1])
            yanzhenglist += list(yanre)
            yanzhenglist_real += list(data[zhe]['test'][:, -1])
            prelist.append(prer)
            # After each fold, compute the errors on the training, validation, and prediction data
            xx = self.RMSE(xul, data[zhe]['train'][:, -1])
            yy = self.RMSE(yanre, data[zhe]['test'][:, -1])
            pp = self.RMSE(prer, data['predict'][:, -1])
            errorlsit.append([xx, yy, pp])
        # Average the predictions on the prediction set over the folds
        meanPre = np.mean(np.array(prelist), axis=0)
        # Start combining
        self.yanzhneg_pr.append(yanzhenglist)
        self.yanzhneg_real = yanzhenglist_real
        self.predi.append(meanPre)
        self.preal = data['predict'][:, -1]
        # Store the errors
        self.error_dict['LightGBM'] = np.mean(np.array(errorlsit), axis=0)

        return print('LightGBM in layer 1 finished running')

    # XGBoost 
Example #28
Source File: test_lgbm_to_pmml_UnitTest.py    From nyoka with Apache License 2.0
def test_lgbm_02(self):

        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        feature_names = [name for name in auto.columns if name not in ('mpg', 'car name')]
        target_name = 'mpg'
        f_name = "lgbmr_pmml.pmml"
        model = LGBMRegressor()
        pipeline_obj = Pipeline([
            ('lgbmr', model)
        ])

        pipeline_obj.fit(auto[feature_names], auto[target_name])

        lgb_to_pmml(pipeline_obj, feature_names, target_name, f_name)

        pmml_obj = pml.parse(f_name, True)

        pmml_value_list = []
        model_value_list = []

        pmml_score_list = []
        model_score_list = []

        seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
        for seg in seg_tab:
            for node in seg.TreeModel.Node.Node:
                varlen = node.get_Node().__len__()
                if varlen > 0:
                    pmml_value_list.append(node.SimplePredicate.value)
                    self.extractValues(node, pmml_value_list, pmml_score_list)
                else:
                    pmml_value_list.append(node.SimplePredicate.value)
                    pmml_score_list.append(node.score)

        main_key_value = []
        lgb_dump = model.booster_.dump_model()
        for i in range(len(lgb_dump['tree_info'])):
            tree = lgb_dump['tree_info'][i]['tree_structure']
            main_key_value.append(tree)

        for i in range(len(main_key_value)):
            list_score_temp = []
            list_val_temp = []
            node_list = main_key_value[i]
            self.create_node(node_list, list_score_temp, list_val_temp)
            model_score_list = model_score_list + list_score_temp
            model_value_list = model_value_list + list_val_temp
            list_val_temp.clear()
            list_score_temp.clear()

        # Check 1: leaf scores in the PMML match the booster's leaf scores
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        # Check 2: split threshold values match
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        # Check 3: the PMML file was written
        self.assertEqual(os.path.isfile(f_name), True)
Example #29
Source File: skwrapper.py    From Benchmarks with MIT License
def get_model(model_or_name, threads=-1, classify=False, seed=0):
    regression_models = {
        'xgboost': (XGBRegressor(max_depth=6, n_jobs=threads, random_state=seed), 'XGBRegressor'),
        'lightgbm': (LGBMRegressor(n_jobs=threads, random_state=seed, verbose=-1), 'LGBMRegressor'),
        'randomforest': (RandomForestRegressor(n_estimators=100, n_jobs=threads), 'RandomForestRegressor'),
        'adaboost': (AdaBoostRegressor(), 'AdaBoostRegressor'),
        'linear': (LinearRegression(), 'LinearRegression'),
        'elasticnet': (ElasticNetCV(positive=True), 'ElasticNetCV'),
        'lasso': (LassoCV(positive=True), 'LassoCV'),
        'ridge': (Ridge(), 'Ridge'),

        'xgb.1k': (XGBRegressor(max_depth=6, n_estimators=1000, n_jobs=threads, random_state=seed), 'XGBRegressor.1K'),
        'xgb.10k': (XGBRegressor(max_depth=6, n_estimators=10000, n_jobs=threads, random_state=seed), 'XGBRegressor.10K'),
        'lgbm.1k': (LGBMRegressor(n_estimators=1000, n_jobs=threads, random_state=seed, verbose=-1), 'LGBMRegressor.1K'),
        'lgbm.10k': (LGBMRegressor(n_estimators=10000, n_jobs=threads, random_state=seed, verbose=-1), 'LGBMRegressor.10K'),
        'rf.1k': (RandomForestRegressor(n_estimators=1000, n_jobs=threads), 'RandomForestRegressor.1K'),
        'rf.10k': (RandomForestRegressor(n_estimators=10000, n_jobs=threads), 'RandomForestRegressor.10K')
    }

    classification_models = {
        'xgboost': (XGBClassifier(max_depth=6, n_jobs=threads, random_state=seed), 'XGBClassifier'),
        'lightgbm': (LGBMClassifier(n_jobs=threads, random_state=seed, verbose=-1), 'LGBMClassifier'),
        'randomforest': (RandomForestClassifier(n_estimators=100, n_jobs=threads), 'RandomForestClassifier'),
        'adaboost': (AdaBoostClassifier(), 'AdaBoostClassifier'),
        'logistic': (LogisticRegression(), 'LogisticRegression'),
        'gaussian': (GaussianProcessClassifier(), 'GaussianProcessClassifier'),
        'knn': (KNeighborsClassifier(), 'KNeighborsClassifier'),
        'bayes': (GaussianNB(), 'GaussianNB'),
        'svm': (SVC(), 'SVC'),

        'xgb.1k': (XGBClassifier(max_depth=6, n_estimators=1000, n_jobs=threads, random_state=seed), 'XGBClassifier.1K'),
        'xgb.10k': (XGBClassifier(max_depth=6, n_estimators=10000, n_jobs=threads, random_state=seed), 'XGBClassifier.10K'),
        'lgbm.1k': (LGBMClassifier(n_estimators=1000, n_jobs=threads, random_state=seed, verbose=-1), 'LGBMClassifier.1K'),
        'lgbm.10k': (LGBMClassifier(n_estimators=10000, n_jobs=threads, random_state=seed, verbose=-1), 'LGBMClassifier.10K'),
        'rf.1k': (RandomForestClassifier(n_estimators=1000, n_jobs=threads), 'RandomForestClassifier.1K'),
        'rf.10k': (RandomForestClassifier(n_estimators=10000, n_jobs=threads), 'RandomForestClassifier.10K')
    }

    if isinstance(model_or_name, str):
        if classify:
            model_and_name = classification_models.get(model_or_name.lower())
        else:
            model_and_name = regression_models.get(model_or_name.lower())
        if not model_and_name:
            raise Exception("unrecognized model: '{}'".format(model_or_name))
        else:
            model, name = model_and_name
    else:
        model = model_or_name
        name = re.search(r"\w+", str(model)).group(0)

    return model, name 
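Usage then reduces to a lookup; for example (calls hypothetical, data not shown):

model, name = get_model('lightgbm', threads=4)       # -> LGBMRegressor, 'LGBMRegressor'
clf, clf_name = get_model('lgbm.1k', classify=True)  # -> LGBMClassifier with 1000 trees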
Example #30
Source File: lightgbm_model.py    From interpret-community with MIT License
def __init__(self, multiclass=False, random_state=DEFAULT_RANDOM_STATE,
                 shap_values_output=ShapValuesOutput.DEFAULT, classification=True, **kwargs):
        """Initialize the LightGBM Model.

        Additional arguments to LightGBMClassifier and LightGBMRegressor can be passed through kwargs.

        :param multiclass: Set to true to generate a multiclass model.
        :type multiclass: bool
        :param random_state: Int to seed the model.
        :type random_state: int
        :param shap_values_output: The type of the output from explain_local when using TreeExplainer.
            Currently only types 'default', 'probability' and 'teacher_probability' are supported.  If
            'probability' is specified, then we approximately scale the raw log-odds values from the
            TreeExplainer to probabilities.
        :type shap_values_output: interpret_community.common.constants.ShapValuesOutput
        :param classification: Indicates if this is a classification or regression explanation.
        :type classification: bool
        """
        self.multiclass = multiclass
        initializer_args = _get_initializer_args(kwargs)
        if self.multiclass:
            initializer = LGBMClassifier
        else:
            initializer = LGBMRegressor
        self._lgbm = initializer(random_state=random_state, **initializer_args)
        super(LGBMExplainableModel, self).__init__(**kwargs)
        self._logger.debug('Initializing LGBMExplainableModel')
        self._method = 'lightgbm'
        self._tree_explainer = None
        self._shap_values_output = shap_values_output
        self._classification = classification