Python lightgbm.LGBMClassifier() Examples
The following are 30 code examples of lightgbm.LGBMClassifier(), collected from open-source projects. Each example notes its original project, source file, and license. You may also want to check out the other available functions and classes of the lightgbm module.
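Before the project-specific examples, here is a minimal, self-contained sketch of the estimator's scikit-learn-style fit/predict interface. The data is synthetic and the parameter values are illustrative assumptions, not recommendations:

import numpy as np
from lightgbm import LGBMClassifier

# Illustrative synthetic binary-classification data (assumption: any
# numeric feature matrix with an integer label vector works here).
X = np.random.rand(200, 5)
y = np.random.randint(0, 2, size=200)

clf = LGBMClassifier(n_estimators=50, learning_rate=0.1)
clf.fit(X, y)
labels = clf.predict(X)        # predicted class labels
proba = clf.predict_proba(X)   # per-class probabilities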
Example #1
Source File: LightGBM_Classify_adult.py From Machine-Learning-for-Beginner-by-Python3 with MIT License
def Train(data, modelcount, censhu, yanzhgdata):
    model = lgbm.LGBMClassifier(boosting_type='gbdt', objective='binary', num_leaves=50,
                                learning_rate=0.1, n_estimators=modelcount, max_depth=censhu,
                                bagging_fraction=0.9, feature_fraction=0.9, reg_lambda=0.2)

    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the F1 measure
    train_mse = fmse(data[:, -1], train_out)[0]

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the F1 measure
    add_mse = fmse(yanzhgdata[:, -1], add_yan)[0]
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that finalizes the chosen combination
Example #2
Source File: test_cross_validate.py From nyaggle with MIT License
def test_fit_params_callback():
    X, y = make_classification(n_samples=1024, n_features=20, class_sep=0.98, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    models = [LGBMClassifier(n_estimators=300) for _ in range(5)]

    sample_weights = np.random.randint(1, 10, size=len(X_train))
    sample_weights = sample_weights / sample_weights.sum()

    def fit_params(n: int, train_index: List[int], valid_index: List[int]):
        return {
            'early_stopping_rounds': 100,
            'sample_weight': list(sample_weights[train_index]),
            'eval_sample_weight': [list(sample_weights[valid_index])]
        }

    result_w_weight = cross_validate(models, X_train, y_train, X_test, cv=5,
                                     eval_func=roc_auc_score, fit_params=fit_params)

    result_wo_weight = cross_validate(models, X_train, y_train, X_test, cv=5,
                                      eval_func=roc_auc_score,
                                      fit_params={'early_stopping_rounds': 50})

    assert result_w_weight.scores[-1] != result_wo_weight.scores[-1]
Example #3
Source File: prediction_model_factory.py From redshells with MIT License
def __init__(self):
    self._models = dict()

    try:
        import sklearn.ensemble
        self._models['RandomForestClassifier'] = sklearn.ensemble.RandomForestClassifier
    except ImportError:
        pass

    try:
        import xgboost
        self._models['XGBClassifier'] = xgboost.XGBClassifier
    except ImportError:
        pass

    try:
        import lightgbm
        self._models['LGBMClassifier'] = lightgbm.LGBMClassifier
    except ImportError:
        pass

    try:
        import catboost
        self._models['CatBoostClassifier'] = catboost.CatBoostClassifier
    except ImportError:
        pass
Example #4
Source File: testScoreWithAdapaLgbm.py From nyoka with Apache License 2.0
def test_01_lgbm_classifier(self):
    print("\ntest 01 (lgbm classifier with preprocessing) [binary-class]\n")
    model = LGBMClassifier()
    pipeline_obj = Pipeline([
        ('scaler', MinMaxScaler()),
        ("model", model)
    ])
    pipeline_obj.fit(self.X, self.Y_bin)
    file_name = "test01lgbm.pmml"
    lgb_to_pmml(pipeline_obj, self.features, 'Species', file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
    model_pred = pipeline_obj.predict(self.X)
    model_prob = pipeline_obj.predict_proba(self.X)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example #5
Source File: test_lightgbm.py From hyperparameter_hunter with MIT License
def exp_lgb_0():
    return CVExperiment(
        model_initializer=LGBMClassifier,
        model_init_params=dict(
            boosting_type="gbdt",
            num_leaves=5,
            n_estimators=10,
            max_depth=5,
            min_child_samples=1,
            subsample=0.5,
            verbose=-1,
        ),
    )

##################################################
# Optimization Protocol Fixtures
##################################################
Example #6
Source File: test_lightgbm.py From hyperparameter_hunter with MIT License
def opt_lgb_0(request):
    optimizer = BayesianOptPro(target_metric=request.param, iterations=2, random_state=32)
    optimizer.forge_experiment(
        model_initializer=LGBMClassifier,
        model_init_params=dict(
            boosting_type=Categorical(["gbdt", "dart"]),
            num_leaves=Integer(2, 8),
            n_estimators=10,
            max_depth=5,
            min_child_samples=1,
            subsample=Real(0.4, 0.7),
            verbose=-1,
        ),
    )
    optimizer.go()
    yield optimizer

    assert optimizer.target_metric == ("oof", (request.param or "roc_auc"))

    # lb = pd.read_csv(
    #     # Path(assets_dir) / "HyperparameterHunterAssets" / "Leaderboards" / "GlobalLeaderboard.csv",
    #     Path(assets_dir) / "Leaderboards" / "GlobalLeaderboard.csv",
    # )
    # assert lb.columns[0] == f"oof_{request.param}"
Example #7
Source File: testScoreWithAdapaLgbm.py From nyoka with Apache License 2.0
def test_02_lgbm_classifier(self):
    print("\ntest 02 (lgbm classifier with preprocessing) [multi-class]\n")
    model = LGBMClassifier()
    pipeline_obj = Pipeline([
        ('scaler', MaxAbsScaler()),
        ("model", model)
    ])
    pipeline_obj.fit(self.X, self.Y)
    file_name = "test02lgbm.pmml"
    lgb_to_pmml(pipeline_obj, self.features, 'Species', file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
    model_pred = pipeline_obj.predict(self.X)
    model_prob = pipeline_obj.predict_proba(self.X)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example #8
Source File: test_flofo_importance.py From lofo-importance with MIT License
def test_flofo_importance():
    df = generate_test_data(100000)
    df.loc[df["A"] < df["A"].median(), "A"] = None
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=0)
    val_df_checkpoint = val_df.copy()
    features = ["A", "B", "C", "D"]

    lgbm = LGBMClassifier(random_state=0, n_jobs=1)
    lgbm.fit(train_df[features], train_df["binary_target"])

    flofo = FLOFOImportance(lgbm, df, features, 'binary_target', scoring='roc_auc')
    flofo_parallel = FLOFOImportance(lgbm, df, features, 'binary_target',
                                     scoring='roc_auc', n_jobs=3)

    importance_df = flofo.get_importance()
    importance_df_parallel = flofo_parallel.get_importance()
    is_feature_order_same = importance_df["feature"].values == importance_df_parallel["feature"].values

    plot_importance(importance_df)

    assert is_feature_order_same.sum() == len(features), "Parallel FLOFO returned different result!"
    assert val_df.equals(val_df_checkpoint), "LOFOImportance mutated the dataframe!"
    assert len(features) == importance_df.shape[0], "Missing importance value for some features!"
    assert importance_df["feature"].values[0] == "B", "Most important feature is different than B!"
Example #9
Source File: test_lightgbm.py From m2cgen with MIT License
def test_multi_class():
    estimator = lightgbm.LGBMClassifier(n_estimators=1, random_state=1, max_depth=1)
    estimator.fit(np.array([[1], [2], [3]]), np.array([1, 2, 3]))

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    exponent = ast.ExpExpr(
        ast.NumVal(-1.0986122886681098),
        to_reuse=True)

    exponent_sum = ast.BinNumExpr(
        ast.BinNumExpr(exponent, exponent, ast.BinNumOpType.ADD),
        exponent,
        ast.BinNumOpType.ADD,
        to_reuse=True)

    softmax = ast.BinNumExpr(exponent, exponent_sum, ast.BinNumOpType.DIV)

    expected = ast.VectorVal([softmax] * 3)

    assert utils.cmp_exprs(actual, expected)
Example #10
Source File: test_json_pretty_viz.py From lale with Apache License 2.0
def test_autoai_libs_t_no_op(self):
    from lale.lib.autoai_libs import TNoOp
    from lightgbm import LGBMClassifier
    from lale.operators import make_pipeline
    t_no_op = TNoOp(name='no_action', datatypes='x', feat_constraints=[])
    lgbm_classifier = LGBMClassifier(class_weight='balanced', learning_rate=0.18)
    pipeline = make_pipeline(t_no_op, lgbm_classifier)
    expected = \
"""from lale.lib.autoai_libs import TNoOp
from lightgbm import LGBMClassifier
from lale.operators import make_pipeline
t_no_op = TNoOp(name='no_action', datatypes='x', feat_constraints=[])
lgbm_classifier = LGBMClassifier(class_weight='balanced', learning_rate=0.18)
pipeline = make_pipeline(t_no_op, lgbm_classifier)"""
    self._roundtrip(expected, lale.pretty_print.to_string(pipeline, combinators=False))
Example #11
Source File: utils.py From m2cgen with MIT License
def __call__(self, estimator):
    fitted_estimator = estimator.fit(self.X_train, self.y_train)

    if isinstance(estimator, (LinearClassifierMixin, SVC, NuSVC, LightBaseClassifier)):
        y_pred = estimator.decision_function(self.X_test)
    elif isinstance(estimator, DecisionTreeClassifier):
        y_pred = estimator.predict_proba(self.X_test.astype(np.float32))
    elif isinstance(
            estimator,
            (ForestClassifier, XGBClassifier, LGBMClassifier)):
        y_pred = estimator.predict_proba(self.X_test)
    else:
        y_pred = estimator.predict(self.X_test)

    return self.X_test, y_pred, fitted_estimator
Example #12
Source File: test_cross_validate.py From nyaggle with MIT License
def test_cv_lgbm_df():
    X, y = make_classification_df(n_samples=1024, n_num_features=20, n_cat_features=1,
                                  class_sep=0.98, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    models = [LGBMClassifier(n_estimators=300) for _ in range(5)]

    pred_oof, pred_test, scores, importance = cross_validate(models, X_train, y_train, X_test,
                                                             cv=5, eval_func=roc_auc_score)

    print(scores)
    assert len(scores) == 5 + 1
    assert scores[-1] >= 0.85  # overall roc_auc
    assert roc_auc_score(y_train, pred_oof) == scores[-1]
    assert roc_auc_score(y_test, pred_test) >= 0.85  # test roc_auc
    assert roc_auc_score(y_test, models[0].predict_proba(X_test)[:, 1]) >= 0.85  # make sure models are trained
    assert len(importance) == 5
    assert list(importance[0].columns) == ['feature', 'importance']
    assert len(importance[0]) == 20 + 1
    assert models[0].booster_.num_trees() < 300  # making sure early stopping worked
Example #13
Source File: test_cross_validate.py From nyaggle with MIT License
def test_cv_lgbm():
    X, y = make_classification(n_samples=1024, n_features=20, class_sep=0.98, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    models = [LGBMClassifier(n_estimators=300) for _ in range(5)]

    pred_oof, pred_test, scores, importance = cross_validate(models, X_train, y_train, X_test,
                                                             cv=5, eval_func=roc_auc_score,
                                                             fit_params={'early_stopping_rounds': 200})

    print(scores)
    assert len(scores) == 5 + 1
    assert scores[-1] >= 0.85  # overall roc_auc
    assert roc_auc_score(y_train, pred_oof) == scores[-1]
    assert roc_auc_score(y_test, pred_test) >= 0.85  # test roc_auc
    assert roc_auc_score(y, models[0].predict_proba(X)[:, 1]) >= 0.85  # make sure models are trained
    assert len(importance) == 5
    assert list(importance[0].columns) == ['feature', 'importance']
    assert len(importance[0]) == 20
Example #14
Source File: LightGBM_Classify_adult.py From Machine-Learning-for-Beginner-by-Python3 with MIT License
def recspre(estrs, predata, datadict, zhe):
    mo, ze = estrs.split('-')
    model = lgbm.LGBMClassifier(boosting_type='gbdt', objective='binary', num_leaves=50,
                                learning_rate=0.1, n_estimators=int(mo), max_depth=int(ze),
                                bagging_fraction=0.9, feature_fraction=0.9, reg_lambda=0.2)

    model.fit(datadict[zhe]['train'][:, :-1], datadict[zhe]['train'][:, -1])

    # Predict
    yucede = model.predict(predata[:, :-1])
    # Compute the confusion matrix
    print(ConfuseMatrix(predata[:, -1], yucede))
    return fmse(predata[:, -1], yucede)

# Main function
Example #15
Source File: test_core.py From dask-lightgbm with BSD 3-Clause "New" or "Revised" License
def test_classifier(output, centers, client, listen_port):  # noqa
    X, y, w, dX, dy, dw = _create_data('classification', output=output, centers=centers)

    a = dlgbm.LGBMClassifier(time_out=5, local_listen_port=listen_port)
    a = a.fit(dX, dy, sample_weight=dw, client=client)
    p1 = a.predict(dX, client=client)
    s1 = accuracy_score(dy, p1)
    p1 = p1.compute()

    b = lightgbm.LGBMClassifier()
    b.fit(X, y, sample_weight=w)
    p2 = b.predict(X)
    s2 = b.score(X, y)
    print(confusion_matrix(y, p1))
    print(confusion_matrix(y, p2))

    assert_eq(s1, s2)
    print(s1)

    assert_eq(p1, p2)
    assert_eq(y, p1)
    assert_eq(y, p2)
Example #16
Source File: test_lofo_importance.py From lofo-importance with MIT License
def test_feature_groups():
    df = generate_test_data(1000, text=True)
    features = ["A", "B", "C", "D"]

    cv = CountVectorizer(ngram_range=(3, 3), analyzer="char")
    feature_groups = dict()
    feature_groups["names"] = cv.fit_transform(df["T"])
    feature_groups["interactions"] = df[["A", "B"]].values * df[["C", "D"]].values

    dataset = Dataset(df=df, target="binary_target", features=features,
                      feature_groups=feature_groups)

    lgbm = LGBMClassifier(random_state=0, n_jobs=4)
    lofo = LOFOImportance(dataset, model=lgbm, cv=4, scoring='roc_auc')

    importance_df = lofo.get_importance()

    assert len(features) + len(feature_groups) == importance_df.shape[0], "Missing importance value for some features!"
    assert importance_df["feature"].values[0] == "names", "Most important feature is different than 'names'!"
Example #17
Source File: test_json_pretty_viz.py From lale with Apache License 2.0
def test_autoai_libs_tam_2(self):
    from lale.lib.autoai_libs import TAM
    import numpy as np
    from lightgbm import LGBMClassifier
    from sklearn.decomposition import PCA
    from lale.operators import make_pipeline
    pca = PCA(copy=False)
    tam = TAM(tans_class=pca, name='pca', col_names=['a', 'b', 'c'],
              col_dtypes=[np.dtype('float32'), np.dtype('float32'), np.dtype('float32')])
    lgbm_classifier = LGBMClassifier(class_weight='balanced', learning_rate=0.18)
    pipeline = make_pipeline(tam, lgbm_classifier)
    expected = \
"""from lale.lib.autoai_libs import TAM
import sklearn.decomposition.pca
import numpy as np
from lightgbm import LGBMClassifier
from lale.operators import make_pipeline
tam = TAM(tans_class=sklearn.decomposition.pca.PCA(copy=False, iterated_power='auto', n_components=None, random_state=None, svd_solver='auto', tol=0.0, whiten=False), name='pca', col_names=['a', 'b', 'c'], col_dtypes=[np.dtype('float32'), np.dtype('float32'), np.dtype('float32')])
lgbm_classifier = LGBMClassifier(class_weight='balanced', learning_rate=0.18)
pipeline = make_pipeline(tam, lgbm_classifier)"""
    self._roundtrip(expected, lale.pretty_print.to_string(pipeline, combinators=False))
Example #18
Source File: demo_IJCAI_2018.py From Feature-Selection with MIT License
def main():
    sf = ss.Select(Sequence=True, Random=True, Cross=False)  # select the search strategy you want
    sf.ImportDF(prepareData(), label='is_trade')
    sf.ImportLossFunction(modelscore, direction='descend')
    sf.ImportCrossMethod(CrossMethod)
    sf.InitialNonTrainableFeatures(['used', 'instance_id', 'item_property_list', 'context_id',
                                    'context_timestamp', 'predict_category_property', 'is_trade'])
    sf.InitialFeatures(['item_category_list', 'item_price_level', 'item_sales_level',
                        'item_collected_level', 'item_pv_level', 'day'])
    sf.GenerateCol(key='mean', selectstep=2)
    sf.SetSample(0.1, samplemode=0, samplestate=0)
    # sf.SetFeaturesLimit(5)
    sf.SetTimeLimit(1)
    sf.clf = lgbm.LGBMClassifier(random_state=1, num_leaves=6, n_estimators=5000,
                                 max_depth=3, learning_rate=0.05, n_jobs=8)
    sf.SetLogFile('recordml.log')
    sf.run(validation)
Example #19
Source File: test_lightgbm.py From sklearn-onnx with MIT License
def setUpClass(self):
    update_registered_converter(
        LGBMClassifier, 'LightGbmLGBMClassifier',
        calculate_linear_classifier_output_shapes,
        convert_lightgbm,
        options={'zipmap': [True, False], 'nocl': [True, False]})
    update_registered_converter(
        LGBMRegressor, 'LgbmRegressor',
        calculate_linear_regressor_output_shapes,
        convert_lightgbm)
Example #20
Source File: sklearn.py From optuna with MIT License
def __init__(self, *args, **kwargs):
    # type: (List[Any], Dict[str, Any]) -> None

    warnings.warn(
        "LightGBMTuner doesn't support sklearn API. "
        "Use `train()` or `LightGBMTuner` for hyperparameter tuning."
    )
    super(LGBMClassifier, self).__init__(*args, **kwargs)
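The warning above steers users away from the sklearn wrapper and toward LightGBM's native training entry point. For context, here is a minimal sketch of that train()-style API; the synthetic data and parameter values are illustrative assumptions:

import numpy as np
import lightgbm as lgb

# Illustrative synthetic data for the sketch.
X = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)

dtrain = lgb.Dataset(X, label=y)
params = {'objective': 'binary', 'verbosity': -1}
booster = lgb.train(params, dtrain, num_boost_round=10)
pred = booster.predict(X)  # probabilities for the positive class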
Example #21
Source File: tester.py From Text-Classification-Benchmark with MIT License
def init_estimators():
    return [
        {'classifier': 'NB', 'model': MultinomialNB()},
        {'classifier': 'LR', 'model': LogisticRegression(random_state=42)},
        {'classifier': 'L-SVM', 'model': LinearSVC(max_iter=1000, random_state=42)},
        {'classifier': 'RBF-SVM', 'model': SVC(max_iter=1000, random_state=42)},
        {'classifier': 'RF', 'model': RandomForestClassifier(n_estimators=100, random_state=42)},
        {'classifier': 'XGB', 'model': XGBClassifier(n_estimators=100, random_state=42)},
        {'classifier': 'LGBM', 'model': LGBMClassifier(n_estimators=100, random_state=42)},
    ]
Example #22
Source File: S1elect.py From Feature-Selection with MIT License
def main():
    sf = ss.Select(Sequence=True, Random=False, Cross=False)  # Initialize the selector and choose the workflow you need
    sf.ImportDF(prepareData(), label='buy')  # Import the dataset and the target label
    sf.ImportLossFunction(score1, direction='ascend')  # Import the scoring function and the optimization direction
    # sf.ImportCrossMethod(CrossMethod)
    sf.InitialNonTrainableFeatures(['buy', 'nextbuy', 'o_date', 'a_date', 'PredictDays', 'user_id'])  # Initialize the features that must not be used
    sf.InitialFeatures(['age_x', 'sex_x', 'user_lv_cd_x', 'buycnt', 'daybeforelastbuy_o_ave'])  # Initialize the starting feature combination
    sf.GenerateCol()  # Generate the feature pool (see the readme in the root directory for this function's parameters)
    sf.SetSample(1, samplemode=1)  # Initialize the sampling ratio and the random process
    sf.SetTimeLimit(100)  # Set the maximum running time of the algorithm, in minutes
    sf.clf = lgbm.LGBMClassifier(random_state=1, num_leaves=6, n_estimators=1000,
                                 max_depth=3, learning_rate=0.2, n_jobs=8)  # Set the model
    sf.SetLogFile('record.log')  # Initialize the log file
    sf.run(validate)  # Pass in the validation function and start running
Example #23
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0
def _checkGBDTClassifier(gbdt):
    if hasattr(gbdt, "apply"):
        return gbdt
    else:
        try:
            from lightgbm import LGBMClassifier
            if isinstance(gbdt, LGBMClassifier):
                return gbdt
        except ImportError:
            pass
    raise ValueError("GBDT class " + _class_name(gbdt) + " is not supported")
Example #24
Source File: test_lightgbm.py From sklearn-onnx with MIT License
def test_lightgbm_classifier(self):
    model = LGBMClassifier(n_estimators=3, min_child_samples=1)
    dump_binary_classification(
        model, allow_failure="StrictVersion(onnx.__version__) < "
                             "StrictVersion('1.3.0')")
    dump_multiple_classification(
        model, allow_failure="StrictVersion(onnx.__version__) < "
                             "StrictVersion('1.3.0')")
Example #25
Source File: test_LightGbmTreeEnsembleConverters.py From onnxmltools with MIT License
def test_lightgbm_classifier_zipmap(self):
    X = [[0, 1], [1, 1], [2, 0], [1, 2]]
    X = numpy.array(X, dtype=numpy.float32)
    y = [0, 1, 0, 1]
    model = LGBMClassifier(n_estimators=3, min_child_samples=1)
    model.fit(X, y)
    onx = convert_model(
        model, 'dummy',
        input_types=[('X', FloatTensorType([None, X.shape[1]]))])
    assert "zipmap" in str(onx).lower()
Example #26
Source File: lightgbm_model.py From interpret-community with MIT License
def model(self):
    """Retrieve the underlying model.

    :return: The lightgbm model, either classifier or regressor.
    :rtype: Union[LGBMClassifier, LGBMRegressor]
    """
    return self._lgbm
Example #27
Source File: common_utils.py From interpret-community with MIT License
def create_lightgbm_classifier(X, y):
    lgbm = LGBMClassifier(boosting_type='gbdt', learning_rate=0.1,
                          max_depth=5, n_estimators=200, n_jobs=1, random_state=777)
    model = lgbm.fit(X, y)
    return model
Example #28
Source File: test_misc_explainers.py From interpret-community with MIT License
def _get_create_model(self, classification):
    if classification:
        model = LGBMClassifier()
    else:
        model = LGBMRegressor()

    def create_model(x, y):
        return model.fit(x, y)

    return create_model
Example #29
Source File: test_onnxmltools.py From onnxconverter-common with MIT License
def test_lightgbm(self):
    X = [[0, 1], [1, 1], [2, 0], [1, 2]]
    X = np.array(X, dtype=np.float32)
    y = [0, 1, 0, 1]
    model = lightgbm.LGBMClassifier(n_estimators=3, min_child_samples=1)
    model.fit(X, y)
    onx = xmlt.convert(
        model, 'dummy',
        initial_types=[('X', FloatTensorType([None, X.shape[1]]))],
        target_opset=9)
    assert "ir_version: 4" in str(onx).lower()
Example #30
Source File: _parse.py From onnxmltools with MIT License
def _parse_lightgbm(scope, model, inputs):
    '''
    This is a delegate function. It does nothing but invoke the correct
    parsing function according to the input model's type.
    :param scope: Scope object
    :param model: A lightgbm object
    :param inputs: A list of variables
    :return: The output variables produced by the input model
    '''
    if isinstance(model, LGBMClassifier):
        return _parse_sklearn_classifier(scope, model, inputs)
    if (isinstance(model, WrappedBooster) and
            model.operator_name == 'LgbmClassifier'):
        return _parse_sklearn_classifier(scope, model, inputs)
    return _parse_lightgbm_simple_model(scope, model, inputs)