Python sklearn.ensemble.GradientBoostingClassifier() Examples
The following are 30 code examples of sklearn.ensemble.GradientBoostingClassifier(), collected from open-source projects; each example lists its source file, project, and license. You may also want to check out all available functions/classes of the module sklearn.ensemble.
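
Before the project-specific examples, here is a minimal, self-contained sketch of the basic fit/predict workflow (toy data from make_classification; the hyperparameter values are illustrative assumptions, not tuned recommendations):

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# Toy binary classification problem
X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Illustrative hyperparameters only
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,
                                 max_depth=3, random_state=0)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))       # mean accuracy on the held-out split
print(clf.predict_proba(X_test[:5]))   # per-class probabilities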
Example #1
Source File: grid_search_cv.py From text-classifier with Apache License 2.0
def search_cv(x_train, y_train, x_test, y_test, model=GradientBoostingClassifier(n_estimators=30)):
    # Grid search to find the best parameters
    parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 2, 4],
                  'gamma': [0.125, 0.25, 0.5, 1, 2, 4]}
    clf = GridSearchCV(model, param_grid=parameters)
    grid_search = clf.fit(x_train, y_train)
    # Score the results
    print("Best score: %0.3f" % grid_search.best_score_)
    print(grid_search.best_estimator_)
    # best params
    print('best params:', clf.best_params_)
    print('-----grid search end------------')
    print('on all train set')
    scores = cross_val_score(grid_search.best_estimator_, x_train, y_train, cv=3, scoring='accuracy')
    print(scores.mean(), scores)
    print('on test set')
    scores = cross_val_score(grid_search.best_estimator_, x_test, y_test, cv=3, scoring='accuracy')
    print(scores.mean(), scores)
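
Note that the parameter grid above ('kernel', 'C', 'gamma') belongs to an SVC, so calling search_cv with its default GradientBoostingClassifier model would fail with an invalid-parameter error. A minimal sketch of a grid matched to the gradient boosting model (the grid values are illustrative assumptions):

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

# Hyperparameter names valid for GradientBoostingClassifier; values are guesses
parameters = {'n_estimators': [30, 60, 100],
              'learning_rate': [0.05, 0.1, 0.2],
              'max_depth': [2, 3, 4]}
clf = GridSearchCV(GradientBoostingClassifier(), param_grid=parameters)
# clf.fit(x_train, y_train) would then populate clf.best_params_ as above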
Example #2
Source File: test_partial_dependence.py From Mastering-Elasticsearch-7.0 with MIT License
def test_recursion_decision_function(target_feature):
    # Make sure the recursion method (implicitly uses decision_function) has
    # the same result as using brute method with
    # response_method=decision_function
    X, y = make_classification(n_classes=2, n_clusters_per_class=1,
                               random_state=1)
    assert np.mean(y) == .5  # make sure the init estimator predicts 0 anyway

    est = GradientBoostingClassifier(random_state=0, loss='deviance')
    est.fit(X, y)

    preds_1, _ = partial_dependence(est, X, [target_feature],
                                    response_method='decision_function',
                                    method='recursion')
    preds_2, _ = partial_dependence(est, X, [target_feature],
                                    response_method='decision_function',
                                    method='brute')

    assert_allclose(preds_1, preds_2, atol=1e-7)
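
For visual inspection rather than raw arrays, scikit-learn's inspection module (0.21+, assumed here) also provides a plotting helper built on the same partial_dependence machinery:

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.inspection import plot_partial_dependence

X, y = make_classification(n_classes=2, n_clusters_per_class=1, random_state=1)
est = GradientBoostingClassifier(random_state=0).fit(X, y)

# One partial-dependence curve per listed feature index
plot_partial_dependence(est, X, features=[0, 1])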
Example #3
Source File: GBDT_Classify_adult.py From Machine-Learning-for-Beginner-by-Python3 with MIT License
def recspre(estrs, predata, datadict, zhe):
    mo, ze = estrs.split('-')
    model = GradientBoostingClassifier(loss='deviance', n_estimators=int(mo),
                                       max_depth=int(ze), learning_rate=0.1)
    model.fit(datadict[zhe]['train'][:, :-1], datadict[zhe]['train'][:, -1])
    # Predict
    yucede = model.predict(predata[:, :-1])
    # Compute the confusion matrix
    print(ConfuseMatrix(predata[:, -1], yucede))
    return fmse(predata[:, -1], yucede)

# Main function
Example #4
Source File: predict.py From Loan_Default_Prediction with BSD 3-Clause "New" or "Revised" License
def gbc_gp_predict(train_x, train_y, test_x):
    feature_indexs = getTopFeatures(train_x, train_y)
    sub_x_Train = get_data(train_x, feature_indexs[:16], features.feature_pair_sub_list,
                           features.feature_pair_plus_list, features.feature_pair_mul_list,
                           features.feature_pair_divide_list[:20])
    sub_x_Test = get_data(test_x, feature_indexs[:16], features.feature_pair_sub_list,
                          features.feature_pair_plus_list, features.feature_pair_mul_list,
                          features.feature_pair_divide_list[:20])
    labels = toLabels(train_y)
    gbc = GradientBoostingClassifier(n_estimators=3000, max_depth=9)
    gbc.fit(sub_x_Train, labels)
    pred_probs = gbc.predict_proba(sub_x_Test)[:, 1]
    ind_test = np.where(pred_probs > 0.55)[0]
    gp_preds_part = gbc_gp_predict_part(sub_x_Train, train_y, sub_x_Test[ind_test])
    gp_preds = np.zeros(len(test_x))
    gp_preds[ind_test] = gp_preds_part
    return gp_preds

# invoke the function gbc_svr_predict_part
Example #5
Source File: test_general.py From hyperparameter_hunter with MIT License
def env_5(request):
    return Environment(
        train_dataset=get_breast_cancer_data(),
        results_path=assets_dir,
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type=StratifiedKFold,
        cv_params=dict(n_splits=3, shuffle=True, random_state=32),
        experiment_recorders=request.param,
    )

##################################################
# Experiment Fixtures
##################################################
#################### GradientBoostingClassifier Experiments ####################
Example #6
Source File: GradientBoostingClassifier.py From Splunking-Crime with GNU Affero General Public License v3.0
def __init__(self, options):
    self.handle_options(options)
    params = options.get('params', {})
    out_params = convert_params(
        params,
        strs=['loss', 'max_features'],
        floats=['learning_rate', 'min_weight_fraction_leaf'],
        ints=['n_estimators', 'max_depth', 'min_samples_split',
              'min_samples_leaf', 'max_leaf_nodes', 'random_state'],
    )

    valid_loss = ['deviance', 'exponential']
    if 'loss' in out_params:
        if out_params['loss'] not in valid_loss:
            msg = "loss must be one of: {}".format(', '.join(valid_loss))
            raise RuntimeError(msg)

    if 'max_features' in out_params:
        out_params['max_features'] = handle_max_features(out_params['max_features'])

    if 'max_leaf_nodes' in out_params and 'max_depth' in out_params:
        messages.warn('max_depth ignored when max_leaf_nodes is set')

    self.estimator = _GradientBoostingClassifier(**out_params)
Example #7
Source File: test_full_pipelines.py From python-sasctl with Apache License 2.0
def test_register_model(self, iris_dataset):
    pytest.importorskip('sklearn')
    from sasctl import register_model
    from sklearn.ensemble import GradientBoostingClassifier

    TARGET = 'Species'
    X = iris_dataset.drop(TARGET, axis=1)
    y = iris_dataset[TARGET]

    model = GradientBoostingClassifier()
    model.fit(X, y)

    model = register_model(model, self.MODEL_NAME, self.PROJECT_NAME,
                           input=X, force=True)

    assert model.name == self.MODEL_NAME
    assert model.projectName == self.PROJECT_NAME
    assert model.function.lower() == 'classification'
    assert model.algorithm.lower() == 'gradient boosting'
    assert model.tool.lower().startswith('python')
Example #8
Source File: GBDT_Classify_adult.py From Machine-Learning-for-Beginner-by-Python3 with MIT License
def Train(data, modelcount, censhu, yanzhgdata):
    model = GradientBoostingClassifier(loss='deviance', n_estimators=modelcount,
                                       max_depth=censhu, learning_rate=0.1,
                                       max_features='sqrt')
    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the MSE
    train_mse = fmse(data[:, -1], train_out)[0]

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the F1 measure
    add_mse = fmse(yanzhgdata[:, -1], add_yan)[0]
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example #9
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License
def check_classification_synthetic(presort, loss):
    # Test GradientBoostingClassifier on synthetic dataset used by
    # Hastie et al. in ESLII Example 12.7.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=2,
                                      max_depth=1, loss=loss,
                                      learning_rate=1.0, random_state=0)
    gbrt.fit(X_train, y_train)
    error_rate = (1.0 - gbrt.score(X_test, y_test))
    assert_less(error_rate, 0.09)

    gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=2,
                                      max_depth=1, loss=loss,
                                      learning_rate=1.0, subsample=0.5,
                                      random_state=0, presort=presort)
    gbrt.fit(X_train, y_train)
    error_rate = (1.0 - gbrt.score(X_test, y_test))
    assert_less(error_rate, 0.08)
Example #10
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License
def test_probability_log():
    # Predict probabilities.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    assert_raises(ValueError, clf.predict_proba, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)

    # check if probabilities are in [0, 1].
    y_proba = clf.predict_proba(T)
    assert np.all(y_proba >= 0.0)
    assert np.all(y_proba <= 1.0)

    # derive predictions from probabilities
    y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0)
    assert_array_equal(y_pred, true_result)
Example #11
Source File: gradient_boosting_blending.py From DataMiningCompetitionFirstPrize with MIT License
def learn(x, y, test_x):
    # set sample weight
    weight_list = []
    for j in range(len(y)):
        if y[j] == "0":
            weight_list.append(variables.weight_0_gdbt_b)
        if y[j] == "1000":
            weight_list.append(variables.weight_1000_gdbt_b)
        if y[j] == "1500":
            weight_list.append(variables.weight_1500_gdbt_b)
        if y[j] == "2000":
            weight_list.append(variables.weight_2000_gdbt_b)

    clf = GradientBoostingClassifier(loss='deviance',
                                     n_estimators=variables.n_estimators_gdbt_b,
                                     learning_rate=variables.learning_rate_gdbt_b,
                                     max_depth=variables.max_depth_gdbt_b,
                                     random_state=0,
                                     min_samples_split=variables.min_samples_split_gdbt_b,
                                     min_samples_leaf=variables.min_samples_leaf_gdbt_b,
                                     subsample=variables.subsample_gdbt_b,
                                     ).fit(x, y, weight_list)
    prediction_list = clf.predict(test_x)

    return prediction_list
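
If the hand-tuned weights above are meant to offset class imbalance, scikit-learn can compute comparable per-sample weights automatically; a minimal sketch under that assumption (the 'balanced' heuristic weights each class inversely to its frequency):

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.utils.class_weight import compute_sample_weight

# Imbalanced toy data: roughly 90% / 10% class split
X, y = make_classification(weights=[0.9, 0.1], random_state=0)
sample_weight = compute_sample_weight('balanced', y)  # one weight per sample
clf = GradientBoostingClassifier(random_state=0).fit(X, y, sample_weight=sample_weight)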
Example #12
Source File: GradientBoosting.py From Awesome-Scripts with MIT License
def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/phishing.data', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = GradientBoostingClassifier()
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]
    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example #13
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License
def test_check_inputs_predict_stages():
    # check that predict_stages throws an error if the type of X is not
    # supported
    x, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    x_sparse_csc = csc_matrix(x)
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
    clf.fit(x, y)
    score = np.zeros((y.shape)).reshape(-1, 1)
    assert_raise_message(ValueError,
                         "When X is a sparse matrix, a CSR format is expected",
                         predict_stages, clf.estimators_, x_sparse_csc,
                         clf.learning_rate, score)
    x_fortran = np.asfortranarray(x)
    assert_raise_message(ValueError,
                         "X should be C-ordered np.ndarray",
                         predict_stages, clf.estimators_, x_fortran,
                         clf.learning_rate, score)
Example #14
Source File: test_partial_dependence.py From Mastering-Elasticsearch-7.0 with MIT License
def test_warning_recursion_non_constant_init():
    # make sure that passing a non-constant init parameter to a GBDT and using
    # recursion method yields a warning.
    gbc = GradientBoostingClassifier(init=DummyClassifier(), random_state=0)
    gbc.fit(X, y)

    with pytest.warns(
            UserWarning,
            match='Using recursion method with a non-constant init predictor'):
        partial_dependence(gbc, X, [0], method='recursion')

    with pytest.warns(
            UserWarning,
            match='Using recursion method with a non-constant init predictor'):
        partial_dependence(gbc, X, [0], method='recursion')
Example #15
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License
def test_early_stopping_n_classes():
    # when doing early stopping (_, _, y_train, _ = train_test_split(X, y))
    # there might be classes in y that are missing in y_train. As the init
    # estimator will be trained on y_train, we need to raise an error if this
    # happens.

    X = [[1]] * 10
    y = [0, 0] + [1] * 8  # only 2 negative-class samples out of 10

    gb = GradientBoostingClassifier(n_iter_no_change=5, random_state=0,
                                    validation_fraction=8)
    with pytest.raises(
            ValueError,
            match='The training data after the early stopping split'):
        gb.fit(X, y)

    # No error if we let training data be big enough
    gb = GradientBoostingClassifier(n_iter_no_change=5, random_state=0,
                                    validation_fraction=4)
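
The test above appears to rely on validation_fraction being forwarded to train_test_split as test_size, where an integer is read as an absolute sample count. In ordinary use it is a float in (0, 1); a minimal early-stopping sketch (scikit-learn 0.20+; parameter values are illustrative):

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X, y = make_classification(n_samples=1000, random_state=0)

# Stop adding trees once the 10% validation split stops improving
# for 10 consecutive iterations
gb = GradientBoostingClassifier(n_estimators=1000, n_iter_no_change=10,
                                validation_fraction=0.1, random_state=0)
gb.fit(X, y)
print(gb.n_estimators_)  # trees actually fitted, usually well under 1000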
Example #16
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License
def test_probability_exponential():
    # Predict probabilities.
    clf = GradientBoostingClassifier(loss='exponential',
                                     n_estimators=100, random_state=1)

    assert_raises(ValueError, clf.predict_proba, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)

    # check if probabilities are in [0, 1].
    y_proba = clf.predict_proba(T)
    assert np.all(y_proba >= 0.0)
    assert np.all(y_proba <= 1.0)
    score = clf.decision_function(T).ravel()
    assert_array_almost_equal(y_proba[:, 1], expit(2 * score))

    # derive predictions from probabilities
    y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0)
    assert_array_equal(y_pred, true_result)
Example #17
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License
def test_zero_estimator_clf():
    # Test if init='zero' works for classification.
    X = iris.data
    y = np.array(iris.target)

    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='zero')
    est.fit(X, y)

    assert_greater(est.score(X, y), 0.96)

    # binary clf
    mask = y != 0
    y[mask] = 1
    y[~mask] = 0
    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='zero')
    est.fit(X, y)
    assert_greater(est.score(X, y), 0.96)

    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='foobar')
    assert_raises(ValueError, est.fit, X, y)
Example #18
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License
def test_more_verbose_output():
    # Check verbose=2 does not cause error.
    from io import StringIO
    import sys
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1,
                                     verbose=2)
    clf.fit(X, y)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header = verbose_output.readline().rstrip()
    # no OOB
    true_header = ' '.join(['%10s'] + ['%16s'] * 2) % (
        'Iter', 'Train Loss', 'Remaining Time')
    assert_equal(true_header, header)

    n_lines = sum(1 for l in verbose_output.readlines())
    # 100 lines for n_estimators==100
    assert_equal(100, n_lines)
Example #19
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License
def test_verbose_output():
    # Check verbose=1 does not cause error.
    from io import StringIO
    import sys
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1,
                                     verbose=1, subsample=0.8)
    clf.fit(X, y)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header = verbose_output.readline().rstrip()
    # with OOB
    true_header = ' '.join(['%10s'] + ['%16s'] * 3) % (
        'Iter', 'Train Loss', 'OOB Improve', 'Remaining Time')
    assert_equal(true_header, header)

    n_lines = sum(1 for l in verbose_output.readlines())
    # one for 1-10 and then 9 for 20-100
    assert_equal(10 + 9, n_lines)
Example #20
Source File: test_boosted_trees_classifier.py From coremltools with BSD 3-Clause "New" or "Revised" License
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    from sklearn.datasets import load_boston
    import numpy as np

    scikit_data = load_boston()
    scikit_model = GradientBoostingClassifier(random_state=1)
    t = scikit_data.target
    target = np.digitize(t, np.histogram(t)[1]) - 1
    scikit_model.fit(scikit_data.data, target)
    self.target = target

    # Save the data and the model
    self.scikit_data = scikit_data
    self.scikit_model = scikit_model
Example #21
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License
def test_serialization():
    # Check model serialization.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_))

    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    serialized_clf = pickle.dumps(clf, protocol=pickle.HIGHEST_PROTOCOL)
    clf = None
    clf = pickle.loads(serialized_clf)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_))
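
joblib is commonly preferred to raw pickle for persisting scikit-learn estimators, since it handles the embedded numpy arrays efficiently; a minimal equivalent sketch (the file name is arbitrary):

import joblib
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X, y = make_classification(random_state=0)
clf = GradientBoostingClassifier(n_estimators=100, random_state=1).fit(X, y)

joblib.dump(clf, 'gbc.joblib')        # serialize the fitted model to disk
restored = joblib.load('gbc.joblib')  # round-trip it back
assert (restored.predict(X) == clf.predict(X)).all()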
Example #22
Source File: sentiment_analysis_ml.py From Sentiment_Analysis_cnn_lstm_cnnlstm_textcnn_bilstm with Apache License 2.0
def gbdt_classifier(train_vecs, y_train, test_vecs, y_test):
    clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                     max_depth=10, random_state=0)
    clf.fit(train_vecs, y_train)
    joblib.dump(clf, storedpaths + 'model_gbdt.pkl')
    test_scores = clf.score(test_vecs, y_test)
    return test_scores

# Train the nearest-neighbors classifier
Example #23
Source File: classification.py From pyImSegm with BSD 3-Clause "New" or "Revised" License
def create_classifiers(nb_workers=-1):
    """ create all classifiers with default parameters

    :param int nb_workers: number of parallel jobs, if possible
    :return dict: {str: clf}

    >>> classifs = create_classifiers()
    >>> classifs  # doctest: +ELLIPSIS
    {...}
    >>> sum([isinstance(create_clf_param_search_grid(k), dict)
    ...      for k in classifs.keys()])
    7
    >>> sum([isinstance(create_clf_param_search_distrib(k), dict)
    ...      for k in classifs.keys()])
    7
    """
    clfs = {
        'RandForest': ensemble.RandomForestClassifier(n_estimators=20,
                                                      # oob_score=True,
                                                      min_samples_leaf=2,
                                                      min_samples_split=3,
                                                      n_jobs=nb_workers),
        'GradBoost': ensemble.GradientBoostingClassifier(subsample=0.25,
                                                         warm_start=False,
                                                         max_depth=6,
                                                         min_samples_leaf=6,
                                                         n_estimators=200,
                                                         min_samples_split=7),
        'LogistRegr': linear_model.LogisticRegression(solver='sag',
                                                      n_jobs=nb_workers),
        'KNN': neighbors.KNeighborsClassifier(n_jobs=nb_workers),
        'SVM': svm.SVC(kernel='rbf', probability=True,
                       tol=2e-3, max_iter=5000),
        'DecTree': tree.DecisionTreeClassifier(),
        # 'RBM': create_pipeline_neuron_net(),
        'AdaBoost': ensemble.AdaBoostClassifier(n_estimators=5),
        # 'NuSVM-rbf': svm.NuSVC(kernel='rbf', probability=True),
    }
    return clfs
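
A minimal sketch of how such a dictionary might be consumed, fitting each classifier on a toy split and printing its accuracy (assumes the create_classifiers() definition above is in scope):

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=300, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

for name, clf in create_classifiers().items():
    clf.fit(X_train, y_train)
    print(name, clf.score(X_test, y_test))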
Example #24
Source File: predict.py From Loan_Default_Prediction with BSD 3-Clause "New" or "Revised" License
def gbc_classify(train_x, train_y):
    feature_indexs = getTopFeatures(train_x, train_y)
    sub_x_Train = get_data(train_x, feature_indexs[:16], features.feature_pair_sub_list,
                           features.feature_pair_plus_list, features.feature_pair_mul_list,
                           features.feature_pair_divide_list[:20],
                           features.feature_pair_sub_mul_list[:20])
    labels = toLabels(train_y)
    gbc = GradientBoostingClassifier(n_estimators=3000, max_depth=8)
    gbc.fit(sub_x_Train, labels)
    return gbc

# use svm to predict the loss, based on the result of gbm classifier
Example #25
Source File: test_boosted_trees_classifier.py From coremltools with BSD 3-Clause "New" or "Revised" License
def test_conversion_bad_inputs(self):
    # Error on converting an untrained model
    with self.assertRaises(Exception):
        model = GradientBoostingClassifier()
        spec = skl_converter.convert(model, "data", "out")

    # Check the expected class during conversion.
    from sklearn.preprocessing import OneHotEncoder
    with self.assertRaises(Exception):
        model = OneHotEncoder()
        spec = skl_converter.convert(model, "data", "out")
Example #26
Source File: binary.py From stacking with MIT License
def build_model(self):
    return GradientBoostingClassifier(**self.params)
Example #27
Source File: utils.py From disentanglement_lib with Apache License 2.0
def gradient_boosting_classifier():
    """Default gradient boosting classifier."""
    return GradientBoostingClassifier()
Example #28
Source File: test_boosted_trees_classifier.py From coremltools with BSD 3-Clause "New" or "Revised" License
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    from sklearn.datasets import load_boston

    scikit_data = load_boston()
    scikit_model = GradientBoostingClassifier(random_state=1)
    target = scikit_data["target"] > scikit_data["target"].mean()
    scikit_model.fit(scikit_data["data"], target)

    # Save the data and the model
    self.scikit_data = scikit_data
    self.scikit_model = scikit_model
Example #29
Source File: test_general.py From hyperparameter_hunter with MIT License
def exp_gbc_1():
    return CVExperiment(GradientBoostingClassifier, dict(subsample=0.5))

#################### KNeighborsClassifier Experiments ####################
Example #30
Source File: test_core_pipeline.py From lale with Apache License 2.0
def test_import_from_sklearn_pipeline_noop(self):
    from sklearn.pipeline import Pipeline
    from sklearn.ensemble import GradientBoostingClassifier
    from lale.helpers import import_from_sklearn_pipeline
    pipe = Pipeline([('noop', None), ('gbc', GradientBoostingClassifier())])
    with self.assertRaises(ValueError):
        imported_pipeline = import_from_sklearn_pipeline(pipe)