Python sklearn.ensemble.GradientBoostingClassifier() Examples

The following are 30 code examples of sklearn.ensemble.GradientBoostingClassifier(), drawn from open-source projects; each example lists its source file, project, and license. You may also want to check out all available functions and classes of the module sklearn.ensemble.
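Before the examples, here is a minimal, self-contained sketch of typical GradientBoostingClassifier usage (the dataset and hyperparameter values are illustrative, not drawn from any example below):

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# toy binary classification problem
X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,
                                 max_depth=3, random_state=0)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))      # mean accuracy on the held-out split
print(clf.predict_proba(X_test[:5]))  # per-class probabilities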
Example #1
Source File: grid_search_cv.py    From text-classifier with Apache License 2.0
def search_cv(x_train, y_train, x_test, y_test, model=GradientBoostingClassifier(n_estimators=30)):
    # use grid search to find the best parameters
    # NOTE: this grid targets an SVM (kernel, C, gamma); pass an SVC as
    # `model`, or use booster parameters as in the sketch after this example
    parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 2, 4], 'gamma': [0.125, 0.25, 0.5, 1, 2, 4]}
    clf = GridSearchCV(model, param_grid=parameters)
    grid_search = clf.fit(x_train, y_train)
    # score the results
    print("Best score: %0.3f" % grid_search.best_score_)
    print(grid_search.best_estimator_)

    # best params
    print('best params:', clf.best_params_)

    print('-----grid search end------------')
    print('on all train set')
    scores = cross_val_score(grid_search.best_estimator_, x_train, y_train, cv=3, scoring='accuracy')
    print(scores.mean(), scores)
    print('on test set')
    scores = cross_val_score(grid_search.best_estimator_, x_test, y_test, cv=3, scoring='accuracy')
    print(scores.mean(), scores) 
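The grid above only fits an SVM; for the default GradientBoostingClassifier model, a search over the booster's own hyperparameters would look like this (a sketch; the value ranges are illustrative assumptions, not taken from the project):

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

gbc_parameters = {'n_estimators': [30, 100, 300],
                  'max_depth': [2, 3, 5],
                  'learning_rate': [0.05, 0.1, 0.2]}
clf = GridSearchCV(GradientBoostingClassifier(), param_grid=gbc_parameters,
                   cv=3, scoring='accuracy')
clf.fit(x_train, y_train)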
Example #2
Source File: test_partial_dependence.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_recursion_decision_function(target_feature):
    # Make sure the recursion method (which implicitly uses decision_function)
    # gives the same result as the brute method with
    # response_method='decision_function'

    X, y = make_classification(n_classes=2, n_clusters_per_class=1,
                               random_state=1)
    assert np.mean(y) == .5  # make sure the init estimator predicts 0 anyway

    est = GradientBoostingClassifier(random_state=0, loss='deviance')
    est.fit(X, y)

    preds_1, _ = partial_dependence(est, X, [target_feature],
                                    response_method='decision_function',
                                    method='recursion')
    preds_2, _ = partial_dependence(est, X, [target_feature],
                                    response_method='decision_function',
                                    method='brute')

    assert_allclose(preds_1, preds_2, atol=1e-7) 
Example #3
Source File: GBDT_Classify_adult.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def recspre(estrs, predata, datadict, zhe):

    mo, ze = estrs.split('-')
    model = GradientBoostingClassifier(loss='deviance', n_estimators=int(mo), max_depth=int(ze), learning_rate=0.1)

    model.fit(datadict[zhe]['train'][:, :-1], datadict[zhe]['train'][:, -1])

    # predict
    yucede = model.predict(predata[:, :-1])
    # compute the confusion matrix

    print(ConfuseMatrix(predata[:, -1], yucede))

    return fmse(predata[:, -1], yucede)

# main function
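The estrs argument packs the two tuned hyperparameters into a single 'n_estimators-max_depth' string, so a call would look like this (the argument values are hypothetical):

f1 = recspre('100-4', predata, datadict, zhe)  # 100 trees of depth 4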
Example #4
Source File: predict.py    From Loan_Default_Prediction with BSD 3-Clause "New" or "Revised" License
def gbc_gp_predict(train_x, train_y, test_x):
    feature_indexs = getTopFeatures(train_x, train_y)
    sub_x_Train = get_data(train_x, feature_indexs[:16], features.feature_pair_sub_list
            ,features.feature_pair_plus_list, features.feature_pair_mul_list, features.feature_pair_divide_list[:20])
    sub_x_Test = get_data(test_x, feature_indexs[:16], features.feature_pair_sub_list
            ,features.feature_pair_plus_list, features.feature_pair_mul_list, features.feature_pair_divide_list[:20])
    labels = toLabels(train_y)
    gbc = GradientBoostingClassifier(n_estimators=3000, max_depth=9)
    gbc.fit(sub_x_Train, labels)
    pred_probs = gbc.predict_proba(sub_x_Test)[:,1]
    ind_test = np.where(pred_probs>0.55)[0]
    gp_preds_part = gbc_gp_predict_part(sub_x_Train, train_y, sub_x_Test[ind_test])
    gp_preds = np.zeros(len(test_x))
    gp_preds[ind_test] = gp_preds_part
    return gp_preds


# invoke the function gbc_svr_predict_part 
Example #5
Source File: test_general.py    From hyperparameter_hunter with MIT License
def env_5(request):
    return Environment(
        train_dataset=get_breast_cancer_data(),
        results_path=assets_dir,
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type=StratifiedKFold,
        cv_params=dict(n_splits=3, shuffle=True, random_state=32),
        experiment_recorders=request.param,
    )


##################################################
# Experiment Fixtures
##################################################
#################### GradientBoostingClassifier Experiments #################### 
Example #6
Source File: GradientBoostingClassifier.py    From Splunking-Crime with GNU Affero General Public License v3.0
def __init__(self, options):
        self.handle_options(options)
        params = options.get('params', {})
        out_params = convert_params(
            params,
            strs=['loss', 'max_features'],
            floats=['learning_rate', 'min_weight_fraction_leaf'],
            ints=['n_estimators', 'max_depth', 'min_samples_split',
                  'min_samples_leaf', 'max_leaf_nodes', 'random_state'],
        )

        valid_loss = ['deviance', 'exponential']
        if 'loss' in out_params:
            if out_params['loss'] not in valid_loss:
                msg = "loss must be one of: {}".format(', '.join(valid_loss))
                raise RuntimeError(msg)

        if 'max_features' in out_params:
            out_params['max_features'] = handle_max_features(out_params['max_features'])

        if 'max_leaf_nodes' in out_params and 'max_depth' in out_params:
            messages.warn('max_depth ignored when max_leaf_nodes is set')

        self.estimator = _GradientBoostingClassifier(**out_params) 
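convert_params and handle_max_features are the project's own utilities. Roughly, the converter casts raw string options to the declared types and rejects anything unexpected; a minimal sketch of that behavior (an assumption about the helper, not its actual source):

def convert_params_sketch(params, strs=(), floats=(), ints=()):
    # cast each raw option to its declared type; reject unknown keys
    out = {}
    for key, value in params.items():
        if key in strs:
            out[key] = str(value)
        elif key in floats:
            out[key] = float(value)
        elif key in ints:
            out[key] = int(value)
        else:
            raise RuntimeError('unexpected parameter: {}'.format(key))
    return out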
Example #7
Source File: test_full_pipelines.py    From python-sasctl with Apache License 2.0
def test_register_model(self, iris_dataset):
        pytest.importorskip('sklearn')
        from sasctl import register_model
        from sklearn.ensemble import GradientBoostingClassifier

        TARGET = 'Species'

        X = iris_dataset.drop(TARGET, axis=1)
        y = iris_dataset[TARGET]

        model = GradientBoostingClassifier()
        model.fit(X, y)

        model = register_model(model, self.MODEL_NAME, self.PROJECT_NAME, input=X, force=True)
        assert model.name == self.MODEL_NAME
        assert model.projectName == self.PROJECT_NAME
        assert model.function.lower() == 'classification'
        assert model.algorithm.lower() == 'gradient boosting'
        assert model.tool.lower().startswith('python') 
Example #8
Source File: GBDT_Classify_adult.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def Train(data, modelcount, censhu, yanzhgdata):
    model = GradientBoostingClassifier(loss='deviance', n_estimators=modelcount, max_depth=censhu, learning_rate=0.1, max_features='sqrt')

    model.fit(data[:, :-1], data[:, -1])
    # predictions on the training data
    train_out = model.predict(data[:, :-1])
    # compute the MSE
    train_mse = fmse(data[:, -1], train_out)[0]

    # predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # compute the F1 measure
    add_mse = fmse(yanzhgdata[:, -1], add_yan)[0]
    print(train_mse, add_mse)
    return train_mse, add_mse

# function that determines the final combination
Example #9
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def check_classification_synthetic(presort, loss):
    # Test GradientBoostingClassifier on synthetic dataset used by
    # Hastie et al. in ESLII Example 12.7.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)

    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=2,
                                      max_depth=1, loss=loss,
                                      learning_rate=1.0, random_state=0)
    gbrt.fit(X_train, y_train)
    error_rate = (1.0 - gbrt.score(X_test, y_test))
    assert_less(error_rate, 0.09)

    gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=2,
                                      max_depth=1, loss=loss,
                                      learning_rate=1.0, subsample=0.5,
                                      random_state=0,
                                      presort=presort)
    gbrt.fit(X_train, y_train)
    error_rate = (1.0 - gbrt.score(X_test, y_test))
    assert_less(error_rate, 0.08) 
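Note that presort was deprecated in scikit-learn 0.22 and removed in later releases, so on modern versions the second fit in this check would simply drop that argument — an adaptation of the snippet above, meant to run inside the same function:

    gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=2,
                                      max_depth=1, loss=loss,
                                      learning_rate=1.0, subsample=0.5,
                                      random_state=0)  # no presort argument
    gbrt.fit(X_train, y_train)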
Example #10
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_probability_log():
    # Predict probabilities.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    assert_raises(ValueError, clf.predict_proba, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)

    # check if probabilities are in [0, 1].
    y_proba = clf.predict_proba(T)
    assert np.all(y_proba >= 0.0)
    assert np.all(y_proba <= 1.0)

    # derive predictions from probabilities
    y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0)
    assert_array_equal(y_pred, true_result) 
Example #11
Source File: gradient_boosting_blending.py    From DataMiningCompetitionFirstPrize with MIT License
def learn(x, y, test_x):
    # set sample weight


    weight_list = []
    for j in range(len(y)):
        if y[j] == "0":
            weight_list.append(variables.weight_0_gdbt_b)
        if y[j] == "1000":
            weight_list.append(variables.weight_1000_gdbt_b)
        if y[j] == "1500":
            weight_list.append(variables.weight_1500_gdbt_b)
        if y[j] == "2000":
            weight_list.append(variables.weight_2000_gdbt_b)

    clf = GradientBoostingClassifier(loss='deviance', n_estimators=variables.n_estimators_gdbt_b,
                                     learning_rate=variables.learning_rate_gdbt_b,
                                     max_depth=variables.max_depth_gdbt_b, random_state=0,
                                     min_samples_split=variables.min_samples_split_gdbt_b,
                                     min_samples_leaf=variables.min_samples_leaf_gdbt_b,
                                     subsample=variables.subsample_gdbt_b,
                                     ).fit(x, y, sample_weight=weight_list)
    prediction_list = clf.predict(test_x)

    return prediction_list 
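The label-to-weight if-chain above can be written more compactly as a dict lookup, which also fails loudly on an unexpected label — a sketch assuming the same variables module:

weight_by_label = {"0": variables.weight_0_gdbt_b,
                   "1000": variables.weight_1000_gdbt_b,
                   "1500": variables.weight_1500_gdbt_b,
                   "2000": variables.weight_2000_gdbt_b}
weight_list = [weight_by_label[label] for label in y]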
Example #12
Source File: GradientBoosting.py    From Awesome-Scripts with MIT License
def main():
	# prepare data
	trainingSet=[]
	testSet=[]
	accuracy = 0.0
	split = 0.25
	loadDataset('../Dataset/phishing.data', split, trainingSet, testSet)
	print('Train set: ' + repr(len(trainingSet)))
	print('Test set: ' + repr(len(testSet)))
	trainData = np.array(trainingSet)[:,0:np.array(trainingSet).shape[1] - 1]
	columns = trainData.shape[1] 
	X = np.array(trainData)
	y = np.array(trainingSet)[:,columns]
	clf = GradientBoostingClassifier()
	clf.fit(X, y)
	testData = np.array(testSet)[:,0:np.array(trainingSet).shape[1] - 1]
	X_test = np.array(testData)
	y_test = np.array(testSet)[:,columns]
	accuracy = clf.score(X_test,y_test)
	accuracy *= 100
	print("Accuracy %:",accuracy) 
Example #13
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_check_inputs_predict_stages():
    # check that predict_stages throws an error if the type of X is not
    # supported
    x, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    x_sparse_csc = csc_matrix(x)
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
    clf.fit(x, y)
    score = np.zeros((y.shape)).reshape(-1, 1)
    assert_raise_message(ValueError,
                         "When X is a sparse matrix, a CSR format is expected",
                         predict_stages, clf.estimators_, x_sparse_csc,
                         clf.learning_rate, score)
    x_fortran = np.asfortranarray(x)
    assert_raise_message(ValueError,
                         "X should be C-ordered np.ndarray",
                         predict_stages, clf.estimators_, x_fortran,
                         clf.learning_rate, score) 
Example #14
Source File: test_partial_dependence.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_warning_recursion_non_constant_init():
    # make sure that passing a non-constant init parameter to a GBDT and using
    # recursion method yields a warning.

    gbc = GradientBoostingClassifier(init=DummyClassifier(), random_state=0)
    gbc.fit(X, y)

    with pytest.warns(
            UserWarning,
            match='Using recursion method with a non-constant init predictor'):
        partial_dependence(gbc, X, [0], method='recursion')

    # the same warning should also be raised on a repeated call
    with pytest.warns(
            UserWarning,
            match='Using recursion method with a non-constant init predictor'):
        partial_dependence(gbc, X, [0], method='recursion')
Example #15
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_early_stopping_n_classes():
    # when doing early stopping (_, _, y_train, _ = train_test_split(X, y))
    # there might be classes in y that are missing in y_train. As the init
    # estimator will be trained on y_train, we need to raise an error if this
    # happens.

    X = [[1]] * 10
    y = [0, 0] + [1] * 8  # only 2 samples of the negative class out of 10
    # an integer validation_fraction is forwarded to train_test_split as an
    # absolute test_size, so 8 of the 10 samples go to the validation split
    gb = GradientBoostingClassifier(n_iter_no_change=5, random_state=0,
                                    validation_fraction=8)
    with pytest.raises(
                ValueError,
                match='The training data after the early stopping split'):
        gb.fit(X, y)

    # No error if we leave the training data big enough
    gb = GradientBoostingClassifier(n_iter_no_change=5, random_state=0,
                                    validation_fraction=4)
    gb.fit(X, y)
Example #16
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_probability_exponential():
    # Predict probabilities.
    clf = GradientBoostingClassifier(loss='exponential',
                                     n_estimators=100, random_state=1)

    assert_raises(ValueError, clf.predict_proba, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)

    # check if probabilities are in [0, 1].
    y_proba = clf.predict_proba(T)
    assert np.all(y_proba >= 0.0)
    assert np.all(y_proba <= 1.0)
    score = clf.decision_function(T).ravel()
    assert_array_almost_equal(y_proba[:, 1], expit(2 * score))

    # derive predictions from probabilities
    y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0)
    assert_array_equal(y_pred, true_result) 
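The expit assertion above encodes the link function for the exponential loss: the positive-class probability is recovered from the decision score as sigmoid(2 * score). A standalone illustration (the score values are invented):

import numpy as np
from scipy.special import expit  # logistic sigmoid, 1 / (1 + exp(-z))

scores = np.array([-2.0, 0.0, 1.5])
proba_pos = expit(2 * scores)  # approx. [0.018, 0.5, 0.953]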
Example #17
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_zero_estimator_clf():
    # Test if init='zero' works for classification.
    X = iris.data
    y = np.array(iris.target)

    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='zero')
    est.fit(X, y)

    assert_greater(est.score(X, y), 0.96)

    # binary clf
    mask = y != 0
    y[mask] = 1
    y[~mask] = 0
    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='zero')
    est.fit(X, y)
    assert_greater(est.score(X, y), 0.96)

    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='foobar')
    assert_raises(ValueError, est.fit, X, y) 
Example #18
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_more_verbose_output():
    # Check verbose=2 does not cause error.
    from io import StringIO
    import sys
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1,
                                     verbose=2)
    clf.fit(X, y)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header = verbose_output.readline().rstrip()
    # no OOB
    true_header = ' '.join(['%10s'] + ['%16s'] * 2) % (
        'Iter', 'Train Loss', 'Remaining Time')
    assert_equal(true_header, header)

    n_lines = sum(1 for l in verbose_output.readlines())
    # 100 lines for n_estimators==100
    assert_equal(100, n_lines) 
Example #19
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_verbose_output():
    # Check verbose=1 does not cause error.
    from io import StringIO

    import sys
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1,
                                     verbose=1, subsample=0.8)
    clf.fit(X, y)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header = verbose_output.readline().rstrip()
    # with OOB
    true_header = ' '.join(['%10s'] + ['%16s'] * 3) % (
        'Iter', 'Train Loss', 'OOB Improve', 'Remaining Time')
    assert_equal(true_header, header)

    n_lines = sum(1 for l in verbose_output.readlines())
    # one for 1-10 and then 9 for 20-100
    assert_equal(10 + 9, n_lines) 
Example #20
Source File: test_boosted_trees_classifier.py    From coremltools with BSD 3-Clause "New" or "Revised" License
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        import numpy as np

        scikit_data = load_boston()
        scikit_model = GradientBoostingClassifier(random_state=1)
        t = scikit_data.target
        target = np.digitize(t, np.histogram(t)[1]) - 1
        scikit_model.fit(scikit_data.data, target)
        self.target = target

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model 
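Note that load_boston was deprecated in scikit-learn 1.0 and removed in 1.2; on newer versions a similar discretized-target setup could use fetch_california_housing instead (an adaptation for illustration, not part of the original test):

from sklearn.datasets import fetch_california_housing
import numpy as np

data = fetch_california_housing()
# bucket the continuous target into histogram bins, as the test does above
target = np.digitize(data.target, np.histogram(data.target)[1]) - 1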
Example #21
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_serialization():
    # Check model serialization.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_))

    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    serialized_clf = pickle.dumps(clf, protocol=pickle.HIGHEST_PROTOCOL)
    clf = None
    clf = pickle.loads(serialized_clf)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_)) 
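For persisting scikit-learn estimators, joblib (used in Example #22 below) is a common alternative to pickle and is efficient for models holding large numpy arrays — a minimal sketch:

import joblib

joblib.dump(clf, 'gbc_model.joblib')   # hypothetical file name
restored = joblib.load('gbc_model.joblib')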
Example #22
Source File: sentiment_analysis_ml.py    From Sentiment_Analysis_cnn_lstm_cnnlstm_textcnn_bilstm with Apache License 2.0
def gbdt_classifier(train_vecs,y_train,test_vecs,y_test):
    clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,max_depth=10,random_state=0)
    clf.fit(train_vecs,y_train)
    joblib.dump(clf,storedpaths+'model_gbdt.pkl')
    test_scores=clf.score(test_vecs,y_test)
    return test_scores
    
# train a k-nearest-neighbors classifier
Example #23
Source File: classification.py    From pyImSegm with BSD 3-Clause "New" or "Revised" License
def create_classifiers(nb_workers=-1):
    """ create all classifiers with default parameters

    :param int nb_workers: number of parallel if possible
    :return dict: {str: clf}

    >>> classifs = create_classifiers()
    >>> classifs  # doctest: +ELLIPSIS
    {...}
    >>> sum([isinstance(create_clf_param_search_grid(k), dict)
    ...      for k in classifs.keys()])
    7
    >>> sum([isinstance(create_clf_param_search_distrib(k), dict)
    ...      for k in classifs.keys()])
    7
    """
    clfs = {
        'RandForest': ensemble.RandomForestClassifier(n_estimators=20,
                                                      # oob_score=True,
                                                      min_samples_leaf=2,
                                                      min_samples_split=3,
                                                      n_jobs=nb_workers),
        'GradBoost': ensemble.GradientBoostingClassifier(subsample=0.25,
                                                         warm_start=False,
                                                         max_depth=6,
                                                         min_samples_leaf=6,
                                                         n_estimators=200,
                                                         min_samples_split=7),
        'LogistRegr': linear_model.LogisticRegression(solver='sag',
                                                      n_jobs=nb_workers),
        'KNN': neighbors.KNeighborsClassifier(n_jobs=nb_workers),
        'SVM': svm.SVC(kernel='rbf', probability=True,
                       tol=2e-3, max_iter=5000),
        'DecTree': tree.DecisionTreeClassifier(),
        # 'RBM': create_pipeline_neuron_net(),
        'AdaBoost': ensemble.AdaBoostClassifier(n_estimators=5),
        # 'NuSVM-rbf': svm.NuSVC(kernel='rbf', probability=True),
    }
    return clfs 
Example #24
Source File: predict.py    From Loan_Default_Prediction with BSD 3-Clause "New" or "Revised" License
def gbc_classify(train_x, train_y):
    feature_indexs = getTopFeatures(train_x, train_y)
    sub_x_Train = get_data(train_x, feature_indexs[:16], features.feature_pair_sub_list
                ,features.feature_pair_plus_list, features.feature_pair_mul_list, features.feature_pair_divide_list[:20],
                features.feature_pair_sub_mul_list[:20])
    labels = toLabels(train_y)
    gbc = GradientBoostingClassifier(n_estimators=3000, max_depth=8)
    gbc.fit(sub_x_Train, labels)
    return gbc

# use svm to predict the loss, based on the result of gbm classifier 
Example #25
Source File: test_boosted_trees_classifier.py    From coremltools with BSD 3-Clause "New" or "Revised" License
def test_conversion_bad_inputs(self):
        # Error on converting an untrained model
        with self.assertRaises(Exception):
            model = GradientBoostingClassifier()
            spec = skl_converter.convert(model, "data", "out")

        # Check that the expected class is enforced during conversion.
        from sklearn.preprocessing import OneHotEncoder

        with self.assertRaises(Exception):
            model = OneHotEncoder()
            spec = skl_converter.convert(model, "data", "out") 
Example #26
Source File: binary.py    From stacking with MIT License
def build_model(self):
            return GradientBoostingClassifier(**self.params) 
Example #27
Source File: utils.py    From disentanglement_lib with Apache License 2.0
def gradient_boosting_classifier():
  """Default gradient boosting classifier."""
  return GradientBoostingClassifier() 
Example #28
Source File: test_boosted_trees_classifier.py    From coremltools with BSD 3-Clause "New" or "Revised" License
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        scikit_model = GradientBoostingClassifier(random_state=1)
        target = scikit_data["target"] > scikit_data["target"].mean()
        scikit_model.fit(scikit_data["data"], target)

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model 
Example #29
Source File: test_general.py    From hyperparameter_hunter with MIT License
def exp_gbc_1():
    return CVExperiment(GradientBoostingClassifier, dict(subsample=0.5))


#################### KNeighborsClassifier Experiments #################### 
Example #30
Source File: test_core_pipeline.py    From lale with Apache License 2.0
def test_import_from_sklearn_pipeline_noop(self):
        from sklearn.pipeline import Pipeline
        from sklearn.ensemble import GradientBoostingClassifier
        from lale.helpers import import_from_sklearn_pipeline
        pipe = Pipeline([('noop', None), ('gbc', GradientBoostingClassifier())])
        with self.assertRaises(ValueError):
            imported_pipeline = import_from_sklearn_pipeline(pipe)
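As the test asserts, lale's import_from_sklearn_pipeline rejects a None step. In scikit-learn itself, 'passthrough' has been the supported way to express a no-op step since 0.21; a pipeline written that way looks as follows (a sketch; whether lale accepts it depends on the lale version):

pipe = Pipeline([('noop', 'passthrough'), ('gbc', GradientBoostingClassifier())])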