Python sklearn.datasets.make_hastie_10_2() Examples
The following are 30
code examples of sklearn.datasets.make_hastie_10_2().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
sklearn.datasets
, or try the search function
.
Example #1
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_warm_start_equal_n_estimators(): # Test that nothing happens when fitting without increasing n_estimators X, y = make_hastie_10_2(n_samples=20, random_state=1) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43) clf = BaggingClassifier(n_estimators=5, warm_start=True, random_state=83) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) # modify X to nonsense values, this should not change anything X_train += 1. assert_warns_message(UserWarning, "Warm-start fitting without increasing n_estimators does not", clf.fit, X_train, y_train) assert_array_equal(y_pred, clf.predict(X_test))
Example #2
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_warm_start_fortran(Cls): # Test that feeding a X in Fortran-ordered is giving the same results as # in C-ordered X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) est_c = Cls(n_estimators=1, random_state=1, warm_start=True) est_fortran = Cls(n_estimators=1, random_state=1, warm_start=True) est_c.fit(X, y) est_c.set_params(n_estimators=11) est_c.fit(X, y) X_fortran = np.asfortranarray(X) est_fortran.fit(X_fortran, y) est_fortran.set_params(n_estimators=11) est_fortran.fit(X_fortran, y) assert_array_almost_equal(est_c.predict(X), est_fortran.predict(X))
Example #3
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 6 votes |
def check_classification_synthetic(presort, loss): # Test GradientBoostingClassifier on synthetic dataset used by # Hastie et al. in ESLII Example 12.7. X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) X_train, X_test = X[:2000], X[2000:] y_train, y_test = y[:2000], y[2000:] gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=2, max_depth=1, loss=loss, learning_rate=1.0, random_state=0) gbrt.fit(X_train, y_train) error_rate = (1.0 - gbrt.score(X_test, y_test)) assert_less(error_rate, 0.09) gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=2, max_depth=1, loss=loss, learning_rate=1.0, subsample=0.5, random_state=0, presort=presort) gbrt.fit(X_train, y_train) error_rate = (1.0 - gbrt.score(X_test, y_test)) assert_less(error_rate, 0.08)
Example #4
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_warm_start_oob(): # Test if warm start OOB equals fit. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=200, max_depth=1, subsample=0.5, random_state=1) est.fit(X, y) est_ws = Cls(n_estimators=100, max_depth=1, subsample=0.5, random_state=1, warm_start=True) est_ws.fit(X, y) est_ws.set_params(n_estimators=200) est_ws.fit(X, y) assert_array_almost_equal(est_ws.oob_improvement_[:100], est.oob_improvement_[:100])
Example #5
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_warm_start(Cls): # Test if warm start equals fit. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) est = Cls(n_estimators=200, max_depth=1) est.fit(X, y) est_ws = Cls(n_estimators=100, max_depth=1, warm_start=True) est_ws.fit(X, y) est_ws.set_params(n_estimators=200) est_ws.fit(X, y) if Cls is GradientBoostingRegressor: assert_array_almost_equal(est_ws.predict(X), est.predict(X)) else: # Random state is preserved and hence predict_proba must also be # same assert_array_equal(est_ws.predict(X), est.predict(X)) assert_array_almost_equal(est_ws.predict_proba(X), est.predict_proba(X))
Example #6
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_check_inputs_predict_stages(): # check that predict_stages through an error if the type of X is not # supported x, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) x_sparse_csc = csc_matrix(x) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(x, y) score = np.zeros((y.shape)).reshape(-1, 1) assert_raise_message(ValueError, "When X is a sparse matrix, a CSR format is expected", predict_stages, clf.estimators_, x_sparse_csc, clf.learning_rate, score) x_fortran = np.asfortranarray(x) assert_raise_message(ValueError, "X should be C-ordered np.ndarray", predict_stages, clf.estimators_, x_fortran, clf.learning_rate, score)
Example #7
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def check_classification_synthetic(presort, loss): # Test GradientBoostingClassifier on synthetic dataset used by # Hastie et al. in ESLII Example 12.7. X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) X_train, X_test = X[:2000], X[2000:] y_train, y_test = y[:2000], y[2000:] gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=2, max_depth=1, loss=loss, learning_rate=1.0, random_state=0) gbrt.fit(X_train, y_train) error_rate = (1.0 - gbrt.score(X_test, y_test)) assert_less(error_rate, 0.09) gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=2, max_depth=1, loss=loss, learning_rate=1.0, subsample=0.5, random_state=0, presort=presort) gbrt.fit(X_train, y_train) error_rate = (1.0 - gbrt.score(X_test, y_test)) assert_less(error_rate, 0.08)
Example #8
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_warm_start(random_state=42): # Test if fitting incrementally with warm start gives a forest of the # right size and the same results as a normal fit. X, y = make_hastie_10_2(n_samples=20, random_state=1) clf_ws = None for n_estimators in [5, 10]: if clf_ws is None: clf_ws = BaggingClassifier(n_estimators=n_estimators, random_state=random_state, warm_start=True) else: clf_ws.set_params(n_estimators=n_estimators) clf_ws.fit(X, y) assert_equal(len(clf_ws), n_estimators) clf_no_ws = BaggingClassifier(n_estimators=10, random_state=random_state, warm_start=False) clf_no_ws.fit(X, y) assert_equal(set([tree.random_state for tree in clf_ws]), set([tree.random_state for tree in clf_no_ws]))
Example #9
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_warm_start_equivalence(): # warm started classifier with 5+5 estimators should be equivalent to # one classifier with 10 estimators X, y = make_hastie_10_2(n_samples=20, random_state=1) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43) clf_ws = BaggingClassifier(n_estimators=5, warm_start=True, random_state=3141) clf_ws.fit(X_train, y_train) clf_ws.set_params(n_estimators=10) clf_ws.fit(X_train, y_train) y1 = clf_ws.predict(X_test) clf = BaggingClassifier(n_estimators=10, warm_start=False, random_state=3141) clf.fit(X_train, y_train) y2 = clf.predict(X_test) assert_array_almost_equal(y1, y2)
Example #10
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_warm_start(random_state=42): # Test if fitting incrementally with warm start gives a forest of the # right size and the same results as a normal fit. X, y = make_hastie_10_2(n_samples=20, random_state=1) clf_ws = None for n_estimators in [5, 10]: if clf_ws is None: clf_ws = BaggingClassifier(n_estimators=n_estimators, random_state=random_state, warm_start=True) else: clf_ws.set_params(n_estimators=n_estimators) clf_ws.fit(X, y) assert_equal(len(clf_ws), n_estimators) clf_no_ws = BaggingClassifier(n_estimators=10, random_state=random_state, warm_start=False) clf_no_ws.fit(X, y) assert_equal(set([tree.random_state for tree in clf_ws]), set([tree.random_state for tree in clf_no_ws]))
Example #11
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_warm_start_equivalence(): # warm started classifier with 5+5 estimators should be equivalent to # one classifier with 10 estimators X, y = make_hastie_10_2(n_samples=20, random_state=1) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43) clf_ws = BaggingClassifier(n_estimators=5, warm_start=True, random_state=3141) clf_ws.fit(X_train, y_train) clf_ws.set_params(n_estimators=10) clf_ws.fit(X_train, y_train) y1 = clf_ws.predict(X_test) clf = BaggingClassifier(n_estimators=10, warm_start=False, random_state=3141) clf.fit(X_train, y_train) y2 = clf.predict(X_test) assert_array_almost_equal(y1, y2)
Example #12
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_warm_start_equal_n_estimators(): # Test that nothing happens when fitting without increasing n_estimators X, y = make_hastie_10_2(n_samples=20, random_state=1) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43) clf = BaggingClassifier(n_estimators=5, warm_start=True, random_state=83) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) # modify X to nonsense values, this should not change anything X_train += 1. assert_warns_message(UserWarning, "Warm-start fitting without increasing n_estimators does not", clf.fit, X_train, y_train) assert_array_equal(y_pred, clf.predict(X_test))
Example #13
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_warm_start_with_oob_score_fails(): # Check using oob_score and warm_start simultaneously fails X, y = make_hastie_10_2(n_samples=20, random_state=1) clf = BaggingClassifier(n_estimators=5, warm_start=True, oob_score=True) assert_raises(ValueError, clf.fit, X, y)
Example #14
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_oob_score_consistency(): # Make sure OOB scores are identical when random_state, estimator, and # training data are fixed and fitting is done twice X, y = make_hastie_10_2(n_samples=200, random_state=1) bagging = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5, oob_score=True, random_state=1) assert_equal(bagging.fit(X, y).oob_score_, bagging.fit(X, y).oob_score_)
Example #15
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_estimators_samples(): # Check that format of estimators_samples_ is correct and that results # generated at fit time can be identically reproduced at a later time # using data saved in object attributes. X, y = make_hastie_10_2(n_samples=200, random_state=1) bagging = BaggingClassifier(LogisticRegression(), max_samples=0.5, max_features=0.5, random_state=1, bootstrap=False) bagging.fit(X, y) # Get relevant attributes estimators_samples = bagging.estimators_samples_ estimators_features = bagging.estimators_features_ estimators = bagging.estimators_ # Test for correct formatting assert_equal(len(estimators_samples), len(estimators)) assert_equal(len(estimators_samples[0]), len(X)) assert_equal(estimators_samples[0].dtype.kind, 'b') # Re-fit single estimator to test for consistent sampling estimator_index = 0 estimator_samples = estimators_samples[estimator_index] estimator_features = estimators_features[estimator_index] estimator = estimators[estimator_index] X_train = (X[estimator_samples])[:, estimator_features] y_train = y[estimator_samples] orig_coefs = estimator.coef_ estimator.fit(X_train, y_train) new_coefs = estimator.coef_ assert_array_almost_equal(orig_coefs, new_coefs)
Example #16
Source File: test_bagging.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_max_samples_consistency(): # Make sure validated max_samples and original max_samples are identical # when valid integer max_samples supplied by user max_samples = 100 X, y = make_hastie_10_2(n_samples=2*max_samples, random_state=1) bagging = BaggingClassifier(KNeighborsClassifier(), max_samples=max_samples, max_features=0.5, random_state=1) bagging.fit(X, y) assert_equal(bagging._max_samples, max_samples)
Example #17
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_max_feature_regression(): # Test to make sure random state is set properly. X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) X_train, X_test = X[:2000], X[2000:] y_train, y_test = y[:2000], y[2000:] gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=5, max_depth=2, learning_rate=.1, max_features=2, random_state=1) gbrt.fit(X_train, y_train) deviance = gbrt.loss_(y_test, gbrt.decision_function(X_test)) assert_true(deviance < 0.5, "GB failed with deviance %.4f" % deviance)
Example #18
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_staged_predict_proba(): # Test whether staged predict proba eventually gives # the same prediction. X, y = datasets.make_hastie_10_2(n_samples=1200, random_state=1) X_train, y_train = X[:200], y[:200] X_test, y_test = X[200:], y[200:] clf = GradientBoostingClassifier(n_estimators=20) # test raise NotFittedError if not fitted assert_raises(NotFittedError, lambda X: np.fromiter( clf.staged_predict_proba(X), dtype=np.float64), X_test) clf.fit(X_train, y_train) # test if prediction for last stage equals ``predict`` for y_pred in clf.staged_predict(X_test): assert_equal(y_test.shape, y_pred.shape) assert_array_equal(clf.predict(X_test), y_pred) # test if prediction for last stage equals ``predict_proba`` for staged_proba in clf.staged_predict_proba(X_test): assert_equal(y_test.shape[0], staged_proba.shape[0]) assert_equal(2, staged_proba.shape[1]) assert_array_equal(clf.predict_proba(X_test), staged_proba)
Example #19
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_warm_start(): # Test if warm start equals fit. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=200, max_depth=1) est.fit(X, y) est_ws = Cls(n_estimators=100, max_depth=1, warm_start=True) est_ws.fit(X, y) est_ws.set_params(n_estimators=200) est_ws.fit(X, y) assert_array_almost_equal(est_ws.predict(X), est.predict(X))
Example #20
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_warm_start_n_estimators(): # Test if warm start equals fit - set n_estimators. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=300, max_depth=1) est.fit(X, y) est_ws = Cls(n_estimators=100, max_depth=1, warm_start=True) est_ws.fit(X, y) est_ws.set_params(n_estimators=300) est_ws.fit(X, y) assert_array_almost_equal(est_ws.predict(X), est.predict(X))
Example #21
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_warm_start_smaller_n_estimators(): # Test if warm start with smaller n_estimators raises error X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1, warm_start=True) est.fit(X, y) est.set_params(n_estimators=99) assert_raises(ValueError, est.fit, X, y)
Example #22
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_warm_start_max_depth(): # Test if possible to fit trees of different depth in ensemble. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1, warm_start=True) est.fit(X, y) est.set_params(n_estimators=110, max_depth=2) est.fit(X, y) # last 10 trees have different depth assert_equal(est.estimators_[0, 0].max_depth, 1) for i in range(1, 11): assert_equal(est.estimators_[-i, 0].max_depth, 2)
Example #23
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_warm_start_clear(): # Test if fit clears state. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1) est.fit(X, y) est_2 = Cls(n_estimators=100, max_depth=1, warm_start=True) est_2.fit(X, y) # inits state est_2.set_params(warm_start=False) est_2.fit(X, y) # clears old state and equals est assert_array_almost_equal(est_2.predict(X), est.predict(X))
Example #24
Source File: test_samples_generator.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_make_hastie_10_2(): X, y = make_hastie_10_2(n_samples=100, random_state=0) assert_equal(X.shape, (100, 10), "X shape mismatch") assert_equal(y.shape, (100,), "y shape mismatch") assert_equal(np.unique(y).shape, (2,), "Unexpected number of classes")
Example #25
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_warm_start_equal_n_estimators(): # Test if warm start with equal n_estimators does nothing X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1) est.fit(X, y) est2 = clone(est) est2.set_params(n_estimators=est.n_estimators, warm_start=True) est2.fit(X, y) assert_array_almost_equal(est2.predict(X), est.predict(X))
Example #26
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_warm_start_oob_switch(): # Test if oob can be turned on during warm start. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1, warm_start=True) est.fit(X, y) est.set_params(n_estimators=110, subsample=0.5) est.fit(X, y) assert_array_equal(est.oob_improvement_[:100], np.zeros(100)) # the last 10 are not zeros assert_array_equal(est.oob_improvement_[-10:] == 0.0, np.zeros(10, dtype=np.bool))
Example #27
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_monitor_early_stopping(): # Test if monitor return value works. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=20, max_depth=1, random_state=1, subsample=0.5) est.fit(X, y, monitor=early_stopping_monitor) assert_equal(est.n_estimators, 20) # this is not altered assert_equal(est.estimators_.shape[0], 10) assert_equal(est.train_score_.shape[0], 10) assert_equal(est.oob_improvement_.shape[0], 10) # try refit est.set_params(n_estimators=30) est.fit(X, y) assert_equal(est.n_estimators, 30) assert_equal(est.estimators_.shape[0], 30) assert_equal(est.train_score_.shape[0], 30) est = Cls(n_estimators=20, max_depth=1, random_state=1, subsample=0.5, warm_start=True) est.fit(X, y, monitor=early_stopping_monitor) assert_equal(est.n_estimators, 20) assert_equal(est.estimators_.shape[0], 10) assert_equal(est.train_score_.shape[0], 10) assert_equal(est.oob_improvement_.shape[0], 10) # try refit est.set_params(n_estimators=30, warm_start=False) est.fit(X, y) assert_equal(est.n_estimators, 30) assert_equal(est.train_score_.shape[0], 30) assert_equal(est.estimators_.shape[0], 30) assert_equal(est.oob_improvement_.shape[0], 30)
Example #28
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_max_leaf_nodes_max_depth(): # Test precedence of max_leaf_nodes over max_depth. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) all_estimators = [GradientBoostingRegressor, GradientBoostingClassifier] k = 4 for GBEstimator in all_estimators: est = GBEstimator(max_depth=1, max_leaf_nodes=k).fit(X, y) tree = est.estimators_[0, 0].tree_ assert_greater(tree.max_depth, 1) est = GBEstimator(max_depth=1).fit(X, y) tree = est.estimators_[0, 0].tree_ assert_equal(tree.max_depth, 1)
Example #29
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_min_impurity_split(): # Test if min_impurity_split of base estimators is set # Regression test for #8006 X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) all_estimators = [GradientBoostingRegressor, GradientBoostingClassifier] for GBEstimator in all_estimators: est = GBEstimator(min_impurity_split=0.1) est = assert_warns_message(DeprecationWarning, "min_impurity_decrease", est.fit, X, y) for tree in est.estimators_.flat: assert_equal(tree.min_impurity_split, 0.1)
Example #30
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_min_impurity_decrease(): X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) all_estimators = [GradientBoostingRegressor, GradientBoostingClassifier] for GBEstimator in all_estimators: est = GBEstimator(min_impurity_decrease=0.1) est.fit(X, y) for tree in est.estimators_.flat: # Simply check if the parameter is passed on correctly. Tree tests # will suffice for the actual working of this param assert_equal(tree.min_impurity_decrease, 0.1)