Python sklearn.model_selection.learning_curve() Examples
The following are 30 code examples of sklearn.model_selection.learning_curve().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.model_selection, or try the search function.
Example #1
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_learning_curve_batch_and_incremental_learning_are_equal():
    """Check that batch and incremental learning curves agree.

    The same estimator and data are passed to ``learning_curve`` twice,
    toggling only ``exploit_incremental_learning``. The returned training
    sizes must be equal and the fold-averaged scores almost equal.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    fractions = np.linspace(0.2, 1.0, 5)
    clf = PassiveAggressiveClassifier(max_iter=1, tol=None, shuffle=False)

    sizes_inc, train_inc, test_inc = learning_curve(
        clf, X, y, train_sizes=fractions, cv=3,
        exploit_incremental_learning=True)
    sizes_batch, train_batch, test_batch = learning_curve(
        clf, X, y, cv=3, train_sizes=fractions,
        exploit_incremental_learning=False)

    assert_array_equal(sizes_inc, sizes_batch)
    # Scores are compared after averaging over the CV folds (axis 1).
    assert_array_almost_equal(train_inc.mean(axis=1),
                              train_batch.mean(axis=1))
    assert_array_almost_equal(test_inc.mean(axis=1),
                              test_batch.mean(axis=1))
Example #2
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_learning_curve_verbose():
    """Check that ``verbose=1`` makes ``learning_curve`` report on stdout.

    ``sys.stdout`` is temporarily swapped for a ``StringIO`` buffer; the
    captured text must contain the ``[learning_curve]`` tag.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)

    saved_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        _sizes, _train, _test = learning_curve(
            estimator, X, y, cv=3, verbose=1)
    finally:
        # Always read and restore, even if learning_curve raised.
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = saved_stdout

    assert "[learning_curve]" in out
Example #3
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_learning_curve_batch_and_incremental_learning_are_equal():
    """Verify incremental and batch learning curves give matching results.

    ``learning_curve`` is run once with ``exploit_incremental_learning=True``
    and once with ``False`` on identical inputs; sizes must be equal and
    the per-size mean scores almost equal.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    size_grid = np.linspace(0.2, 1.0, 5)
    model = PassiveAggressiveClassifier(max_iter=1, tol=None, shuffle=False)

    incremental = learning_curve(
        model, X, y, train_sizes=size_grid, cv=3,
        exploit_incremental_learning=True)
    batch = learning_curve(
        model, X, y, cv=3, train_sizes=size_grid,
        exploit_incremental_learning=False)

    inc_sizes, inc_train, inc_test = incremental
    batch_sizes, batch_train, batch_test = batch

    assert_array_equal(inc_sizes, batch_sizes)
    # Compare train scores first, then test scores, averaged across folds.
    assert_array_almost_equal(inc_train.mean(axis=1),
                              batch_train.mean(axis=1))
    assert_array_almost_equal(inc_test.mean(axis=1),
                              batch_test.mean(axis=1))
Example #4
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_learning_curve_verbose():
    """Ensure a verbose ``learning_curve`` run prints its tag to stdout.

    Standard output is redirected to an in-memory buffer for the duration
    of the call and restored afterwards.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)

    original_stream = sys.stdout
    sys.stdout = StringIO()
    try:
        _sizes, _train_scores, _test_scores = learning_curve(
            estimator, X, y, cv=3, verbose=1)
    finally:
        # Collect the captured text before closing the buffer.
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = original_stream

    assert "[learning_curve]" in out
Example #5
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_learning_curve_with_boolean_indices():
    """Check ``learning_curve`` results when a ``KFold`` splitter is passed.

    With 30 samples and 3 folds the expected absolute training sizes run
    from 2 to 20, and the mock estimator's mean scores follow fixed
    linear ramps.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    splitter = KFold(n_splits=3)

    sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=splitter, train_sizes=np.linspace(0.1, 1.0, 10))

    assert_array_equal(sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))


# NOTE: leftover comment fragment belonging to the code that followed this
# function in the original file:
# 0.23. warning about tol not having its correct default value.
Example #6
Source File: model_selection_insight.py From karura with Apache License 2.0 | 5 votes |
def _set_description(self, dfe):
    """Build the bilingual description with an importance/learning-curve figure.

    Reads ``self.model.feature_importances_`` and runs ``learning_curve`` on
    the frame held by ``dfe`` (target column ``dfe.target``). Draws a bar
    chart of importances next to the learning curve, then stores a dict of
    ``Description`` objects keyed by ``"ja"`` and ``"en"`` in
    ``self.description``.
    """
    def _label(ja_text, en_text):
        # Pick the Japanese label only when the instance language is "ja".
        return ja_text if self.lang == "ja" else en_text

    feature_names = dfe.get_features().columns
    importances = pd.Series(self.model.feature_importances_,
                            index=feature_names)
    importances = importances.sort_values(ascending=False)

    y = dfe.df[dfe.target]
    X = dfe.df.drop(dfe.target, axis=1)
    train_sizes, train_scores, test_scores = learning_curve(
        self.model, X, y, n_jobs=self.n_jobs)

    # Per-size mean and spread over the CV folds (axis 1).
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    pic = ImageFile.create()
    # NOTE(review): the original indentation was lost; the extent of this
    # ``with`` block is inferred (all drawing calls inside) - confirm.
    with pic.plot() as plt_fig:
        plt, fig = plt_fig
        fig.set_figwidth(12)

        # Left panel: feature importances.
        plt.subplot(121)
        importances.plot(kind="bar")

        # Right panel: learning curve with +/- one std bands.
        ax2 = plt.subplot(122)
        ax2.fill_between(train_sizes, train_mean - train_std,
                         train_mean + train_std, alpha=0.1, color="r")
        ax2.fill_between(train_sizes, test_mean - test_std,
                         test_mean + test_std, alpha=0.1, color="g")
        ax2.plot(train_sizes, train_mean, "o-", color="r",
                 label=_label("学習精度", "Training score"))
        ax2.plot(train_sizes, test_mean, "o-", color="g",
                 label=_label("評価精度", "Cross-validation score"))
        ax2.set_xlabel(_label("学習データ量(行数)", "data records"))
        ax2.set_ylabel(_label("精度", "accuracy"))
        ax2.set_ylim(0, 1)
        ax2.legend(loc="best")

    params = (self.score, self.model.__class__.__name__)
    ja_text = "モデルの精度は{:.3f}です(利用モデル:{})。各項目の貢献度は図のようになっています。".format(*params)
    en_text = "The model accuracy is {:.3f}(model is {}). The contributions of each features are here.".format(*params)
    self.description = {
        "ja": Description(ja_text, pic),
        "en": Description(en_text, pic),
    }
Example #7
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_learning_curve_with_shuffle():
    """Check ``learning_curve`` with ``shuffle=True`` on grouped data.

    The test case was designed this way to verify the code changes made in
    pull request #7506. Without shuffling the call must raise
    ``ValueError``; with shuffling the batch and incremental scores must
    agree.
    """
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [11, 12], [13, 14],
                  [15, 16], [17, 18], [19, 20], [7, 8], [9, 10], [11, 12],
                  [13, 14], [15, 16], [17, 18]])
    y = np.array([1, 1, 1, 2, 3, 4, 1, 1, 2, 3, 4, 1, 2, 3, 4])
    groups = np.array([1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 4, 4, 4, 4])
    # Splits on these groups fail without shuffle as the first iteration
    # of the learning curve doesn't contain label 4 in the training set.
    clf = PassiveAggressiveClassifier(max_iter=5, tol=None, shuffle=False)
    splitter = GroupKFold(n_splits=2)

    _sizes_batch, train_batch, test_batch = learning_curve(
        clf, X, y, cv=splitter, n_jobs=1,
        train_sizes=np.linspace(0.3, 1.0, 3), groups=groups,
        shuffle=True, random_state=2)
    assert_array_almost_equal(train_batch.mean(axis=1),
                              np.array([0.75, 0.3, 0.36111111]))
    assert_array_almost_equal(test_batch.mean(axis=1),
                              np.array([0.36111111, 0.25, 0.25]))

    # Same call without shuffling is expected to fail.
    assert_raises(ValueError, learning_curve, clf, X, y, cv=splitter,
                  n_jobs=1, train_sizes=np.linspace(0.3, 1.0, 3),
                  groups=groups)

    _sizes_inc, train_inc, test_inc = learning_curve(
        clf, X, y, cv=splitter, n_jobs=1,
        train_sizes=np.linspace(0.3, 1.0, 3), groups=groups,
        shuffle=True, random_state=2, exploit_incremental_learning=True)
    assert_array_almost_equal(train_inc.mean(axis=1),
                              train_batch.mean(axis=1))
    assert_array_almost_equal(test_inc.mean(axis=1),
                              test_batch.mean(axis=1))
Example #8
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_learning_curve_with_boolean_indices():
    """Check ``learning_curve`` output when given a ``KFold`` splitter.

    Expected training sizes span 2..20 in ten steps and the mock
    estimator's fold-averaged scores follow fixed linear ramps.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    folds = KFold(n_splits=3)

    sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=folds, train_sizes=np.linspace(0.1, 1.0, 10))

    mean_train = train_scores.mean(axis=1)
    mean_test = test_scores.mean(axis=1)

    assert_array_equal(sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(mean_train, np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(mean_test, np.linspace(0.1, 1.0, 10))
Example #9
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_learning_curve_remove_duplicate_sample_sizes():
    """Check that duplicate training sizes are dropped with a warning.

    The call is expected to emit a ``RuntimeWarning`` and to return the
    training sizes ``[1, 2]``.
    """
    X, y = make_classification(
        n_samples=3, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(2)

    result = assert_warns(
        RuntimeWarning, learning_curve, estimator, X, y, cv=3,
        train_sizes=np.linspace(0.33, 1.0, 3))
    sizes, _, _ = result

    assert_array_equal(sizes, [1, 2])
Example #10
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_learning_curve_n_sample_range_out_of_bounds():
    """Check that out-of-range ``train_sizes`` values raise ``ValueError``.

    Covers both fractional and absolute specifications.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)

    invalid_sizes = (
        [0, 1],
        [0.0, 1.0],
        [0.1, 1.1],
        [0, 20],
        [1, 21],
    )
    for bad_sizes in invalid_sizes:
        assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                      train_sizes=bad_sizes)
Example #11
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_learning_curve_incremental_learning():
    """Check incremental ``learning_curve`` results with and without shuffle.

    For both shuffle settings the training sizes and the fold-averaged
    scores of the mock incremental estimator must match the expected ramps.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockIncrementalImprovingEstimator(20)

    for do_shuffle in (False, True):
        sizes, train_scores, test_scores = learning_curve(
            estimator, X, y, cv=3, exploit_incremental_learning=True,
            train_sizes=np.linspace(0.1, 1.0, 10), shuffle=do_shuffle)

        assert_array_equal(sizes, np.linspace(2, 20, 10))
        assert_array_almost_equal(train_scores.mean(axis=1),
                                  np.linspace(1.9, 1.0, 10))
        assert_array_almost_equal(test_scores.mean(axis=1),
                                  np.linspace(0.1, 1.0, 10))
Example #12
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_learning_curve_incremental_learning_not_possible():
    """Check that incremental mode raises ``ValueError`` for this mock.

    The mock estimator used here does not have ``partial_fit()``.
    """
    X, y = make_classification(
        n_samples=2, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    # The mockup does not have partial_fit()
    non_incremental = MockImprovingEstimator(1)

    assert_raises(ValueError, learning_curve, non_incremental, X, y,
                  exploit_incremental_learning=True)
Example #13
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_learning_curve_unsupervised():
    """Check ``learning_curve`` when called with ``y=None``.

    The generated labels are discarded; sizes and fold-averaged scores must
    still match the expected ramps.
    """
    X, _ = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)

    sizes, train_scores, test_scores = learning_curve(
        estimator, X, y=None, cv=3, train_sizes=np.linspace(0.1, 1.0, 10))

    mean_train = train_scores.mean(axis=1)
    mean_test = test_scores.mean(axis=1)

    assert_array_equal(sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(mean_train, np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(mean_test, np.linspace(0.1, 1.0, 10))
Example #14
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_learning_curve():
    """Check ``learning_curve`` shapes, sizes and scores, with no warnings.

    Runs once per shuffle setting. Each iteration also repeats the call
    with a CV splitter that can only be iterated once and checks that the
    scores are unchanged.
    """
    n_samples = 30
    n_splits = 3
    X, y = make_classification(
        n_samples=n_samples, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(
        n_samples * ((n_splits - 1) / n_splits))

    for do_shuffle in (False, True):
        with warnings.catch_warnings(record=True) as caught:
            sizes, train_scores, test_scores = learning_curve(
                estimator, X, y, cv=KFold(n_splits=n_splits),
                train_sizes=np.linspace(0.1, 1.0, 10),
                shuffle=do_shuffle)
        if caught:
            raise RuntimeError("Unexpected warning: %r" % caught[0].message)

        assert_equal(train_scores.shape, (10, 3))
        assert_equal(test_scores.shape, (10, 3))
        assert_array_equal(sizes, np.linspace(2, 20, 10))
        assert_array_almost_equal(train_scores.mean(axis=1),
                                  np.linspace(1.9, 1.0, 10))
        assert_array_almost_equal(test_scores.mean(axis=1),
                                  np.linspace(0.1, 1.0, 10))

        # Test a custom cv splitter that can iterate only once
        one_shot_cv = OneTimeSplitter(n_splits=n_splits, n_samples=n_samples)
        with warnings.catch_warnings(record=True) as caught:
            _sizes2, train_scores2, test_scores2 = learning_curve(
                estimator, X, y, cv=one_shot_cv,
                train_sizes=np.linspace(0.1, 1.0, 10),
                shuffle=do_shuffle)
        if caught:
            raise RuntimeError("Unexpected warning: %r" % caught[0].message)

        assert_array_almost_equal(train_scores2, train_scores)
        assert_array_almost_equal(test_scores2, test_scores)
Example #15
Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_learning_curve(self):
    """Compare the ModelFrame ``learning_curve`` accessor with the plain call.

    The accessor result must have three elements, each almost equal to the
    corresponding element returned by ``ms.learning_curve`` on the raw
    digits data and target.
    """
    digits = datasets.load_digits()
    frame = pdml.ModelFrame(digits)

    result = frame.model_selection.learning_curve(
        frame.naive_bayes.GaussianNB())
    expected = ms.learning_curve(
        nb.GaussianNB(), digits.data, digits.target)

    self.assertEqual(len(result), 3)
    for position in range(3):
        self.assert_numpy_array_almost_equal(result[position],
                                             expected[position])
Example #16
Source File: model.py From MOS-X with MIT License | 5 votes |
def _plot_learning_curve(estimator, X, y, ylim=None, cv=None, scoring=None,
                         title=None, n_jobs=1,
                         train_sizes=np.linspace(.1, 1.0, 5)):
    """Plot training and cross-validation scores against training-set size.

    Creates a new pyplot figure, optionally applies ``title`` and ``ylim``,
    runs ``learning_curve`` with the given ``cv``, ``scoring``, ``n_jobs``
    and ``train_sizes``, and draws the mean score curves with a shaded
    band of +/- one standard deviation.

    Returns the created figure.
    """
    import matplotlib.pyplot as plt
    from sklearn.model_selection import learning_curve

    fig = plt.figure()
    if title is not None:
        plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, scoring=scoring, n_jobs=n_jobs,
        train_sizes=train_sizes)

    # Mean and spread across CV folds for every training size.
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    plt.grid()
    plt.fill_between(sizes, train_mean - train_std, train_mean + train_std,
                     alpha=0.1, color="r")
    plt.fill_between(sizes, test_mean - test_std, test_mean + test_std,
                     alpha=0.1, color="g")
    plt.plot(sizes, train_mean, 'o-', color="r", label="Training score")
    plt.plot(sizes, test_mean, 'o-', color="g",
             label="Cross-validation score")
    plt.legend(loc="best")

    return fig
Example #17
Source File: estimator.py From EDeN with MIT License | 5 votes |
def learning_curve(self, graphs, targets, cv=5, n_steps=10,
                   start_fraction=0.1):
    """Compute a ROC-AUC learning curve for ``self.model``.

    The graphs and targets are shuffled together, the graphs are passed
    through ``self.transform``, and ``learning_curve`` is evaluated on
    ``n_steps`` training fractions from ``start_fraction`` to 1.0.

    Returns the tuple ``(train_sizes, train_scores, test_scores)``.
    """
    graphs, targets = paired_shuffle(graphs, targets)
    features = self.transform(graphs)
    fractions = np.linspace(start_fraction, 1.0, n_steps)

    sizes, train_scores, test_scores = learning_curve(
        self.model, features, targets,
        cv=cv, train_sizes=fractions, scoring='roc_auc')
    return sizes, train_scores, test_scores
Example #18
Source File: grid_search_cv.py From text-classifier with Apache License 2.0 | 5 votes |
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        train_sizes=np.linspace(.1, 1.0, 5), n_jobs=1,
                        figure_path=None):
    """Plot a learning curve and optionally save it to disk.

    Creates a new pyplot figure titled ``title``, runs ``learning_curve``
    for ``estimator`` on ``X``/``y`` and draws the fold-averaged training
    and cross-validation scores with a shaded band of +/- one standard
    deviation.

    Parameters
    ----------
    estimator : object passed through to ``learning_curve``.
    title : str, figure title.
    X, y : data and targets passed through to ``learning_curve``.
    ylim : optional pair ``(ymin, ymax)`` unpacked into ``plt.ylim``.
    cv, train_sizes, n_jobs : passed through to ``learning_curve``.
    figure_path : optional path; when given the figure is written there
        with ``plt.savefig``. When ``None`` (the default) nothing is saved.

    Returns
    -------
    The ``plt`` module, as in the original implementation.
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)

    # Mean and spread across CV folds for every training size.
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    plt.grid()
    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1,
                     color="g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="Cross-validation score")
    plt.legend(loc="best")

    # The default figure_path is None, which savefig cannot write to, so
    # only save when the caller actually supplied a destination.
    if figure_path is not None:
        plt.savefig(figure_path)
    return plt
Example #19
Source File: displayer.py From cherry with MIT License | 5 votes |
def plot_learning_curve(self, estimator, title, X, y, ylim=None, cv=None,
                        n_jobs=None, train_sizes=np.linspace(.1, 1.0, 5)):
    """Draw and show a learning curve for ``estimator``.

    Prints a notice, runs ``learning_curve`` and plots the fold-averaged
    training and cross-validation scores with +/- one standard deviation
    bands, then calls ``plt.show()``.
    """
    # From https://scikit-learn.org/stable/auto_examples/model_selection/plot_learning_curve.html
    print('Drawing curve, depending on your datasets size, this may take several minutes to several hours.')

    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)

    # Mean and spread across CV folds for every training size.
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    plt.grid()
    plt.fill_between(sizes, train_mean - train_std, train_mean + train_std,
                     alpha=0.1, color="r")
    plt.fill_between(sizes, test_mean - test_std, test_mean + test_std,
                     alpha=0.1, color="g")
    plt.plot(sizes, train_mean, 'o-', color="r", label="Training score")
    plt.plot(sizes, test_mean, 'o-', color="g",
             label="Cross-validation score")
    plt.legend(loc="best")
    plt.show()
Example #20
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_learning_curve_with_shuffle():
    """Check ``learning_curve`` with ``shuffle=True`` on grouped data.

    The test case was designed this way to verify the code changes made in
    pull request #7506. Without shuffling (and with ``error_score='raise'``)
    the call must raise ``ValueError``; with shuffling the batch and
    incremental scores must agree.
    """
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [11, 12], [13, 14],
                  [15, 16], [17, 18], [19, 20], [7, 8], [9, 10], [11, 12],
                  [13, 14], [15, 16], [17, 18]])
    y = np.array([1, 1, 1, 2, 3, 4, 1, 1, 2, 3, 4, 1, 2, 3, 4])
    groups = np.array([1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 4, 4, 4, 4])
    # Splits on these groups fail without shuffle as the first iteration
    # of the learning curve doesn't contain label 4 in the training set.
    clf = PassiveAggressiveClassifier(max_iter=5, tol=None, shuffle=False)
    splitter = GroupKFold(n_splits=2)

    _sizes_batch, train_batch, test_batch = learning_curve(
        clf, X, y, cv=splitter, n_jobs=1,
        train_sizes=np.linspace(0.3, 1.0, 3), groups=groups,
        shuffle=True, random_state=2)
    assert_array_almost_equal(train_batch.mean(axis=1),
                              np.array([0.75, 0.3, 0.36111111]))
    assert_array_almost_equal(test_batch.mean(axis=1),
                              np.array([0.36111111, 0.25, 0.25]))

    # Same call without shuffling is expected to fail.
    assert_raises(ValueError, learning_curve, clf, X, y, cv=splitter,
                  n_jobs=1, train_sizes=np.linspace(0.3, 1.0, 3),
                  groups=groups, error_score='raise')

    _sizes_inc, train_inc, test_inc = learning_curve(
        clf, X, y, cv=splitter, n_jobs=1,
        train_sizes=np.linspace(0.3, 1.0, 3), groups=groups,
        shuffle=True, random_state=2, exploit_incremental_learning=True)
    assert_array_almost_equal(train_inc.mean(axis=1),
                              train_batch.mean(axis=1))
    assert_array_almost_equal(test_inc.mean(axis=1),
                              test_batch.mean(axis=1))
Example #21
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_learning_curve_remove_duplicate_sample_sizes():
    """Verify duplicate training sizes are removed and a warning is raised.

    ``learning_curve`` must emit a ``RuntimeWarning`` and return training
    sizes ``[1, 2]``.
    """
    X, y = make_classification(
        n_samples=3, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(2)
    requested = np.linspace(0.33, 1.0, 3)

    sizes, _, _ = assert_warns(
        RuntimeWarning, learning_curve, estimator, X, y, cv=3,
        train_sizes=requested)

    assert_array_equal(sizes, [1, 2])
Example #22
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_learning_curve_n_sample_range_out_of_bounds():
    """Verify that invalid ``train_sizes`` ranges raise ``ValueError``.

    Each candidate (fractional or absolute) is checked in turn.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)

    for bad_sizes in ([0, 1], [0.0, 1.0], [0.1, 1.1], [0, 20], [1, 21]):
        assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                      train_sizes=bad_sizes)
Example #23
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_learning_curve_incremental_learning():
    """Verify incremental ``learning_curve`` output for both shuffle modes.

    Sizes and fold-averaged scores of the mock incremental estimator must
    match the expected linear ramps in each case.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockIncrementalImprovingEstimator(20)

    for do_shuffle in (False, True):
        sizes, train_scores, test_scores = learning_curve(
            estimator, X, y, cv=3, exploit_incremental_learning=True,
            train_sizes=np.linspace(0.1, 1.0, 10), shuffle=do_shuffle)

        mean_train = train_scores.mean(axis=1)
        mean_test = test_scores.mean(axis=1)

        assert_array_equal(sizes, np.linspace(2, 20, 10))
        assert_array_almost_equal(mean_train, np.linspace(1.9, 1.0, 10))
        assert_array_almost_equal(mean_test, np.linspace(0.1, 1.0, 10))
Example #24
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_learning_curve_incremental_learning_not_possible():
    """Verify ``ValueError`` when incremental learning is requested here.

    The mock estimator used does not have ``partial_fit()``.
    """
    X, y = make_classification(
        n_samples=2, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    # The mockup does not have partial_fit()
    batch_only_estimator = MockImprovingEstimator(1)

    assert_raises(ValueError, learning_curve, batch_only_estimator, X, y,
                  exploit_incremental_learning=True)
Example #25
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_learning_curve_unsupervised():
    """Verify ``learning_curve`` works when no targets are supplied.

    Only ``X`` from the generated dataset is used and ``y=None`` is passed
    explicitly.
    """
    X, _ = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    fractions = np.linspace(0.1, 1.0, 10)

    sizes, train_scores, test_scores = learning_curve(
        estimator, X, y=None, cv=3, train_sizes=fractions)

    assert_array_equal(sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))
Example #26
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_learning_curve():
    """Verify ``learning_curve`` shapes, sizes and scores without warnings.

    Executed for both shuffle settings. Each pass also re-runs the curve
    with a single-use CV splitter and checks the scores are unchanged.
    """
    n_samples = 30
    n_splits = 3
    X, y = make_classification(
        n_samples=n_samples, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(
        n_samples * ((n_splits - 1) / n_splits))

    for do_shuffle in (False, True):
        with warnings.catch_warnings(record=True) as recorded:
            sizes, train_scores, test_scores = learning_curve(
                estimator, X, y, cv=KFold(n_splits=n_splits),
                train_sizes=np.linspace(0.1, 1.0, 10),
                shuffle=do_shuffle)
        if recorded:
            raise RuntimeError(
                "Unexpected warning: %r" % recorded[0].message)

        assert_equal(train_scores.shape, (10, 3))
        assert_equal(test_scores.shape, (10, 3))
        assert_array_equal(sizes, np.linspace(2, 20, 10))
        assert_array_almost_equal(train_scores.mean(axis=1),
                                  np.linspace(1.9, 1.0, 10))
        assert_array_almost_equal(test_scores.mean(axis=1),
                                  np.linspace(0.1, 1.0, 10))

        # Test a custom cv splitter that can iterate only once
        with warnings.catch_warnings(record=True) as recorded:
            _sizes2, train_scores2, test_scores2 = learning_curve(
                estimator, X, y,
                cv=OneTimeSplitter(n_splits=n_splits, n_samples=n_samples),
                train_sizes=np.linspace(0.1, 1.0, 10),
                shuffle=do_shuffle)
        if recorded:
            raise RuntimeError(
                "Unexpected warning: %r" % recorded[0].message)

        assert_array_almost_equal(train_scores2, train_scores)
        assert_array_almost_equal(test_scores2, test_scores)
Example #27
Source File: malss.py From malss with MIT License | 5 votes |
def __calc_learning_curve(self, algorithm):
    """Compute fold-averaged learning-curve data for ``algorithm.estimator``.

    Runs ``learning_curve`` on ``self.data.X``/``self.data.y`` with the
    instance's ``cv``, ``scoring`` and ``n_jobs`` settings.

    Returns a dict with keys ``'x'`` (training sizes), ``'y_train'`` (mean
    training score per size) and ``'y_cv'`` (mean test score per size).
    """
    # n_jobs drives the parallel run in cross validation.
    sizes, train_scores, test_scores = learning_curve(
        algorithm.estimator,
        self.data.X,
        self.data.y,
        cv=self.cv,
        scoring=self.scoring,
        n_jobs=self.n_jobs)

    mean_train = np.mean(train_scores, axis=1)
    mean_cv = np.mean(test_scores, axis=1)
    return {'x': sizes, 'y_train': mean_train, 'y_cv': mean_cv}
Example #28
Source File: classifier.py From Fake_News_Detection with MIT License | 4 votes |
def plot_learing_curve(pipeline, title):
    """Fit ``pipeline`` on the training news data and show its learning curve.

    The pipeline is fitted on ``DataPrep.train_news`` ("Statement" as input,
    "Label" as target), then ``learning_curve`` is evaluated with a shuffled
    ``KFold`` and five training fractions from 0.1 to 1.0. The fold-averaged
    training and cross-validation scores are plotted with a shaded band of
    +/- one standard deviation and the figure is shown.

    Parameters
    ----------
    pipeline : estimator/pipeline to fit and evaluate.
    title : str, figure title.

    Returns
    -------
    None
    """
    size = 10000
    cv = KFold(size, shuffle=True)

    X = DataPrep.train_news["Statement"]
    y = DataPrep.train_news["Label"]

    pl = pipeline
    pl.fit(X, y)

    train_sizes, train_scores, test_scores = learning_curve(
        pl, X, y, n_jobs=-1, cv=cv,
        train_sizes=np.linspace(.1, 1.0, 5), verbose=0)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    plt.figure()
    plt.title(title)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    # NOTE(review): the explicit ylim(-.1, 1.1) further down passes
    # bottom < top, which appears to undo this inversion - confirm intent.
    plt.gca().invert_yaxis()

    # box-like grid
    plt.grid()

    # plot the std deviation as a transparent range at each training set size
    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1,
                     color="g")

    # plot the average training and test score lines at each training set size
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="Cross-validation score")

    # The legend must be created after the labelled lines exist; calling it
    # before plotting (as the original did) finds no handles to show.
    plt.legend(loc="best")

    # sizes the window for readability and displays the plot
    # shows error from 0 to 1.1
    plt.ylim(-.1, 1.1)
    plt.show()


#below command will plot learing curves for each of the classifiers
Example #29
Source File: plots.py From AlphaPy with Apache License 2.0 | 4 votes |
def generate_plots(model, partition):
    r"""Generate plots while running the pipeline.

    Each plot is produced only when its flag in ``model.specs`` is truthy.
    The learning-curve and importance plots are additionally restricted to
    the training partition.

    Parameters
    ----------
    model : alphapy.Model
        The model object with plotting specifications.
    partition : alphapy.Partition
        Reference to the dataset.

    Returns
    -------
    None : None

    """

    logger.info('='*80)
    logger.info("Generating Plots for partition: %s", datasets[partition])

    # Extract model parameters

    want_calibration = model.specs['calibration_plot']
    want_confusion = model.specs['confusion_matrix']
    want_importances = model.specs['importances']
    want_learning_curve = model.specs['learning_curve']
    want_roc = model.specs['roc_curve']

    # Generate plots that apply to every partition

    if want_calibration:
        plot_calibration(model, partition)
    if want_confusion:
        plot_confusion_matrix(model, partition)
    if want_roc:
        plot_roc_curve(model, partition)

    # The remaining plots are only drawn for the training partition.

    if not (partition == Partition.train):
        return
    if want_learning_curve:
        plot_learning_curve(model, partition)
    if want_importances:
        plot_importance(model, partition)


#
# Function get_plot_directory
#
Example #30
Source File: learning_curve.py From MAST-ML with MIT License | 4 votes |
def sample_learning_curve(X, y, estimator, cv, scoring, Xgroups=None):
    """
    Method that calculates data used to plot a sample learning curve, e.g. the RMSE of a cross-validation routine using a specified model
    and a given fraction of the total training data

    Args:

        X: (numpy array), array of X data values

        y: (numpy array), array of y data values

        estimator: (scikit-learn model object), a scikit-learn model used for fitting

        cv: (scikit-learn cross validation object), a scikit-learn cross validation object to construct train/test splits

        scoring: (scikit-learn metric object), a scikit-learn metric to use as a scorer

        Xgroups: (list or numpy array, optional), group label for each row of X. When None or empty,
            every row is assigned to a single group (label 0).

    Returns:

        train_sizes: (numpy array), training set sizes as returned by scikit-learn's learning_curve
            for the requested fractions 0.1, 0.2, ..., 1.0

        train_mean: (numpy array), array of means of training data scores for each training set size

        test_mean: (numpy array), array of means of testing data scores for each training set size

        train_stdev: (numpy array), array of standard deviations of training data scores for each training set size

        test_stdev: (numpy array), array of standard deviations of testing data scores for each training set size

    """

    train_sizes = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])

    # Xgroups defaults to None and may be a plain list; neither has a
    # usable ``.shape``, so normalise to an array before inspecting it.
    if Xgroups is not None:
        Xgroups = np.array(Xgroups)
    if Xgroups is not None and Xgroups.ndim > 0 and Xgroups.shape[0] > 0:
        Xgroups = Xgroups.reshape(-1, )
    else:
        # No grouping information: put every sample in the same group.
        Xgroups = np.zeros(len(y))

    train_sizes, train_scores, valid_scores = learning_curve(
        estimator=estimator, X=X, y=y, train_sizes=train_sizes,
        scoring=scoring, cv=cv, groups=Xgroups)

    # Aggregate over the CV folds (axis 1) for each training size.
    train_mean = np.mean(train_scores, axis=1)
    test_mean = np.mean(valid_scores, axis=1)
    train_stdev = np.std(train_scores, axis=1)
    test_stdev = np.std(valid_scores, axis=1)
    return train_sizes, train_mean, test_mean, train_stdev, test_stdev