Python sklearn.model_selection.learning_curve() Examples

The following are 30 code examples of sklearn.model_selection.learning_curve(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.model_selection , or try the search function .
Example #1
Source File: test_validation.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_learning_curve_batch_and_incremental_learning_are_equal():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    train_sizes = np.linspace(0.2, 1.0, 5)
    estimator = PassiveAggressiveClassifier(max_iter=1, tol=None,
                                            shuffle=False)

    train_sizes_inc, train_scores_inc, test_scores_inc = \
        learning_curve(
            estimator, X, y, train_sizes=train_sizes,
            cv=3, exploit_incremental_learning=True)
    train_sizes_batch, train_scores_batch, test_scores_batch = \
        learning_curve(
            estimator, X, y, cv=3, train_sizes=train_sizes,
            exploit_incremental_learning=False)

    assert_array_equal(train_sizes_inc, train_sizes_batch)
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1)) 
Example #2
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_learning_curve_verbose():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        train_sizes, train_scores, test_scores = \
            learning_curve(estimator, X, y, cv=3, verbose=1)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    assert("[learning_curve]" in out) 
Example #3
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_learning_curve_batch_and_incremental_learning_are_equal():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    train_sizes = np.linspace(0.2, 1.0, 5)
    estimator = PassiveAggressiveClassifier(max_iter=1, tol=None,
                                            shuffle=False)

    train_sizes_inc, train_scores_inc, test_scores_inc = \
        learning_curve(
            estimator, X, y, train_sizes=train_sizes,
            cv=3, exploit_incremental_learning=True)
    train_sizes_batch, train_scores_batch, test_scores_batch = \
        learning_curve(
            estimator, X, y, cv=3, train_sizes=train_sizes,
            exploit_incremental_learning=False)

    assert_array_equal(train_sizes_inc, train_sizes_batch)
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1)) 
Example #4
Source File: test_validation.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_learning_curve_verbose():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        train_sizes, train_scores, test_scores = \
            learning_curve(estimator, X, y, cv=3, verbose=1)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    assert("[learning_curve]" in out) 
Example #5
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_learning_curve_with_boolean_indices():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    cv = KFold(n_splits=3)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, train_sizes=np.linspace(0.1, 1.0, 10))
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))


# 0.23. warning about tol not having its correct default value. 
Example #6
Source File: model_selection_insight.py    From karura with Apache License 2.0 5 votes vote down vote up
def _set_description(self, dfe):
        importances = pd.Series(self.model.feature_importances_, index=dfe.get_features().columns).sort_values(ascending=False)

        y = dfe.df[dfe.target]
        X = dfe.df.drop(dfe.target, axis=1)
        train_sizes, train_scores, test_scores = learning_curve(self.model, X, y, n_jobs=self.n_jobs)
        train_scores_mean = np.mean(train_scores, axis=1)
        train_scores_std = np.std(train_scores, axis=1)
        test_scores_mean = np.mean(test_scores, axis=1)
        test_scores_std = np.std(test_scores, axis=1)
            
        pic = ImageFile.create()
        with pic.plot() as plt_fig:
            plt, fig = plt_fig
            fig.set_figwidth(12)
            plt.subplot(121)
            importances.plot(kind="bar")

            ax2 = plt.subplot(122)
            ax2.fill_between(train_sizes, train_scores_mean - train_scores_std,
                train_scores_mean + train_scores_std, alpha=0.1,color="r")
            ax2.fill_between(train_sizes, test_scores_mean - test_scores_std,
                test_scores_mean + test_scores_std, alpha=0.1, color="g")
            ax2.plot(train_sizes, train_scores_mean, "o-", color="r", label="学習精度" if self.lang == "ja" else "Training score")
            ax2.plot(train_sizes, test_scores_mean, 'o-', color="g", label="評価精度" if self.lang == "ja" else "Cross-validation score")
            ax2.set_xlabel("学習データ量(行数)" if self.lang == "ja" else "data records")
            ax2.set_ylabel("精度" if self.lang == "ja" else "accuracy")
            ax2.set_ylim(0, 1)
            ax2.legend(loc="best")
        
        params = (self.score, self.model.__class__.__name__)
        self.description = {
            "ja": Description("モデルの精度は{:.3f}です(利用モデル:{})。各項目の貢献度は図のようになっています。".format(*params), pic),
            "en": Description("The model accuracy is {:.3f}(model is {}). The contributions of each features are here.".format(*params), pic)
        } 
Example #7
Source File: test_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_learning_curve_with_shuffle():
    # Following test case was designed this way to verify the code
    # changes made in pull request: #7506.
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [11, 12], [13, 14], [15, 16],
                 [17, 18], [19, 20], [7, 8], [9, 10], [11, 12], [13, 14],
                 [15, 16], [17, 18]])
    y = np.array([1, 1, 1, 2, 3, 4, 1, 1, 2, 3, 4, 1, 2, 3, 4])
    groups = np.array([1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 4, 4, 4, 4])
    # Splits on these groups fail without shuffle as the first iteration
    # of the learning curve doesn't contain label 4 in the training set.
    estimator = PassiveAggressiveClassifier(max_iter=5, tol=None,
                                            shuffle=False)

    cv = GroupKFold(n_splits=2)
    train_sizes_batch, train_scores_batch, test_scores_batch = learning_curve(
        estimator, X, y, cv=cv, n_jobs=1, train_sizes=np.linspace(0.3, 1.0, 3),
        groups=groups, shuffle=True, random_state=2)
    assert_array_almost_equal(train_scores_batch.mean(axis=1),
                              np.array([0.75, 0.3, 0.36111111]))
    assert_array_almost_equal(test_scores_batch.mean(axis=1),
                              np.array([0.36111111, 0.25, 0.25]))
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=cv, n_jobs=1,
                  train_sizes=np.linspace(0.3, 1.0, 3), groups=groups)

    train_sizes_inc, train_scores_inc, test_scores_inc = learning_curve(
        estimator, X, y, cv=cv, n_jobs=1, train_sizes=np.linspace(0.3, 1.0, 3),
        groups=groups, shuffle=True, random_state=2,
        exploit_incremental_learning=True)
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1)) 
Example #8
Source File: test_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_learning_curve_with_boolean_indices():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    cv = KFold(n_splits=3)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, train_sizes=np.linspace(0.1, 1.0, 10))
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10)) 
Example #9
Source File: test_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_learning_curve_remove_duplicate_sample_sizes():
    X, y = make_classification(n_samples=3, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(2)
    train_sizes, _, _ = assert_warns(
        RuntimeWarning, learning_curve, estimator, X, y, cv=3,
        train_sizes=np.linspace(0.33, 1.0, 3))
    assert_array_equal(train_sizes, [1, 2]) 
Example #10
Source File: test_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_learning_curve_n_sample_range_out_of_bounds():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0, 1])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0.0, 1.0])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0.1, 1.1])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0, 20])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[1, 21]) 
Example #11
Source File: test_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_learning_curve_incremental_learning():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockIncrementalImprovingEstimator(20)
    for shuffle_train in [False, True]:
        train_sizes, train_scores, test_scores = learning_curve(
            estimator, X, y, cv=3, exploit_incremental_learning=True,
            train_sizes=np.linspace(0.1, 1.0, 10), shuffle=shuffle_train)
        assert_array_equal(train_sizes, np.linspace(2, 20, 10))
        assert_array_almost_equal(train_scores.mean(axis=1),
                                  np.linspace(1.9, 1.0, 10))
        assert_array_almost_equal(test_scores.mean(axis=1),
                                  np.linspace(0.1, 1.0, 10)) 
Example #12
Source File: test_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_learning_curve_incremental_learning_not_possible():
    X, y = make_classification(n_samples=2, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    # The mockup does not have partial_fit()
    estimator = MockImprovingEstimator(1)
    assert_raises(ValueError, learning_curve, estimator, X, y,
                  exploit_incremental_learning=True) 
Example #13
Source File: test_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_learning_curve_unsupervised():
    X, _ = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y=None, cv=3, train_sizes=np.linspace(0.1, 1.0, 10))
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10)) 
Example #14
Source File: test_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_learning_curve():
    n_samples = 30
    n_splits = 3
    X, y = make_classification(n_samples=n_samples, n_features=1,
                               n_informative=1, n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(n_samples * ((n_splits - 1) / n_splits))
    for shuffle_train in [False, True]:
        with warnings.catch_warnings(record=True) as w:
            train_sizes, train_scores, test_scores = learning_curve(
                estimator, X, y, cv=KFold(n_splits=n_splits),
                train_sizes=np.linspace(0.1, 1.0, 10),
                shuffle=shuffle_train)
        if len(w) > 0:
            raise RuntimeError("Unexpected warning: %r" % w[0].message)
        assert_equal(train_scores.shape, (10, 3))
        assert_equal(test_scores.shape, (10, 3))
        assert_array_equal(train_sizes, np.linspace(2, 20, 10))
        assert_array_almost_equal(train_scores.mean(axis=1),
                                  np.linspace(1.9, 1.0, 10))
        assert_array_almost_equal(test_scores.mean(axis=1),
                                  np.linspace(0.1, 1.0, 10))

        # Test a custom cv splitter that can iterate only once
        with warnings.catch_warnings(record=True) as w:
            train_sizes2, train_scores2, test_scores2 = learning_curve(
                estimator, X, y,
                cv=OneTimeSplitter(n_splits=n_splits, n_samples=n_samples),
                train_sizes=np.linspace(0.1, 1.0, 10),
                shuffle=shuffle_train)
        if len(w) > 0:
            raise RuntimeError("Unexpected warning: %r" % w[0].message)
        assert_array_almost_equal(train_scores2, train_scores)
        assert_array_almost_equal(test_scores2, test_scores) 
Example #15
Source File: test_model_selection.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_learning_curve(self):
        digits = datasets.load_digits()
        df = pdml.ModelFrame(digits)

        result = df.model_selection.learning_curve(df.naive_bayes.GaussianNB())
        expected = ms.learning_curve(nb.GaussianNB(), digits.data, digits.target)

        self.assertEqual(len(result), 3)
        self.assert_numpy_array_almost_equal(result[0], expected[0])
        self.assert_numpy_array_almost_equal(result[1], expected[1])
        self.assert_numpy_array_almost_equal(result[2], expected[2]) 
Example #16
Source File: model.py    From MOS-X with MIT License 5 votes vote down vote up
def _plot_learning_curve(estimator, X, y, ylim=None, cv=None, scoring=None, title=None, n_jobs=1,
                         train_sizes=np.linspace(.1, 1.0, 5)):
    import matplotlib.pyplot as plt
    from sklearn.model_selection import learning_curve

    fig = plt.figure()
    if title is not None:
        plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, scoring=scoring, n_jobs=n_jobs, train_sizes=train_sizes)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()

    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="Cross-validation score")

    plt.legend(loc="best")
    return fig 
Example #17
Source File: estimator.py    From EDeN with MIT License 5 votes vote down vote up
def learning_curve(self, graphs, targets,
                       cv=5, n_steps=10, start_fraction=0.1):
        """learning_curve."""
        graphs, targets = paired_shuffle(graphs, targets)
        x = self.transform(graphs)
        train_sizes = np.linspace(start_fraction, 1.0, n_steps)
        scoring = 'roc_auc'
        train_sizes, train_scores, test_scores = learning_curve(
            self.model, x, targets,
            cv=cv, train_sizes=train_sizes,
            scoring=scoring)
        return train_sizes, train_scores, test_scores 
Example #18
Source File: grid_search_cv.py    From text-classifier with Apache License 2.0 5 votes vote down vote up
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        train_sizes=np.linspace(.1, 1.0, 5), n_jobs=1, figure_path=None):
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()

    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="Cross-validation score")

    plt.legend(loc="best")
    plt.savefig(figure_path)
    return plt 
Example #19
Source File: displayer.py    From cherry with MIT License 5 votes vote down vote up
def plot_learning_curve(self, estimator, title, X, y, ylim=None, cv=None,
                            n_jobs=None, train_sizes=np.linspace(.1, 1.0, 5)):
        # From https://scikit-learn.org/stable/auto_examples/model_selection/plot_learning_curve.html
        print('Drawing curve, depending on your datasets size, this may take several minutes to several hours.')
        plt.figure()
        plt.title(title)
        if ylim is not None:
            plt.ylim(*ylim)
        plt.xlabel("Training examples")
        plt.ylabel("Score")
        train_sizes, train_scores, test_scores = learning_curve(
            estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
        train_scores_mean = np.mean(train_scores, axis=1)
        train_scores_std = np.std(train_scores, axis=1)
        test_scores_mean = np.mean(test_scores, axis=1)
        test_scores_std = np.std(test_scores, axis=1)
        plt.grid()
        plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                         train_scores_mean + train_scores_std, alpha=0.1,
                         color="r")
        plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                         test_scores_mean + test_scores_std, alpha=0.1, color="g")
        plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
                 label="Training score")
        plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
                 label="Cross-validation score")
        plt.legend(loc="best")
        plt.show() 
Example #20
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_learning_curve_with_shuffle():
    # Following test case was designed this way to verify the code
    # changes made in pull request: #7506.
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [11, 12], [13, 14], [15, 16],
                 [17, 18], [19, 20], [7, 8], [9, 10], [11, 12], [13, 14],
                 [15, 16], [17, 18]])
    y = np.array([1, 1, 1, 2, 3, 4, 1, 1, 2, 3, 4, 1, 2, 3, 4])
    groups = np.array([1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 4, 4, 4, 4])
    # Splits on these groups fail without shuffle as the first iteration
    # of the learning curve doesn't contain label 4 in the training set.
    estimator = PassiveAggressiveClassifier(max_iter=5, tol=None,
                                            shuffle=False)

    cv = GroupKFold(n_splits=2)
    train_sizes_batch, train_scores_batch, test_scores_batch = learning_curve(
        estimator, X, y, cv=cv, n_jobs=1, train_sizes=np.linspace(0.3, 1.0, 3),
        groups=groups, shuffle=True, random_state=2)
    assert_array_almost_equal(train_scores_batch.mean(axis=1),
                              np.array([0.75, 0.3, 0.36111111]))
    assert_array_almost_equal(test_scores_batch.mean(axis=1),
                              np.array([0.36111111, 0.25, 0.25]))
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=cv, n_jobs=1,
                  train_sizes=np.linspace(0.3, 1.0, 3), groups=groups,
                  error_score='raise')

    train_sizes_inc, train_scores_inc, test_scores_inc = learning_curve(
        estimator, X, y, cv=cv, n_jobs=1, train_sizes=np.linspace(0.3, 1.0, 3),
        groups=groups, shuffle=True, random_state=2,
        exploit_incremental_learning=True)
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1)) 
Example #21
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_learning_curve_remove_duplicate_sample_sizes():
    X, y = make_classification(n_samples=3, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(2)
    train_sizes, _, _ = assert_warns(
        RuntimeWarning, learning_curve, estimator, X, y, cv=3,
        train_sizes=np.linspace(0.33, 1.0, 3))
    assert_array_equal(train_sizes, [1, 2]) 
Example #22
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_learning_curve_n_sample_range_out_of_bounds():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0, 1])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0.0, 1.0])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0.1, 1.1])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0, 20])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[1, 21]) 
Example #23
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_learning_curve_incremental_learning():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockIncrementalImprovingEstimator(20)
    for shuffle_train in [False, True]:
        train_sizes, train_scores, test_scores = learning_curve(
            estimator, X, y, cv=3, exploit_incremental_learning=True,
            train_sizes=np.linspace(0.1, 1.0, 10), shuffle=shuffle_train)
        assert_array_equal(train_sizes, np.linspace(2, 20, 10))
        assert_array_almost_equal(train_scores.mean(axis=1),
                                  np.linspace(1.9, 1.0, 10))
        assert_array_almost_equal(test_scores.mean(axis=1),
                                  np.linspace(0.1, 1.0, 10)) 
Example #24
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_learning_curve_incremental_learning_not_possible():
    X, y = make_classification(n_samples=2, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    # The mockup does not have partial_fit()
    estimator = MockImprovingEstimator(1)
    assert_raises(ValueError, learning_curve, estimator, X, y,
                  exploit_incremental_learning=True) 
Example #25
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_learning_curve_unsupervised():
    X, _ = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y=None, cv=3, train_sizes=np.linspace(0.1, 1.0, 10))
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10)) 
Example #26
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_learning_curve():
    n_samples = 30
    n_splits = 3
    X, y = make_classification(n_samples=n_samples, n_features=1,
                               n_informative=1, n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(n_samples * ((n_splits - 1) / n_splits))
    for shuffle_train in [False, True]:
        with warnings.catch_warnings(record=True) as w:
            train_sizes, train_scores, test_scores = learning_curve(
                estimator, X, y, cv=KFold(n_splits=n_splits),
                train_sizes=np.linspace(0.1, 1.0, 10),
                shuffle=shuffle_train)
        if len(w) > 0:
            raise RuntimeError("Unexpected warning: %r" % w[0].message)
        assert_equal(train_scores.shape, (10, 3))
        assert_equal(test_scores.shape, (10, 3))
        assert_array_equal(train_sizes, np.linspace(2, 20, 10))
        assert_array_almost_equal(train_scores.mean(axis=1),
                                  np.linspace(1.9, 1.0, 10))
        assert_array_almost_equal(test_scores.mean(axis=1),
                                  np.linspace(0.1, 1.0, 10))

        # Test a custom cv splitter that can iterate only once
        with warnings.catch_warnings(record=True) as w:
            train_sizes2, train_scores2, test_scores2 = learning_curve(
                estimator, X, y,
                cv=OneTimeSplitter(n_splits=n_splits, n_samples=n_samples),
                train_sizes=np.linspace(0.1, 1.0, 10),
                shuffle=shuffle_train)
        if len(w) > 0:
            raise RuntimeError("Unexpected warning: %r" % w[0].message)
        assert_array_almost_equal(train_scores2, train_scores)
        assert_array_almost_equal(test_scores2, test_scores) 
Example #27
Source File: malss.py    From malss with MIT License 5 votes vote down vote up
def __calc_learning_curve(self, algorithm):
        estimator = algorithm.estimator
        train_sizes, train_scores, test_scores = learning_curve(
            estimator,
            self.data.X,
            self.data.y,
            cv=self.cv,
            scoring=self.scoring,
            n_jobs=self.n_jobs)  # parallel run in cross validation
        train_scores_mean = np.mean(train_scores, axis=1)
        test_scores_mean = np.mean(test_scores, axis=1)

        return {'x': train_sizes, 'y_train': train_scores_mean,
                'y_cv': test_scores_mean} 
Example #28
Source File: classifier.py    From Fake_News_Detection with MIT License 4 votes vote down vote up
def plot_learing_curve(pipeline,title):
    size = 10000
    cv = KFold(size, shuffle=True)
    
    X = DataPrep.train_news["Statement"]
    y = DataPrep.train_news["Label"]
    
    pl = pipeline
    pl.fit(X,y)
    
    train_sizes, train_scores, test_scores = learning_curve(pl, X, y, n_jobs=-1, cv=cv, train_sizes=np.linspace(.1, 1.0, 5), verbose=0)
       
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
     
    plt.figure()
    plt.title(title)
    plt.legend(loc="best")
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    plt.gca().invert_yaxis()
    
    # box-like grid
    plt.grid()
    
    # plot the std deviation as a transparent range at each training set size
    plt.fill_between(train_sizes, train_scores_mean - train_scores_std, train_scores_mean + train_scores_std, alpha=0.1, color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std, test_scores_mean + test_scores_std, alpha=0.1, color="g")
    
    # plot the average training and test score lines at each training set size
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r", label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g", label="Cross-validation score")
    
    # sizes the window for readability and displays the plot
    # shows error from 0 to 1.1
    plt.ylim(-.1,1.1)
    plt.show()


#below command will plot learing curves for each of the classifiers 
Example #29
Source File: plots.py    From AlphaPy with Apache License 2.0 4 votes vote down vote up
def generate_plots(model, partition):
    r"""Generate plots while running the pipeline.

    Parameters
    ----------
    model : alphapy.Model
        The model object with plotting specifications.
    partition : alphapy.Partition
        Reference to the dataset.

    Returns
    -------
    None : None

    """

    logger.info('='*80)
    logger.info("Generating Plots for partition: %s", datasets[partition])

    # Extract model parameters

    calibration_plot = model.specs['calibration_plot']
    confusion_matrix = model.specs['confusion_matrix']
    importances = model.specs['importances']
    learning_curve = model.specs['learning_curve']
    roc_curve = model.specs['roc_curve']

    # Generate plots

    if calibration_plot:
        plot_calibration(model, partition)
    if confusion_matrix:
        plot_confusion_matrix(model, partition)
    if roc_curve:
        plot_roc_curve(model, partition)
    if partition == Partition.train:
        if learning_curve:
            plot_learning_curve(model, partition)
        if importances:
            plot_importance(model, partition)


#
# Function get_plot_directory
# 
Example #30
Source File: learning_curve.py    From MAST-ML with MIT License 4 votes vote down vote up
def sample_learning_curve(X, y, estimator, cv, scoring, Xgroups=None):
    """
    Method that calculates data used to plot a sample learning curve, e.g. the RMSE of a cross-validation routine using a
    specified model and a given fraction of the total training data

    Args:
        X: (numpy array), array of X data values

        y: (numpy array), array of y data values

        estimator: (scikit-learn model object), a scikit-learn model used for fitting

        cv: (scikit-learn cross validation object), a scikit-learn cross validation object to construct train/test splits

        scoring: (scikit-learn metric object), a scikit-learn metric to use as a scorer

        Xgroups: (list), list of row indices corresponding to each group

    Returns:
        train_sizes: (numpy array), array of fractions of training data used in learning curve

        train_mean: (numpy array), array of means of training data scores for each training data fraction

        test_mean: (numpy array), array of means of testing data scores for each training data fraction

        train_stdev: (numpy array), array of standard deviations of training data scores for each training data fraction

        test_stdev: (numpy array), array of standard deviations of testing data scores for each training data fraction

    """

    train_sizes = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
    if Xgroups.shape[0] > 0:
        Xgroups = np.array(Xgroups).reshape(-1, )
    else:
        Xgroups = np.zeros(len(y))

    train_sizes, train_scores, valid_scores = learning_curve(estimator=estimator, X=X, y=y, train_sizes=train_sizes,
                                                             scoring=scoring, cv=cv, groups=Xgroups)
    train_mean = np.mean(train_scores, axis=1)
    test_mean = np.mean(valid_scores, axis=1)
    train_stdev = np.std(train_scores, axis=1)
    test_stdev = np.std(valid_scores, axis=1)

    return train_sizes, train_mean, test_mean, train_stdev, test_stdev