Python Examples of sklearn.datasets.load

Source File: test_nfpc.py From fylearn with MIT License

7 votes

def test_build_meowa_factory():

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)

    l = nfpc.FuzzyPatternClassifier(membership_factory=t_factory,
                                    aggregation_factory=nfpc.MEOWAFactory())

    from sklearn.model_selection import cross_val_score

    scores = cross_val_score(l, X, y, cv=10)
    mean = np.mean(scores)

    assert 0.80 < mean

Source File: 1_problem.py From pandas-feature-union with MIT License

7 votes

def main():
    raw_data = load_iris()
    data = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])

    pipeline = FeatureUnion([
        ("1", make_pipeline(
            FunctionTransformer(lambda X: X.loc[:, ["sepal length (cm)"]]),
            # other transformations
        )),
        ("2", make_pipeline(
            FunctionTransformer(lambda X: X.loc[:, ["sepal width (cm)"]]),
            # other transformations
        ))
    ])

    X = pipeline.fit_transform(data)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean())

Source File: 2_transform_solution.py From pandas-feature-union with MIT License

7 votes

def main():
    raw_data = load_iris()
    data = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])
    data.loc[:, "class"] = raw_data["target"]

    pipeline = FeatureUnion([
        ("1", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal length (cm)"]]),
            # other transformations
        )),
        ("2", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal width (cm)"]]),
            # other transformations
        ))
    ])

    X = pipeline.fit_transform(data)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean())

Source File: test_fpcga.py From fylearn with MIT License

7 votes

def test_classifier_iris():

    iris = load_iris()

    X = iris.data
    y = iris.target

    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)

    l = fpcga.FuzzyPatternClassifierGA(iterations=100, random_state=1)

    from sklearn.model_selection import cross_val_score

    scores = cross_val_score(l, X, y, cv=10)

    assert len(scores) == 10
    assert np.mean(scores) > 0.6
    mean = np.mean(scores)

    print("mean", mean)

    assert 0.92 == pytest.approx(mean, 0.01)

Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License

7 votes

def test_cross_val_score_precomputed():
    # test for svm with precomputed kernel
    svm = SVC(kernel="precomputed")
    iris = load_iris()
    X, y = iris.data, iris.target
    linear_kernel = np.dot(X, X.T)
    score_precomputed = cross_val_score(svm, linear_kernel, y)
    svm = SVC(kernel="linear")
    score_linear = cross_val_score(svm, X, y)
    assert_array_almost_equal(score_precomputed, score_linear)

    # test with callable
    svm = SVC(gamma='scale', kernel=lambda x, y: np.dot(x, y.T))
    score_callable = cross_val_score(svm, X, y)
    assert_array_almost_equal(score_precomputed, score_callable)

    # Error raised for non-square X
    svm = SVC(kernel="precomputed")
    assert_raises(ValueError, cross_val_score, svm, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cross_val_score, svm,
                  linear_kernel.tolist(), y)

Source File: test_graphical_lasso.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_graphical_lasso_iris_singular():
    # Small subset of rows to test the rank-deficient case
    # Need to choose samples such that none of the variances are zero
    indices = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    cov_R = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222]
    ])
    icov_R = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5]
    ])
    X = datasets.load_iris().data[indices, :]
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graphical_lasso(emp_cov, alpha=0.01, return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_R, decimal=5)
        assert_array_almost_equal(icov, icov_R, decimal=5)

Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_cross_val_score_mask():
    # test that cross_val_score works with boolean masks
    svm = SVC(kernel="linear")
    iris = load_iris()
    X, y = iris.data, iris.target
    kfold = KFold(5)
    scores_indices = cross_val_score(svm, X, y, cv=kfold)
    kfold = KFold(5)
    cv_masks = []
    for train, test in kfold.split(X, y):
        mask_train = np.zeros(len(y), dtype=np.bool)
        mask_test = np.zeros(len(y), dtype=np.bool)
        mask_train[train] = 1
        mask_test[test] = 1
        cv_masks.append((train, test))
    scores_masks = cross_val_score(svm, X, y, cv=cv_masks)
    assert_array_equal(scores_indices, scores_masks)

Source File: sequential.py From keras2pmml with MIT License

6 votes

def setUp(self):
        iris = load_iris()

        theano.config.floatX = 'float32'
        X = iris.data.astype(theano.config.floatX)
        y = iris.target.astype(np.int32)
        y_ohe = np_utils.to_categorical(y)

        model = Sequential()
        model.add(Dense(input_dim=X.shape[1], output_dim=5, activation='tanh'))
        model.add(Dense(input_dim=5, output_dim=y_ohe.shape[1], activation='sigmoid'))
        model.compile(loss='categorical_crossentropy', optimizer='sgd')
        model.fit(X, y_ohe, nb_epoch=10, batch_size=1, verbose=3, validation_data=None)

        params = {'copyright': 'Václav Čadek', 'model_name': 'Iris Model'}
        self.model = model
        self.pmml = keras2pmml(self.model, **params)
        self.num_inputs = self.model.input_shape[1]
        self.num_outputs = self.model.output_shape[1]
        self.num_connection_layers = len(self.model.layers)
        self.features = ['x{}'.format(i) for i in range(self.num_inputs)]
        self.class_values = ['y{}'.format(i) for i in range(self.num_outputs)]

Source File: 3_feature_union_solution.py From pandas-feature-union with MIT License

6 votes

def main():
    raw_data = load_iris()
    data = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])
    data.loc[:, "class"] = raw_data["target"]

    pipeline = PandasFeatureUnion([
        ("1", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal length (cm)"]]),
            # other transformations
        )),
        ("2", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal width (cm)"]]),
            # other transformations
        ))
    ])

    X = pipeline.fit_transform(data)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean())

Source File: test_rfe.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_rfecv_verbose_output():
    # Check verbose=1 is producing an output.
    from io import StringIO
    import sys
    sys.stdout = StringIO()

    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = list(iris.target)

    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5, verbose=1)
    rfecv.fit(X, y)

    verbose_output = sys.stdout
    verbose_output.seek(0)
    assert_greater(len(verbose_output.readline()), 0)

Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_cross_val_score_with_score_func_classification():
    iris = load_iris()
    clf = SVC(kernel='linear')

    # Default score (should be the accuracy score)
    scores = cross_val_score(clf, iris.data, iris.target, cv=5)
    assert_array_almost_equal(scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # Correct classification score (aka. zero / one score) - should be the
    # same as the default estimator score
    zo_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="accuracy", cv=5)
    assert_array_almost_equal(zo_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # F1 score (class are balanced so f1_score should be equal to zero/one
    # score
    f1_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="f1_weighted", cv=5)
    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

Source File: test_nfpc.py From fylearn with MIT License

6 votes

def test_build_ps_owa_factory():

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)

    l = nfpc.FuzzyPatternClassifier(
        membership_factory=t_factory,
        aggregation_factory=nfpc.GAOWAFactory(optimizer=nfpc.ps_owa_optimizer())
    )

    from sklearn.model_selection import cross_val_score

    scores = cross_val_score(l, X, y, cv=10)
    mean = np.mean(scores)

    print("mean", mean)

    assert 0.92 < mean

Source File: test_GaussianNB.py From differential-privacy-library with MIT License

6 votes

def test_with_iris(self):
        global_seed(12345)
        from sklearn import datasets
        dataset = datasets.load_iris()

        x_train, x_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=.2)

        bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])

        clf = GaussianNB(epsilon=5.0, bounds=bounds)
        clf.fit(x_train, y_train)

        accuracy = clf.score(x_test, y_test)
        counts = clf.class_count_.copy()
        self.assertGreater(accuracy, 0.5)

        clf.partial_fit(x_train, y_train)
        new_counts = clf.class_count_
        self.assertEqual(np.sum(new_counts), np.sum(counts) * 2)

Source File: test_GaussianNB.py From differential-privacy-library with MIT License

6 votes

def test_different_results(self):
        from sklearn.naive_bayes import GaussianNB as sk_nb
        from sklearn import datasets

        global_seed(12345)
        dataset = datasets.load_iris()

        x_train, x_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=.2)

        bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])

        clf_dp = GaussianNB(epsilon=1.0, bounds=bounds)
        clf_non_private = sk_nb()

        for clf in [clf_dp, clf_non_private]:
            clf.fit(x_train, y_train)

        same_prediction = clf_dp.predict(x_test) == clf_non_private.predict(x_test)

        self.assertFalse(np.all(same_prediction))

Source File: test_LinearRegression.py From differential-privacy-library with MIT License

6 votes

def test_same_results(self):
        from sklearn import datasets
        from sklearn.model_selection import train_test_split
        from sklearn import linear_model

        dataset = datasets.load_iris()
        X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2)

        clf = LinearRegression(data_norm=12, epsilon=float("inf"),
                               bounds_X=([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5]), bounds_y=(0, 2))
        clf.fit(X_train, y_train)

        predict1 = clf.predict(X_test)

        clf = linear_model.LinearRegression(normalize=False)
        clf.fit(X_train, y_train)

        predict2 = clf.predict(X_test)

        self.assertTrue(np.allclose(predict1, predict2))

Source File: test_LinearRegression.py From differential-privacy-library with MIT License

6 votes

def test_different_results(self):
        from sklearn import datasets
        from sklearn import linear_model
        from sklearn.model_selection import train_test_split

        dataset = datasets.load_iris()
        X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2)

        clf = LinearRegression(data_norm=12, bounds_X=([4.3, 2.0, 1.1, 0.1], [7.9, 4.4, 6.9, 2.5]), bounds_y=(0, 2))
        clf.fit(X_train, y_train)

        predict1 = clf.predict(X_test)

        clf = LinearRegression(data_norm=12, bounds_X=([4.3, 2.0, 1.1, 0.1], [7.9, 4.4, 6.9, 2.5]), bounds_y=(0, 2))
        clf.fit(X_train, y_train)

        predict2 = clf.predict(X_test)

        clf = linear_model.LinearRegression()
        clf.fit(X_train, y_train)

        predict3 = clf.predict(X_test)

        self.assertFalse(np.all(predict1 == predict2))
        self.assertFalse(np.all(predict3 == predict1) and np.all(predict3 == predict2))

Source File: test_LogisticRegression.py From differential-privacy-library with MIT License

6 votes

def test_same_results(self):
        from sklearn import datasets
        from sklearn.model_selection import train_test_split
        from sklearn import linear_model

        dataset = datasets.load_iris()
        X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2)

        clf = LogisticRegression(data_norm=12, epsilon=float("inf"))
        clf.fit(X_train, y_train)

        predict1 = clf.predict(X_test)

        clf = linear_model.LogisticRegression(solver="lbfgs", multi_class="ovr")
        clf.fit(X_train, y_train)

        predict2 = clf.predict(X_test)

        self.assertTrue(np.all(predict1 == predict2))

Source File: test_rfe.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_rfecv_grid_scores_size():
    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = list(iris.target)   # regression test: list should be supported

    # Non-regression test for varying combinations of step and
    # min_features_to_select.
    for step, min_features_to_select in [[2, 1], [2, 2], [3, 3]]:
        rfecv = RFECV(estimator=MockClassifier(), step=step,
                      min_features_to_select=min_features_to_select, cv=5)
        rfecv.fit(X, y)

        score_len = np.ceil(
            (X.shape[1] - min_features_to_select) / step) + 1
        assert len(rfecv.grid_scores_) == score_len
        assert len(rfecv.ranking_) == X.shape[1]
        assert rfecv.n_features_ >= min_features_to_select

Source File: test_impute.py From skutil with BSD 3-Clause "New" or "Revised" License

6 votes

def test_bagged_imputer_classification():
    iris = load_iris()

    # make DF, add species col
    X = pd.DataFrame.from_records(data=iris.data, columns=iris.feature_names)
    X['species'] = iris.target

    # shuffle...
    X = shuffle_dataframe(X)

    # set random indices to be null.. 15% should be good
    rands = np.random.rand(X.shape[0])
    mask = rands > 0.85
    X['species'].iloc[mask] = np.nan

    # define imputer, assert no missing
    imputer = BaggedCategoricalImputer(cols=['species'])
    y = imputer.fit_transform(X)
    assert y['species'].isnull().sum() == 0, 'expected no null...'

    # now test with a different estimator
    imputer = BaggedCategoricalImputer(cols=['species'], base_estimator=RandomForestClassifier())
    y = imputer.fit_transform(X)
    assert y['species'].isnull().sum() == 0, 'expected no null...'

Source File: test_few.py From few with GNU General Public License v3.0

6 votes

def test_few_classification():
    """test_few.py: tests default classification settings"""
    np.random.seed(42)
    X, y = load_iris(return_X_y=True)
    train,test = train_test_split(np.arange(X.shape[0]), train_size=0.75,
                                  test_size=0.25)
    few = FEW(classification=True,population_size='1x',generations=10)
    few.fit(X[train],y[train])

    print('train score:', few.score(X[train],y[train]))
    print('test score:', few.score(X[test],y[test]))

    # test boolean output
    few = FEW(classification=True,otype='b',population_size='2x',
              seed_with_ml=False,generations=10)
    np.random.seed(42)
    few.fit(X[train],y[train])

    print('train score:', few.score(X[train],y[train]))
    print('test score:', few.score(X[test],y[test]))
    few.print_model()

Source File: test_graph_lasso.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_graph_lasso_iris_singular():
    # Small subset of rows to test the rank-deficient case
    # Need to choose samples such that none of the variances are zero
    indices = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    cov_R = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222]
    ])
    icov_R = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5]
    ])
    X = datasets.load_iris().data[indices, :]
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graph_lasso(emp_cov, alpha=0.01, return_costs=False,
                                mode=method)
        assert_array_almost_equal(cov, cov_R, decimal=5)
        assert_array_almost_equal(icov, icov_R, decimal=5)

Source File: test_graph_lasso.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_graph_lasso_iris():
    # Hard-coded solution from R glasso package for alpha=1.0
    # (need to set penalize.diagonal to FALSE)
    cov_R = np.array([
        [0.68112222, 0.0000000, 0.265820, 0.02464314],
        [0.00000000, 0.1887129, 0.000000, 0.00000000],
        [0.26582000, 0.0000000, 3.095503, 0.28697200],
        [0.02464314, 0.0000000, 0.286972, 0.57713289]
        ])
    icov_R = np.array([
        [1.5190747, 0.000000, -0.1304475, 0.0000000],
        [0.0000000, 5.299055, 0.0000000, 0.0000000],
        [-0.1304475, 0.000000, 0.3498624, -0.1683946],
        [0.0000000, 0.000000, -0.1683946, 1.8164353]
        ])
    X = datasets.load_iris().data
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graph_lasso(emp_cov, alpha=1.0, return_costs=False,
                                mode=method)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)

Source File: test_rfe.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_rfe_features_importance():
    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = iris.target

    clf = RandomForestClassifier(n_estimators=20,
                                 random_state=generator, max_depth=2)
    rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1)
    rfe.fit(X, y)
    assert_equal(len(rfe.ranking_), X.shape[1])

    clf_svc = SVC(kernel="linear")
    rfe_svc = RFE(estimator=clf_svc, n_features_to_select=4, step=0.1)
    rfe_svc.fit(X, y)

    # Check if the supports are equal
    assert_array_equal(rfe.get_support(), rfe_svc.get_support())

Source File: test_classification.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_classification_report_multiclass_with_label_detection():
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with label detection
    expected_report = """\
              precision    recall  f1-score   support

           0       0.83      0.79      0.81        24
           1       0.33      0.10      0.15        31
           2       0.42      0.90      0.57        20

    accuracy                           0.53        75
   macro avg       0.53      0.60      0.51        75
weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)

Source File: test_multiclass.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_safe_split_with_precomputed_kernel():
    clf = SVC()
    clfp = SVC(kernel="precomputed")

    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    K = np.dot(X, X.T)

    cv = ShuffleSplit(test_size=0.25, random_state=0)
    train, test = list(cv.split(X))[0]

    X_train, y_train = _safe_split(clf, X, y, train)
    K_train, y_train2 = _safe_split(clfp, K, y, train)
    assert_array_almost_equal(K_train, np.dot(X_train, X_train.T))
    assert_array_almost_equal(y_train, y_train2)

    X_test, y_test = _safe_split(clf, X, y, test, train)
    K_test, y_test2 = _safe_split(clfp, K, y, test, train)
    assert_array_almost_equal(K_test, np.dot(X_test, X_train.T))
    assert_array_almost_equal(y_test, y_test2)

Source File: test_graphical_lasso.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_graphical_lasso_iris():
    # Hard-coded solution from R glasso package for alpha=1.0
    # (need to set penalize.diagonal to FALSE)
    cov_R = np.array([
        [0.68112222, 0.0000000, 0.265820, 0.02464314],
        [0.00000000, 0.1887129, 0.000000, 0.00000000],
        [0.26582000, 0.0000000, 3.095503, 0.28697200],
        [0.02464314, 0.0000000, 0.286972, 0.57713289]
        ])
    icov_R = np.array([
        [1.5190747, 0.000000, -0.1304475, 0.0000000],
        [0.0000000, 5.299055, 0.0000000, 0.0000000],
        [-0.1304475, 0.000000, 0.3498624, -0.1683946],
        [0.0000000, 0.000000, -0.1683946, 1.8164353]
        ])
    X = datasets.load_iris().data
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graphical_lasso(emp_cov, alpha=1.0, return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)

Source File: test_unsupervised.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_correct_labelsize():
    # Assert 1 < n_labels < n_samples
    dataset = datasets.load_iris()
    X = dataset.data

    # n_labels = n_samples
    y = np.arange(X.shape[0])
    assert_raises_regexp(ValueError,
                         r'Number of labels is %d\. Valid values are 2 '
                         r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)),
                         silhouette_score, X, y)

    # n_labels = 1
    y = np.zeros(X.shape[0])
    assert_raises_regexp(ValueError,
                         r'Number of labels is %d\. Valid values are 2 '
                         r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)),
                         silhouette_score, X, y)

Source File: test_classification.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_classification_report_multiclass_with_digits():
    # Test performance report with added digits in floating point values
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
              precision    recall  f1-score   support

      setosa    0.82609   0.79167   0.80851        24
  versicolor    0.33333   0.09677   0.15000        31
   virginica    0.41860   0.90000   0.57143        20

    accuracy                        0.53333        75
   macro avg    0.52601   0.59615   0.50998        75
weighted avg    0.51375   0.53333   0.47310        75
"""
    report = classification_report(
        y_true, y_pred, labels=np.arange(len(iris.target_names)),
        target_names=iris.target_names, digits=5)
    assert_equal(report, expected_report)

Source File: test_estimator_checks.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_check_estimator_clones():
    # check that check_estimator doesn't modify the estimator it receives
    from sklearn.datasets import load_iris
    iris = load_iris()

    for Estimator in [GaussianMixture, LinearRegression,
                      RandomForestClassifier, NMF, SGDClassifier,
                      MiniBatchKMeans]:
        with ignore_warnings(category=(FutureWarning, DeprecationWarning)):
            # when 'est = SGDClassifier()'
            est = Estimator()
            set_checking_parameters(est)
            set_random_state(est)
            # without fitting
            old_hash = _joblib.hash(est)
            check_estimator(est)
        assert_equal(old_hash, _joblib.hash(est))

        with ignore_warnings(category=(FutureWarning, DeprecationWarning)):
            # when 'est = SGDClassifier()'
            est = Estimator()
            set_checking_parameters(est)
            set_random_state(est)
            # with fitting
            est.fit(iris.data + 10, iris.target)
            old_hash = _joblib.hash(est)
            check_estimator(est)
        assert_equal(old_hash, _joblib.hash(est))

Source File: test_graphical_lasso.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_graph_lasso_2D():
    # Hard-coded solution from Python skggm package
    # obtained by calling `quic(emp_cov, lam=.1, tol=1e-8)`
    cov_skggm = np.array([[3.09550269, 1.186972],
                         [1.186972, 0.57713289]])

    icov_skggm = np.array([[1.52836773, -3.14334831],
                          [-3.14334831,  8.19753385]])
    X = datasets.load_iris().data[:, 2:]
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graphical_lasso(emp_cov, alpha=.1, return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_skggm)
        assert_array_almost_equal(icov, icov_skggm)

Python sklearn.datasets.load_iris() Examples