Python sklearn.datasets.load_diabetes() Examples

The following are 30 code examples of sklearn.datasets.load_diabetes(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.
Example #1
Source File: test_linear_model.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 8 votes vote down vote up
def test_LassoCV(self, criterion):
        """Compare ``LassoLarsIC`` fitted through a ModelFrame with the plain estimator.

        Despite the test name, the estimator exercised is ``LassoLarsIC``,
        built with the given ``criterion``. Both paths normalize the features
        first; the selected ``alpha_`` and the predictions must agree.
        """
        dataset = datasets.load_diabetes()

        # Reference path: arrays normalized through ``pp.normalize``.
        features = pp.normalize(dataset.data)
        reference = lm.LassoLarsIC(criterion=criterion)
        reference.fit(features, dataset.target)

        # ModelFrame path: same normalization through the frame accessor.
        frame = pdml.ModelFrame(dataset)
        frame.data = frame.data.pp.normalize()
        wrapped = frame.lm.LassoLarsIC(criterion=criterion)
        frame.fit(wrapped)

        self.assertAlmostEqual(reference.alpha_, wrapped.alpha_)

        expected = reference.predict(features)
        predicted = frame.predict(wrapped)
        self.assertIsInstance(predicted, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(predicted.values, expected)
Example #2
Source File: test_linear_model.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 8 votes vote down vote up
def test_lasso_path(self):
        """Check ``ModelFrame.linear_model.lasso_path`` against ``lm.lasso_path``.

        The default call must return a 3-tuple whose middle element is a
        ModelFrame indexed by the feature columns. The ``return_models=True``
        call must return a tuple of the same length as the reference, with
        matching contents.
        """
        diabetes = datasets.load_diabetes()
        df = pdml.ModelFrame(diabetes)

        result = df.linear_model.lasso_path()
        expected = lm.lasso_path(diabetes.data, diabetes.target)

        self.assertEqual(len(result), 3)
        tm.assert_numpy_array_equal(result[0], expected[0])
        self.assertIsInstance(result[1], pdml.ModelFrame)
        tm.assert_index_equal(result[1].index, df.data.columns)
        self.assert_numpy_array_almost_equal(result[1].values, expected[1])
        self.assert_numpy_array_almost_equal(result[2], expected[2])

        result = df.linear_model.lasso_path(return_models=True)
        expected = lm.lasso_path(diabetes.data, diabetes.target, return_models=True)
        self.assertEqual(len(result), len(expected))
        self.assertIsInstance(result, tuple)
        # Compare against the reference output. The previous assertions
        # compared ``result`` with itself and therefore could never fail.
        tm.assert_numpy_array_equal(result[0], expected[0])
        # The middle element may be a frame (as in the default call above);
        # fall back to the object itself when it has no ``.values``.
        coefs = getattr(result[1], 'values', result[1])
        self.assert_numpy_array_almost_equal(coefs, expected[1])
        self.assert_numpy_array_almost_equal(result[2], expected[2])
Example #3
Source File: task.py    From cloud-ml-sdk with Apache License 2.0 7 votes vote down vote up
def main():
  """Fit a one-feature linear regression on the diabetes data and print metrics.

  The last 20 samples are held out for evaluation; the coefficients, the
  mean squared error and the model's ``score`` on the held-out samples are
  printed.
  """
  dataset = datasets.load_diabetes()

  # Keep only feature column 2, as a 2-D array with a single column.
  feature = dataset.data[:, np.newaxis, 2]

  holdout = 20
  x_train, x_test = feature[:-holdout], feature[-holdout:]
  y_train, y_test = dataset.target[:-holdout], dataset.target[-holdout:]

  model = linear_model.LinearRegression()
  model.fit(x_train, y_train)

  print('Coefficients: \n', model.coef_)
  residuals = model.predict(x_test) - y_test
  print("Mean squared error: %.2f" % np.mean(residuals ** 2))
  print('Variance score: %.2f' % model.score(x_test, y_test))
Example #4
Source File: test_h2o_converters.py    From onnxmltools with MIT License 6 votes vote down vote up
def test_h2o_regressor(self):
        """Convert H2O GBM regressors to ONNX for each listed distribution.

        For every distribution a small GBM is exported as a MOJO, converted,
        checked to be non-``None`` and handed to ``dump_data_and_model``.
        """
        dataset = load_diabetes()
        train, test = _train_test_split_as_frames(dataset.data, dataset.target)
        for distribution in ("auto", "gaussian", "huber", "laplace", "quantile"):
            estimator = H2OGradientBoostingEstimator(
                ntrees=7, max_depth=5, distribution=distribution)
            mojo_path = _make_mojo(estimator, train)
            onnx_model = _convert_mojo(mojo_path)
            self.assertIsNot(onnx_model, None)
            dump_data_and_model(
                test,
                H2OMojoWrapper(mojo_path),
                onnx_model,
                basename="H2OReg-Dec4",
                allow_failure="StrictVersion(onnx.__version__)< StrictVersion('1.3.0')",
            )
Example #5
Source File: test_base.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_MixedLM(self):
        """Fit ``MixedLM`` through ``StatsModelsRegressor`` and check the result type.

        Only the first 100 samples are used, split into two groups of 50.
        """
        import statsmodels.regression.mixed_linear_model as mlm

        diabetes = datasets.load_diabetes()
        data = diabetes.data[:100, :]
        target = diabetes.target[:100]
        # Group labels: 50 zeros followed by 50 ones.
        groups = np.repeat([0, 1], 50)

        klass = getattr(sm, 'MixedLM')
        estimator = base.StatsModelsRegressor(klass, groups=groups)
        fitted = estimator.fit(data, target)
        self.assertIsInstance(fitted, mlm.MixedLMResultsWrapper)

        # NOTE: predict() is intentionally not exercised here; the original
        # author recorded that it raised NotImplementedError, so no
        # comparison against klass(target, data, groups=groups) is made.
Example #6
Source File: test_base.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_pipeline(self):
        """Check ``StatsModelsRegressor`` as the last step of a sklearn ``Pipeline``.

        For each listed statsmodels class, a pipeline of ``SelectKBest``
        (``f_regression``, k=5) followed by the wrapped regressor must predict
        the same values as the statsmodels class fitted directly on the
        selected features.
        """
        from sklearn.feature_selection import SelectKBest
        from sklearn.feature_selection import f_regression
        from sklearn.pipeline import Pipeline

        diabetes = datasets.load_diabetes()
        X, y = diabetes.data, diabetes.target

        for name in ('OLS', 'GLS', 'WLS', 'GLSAR', 'QuantReg', 'GLM', 'RLM'):
            klass = getattr(sm, name)

            steps = [('selector', SelectKBest(f_regression, k=5)),
                     ('reg', base.StatsModelsRegressor(klass))]
            pipeline = Pipeline(steps)
            pipeline.fit(X, y)
            result = pipeline.predict(X)

            # Reference: apply the same selection by hand, then fit directly.
            selected = SelectKBest(f_regression, k=5).fit_transform(X, y)
            expected = klass(y, selected).fit().predict(selected)
            self.assert_numpy_array_almost_equal(result, expected)
Example #7
Source File: test_mimic_explainer.py    From interpret-community with MIT License 6 votes vote down vote up
def _timeseries_generated_data(self):
        """Build a time-indexed train/test split from the diabetes data.

        Returns:
            ``(X_train, X_test, y_train, y_test, time_column_name)`` where the
            feature frames are indexed by a generated month-start date column
            and the first 80% of rows form the training set.
        """
        features, target = datasets.load_diabetes(return_X_y=True)
        n_rows, n_cols = features.shape
        frame = pd.DataFrame(features, columns=[str(col) for col in range(n_cols)])

        # Attach an arbitrary time axis under a unique column name and index by it.
        time_column_name = "Date" + str(uuid.uuid4())
        frame[time_column_name] = pd.date_range('1980-01-01', periods=n_rows, freq='MS')
        frame.set_index([time_column_name], inplace=True)

        # Chronological split: the leading rows train, the trailing rows test.
        test_frac = 0.2
        split_at = int(np.floor((1.0 - test_frac) * n_rows))

        return (frame.iloc[:split_at], frame.iloc[split_at:],
                target[:split_at], target[split_at:], time_column_name)
Example #8
Source File: test_svm.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_svr():
    """Smoke-test the support vector regressors on the diabetes data."""
    diabetes = datasets.load_diabetes()
    X, y = diabetes.data, diabetes.target

    regressors = [
        svm.NuSVR(kernel='linear', nu=.4, C=1.0),
        svm.NuSVR(kernel='linear', nu=.4, C=10.),
        svm.SVR(kernel='linear', C=10.),
        svm.LinearSVR(C=10.),
        svm.LinearSVR(C=10.),
    ]
    for regressor in regressors:
        regressor.fit(X, y)
        assert_greater(regressor.score(X, y), 0.02)

    # Non-regression test: BaseLibSVM used to check len(np.unique(y)) < 2,
    # which is only meaningful for SVC, so a constant target must be accepted.
    svm.SVR().fit(X, np.ones(len(X)))
    svm.LinearSVR().fit(X, np.ones(len(X)))
Example #9
Source File: test_bayes.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_bayesian_on_diabetes():
    """Check that BayesianRidge scores increase per iteration (currently skipped)."""
    # The test is skipped unconditionally; nothing below this line runs.
    raise SkipTest("XFailed Test")
    diabetes = datasets.load_diabetes()
    model = BayesianRidge(compute_score=True)

    cases = (
        # More samples than features: the full data set.
        (diabetes.data, diabetes.target),
        # More features than samples: only the first five rows.
        (diabetes.data[:5, :], diabetes.target[:5]),
    )
    for features, target in cases:
        model.fit(features, target)
        # Scores must be strictly increasing from one iteration to the next.
        assert_array_equal(np.diff(model.scores_) > 0, True)
Example #10
Source File: test_coordinate_descent.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_lasso_cv_with_some_model_selection():
    """Smoke test: ``LassoCV`` with a ``StratifiedKFold`` splitter fits in a pipeline."""
    from sklearn import datasets
    from sklearn.linear_model import LassoCV
    from sklearn.model_selection import StratifiedKFold
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    diabetes = datasets.load_diabetes()

    splitter = StratifiedKFold(n_splits=5)
    model = make_pipeline(StandardScaler(), LassoCV(cv=splitter))
    # The test passes as long as fitting does not raise.
    model.fit(diabetes.data, diabetes.target)
Example #11
Source File: test_xgboost_converters.py    From onnxmltools with MIT License 6 votes vote down vote up
def test_xgb_regressor(self):
        """Convert an ``XGBRegressor`` trained on the diabetes data to ONNX.

        Half of the samples train the regressor; the other half (cast to
        float32) is passed to ``dump_data_and_model`` together with the
        original and converted models.
        """
        # The data set is diabetes, so the local is named accordingly
        # (it was previously called ``iris``).
        diabetes = load_diabetes()
        x = diabetes.data
        y = diabetes.target
        x_train, x_test, y_train, _ = train_test_split(x, y, test_size=0.5,
                                                       random_state=42)
        xgb = XGBRegressor()
        xgb.fit(x_train, y_train)
        conv_model = convert_xgboost(
            xgb, initial_types=[('input', FloatTensorType(shape=['None', 'None']))])
        # assertIsNotNone reports the offending value on failure.
        self.assertIsNotNone(conv_model)
        dump_data_and_model(
            x_test.astype("float32"),
            xgb,
            conv_model,
            basename="SklearnXGBRegressor-Dec3",
            allow_failure="StrictVersion("
            "onnx.__version__)"
            "< StrictVersion('1.3.0')",
        )
Example #12
Source File: test_feature_selection.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_f_classif(self):
        """``ModelFrame.feature_selection.f_classif`` must equal ``fs.f_classif`` exactly."""
        dataset = datasets.load_diabetes()
        frame = pdml.ModelFrame(dataset)

        actual = frame.feature_selection.f_classif()
        reference = fs.f_classif(dataset.data, dataset.target)

        self.assertEqual(len(actual), 2)
        for position in (0, 1):
            tm.assert_numpy_array_equal(actual[position], reference[position])
Example #13
Source File: test_feature_selection.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_f_regression(self):
        """``ModelFrame.feature_selection.f_regression`` must match ``fs.f_regression``."""
        dataset = datasets.load_diabetes()
        frame = pdml.ModelFrame(dataset)

        actual = frame.feature_selection.f_regression()
        reference = fs.f_regression(dataset.data, dataset.target)

        self.assertEqual(len(actual), 2)
        for position in (0, 1):
            self.assert_numpy_array_almost_equal(actual[position], reference[position])
Example #14
Source File: test_neighbors.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_Neigbors(self, algo):
        """Compare the ``neighbors`` estimator named ``algo`` via ModelFrame and directly.

        The estimator is built once from the frame accessor and once from the
        ``neighbors`` module, both with default arguments; their predictions
        on the training data must agree.
        """
        dataset = datasets.load_diabetes()
        frame = pdml.ModelFrame(dataset)

        wrapped = getattr(frame.neighbors, algo)()
        reference = getattr(neighbors, algo)()

        frame.fit(wrapped)
        reference.fit(dataset.data, dataset.target)

        result = frame.predict(wrapped)
        expected = reference.predict(dataset.data)

        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)
Example #15
Source File: test_linear_model.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_orthogonal_mp(self):
        """``ModelFrame.linear_model.orthogonal_mp`` must equal ``lm.orthogonal_mp``.

        Checked both with default arguments and with ``return_path=True``.
        """
        dataset = datasets.load_diabetes()
        frame = pdml.ModelFrame(dataset)

        # First the default call, then the variant that returns the full path.
        for options in ({}, {'return_path': True}):
            actual = frame.linear_model.orthogonal_mp(**options)
            reference = lm.orthogonal_mp(dataset.data, dataset.target, **options)
            tm.assert_numpy_array_equal(actual, reference)
Example #16
Source File: test_linear_model.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_Lasso_Path(self):
        """Compare the path functions exposed on ``df.lm`` with their ``lm`` counterparts.

        Covers ``lasso_path``, ``enet_path`` (with and without
        ``positive=True``) and ``lars_path`` with ``method='lasso'``; the
        first three elements of each returned tuple must agree.
        """
        def check_first_three(expected, result):
            # Each path function's first three outputs are compared element-wise.
            for position in range(3):
                self.assert_numpy_array_almost_equal(expected[position], result[position])

        diabetes = datasets.load_diabetes()
        X = diabetes.data
        y = diabetes.target
        # In-place division: ``X`` is the same object as ``diabetes.data``, so
        # the frame built below is constructed from the already-scaled values.
        X /= X.std(axis=0)

        df = pdml.ModelFrame(diabetes)
        df.data /= df.data.std(axis=0, ddof=False)

        self.assert_numpy_array_almost_equal(df.data.values, X)

        eps = 5e-3
        check_first_three(
            lm.lasso_path(X, y, eps, fit_intercept=False),
            df.lm.lasso_path(eps=eps, fit_intercept=False))

        check_first_three(
            lm.enet_path(X, y, eps=eps, l1_ratio=0.8, fit_intercept=False),
            df.lm.enet_path(eps=eps, l1_ratio=0.8, fit_intercept=False))

        check_first_three(
            lm.enet_path(X, y, eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False),
            df.lm.enet_path(eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False))

        check_first_three(
            lm.lars_path(X, y, method='lasso', verbose=True),
            df.lm.lars_path(method='lasso', verbose=True))
Example #17
Source File: test_discriminant_analysis.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_LDA(self, algo):
        """Compare the discriminant-analysis estimator ``algo`` via ModelFrame and directly.

        The estimator is built once from ``df.da`` and once from the ``da``
        module, both with default arguments; predictions on the training data
        must agree.
        """
        dataset = datasets.load_diabetes()
        frame = pdml.ModelFrame(dataset)

        wrapped = getattr(frame.da, algo)()
        reference = getattr(da, algo)()

        frame.fit(wrapped)
        reference.fit(dataset.data, dataset.target)

        result = frame.predict(wrapped)
        expected = reference.predict(dataset.data)

        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)
Example #18
Source File: test_base.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_OLS(self):
        """``StatsModelsRegressor(sm.OLS)`` must behave like ``sm.OLS`` fitted directly.

        ``fit`` must return a statsmodels ``RegressionResultsWrapper`` and the
        predictions must match those of a directly fitted OLS model.
        """
        import statsmodels.regression.linear_model as sm_linear

        diabetes = datasets.load_diabetes()
        X, y = diabetes.data, diabetes.target

        estimator = base.StatsModelsRegressor(sm.OLS)
        fitted = estimator.fit(X, y)
        result = estimator.predict(X)

        self.assertIsInstance(fitted, sm_linear.RegressionResultsWrapper)

        # statsmodels takes the target first, then the design matrix.
        expected = sm.OLS(y, X).fit().predict(X)
        self.assert_numpy_array_almost_equal(result, expected)
Example #19
Source File: test_base.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_precict(self):
        """Predicting with an unfitted ``StatsModelsRegressor`` must raise ``ValueError``."""
        unfitted = base.StatsModelsRegressor(sm.OLS)
        features = datasets.load_diabetes().data
        with pytest.raises(ValueError, match='StatsModelsRegressor is not fitted to data'):
            unfitted.predict(features)
Example #20
Source File: test_base.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_Regressions(self):
        """Each wrapped statsmodels regression must predict like the raw model.

        For every listed model class, ``StatsModelsRegressor`` predictions on
        the training data are compared with those of the class fitted directly.
        """
        diabetes = datasets.load_diabetes()
        X, y = diabetes.data, diabetes.target

        for name in ('OLS', 'GLS', 'WLS', 'GLSAR', 'QuantReg', 'GLM', 'RLM'):
            klass = getattr(sm, name)

            wrapped = base.StatsModelsRegressor(klass)
            wrapped.fit(X, y)
            result = wrapped.predict(X)

            # statsmodels takes the target first, then the design matrix.
            expected = klass(y, X).fit().predict(X)
            self.assert_numpy_array_almost_equal(result, expected)
Example #21
Source File: test_base.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_gridsearch(self):
        """``GridSearchCV`` over the ``statsmodel`` parameter must select ``sm.OLS``."""
        import sklearn.model_selection as ms

        diabetes = datasets.load_diabetes()
        param_grid = {'statsmodel': [sm.OLS, sm.GLS]}

        search = ms.GridSearchCV(base.StatsModelsRegressor(sm.OLS), param_grid,
                                 cv=5, scoring=None)
        fitted = search.fit(diabetes.data, diabetes.target)
        self.assertTrue(fitted.best_estimator_.statsmodel is sm.OLS)
Example #22
Source File: test_score_objects.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_regression_scorers():
    """The ``'r2'`` scorer must agree with ``r2_score`` on a fitted ``Ridge``."""
    dataset = load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, random_state=0)

    model = Ridge()
    model.fit(X_train, y_train)

    via_scorer = get_scorer('r2')(model, X_test, y_test)
    via_metric = r2_score(y_test, model.predict(X_test))
    assert_almost_equal(via_scorer, via_metric)
Example #23
Source File: test_svm.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_linearsvr():
    """``LinearSVR`` and ``SVR(kernel='linear')`` must give comparable results."""
    diabetes = datasets.load_diabetes()
    X, y = diabetes.data, diabetes.target

    linear = svm.LinearSVR(C=1e3).fit(X, y)
    linear_score = linear.score(X, y)

    kernelized = svm.SVR(kernel='linear', C=1e3).fit(X, y)
    kernelized_score = kernelized.score(X, y)

    # Coefficient norms are compared loosely; scores to two decimals.
    assert_allclose(np.linalg.norm(linear.coef_),
                    np.linalg.norm(kernelized.coef_), 1, 0.0001)
    assert_almost_equal(linear_score, kernelized_score, 2)
Example #24
Source File: test_svm.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_linearsvr_fit_sampleweight():
    """Check ``sample_weight`` handling in ``LinearSVR.fit``.

    Unit weights must behave like no weights, and integer weights must behave
    like repeating each sample that many times.
    """
    diabetes = datasets.load_diabetes()
    X, y = diabetes.data, diabetes.target
    n_samples = len(y)

    # Part 1: a weight of 1 for every sample is equivalent to no weighting.
    unit_weight = np.ones(n_samples)
    weighted = svm.LinearSVR(C=1e3).fit(X, y, sample_weight=unit_weight)
    weighted_score = weighted.score(X, y)

    unweighted = svm.LinearSVR(C=1e3).fit(X, y)
    unweighted_score = unweighted.score(X, y)

    assert_allclose(np.linalg.norm(weighted.coef_),
                    np.linalg.norm(unweighted.coef_), 1, 0.0001)
    assert_almost_equal(weighted_score, unweighted_score, 2)

    # Part 2: fit(X) == fit([X1, X2, X3], sample_weight=[n1, n2, n3]) where
    # X is X1 repeated n1 times, X2 repeated n2 times, and so forth.
    rng = check_random_state(0)
    counts = rng.randint(0, 10, n_samples)
    by_weight = svm.LinearSVR(C=1e3).fit(X, y, sample_weight=counts)
    by_weight_score = by_weight.score(X, y, sample_weight=counts)

    X_repeated = np.repeat(X, counts, axis=0)
    y_repeated = np.repeat(y, counts, axis=0)
    by_repetition = svm.LinearSVR(C=1e3).fit(X_repeated, y_repeated)
    by_repetition_score = by_repetition.score(X_repeated, y_repeated)

    assert_almost_equal(by_weight_score, by_repetition_score, 2)
Example #25
Source File: test_base.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_load_diabetes():
    """Check the shape and contents of the ``load_diabetes`` result.

    Covers both the default return value and the ``return_X_y=True`` tuple
    form, which must carry the same data and target arrays.
    """
    res = load_diabetes()
    assert_equal(res.data.shape, (442, 10))
    # Must be assert_equal: assert_true(size, 442) treated 442 as the failure
    # message and passed for any non-zero size.
    assert_equal(res.target.size, 442)
    assert_equal(len(res.feature_names), 10)
    assert_true(res.DESCR)

    # test return_X_y option
    X_y_tuple = load_diabetes(return_X_y=True)
    bunch = load_diabetes()
    assert_true(isinstance(X_y_tuple, tuple))
    assert_array_equal(X_y_tuple[0], bunch.data)
    assert_array_equal(X_y_tuple[1], bunch.target)
Example #26
Source File: test_h2o_converters.py    From onnxmltools with MIT License 5 votes vote down vote up
def test_h2o_regressor_unsupported_dists(self):
        """Converting a GBM with an unsupported distribution must raise ``ValueError``.

        For each listed distribution a small GBM is exported as a MOJO; the
        conversion must fail with a message containing "not supported".
        """
        diabetes = load_diabetes()
        train, test = _train_test_split_as_frames(diabetes.data, diabetes.target)
        not_supported_dists = ["poisson", "gamma", "tweedie"]
        for d in not_supported_dists:
            gbm = H2OGradientBoostingEstimator(ntrees=7, max_depth=5, distribution=d)
            mojo_path = _make_mojo(gbm, train)
            with self.assertRaises(ValueError) as err:
                _convert_mojo(mojo_path)
            # assertRegexpMatches is a deprecated alias removed in Python 3.12;
            # assertRegex is the supported spelling.
            self.assertRegex(err.exception.args[0], "not supported")
Example #27
Source File: load_sample_data.py    From MLOpsPython with MIT License 5 votes vote down vote up
def create_sample_data_csv(file_name: str = "diabetes.csv",
                           for_scoring: bool = False):
    """Write the diabetes sample data to a CSV file.

    Args:
        file_name: Path of the CSV file to write.
        for_scoring: When true, the target column ``Y`` is left out so the
            file contains only the feature columns.
    """
    dataset = load_diabetes()
    frame = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)

    include_target = not for_scoring
    if include_target:
        frame['Y'] = dataset.target

    # Hard code to diabetes so we fail fast if the project has been
    # bootstrapped.
    frame.to_csv(file_name, index=False)
Example #28
Source File: test_score_objects.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_regression_scorers():
    """Scoring through ``get_scorer('r2')`` must equal calling ``r2_score`` directly."""
    bunch = load_diabetes()
    features, target = bunch.data, bunch.target
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, random_state=0)

    ridge = Ridge()
    ridge.fit(X_train, y_train)

    r2_scorer = get_scorer('r2')
    scorer_value = r2_scorer(ridge, X_test, y_test)
    metric_value = r2_score(y_test, ridge.predict(X_test))
    assert_almost_equal(scorer_value, metric_value)
Example #29
Source File: test_svm.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_svr():
    """Smoke-test the support vector regressors on the diabetes data."""
    diabetes = datasets.load_diabetes()
    X, y = diabetes.data, diabetes.target

    regressors = [
        svm.NuSVR(kernel='linear', nu=.4, C=1.0),
        svm.NuSVR(kernel='linear', nu=.4, C=10.),
        svm.SVR(kernel='linear', C=10.),
        svm.LinearSVR(C=10.),
        svm.LinearSVR(C=10.),
    ]
    for regressor in regressors:
        regressor.fit(X, y)
        assert_greater(regressor.score(X, y), 0.02)

    # Non-regression test: BaseLibSVM used to check len(np.unique(y)) < 2,
    # which is only meaningful for SVC, so a constant target must be accepted.
    svm.SVR(gamma='scale').fit(X, np.ones(len(X)))
    svm.LinearSVR().fit(X, np.ones(len(X)))
Example #30
Source File: test_svm.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_linearsvr():
    """``LinearSVR`` and ``SVR(kernel='linear')`` must give comparable results."""
    bunch = datasets.load_diabetes()
    features, target = bunch.data, bunch.target

    linear_model_ = svm.LinearSVR(C=1e3).fit(features, target)
    linear_r2 = linear_model_.score(features, target)

    kernel_model = svm.SVR(kernel='linear', C=1e3).fit(features, target)
    kernel_r2 = kernel_model.score(features, target)

    # Coefficient norms are compared loosely; scores to two decimals.
    linear_norm = np.linalg.norm(linear_model_.coef_)
    kernel_norm = np.linalg.norm(kernel_model.coef_)
    assert_allclose(linear_norm, kernel_norm, 1, 0.0001)
    assert_almost_equal(linear_r2, kernel_r2, 2)