Python sklearn.datasets.load_diabetes() Examples
The following are 30 code examples of sklearn.datasets.load_diabetes().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.
Example #1
Source File: test_linear_model.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 8 votes |
def test_LassoCV(self, criterion):
    # Fit LassoLarsIC with the given criterion twice -- directly on
    # normalized arrays and through the ModelFrame accessor -- and require
    # matching alpha_ values and predictions.
    # NOTE(review): despite the test name, the estimator exercised here is
    # LassoLarsIC, not LassoCV.
    dataset = datasets.load_diabetes()
    target = dataset.target
    features = pp.normalize(dataset.data)

    frame = pdml.ModelFrame(dataset)
    frame.data = frame.data.pp.normalize()

    # Reference model fitted on the plain arrays.
    reference = lm.LassoLarsIC(criterion=criterion)
    reference.fit(features, target)

    # Same estimator built and fitted through the ModelFrame.
    wrapped = frame.lm.LassoLarsIC(criterion=criterion)
    frame.fit(wrapped)

    self.assertAlmostEqual(reference.alpha_, wrapped.alpha_)

    expected = reference.predict(features)
    predicted = frame.predict(wrapped)
    self.assertIsInstance(predicted, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(predicted.values, expected)
Example #2
Source File: test_linear_model.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 8 votes |
def test_lasso_path(self):
    # Compare ModelFrame.linear_model.lasso_path with the module-level
    # lasso_path on the diabetes data, first with default arguments and
    # then with return_models=True.
    diabetes = datasets.load_diabetes()
    df = pdml.ModelFrame(diabetes)

    result = df.linear_model.lasso_path()
    expected = lm.lasso_path(diabetes.data, diabetes.target)

    self.assertEqual(len(result), 3)
    tm.assert_numpy_array_equal(result[0], expected[0])
    self.assertIsInstance(result[1], pdml.ModelFrame)
    tm.assert_index_equal(result[1].index, df.data.columns)
    self.assert_numpy_array_almost_equal(result[1].values, expected[1])
    self.assert_numpy_array_almost_equal(result[2], expected[2])

    result = df.linear_model.lasso_path(return_models=True)
    expected = lm.lasso_path(diabetes.data, diabetes.target,
                             return_models=True)
    self.assertEqual(len(result), len(expected))
    self.assertIsInstance(result, tuple)

    # Check each element against the reference output; comparing `result`
    # with itself would pass regardless of what was returned.
    tm.assert_numpy_array_equal(result[0], expected[0])
    # The second element is a ModelFrame in the default call above;
    # fall back to the object itself if it carries no `.values` here.
    coefs = getattr(result[1], 'values', result[1])
    self.assert_numpy_array_almost_equal(coefs, expected[1])
    self.assert_numpy_array_almost_equal(result[2], expected[2])
Example #3
Source File: task.py From cloud-ml-sdk with Apache License 2.0 | 7 votes |
def main():
    """Fit a single-feature linear regression on the diabetes data.

    Only feature column 2 is used. The last 20 samples are held out for
    testing. The coefficients, the mean squared error on the held-out
    samples and the model's score on them are printed.
    """
    dataset = datasets.load_diabetes()
    holdout = 20

    # Keep a 2-D shape (n_samples, 1) for the single selected column.
    feature = dataset.data[:, np.newaxis, 2]
    x_train, x_test = feature[:-holdout], feature[-holdout:]
    y_train, y_test = dataset.target[:-holdout], dataset.target[-holdout:]

    model = linear_model.LinearRegression()
    model.fit(x_train, y_train)

    print('Coefficients: \n', model.coef_)

    residuals = model.predict(x_test) - y_test
    print("Mean squared error: %.2f" % np.mean(residuals ** 2))

    print('Variance score: %.2f' % model.score(x_test, y_test))
Example #4
Source File: test_h2o_converters.py From onnxmltools with MIT License | 6 votes |
def test_h2o_regressor(self):
    # For each listed distribution: train a small H2O GBM on the diabetes
    # data, export it as a MOJO, convert the MOJO to ONNX and dump the
    # test data together with both models.
    dataset = load_diabetes()
    train_frame, test_frame = _train_test_split_as_frames(
        dataset.data, dataset.target)

    for distribution in ["auto", "gaussian", "huber", "laplace", "quantile"]:
        estimator = H2OGradientBoostingEstimator(
            ntrees=7, max_depth=5, distribution=distribution)
        mojo = _make_mojo(estimator, train_frame)
        converted = _convert_mojo(mojo)
        self.assertIsNot(converted, None)
        dump_data_and_model(
            test_frame,
            H2OMojoWrapper(mojo),
            converted,
            basename="H2OReg-Dec4",
            allow_failure="StrictVersion("
                          "onnx.__version__)"
                          "< StrictVersion('1.3.0')",
        )
Example #5
Source File: test_base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_MixedLM(self):
    # Fit statsmodels' MixedLM through StatsModelsRegressor on the first
    # 100 diabetes samples, labelled as two groups of 50, and check the
    # type of the object returned by fit().
    import statsmodels.regression.mixed_linear_model as mlm

    dataset = datasets.load_diabetes()
    features = dataset.data[:100, :]
    response = dataset.target[:100]
    group_labels = np.array([0] * 50 + [1] * 50)

    for name in ['MixedLM']:
        model_class = getattr(sm, name)
        regressor = base.StatsModelsRegressor(model_class,
                                              groups=group_labels)
        fit_result = regressor.fit(features, response)

        # Predictions are intentionally not compared: the original author
        # noted that estimator.predict raises NotImplementedError here.
        self.assertIsInstance(fit_result, mlm.MixedLMResultsWrapper)
Example #6
Source File: test_base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_pipeline(self):
    # StatsModelsRegressor must work as the final step of a sklearn
    # Pipeline: for each statsmodels model, the pipeline's predictions must
    # match fitting the model directly on the five selected features.
    from sklearn.feature_selection import SelectKBest, f_regression
    from sklearn.pipeline import Pipeline

    dataset = datasets.load_diabetes()
    features, response = dataset.data, dataset.target

    for name in ['OLS', 'GLS', 'WLS', 'GLSAR', 'QuantReg', 'GLM', 'RLM']:
        model_class = getattr(sm, name)

        steps = [('selector', SelectKBest(f_regression, k=5)),
                 ('reg', base.StatsModelsRegressor(model_class))]
        pipeline = Pipeline(steps)
        pipeline.fit(features, response)
        result = pipeline.predict(features)

        # Reference: run the same feature selection by hand, then fit the
        # statsmodels class directly (endog first, exog second).
        selected = SelectKBest(f_regression, k=5).fit_transform(
            features, response)
        expected = model_class(response, selected).fit().predict(selected)

        self.assert_numpy_array_almost_equal(result, expected)
Example #7
Source File: test_mimic_explainer.py From interpret-community with MIT License | 6 votes |
def _timeseries_generated_data(self):
    """Return a positional train/test split of the diabetes data with a date index.

    The feature columns are named "0", "1", ... and the frame is indexed
    by a month-start date range beginning 1980-01-01. The index column
    name is "Date" followed by a random UUID.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, time_column_name)``.
        Rows before ``floor(0.8 * nrows)`` form the training part; the
        remaining rows form the test part.
    """
    features, target = datasets.load_diabetes(return_X_y=True)
    n_rows, n_cols = features.shape
    frame = pd.DataFrame(features,
                         columns=[str(col) for col in range(n_cols)])

    # Attach an arbitrary time axis and use it as the index.
    time_column_name = "Date" + str(uuid.uuid4())
    frame[time_column_name] = pd.date_range('1980-01-01', periods=n_rows,
                                            freq='MS')
    frame.set_index([time_column_name], inplace=True)

    # Positional split: everything before `split_at` is training data.
    test_frac = 0.2
    split_at = int(np.floor((1.0 - test_frac) * n_rows))

    return (frame.iloc[:split_at], frame.iloc[split_at:],
            target[:split_at], target[split_at:],
            time_column_name)
Example #8
Source File: test_svm.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_svr():
    # Test Support Vector Regression: each regressor must reach a score
    # above 0.02 on the data it was fitted on.
    dataset = datasets.load_diabetes()
    features, target = dataset.data, dataset.target

    # NOTE(review): LinearSVR(C=10.) appears twice in this list; the second
    # entry looks redundant.
    regressors = [
        svm.NuSVR(kernel='linear', nu=.4, C=1.0),
        svm.NuSVR(kernel='linear', nu=.4, C=10.),
        svm.SVR(kernel='linear', C=10.),
        svm.LinearSVR(C=10.),
        svm.LinearSVR(C=10.),
    ]
    for regressor in regressors:
        regressor.fit(features, target)
        assert_greater(regressor.score(features, target), 0.02)

    # non-regression test; previously, BaseLibSVM would check that
    # len(np.unique(y)) < 2, which must only be done for SVC
    n_samples = len(features)
    svm.SVR().fit(features, np.ones(n_samples))
    svm.LinearSVR().fit(features, np.ones(n_samples))
Example #9
Source File: test_bayes.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_bayesian_on_diabetes():
    # Test BayesianRidge on diabetes
    # The test is skipped unconditionally, so nothing after the raise runs.
    raise SkipTest("XFailed Test")

    dataset = datasets.load_diabetes()
    cases = (
        # more samples than features
        (dataset.data, dataset.target),
        # more features than samples
        (dataset.data[:5, :], dataset.target[:5]),
    )

    clf = BayesianRidge(compute_score=True)
    for X, y in cases:
        clf.fit(X, y)
        # Scores must be increasing at each iteration.
        assert_array_equal(np.diff(clf.scores_) > 0, True)
Example #10
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_lasso_cv_with_some_model_selection():
    # Smoke test: a StandardScaler + LassoCV pipeline whose cv argument is a
    # StratifiedKFold(n_splits=5) splitter must fit the diabetes data
    # without raising. No result is asserted.
    from sklearn import datasets
    from sklearn.linear_model import LassoCV
    from sklearn.model_selection import StratifiedKFold
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    dataset = datasets.load_diabetes()
    features, target = dataset.data, dataset.target

    splitter = StratifiedKFold(n_splits=5)
    pipeline = make_pipeline(StandardScaler(), LassoCV(cv=splitter))
    pipeline.fit(features, target)
Example #11
Source File: test_xgboost_converters.py From onnxmltools with MIT License | 6 votes |
def test_xgb_regressor(self):
    # Train an XGBRegressor on half of the diabetes data, convert it to
    # ONNX and dump the held-out features with both models.
    dataset = load_diabetes()
    features, target = dataset.data, dataset.target
    x_train, x_test, y_train, _ = train_test_split(
        features, target, test_size=0.5, random_state=42)

    regressor = XGBRegressor()
    regressor.fit(x_train, y_train)

    input_spec = [('input', FloatTensorType(shape=['None', 'None']))]
    onnx_model = convert_xgboost(regressor, initial_types=input_spec)
    self.assertTrue(onnx_model is not None)

    dump_data_and_model(
        x_test.astype("float32"),
        regressor,
        onnx_model,
        basename="SklearnXGBRegressor-Dec3",
        allow_failure="StrictVersion("
                      "onnx.__version__)"
                      "< StrictVersion('1.3.0')",
    )
Example #12
Source File: test_feature_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_f_classif(self):
    # ModelFrame.feature_selection.f_classif must return the same two
    # arrays as fs.f_classif called on the raw data and target.
    dataset = datasets.load_diabetes()
    frame = pdml.ModelFrame(dataset)

    actual = frame.feature_selection.f_classif()
    reference = fs.f_classif(dataset.data, dataset.target)

    self.assertEqual(len(actual), 2)
    for position in (0, 1):
        tm.assert_numpy_array_equal(actual[position], reference[position])
Example #13
Source File: test_feature_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_f_regression(self):
    # ModelFrame.feature_selection.f_regression must agree (to numerical
    # tolerance) with fs.f_regression called on the raw data and target.
    dataset = datasets.load_diabetes()
    frame = pdml.ModelFrame(dataset)

    actual = frame.feature_selection.f_regression()
    reference = fs.f_regression(dataset.data, dataset.target)

    self.assertEqual(len(actual), 2)
    for position in (0, 1):
        self.assert_numpy_array_almost_equal(actual[position],
                                             reference[position])
Example #14
Source File: test_neighbors.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_Neigbors(self, algo):
    # The estimator named `algo`, built through the ModelFrame accessor,
    # must predict the same values as the one taken from the `neighbors`
    # module and fitted on the raw arrays.
    dataset = datasets.load_diabetes()
    frame = pdml.ModelFrame(dataset)

    via_frame = getattr(frame.neighbors, algo)()
    via_module = getattr(neighbors, algo)()

    frame.fit(via_frame)
    via_module.fit(dataset.data, dataset.target)

    actual = frame.predict(via_frame)
    reference = via_module.predict(dataset.data)

    self.assertIsInstance(actual, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(actual.values, reference)
Example #15
Source File: test_linear_model.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_orthogonal_mp(self):
    # ModelFrame.linear_model.orthogonal_mp must match lm.orthogonal_mp on
    # the raw arrays, both with default arguments and with
    # return_path=True.
    dataset = datasets.load_diabetes()
    frame = pdml.ModelFrame(dataset)

    for options in ({}, {'return_path': True}):
        actual = frame.linear_model.orthogonal_mp(**options)
        reference = lm.orthogonal_mp(dataset.data, dataset.target,
                                     **options)
        tm.assert_numpy_array_equal(actual, reference)
Example #16
Source File: test_linear_model.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_Lasso_Path(self):
    # Compare the ModelFrame path helpers (lasso_path, enet_path,
    # lars_path) with their `lm` counterparts on standardized diabetes
    # data. Each call returns a sequence whose first three elements are
    # compared.
    dataset = datasets.load_diabetes()
    X = dataset.data
    y = dataset.target

    # NOTE(review): if `data` is a NumPy array this in-place division also
    # rescales `dataset.data` before the ModelFrame below is built from
    # it -- confirm that this is intended.
    X /= X.std(axis=0)

    frame = pdml.ModelFrame(dataset)
    frame.data /= frame.data.std(axis=0, ddof=False)
    self.assert_numpy_array_almost_equal(frame.data.values, X)

    def assert_first_three_match(expected, result):
        # Element-wise comparison of positions 0, 1 and 2.
        for position in range(3):
            self.assert_numpy_array_almost_equal(expected[position],
                                                 result[position])

    eps = 5e-3

    expected = lm.lasso_path(X, y, eps, fit_intercept=False)
    result = frame.lm.lasso_path(eps=eps, fit_intercept=False)
    assert_first_three_match(expected, result)

    expected = lm.enet_path(X, y, eps=eps, l1_ratio=0.8,
                            fit_intercept=False)
    result = frame.lm.enet_path(eps=eps, l1_ratio=0.8,
                                fit_intercept=False)
    assert_first_three_match(expected, result)

    expected = lm.enet_path(X, y, eps=eps, l1_ratio=0.8, positive=True,
                            fit_intercept=False)
    result = frame.lm.enet_path(eps=eps, l1_ratio=0.8, positive=True,
                                fit_intercept=False)
    assert_first_three_match(expected, result)

    expected = lm.lars_path(X, y, method='lasso', verbose=True)
    result = frame.lm.lars_path(method='lasso', verbose=True)
    assert_first_three_match(expected, result)
Example #17
Source File: test_discriminant_analysis.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_LDA(self, algo):
    # The estimator named `algo`, built through the ModelFrame `da`
    # accessor, must predict the same values as the one taken from the
    # `da` module and fitted on the raw arrays.
    dataset = datasets.load_diabetes()
    frame = pdml.ModelFrame(dataset)

    via_frame = getattr(frame.da, algo)()
    via_module = getattr(da, algo)()

    frame.fit(via_frame)
    via_module.fit(dataset.data, dataset.target)

    actual = frame.predict(via_frame)
    reference = via_module.predict(dataset.data)

    self.assertIsInstance(actual, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(actual.values, reference)
Example #18
Source File: test_base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_OLS(self):
    # StatsModelsRegressor wrapping sm.OLS must return a statsmodels
    # results wrapper from fit() and predict the same values as sm.OLS
    # fitted directly.
    import statsmodels.regression.linear_model as sm_linear

    dataset = datasets.load_diabetes()
    features, response = dataset.data, dataset.target

    regressor = base.StatsModelsRegressor(sm.OLS)
    fit_result = regressor.fit(features, response)
    actual = regressor.predict(features)

    self.assertIsInstance(fit_result, sm_linear.RegressionResultsWrapper)

    # Reference: statsmodels takes the response first, the design matrix
    # second.
    direct_fit = sm.OLS(response, features).fit()
    reference = direct_fit.predict(features)
    self.assert_numpy_array_almost_equal(actual, reference)
Example #19
Source File: test_base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_precict(self):
    # Calling predict() on a StatsModelsRegressor that was never fitted
    # must raise ValueError with the "not fitted" message.
    dataset = datasets.load_diabetes()
    unfitted = base.StatsModelsRegressor(sm.OLS)

    expected_message = 'StatsModelsRegressor is not fitted to data'
    with pytest.raises(ValueError, match=expected_message):
        unfitted.predict(dataset.data)
Example #20
Source File: test_base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_Regressions(self):
    # For each statsmodels model class, StatsModelsRegressor must predict
    # the same values as the class fitted directly on the same data.
    dataset = datasets.load_diabetes()
    features, response = dataset.data, dataset.target

    for name in ['OLS', 'GLS', 'WLS', 'GLSAR', 'QuantReg', 'GLM', 'RLM']:
        model_class = getattr(sm, name)

        regressor = base.StatsModelsRegressor(model_class)
        regressor.fit(features, response)
        actual = regressor.predict(features)

        reference = model_class(response, features).fit().predict(features)
        self.assert_numpy_array_almost_equal(actual, reference)
Example #21
Source File: test_base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_gridsearch(self):
    # GridSearchCV over the `statsmodel` parameter (OLS vs GLS) of
    # StatsModelsRegressor must run with 5-fold CV and select sm.OLS.
    import sklearn.model_selection as ms

    param_grid = {'statsmodel': [sm.OLS, sm.GLS]}
    dataset = datasets.load_diabetes()

    search = ms.GridSearchCV(base.StatsModelsRegressor(sm.OLS),
                             param_grid, cv=5, scoring=None)
    fitted = search.fit(dataset.data, dataset.target)

    chosen = fitted.best_estimator_.statsmodel
    self.assertTrue(chosen is sm.OLS)
Example #22
Source File: test_score_objects.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_regression_scorers():
    # Test regression scorers: the 'r2' scorer must give the same value as
    # calling r2_score on the model's predictions.
    dataset = load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, random_state=0)

    model = Ridge()
    model.fit(X_train, y_train)

    r2_scorer = get_scorer('r2')
    from_scorer = r2_scorer(model, X_test, y_test)
    from_metric = r2_score(y_test, model.predict(X_test))
    assert_almost_equal(from_scorer, from_metric)
Example #23
Source File: test_svm.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_linearsvr():
    # check that SVR(kernel='linear') and LinearSVR() give
    # comparable results
    dataset = datasets.load_diabetes()
    features, target = dataset.data, dataset.target

    linear_model_ = svm.LinearSVR(C=1e3).fit(features, target)
    linear_score = linear_model_.score(features, target)

    kernel_model = svm.SVR(kernel='linear', C=1e3).fit(features, target)
    kernel_score = kernel_model.score(features, target)

    # NOTE(review): the positional 1 and 0.0001 are presumably rtol and
    # atol; a relative tolerance of 1 makes this a very loose check.
    assert_allclose(np.linalg.norm(linear_model_.coef_),
                    np.linalg.norm(kernel_model.coef_), 1, 0.0001)
    assert_almost_equal(linear_score, kernel_score, 2)
Example #24
Source File: test_svm.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_linearsvr_fit_sampleweight():
    # Two checks on LinearSVR's sample_weight handling:
    #   1. unit weights give results comparable to passing no weights;
    #   2. integer weights give results comparable to repeating each
    #      sample that many times.
    dataset = datasets.load_diabetes()
    features, target = dataset.data, dataset.target
    sample_count = len(target)

    # --- 1. sample_weight of all ones vs. no sample_weight ---
    ones = np.ones(sample_count)
    weighted = svm.LinearSVR(C=1e3).fit(features, target,
                                        sample_weight=ones)
    weighted_score = weighted.score(features, target)

    unweighted = svm.LinearSVR(C=1e3).fit(features, target)
    unweighted_score = unweighted.score(features, target)

    assert_allclose(np.linalg.norm(weighted.coef_),
                    np.linalg.norm(unweighted.coef_), 1, 0.0001)
    assert_almost_equal(weighted_score, unweighted_score, 2)

    # --- 2. fit(X, sample_weight=[n1, n2, ...]) vs. fit on X with row i
    #        repeated n_i times ---
    rng = check_random_state(0)
    counts = rng.randint(0, 10, sample_count)

    by_weight = svm.LinearSVR(C=1e3).fit(features, target,
                                         sample_weight=counts)
    by_weight_score = by_weight.score(features, target,
                                      sample_weight=counts)

    repeated_features = np.repeat(features, counts, axis=0)
    repeated_target = np.repeat(target, counts, axis=0)
    by_repeat = svm.LinearSVR(C=1e3).fit(repeated_features,
                                         repeated_target)
    by_repeat_score = by_repeat.score(repeated_features, repeated_target)

    assert_almost_equal(by_weight_score, by_repeat_score, 2)
Example #25
Source File: test_base.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_load_diabetes():
    # Check the shape and metadata of the bunch returned by
    # load_diabetes(), then check that return_X_y=True yields a
    # (data, target) tuple matching the bunch.
    res = load_diabetes()
    assert_equal(res.data.shape, (442, 10))
    # assert_equal, not assert_true: with assert_true the 442 would be
    # taken as the failure message and any non-zero size would pass.
    assert_equal(res.target.size, 442)
    assert_equal(len(res.feature_names), 10)
    assert_true(res.DESCR)

    # test return_X_y option
    X_y_tuple = load_diabetes(return_X_y=True)
    bunch = load_diabetes()
    assert_true(isinstance(X_y_tuple, tuple))
    assert_array_equal(X_y_tuple[0], bunch.data)
    assert_array_equal(X_y_tuple[1], bunch.target)
Example #26
Source File: test_h2o_converters.py From onnxmltools with MIT License | 5 votes |
def test_h2o_regressor_unsupported_dists(self):
    # Converting a GBM trained with any of the listed distributions must
    # raise ValueError whose first argument mentions "not supported".
    diabetes = load_diabetes()
    # Only the training frame is needed; the test frame is discarded.
    train, _ = _train_test_split_as_frames(diabetes.data, diabetes.target)

    not_supported_dists = ["poisson", "gamma", "tweedie"]
    for d in not_supported_dists:
        gbm = H2OGradientBoostingEstimator(ntrees=7, max_depth=5,
                                           distribution=d)
        mojo_path = _make_mojo(gbm, train)
        with self.assertRaises(ValueError) as err:
            _convert_mojo(mojo_path)
        # Checked after the `with` block so the assertion actually runs.
        # "not supported" has no regex metacharacters, so a substring test
        # is equivalent to the former assertRegexpMatches call, which is a
        # deprecated alias removed in Python 3.12.
        self.assertIn("not supported", err.exception.args[0])
Example #27
Source File: load_sample_data.py From MLOpsPython with MIT License | 5 votes |
def create_sample_data_csv(file_name: str = "diabetes.csv",
                           for_scoring: bool = False):
    """Write the diabetes sample data to a CSV file without the index.

    Args:
        file_name: Path of the CSV file to write.
        for_scoring: When falsy (the default) the target is added as a
            ``Y`` column; when truthy only the feature columns are
            written.
    """
    dataset = load_diabetes()
    frame = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)

    include_target = not for_scoring
    if include_target:
        frame['Y'] = dataset.target

    # Hard code to diabetes so we fail fast if the project has been
    # bootstrapped.
    frame.to_csv(file_name, index=False)
Example #28
Source File: test_score_objects.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_regression_scorers():
    # Test regression scorers: the 'r2' scorer must give the same value as
    # calling r2_score on the model's predictions.
    dataset = load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, random_state=0)

    model = Ridge()
    model.fit(X_train, y_train)

    r2_scorer = get_scorer('r2')
    from_scorer = r2_scorer(model, X_test, y_test)
    from_metric = r2_score(y_test, model.predict(X_test))
    assert_almost_equal(from_scorer, from_metric)
Example #29
Source File: test_svm.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_svr():
    # Test Support Vector Regression: each regressor must reach a score
    # above 0.02 on the data it was fitted on.
    dataset = datasets.load_diabetes()
    features, target = dataset.data, dataset.target

    # NOTE(review): LinearSVR(C=10.) appears twice in this list; the second
    # entry looks redundant.
    regressors = [
        svm.NuSVR(kernel='linear', nu=.4, C=1.0),
        svm.NuSVR(kernel='linear', nu=.4, C=10.),
        svm.SVR(kernel='linear', C=10.),
        svm.LinearSVR(C=10.),
        svm.LinearSVR(C=10.),
    ]
    for regressor in regressors:
        regressor.fit(features, target)
        assert_greater(regressor.score(features, target), 0.02)

    # non-regression test; previously, BaseLibSVM would check that
    # len(np.unique(y)) < 2, which must only be done for SVC
    n_samples = len(features)
    svm.SVR(gamma='scale').fit(features, np.ones(n_samples))
    svm.LinearSVR().fit(features, np.ones(n_samples))
Example #30
Source File: test_svm.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_linearsvr():
    # check that SVR(kernel='linear') and LinearSVR() give
    # comparable results
    dataset = datasets.load_diabetes()
    features, target = dataset.data, dataset.target

    linear_model_ = svm.LinearSVR(C=1e3).fit(features, target)
    linear_score = linear_model_.score(features, target)

    kernel_model = svm.SVR(kernel='linear', C=1e3).fit(features, target)
    kernel_score = kernel_model.score(features, target)

    # NOTE(review): the positional 1 and 0.0001 are presumably rtol and
    # atol; a relative tolerance of 1 makes this a very loose check.
    assert_allclose(np.linalg.norm(linear_model_.coef_),
                    np.linalg.norm(kernel_model.coef_), 1, 0.0001)
    assert_almost_equal(linear_score, kernel_score, 2)