Python sklearn.datasets.make_regression() Examples
The following are 30 code examples of sklearn.datasets.make_regression(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.
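Before the project examples, here is a minimal, self-contained sketch of calling make_regression() directly; the variable names and parameter choices are illustrative only, not drawn from any project below:

from sklearn.datasets import make_regression

# 100 samples, 10 features, only 3 of which actually drive the target;
# coef=True additionally returns the ground-truth linear coefficients.
X, y, coef = make_regression(n_samples=100, n_features=10,
                             n_informative=3, noise=1.0,
                             coef=True, random_state=0)

print(X.shape)     # (100, 10)
print(y.shape)     # (100,)
print(coef.shape)  # (10,)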
Example #1
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License

def test_multi_target_regression_partial_fit():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    half_index = 25
    for n in range(3):
        sgr = SGDRegressor(random_state=0, max_iter=5)
        sgr.partial_fit(X_train[:half_index], y_train[:half_index, n])
        sgr.partial_fit(X_train[half_index:], y_train[half_index:, n])
        references[:, n] = sgr.predict(X_test)

    sgr = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5))
    sgr.partial_fit(X_train[:half_index], y_train[:half_index])
    sgr.partial_fit(X_train[half_index:], y_train[half_index:])
    y_pred = sgr.predict(X_test)

    assert_almost_equal(references, y_pred)
    assert not hasattr(MultiOutputRegressor(Lasso), 'partial_fit')
Example #2
Source File: test_gradient_boosting.py From pygbm with MIT License

def test_early_stopping_regression(scoring, validation_split,
                                   n_iter_no_change, tol):
    max_iter = 500

    X, y = make_regression(random_state=0)

    gb = GradientBoostingRegressor(verbose=1,  # just for coverage
                                   scoring=scoring,
                                   tol=tol,
                                   validation_split=validation_split,
                                   max_iter=max_iter,
                                   n_iter_no_change=n_iter_no_change,
                                   random_state=0)
    gb.fit(X, y)

    if n_iter_no_change is not None:
        assert n_iter_no_change <= gb.n_iter_ < max_iter
    else:
        assert gb.n_iter_ == max_iter
Example #3
Source File: test_ridge.py From Mastering-Elasticsearch-7.0 with MIT License

def test_ridge_fit_intercept_sparse():
    X, y = make_regression(n_samples=1000, n_features=2, n_informative=2,
                           bias=10., random_state=42)
    X_csr = sp.csr_matrix(X)

    for solver in ['sag', 'sparse_cg']:
        dense = Ridge(alpha=1., tol=1.e-15, solver=solver, fit_intercept=True)
        sparse = Ridge(alpha=1., tol=1.e-15, solver=solver, fit_intercept=True)
        dense.fit(X, y)
        with pytest.warns(None) as record:
            sparse.fit(X_csr, y)
        assert len(record) == 0
        assert_almost_equal(dense.intercept_, sparse.intercept_)
        assert_array_almost_equal(dense.coef_, sparse.coef_)

    # test the solver switch and the corresponding warning
    for solver in ['saga', 'lsqr']:
        sparse = Ridge(alpha=1., tol=1.e-15, solver=solver, fit_intercept=True)
        assert_raises_regex(ValueError, "In Ridge,", sparse.fit, X_csr, y)
Example #4
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License

def test_early_stopping_regression(scoring, validation_fraction,
                                   n_iter_no_change, tol):
    max_iter = 200

    X, y = make_regression(random_state=0)

    gb = HistGradientBoostingRegressor(
        verbose=1,  # just for coverage
        min_samples_leaf=5,  # easier to overfit fast
        scoring=scoring,
        tol=tol,
        validation_fraction=validation_fraction,
        max_iter=max_iter,
        n_iter_no_change=n_iter_no_change,
        random_state=0
    )
    gb.fit(X, y)

    if n_iter_no_change is not None:
        assert n_iter_no_change <= gb.n_iter_ < max_iter
    else:
        assert gb.n_iter_ == max_iter
Example #5
Source File: test_contrasts.py From nistats with BSD 3-Clause "New" or "Revised" License

def test_fixed_effect_contrast_nonzero_effect():
    X, y = make_regression(n_features=5, n_samples=20, random_state=0)
    y = y[:, None]
    labels, results = run_glm(y, X, 'ols')
    coef = LinearRegression(fit_intercept=False).fit(X, y).coef_
    for i in range(X.shape[1]):
        contrast = np.zeros(X.shape[1])
        contrast[i] = 1.
        fixed_effect = _compute_fixed_effect_contrast([labels], [results],
                                                      [contrast])
        assert_almost_equal(fixed_effect.effect_size(), coef.ravel()[i])
        fixed_effect = _compute_fixed_effect_contrast(
            [labels] * 3, [results] * 3, [contrast] * 3)
        assert_almost_equal(fixed_effect.effect_size(), coef.ravel()[i])
Example #6
Source File: util.py From nyaggle with MIT License

def make_regression_df(n_samples: int = 1024,
                       n_num_features: int = 20,
                       n_cat_features: int = 0,
                       feature_name: str = 'col_{}',
                       target_name: str = 'target',
                       random_state: int = 0,
                       id_column: str = None) -> Tuple[pd.DataFrame, pd.Series]:
    np.random.seed(random_state)
    X, y = make_regression(n_samples=n_samples, n_features=n_num_features,
                           random_state=random_state)
    X = pd.DataFrame(X, columns=[feature_name.format(i) for i in range(n_num_features)])
    y = pd.Series(y, name=target_name)

    if id_column is not None:
        X[id_column] = range(n_samples)

    for i in range(n_cat_features):
        X['cat_{}'.format(i)] = \
            pd.Series(np.random.choice(['A', 'B', None], size=n_samples)).astype(str).astype('category')

    return X, y
Example #7
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License

def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (aka. determination coefficient) - should be the
    # same as the default estimator score
    r2_scores = cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    neg_mse_scores = cross_val_score(reg, X, y, cv=5,
                                     scoring="neg_mean_squared_error")
    expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(neg_mse_scores, expected_neg_mse, 2)

    # Explained variance
    scoring = make_scorer(explained_variance_score)
    ev_scores = cross_val_score(reg, X, y, cv=5, scoring=scoring)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
Example #8
Source File: test_mlp.py From Mastering-Elasticsearch-7.0 with MIT License

def test_shuffle():
    # Test that the shuffle parameter affects the training process (it should)
    X, y = make_regression(n_samples=50, n_features=5, n_targets=1,
                           random_state=0)

    # The coefficients will be identical if both do or do not shuffle
    for shuffle in [True, False]:
        mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp1.fit(X, y)
        mlp2.fit(X, y)
        assert np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0])

    # The coefficients will be slightly different if shuffle=True
    mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=True)
    mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=False)
    mlp1.fit(X, y)
    mlp2.fit(X, y)
    assert not np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0])
Example #9
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License

def test_multi_target_regression():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    for n in range(3):
        rgr = GradientBoostingRegressor(random_state=0)
        rgr.fit(X_train, y_train[:, n])
        references[:, n] = rgr.predict(X_test)

    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X_train, y_train)
    y_pred = rgr.predict(X_test)

    assert_almost_equal(references, y_pred)
Example #10
Source File: test_validation.py From twitter-stock-recommendation with MIT License

def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (aka. determination coefficient) - should be the
    # same as the default estimator score
    r2_scores = cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    neg_mse_scores = cross_val_score(reg, X, y, cv=5,
                                     scoring="neg_mean_squared_error")
    expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(neg_mse_scores, expected_neg_mse, 2)

    # Explained variance
    scoring = make_scorer(explained_variance_score)
    ev_scores = cross_val_score(reg, X, y, cv=5, scoring=scoring)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
Example #11
Source File: test_samples_generator.py From Mastering-Elasticsearch-7.0 with MIT License

def test_make_regression():
    X, y, c = make_regression(n_samples=100, n_features=10, n_informative=3,
                              effective_rank=5, coef=True, bias=0.0,
                              noise=1.0, random_state=0)

    assert_equal(X.shape, (100, 10), "X shape mismatch")
    assert_equal(y.shape, (100,), "y shape mismatch")
    assert_equal(c.shape, (10,), "coef shape mismatch")
    assert_equal(sum(c != 0.0), 3, "Unexpected number of informative features")

    # Test that y ~= np.dot(X, c) + bias + N(0, 1.0).
    assert_almost_equal(np.std(y - np.dot(X, c)), 1.0, decimal=1)

    # Test with small number of features.
    X, y = make_regression(n_samples=100, n_features=1)  # n_informative=3
    assert_equal(X.shape, (100, 1))
Example #12
Source File: test_mlp_regressor.py From muffnn with BSD 3-Clause "New" or "Revised" License

def test_prediction_gradient():
    """Test computation of prediction gradients."""
    mlp = MLPRegressor(n_epochs=100, random_state=42, hidden_units=(5,))
    X, y = make_regression(
        n_samples=1000, n_features=10, n_informative=1, shuffle=False)
    mlp.fit(X, y)
    grad = mlp.prediction_gradient(X)
    grad_means = grad.mean(axis=0)

    assert grad.shape == X.shape

    # Check that only the informative feature has a large gradient.
    assert np.abs(grad_means[0]) > 0.5
    for m in grad_means[1:]:
        assert np.abs(m) < 0.1

    # Raise an exception for sparse inputs, which are not yet supported.
    X_sp = sp.csr_matrix(X)
    mlp.fit(X_sp, y)
    with pytest.raises(NotImplementedError):
        mlp.prediction_gradient(X_sp)
Example #13
Source File: test_pyglmnet.py From pyglmnet with MIT License

def test_cv():
    """Simple CV check."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()
    cv = KFold(n_splits=5)

    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)
    # check that it returns 5 scores
    scores = cross_val_score(glm_normal, X, y, cv=cv)
    assert len(scores) == 5

    param_grid = [{'alpha': np.linspace(0.01, 0.99, 2)},
                  {'reg_lambda': np.logspace(np.log(0.5), np.log(0.01),
                                             10, base=np.exp(1))}]
    glmcv = GridSearchCV(glm_normal, param_grid, cv=cv)
    glmcv.fit(X, y)
Example #14
Source File: test_nn.py From mljar-supervised with MIT License

def setUpClass(cls):
    cls.X, cls.y = datasets.make_regression(
        n_samples=100, n_features=5, n_informative=4, shuffle=False,
        random_state=0
    )
    cls.params = {
        "dense_layers": 2,
        "dense_1_size": 8,
        "dense_2_size": 4,
        "dropout": 0,
        "learning_rate": 0.01,
        "momentum": 0.9,
        "decay": 0.001,
        "ml_task": "regression"
    }
    cls.y = preprocessing.scale(cls.y)
Example #15
Source File: test_parsing_options.py From sklearn-onnx with MIT License

def test_kmeans(self):
    model = KMeans()
    X, y = make_regression(n_features=4, random_state=42)
    model.fit(X, y)

    initial_types = [('input', FloatTensorType((None, X.shape[1])))]
    with self.assertRaises(RuntimeError):
        convert_sklearn(model, initial_types=initial_types,
                        final_types=[('output4', None)])
    with self.assertRaises(RuntimeError):
        convert_sklearn(model, initial_types=initial_types,
                        final_types=[('dup1', None), ('dup1', None)],
                        target_opset=TARGET_OPSET)
    model_onnx = convert_sklearn(
        model, initial_types=initial_types,
        final_types=[('output4', None), ('output5', None)],
        target_opset=TARGET_OPSET)
    assert model_onnx is not None
    sess = InferenceSession(model_onnx.SerializeToString())
    assert sess.get_outputs()[0].name == 'output4'
    assert sess.get_outputs()[1].name == 'output5'
Example #16
Source File: discrete_choice_data_generator.py From cs-ranking with Apache License 2.0

def make_linear_transitive(self, n_instances=1000, n_objects=5, noise=0.0,
                           n_features=100, n_informative=10, seed=42, **kwd):
    random_state = check_random_state(seed=seed)
    X, y, coeff = make_regression(
        n_samples=n_instances * n_objects,
        n_features=n_features,
        n_informative=n_informative,
        coef=True,
        noise=noise,
        random_state=random_state,
    )
    X = X.reshape(n_instances, n_objects, n_features)
    y = y.reshape(n_instances, n_objects)
    Y = y.argmax(axis=1)
    Y = convert_to_label_encoding(Y, n_objects)
    return X, Y
Example #17
Source File: object_ranking_data_generator.py From cs-ranking with Apache License 2.0

def make_linear_transitive(self, n_instances=1000, n_objects=5, noise=0.0,
                           n_features=100, n_informative=10, seed=42, **kwd):
    random_state = check_random_state(seed=seed)
    X, y, coeff = make_regression(
        n_samples=n_instances * n_objects,
        n_features=n_features,
        n_informative=n_informative,
        coef=True,
        noise=noise,
        random_state=random_state,
    )
    X = X.reshape(n_instances, n_objects, n_features)
    y = y.reshape(n_instances, n_objects)
    Y = scores_to_rankings(y)
    return X, Y
Example #18
Source File: test_stacking.py From civisml-extensions with BSD 3-Clause "New" or "Revised" License

def test_smoke_multiout_regression_methods(n_jobs):
    """Construct, fit, and predict on realistic problem."""
    X, y = make_regression(random_state=7, n_samples=100, n_features=10,
                           n_informative=4, n_targets=2)
    rng = np.random.RandomState(17)
    est_list = [('lr', LinearRegression()),
                ('rf', RandomForestRegressor(random_state=rng,
                                             n_estimators=10)),
                ('metalr', LinearRegression())]
    sm = StackedRegressor(est_list, n_jobs=n_jobs)
    sm.fit(X, y)
    sm.predict(X)
    sm.score(X, y)

    with pytest.raises(AttributeError):
        sm.predict_proba(X)
Example #19
Source File: data_manager.py From Auto-PyTorch with Apache License 2.0

def generate_regression(self, num_features, num_samples, test_split=0.1, seed=0):
    """Generate a regression task

    Arguments:
        num_features {int} -- Number of features
        num_samples {int} -- Number of samples

    Keyword Arguments:
        test_split {float} -- Size of test split (default: {0.1})
        seed {int} -- a random seed (default: {0})
    """
    X, Y = make_regression(n_samples=num_samples, n_features=num_features,
                           random_state=seed)
    self.categorical_features = [False] * num_features
    self.problem_type = ProblemType.FeatureRegression
    self.X, self.Y = X, Y
    self._split_data(test_split, seed)
Example #20
Source File: test_imported_ml_model_pytest.py From eland with Apache License 2.0

def test_decision_tree_regressor(self):
    # Train model
    training_data = datasets.make_regression(n_features=5)
    regressor = DecisionTreeRegressor()
    regressor.fit(training_data[0], training_data[1])

    # Get some test results
    test_data = [[0.1, 0.2, 0.3, -0.5, 1.0], [1.6, 2.1, -10, 50, -1.0]]
    test_results = regressor.predict(test_data)

    # Serialise the models to Elasticsearch
    feature_names = ["f0", "f1", "f2", "f3", "f4"]
    model_id = "test_decision_tree_regressor"

    es_model = ImportedMLModel(
        ES_TEST_CLIENT, model_id, regressor, feature_names, overwrite=True
    )

    es_results = es_model.predict(test_data)

    np.testing.assert_almost_equal(test_results, es_results, decimal=2)

    # Clean up
    es_model.delete_model()
Example #21
Source File: test_imported_ml_model_pytest.py From eland with Apache License 2.0

def test_random_forest_regressor(self):
    # Train model
    training_data = datasets.make_regression(n_features=5)
    regressor = RandomForestRegressor()
    regressor.fit(training_data[0], training_data[1])

    # Get some test results
    test_data = [[0.1, 0.2, 0.3, -0.5, 1.0], [1.6, 2.1, -10, 50, -1.0]]
    test_results = regressor.predict(test_data)

    # Serialise the models to Elasticsearch
    feature_names = ["f0", "f1", "f2", "f3", "f4"]
    model_id = "test_random_forest_regressor"

    es_model = ImportedMLModel(
        ES_TEST_CLIENT, model_id, regressor, feature_names, overwrite=True
    )

    es_results = es_model.predict(test_data)

    np.testing.assert_almost_equal(test_results, es_results, decimal=2)

    # Clean up
    es_model.delete_model()
Example #22
Source File: test_imported_ml_model_pytest.py From eland with Apache License 2.0

def test_xgb_regressor(self):
    # Train model
    training_data = datasets.make_regression(n_features=5)
    regressor = XGBRegressor()
    regressor.fit(training_data[0], training_data[1])

    # Get some test results
    test_data = [[0.1, 0.2, 0.3, -0.5, 1.0], [1.6, 2.1, -10, 50, -1.0]]
    test_results = regressor.predict(np.asarray(test_data))

    # Serialise the models to Elasticsearch
    feature_names = ["f0", "f1", "f2", "f3", "f4"]
    model_id = "test_xgb_regressor"

    es_model = ImportedMLModel(
        ES_TEST_CLIENT, model_id, regressor, feature_names, overwrite=True
    )

    es_results = es_model.predict(test_data)

    np.testing.assert_almost_equal(test_results, es_results, decimal=2)

    # Clean up
    es_model.delete_model()
Example #23
Source File: test_imported_ml_model_pytest.py From eland with Apache License 2.0

def test_predict_single_feature_vector(self):
    # Train model
    training_data = datasets.make_regression(n_features=1)
    regressor = XGBRegressor()
    regressor.fit(training_data[0], training_data[1])

    # Get some test results
    test_data = [[0.1]]
    test_results = regressor.predict(np.asarray(test_data))

    # Serialise the models to Elasticsearch
    feature_names = ["f0"]
    model_id = "test_xgb_regressor"

    es_model = ImportedMLModel(
        ES_TEST_CLIENT, model_id, regressor, feature_names, overwrite=True
    )

    # Single feature
    es_results = es_model.predict(test_data[0])

    np.testing.assert_almost_equal(test_results, es_results, decimal=2)

    # Clean up
    es_model.delete_model()
Example #24
Source File: test_core_operators.py From lale with Apache License 2.0

def setUp(self):
    from sklearn.datasets import make_regression
    from sklearn.model_selection import train_test_split
    X, y = make_regression(n_features=4, n_informative=2,
                           random_state=0, shuffle=False)
    self.X_train, self.X_test, self.y_train, self.y_test = \
        train_test_split(X, y)
Example #25
Source File: test_core_operators.py From lale with Apache License 2.0

def setUp(self):
    from sklearn.datasets import make_regression
    from sklearn.model_selection import train_test_split
    X, y = make_regression(n_features=4, n_informative=2,
                           random_state=0, shuffle=False)
    self.X_train, self.X_test, self.y_train, self.y_test = \
        train_test_split(X, y)
Example #26
Source File: test_utils.py From causallib with Apache License 2.0

def test_check_regression_learner_is_fitted(self):
    from sklearn.linear_model import LinearRegression
    from sklearn.tree import ExtraTreeRegressor
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.svm import SVR
    from sklearn.datasets import make_regression

    X, y = make_regression()
    for regr in [LinearRegression(), ExtraTreeRegressor(),
                 GradientBoostingRegressor(), SVR()]:
        self.ensure_learner_is_fitted(regr, X, y)
Example #27
Source File: test_parsing_options.py From sklearn-onnx with MIT License

def test_pipeline(self):
    model = Pipeline(
        [('sc1', StandardScaler()), ('sc2', StandardScaler())])
    X, y = make_regression(n_features=4, random_state=42)
    model.fit(X)

    initial_types = [('input', FloatTensorType((None, X.shape[1])))]
    model_onnx = convert_sklearn(model, initial_types=initial_types)
    assert model_onnx is not None

    model_onnx = convert_sklearn(
        model, initial_types=initial_types,
        final_types=[('output', None)])
    sess = InferenceSession(model_onnx.SerializeToString())
    assert sess.get_outputs()[0].name == 'output'

    model_onnx = convert_sklearn(
        model, initial_types=initial_types,
        final_types=[('output4', None)])
    sess = InferenceSession(model_onnx.SerializeToString())
    assert sess.get_outputs()[0].name == 'output4'

    model_onnx = convert_sklearn(
        model, initial_types=initial_types,
        final_types=[('output4', DoubleTensorType())])
    try:
        sess = InferenceSession(model_onnx.SerializeToString())
    except RuntimeError as e:
        if "Cast(9)" in str(e):
            return
        raise e
    assert sess.get_outputs()[0].name == 'output4'
    assert str(sess.get_outputs()[0].type) == "tensor(double)"
Example #28
Source File: test_parsing_options.py From sklearn-onnx with MIT License

def test_decisiontree_regressor(self):
    model = DecisionTreeRegressor(max_depth=2)
    X, y = make_regression(n_features=4, random_state=42)
    model.fit(X, y)

    initial_types = [('input', FloatTensorType((None, X.shape[1])))]
    model_onnx = convert_sklearn(model, initial_types=initial_types,
                                 final_types=[('output4', None)])
    assert model_onnx is not None
    sess = InferenceSession(model_onnx.SerializeToString())
    assert sess.get_outputs()[0].name == 'output4'
Example #29
Source File: test_autogen_lib.py From lale with Apache License 2.0

def load_regression():
    return datasets.make_regression(n_features=4, n_informative=2,
                                    random_state=0, shuffle=False)
Example #30
Source File: regression_generator.py From scikit-multiflow with BSD 3-Clause "New" or "Revised" License

def _prepare_for_use(self):
    self._random_state = check_random_state(self.random_state)
    self.X, self.y = make_regression(n_samples=self.n_samples,
                                     n_features=self.n_features,
                                     n_informative=self.n_informative,
                                     n_targets=self.n_targets,
                                     random_state=self._random_state)
    self.y = np.resize(self.y, (self.y.size, self.n_targets))

    self.target_names = ["target_" + str(i) for i in range(self.n_targets)]
    self.feature_names = ["att_num_" + str(i) for i in range(self.n_num_features)]
    self.target_values = [float] * self.n_targets
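To close, here is a short sketch tying together the make_regression() parameters the examples above use most often (n_informative, n_targets, noise, and coef). It is an illustrative snippet, not code from any of the projects:

from sklearn.datasets import make_regression
import numpy as np

# Multi-target problem: y comes back with shape (n_samples, n_targets).
X, y = make_regression(n_samples=200, n_features=20, n_informative=5,
                       n_targets=3, noise=0.5, random_state=42)
assert X.shape == (200, 20) and y.shape == (200, 3)

# With a single target and coef=True, the ground-truth coefficients are
# returned, and y is approximately X @ coef plus N(0, noise) (bias is 0
# by default), as Example #11 verifies.
X, y, coef = make_regression(n_samples=200, n_features=20, n_informative=5,
                             noise=0.5, coef=True, random_state=42)
assert abs(np.std(y - X @ coef) - 0.5) < 0.1  # residual std ~= noise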