Python sklearn.linear_model.LassoCV() Examples
The following are 29 code examples of sklearn.linear_model.LassoCV(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.linear_model, or try the search function.
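Before the project examples, here is a minimal, self-contained sketch of typical LassoCV usage. The synthetic dataset, variable names, and parameter choices below are illustrative assumptions and are not taken from any of the projects listed on this page.

from sklearn.datasets import make_regression
from sklearn.linear_model import LassoCV

# Illustrative sketch only: synthetic regression data.
X, y = make_regression(n_samples=200, n_features=20, noise=1.0, random_state=0)

# LassoCV selects the regularization strength alpha by cross-validation
# over an automatically generated (or user-supplied) grid of alphas.
reg = LassoCV(cv=5, random_state=0).fit(X, y)

print(reg.alpha_)        # alpha selected by cross-validation
print(reg.coef_)         # sparse coefficient vector at the selected alpha
print(reg.score(X, y))   # R^2 on the training data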
Example #1
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_lasso_cv():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementation
    # select a similar alpha
    lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y)
    # for this we check that they don't fall in the grid of
    # clf.alphas further than 1
    assert np.abs(
        np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
        np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1
    # check that they also give a similar MSE
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(),
                                   significant=2)

    # test set
    assert_greater(clf.score(X_test, y_test), 0.99)
Example #2
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_lasso_cv_with_some_model_selection():
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import StratifiedKFold
    from sklearn import datasets
    from sklearn.linear_model import LassoCV

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target

    pipe = make_pipeline(
        StandardScaler(),
        LassoCV(cv=StratifiedKFold(n_splits=5))
    )
    pipe.fit(X, y)
Example #3
Source File: ewa.py From pycobra with MIT License | 6 votes |
def load_default(self, machine_list=['lasso', 'tree', 'ridge', 'random_forest', 'svm']):
    """
    Loads 4 different scikit-learn regressors by default.

    Parameters
    ----------
    machine_list: optional, list of strings
        List of default machine names to be loaded.
    """
    for machine in machine_list:
        try:
            if machine == 'lasso':
                self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'tree':
                self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'ridge':
                self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
            if machine == 'random_forest':
                self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'svm':
                self.estimators_['svm'] = SVR().fit(self.X_k_, self.y_k_)
        except ValueError:
            continue
Example #4
Source File: gd_poisoners.py From manip-ml with MIT License | 6 votes |
def learn_model(self, x, y, clf, lam=None):
    if (lam is None and self.initlam != -1):  # hack for first training
        lam = self.initlam
    if clf is None:
        if lam is None:
            clf = linear_model.LassoCV(max_iter=10000)
            clf.fit(x, y)
            lam = clf.alpha_
        clf = linear_model.Lasso(alpha=lam,
                                 max_iter=10000,
                                 warm_start=True)
    clf.fit(x, y)
    return clf, lam


############################################################################################
# Implements GD Poisoning for Ridge Linear Regression
############################################################################################
Example #5
Source File: test_coordinate_descent.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_lasso_cv_with_some_model_selection():
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import StratifiedKFold
    from sklearn import datasets
    from sklearn.linear_model import LassoCV

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target

    pipe = make_pipeline(
        StandardScaler(),
        LassoCV(cv=StratifiedKFold(n_splits=5))
    )
    pipe.fit(X, y)
Example #6
Source File: test_coordinate_descent.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_lasso_cv():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementation
    # select a similar alpha
    lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y)
    # for this we check that they don't fall in the grid of
    # clf.alphas further than 1
    assert_true(np.abs(
        np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
        np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1)
    # check that they also give a similar MSE
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(),
                                   significant=2)

    # test set
    assert_greater(clf.score(X_test, y_test), 0.99)
Example #7
Source File: feature_selection.py From CatLearn with GNU General Public License v3.0 | 6 votes |
def feature_inspection(self, lower=0, upper=1, interval=10**2,
                       alpha_list=None):
    """Generate interval used to search for the alpha.

    Parameters
    ----------
    lower : int
        Lower bound for the interval search.
    upper : int
        Upper bound for the interval search.
    interval: int
        Number of alphas in interval inspected.
    """
    feat_vec, alpha_vec = [], []
    if alpha_list is None:
        alpha_list = np.linspace(float(upper), float(lower), int(interval))
    for alpha in alpha_list:
        model = LassoCV(alphas=[alpha], cv=3).fit(X=self.train_features,
                                                  y=self.train_targets)
        feat_vec.append(np.shape(np.nonzero(model.coef_))[1])
        alpha_vec.append(alpha)

    return feat_vec, alpha_vec, np.nonzero(model.coef_)
Example #8
Source File: FeatureSelector.py From CDSS with GNU General Public License v3.0 | 5 votes |
def _eliminate_recursively(self, k=None):
    if self._problem == FeatureSelector.CLASSIFICATION:
        estimator = RandomForestClassifier(random_state=self._random_state)
    else:
        estimator = LassoCV(random_state=self._random_state)

    # If k is not specified, then use RFECV to automatically decide on
    # optimal number of features. If specified, then use RFE.
    if k is None:
        self._selector = RFECV(estimator)
    else:
        self._selector = RFE(estimator, n_features_to_select=k, step=0.05)
Example #9
Source File: test_coordinate_descent.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_precompute_invalid_argument():
    X, y, _, _ = build_dataset()
    for clf in [ElasticNetCV(precompute="invalid"),
                LassoCV(precompute="invalid")]:
        assert_raises_regex(ValueError, ".*should be.*True.*False.*auto.*"
                            "array-like.*Got 'invalid'", clf.fit, X, y)

    # Precompute = 'auto' is not supported for ElasticNet
    assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'", ElasticNet(precompute='auto').fit, X, y)
Example #10
Source File: Regressor.py From CDSS with GNU General Public License v3.0 | 5 votes |
def _train_lasso(self, X, y):
    self._model = LassoCV()
    self._model.fit(X, y)
Example #11
Source File: test_sklearn_glm_regressor_converter.py From sklearn-onnx with MIT License | 5 votes |
def test_model_lasso_cv(self):
    model, X = fit_regression_model(linear_model.LassoCV())
    model_onnx = convert_sklearn(
        model, "lasso cv",
        [("input", FloatTensorType([None, X.shape[1]]))])
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X,
        model,
        model_onnx,
        basename="SklearnLassoCV-Dec4",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #12
Source File: test_linear_model.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.linear_model.ARDRegression, lm.ARDRegression)
    self.assertIs(df.linear_model.BayesianRidge, lm.BayesianRidge)
    self.assertIs(df.linear_model.ElasticNet, lm.ElasticNet)
    self.assertIs(df.linear_model.ElasticNetCV, lm.ElasticNetCV)
    self.assertIs(df.linear_model.HuberRegressor, lm.HuberRegressor)
    self.assertIs(df.linear_model.Lars, lm.Lars)
    self.assertIs(df.linear_model.LarsCV, lm.LarsCV)
    self.assertIs(df.linear_model.Lasso, lm.Lasso)
    self.assertIs(df.linear_model.LassoCV, lm.LassoCV)
    self.assertIs(df.linear_model.LassoLars, lm.LassoLars)
    self.assertIs(df.linear_model.LassoLarsCV, lm.LassoLarsCV)
    self.assertIs(df.linear_model.LassoLarsIC, lm.LassoLarsIC)
    self.assertIs(df.linear_model.LinearRegression, lm.LinearRegression)
    self.assertIs(df.linear_model.LogisticRegression, lm.LogisticRegression)
    self.assertIs(df.linear_model.LogisticRegressionCV, lm.LogisticRegressionCV)
    self.assertIs(df.linear_model.MultiTaskLasso, lm.MultiTaskLasso)
    self.assertIs(df.linear_model.MultiTaskElasticNet, lm.MultiTaskElasticNet)
    self.assertIs(df.linear_model.MultiTaskLassoCV, lm.MultiTaskLassoCV)
    self.assertIs(df.linear_model.MultiTaskElasticNetCV, lm.MultiTaskElasticNetCV)
    self.assertIs(df.linear_model.OrthogonalMatchingPursuit, lm.OrthogonalMatchingPursuit)
    self.assertIs(df.linear_model.OrthogonalMatchingPursuitCV, lm.OrthogonalMatchingPursuitCV)
    self.assertIs(df.linear_model.PassiveAggressiveClassifier, lm.PassiveAggressiveClassifier)
    self.assertIs(df.linear_model.PassiveAggressiveRegressor, lm.PassiveAggressiveRegressor)
    self.assertIs(df.linear_model.Perceptron, lm.Perceptron)
    self.assertIs(df.linear_model.RandomizedLasso, lm.RandomizedLasso)
    self.assertIs(df.linear_model.RandomizedLogisticRegression, lm.RandomizedLogisticRegression)
    self.assertIs(df.linear_model.RANSACRegressor, lm.RANSACRegressor)
    self.assertIs(df.linear_model.Ridge, lm.Ridge)
    self.assertIs(df.linear_model.RidgeClassifier, lm.RidgeClassifier)
    self.assertIs(df.linear_model.RidgeClassifierCV, lm.RidgeClassifierCV)
    self.assertIs(df.linear_model.RidgeCV, lm.RidgeCV)
    self.assertIs(df.linear_model.SGDClassifier, lm.SGDClassifier)
    self.assertIs(df.linear_model.SGDRegressor, lm.SGDRegressor)
    self.assertIs(df.linear_model.TheilSenRegressor, lm.TheilSenRegressor)
Example #13
Source File: beamformers_electrodes_tweak.py From mmvt with GNU General Public License v3.0 | 5 votes |
def calc_optimization_features(optimization_method, freqs_bins, cond, meg_data_dic, elec_data, electrodes,
                               from_t, to_t, optimization_params={}):
    # scorer = make_scorer(rol_corr, False)
    cv_parameters = []
    if optimization_method in ['Ridge', 'RidgeCV', 'Lasso', 'LassoCV', 'ElasticNet', 'ElasticNetCV']:
        # vstack all meg data, such that X.shape = T*n X F, where n is the electrodes num
        # Y is T*n * 1
        X = np.hstack((meg_data_dic[electrode][:, from_t:to_t] for electrode in electrodes))
        Y = np.hstack((elec_data[electrode][cond][from_t:to_t] for electrode in electrodes))
        funcs_dic = {'Ridge': Ridge(alpha=0.1),
                     'RidgeCV': RidgeCV(np.logspace(0, -10, 11)),  # scoring=scorer
                     'Lasso': Lasso(alpha=1.0/X.shape[0]),
                     'LassoCV': LassoCV(alphas=np.logspace(0, -10, 11), max_iter=1000),
                     'ElasticNetCV': ElasticNetCV(alphas=np.logspace(0, -10, 11), l1_ratio=np.linspace(0, 1, 11))}
        clf = funcs_dic[optimization_method]
        clf.fit(X.T, Y)
        p = clf.coef_
        if len(p) != len(freqs_bins):
            raise Exception('{} (len(clf.coef)) != {} (len(freqs_bin))!!!'.format(len(p), len(freqs_bins)))
        if optimization_method in ['RidgeCV', 'LassoCV']:
            cv_parameters = clf.alpha_
        elif optimization_method == 'ElasticNetCV':
            cv_parameters = [clf.alpha_, clf.l1_ratio_]
        args = [(meg_pred(p, meg_data_dic[electrode][:, from_t:to_t]), elec_data[electrode][cond][from_t:to_t])
                for electrode in electrodes]
        p0 = leastsq(post_ridge_err_func, [1], args=args, maxfev=0)[0]
        p = np.hstack((p0, p))
    elif optimization_method in ['leastsq', 'dtw', 'minmax', 'diff_rms', 'rol_corr']:
        args = ([(meg_data_dic[electrode][:, from_t:to_t], elec_data[electrode][cond][from_t:to_t])
                 for electrode in electrodes], optimization_params)
        p0 = np.ones((1, len(freqs_bins)+1))
        funcs_dic = {'leastsq': partial(leastsq, func=err_func, x0=p0, args=args),
                     'dtw': partial(minimize, fun=dtw_err_func, x0=p0, args=args),
                     'minmax': partial(minimize, fun=minmax_err_func, x0=p0, args=args),
                     'diff_rms': partial(minimize, fun=min_diff_rms_err_func, x0=p0, args=args),
                     'rol_corr': partial(minimize, fun=max_rol_corr, x0=p0, args=args)}
        res = funcs_dic[optimization_method]()
        p = res[0] if optimization_method == 'leastsq' else res.x
        cv_parameters = optimization_params
    else:
        raise Exception('Unknown optimization_method! {}'.format(optimization_method))
    return p, cv_parameters
Example #14
Source File: test_coordinate_descent.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_lasso_cv_positive_constraint():
    X, y, X_test, y_test = build_dataset()
    max_iter = 500

    # Ensure the unconstrained fit has a negative coefficient
    clf_unconstrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter, cv=2,
                                n_jobs=1)
    clf_unconstrained.fit(X, y)
    assert_true(min(clf_unconstrained.coef_) < 0)

    # On same data, constrained fit has non-negative coefficients
    clf_constrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
                              positive=True, cv=2, n_jobs=1)
    clf_constrained.fit(X, y)
    assert_true(min(clf_constrained.coef_) >= 0)
Example #15
Source File: test_coordinate_descent.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_uniform_targets():
    enet = ElasticNetCV(fit_intercept=True, n_alphas=3)
    m_enet = MultiTaskElasticNetCV(fit_intercept=True, n_alphas=3)
    lasso = LassoCV(fit_intercept=True, n_alphas=3)
    m_lasso = MultiTaskLassoCV(fit_intercept=True, n_alphas=3)

    models_single_task = (enet, lasso)
    models_multi_task = (m_enet, m_lasso)

    rng = np.random.RandomState(0)

    X_train = rng.random_sample(size=(10, 3))
    X_test = rng.random_sample(size=(10, 3))

    y1 = np.empty(10)
    y2 = np.empty((10, 2))

    for model in models_single_task:
        for y_values in (0, 5):
            y1.fill(y_values)
            assert_array_equal(model.fit(X_train, y1).predict(X_test), y1)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)

    for model in models_multi_task:
        for y_values in (0, 5):
            y2[:, 0].fill(y_values)
            y2[:, 1].fill(2 * y_values)
            assert_array_equal(model.fit(X_train, y2).predict(X_test), y2)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)
Example #16
Source File: test_coordinate_descent.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    X, y, _, _ = build_dataset(n_features=10)
    y = y[:, np.newaxis]
    clf = LassoCV(n_alphas=5, eps=2e-3)
    clf.fit(X, y[:, 0])
    clf1 = MultiTaskLassoCV(n_alphas=5, eps=2e-3)
    clf1.fit(X, y)
    assert_almost_equal(clf.alpha_, clf1.alpha_)
    assert_almost_equal(clf.coef_, clf1.coef_[0])
    assert_almost_equal(clf.intercept_, clf1.intercept_[0])
Example #17
Source File: TermDocMatrix.py From scattertext with Apache License 2.0 | 5 votes |
def get_logistic_regression_coefs_l1(self, category,
                                     clf=LassoCV(alphas=[0.1, 0.001],
                                                 max_iter=10000,
                                                 n_jobs=-1)):
    ''' Computes l1-penalized logistic regression score.

    Parameters
    ----------
    category : str
        category name to score

    Returns
    -------
    (coefficient array, accuracy, majority class baseline accuracy)
    '''
    try:
        from sklearn.cross_validation import cross_val_predict
    except:
        from sklearn.model_selection import cross_val_predict
    y = self._get_mask_from_category(category)
    y_continuous = self._get_continuous_version_boolean_y(y)
    # X = TfidfTransformer().fit_transform(self._X)
    X = self._X

    clf.fit(X, y_continuous)
    y_hat = (cross_val_predict(clf, X, y_continuous) > 0)
    acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
    clf.fit(X, y_continuous)
    return clf.coef_, acc, baseline
Example #18
Source File: kernelcobra.py From pycobra with MIT License | 5 votes |
def load_default(self, machine_list='basic'):
    """
    Loads 4 different scikit-learn regressors by default. The advanced list adds more machines.

    Parameters
    ----------
    machine_list: optional, list of strings
        List of default machine names to be loaded.
        Default is basic,

    Returns
    -------
    self : returns an instance of self.
    """
    if machine_list == 'basic':
        machine_list = ['tree', 'ridge', 'random_forest', 'svm']
    if machine_list == 'advanced':
        machine_list = ['lasso', 'tree', 'ridge', 'random_forest', 'svm', 'bayesian_ridge', 'sgd']

    self.estimators_ = {}
    for machine in machine_list:
        try:
            if machine == 'lasso':
                self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'tree':
                self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'ridge':
                self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
            if machine == 'random_forest':
                self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'svm':
                self.estimators_['svm'] = SVR().fit(self.X_k_, self.y_k_)
            if machine == 'sgd':
                self.estimators_['sgd'] = linear_model.SGDRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'bayesian_ridge':
                self.estimators_['bayesian_ridge'] = linear_model.BayesianRidge().fit(self.X_k_, self.y_k_)
        except ValueError:
            continue
    return self
Example #19
Source File: cobra.py From pycobra with MIT License | 5 votes |
def load_default(self, machine_list='basic'):
    """
    Loads 4 different scikit-learn regressors by default. The advanced list adds more machines.

    Parameters
    ----------
    machine_list: optional, list of strings
        List of default machine names to be loaded.

    Returns
    -------
    self : returns an instance of self.
    """
    if machine_list == 'basic':
        machine_list = ['tree', 'ridge', 'random_forest', 'svm']
    if machine_list == 'advanced':
        machine_list = ['lasso', 'tree', 'ridge', 'random_forest', 'svm', 'bayesian_ridge', 'sgd']

    self.estimators_ = {}
    for machine in machine_list:
        try:
            if machine == 'lasso':
                self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'tree':
                self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'ridge':
                self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
            if machine == 'random_forest':
                self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'svm':
                self.estimators_['svm'] = LinearSVR(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'sgd':
                self.estimators_['sgd'] = linear_model.SGDRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'bayesian_ridge':
                self.estimators_['bayesian_ridge'] = linear_model.BayesianRidge().fit(self.X_k_, self.y_k_)
        except ValueError:
            continue
    return self
Example #20
Source File: scikit_wrapper.py From CatLearn with GNU General Public License v3.0 | 5 votes |
def _lasso(self):
    """Order features according to their corresponding coefficients."""
    if self.line_search:
        pred = None
        try:
            alpha_list = np.geomspace(self.max_alpha, self.min_alpha,
                                      self.steps)
        except AttributeError:
            alpha_list = np.exp(np.linspace(np.log(self.max_alpha),
                                            np.log(self.min_alpha),
                                            self.steps))
        for alpha in alpha_list:
            regr = Lasso(alpha=alpha, max_iter=self.iter,
                         fit_intercept=True, normalize=True,
                         selection='random')
            model = regr.fit(self.train_matrix, self.train_target)
            nz = len(model.coef_) - (model.coef_ == 0.).sum()
            if nz >= self.size:
                coeff = model.coef_
                break
    else:
        regr = LassoCV(fit_intercept=True, normalize=True,
                       n_alphas=self.steps, max_iter=self.iter,
                       eps=self.eps, cv=None)
        model = regr.fit(X=self.train_matrix, y=self.train_target)
        coeff = model.coef_

        # Make the linear prediction.
        pred = None
        if self.predict:
            data = model.predict(self.test_matrix)
            pred = get_error(prediction=data,
                             target=self.test_target)['average']

    return coeff, pred
Example #21
Source File: Booster.py From Fast-and-Accurate-Least-Mean-Squares-Solvers with MIT License | 5 votes |
def get_new_clf(solver, folds=3, alphas=100):
    kf = KFold(n_splits=folds, shuffle=False)
    if "linear" == solver:
        clf = linear_model.LinearRegression(fit_intercept=False)
    if "ridge" == solver:
        alphas = np.arange(1/alphas, 10 + 1/alphas, 10/alphas)
        clf = linear_model.RidgeCV(alphas=alphas, fit_intercept=False, cv=kf)
    elif "lasso" == solver:
        clf = linear_model.LassoCV(n_alphas=alphas, fit_intercept=False, cv=kf)
    elif "elastic" == solver:
        clf = linear_model.ElasticNetCV(n_alphas=alphas, fit_intercept=False, cv=kf)
    return clf
Example #22
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_precompute_invalid_argument():
    X, y, _, _ = build_dataset()
    for clf in [ElasticNetCV(precompute="invalid"),
                LassoCV(precompute="invalid")]:
        assert_raises_regex(ValueError, ".*should be.*True.*False.*auto.*"
                            "array-like.*Got 'invalid'", clf.fit, X, y)

    # Precompute = 'auto' is not supported for ElasticNet
    assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'", ElasticNet(precompute='auto').fit, X, y)
Example #23
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    X, y, _, _ = build_dataset(n_features=10)
    y = y[:, np.newaxis]
    clf = LassoCV(n_alphas=5, eps=2e-3)
    clf.fit(X, y[:, 0])
    clf1 = MultiTaskLassoCV(n_alphas=5, eps=2e-3)
    clf1.fit(X, y)
    assert_almost_equal(clf.alpha_, clf1.alpha_)
    assert_almost_equal(clf.coef_, clf1.coef_[0])
    assert_almost_equal(clf.intercept_, clf1.intercept_[0])
Example #24
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_uniform_targets():
    enet = ElasticNetCV(fit_intercept=True, n_alphas=3)
    m_enet = MultiTaskElasticNetCV(fit_intercept=True, n_alphas=3)
    lasso = LassoCV(fit_intercept=True, n_alphas=3)
    m_lasso = MultiTaskLassoCV(fit_intercept=True, n_alphas=3)

    models_single_task = (enet, lasso)
    models_multi_task = (m_enet, m_lasso)

    rng = np.random.RandomState(0)

    X_train = rng.random_sample(size=(10, 3))
    X_test = rng.random_sample(size=(10, 3))

    y1 = np.empty(10)
    y2 = np.empty((10, 2))

    for model in models_single_task:
        for y_values in (0, 5):
            y1.fill(y_values)
            assert_array_equal(model.fit(X_train, y1).predict(X_test), y1)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)

    for model in models_multi_task:
        for y_values in (0, 5):
            y2[:, 0].fill(y_values)
            y2[:, 1].fill(2 * y_values)
            assert_array_equal(model.fit(X_train, y2).predict(X_test), y2)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)
Example #25
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_lasso_cv_positive_constraint():
    X, y, X_test, y_test = build_dataset()
    max_iter = 500

    # Ensure the unconstrained fit has a negative coefficient
    clf_unconstrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter, cv=2,
                                n_jobs=1)
    clf_unconstrained.fit(X, y)
    assert min(clf_unconstrained.coef_) < 0

    # On same data, constrained fit has non-negative coefficients
    clf_constrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
                              positive=True, cv=2, n_jobs=1)
    clf_constrained.fit(X, y)
    assert min(clf_constrained.coef_) >= 0
Example #26
Source File: match_space.py From SparseSC with MIT License | 5 votes |
def _D_LassoCV_MatchSpace(
    X, Y, X_full, D_full, v_pens=None, n_v_cv=5, sample_frac=1, y_V_share=0.5, **kwargs
):  # pylint: disable=missing-param-doc, unused-argument
    if sample_frac < 1:
        N_y = X.shape[0]
        sample_y = np.random.choice(N_y, int(sample_frac * N_y), replace=False)
        X = X[sample_y, :]
        Y = Y[sample_y, :]
        N_d = D_full.shape[0]
        sample_d = np.random.choice(N_d, int(sample_frac * N_d), replace=False)
        X_full = X_full[sample_d, :]
        D_full = D_full[sample_d]
    y_varselectorfit = MultiTaskLassoCV(normalize=True, cv=n_v_cv, alphas=v_pens).fit(
        X, Y
    )
    y_V = np.sqrt(
        np.sum(np.square(y_varselectorfit.coef_), axis=0)
    )  # n_tasks x n_features -> n_feature
    best_y_v_pen = y_varselectorfit.alpha_

    d_varselectorfit = LassoCV(normalize=True, cv=n_v_cv, alphas=v_pens).fit(
        X_full, D_full
    )
    d_V = np.abs(d_varselectorfit.coef_)
    best_d_v_pen = d_varselectorfit.alpha_

    m_sel = (y_V + d_V) != 0
    transformer = SelMatchSpace(m_sel)
    if y_V.sum() == 0:
        V = d_V
    elif d_V.sum() == 0:
        V = y_V
    else:
        V = y_V_share * y_V / (y_V.sum()) + (1 - y_V_share) * d_V / (2 * d_V.sum())
    return transformer, V[m_sel], (best_y_v_pen, best_d_v_pen), V
Example #27
Source File: feature_selection.py From ecg-classification with GNU General Public License v3.0 | 4 votes |
def run_feature_selection(features, labels, feature_selection, best_features):

    if feature_selection == 'select_K_Best':
        # feature extraction
        selector = SelectKBest(score_func=f_classif, k=4)  # score_func=chi2 : only for non-negative features
        selector.fit(features, labels)
        # summarize scores
        scores = selector.scores_
        features_index_sorted = np.argsort(-scores)
        features_selected = features[:, features_index_sorted[0:best_features]]

    # SelectFromModel and LassoCV
    # We use the base estimator LassoCV since the L1 norm promotes sparsity of features.
    if feature_selection == 'LassoCV':
        clf = LassoCV()

        # Set a minimum threshold of 0.25
        sfm = SelectFromModel(clf, threshold=0.95)
        sfm.fit(features, labels)
        features_selected = sfm.transform(features).shape[1]

        """
        # Reset the threshold till the number of features equals two.
        # Note that the attribute can be set directly instead of repeatedly
        # fitting the metatransformer.
        while n_features > 2:
            sfm.threshold += 0.1
            X_transform = sfm.transform(X)
            n_features = X_transform.shape[1]
        """

    # Univariate feature selection
    # Univariate feature selection works by selecting the best features based on univariate statistical tests.
    # It can be seen as a preprocessing step to an estimator.
    # Scikit-learn exposes feature selection routines as objects that implement the transform method:
    #   - SelectKBest removes all but the k highest scoring features
    #   - SelectPercentile removes all but a user-specified highest scoring percentage of features
    #     common univariate statistical tests for each feature: false positive rate SelectFpr,
    #     false discovery rate SelectFdr, or family wise error SelectFwe.
    #   - GenericUnivariateSelect allows to perform univariate feature selection with a configurable strategy.
    #     This allows to select the best univariate selection strategy with hyper-parameter search estimator.
    if feature_selection == 'slct_percentile':
        selector = SelectPercentile(f_classif, percentile=10)
        selector.fit(features, labels)
        # The percentile not affect. Just select in order the top features by number or threshold

        # Keep best 8 values?
        scores = selector.scores_
        features_index_sorted = np.argsort(-scores)

        # scores = selector.scores_
        # scores = -np.log10(selector.pvalues_)
        # scores /= scores.max()

        features_selected = features[:, features_index_sorted[0:best_features]]

    print("Selected only " + str(features_selected.shape) + " features ")

    return features_selected, features_index_sorted
Example #28
Source File: modeler.py From rsmtool with Apache License 2.0 | 4 votes |
def train_positive_lasso_cv(self, df_train, feature_columns):
    """
    Train `PositiveLassoCV` (formerly lassoWtLassoBest) -
    Feature selection using lasso regression optimized for
    log likelihood using cross validation.

    Parameters
    ----------
    df_train : pd.DataFrame
        Data frame containing the features on which to train the model.
    feature_columns : list
        A list of feature columns to use in training the model.

    Returns
    -------
    learner : skll.Learner
        The SKLL learner object
    fit : statsmodels.RegressionResults
        A statsmodels regression results object or None.
    df_coef : pd.DataFrame
        The model coefficients in a data_frame
    used_features : list
        A list of features used in the final model.
    """
    # train a LassoCV outside of SKLL since it's not exposed there
    X = df_train[feature_columns].values
    y = df_train['sc1'].values
    clf = LassoCV(cv=10, positive=True, random_state=1234567890)
    model = clf.fit(X, y)

    # save the non-zero model coefficients and intercept to a data frame
    non_zero_features, non_zero_feature_values = [], []
    for feature, coefficient in zip(feature_columns, model.coef_):
        if coefficient != 0:
            non_zero_features.append(feature)
            non_zero_feature_values.append(coefficient)

    # initialize the coefficient data frame with just the intercept
    df_coef = pd.DataFrame([('Intercept', model.intercept_)])
    df_coef = df_coef.append(list(zip(non_zero_features,
                                      non_zero_feature_values)),
                             ignore_index=True)
    df_coef.columns = ['feature', 'coefficient']

    # create a fake SKLL learner with these non-zero weights
    learner = self.create_fake_skll_learner(df_coef)

    # there's no OLS fit object in this case
    fit = None

    # we used only the non-zero features
    used_features = non_zero_features

    return learner, fit, df_coef, used_features
Example #29
Source File: modeler.py From rsmtool with Apache License 2.0 | 4 votes |
def train_positive_lasso_cv_then_lr(self, df_train, feature_columns):
    """
    Train `PositiveLassoCVThenLR` (formerly empWtLassoBest) -
    First do feature selection using lasso regression optimized
    for log likelihood using cross validation and then use only
    those features to train a second linear regression

    Parameters
    ----------
    df_train : pd.DataFrame
        Data frame containing the features on which to train the model.
    feature_columns : list
        A list of feature columns to use in training the model.

    Returns
    -------
    learner : skll.Learner
        The SKLL learner object
    fit : statsmodels.RegressionResults
        A statsmodels regression results object.
    df_coef : pd.DataFrame
        The model coefficients in a data_frame
    used_features : list
        A list of features used in the final model.
    """
    # train a LassoCV outside of SKLL since it's not exposed there
    X = df_train[feature_columns].values
    y = df_train['sc1'].values
    clf = LassoCV(cv=10, positive=True, random_state=1234567890)
    model = clf.fit(X, y)

    # get the non-zero features from this model
    non_zero_features = []
    for feature, coefficient in zip(feature_columns, model.coef_):
        if coefficient != 0:
            non_zero_features.append(feature)

    # now train a new linear regression with just these non-zero features
    X = df_train[non_zero_features]
    X = sm.add_constant(X)
    fit = sm.OLS(df_train['sc1'], X).fit()

    # convert the model parameters into a data frame
    df_coef = self.ols_coefficients_to_dataframe(fit.params)

    # create fake SKLL learner with these coefficients
    learner = self.create_fake_skll_learner(df_coef)

    # we used only the non-zero features
    used_features = non_zero_features

    return learner, fit, df_coef, used_features