Python sklearn.linear_model.LassoCV() Examples
The following are 29 code examples of sklearn.linear_model.LassoCV(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.linear_model, or try the search function.
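Before the project examples, here is a minimal, self-contained sketch of typical LassoCV usage. The synthetic dataset, variable names, and parameter choices below are illustrative assumptions and are not taken from any of the projects listed on this page.

from sklearn.datasets import make_regression
from sklearn.linear_model import LassoCV

# Illustrative sketch only: synthetic regression data.
X, y = make_regression(n_samples=200, n_features=20, noise=1.0, random_state=0)

# LassoCV selects the regularization strength alpha by cross-validation
# over an automatically generated (or user-supplied) grid of alphas.
reg = LassoCV(cv=5, random_state=0).fit(X, y)

print(reg.alpha_)        # alpha selected by cross-validation
print(reg.coef_)         # sparse coefficient vector at the selected alpha
print(reg.score(X, y))   # R^2 on the training data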
Example #1
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_lasso_cv():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementation
    # select a similar alpha
    lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y)
    # for this we check that they don't fall in the grid of
    # clf.alphas further than 1
    assert np.abs(
        np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
        np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1
    # check that they also give a similar MSE
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(),
                                   significant=2)

    # test set
    assert_greater(clf.score(X_test, y_test), 0.99)
Example #2
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_lasso_cv_with_some_model_selection():
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import StratifiedKFold
    from sklearn import datasets
    from sklearn.linear_model import LassoCV

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target

    pipe = make_pipeline(
        StandardScaler(),
        LassoCV(cv=StratifiedKFold(n_splits=5))
    )
    pipe.fit(X, y)
Example #3
Source File: ewa.py From pycobra with MIT License | 6 votes |
def load_default(self, machine_list=['lasso', 'tree', 'ridge', 'random_forest', 'svm']):
    """
    Loads 4 different scikit-learn regressors by default.

    Parameters
    ----------
    machine_list: optional, list of strings
        List of default machine names to be loaded.
    """
    for machine in machine_list:
        try:
            if machine == 'lasso':
                self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'tree':
                self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'ridge':
                self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
            if machine == 'random_forest':
                self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'svm':
                self.estimators_['svm'] = SVR().fit(self.X_k_, self.y_k_)
        except ValueError:
            continue
Example #4
Source File: gd_poisoners.py From manip-ml with MIT License | 6 votes |
def learn_model(self, x, y, clf, lam=None):
    if (lam is None and self.initlam != -1):  # hack for first training
        lam = self.initlam
    if clf is None:
        if lam is None:
            clf = linear_model.LassoCV(max_iter=10000)
            clf.fit(x, y)
            lam = clf.alpha_
        clf = linear_model.Lasso(alpha=lam,
                                 max_iter=10000,
                                 warm_start=True)
    clf.fit(x, y)
    return clf, lam


############################################################################################
# Implements GD Poisoning for Ridge Linear Regression
############################################################################################
Example #5
Source File: test_coordinate_descent.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_lasso_cv_with_some_model_selection():
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import StratifiedKFold
    from sklearn import datasets
    from sklearn.linear_model import LassoCV

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target

    pipe = make_pipeline(
        StandardScaler(),
        LassoCV(cv=StratifiedKFold(n_splits=5))
    )
    pipe.fit(X, y)
Example #6
Source File: test_coordinate_descent.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_lasso_cv():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementation
    # select a similar alpha
    lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y)
    # for this we check that they don't fall in the grid of
    # clf.alphas further than 1
    assert_true(np.abs(
        np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
        np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1)
    # check that they also give a similar MSE
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(),
                                   significant=2)

    # test set
    assert_greater(clf.score(X_test, y_test), 0.99)
Example #7
Source File: feature_selection.py From CatLearn with GNU General Public License v3.0 | 6 votes |
def feature_inspection(self, lower=0, upper=1, interval=10**2,
                       alpha_list=None):
    """Generate interval used to search for the alpha.

    Parameters
    ----------
    lower : int
        Lower bound for the interval search.
    upper : int
        Upper bound for the interval search.
    interval: int
        Number of alphas in interval inspected.
    """
    feat_vec, alpha_vec = [], []
    if alpha_list is None:
        alpha_list = np.linspace(float(upper), float(lower), int(interval))
    for alpha in alpha_list:
        model = LassoCV(alphas=[alpha], cv=3).fit(X=self.train_features,
                                                  y=self.train_targets)
        feat_vec.append(np.shape(np.nonzero(model.coef_))[1])
        alpha_vec.append(alpha)

    return feat_vec, alpha_vec, np.nonzero(model.coef_)
Example #8
Source File: FeatureSelector.py From CDSS with GNU General Public License v3.0 | 5 votes |
def _eliminate_recursively(self, k=None):
    if self._problem == FeatureSelector.CLASSIFICATION:
        estimator = RandomForestClassifier(random_state=self._random_state)
    else:
        estimator = LassoCV(random_state=self._random_state)

    # If k is not specified, then use RFECV to automatically decide on
    # optimal number of features. If specified, then use RFE.
    if k is None:
        self._selector = RFECV(estimator)
    else:
        self._selector = RFE(estimator, n_features_to_select=k, step=0.05)
Example #9
Source File: test_coordinate_descent.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_precompute_invalid_argument():
    X, y, _, _ = build_dataset()
    for clf in [ElasticNetCV(precompute="invalid"),
                LassoCV(precompute="invalid")]:
        assert_raises_regex(ValueError, ".*should be.*True.*False.*auto.*"
                            "array-like.*Got 'invalid'", clf.fit, X, y)

    # Precompute = 'auto' is not supported for ElasticNet
    assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'", ElasticNet(precompute='auto').fit, X, y)
Example #10
Source File: Regressor.py From CDSS with GNU General Public License v3.0 | 5 votes |
def _train_lasso(self, X, y):
    self._model = LassoCV()
    self._model.fit(X, y)
Example #11
Source File: test_sklearn_glm_regressor_converter.py From sklearn-onnx with MIT License | 5 votes |
def test_model_lasso_cv(self):
    model, X = fit_regression_model(linear_model.LassoCV())
    model_onnx = convert_sklearn(
        model, "lasso cv",
        [("input", FloatTensorType([None, X.shape[1]]))])
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X,
        model,
        model_onnx,
        basename="SklearnLassoCV-Dec4",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #12
Source File: test_linear_model.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.linear_model.ARDRegression, lm.ARDRegression)
    self.assertIs(df.linear_model.BayesianRidge, lm.BayesianRidge)
    self.assertIs(df.linear_model.ElasticNet, lm.ElasticNet)
    self.assertIs(df.linear_model.ElasticNetCV, lm.ElasticNetCV)
    self.assertIs(df.linear_model.HuberRegressor, lm.HuberRegressor)
    self.assertIs(df.linear_model.Lars, lm.Lars)
    self.assertIs(df.linear_model.LarsCV, lm.LarsCV)
    self.assertIs(df.linear_model.Lasso, lm.Lasso)
    self.assertIs(df.linear_model.LassoCV, lm.LassoCV)
    self.assertIs(df.linear_model.LassoLars, lm.LassoLars)
    self.assertIs(df.linear_model.LassoLarsCV, lm.LassoLarsCV)
    self.assertIs(df.linear_model.LassoLarsIC, lm.LassoLarsIC)
    self.assertIs(df.linear_model.LinearRegression, lm.LinearRegression)
    self.assertIs(df.linear_model.LogisticRegression, lm.LogisticRegression)
    self.assertIs(df.linear_model.LogisticRegressionCV, lm.LogisticRegressionCV)
    self.assertIs(df.linear_model.MultiTaskLasso, lm.MultiTaskLasso)
    self.assertIs(df.linear_model.MultiTaskElasticNet, lm.MultiTaskElasticNet)
    self.assertIs(df.linear_model.MultiTaskLassoCV, lm.MultiTaskLassoCV)
    self.assertIs(df.linear_model.MultiTaskElasticNetCV, lm.MultiTaskElasticNetCV)
    self.assertIs(df.linear_model.OrthogonalMatchingPursuit, lm.OrthogonalMatchingPursuit)
    self.assertIs(df.linear_model.OrthogonalMatchingPursuitCV, lm.OrthogonalMatchingPursuitCV)
    self.assertIs(df.linear_model.PassiveAggressiveClassifier, lm.PassiveAggressiveClassifier)
    self.assertIs(df.linear_model.PassiveAggressiveRegressor, lm.PassiveAggressiveRegressor)
    self.assertIs(df.linear_model.Perceptron, lm.Perceptron)
    self.assertIs(df.linear_model.RandomizedLasso, lm.RandomizedLasso)
    self.assertIs(df.linear_model.RandomizedLogisticRegression, lm.RandomizedLogisticRegression)
    self.assertIs(df.linear_model.RANSACRegressor, lm.RANSACRegressor)
    self.assertIs(df.linear_model.Ridge, lm.Ridge)
    self.assertIs(df.linear_model.RidgeClassifier, lm.RidgeClassifier)
    self.assertIs(df.linear_model.RidgeClassifierCV, lm.RidgeClassifierCV)
    self.assertIs(df.linear_model.RidgeCV, lm.RidgeCV)
    self.assertIs(df.linear_model.SGDClassifier, lm.SGDClassifier)
    self.assertIs(df.linear_model.SGDRegressor, lm.SGDRegressor)
    self.assertIs(df.linear_model.TheilSenRegressor, lm.TheilSenRegressor)
Example #13
Source File: beamformers_electrodes_tweak.py From mmvt with GNU General Public License v3.0 | 5 votes |
def calc_optimization_features(optimization_method, freqs_bins, cond, meg_data_dic, elec_data, electrodes,
                               from_t, to_t, optimization_params={}):
    # scorer = make_scorer(rol_corr, False)
    cv_parameters = []
    if optimization_method in ['Ridge', 'RidgeCV', 'Lasso', 'LassoCV', 'ElasticNet', 'ElasticNetCV']:
        # vstack all meg data, such that X.shape = T*n X F, where n is the electrodes num
        # Y is T*n * 1
        X = np.hstack((meg_data_dic[electrode][:, from_t:to_t] for electrode in electrodes))
        Y = np.hstack((elec_data[electrode][cond][from_t:to_t] for electrode in electrodes))
        funcs_dic = {'Ridge': Ridge(alpha=0.1),
                     'RidgeCV': RidgeCV(np.logspace(0, -10, 11)),  # scoring=scorer
                     'Lasso': Lasso(alpha=1.0/X.shape[0]),
                     'LassoCV': LassoCV(alphas=np.logspace(0, -10, 11), max_iter=1000),
                     'ElasticNetCV': ElasticNetCV(alphas=np.logspace(0, -10, 11), l1_ratio=np.linspace(0, 1, 11))}
        clf = funcs_dic[optimization_method]
        clf.fit(X.T, Y)
        p = clf.coef_
        if len(p) != len(freqs_bins):
            raise Exception('{} (len(clf.coef)) != {} (len(freqs_bin))!!!'.format(len(p), len(freqs_bins)))
        if optimization_method in ['RidgeCV', 'LassoCV']:
            cv_parameters = clf.alpha_
        elif optimization_method == 'ElasticNetCV':
            cv_parameters = [clf.alpha_, clf.l1_ratio_]
        args = [(meg_pred(p, meg_data_dic[electrode][:, from_t:to_t]), elec_data[electrode][cond][from_t:to_t])
                for electrode in electrodes]
        p0 = leastsq(post_ridge_err_func, [1], args=args, maxfev=0)[0]
        p = np.hstack((p0, p))
    elif optimization_method in ['leastsq', 'dtw', 'minmax', 'diff_rms', 'rol_corr']:
        args = ([(meg_data_dic[electrode][:, from_t:to_t], elec_data[electrode][cond][from_t:to_t])
                 for electrode in electrodes], optimization_params)
        p0 = np.ones((1, len(freqs_bins)+1))
        funcs_dic = {'leastsq': partial(leastsq, func=err_func, x0=p0, args=args),
                     'dtw': partial(minimize, fun=dtw_err_func, x0=p0, args=args),
                     'minmax': partial(minimize, fun=minmax_err_func, x0=p0, args=args),
                     'diff_rms': partial(minimize, fun=min_diff_rms_err_func, x0=p0, args=args),
                     'rol_corr': partial(minimize, fun=max_rol_corr, x0=p0, args=args)}
        res = funcs_dic[optimization_method]()
        p = res[0] if optimization_method == 'leastsq' else res.x
        cv_parameters = optimization_params
    else:
        raise Exception('Unknown optimization_method! {}'.format(optimization_method))
    return p, cv_parameters
Example #14
Source File: test_coordinate_descent.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_lasso_cv_positive_constraint():
    X, y, X_test, y_test = build_dataset()
    max_iter = 500

    # Ensure the unconstrained fit has a negative coefficient
    clf_unconstrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter, cv=2,
                                n_jobs=1)
    clf_unconstrained.fit(X, y)
    assert_true(min(clf_unconstrained.coef_) < 0)

    # On same data, constrained fit has non-negative coefficients
    clf_constrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
                              positive=True, cv=2, n_jobs=1)
    clf_constrained.fit(X, y)
    assert_true(min(clf_constrained.coef_) >= 0)
Example #15
Source File: test_coordinate_descent.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_uniform_targets():
    enet = ElasticNetCV(fit_intercept=True, n_alphas=3)
    m_enet = MultiTaskElasticNetCV(fit_intercept=True, n_alphas=3)
    lasso = LassoCV(fit_intercept=True, n_alphas=3)
    m_lasso = MultiTaskLassoCV(fit_intercept=True, n_alphas=3)

    models_single_task = (enet, lasso)
    models_multi_task = (m_enet, m_lasso)

    rng = np.random.RandomState(0)

    X_train = rng.random_sample(size=(10, 3))
    X_test = rng.random_sample(size=(10, 3))

    y1 = np.empty(10)
    y2 = np.empty((10, 2))

    for model in models_single_task:
        for y_values in (0, 5):
            y1.fill(y_values)
            assert_array_equal(model.fit(X_train, y1).predict(X_test), y1)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)

    for model in models_multi_task:
        for y_values in (0, 5):
            y2[:, 0].fill(y_values)
            y2[:, 1].fill(2 * y_values)
            assert_array_equal(model.fit(X_train, y2).predict(X_test), y2)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)
Example #16
Source File: test_coordinate_descent.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    X, y, _, _ = build_dataset(n_features=10)
    y = y[:, np.newaxis]
    clf = LassoCV(n_alphas=5, eps=2e-3)
    clf.fit(X, y[:, 0])
    clf1 = MultiTaskLassoCV(n_alphas=5, eps=2e-3)
    clf1.fit(X, y)
    assert_almost_equal(clf.alpha_, clf1.alpha_)
    assert_almost_equal(clf.coef_, clf1.coef_[0])
    assert_almost_equal(clf.intercept_, clf1.intercept_[0])
Example #17
Source File: TermDocMatrix.py From scattertext with Apache License 2.0 | 5 votes |
def get_logistic_regression_coefs_l1(self, category,
                                     clf=LassoCV(alphas=[0.1, 0.001],
                                                 max_iter=10000,
                                                 n_jobs=-1)):
    ''' Computes l1-penalized logistic regression score.

    Parameters
    ----------
    category : str
        category name to score

    Returns
    -------
    (coefficient array, accuracy, majority class baseline accuracy)
    '''
    try:
        from sklearn.cross_validation import cross_val_predict
    except:
        from sklearn.model_selection import cross_val_predict
    y = self._get_mask_from_category(category)
    y_continuous = self._get_continuous_version_boolean_y(y)
    # X = TfidfTransformer().fit_transform(self._X)
    X = self._X

    clf.fit(X, y_continuous)
    y_hat = (cross_val_predict(clf, X, y_continuous) > 0)
    acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
    clf.fit(X, y_continuous)
    return clf.coef_, acc, baseline
Example #18
Source File: kernelcobra.py From pycobra with MIT License | 5 votes |
def load_default(self, machine_list='basic'):
    """
    Loads 4 different scikit-learn regressors by default. The advanced list adds more machines.

    Parameters
    ----------
    machine_list: optional, list of strings
        List of default machine names to be loaded.
        Default is basic,

    Returns
    -------
    self : returns an instance of self.
    """
    if machine_list == 'basic':
        machine_list = ['tree', 'ridge', 'random_forest', 'svm']
    if machine_list == 'advanced':
        machine_list = ['lasso', 'tree', 'ridge', 'random_forest', 'svm', 'bayesian_ridge', 'sgd']

    self.estimators_ = {}
    for machine in machine_list:
        try:
            if machine == 'lasso':
                self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'tree':
                self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'ridge':
                self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
            if machine == 'random_forest':
                self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'svm':
                self.estimators_['svm'] = SVR().fit(self.X_k_, self.y_k_)
            if machine == 'sgd':
                self.estimators_['sgd'] = linear_model.SGDRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'bayesian_ridge':
                self.estimators_['bayesian_ridge'] = linear_model.BayesianRidge().fit(self.X_k_, self.y_k_)
        except ValueError:
            continue
    return self
Example #19
Source File: cobra.py From pycobra with MIT License | 5 votes |
def load_default(self, machine_list='basic'):
    """
    Loads 4 different scikit-learn regressors by default. The advanced list adds more machines.

    Parameters
    ----------
    machine_list: optional, list of strings
        List of default machine names to be loaded.

    Returns
    -------
    self : returns an instance of self.
    """
    if machine_list == 'basic':
        machine_list = ['tree', 'ridge', 'random_forest', 'svm']
    if machine_list == 'advanced':
        machine_list = ['lasso', 'tree', 'ridge', 'random_forest', 'svm', 'bayesian_ridge', 'sgd']

    self.estimators_ = {}
    for machine in machine_list:
        try:
            if machine == 'lasso':
                self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'tree':
                self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'ridge':
                self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
            if machine == 'random_forest':
                self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'svm':
                self.estimators_['svm'] = LinearSVR(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'sgd':
                self.estimators_['sgd'] = linear_model.SGDRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'bayesian_ridge':
                self.estimators_['bayesian_ridge'] = linear_model.BayesianRidge().fit(self.X_k_, self.y_k_)
        except ValueError:
            continue
    return self
Example #20
Source File: scikit_wrapper.py From CatLearn with GNU General Public License v3.0 | 5 votes |
def _lasso(self):
    """Order features according to their corresponding coefficients."""
    if self.line_search:
        pred = None
        try:
            alpha_list = np.geomspace(self.max_alpha, self.min_alpha,
                                      self.steps)
        except AttributeError:
            alpha_list = np.exp(np.linspace(np.log(self.max_alpha),
                                            np.log(self.min_alpha),
                                            self.steps))
        for alpha in alpha_list:
            regr = Lasso(alpha=alpha, max_iter=self.iter,
                         fit_intercept=True, normalize=True,
                         selection='random')
            model = regr.fit(self.train_matrix, self.train_target)
            nz = len(model.coef_) - (model.coef_ == 0.).sum()
            if nz >= self.size:
                coeff = model.coef_
                break
    else:
        regr = LassoCV(fit_intercept=True, normalize=True,
                       n_alphas=self.steps, max_iter=self.iter,
                       eps=self.eps, cv=None)
        model = regr.fit(X=self.train_matrix, y=self.train_target)
        coeff = model.coef_

        # Make the linear prediction.
        pred = None
        if self.predict:
            data = model.predict(self.test_matrix)
            pred = get_error(prediction=data,
                             target=self.test_target)['average']

    return coeff, pred
Example #21
Source File: Booster.py From Fast-and-Accurate-Least-Mean-Squares-Solvers with MIT License | 5 votes |
def get_new_clf(solver, folds=3, alphas=100):
    kf = KFold(n_splits=folds, shuffle=False)
    if "linear" == solver:
        clf = linear_model.LinearRegression(fit_intercept=False)
    if "ridge" == solver:
        alphas = np.arange(1/alphas, 10 + 1/alphas, 10/alphas)
        clf = linear_model.RidgeCV(alphas=alphas, fit_intercept=False, cv=kf)
    elif "lasso" == solver:
        clf = linear_model.LassoCV(n_alphas=alphas, fit_intercept=False, cv=kf)
    elif "elastic" == solver:
        clf = linear_model.ElasticNetCV(n_alphas=alphas, fit_intercept=False, cv=kf)
    return clf
Example #22
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_precompute_invalid_argument():
    X, y, _, _ = build_dataset()
    for clf in [ElasticNetCV(precompute="invalid"),
                LassoCV(precompute="invalid")]:
        assert_raises_regex(ValueError, ".*should be.*True.*False.*auto.*"
                            "array-like.*Got 'invalid'", clf.fit, X, y)

    # Precompute = 'auto' is not supported for ElasticNet
    assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'", ElasticNet(precompute='auto').fit, X, y)
Example #23
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    X, y, _, _ = build_dataset(n_features=10)
    y = y[:, np.newaxis]
    clf = LassoCV(n_alphas=5, eps=2e-3)
    clf.fit(X, y[:, 0])
    clf1 = MultiTaskLassoCV(n_alphas=5, eps=2e-3)
    clf1.fit(X, y)
    assert_almost_equal(clf.alpha_, clf1.alpha_)
    assert_almost_equal(clf.coef_, clf1.coef_[0])
    assert_almost_equal(clf.intercept_, clf1.intercept_[0])
Example #24
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_uniform_targets():
    enet = ElasticNetCV(fit_intercept=True, n_alphas=3)
    m_enet = MultiTaskElasticNetCV(fit_intercept=True, n_alphas=3)
    lasso = LassoCV(fit_intercept=True, n_alphas=3)
    m_lasso = MultiTaskLassoCV(fit_intercept=True, n_alphas=3)

    models_single_task = (enet, lasso)
    models_multi_task = (m_enet, m_lasso)

    rng = np.random.RandomState(0)

    X_train = rng.random_sample(size=(10, 3))
    X_test = rng.random_sample(size=(10, 3))

    y1 = np.empty(10)
    y2 = np.empty((10, 2))

    for model in models_single_task:
        for y_values in (0, 5):
            y1.fill(y_values)
            assert_array_equal(model.fit(X_train, y1).predict(X_test), y1)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)

    for model in models_multi_task:
        for y_values in (0, 5):
            y2[:, 0].fill(y_values)
            y2[:, 1].fill(2 * y_values)
            assert_array_equal(model.fit(X_train, y2).predict(X_test), y2)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)
Example #25
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_lasso_cv_positive_constraint():
    X, y, X_test, y_test = build_dataset()
    max_iter = 500

    # Ensure the unconstrained fit has a negative coefficient
    clf_unconstrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter, cv=2,
                                n_jobs=1)
    clf_unconstrained.fit(X, y)
    assert min(clf_unconstrained.coef_) < 0

    # On same data, constrained fit has non-negative coefficients
    clf_constrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
                              positive=True, cv=2, n_jobs=1)
    clf_constrained.fit(X, y)
    assert min(clf_constrained.coef_) >= 0
Example #26
Source File: match_space.py From SparseSC with MIT License | 5 votes |
def _D_LassoCV_MatchSpace(
    X, Y, X_full, D_full, v_pens=None, n_v_cv=5, sample_frac=1, y_V_share=0.5, **kwargs
):  # pylint: disable=missing-param-doc, unused-argument
    if sample_frac < 1:
        N_y = X.shape[0]
        sample_y = np.random.choice(N_y, int(sample_frac * N_y), replace=False)
        X = X[sample_y, :]
        Y = Y[sample_y, :]
        N_d = D_full.shape[0]
        sample_d = np.random.choice(N_d, int(sample_frac * N_d), replace=False)
        X_full = X_full[sample_d, :]
        D_full = D_full[sample_d]
    y_varselectorfit = MultiTaskLassoCV(normalize=True, cv=n_v_cv, alphas=v_pens).fit(
        X, Y
    )
    y_V = np.sqrt(
        np.sum(np.square(y_varselectorfit.coef_), axis=0)
    )  # n_tasks x n_features -> n_feature
    best_y_v_pen = y_varselectorfit.alpha_

    d_varselectorfit = LassoCV(normalize=True, cv=n_v_cv, alphas=v_pens).fit(
        X_full, D_full
    )
    d_V = np.abs(d_varselectorfit.coef_)
    best_d_v_pen = d_varselectorfit.alpha_

    m_sel = (y_V + d_V) != 0
    transformer = SelMatchSpace(m_sel)
    if y_V.sum() == 0:
        V = d_V
    elif d_V.sum() == 0:
        V = y_V
    else:
        V = y_V_share * y_V / (y_V.sum()) + (1 - y_V_share) * d_V / (2 * d_V.sum())
    return transformer, V[m_sel], (best_y_v_pen, best_d_v_pen), V
Example #27
Source File: feature_selection.py From ecg-classification with GNU General Public License v3.0 | 4 votes |
def run_feature_selection(features, labels, feature_selection, best_features):

    if feature_selection == 'select_K_Best':
        # feature extraction
        selector = SelectKBest(score_func=f_classif, k=4)  # score_func=chi2 : only for non-negative features
        selector.fit(features, labels)
        # summarize scores
        scores = selector.scores_
        features_index_sorted = np.argsort(-scores)
        features_selected = features[:, features_index_sorted[0:best_features]]

    # SelectFromModel and LassoCV
    # We use the base estimator LassoCV since the L1 norm promotes sparsity of features.
    if feature_selection == 'LassoCV':
        clf = LassoCV()

        # Set a minimum threshold of 0.25
        sfm = SelectFromModel(clf, threshold=0.95)
        sfm.fit(features, labels)
        features_selected = sfm.transform(features).shape[1]

        """
        # Reset the threshold till the number of features equals two.
        # Note that the attribute can be set directly instead of repeatedly
        # fitting the metatransformer.
        while n_features > 2:
            sfm.threshold += 0.1
            X_transform = sfm.transform(X)
            n_features = X_transform.shape[1]
        """

    # Univariate feature selection
    # Univariate feature selection works by selecting the best features based on univariate statistical tests.
    # It can be seen as a preprocessing step to an estimator.
    # Scikit-learn exposes feature selection routines as objects that implement the transform method:
    #   - SelectKBest removes all but the k highest scoring features
    #   - SelectPercentile removes all but a user-specified highest scoring percentage of features
    #     common univariate statistical tests for each feature: false positive rate SelectFpr,
    #     false discovery rate SelectFdr, or family wise error SelectFwe.
    #   - GenericUnivariateSelect allows to perform univariate feature selection with a configurable strategy.
    #     This allows to select the best univariate selection strategy with hyper-parameter search estimator.
    if feature_selection == 'slct_percentile':
        selector = SelectPercentile(f_classif, percentile=10)
        selector.fit(features, labels)
        # The percentile not affect. Just select in order the top features by number or threshold

        # Keep best 8 values?
        scores = selector.scores_
        features_index_sorted = np.argsort(-scores)

        # scores = selector.scores_
        # scores = -np.log10(selector.pvalues_)
        # scores /= scores.max()

        features_selected = features[:, features_index_sorted[0:best_features]]

    print("Selected only " + str(features_selected.shape) + " features ")

    return features_selected, features_index_sorted
Example #28
Source File: modeler.py From rsmtool with Apache License 2.0 | 4 votes |
def train_positive_lasso_cv(self, df_train, feature_columns):
    """
    Train `PositiveLassoCV` (formerly lassoWtLassoBest) -
    Feature selection using lasso regression optimized for
    log likelihood using cross validation.

    Parameters
    ----------
    df_train : pd.DataFrame
        Data frame containing the features on which to train the model.
    feature_columns : list
        A list of feature columns to use in training the model.

    Returns
    -------
    learner : skll.Learner
        The SKLL learner object
    fit : statsmodels.RegressionResults
        A statsmodels regression results object or None.
    df_coef : pd.DataFrame
        The model coefficients in a data_frame
    used_features : list
        A list of features used in the final model.
    """
    # train a LassoCV outside of SKLL since it's not exposed there
    X = df_train[feature_columns].values
    y = df_train['sc1'].values
    clf = LassoCV(cv=10, positive=True, random_state=1234567890)
    model = clf.fit(X, y)

    # save the non-zero model coefficients and intercept to a data frame
    non_zero_features, non_zero_feature_values = [], []
    for feature, coefficient in zip(feature_columns, model.coef_):
        if coefficient != 0:
            non_zero_features.append(feature)
            non_zero_feature_values.append(coefficient)

    # initialize the coefficient data frame with just the intercept
    df_coef = pd.DataFrame([('Intercept', model.intercept_)])
    df_coef = df_coef.append(list(zip(non_zero_features,
                                      non_zero_feature_values)),
                             ignore_index=True)
    df_coef.columns = ['feature', 'coefficient']

    # create a fake SKLL learner with these non-zero weights
    learner = self.create_fake_skll_learner(df_coef)

    # there's no OLS fit object in this case
    fit = None

    # we used only the non-zero features
    used_features = non_zero_features

    return learner, fit, df_coef, used_features
Example #29
Source File: modeler.py From rsmtool with Apache License 2.0 | 4 votes |
def train_positive_lasso_cv_then_lr(self, df_train, feature_columns):
    """
    Train `PositiveLassoCVThenLR` (formerly empWtLassoBest) -
    First do feature selection using lasso regression optimized
    for log likelihood using cross validation and then use only
    those features to train a second linear regression

    Parameters
    ----------
    df_train : pd.DataFrame
        Data frame containing the features on which to train the model.
    feature_columns : list
        A list of feature columns to use in training the model.

    Returns
    -------
    learner : skll.Learner
        The SKLL learner object
    fit : statsmodels.RegressionResults
        A statsmodels regression results object.
    df_coef : pd.DataFrame
        The model coefficients in a data_frame
    used_features : list
        A list of features used in the final model.
    """
    # train a LassoCV outside of SKLL since it's not exposed there
    X = df_train[feature_columns].values
    y = df_train['sc1'].values
    clf = LassoCV(cv=10, positive=True, random_state=1234567890)
    model = clf.fit(X, y)

    # get the non-zero features from this model
    non_zero_features = []
    for feature, coefficient in zip(feature_columns, model.coef_):
        if coefficient != 0:
            non_zero_features.append(feature)

    # now train a new linear regression with just these non-zero features
    X = df_train[non_zero_features]
    X = sm.add_constant(X)
    fit = sm.OLS(df_train['sc1'], X).fit()

    # convert the model parameters into a data frame
    df_coef = self.ols_coefficients_to_dataframe(fit.params)

    # create fake SKLL learner with these coefficients
    learner = self.create_fake_skll_learner(df_coef)

    # we used only the non-zero features
    used_features = non_zero_features

    return learner, fit, df_coef, used_features