Python sklearn.cross_validation.cross_val_predict() Examples

The following are 8 code examples of sklearn.cross_validation.cross_val_predict(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.cross_validation , or try the search function .
Example #1
Source File: TermDocMatrix.py    From scattertext with Apache License 2.0 6 votes vote down vote up
def get_logistic_regression_coefs_l2(self, category,
                                         clf=RidgeClassifierCV()):
        ''' Computes l2-penalized logistic regression score.
        Parameters
        ----------
        category : str
            category name to score

        category : str
            category name to score
        Returns
        -------
            (coefficient array, accuracy, majority class baseline accuracy)
        '''
        try:
            from sklearn.cross_validation import cross_val_predict
        except:
            from sklearn.model_selection import cross_val_predict
        y = self._get_mask_from_category(category)
        X = TfidfTransformer().fit_transform(self._X)
        clf.fit(X, y)
        y_hat = cross_val_predict(clf, X, y)
        acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
        return clf.coef_[0], acc, baseline 
Example #2
Source File: TermDocMatrix.py    From scattertext with Apache License 2.0 5 votes vote down vote up
def get_logistic_regression_coefs_l1(self, category,
                                         clf=LassoCV(alphas=[0.1, 0.001],
                                                     max_iter=10000,
                                                     n_jobs=-1)):
        ''' Computes l1-penalized logistic regression score.
        Parameters
        ----------
        category : str
            category name to score

        Returns
        -------
            (coefficient array, accuracy, majority class baseline accuracy)
        '''
        try:
            from sklearn.cross_validation import cross_val_predict
        except:
            from sklearn.model_selection import cross_val_predict
        y = self._get_mask_from_category(category)
        y_continuous = self._get_continuous_version_boolean_y(y)
        # X = TfidfTransformer().fit_transform(self._X)
        X = self._X

        clf.fit(X, y_continuous)
        y_hat = (cross_val_predict(clf, X, y_continuous) > 0)
        acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
        clf.fit(X, y_continuous)
        return clf.coef_, acc, baseline 
Example #3
Source File: _test.py    From ibex with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _generate_cross_val_predict_test(X, y, est, pd_est, must_match):
    def test(self):
        self.assertEqual(
            hasattr(est, 'predict'),
            hasattr(pd_est, 'predict'))
        if not hasattr(est, 'predict'):
            return
        pd_y_hat = pd_cross_val_predict(pd_est, X, y)
        self.assertTrue(isinstance(pd_y_hat, pd.Series))
        self.assertTrue(pd_y_hat.index.equals(X.index))
        if must_match:
            y_hat = cross_val_predict(est, X.as_matrix(), y.values)
            np.testing.assert_allclose(pd_y_hat, y_hat)
    return test 
Example #4
Source File: GetPredictorPerf.py    From ProFET with GNU General Public License v3.0 5 votes vote down vote up
def demo_getPerf(X,y,Classifier,Classifier_label):
    """
    Classifier_type: Sklearn model
        Type of classifier to use, and it's parameters
    Classifier_label: string
        Descriptive Name of the classifier. e.g "Forest"
    """
    results = {}

    scores = cross_val_predict(Classifier, X, y, cv=10, n_jobs=-1)
    results[label] = get_scores(scores,y,Classifier_label)

    res_df = pd.DataFrame(results)
    res_df.to_csv(outputFileName+"tsv", sep='\t') 
Example #5
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_cross_val_predict():
    boston = load_boston()
    X, y = boston.data, boston.target
    cv = cval.KFold(len(boston.target))

    est = Ridge()

    # Naive loop (should be same as cross_val_predict):
    preds2 = np.zeros_like(y)
    for train, test in cv:
        est.fit(X[train], y[train])
        preds2[test] = est.predict(X[test])

    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_array_almost_equal(preds, preds2)

    preds = cval.cross_val_predict(est, X, y)
    assert_equal(len(preds), len(y))

    cv = cval.LeaveOneOut(len(y))
    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_equal(len(preds), len(y))

    Xsp = X.copy()
    Xsp *= (Xsp > np.median(Xsp))
    Xsp = coo_matrix(Xsp)
    preds = cval.cross_val_predict(est, Xsp, y)
    assert_array_almost_equal(len(preds), len(y))

    preds = cval.cross_val_predict(KMeans(), X)
    assert_equal(len(preds), len(y))

    def bad_cv():
        for i in range(4):
            yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cval.cross_val_predict, est, X, y, cv=bad_cv()) 
Example #6
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_cross_val_predict_input_types():
    clf = Ridge()
    # Smoke test
    predictions = cval.cross_val_predict(clf, X, y)
    assert_equal(predictions.shape, (10,))

    # test with multioutput y
    with ignore_warnings(category=ConvergenceWarning):
        predictions = cval.cross_val_predict(clf, X_sparse, X)
    assert_equal(predictions.shape, (10, 2))

    predictions = cval.cross_val_predict(clf, X_sparse, y)
    assert_array_equal(predictions.shape, (10,))

    # test with multioutput y
    with ignore_warnings(category=ConvergenceWarning):
        predictions = cval.cross_val_predict(clf, X_sparse, X)
    assert_array_equal(predictions.shape, (10, 2))

    # test with X and y as list
    list_check = lambda x: isinstance(x, list)
    clf = CheckingClassifier(check_X=list_check)
    predictions = cval.cross_val_predict(clf, X.tolist(), y.tolist())

    clf = CheckingClassifier(check_y=list_check)
    predictions = cval.cross_val_predict(clf, X, y.tolist())

    # test with 3d X and
    X_3d = X[:, :, np.newaxis]
    check_3d = lambda x: x.ndim == 3
    clf = CheckingClassifier(check_X=check_3d)
    predictions = cval.cross_val_predict(clf, X_3d, y)
    assert_array_equal(predictions.shape, (10,)) 
Example #7
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_cross_val_predict_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cval.cross_val_predict(clf, X_df, y_ser) 
Example #8
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_cross_val_predict_sparse_prediction():
    # check that cross_val_predict gives same result for sparse and dense input
    X, y = make_multilabel_classification(n_classes=2, n_labels=1,
                                          allow_unlabeled=False,
                                          return_indicator=True,
                                          random_state=1)
    X_sparse = csr_matrix(X)
    y_sparse = csr_matrix(y)
    classif = OneVsRestClassifier(SVC(kernel='linear'))
    preds = cval.cross_val_predict(classif, X, y, cv=10)
    preds_sparse = cval.cross_val_predict(classif, X_sparse, y_sparse, cv=10)
    preds_sparse = preds_sparse.toarray()
    assert_array_almost_equal(preds_sparse, preds)