Python sklearn.model_selection.cross_val_predict() Examples

The following are 30 code examples of sklearn.model_selection.cross_val_predict(), collected from open-source projects. The project and license for each example are listed in the Source File line above its code.
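Before the project examples, a minimal sketch of the function's basic contract may help: cross_val_predict returns one out-of-fold prediction per sample, which can then be scored against the true labels. The dataset and estimator below are illustrative, not taken from any of the projects.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_predict

X, y = load_iris(return_X_y=True)
# Each entry of y_pred was predicted by a model that never saw that sample.
y_pred = cross_val_predict(LogisticRegression(max_iter=1000), X, y, cv=5)
print(accuracy_score(y, y_pred))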
Example #1
Source File: multi_class_classification.py    From edge2vec with BSD 3-Clause "New" or "Revised" License
def multi_class_classification(data_X, data_Y):
    '''
    calculate multi-class classification and print related evaluation metrics
    '''
    # imports are module-level in the original source
    from sklearn import metrics, svm
    from sklearn.model_selection import cross_val_predict

    svc = svm.SVC(C=1, kernel='linear')
    # X_train, X_test, y_train, y_test = train_test_split(data_X, data_Y, test_size=0.4, random_state=0)
    clf = svc.fit(data_X, data_Y)  # svm
    # array = svc.coef_
    # print(array)
    predicted = cross_val_predict(clf, data_X, data_Y, cv=2)
    print("accuracy", metrics.accuracy_score(data_Y, predicted))
    print("f1 score macro", metrics.f1_score(data_Y, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(data_Y, predicted, average='micro'))
    print("precision score", metrics.precision_score(data_Y, predicted, average='macro'))
    print("recall score", metrics.recall_score(data_Y, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(data_Y, predicted))
    print("classification_report", metrics.classification_report(data_Y, predicted))
    # jaccard_similarity_score was renamed jaccard_score in scikit-learn 0.21
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted))
    # print("log_loss", metrics.log_loss(data_Y, predicted))
    print("zero_one_loss", metrics.zero_one_loss(data_Y, predicted))
    # print("AUC&ROC", metrics.roc_auc_score(data_Y, predicted))
    # print("matthews_corrcoef", metrics.matthews_corrcoef(data_Y, predicted))
Example #2
Source File: test_data.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_cv_pipeline_precomputed():
    # Cross-validate a regression on four coplanar points with the same
    # value. Use precomputed kernel to ensure Pipeline with KernelCenterer
    # is treated as a _pairwise operation.
    X = np.array([[3, 0, 0], [0, 3, 0], [0, 0, 3], [1, 1, 1]])
    y_true = np.ones((4,))
    K = X.dot(X.T)
    kcent = KernelCenterer()
    pipeline = Pipeline([("kernel_centerer", kcent), ("svr",
                        SVR(gamma='scale'))])

    # did the pipeline set the _pairwise attribute?
    assert pipeline._pairwise

    # test cross-validation, score should be almost perfect
    # NB: this test is pretty vacuous -- it's mainly to test integration
    #     of Pipeline and KernelCenterer
    y_pred = cross_val_predict(pipeline, K, y_true, cv=2)
    assert_array_almost_equal(y_true, y_pred) 
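The _pairwise check above matters because a precomputed Gram matrix must be sliced on both axes during cross-validation: K[train][:, train] for fitting and K[test][:, train] for prediction. A minimal standalone sketch with illustrative data (not part of the test suite):

import numpy as np
from sklearn.model_selection import cross_val_predict
from sklearn.svm import SVR

rng = np.random.RandomState(0)
X = rng.randn(20, 3)
y = X @ np.array([1.0, -2.0, 0.5])
K = X @ X.T  # precomputed linear kernel
# SVR(kernel='precomputed') is flagged as pairwise, so each fold slices
# rows and columns of K consistently.
y_pred = cross_val_predict(SVR(kernel='precomputed'), K, y, cv=5)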
Example #3
Source File: TermDocMatrix.py    From scattertext with Apache License 2.0
def get_logistic_regression_coefs_l2(self, category,
                                         clf=RidgeClassifierCV()):
        ''' Computes l2-penalized logistic regression score.

        Parameters
        ----------
        category : str
            category name to score

        Returns
        -------
            (coefficient array, accuracy, majority class baseline accuracy)
        '''
        try:
            from sklearn.cross_validation import cross_val_predict
        except ImportError:
            from sklearn.model_selection import cross_val_predict
        y = self._get_mask_from_category(category)
        X = TfidfTransformer().fit_transform(self._X)
        clf.fit(X, y)
        y_hat = cross_val_predict(clf, X, y)
        acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
        return clf.coef_[0], acc, baseline 
Example #4
Source File: plotfunctions.py    From DataScience-webapp-with-flask with MIT License
def plot_predVSreal(X, y, classifier, cv):
    from sklearn.model_selection import cross_val_predict
    # cross_val_predict returns an array of the same size as `y` where each entry
    # is a prediction obtained by cross validation:
    predicted = cross_val_predict(classifier, X, y, cv=cv)
    plt.gcf().clear()
    plt.scatter(y, predicted, edgecolors=(0, 0, 0))
    plt.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
    plt.xlabel('Measured')
    plt.ylabel('Predicted')
    from io import BytesIO
    figfile = BytesIO()
    plt.savefig(figfile, format='png')
    figfile.seek(0)  # rewind to beginning of file
    import base64
    figdata_png = base64.b64encode(figfile.getvalue())
    return figdata_png 
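Since the helper returns base64-encoded PNG bytes, the caller can inline the plot in an HTML response. A hedged usage sketch (the dataset, estimator, and template string are illustrative, not from the original webapp):

from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression

X, y = load_diabetes(return_X_y=True)
png_b64 = plot_predVSreal(X, y, LinearRegression(), cv=5).decode('ascii')
# Embed directly in a page, e.g. via a Flask/Jinja2 template variable:
img_tag = '<img src="data:image/png;base64,{}"/>'.format(png_b64)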
Example #5
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_cross_val_score_predict_groups():
    # Check if ValueError (when groups is None) propagates to cross_val_score
    # and cross_val_predict
    # And also check if groups is correctly passed to the cv object
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)

    clf = SVC(kernel="linear")

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_score, estimator=clf, X=X, y=y, cv=cv)
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_predict, estimator=clf, X=X, y=y, cv=cv) 
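For contrast, a minimal sketch of the passing case (synthetic groups, illustrative only): supplying groups gives group-aware splitters what they need, and omitting it raises exactly the ValueError the test asserts.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import GroupKFold, cross_val_predict
from sklearn.svm import SVC

X, y = make_classification(n_samples=20, n_classes=2, random_state=0)
groups = np.repeat(np.arange(4), 5)  # four groups of five samples each
y_pred = cross_val_predict(SVC(kernel='linear'), X, y,
                           groups=groups, cv=GroupKFold(n_splits=4))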
Example #6
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def check_cross_val_predict_multiclass(est, X, y, method):
    """Helper for tests of cross_val_predict with multiclass classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    float_min = np.finfo(np.float64).min
    default_values = {'decision_function': float_min,
                      'predict_log_proba': float_min,
                      'predict_proba': 0}
    expected_predictions = np.full((len(X), len(set(y))),
                                   default_values[method],
                                   dtype=np.float64)
    _, y_enc = np.unique(y, return_inverse=True)
    for train, test in cv.split(X, y_enc):
        est = clone(est).fit(X[train], y_enc[train])
        fold_preds = getattr(est, method)(X[test])
        i_cols_fit = np.unique(y_enc[train])
        expected_predictions[np.ix_(test, i_cols_fit)] = fold_preds

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype('str')]:
        assert_allclose(cross_val_predict(est, X, tg, method=method, cv=cv),
                        expected_predictions) 
Example #7
Source File: test_mlp_classifier.py    From muffnn with BSD 3-Clause "New" or "Revised" License
def test_cross_val_predict():
    # Make sure it works in cross_val_predict for multiclass.

    X, y = load_iris(return_X_y=True)
    y = LabelBinarizer().fit_transform(y)
    X = StandardScaler().fit_transform(X)

    mlp = MLPClassifier(n_epochs=10,
                        solver_kwargs={'learning_rate': 0.05},
                        random_state=4567).fit(X, y)

    cv = KFold(n_splits=4, random_state=457, shuffle=True)
    y_oos = cross_val_predict(mlp, X, y, cv=cv, method='predict_proba')
    auc = roc_auc_score(y, y_oos, average=None)

    assert np.all(auc >= 0.96) 
Example #8
Source File: test_validation.py    From twitter-stock-recommendation with MIT License
def test_cross_val_score_predict_groups():
    # Check if ValueError (when groups is None) propagates to cross_val_score
    # and cross_val_predict
    # And also check if groups is correctly passed to the cv object
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)

    clf = SVC(kernel="linear")

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_score, estimator=clf, X=X, y=y, cv=cv)
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_predict, estimator=clf, X=X, y=y, cv=cv) 
Example #9
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_cross_val_predict_unbalanced():
    X, y = make_classification(n_samples=100, n_features=2, n_redundant=0,
                               n_informative=2, n_clusters_per_class=1,
                               random_state=1)
    # Change the first sample to a new class
    y[0] = 2
    clf = LogisticRegression(random_state=1)
    cv = StratifiedKFold(n_splits=2, random_state=1)
    train, test = list(cv.split(X, y))
    yhat_proba = cross_val_predict(clf, X, y, cv=cv, method="predict_proba")
    assert y[test[0]][0] == 2  # sanity check for further assertions
    assert np.all(yhat_proba[test[0]][:, 2] == 0)
    assert np.all(yhat_proba[test[0]][:, 0:1] > 0)
    assert np.all(yhat_proba[test[1]] > 0)
    assert_array_almost_equal(yhat_proba.sum(axis=1), np.ones(y.shape),
                              decimal=12) 
Example #10
Source File: _test.py    From ibex with BSD 3-Clause "New" or "Revised" License
def _generate_cross_val_predict_test(X, y, est, pd_est, must_match):
    def test(self):
        self.assertEqual(
            hasattr(est, 'predict'),
            hasattr(pd_est, 'predict'))
        if not hasattr(est, 'predict'):
            return
        pd_y_hat = pd_cross_val_predict(pd_est, X, y)
        self.assertTrue(isinstance(pd_y_hat, pd.Series))
        self.assertTrue(pd_y_hat.index.equals(X.index))
        if must_match:
            y_hat = cross_val_predict(est, X.as_matrix(), y.values)
            np.testing.assert_allclose(pd_y_hat, y_hat)
    return test 
Example #11
Source File: stacked_classifiers_standard.py    From baikal with BSD 3-Clause "New" or "Revised" License
def fit_predict(self, X, y):
    self.fit(X, y)
    return cross_val_predict(self, X, y, method="predict_proba") 
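This pattern is the heart of stacking: the meta-learner must be trained on predictions that the base model produced for samples it never saw during fitting, otherwise the target leaks into the second level. A minimal sketch of the same idea outside the baikal API (estimator choices are illustrative):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict

X, y = make_classification(n_samples=200, random_state=0)
base = RandomForestClassifier(n_estimators=50, random_state=0)
# Out-of-fold probabilities become the meta-learner's training features.
meta_features = cross_val_predict(base, X, y, cv=5, method='predict_proba')
meta = LogisticRegression().fit(meta_features, y)
base.fit(X, y)  # refit the base model on all data for use at predict time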
Example #12
Source File: cross_validation.py    From Pyspatialml with GNU General Public License v3.0
def fit(self, X, y=None, groups=None, **fit_params):
        """
        Run fit method with all sets of parameters

        Args
        ----
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features

        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning

        groups : array-like, shape = [n_samples], optional
            Training vector groups for cross-validation

        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of the estimator
        """

        # check estimator and cv methods are valid
        self.cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))

        # check for binary response
        if len(np.unique(y)) > 2:
            raise ValueError('Only a binary response vector is currently supported')

        # check that scoring metric has been specified
        if self.scoring is None:
            raise ValueError('No score function is defined')

        # perform cross validation prediction
        self.y_pred_ = cross_val_predict(
            estimator=self.estimator, X=X, y=y, groups=groups, cv=self.cv,
            method='predict_proba', n_jobs=self.n_jobs, **fit_params)
        self.y_true = y

        # add fold id to the predictions
        self.test_idx_ = [indexes[1] for indexes in self.cv.split(X, y, groups)] 
Example #13
Source File: classifier_selection.py    From causallib with Apache License 2.0
def _select_classifier_from_list(candidates, X, A, n_splits=5, seed=None, loss_type='01'):
    accuracies = np.zeros(len(candidates))

    class_weight = compute_class_weight('balanced', np.unique(A), A)[LabelEncoder().fit_transform(A)]

    for model_idx, m in enumerate(candidates):
        # obtain (cross-validated) predictions for this candidate
        if n_splits >= 2:
            cv = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
            if loss_type == '01':
                pred = cross_val_predict(m, X=X, y=A, cv=cv,
                                         fit_params={'sample_weight': class_weight}).reshape(-1)
            else:
                ps = cross_val_predict(m, X=X, y=A, cv=cv,
                                       fit_params={'sample_weight': class_weight},
                                       method='predict_proba')
                pred = ps[:, 1]
        else:
            m.fit(X, A, sample_weight=class_weight)
            if loss_type == '01':
                pred = m.predict(X=X)
            else:
                pred = m.predict_proba(X=X)[:, 1]

        # score this candidate; this block must sit inside the loop,
        # otherwise only the last candidate would ever be scored
        if loss_type == '01':
            accuracies[model_idx] = np.sum(class_weight[pred == A]) / np.sum(class_weight)
        else:
            # weighted log-likelihood; assumes A is coded as -1/1
            logl = np.zeros(A.shape)
            logl[A == -1] = np.log(1.0 - pred[A == -1])
            logl[A == 1] = np.log(pred[A == 1])
            accuracies[model_idx] = np.sum(class_weight * logl) / np.sum(class_weight)

    i_best = np.argmax(accuracies)
    # print('accuracies =', accuracies, "accuracies-sorted", sorted(accuracies))
    # print('Selected model {} {}'.format(i_best, candidates[i_best]))
    return candidates[i_best]
Example #14
Source File: classification_tests.py    From drifter_ml with MIT License
def cross_val_roc_auc(self, clf, cv=3, average="micro"):
        self.roc_auc_exception()
        roc_auc_score = partial(self.roc_auc_score, average=average)
        y_pred = cross_val_predict(clf, self.X, self.y, cv=cv)
        return roc_auc_score(self.y, y_pred) 
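Note that this computes AUC from hard cross-validated labels. For a threshold-free score, cross_val_predict can return probabilities instead; a hedged sketch outside the drifter_ml API (data and estimator illustrative):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_predict

X, y = make_classification(n_samples=200, random_state=0)
proba = cross_val_predict(LogisticRegression(max_iter=1000), X, y, cv=3,
                          method='predict_proba')
print(roc_auc_score(y, proba[:, 1]))  # rank-based AUC on out-of-fold probabilities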
Example #15
Source File: classification_tests.py    From drifter_ml with MIT License
def cross_val_f1(self, clf, cv=3, average="binary"):
        average = self.reset_average(average)
        f1_score = partial(self.f1_score, average=average)
        y_pred = cross_val_predict(clf, self.X, self.y, cv=cv)
        return f1_score(self.y, y_pred) 
Example #16
Source File: classification_tests.py    From drifter_ml with MIT License
def cross_val_recall(self, clf, cv=3, average="binary"):
        average = self.reset_average(average)
        recall_score = partial(self.recall_score, average=average)
        y_pred = cross_val_predict(clf, self.X, self.y, cv=cv)
        return recall_score(self.y, y_pred) 
Example #17
Source File: classification_tests.py    From drifter_ml with MIT License
def cross_val_precision(self, clf, cv=3, average="binary"):
        average = self.reset_average(average)
        precision_score = partial(self.precision_score, average=average)
        y_pred = cross_val_predict(clf, self.X, self.y, cv=cv)
        return precision_score(self.y, y_pred) 
Example #18
Source File: sklearn_steps.py    From baikal with BSD 3-Clause "New" or "Revised" License
def _fit_predict_proba(self, X, y):
    self.fit(X, y)
    return cross_val_predict(self, X, y, method="predict_proba") 
Example #19
Source File: sklearn_steps.py    From baikal with BSD 3-Clause "New" or "Revised" License
def _fit_decision_function(self, X, y):
    self.fit(X, y)
    return cross_val_predict(self, X, y, method="decision_function") 
Example #20
Source File: classification_tests.py    From drifter_ml with MIT License
def cross_val_precision_per_class(self, clf, cv=3, average="binary"):
        average = self.reset_average(average)
        precision_score = partial(self.precision_score, average=average)
        y_pred = cross_val_predict(clf, self.X, self.y, cv=cv)
        precision = {}
        for klass in self.classes:
            y_pred_class = np.take(y_pred, self.y[self.y == klass].index, axis=0)
            y_class = self.y[self.y == klass]
            precision[klass] = precision_score(y_class, y_pred_class) 
        return precision 
Example #21
Source File: test_validation.py    From twitter-stock-recommendation with MIT License
def test_cross_val_predict_predict_proba_shape():
    X, y = make_classification(n_classes=2, n_samples=50, random_state=0)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='predict_proba')
    assert_equal(preds.shape, (50, 2))

    X, y = load_iris(return_X_y=True)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='predict_proba')
    assert_equal(preds.shape, (150, 3)) 
Example #22
Source File: test_validation.py    From twitter-stock-recommendation with MIT License
def test_cross_val_predict_predict_log_proba_shape():
    X, y = make_classification(n_classes=2, n_samples=50, random_state=0)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='predict_log_proba')
    assert_equal(preds.shape, (50, 2))

    X, y = load_iris(return_X_y=True)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='predict_log_proba')
    assert_equal(preds.shape, (150, 3)) 
Example #23
Source File: test_validation.py    From twitter-stock-recommendation with MIT License
def test_cross_val_predict_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y2)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cross_val_predict(clf, X_df, y_ser) 
Example #24
Source File: test_validation.py    From twitter-stock-recommendation with MIT License
def test_cross_val_predict_sparse_prediction():
    # check that cross_val_predict gives same result for sparse and dense input
    X, y = make_multilabel_classification(n_classes=2, n_labels=1,
                                          allow_unlabeled=False,
                                          return_indicator=True,
                                          random_state=1)
    X_sparse = csr_matrix(X)
    y_sparse = csr_matrix(y)
    classif = OneVsRestClassifier(SVC(kernel='linear'))
    preds = cross_val_predict(classif, X, y, cv=10)
    preds_sparse = cross_val_predict(classif, X_sparse, y_sparse, cv=10)
    preds_sparse = preds_sparse.toarray()
    assert_array_almost_equal(preds_sparse, preds) 
Example #25
Source File: test_validation.py    From twitter-stock-recommendation with MIT License
def check_cross_val_predict_with_method(est):
    iris = load_iris()
    X, y = iris.data, iris.target
    X, y = shuffle(X, y, random_state=0)
    classes = len(set(y))

    kfold = KFold()

    methods = ['decision_function', 'predict_proba', 'predict_log_proba']
    for method in methods:
        predictions = cross_val_predict(est, X, y, method=method)
        assert_equal(len(predictions), len(y))

        expected_predictions = np.zeros([len(y), classes])
        func = getattr(est, method)

        # Naive loop (should be same as cross_val_predict):
        for train, test in kfold.split(X, y):
            est.fit(X[train], y[train])
            expected_predictions[test] = func(X[test])

        predictions = cross_val_predict(est, X, y, method=method,
                                        cv=kfold)
        assert_array_almost_equal(expected_predictions, predictions)

        # Test alternative representations of y
        predictions_y1 = cross_val_predict(est, X, y + 1, method=method,
                                           cv=kfold)
        assert_array_equal(predictions, predictions_y1)

        predictions_y2 = cross_val_predict(est, X, y - 2, method=method,
                                           cv=kfold)
        assert_array_equal(predictions, predictions_y2)

        predictions_ystr = cross_val_predict(est, X, y.astype('str'),
                                             method=method, cv=kfold)
        assert_array_equal(predictions, predictions_ystr) 
Example #26
Source File: test_validation.py    From twitter-stock-recommendation with MIT License
def test_cross_val_predict_method_checking():
    # Regression test for issue #9639. Tests that cross_val_predict does not
    # check estimator methods (e.g. predict_proba) before fitting
    est = SGDClassifier(loss='log', random_state=2)
    check_cross_val_predict_with_method(est) 
Example #27
Source File: test_validation.py    From twitter-stock-recommendation with MIT License
def test_cross_val_predict_class_subset():

    X = np.arange(200).reshape(100, 2)
    y = np.array([x//10 for x in range(100)])
    classes = 10

    kfold3 = KFold(n_splits=3)
    kfold4 = KFold(n_splits=4)

    le = LabelEncoder()

    methods = ['decision_function', 'predict_proba', 'predict_log_proba']
    for method in methods:
        est = LogisticRegression()

        # Test with n_splits=3
        predictions = cross_val_predict(est, X, y, method=method,
                                        cv=kfold3)

        # Runs a naive loop (should be same as cross_val_predict):
        expected_predictions = get_expected_predictions(X, y, kfold3, classes,
                                                        est, method)
        assert_array_almost_equal(expected_predictions, predictions)

        # Test with n_splits=4
        predictions = cross_val_predict(est, X, y, method=method,
                                        cv=kfold4)
        expected_predictions = get_expected_predictions(X, y, kfold4, classes,
                                                        est, method)
        assert_array_almost_equal(expected_predictions, predictions)

        # Testing unordered labels
        y = shuffle(np.repeat(range(10), 10), random_state=0)
        predictions = cross_val_predict(est, X, y, method=method,
                                        cv=kfold3)
        y = le.fit_transform(y)
        expected_predictions = get_expected_predictions(X, y, kfold3, classes,
                                                        est, method)
        assert_array_almost_equal(expected_predictions, predictions) 
Example #28
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def check_cross_val_predict_binary(est, X, y, method):
    """Helper for tests of cross_val_predict with binary classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    if y.ndim == 1:
        exp_shape = (len(X),) if method == 'decision_function' else (len(X), 2)
    else:
        exp_shape = y.shape
    expected_predictions = np.zeros(exp_shape)
    for train, test in cv.split(X, y):
        est = clone(est).fit(X[train], y[train])
        expected_predictions[test] = getattr(est, method)(X[test])

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype('str')]:
        assert_allclose(cross_val_predict(est, X, tg, method=method, cv=cv),
                        expected_predictions) 
Example #29
Source File: estimator.py    From EDeN with MIT License
def cross_val_predict(self, graphs, targets, cv=5):
        """cross_val_score."""
        x = self.transform(graphs)
        scores = cross_val_predict(
            self.model, x, targets, cv=cv, method='decision_function')
        return scores 
Example #30
Source File: TermDocMatrix.py    From scattertext with Apache License 2.0
def get_logistic_regression_coefs_l1(self, category,
                                         clf=LassoCV(alphas=[0.1, 0.001],
                                                     max_iter=10000,
                                                     n_jobs=-1)):
        ''' Computes l1-penalized (lasso) regression score.
        Parameters
        ----------
        category : str
            category name to score

        Returns
        -------
            (coefficient array, accuracy, majority class baseline accuracy)
        '''
        try:
            from sklearn.cross_validation import cross_val_predict
        except ImportError:
            from sklearn.model_selection import cross_val_predict
        y = self._get_mask_from_category(category)
        y_continuous = self._get_continuous_version_boolean_y(y)
        # X = TfidfTransformer().fit_transform(self._X)
        X = self._X

        # cross_val_predict clones clf internally, so a single fit afterwards
        # is enough to populate coef_
        y_hat = (cross_val_predict(clf, X, y_continuous) > 0)
        acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
        clf.fit(X, y_continuous)
        return clf.coef_, acc, baseline