Python sklearn.model_selection.cross_val_predict() Examples
The following are 30 code examples of sklearn.model_selection.cross_val_predict(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.model_selection, or try the search function.
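Before the project examples, here is a minimal sketch of the typical call pattern. The iris dataset and LogisticRegression estimator are illustrative choices, not taken from any example on this page. cross_val_predict fits a clone of the estimator on each training fold and returns the out-of-fold prediction for every sample, so no prediction comes from a model that saw that sample during fitting:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_predict

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=1000)

# Each entry of y_pred comes from a model fit on the other folds.
y_pred = cross_val_predict(clf, X, y, cv=5)
print("out-of-fold accuracy:", accuracy_score(y, y_pred))

# With method='predict_proba' the result is an (n_samples, n_classes) array.
y_proba = cross_val_predict(clf, X, y, cv=5, method='predict_proba')
print("probability array shape:", y_proba.shape)  # (150, 3)

The returned array is intended for diagnostics such as residual plots and confusion matrices; the sklearn documentation notes that feeding these predictions into an evaluation metric is not necessarily a valid way to measure generalization performance.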
Example #1
Source File: multi_class_classification.py From edge2vec with BSD 3-Clause "New" or "Revised" License | 11 votes |
def multi_class_classification(data_X, data_Y):
    '''
    calculate multi-class classification and return related evaluation metrics
    '''
    svc = svm.SVC(C=1, kernel='linear')
    # X_train, X_test, y_train, y_test = train_test_split(
    #     data_X, data_Y, test_size=0.4, random_state=0)
    clf = svc.fit(data_X, data_Y)  # svm
    # array = svc.coef_
    # print(array)
    predicted = cross_val_predict(clf, data_X, data_Y, cv=2)
    print("accuracy", metrics.accuracy_score(data_Y, predicted))
    print("f1 score macro", metrics.f1_score(data_Y, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(data_Y, predicted, average='micro'))
    print("precision score", metrics.precision_score(data_Y, predicted, average='macro'))
    print("recall score", metrics.recall_score(data_Y, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(data_Y, predicted))
    print("classification_report", metrics.classification_report(data_Y, predicted))
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted))
    # print("log_loss", metrics.log_loss(data_Y, predicted))
    print("zero_one_loss", metrics.zero_one_loss(data_Y, predicted))
    # print("AUC&ROC", metrics.roc_auc_score(data_Y, predicted))
    # print("matthews_corrcoef", metrics.matthews_corrcoef(data_Y, predicted))
Example #2
Source File: test_data.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_cv_pipeline_precomputed():
    # Cross-validate a regression on four coplanar points with the same
    # value. Use precomputed kernel to ensure Pipeline with KernelCenterer
    # is treated as a _pairwise operation.
    X = np.array([[3, 0, 0], [0, 3, 0], [0, 0, 3], [1, 1, 1]])
    y_true = np.ones((4,))
    K = X.dot(X.T)
    kcent = KernelCenterer()
    pipeline = Pipeline([("kernel_centerer", kcent), ("svr", SVR(gamma='scale'))])

    # did the pipeline set the _pairwise attribute?
    assert pipeline._pairwise

    # test cross-validation, score should be almost perfect
    # NB: this test is pretty vacuous -- it's mainly to test integration
    # of Pipeline and KernelCenterer
    y_pred = cross_val_predict(pipeline, K, y_true, cv=2)
    assert_array_almost_equal(y_true, y_pred)
Example #3
Source File: TermDocMatrix.py From scattertext with Apache License 2.0 | 6 votes |
def get_logistic_regression_coefs_l2(self, category, clf=RidgeClassifierCV()):
    '''Computes l2-penalized logistic regression score.

    Parameters
    ----------
    category : str
        category name to score

    Returns
    -------
    (coefficient array, accuracy, majority class baseline accuracy)
    '''
    try:
        from sklearn.cross_validation import cross_val_predict
    except ImportError:
        from sklearn.model_selection import cross_val_predict
    y = self._get_mask_from_category(category)
    X = TfidfTransformer().fit_transform(self._X)
    clf.fit(X, y)
    y_hat = cross_val_predict(clf, X, y)
    acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
    return clf.coef_[0], acc, baseline
Example #4
Source File: plotfunctions.py From DataScience-webapp-with-flask with MIT License | 6 votes |
def plot_predVSreal(X, y, classifier, cv):
    from sklearn.model_selection import cross_val_predict
    # cross_val_predict returns an array of the same size as `y` where each entry
    # is a prediction obtained by cross validation:
    predicted = cross_val_predict(classifier, X, y, cv=cv)
    plt.gcf().clear()
    plt.scatter(y, predicted, edgecolors=(0, 0, 0))
    plt.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
    plt.xlabel('Measured')
    plt.ylabel('Predicted')
    from io import BytesIO
    figfile = BytesIO()
    plt.savefig(figfile, format='png')
    figfile.seek(0)  # rewind to beginning of file
    import base64
    figdata_png = base64.b64encode(figfile.getvalue())
    return figdata_png
Example #5
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_cross_val_score_predict_groups():
    # Check if ValueError (when groups is None) propagates to cross_val_score
    # and cross_val_predict
    # And also check if groups is correctly passed to the cv object
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)

    clf = SVC(kernel="linear")

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_score, estimator=clf, X=X, y=y, cv=cv)
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_predict, estimator=clf, X=X, y=y, cv=cv)
Example #6
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def check_cross_val_predict_multiclass(est, X, y, method):
    """Helper for tests of cross_val_predict with multiclass classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    float_min = np.finfo(np.float64).min
    default_values = {'decision_function': float_min,
                      'predict_log_proba': float_min,
                      'predict_proba': 0}
    expected_predictions = np.full((len(X), len(set(y))),
                                   default_values[method],
                                   dtype=np.float64)
    _, y_enc = np.unique(y, return_inverse=True)
    for train, test in cv.split(X, y_enc):
        est = clone(est).fit(X[train], y_enc[train])
        fold_preds = getattr(est, method)(X[test])
        i_cols_fit = np.unique(y_enc[train])
        expected_predictions[np.ix_(test, i_cols_fit)] = fold_preds

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype('str')]:
        assert_allclose(cross_val_predict(est, X, tg, method=method, cv=cv),
                        expected_predictions)
Example #7
Source File: test_mlp_classifier.py From muffnn with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_cross_val_predict():
    # Make sure it works in cross_val_predict for multiclass.
    X, y = load_iris(return_X_y=True)
    y = LabelBinarizer().fit_transform(y)
    X = StandardScaler().fit_transform(X)
    mlp = MLPClassifier(n_epochs=10,
                        solver_kwargs={'learning_rate': 0.05},
                        random_state=4567).fit(X, y)
    cv = KFold(n_splits=4, random_state=457, shuffle=True)
    y_oos = cross_val_predict(mlp, X, y, cv=cv, method='predict_proba')
    auc = roc_auc_score(y, y_oos, average=None)
    assert np.all(auc >= 0.96)
Example #8
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_cross_val_score_predict_groups():
    # Check if ValueError (when groups is None) propagates to cross_val_score
    # and cross_val_predict
    # And also check if groups is correctly passed to the cv object
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)

    clf = SVC(kernel="linear")

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_score, estimator=clf, X=X, y=y, cv=cv)
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_predict, estimator=clf, X=X, y=y, cv=cv)
Example #9
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_cross_val_predict_unbalanced():
    X, y = make_classification(n_samples=100, n_features=2, n_redundant=0,
                               n_informative=2, n_clusters_per_class=1,
                               random_state=1)
    # Change the first sample to a new class
    y[0] = 2
    clf = LogisticRegression(random_state=1)
    cv = StratifiedKFold(n_splits=2, random_state=1)
    train, test = list(cv.split(X, y))
    yhat_proba = cross_val_predict(clf, X, y, cv=cv, method="predict_proba")
    assert y[test[0]][0] == 2  # sanity check for further assertions
    assert np.all(yhat_proba[test[0]][:, 2] == 0)
    assert np.all(yhat_proba[test[0]][:, 0:1] > 0)
    assert np.all(yhat_proba[test[1]] > 0)
    assert_array_almost_equal(yhat_proba.sum(axis=1), np.ones(y.shape),
                              decimal=12)
Example #10
Source File: _test.py From ibex with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _generate_cross_val_predict_test(X, y, est, pd_est, must_match):
    def test(self):
        self.assertEqual(
            hasattr(est, 'predict'),
            hasattr(pd_est, 'predict'))
        if not hasattr(est, 'predict'):
            return

        pd_y_hat = pd_cross_val_predict(pd_est, X, y)
        self.assertTrue(isinstance(pd_y_hat, pd.Series))
        self.assertTrue(pd_y_hat.index.equals(X.index))

        if must_match:
            y_hat = cross_val_predict(est, X.as_matrix(), y.values)
            np.testing.assert_allclose(pd_y_hat, y_hat)

    return test
Example #11
Source File: stacked_classifiers_standard.py From baikal with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit_predict(self, X, y):
    self.fit(X, y)
    return cross_val_predict(self, X, y, method="predict_proba")
Example #12
Source File: cross_validation.py From Pyspatialml with GNU General Public License v3.0 | 5 votes |
def fit(self, X, y=None, groups=None, **fit_params):
    """Run fit method with all sets of parameters

    Args
    ----
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features
    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning
    groups : array-like, shape = [n_samples], optional
        Training vector groups for cross-validation
    **fit_params : dict of string -> object
        Parameters passed to the ``fit`` method of the estimator
    """
    # check estimator and cv methods are valid
    self.cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))

    # check for binary response
    if len(np.unique(y)) > 2:
        raise ValueError('Only a binary response vector is currently supported')

    # check that scoring metric has been specified
    if self.scoring is None:
        raise ValueError('No score function is defined')

    # perform cross validation prediction
    self.y_pred_ = cross_val_predict(
        estimator=self.estimator, X=X, y=y, groups=groups, cv=self.cv,
        method='predict_proba', n_jobs=self.n_jobs, **fit_params)
    self.y_true = y

    # add fold id to the predictions
    self.test_idx_ = [indexes[1] for indexes in self.cv.split(X, y, groups)]
Example #13
Source File: classifier_selection.py From causallib with Apache License 2.0 | 5 votes |
def _select_classifier_from_list(candidates, X, A, n_splits=5, seed=None, loss_type='01'):
    accuracies = np.zeros(len(candidates))
    class_weight = compute_class_weight(
        'balanced', np.unique(A), A)[LabelEncoder().fit_transform(A)]
    if n_splits >= 2:
        cv = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    for model_idx, m in enumerate(candidates):
        if n_splits >= 2:
            # out-of-fold predictions via cross-validation
            if loss_type == '01':
                pred = cross_val_predict(
                    m, X=X, y=A, cv=cv,
                    fit_params={'sample_weight': class_weight}).reshape(-1)
            else:
                ps = cross_val_predict(
                    m, X=X, y=A, cv=cv,
                    fit_params={'sample_weight': class_weight},
                    method='predict_proba')
                pred = ps[:, 1]
        else:
            # too few splits: fit and predict on the full data
            m.fit(X, A, sample_weight=class_weight)
            if loss_type == '01':
                pred = m.predict(X=X)
            else:
                pred = m.predict_proba(X=X)[:, 1]
        # score each candidate with class-weighted accuracy or log-likelihood
        if loss_type == '01':
            accuracies[model_idx] = np.sum(class_weight[pred == A]) / np.sum(class_weight)
        else:
            logl = np.zeros(A.shape)
            logl[A == -1] = np.log(1.0 - pred[A == -1])
            logl[A == 1] = np.log(pred[A == 1])
            accuracies[model_idx] = np.sum(class_weight * logl) / np.sum(class_weight)
    i_best = np.argmax(accuracies)
    # print('accuracies =', accuracies, "accuracies-sorted", sorted(accuracies))
    # print('Selected model {} {}'.format(i_best, candidates[i_best]))
    return candidates[i_best]
Example #14
Source File: classification_tests.py From drifter_ml with MIT License | 5 votes |
def cross_val_roc_auc(self, clf, cv=3, average="micro"):
    self.roc_auc_exception()
    roc_auc_score = partial(self.roc_auc_score, average=average)
    y_pred = cross_val_predict(clf, self.X, self.y, cv=cv)
    return roc_auc_score(self.y, y_pred)
Example #15
Source File: classification_tests.py From drifter_ml with MIT License | 5 votes |
def cross_val_f1(self, clf, cv=3, average="binary"):
    average = self.reset_average(average)
    f1_score = partial(self.f1_score, average=average)
    y_pred = cross_val_predict(clf, self.X, self.y, cv=cv)
    return f1_score(self.y, y_pred)
Example #16
Source File: classification_tests.py From drifter_ml with MIT License | 5 votes |
def cross_val_recall(self, clf, cv=3, average="binary"):
    average = self.reset_average(average)
    recall_score = partial(self.recall_score, average=average)
    y_pred = cross_val_predict(clf, self.X, self.y, cv=cv)
    return recall_score(self.y, y_pred)
Example #17
Source File: classification_tests.py From drifter_ml with MIT License | 5 votes |
def cross_val_precision(self, clf, cv=3, average="binary"):
    average = self.reset_average(average)
    precision_score = partial(self.precision_score, average=average)
    y_pred = cross_val_predict(clf, self.X, self.y, cv=cv)
    return precision_score(self.y, y_pred)
Example #18
Source File: sklearn_steps.py From baikal with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _fit_predict_proba(self, X, y):
    self.fit(X, y)
    return cross_val_predict(self, X, y, method="predict_proba")
Example #19
Source File: sklearn_steps.py From baikal with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _fit_decision_function(self, X, y):
    self.fit(X, y)
    return cross_val_predict(self, X, y, method="decision_function")
Example #20
Source File: classification_tests.py From drifter_ml with MIT License | 5 votes |
def cross_val_precision_per_class(self, clf, cv=3, average="binary"):
    average = self.reset_average(average)
    precision_score = partial(self.precision_score, average=average)
    y_pred = cross_val_predict(clf, self.X, self.y, cv=cv)
    precision = {}
    for klass in self.classes:
        y_pred_class = np.take(y_pred, self.y[self.y == klass].index, axis=0)
        y_class = self.y[self.y == klass]
        precision[klass] = precision_score(y_class, y_pred_class)
    return precision
Example #21
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_cross_val_predict_predict_proba_shape():
    X, y = make_classification(n_classes=2, n_samples=50, random_state=0)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='predict_proba')
    assert_equal(preds.shape, (50, 2))

    X, y = load_iris(return_X_y=True)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='predict_proba')
    assert_equal(preds.shape, (150, 3))
Example #22
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_cross_val_predict_predict_log_proba_shape():
    X, y = make_classification(n_classes=2, n_samples=50, random_state=0)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='predict_log_proba')
    assert_equal(preds.shape, (50, 2))

    X, y = load_iris(return_X_y=True)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='predict_log_proba')
    assert_equal(preds.shape, (150, 3))
Example #23
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_cross_val_predict_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y2)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cross_val_predict(clf, X_df, y_ser)
Example #24
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_cross_val_predict_sparse_prediction():
    # check that cross_val_predict gives same result for sparse and dense input
    X, y = make_multilabel_classification(n_classes=2, n_labels=1,
                                          allow_unlabeled=False,
                                          return_indicator=True,
                                          random_state=1)
    X_sparse = csr_matrix(X)
    y_sparse = csr_matrix(y)
    classif = OneVsRestClassifier(SVC(kernel='linear'))
    preds = cross_val_predict(classif, X, y, cv=10)
    preds_sparse = cross_val_predict(classif, X_sparse, y_sparse, cv=10)
    preds_sparse = preds_sparse.toarray()
    assert_array_almost_equal(preds_sparse, preds)
Example #25
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def check_cross_val_predict_with_method(est):
    iris = load_iris()
    X, y = iris.data, iris.target
    X, y = shuffle(X, y, random_state=0)
    classes = len(set(y))

    kfold = KFold()

    methods = ['decision_function', 'predict_proba', 'predict_log_proba']
    for method in methods:
        predictions = cross_val_predict(est, X, y, method=method)
        assert_equal(len(predictions), len(y))

        expected_predictions = np.zeros([len(y), classes])
        func = getattr(est, method)

        # Naive loop (should be same as cross_val_predict):
        for train, test in kfold.split(X, y):
            est.fit(X[train], y[train])
            expected_predictions[test] = func(X[test])

        predictions = cross_val_predict(est, X, y, method=method,
                                        cv=kfold)
        assert_array_almost_equal(expected_predictions, predictions)

        # Test alternative representations of y
        predictions_y1 = cross_val_predict(est, X, y + 1, method=method,
                                           cv=kfold)
        assert_array_equal(predictions, predictions_y1)

        predictions_y2 = cross_val_predict(est, X, y - 2, method=method,
                                           cv=kfold)
        assert_array_equal(predictions, predictions_y2)

        predictions_ystr = cross_val_predict(est, X, y.astype('str'),
                                             method=method, cv=kfold)
        assert_array_equal(predictions, predictions_ystr)
Example #26
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_cross_val_predict_method_checking():
    # Regression test for issue #9639. Tests that cross_val_predict does not
    # check estimator methods (e.g. predict_proba) before fitting
    est = SGDClassifier(loss='log', random_state=2)
    check_cross_val_predict_with_method(est)
Example #27
Source File: test_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_cross_val_predict_class_subset():
    X = np.arange(200).reshape(100, 2)
    y = np.array([x // 10 for x in range(100)])
    classes = 10

    kfold3 = KFold(n_splits=3)
    kfold4 = KFold(n_splits=4)

    le = LabelEncoder()

    methods = ['decision_function', 'predict_proba', 'predict_log_proba']
    for method in methods:
        est = LogisticRegression()

        # Test with n_splits=3
        predictions = cross_val_predict(est, X, y, method=method, cv=kfold3)

        # Runs a naive loop (should be same as cross_val_predict):
        expected_predictions = get_expected_predictions(X, y, kfold3, classes,
                                                        est, method)
        assert_array_almost_equal(expected_predictions, predictions)

        # Test with n_splits=4
        predictions = cross_val_predict(est, X, y, method=method, cv=kfold4)
        expected_predictions = get_expected_predictions(X, y, kfold4, classes,
                                                        est, method)
        assert_array_almost_equal(expected_predictions, predictions)

        # Testing unordered labels
        y = shuffle(np.repeat(range(10), 10), random_state=0)
        predictions = cross_val_predict(est, X, y, method=method, cv=kfold3)
        y = le.fit_transform(y)
        expected_predictions = get_expected_predictions(X, y, kfold3, classes,
                                                        est, method)
        assert_array_almost_equal(expected_predictions, predictions)
Example #28
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def check_cross_val_predict_binary(est, X, y, method):
    """Helper for tests of cross_val_predict with binary classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    if y.ndim == 1:
        exp_shape = (len(X),) if method == 'decision_function' else (len(X), 2)
    else:
        exp_shape = y.shape
    expected_predictions = np.zeros(exp_shape)
    for train, test in cv.split(X, y):
        est = clone(est).fit(X[train], y[train])
        expected_predictions[test] = getattr(est, method)(X[test])

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype('str')]:
        assert_allclose(cross_val_predict(est, X, tg, method=method, cv=cv),
                        expected_predictions)
Example #29
Source File: estimator.py From EDeN with MIT License | 5 votes |
def cross_val_predict(self, graphs, targets, cv=5):
    """cross_val_predict."""
    x = self.transform(graphs)
    scores = cross_val_predict(
        self.model, x, targets, cv=cv, method='decision_function')
    return scores
Example #30
Source File: TermDocMatrix.py From scattertext with Apache License 2.0 | 5 votes |
def get_logistic_regression_coefs_l1(self, category,
                                     clf=LassoCV(alphas=[0.1, 0.001],
                                                 max_iter=10000,
                                                 n_jobs=-1)):
    '''Computes l1-penalized logistic regression score.

    Parameters
    ----------
    category : str
        category name to score

    Returns
    -------
    (coefficient array, accuracy, majority class baseline accuracy)
    '''
    try:
        from sklearn.cross_validation import cross_val_predict
    except ImportError:
        from sklearn.model_selection import cross_val_predict
    y = self._get_mask_from_category(category)
    y_continuous = self._get_continuous_version_boolean_y(y)
    # X = TfidfTransformer().fit_transform(self._X)
    X = self._X

    clf.fit(X, y_continuous)
    y_hat = (cross_val_predict(clf, X, y_continuous) > 0)
    acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
    clf.fit(X, y_continuous)
    return clf.coef_, acc, baseline