Python sklearn.calibration.CalibratedClassifierCV() Examples
The following are 29 code examples of sklearn.calibration.CalibratedClassifierCV(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.calibration, or try the search function.
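Before diving into the project examples, here is a minimal, self-contained sketch of the most common use case: wrapping a classifier that has no predict_proba (such as LinearSVC) so that it exposes calibrated probability estimates. This sketch is illustrative only; the dataset, seeds, and parameter values are arbitrary assumptions rather than code from any of the projects below.

import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

# Toy binary classification problem (arbitrary sizes, for illustration)
X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# LinearSVC only provides decision_function; the wrapper fits Platt
# scaling (method="sigmoid") on internal cross-validation folds and
# exposes a calibrated predict_proba.
clf = CalibratedClassifierCV(LinearSVC(random_state=0),
                             method="sigmoid", cv=3)
clf.fit(X_train, y_train)
probs = clf.predict_proba(X_test)  # shape (n_samples, 2); rows sum to 1

Note that method="sigmoid" fits a parametric Platt-scaling curve and works with little calibration data, while method="isotonic" fits a non-parametric isotonic regression and generally needs more samples to avoid overfitting; several examples below exercise both options.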
Example #1
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_calibrated_classifier_cv_float_nozipmap(self):
    data = load_iris()
    X, y = data.data, data.target
    clf = MultinomialNB().fit(X, y)
    model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
    model_onnx = convert_sklearn(
        model, "scikit-learn CalibratedClassifierCVMNB",
        [("input", FloatTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET,
        options={id(model): {'zipmap': False}})
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.float32), model, model_onnx,
        basename="SklearnCalibratedClassifierCVFloatNoZipMap",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')")
Example #2
Source File: test_calibration.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_sample_weight():
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=len(y))
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test = X[n_samples:]

    for method in ['sigmoid', 'isotonic']:
        base_estimator = LinearSVC(random_state=42)
        calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
        calibrated_clf.fit(X_train, y_train, sample_weight=sw_train)
        probs_with_sw = calibrated_clf.predict_proba(X_test)

        # As the weights are used for the calibration, they should still
        # yield different predictions
        calibrated_clf.fit(X_train, y_train)
        probs_without_sw = calibrated_clf.predict_proba(X_test)

        diff = np.linalg.norm(probs_with_sw - probs_without_sw)
        assert_greater(diff, 0.1)
Example #3
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_calibrated_classifier_cv_float(self):
    data = load_iris()
    X, y = data.data, data.target
    clf = MultinomialNB().fit(X, y)
    model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
    model_onnx = convert_sklearn(
        model, "scikit-learn CalibratedClassifierCVMNB",
        [("input", FloatTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET)
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.float32), model, model_onnx,
        basename="SklearnCalibratedClassifierCVFloat",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')")
Example #4
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_calibrated_classifier_cv_int(self):
    data = load_digits()
    X, y = data.data, data.target
    clf = MultinomialNB().fit(X, y)
    model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
    model_onnx = convert_sklearn(
        model, "scikit-learn CalibratedClassifierCVMNB",
        [("input", Int64TensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET)
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.int64), model, model_onnx,
        basename="SklearnCalibratedClassifierCVInt-Dec4",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')")
Example #5
Source File: test_calibration.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_sample_weight():
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=len(y))
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test = X[n_samples:]

    for method in ['sigmoid', 'isotonic']:
        base_estimator = LinearSVC(random_state=42)
        calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
        calibrated_clf.fit(X_train, y_train, sample_weight=sw_train)
        probs_with_sw = calibrated_clf.predict_proba(X_test)

        # As the weights are used for the calibration, they should still
        # yield different predictions
        calibrated_clf.fit(X_train, y_train)
        probs_without_sw = calibrated_clf.predict_proba(X_test)

        diff = np.linalg.norm(probs_with_sw - probs_without_sw)
        assert_greater(diff, 0.1)
Example #6
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_calibrated_classifier_cv_isotonic_float(self):
    data = load_iris()
    X, y = data.data, data.target
    clf = KNeighborsClassifier().fit(X, y)
    model = CalibratedClassifierCV(clf, cv=2, method="isotonic").fit(X, y)
    model_onnx = convert_sklearn(
        model, "scikit-learn CalibratedClassifierCVKNN",
        [("input", FloatTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET)
    self.assertTrue(model_onnx is not None)
    try:
        dump_data_and_model(
            X.astype(np.float32), model, model_onnx,
            basename="SklearnCalibratedClassifierCVIsotonicFloat")
    except Exception as e:
        raise AssertionError("Issue with model\n{}".format(
            model_onnx)) from e
Example #7
Source File: test_calibration.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_calibration_accepts_ndarray(X):
    """Test that calibration accepts n-dimensional arrays as input"""
    y = [1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0]

    class MockTensorClassifier(BaseEstimator):
        """A toy estimator that accepts tensor inputs"""

        def fit(self, X, y):
            self.classes_ = np.unique(y)
            return self

        def decision_function(self, X):
            # toy decision function that just needs to have the right shape:
            return X.reshape(X.shape[0], -1).sum(axis=1)

    calibrated_clf = CalibratedClassifierCV(MockTensorClassifier())
    # we should be able to fit this classifier with no error
    calibrated_clf.fit(X, y)
Example #8
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_calibrated_classifier_cv_binary(self):
    data = load_iris()
    X, y = data.data, data.target
    y[y > 1] = 1
    clf = MultinomialNB().fit(X, y)
    model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
    model_onnx = convert_sklearn(
        model, "scikit-learn CalibratedClassifierCV",
        [("input", FloatTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET)
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.float32), model, model_onnx,
        basename="SklearnCalibratedClassifierCVBinaryMNB",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')")
Example #9
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_calibrated_classifier_cv_logistic_regression(self):
    data = load_iris()
    X, y = data.data, data.target
    y[y > 1] = 1
    model = CalibratedClassifierCV(
        base_estimator=LogisticRegression(), method='sigmoid').fit(X, y)
    model_onnx = convert_sklearn(
        model, "unused",
        [("input", FloatTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET)
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.float32), model, model_onnx,
        basename="SklearnCalibratedClassifierCVBinaryLogReg",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')")
Example #10
Source File: utils.py From kaggle_otto with BSD 3-Clause "New" or "Revised" License | 6 votes |
def make_blender_cv(classifier, x, y, calibrate=False):
    skf = StratifiedKFold(y, n_folds=5, random_state=23)
    scores, predictions = [], None
    for train_index, test_index in skf:
        if calibrate:
            # Make training and calibration
            calibrated_classifier = CalibratedClassifierCV(
                classifier, method='isotonic', cv=get_cv(y[train_index]))
            fitted_classifier = calibrated_classifier.fit(
                x[train_index, :], y[train_index])
        else:
            fitted_classifier = classifier.fit(x[train_index, :],
                                               y[train_index])
        preds = fitted_classifier.predict_proba(x[test_index, :])

        # Free memory
        calibrated_classifier, fitted_classifier = None, None
        gc.collect()

        scores.append(log_loss(y[test_index], preds))
        predictions = (np.append(predictions, preds, axis=0)
                       if predictions is not None else preds)
    return scores, predictions
Example #11
Source File: test_calibration.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.calibration.CalibratedClassifierCV,
                  calibration.CalibratedClassifierCV)
Example #12
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 5 votes |
def test_model_calibrated_classifier_cv_rf(self):
    data = load_iris()
    X, y = data.data, data.target
    y[y > 1] = 1
    model = CalibratedClassifierCV(
        base_estimator=RandomForestClassifier(n_estimators=2),
        method='sigmoid').fit(X, y)
    try:
        convert_sklearn(
            model, "unused",
            [("input", FloatTensorType([None, X.shape[1]]))])
        raise AssertionError(
            "RandomForestClassifier has no decision_function")
    except RuntimeError as e:
        assert "cannot implement decision_function" in str(e)
Example #13
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 5 votes |
def test_model_calibrated_classifier_cv_svc(self):
    data = load_iris()
    X, y = data.data, data.target
    model = CalibratedClassifierCV(
        base_estimator=SVC(), method='sigmoid').fit(X, y)
    model_onnx = convert_sklearn(
        model, "unused",
        [("input", FloatTensorType([None, X.shape[1]]))])
    assert model_onnx is not None
Example #14
Source File: model.py From doccano with MIT License | 5 votes |
def build_model():
    estimator = CalibratedClassifierCV(base_estimator=LinearSVC())
    return estimator
Example #15
Source File: texture.py From CGvsPhoto with MIT License | 5 votes |
def __init__(self, data_directory, model_directory, dump_data_directory,
             image_size, keep_PCA=64, K_gmm=64, only_green=False,
             verbose=True):
    self.model_name = input(" Choose a name for the model : ")
    self.model_directory = model_directory
    self.dump_data_directory = dump_data_directory

    # Initialize hyper-parameters
    self.image_size = image_size
    self.keep_PCA = keep_PCA
    self.K_gmm = K_gmm
    self.only_green = only_green
    self.nb_mini_patch = (int(image_size/8 - 1)**2 +
                          int(image_size/16 - 1)**2)
    self.verbose = verbose

    # Initialize database
    self.data = il.Database_loader(directory=data_directory,
                                   size=image_size,
                                   only_green=only_green)

    # Initialize classifiers
    self.PCAs = []
    for i in range(self.nb_mini_patch):
        self.PCAs.append(PCA(n_components=keep_PCA))

    self.gmm = GaussianMixture(n_components=K_gmm, covariance_type='diag')
    self.clf_svm = CalibratedClassifierCV(LinearSVC())
    self.projector = Projection(red_dim=128, treshold=1.2,
                                learning_rate=0.001, initialization='PCA')
Example #16
Source File: test_integration_DFP_IH.py From DESlib with BSD 3-Clause "New" or "Revised" License | 5 votes |
def setup_classifiers():
    rng = np.random.RandomState(654321)

    # Generate a classification dataset
    X, y = make_classification(n_classes=2, n_samples=1000,
                               weights=[0.2, 0.8], random_state=rng)
    # split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=rng)
    # Scale the variables to have 0 mean and unit variance
    scalar = StandardScaler()
    X_train = scalar.fit_transform(X_train)
    X_test = scalar.transform(X_test)

    # Split the data into training and DSEL for DS techniques
    X_train, X_dsel, y_train, y_dsel = train_test_split(
        X_train, y_train, test_size=0.5, random_state=rng)
    # Considering a pool composed of 100 base classifiers
    model = CalibratedClassifierCV(Perceptron(max_iter=5))
    pool_classifiers = BaggingClassifier(model, n_estimators=100,
                                         random_state=rng)
    pool_classifiers.fit(X_train, y_train)
    return pool_classifiers, X_dsel, y_dsel, X_test, y_test
Example #17
Source File: test_des_integration.py From DESlib with BSD 3-Clause "New" or "Revised" License | 5 votes |
def setup_classifiers(encode_labels=None):
    rng = np.random.RandomState(123456)
    X_dsel, X_test, X_train, y_dsel, y_test, y_train = load_dataset(
        encode_labels, rng)
    model = CalibratedClassifierCV(Perceptron(max_iter=5))
    # Train a pool of 10 classifiers
    pool_classifiers = BaggingClassifier(model, n_estimators=10,
                                         random_state=rng)
    pool_classifiers.fit(X_train, y_train)
    return pool_classifiers, X_dsel, y_dsel, X_test, y_test
Example #18
Source File: test_calibration.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_calibration_prefit():
    """Test calibration for prefitted classifiers"""
    n_samples = 50
    X, y = make_classification(n_samples=3 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_calib, y_calib, sw_calib = \
        X[n_samples:2 * n_samples], y[n_samples:2 * n_samples], \
        sample_weight[n_samples:2 * n_samples]
    X_test, y_test = X[2 * n_samples:], y[2 * n_samples:]

    # Naive-Bayes
    clf = MultinomialNB()
    clf.fit(X_train, y_train, sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # Naive Bayes with calibration
    for this_X_calib, this_X_test in [(X_calib, X_test),
                                      (sparse.csr_matrix(X_calib),
                                       sparse.csr_matrix(X_test))]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv="prefit")

            for sw in [sw_calib, None]:
                pc_clf.fit(this_X_calib, y_calib, sample_weight=sw)
                y_prob = pc_clf.predict_proba(this_X_test)
                y_pred = pc_clf.predict(this_X_test)
                prob_pos_pc_clf = y_prob[:, 1]
                assert_array_equal(y_pred,
                                   np.array([0, 1])[np.argmax(y_prob,
                                                              axis=1)])

                assert_greater(brier_score_loss(y_test, prob_pos_clf),
                               brier_score_loss(y_test, prob_pos_pc_clf))
Example #19
Source File: test_calibration.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_calibration_nan_imputer():
    """Test that calibration can accept nan"""
    X, y = make_classification(n_samples=10, n_features=2,
                               n_informative=2, n_redundant=0,
                               random_state=42)
    X[0, 0] = np.nan
    clf = Pipeline(
        [('imputer', Imputer()),
         ('rf', RandomForestClassifier(n_estimators=1))])
    clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic')
    clf_c.fit(X, y)
    clf_c.predict(X)
Example #20
Source File: test_calibration.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_calibration_prob_sum():
    # Test that sum of probabilities is 1. A non-regression test for
    # issue #7796
    num_classes = 2
    X, y = make_classification(n_samples=10, n_features=5,
                               n_classes=num_classes)
    clf = LinearSVC(C=1.0)
    clf_prob = CalibratedClassifierCV(clf, method="sigmoid",
                                      cv=LeaveOneOut())
    clf_prob.fit(X, y)

    probs = clf_prob.predict_proba(X)
    assert_array_almost_equal(probs.sum(axis=1), np.ones(probs.shape[0]))
Example #21
Source File: calibrated_classifier_cv.py From lale with Apache License 2.0 | 5 votes |
def __init__(self, base_estimator=None, method='sigmoid', cv=3):
    self._hyperparams = {
        'base_estimator': base_estimator,
        'method': method,
        'cv': cv}
    self._wrapped_model = Op(**self._hyperparams)
Example #22
Source File: predictor.py From auto_ml with MIT License | 5 votes |
def _calibrate_final_model(self, trained_model, X_test, y_test):
    if X_test is None or y_test is None:
        print('X_test or y_test was not present while trying to calibrate the final model')
        print('Please pass in both X_test and y_test to calibrate the final model')
        print('Skipping model calibration')
        return trained_model

    print('Now calibrating the final model so the probability predictions line up with the observed probabilities in the X_test and y_test datasets you passed in.')
    print('Note: the validation scores printed above are truly validation scores: they were scored before the model was calibrated to this data.')
    print('However, now that we are calibrating on the X_test and y_test data you gave us, it is no longer accurate to call this data validation data, since the model is being calibrated to it. As such, you must now report a validation score on a different dataset, or report the validation score used above before the model was calibrated to X_test and y_test. ')

    if len(X_test) < 1000:
        calibration_method = 'sigmoid'
    else:
        calibration_method = 'isotonic'

    calibrated_classifier = CalibratedClassifierCV(trained_model,
                                                   method=calibration_method,
                                                   cv='prefit')

    # We need to make sure X_test has been processed the exact same way
    # the training data was.
    X_test_processed = self.transformation_pipeline.transform(X_test)

    try:
        calibrated_classifier = calibrated_classifier.fit(X_test_processed,
                                                          y_test)
    except TypeError as e:
        if scipy.sparse.issparse(X_test_processed):
            X_test_processed = X_test_processed.toarray()
            calibrated_classifier = calibrated_classifier.fit(
                X_test_processed, y_test)
        else:
            raise(e)

    return calibrated_classifier
Example #23
Source File: helper.py From mriqc with BSD 3-Clause "New" or "Revised" License | 5 votes |
def predict(self, X, thres=0.5, return_proba=True):
    """Predict class for X.

    The predicted class of an input sample is a vote by the trees in
    the forest, weighted by their probability estimates. That is,
    the predicted class is the one with highest mean probability
    estimate across the trees.
    """
    if self._model == "svc_lin":
        from sklearn.base import clone
        from sklearn.calibration import CalibratedClassifierCV

        clf = CalibratedClassifierCV(
            clone(self._estimator).set_params(
                **self._estimator.get_params())
        )
        train_y = self._Xtrain[[self._rate_column]].values.ravel().tolist()
        self._estimator = clf.fit(self._Xtrain, train_y)

    proba = np.array(self._estimator.predict_proba(X))

    if proba.shape[1] > 2:
        pred = (proba > thres).astype(int)
    else:
        pred = (proba[:, 1] > thres).astype(int)

    if return_proba:
        return proba, pred

    return pred
Example #24
Source File: utils.py From kaggle_otto with BSD 3-Clause "New" or "Revised" License | 5 votes |
def hold_out_evaluation(classifier, x, y, test_size=0.2, calibrate=False):
    x_train, y_train, x_valid, y_valid = stratified_split(x, y, test_size)
    # Train
    if calibrate:
        # Make training and calibration
        calibrated_classifier = CalibratedClassifierCV(
            classifier, method='isotonic', cv=get_cv(y_train))
        fitted_classifier = calibrated_classifier.fit(x_train, y_train)
    else:
        fitted_classifier = classifier.fit(x_train, y_train)
    # Evaluate
    score = log_loss(y_valid, fitted_classifier.predict_proba(x_valid))
    return score
Example #25
Source File: test_classifier.py From skorch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_with_calibrated_classifier_cv(self, net_fit, data):
    from sklearn.calibration import CalibratedClassifierCV
    cccv = CalibratedClassifierCV(net_fit, cv=2)
    cccv.fit(*data)
Example #26
Source File: testem.py From ibeis with Apache License 2.0 | 5 votes |
def try_rf_classifier():
    # TODO: Evaluate TPOT
    # http://www.randalolson.com/2016/05/08/tpot-a-python-tool-for-automating-data-science/
    # https://www.reddit.com/r/MachineLearning/comments/4ij8dw/tpot_a_python_tool_for_automating_machine_learning/
    # http://keras.io/ --- unifies tensorflow / theano
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.calibration import CalibratedClassifierCV
    from sklearn.metrics import log_loss
    # http://scikit-learn.org/stable/auto_examples/calibration/plot_calibration_multiclass.html
    pairwise_feats, labels = random_case_set()
    X = pairwise_feats
    y = labels
    X_train, y_train = X[:600], y[:600]
    X_valid, y_valid = X[600:800], y[600:800]
    X_train_valid, y_train_valid = X[:800], y[:800]
    X_test, y_test = X[800:], y[800:]

    # Train uncalibrated random forest classifier on whole train and
    # validation data and evaluate on test data
    clf = RandomForestClassifier(n_estimators=25)
    clf.fit(X_train_valid, y_train_valid)
    clf_probs = clf.predict_proba(X_test)
    score = log_loss(y_test, clf_probs)
    print('score = %r' % (score,))

    # Train random forest classifier, calibrate on validation data and
    # evaluate on test data
    clf = RandomForestClassifier(n_estimators=25)
    clf.fit(X_train, y_train)
    clf_probs = clf.predict_proba(X_test)
    sig_clf = CalibratedClassifierCV(clf, method="sigmoid", cv="prefit")
    sig_clf.fit(X_valid, y_valid)
    sig_clf_probs = sig_clf.predict_proba(X_test)
    sig_score = log_loss(y_test, sig_clf_probs)
    print('sig_score = %r' % (sig_score,))
Example #27
Source File: linear_svm.py From driverlessai-recipes with Apache License 2.0 | 5 votes |
def fit(self, X, y, sample_weight=None, eval_set=None,
        sample_weight_eval_set=None, **kwargs):
    X = dt.Frame(X)
    orig_cols = list(X.names)

    if self.num_classes >= 2:
        mod = linsvc(random_state=self.random_state, C=self.params["C"],
                     penalty=self.params["penalty"],
                     loss=self.params["loss"], dual=self.params["dual"])
        kf = StratifiedKFold(n_splits=3, shuffle=True,
                             random_state=self.random_state)
        model = CalibratedClassifierCV(base_estimator=mod,
                                       method='isotonic', cv=kf)
        lb = LabelEncoder()
        lb.fit(self.labels)
        y = lb.transform(y)
    else:
        model = LinearSVR(epsilon=self.params["epsilon"],
                          C=self.params["C"], loss=self.params["loss"],
                          dual=self.params["dual"],
                          random_state=self.random_state)

    self.means = dict()
    self.standard_scaler = StandardScaler()
    for col in X.names:
        XX = X[:, col]
        self.means[col] = XX.mean1()
        if self.means[col] is None:
            self.means[col] = 0
        XX.replace(None, self.means[col])
        X[:, col] = XX
        assert X[dt.isna(dt.f[col]), col].nrows == 0

    X = X.to_numpy()
    X = self.standard_scaler.fit_transform(X)
    model.fit(X, y, sample_weight=sample_weight)

    importances = np.array([0.0 for k in range(len(orig_cols))])
    if self.num_classes >= 2:
        for classifier in model.calibrated_classifiers_:
            importances += np.array(
                abs(classifier.base_estimator.get_coeff()))
    else:
        importances += np.array(abs(model.coef_[0]))

    self.set_model_properties(model=model,
                              features=orig_cols,
                              importances=importances.tolist(),
                              iterations=0)
Example #28
Source File: test_calibration.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_calibration_prob_sum():
    # Test that sum of probabilities is 1. A non-regression test for
    # issue #7796
    num_classes = 2
    X, y = make_classification(n_samples=10, n_features=5,
                               n_classes=num_classes)
    clf = LinearSVC(C=1.0)
    clf_prob = CalibratedClassifierCV(clf, method="sigmoid",
                                      cv=LeaveOneOut())
    clf_prob.fit(X, y)

    probs = clf_prob.predict_proba(X)
    assert_array_almost_equal(probs.sum(axis=1), np.ones(probs.shape[0]))
Example #29
Source File: test_calibration.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_calibration_nan_imputer():
    """Test that calibration can accept nan"""
    X, y = make_classification(n_samples=10, n_features=2,
                               n_informative=2, n_redundant=0,
                               random_state=42)
    X[0, 0] = np.nan
    clf = Pipeline(
        [('imputer', SimpleImputer()),
         ('rf', RandomForestClassifier(n_estimators=1))])
    clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic')
    clf_c.fit(X, y)
    clf_c.predict(X)