Python sklearn.calibration.CalibratedClassifierCV() Examples

The following are 29 code examples of sklearn.calibration.CalibratedClassifierCV(), extracted from open source projects. The originating project and source file are noted above each example. You may also want to check out all available functions/classes of the module sklearn.calibration, or try the search function.
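Before the project examples, a minimal sketch of the typical workflow (synthetic data, not taken from any of the projects below): CalibratedClassifierCV wraps a base classifier, fits it on cross-validation folds, and learns a mapping (Platt sigmoid or isotonic regression) from its scores to calibrated probabilities. Depending on the scikit-learn version, the first parameter is named base_estimator (older releases) or estimator (newer ones); passing it positionally, as here, works in both.

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# LinearSVC has no predict_proba of its own; the calibrated wrapper adds one
# by mapping its decision_function scores through a learned sigmoid.
clf = CalibratedClassifierCV(LinearSVC(), method="sigmoid", cv=3)
clf.fit(X_train, y_train)
proba = clf.predict_proba(X_test)  # one column per class; rows sum to 1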
Example #1
Source File: test_sklearn_calibrated_classifier_cv_converter.py    From sklearn-onnx with MIT License
def test_model_calibrated_classifier_cv_float_nozipmap(self):
        data = load_iris()
        X, y = data.data, data.target
        clf = MultinomialNB().fit(X, y)
        model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
        model_onnx = convert_sklearn(
            model, "scikit-learn CalibratedClassifierCVMNB",
            [("input", FloatTensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET,
            options={id(model): {'zipmap': False}})
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.float32), model, model_onnx,
            basename="SklearnCalibratedClassifierCVFloatNoZipMap",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')") 
Example #2
Source File: test_calibration.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_sample_weight():
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)

    sample_weight = np.random.RandomState(seed=42).uniform(size=len(y))
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test = X[n_samples:]

    for method in ['sigmoid', 'isotonic']:
        base_estimator = LinearSVC(random_state=42)
        calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
        calibrated_clf.fit(X_train, y_train, sample_weight=sw_train)
        probs_with_sw = calibrated_clf.predict_proba(X_test)

        # As the weights are used for the calibration, they should still yield
        # different predictions
        calibrated_clf.fit(X_train, y_train)
        probs_without_sw = calibrated_clf.predict_proba(X_test)

        diff = np.linalg.norm(probs_with_sw - probs_without_sw)
        assert_greater(diff, 0.1) 
Example #3
Source File: test_sklearn_calibrated_classifier_cv_converter.py    From sklearn-onnx with MIT License
def test_model_calibrated_classifier_cv_float(self):
        data = load_iris()
        X, y = data.data, data.target
        clf = MultinomialNB().fit(X, y)
        model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn CalibratedClassifierCVMNB",
            [("input", FloatTensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.float32),
            model,
            model_onnx,
            basename="SklearnCalibratedClassifierCVFloat",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #4
Source File: test_sklearn_calibrated_classifier_cv_converter.py    From sklearn-onnx with MIT License
def test_model_calibrated_classifier_cv_int(self):
        data = load_digits()
        X, y = data.data, data.target
        clf = MultinomialNB().fit(X, y)
        model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn CalibratedClassifierCVMNB",
            [("input", Int64TensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.int64),
            model,
            model_onnx,
            basename="SklearnCalibratedClassifierCVInt-Dec4",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #5
Source File: test_calibration.py    From twitter-stock-recommendation with MIT License
def test_sample_weight():
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)

    sample_weight = np.random.RandomState(seed=42).uniform(size=len(y))
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test = X[n_samples:]

    for method in ['sigmoid', 'isotonic']:
        base_estimator = LinearSVC(random_state=42)
        calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
        calibrated_clf.fit(X_train, y_train, sample_weight=sw_train)
        probs_with_sw = calibrated_clf.predict_proba(X_test)

        # As the weights are used for the calibration, they should still yield
        # different predictions
        calibrated_clf.fit(X_train, y_train)
        probs_without_sw = calibrated_clf.predict_proba(X_test)

        diff = np.linalg.norm(probs_with_sw - probs_without_sw)
        assert_greater(diff, 0.1) 
Example #6
Source File: test_sklearn_calibrated_classifier_cv_converter.py    From sklearn-onnx with MIT License
def test_model_calibrated_classifier_cv_isotonic_float(self):
        data = load_iris()
        X, y = data.data, data.target
        clf = KNeighborsClassifier().fit(X, y)
        model = CalibratedClassifierCV(clf, cv=2, method="isotonic").fit(X, y)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn CalibratedClassifierCVKNN",
            [("input", FloatTensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        try:
            dump_data_and_model(
                X.astype(np.float32),
                model,
                model_onnx,
                basename="SklearnCalibratedClassifierCVIsotonicFloat")
        except Exception as e:
            raise AssertionError("Issue with model\n{}".format(
                model_onnx)) from e 
Example #7
Source File: test_calibration.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_calibration_accepts_ndarray(X):
    """Test that calibration accepts n-dimensional arrays as input"""
    y = [1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0]

    class MockTensorClassifier(BaseEstimator):
        """A toy estimator that accepts tensor inputs"""

        def fit(self, X, y):
            self.classes_ = np.unique(y)
            return self

        def decision_function(self, X):
            # toy decision function that just needs to have the right shape:
            return X.reshape(X.shape[0], -1).sum(axis=1)

    calibrated_clf = CalibratedClassifierCV(MockTensorClassifier())
    # we should be able to fit this classifier with no error
    calibrated_clf.fit(X, y) 
Example #8
Source File: test_sklearn_calibrated_classifier_cv_converter.py    From sklearn-onnx with MIT License
def test_model_calibrated_classifier_cv_binary(self):
        data = load_iris()
        X, y = data.data, data.target
        y[y > 1] = 1
        clf = MultinomialNB().fit(X, y)
        model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn CalibratedClassifierCV",
            [("input", FloatTensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.float32),
            model,
            model_onnx,
            basename="SklearnCalibratedClassifierCVBinaryMNB",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #9
Source File: test_sklearn_calibrated_classifier_cv_converter.py    From sklearn-onnx with MIT License
def test_model_calibrated_classifier_cv_logistic_regression(self):
        data = load_iris()
        X, y = data.data, data.target
        y[y > 1] = 1
        model = CalibratedClassifierCV(
            base_estimator=LogisticRegression(), method='sigmoid').fit(X, y)
        model_onnx = convert_sklearn(
            model, "unused",
            [("input", FloatTensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.float32),
            model,
            model_onnx,
            basename="SklearnCalibratedClassifierCVBinaryLogReg",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #10
Source File: utils.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License
def make_blender_cv(classifier, x, y, calibrate=False):
    skf = StratifiedKFold(y, n_folds=5, random_state=23)
    scores, predictions = [], None
    for train_index, test_index in skf:
        if calibrate:
            # Make training and calibration
            calibrated_classifier = CalibratedClassifierCV(classifier, method='isotonic', cv=get_cv(y[train_index]))
            fitted_classifier = calibrated_classifier.fit(x[train_index, :], y[train_index])
        else:
            fitted_classifier = classifier.fit(x[train_index, :], y[train_index])
        preds = fitted_classifier.predict_proba(x[test_index, :])

        # Free memory
        calibrated_classifier, fitted_classifier = None, None
        gc.collect()

        scores.append(log_loss(y[test_index], preds))
        predictions = np.append(predictions, preds, axis=0) if predictions is not None else preds
    return scores, predictions 
Example #11
Source File: test_calibration.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.calibration.CalibratedClassifierCV,
                      calibration.CalibratedClassifierCV) 
Example #12
Source File: test_sklearn_calibrated_classifier_cv_converter.py    From sklearn-onnx with MIT License
def test_model_calibrated_classifier_cv_rf(self):
        data = load_iris()
        X, y = data.data, data.target
        y[y > 1] = 1
        model = CalibratedClassifierCV(
            base_estimator=RandomForestClassifier(n_estimators=2),
            method='sigmoid').fit(X, y)
        try:
            convert_sklearn(
                model, "unused",
                [("input", FloatTensorType([None, X.shape[1]]))])
            raise AssertionError(
                "RandomForestClassifier has no decision_function")
        except RuntimeError as e:
            assert "cannot implement decision_function" in str(e) 
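For context (general scikit-learn behavior, not part of the converter test above): CalibratedClassifierCV itself does not require decision_function. When the base estimator only exposes predict_proba, as RandomForestClassifier does, the calibrator is fit on those probabilities instead; the RuntimeError checked here is specific to the sklearn-onnx converter. A minimal sketch:

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)
# Plain scikit-learn accepts this: with no decision_function available,
# the sigmoid calibration is fit on the forest's predict_proba output.
calibrated = CalibratedClassifierCV(RandomForestClassifier(n_estimators=10),
                                    method="sigmoid", cv=2).fit(X, y)
print(calibrated.predict_proba(X[:2]))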
Example #13
Source File: test_sklearn_calibrated_classifier_cv_converter.py    From sklearn-onnx with MIT License
def test_model_calibrated_classifier_cv_svc(self):
        data = load_iris()
        X, y = data.data, data.target
        model = CalibratedClassifierCV(
            base_estimator=SVC(),
            method='sigmoid').fit(X, y)
        model_onnx = convert_sklearn(
                model, "unused",
                [("input", FloatTensorType([None, X.shape[1]]))])
        assert model_onnx is not None 
Example #14
Source File: model.py    From doccano with MIT License
def build_model():
    estimator = CalibratedClassifierCV(base_estimator=LinearSVC())

    return estimator 
Example #15
Source File: texture.py    From CGvsPhoto with MIT License
def __init__(self, data_directory, model_directory, dump_data_directory, image_size, 
				 keep_PCA = 64, K_gmm = 64, only_green = False, verbose = True): 


		self.model_name = input("   Choose a name for the model : ")
		self.model_directory = model_directory
		self.dump_data_directory = dump_data_directory

		# Initialize hyper-parameters
		self.image_size = image_size
		self.keep_PCA = keep_PCA
		self.K_gmm = K_gmm
		self.only_green = only_green
		self.nb_mini_patch = int(image_size/8 - 1)**2 + int(image_size/16 - 1)**2 

		self.verbose = verbose
		# Initialize database
		self.data = il.Database_loader(directory = data_directory, 
							  		   size = image_size, 
							  		   only_green = only_green)

		# Initialize classifiers
		self.PCAs = []
		for i in range(self.nb_mini_patch):
			self.PCAs.append(PCA(n_components=keep_PCA))

		self.gmm = GaussianMixture(n_components=K_gmm, 
								   covariance_type='diag')
		self.clf_svm = CalibratedClassifierCV(LinearSVC())

		self.projector = Projection(red_dim = 128, treshold = 1.2,
									learning_rate = 0.001, 
									initialization = 'PCA') 
Example #16
Source File: test_integration_DFP_IH.py    From DESlib with BSD 3-Clause "New" or "Revised" License
def setup_classifiers():
    rng = np.random.RandomState(654321)

    # Generate a classification dataset
    X, y = make_classification(n_classes=2, n_samples=1000, weights=[0.2, 0.8],
                               random_state=rng)
    # split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=rng)

    # Scale the variables to have 0 mean and unit variance
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Split the data into training and DSEL for DS techniques
    X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
                                                        test_size=0.5,
                                                        random_state=rng)
    # Considering a pool composed of 10 base classifiers
    model = CalibratedClassifierCV(Perceptron(max_iter=5))

    pool_classifiers = BaggingClassifier(model, n_estimators=100,
                                         random_state=rng)
    pool_classifiers.fit(X_train, y_train)
    return pool_classifiers, X_dsel, y_dsel, X_test, y_test 
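A note on the wrapper above (general scikit-learn behavior, not DESlib-specific): Perceptron exposes only decision_function, so it is wrapped in CalibratedClassifierCV to obtain the predict_proba output that the bagged pool of dynamic-selection classifiers relies on. A minimal sketch:

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.linear_model import Perceptron

X, y = make_classification(random_state=0)
# Perceptron alone has no predict_proba; the calibrated wrapper adds one by
# mapping its decision scores through sigmoids learned on CV folds.
model = CalibratedClassifierCV(Perceptron(max_iter=5)).fit(X, y)
print(model.predict_proba(X[:3]).sum(axis=1))  # each row sums to 1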
Example #17
Source File: test_des_integration.py    From DESlib with BSD 3-Clause "New" or "Revised" License
def setup_classifiers(encode_labels=None):
    rng = np.random.RandomState(123456)

    X_dsel, X_test, X_train, y_dsel, y_test, y_train = load_dataset(
        encode_labels, rng)
    model = CalibratedClassifierCV(Perceptron(max_iter=5))
    # Train a pool of 100 classifiers
    pool_classifiers = BaggingClassifier(model, n_estimators=10,
                                         random_state=rng)
    pool_classifiers.fit(X_train, y_train)
    return pool_classifiers, X_dsel, y_dsel, X_test, y_test 
Example #18
Source File: test_calibration.py    From twitter-stock-recommendation with MIT License
def test_calibration_prefit():
    """Test calibration for prefitted classifiers"""
    n_samples = 50
    X, y = make_classification(n_samples=3 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_calib, y_calib, sw_calib = \
        X[n_samples:2 * n_samples], y[n_samples:2 * n_samples], \
        sample_weight[n_samples:2 * n_samples]
    X_test, y_test = X[2 * n_samples:], y[2 * n_samples:]

    # Naive-Bayes
    clf = MultinomialNB()
    clf.fit(X_train, y_train, sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # Naive Bayes with calibration
    for this_X_calib, this_X_test in [(X_calib, X_test),
                                      (sparse.csr_matrix(X_calib),
                                       sparse.csr_matrix(X_test))]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv="prefit")

            for sw in [sw_calib, None]:
                pc_clf.fit(this_X_calib, y_calib, sample_weight=sw)
                y_prob = pc_clf.predict_proba(this_X_test)
                y_pred = pc_clf.predict(this_X_test)
                prob_pos_pc_clf = y_prob[:, 1]
                assert_array_equal(y_pred,
                                   np.array([0, 1])[np.argmax(y_prob, axis=1)])

                assert_greater(brier_score_loss(y_test, prob_pos_clf),
                               brier_score_loss(y_test, prob_pos_pc_clf)) 
Example #19
Source File: test_calibration.py    From twitter-stock-recommendation with MIT License
def test_calibration_nan_imputer():
    """Test that calibration can accept nan"""
    X, y = make_classification(n_samples=10, n_features=2,
                               n_informative=2, n_redundant=0,
                               random_state=42)
    X[0, 0] = np.nan
    clf = Pipeline(
        [('imputer', Imputer()),
         ('rf', RandomForestClassifier(n_estimators=1))])
    clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic')
    clf_c.fit(X, y)
    clf_c.predict(X) 
Example #20
Source File: test_calibration.py    From twitter-stock-recommendation with MIT License
def test_calibration_prob_sum():
    # Test that sum of probabilities is 1. A non-regression test for
    # issue #7796
    num_classes = 2
    X, y = make_classification(n_samples=10, n_features=5,
                               n_classes=num_classes)
    clf = LinearSVC(C=1.0)
    clf_prob = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut())
    clf_prob.fit(X, y)

    probs = clf_prob.predict_proba(X)
    assert_array_almost_equal(probs.sum(axis=1), np.ones(probs.shape[0])) 
Example #21
Source File: calibrated_classifier_cv.py    From lale with Apache License 2.0
def __init__(self, base_estimator=None, method='sigmoid', cv=3):
        self._hyperparams = {
            'base_estimator': base_estimator,
            'method': method,
            'cv': cv}
        self._wrapped_model = Op(**self._hyperparams) 
Example #22
Source File: predictor.py    From auto_ml with MIT License
def _calibrate_final_model(self, trained_model, X_test, y_test):

        if X_test is None or y_test is None:
            print('X_test or y_test was not present while trying to calibrate the final model')
            print('Please pass in both X_test and y_test to calibrate the final model')
            print('Skipping model calibration')
            return trained_model

        print('Now calibrating the final model so the probability predictions line up with the observed probabilities in the X_test and y_test datasets you passed in.')
        print('Note: the validation scores printed above are truly validation scores: they were scored before the model was calibrated to this data.')
        print('However, now that we are calibrating on the X_test and y_test data you gave us, it is no longer accurate to call this data validation data, since the model is being calibrated to it. As such, you must now report a validation score on a different dataset, or report the validation score used above before the model was calibrated to X_test and y_test. ')

        if len(X_test) < 1000:
            calibration_method = 'sigmoid'
        else:
            calibration_method = 'isotonic'

        calibrated_classifier = CalibratedClassifierCV(trained_model, method=calibration_method, cv='prefit')

        # We need to make sure X_test has been processed the exact same way the training data was.
        X_test_processed = self.transformation_pipeline.transform(X_test)

        try:
            calibrated_classifier = calibrated_classifier.fit(X_test_processed, y_test)
        except TypeError as e:
            if scipy.sparse.issparse(X_test_processed):
                X_test_processed = X_test_processed.toarray()

                calibrated_classifier = calibrated_classifier.fit(X_test_processed, y_test)
            else:
                raise(e)

        return calibrated_classifier 
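The cv='prefit' pattern above keeps the already-trained model fixed and fits only the calibration mapping, on data the model has not seen. A minimal self-contained sketch of the same idea (synthetic data; the names are illustrative, not auto_ml's):

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=2000, random_state=0)
X_train, X_holdout, y_train, y_holdout = train_test_split(X, y, random_state=0)

base = RandomForestClassifier(n_estimators=50, random_state=0).fit(X_train, y_train)
# cv='prefit': the base model is not refit; only the isotonic (or sigmoid)
# mapping is learned, here on the held-out split.
calibrated = CalibratedClassifierCV(base, method='isotonic', cv='prefit')
calibrated.fit(X_holdout, y_holdout)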
Example #23
Source File: helper.py    From mriqc with BSD 3-Clause "New" or "Revised" License
def predict(self, X, thres=0.5, return_proba=True):
        """

        Predict class for X.
        The predicted class of an input sample is a vote by the trees in
        the forest, weighted by their probability estimates. That is,
        the predicted class is the one with highest mean probability
        estimate across the trees.

        """

        if self._model == "svc_lin":
            from sklearn.base import clone
            from sklearn.calibration import CalibratedClassifierCV

            clf = CalibratedClassifierCV(
                clone(self._estimator).set_params(**self._estimator.get_params())
            )
            train_y = self._Xtrain[[self._rate_column]].values.ravel().tolist()
            self._estimator = clf.fit(self._Xtrain, train_y)

        proba = np.array(self._estimator.predict_proba(X))

        if proba.shape[1] > 2:
            pred = (proba > thres).astype(int)
        else:
            pred = (proba[:, 1] > thres).astype(int)

        if return_proba:
            return proba, pred

        return pred 
Example #24
Source File: utils.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License
def hold_out_evaluation(classifier, x, y, test_size=0.2, calibrate=False):
    x_train, y_train, x_valid, y_valid = stratified_split(x, y, test_size)

    # Train
    if calibrate:
        # Make training and calibration
        calibrated_classifier = CalibratedClassifierCV(classifier, method='isotonic', cv=get_cv(y_train))
        fitted_classifier = calibrated_classifier.fit(x_train, y_train)
    else:
        fitted_classifier = classifier.fit(x_train, y_train)
    # Evaluate
    score = log_loss(y_valid, fitted_classifier.predict_proba(x_valid))

    return score 
Example #25
Source File: test_classifier.py    From skorch with BSD 3-Clause "New" or "Revised" License
def test_with_calibrated_classifier_cv(self, net_fit, data):
        from sklearn.calibration import CalibratedClassifierCV
        cccv = CalibratedClassifierCV(net_fit, cv=2)
        cccv.fit(*data) 
Example #26
Source File: testem.py    From ibeis with Apache License 2.0
def try_rf_classifier():
    # TODO: Evaluate TPOT
    # http://www.randalolson.com/2016/05/08/tpot-a-python-tool-for-automating-data-science/
    # https://www.reddit.com/r/MachineLearning/comments/4ij8dw/tpot_a_python_tool_for_automating_machine_learning/
    # http://keras.io/ --- unifies tensorflow / theano
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.calibration import CalibratedClassifierCV
    from sklearn.metrics import log_loss

    # http://scikit-learn.org/stable/auto_examples/calibration/plot_calibration_multiclass.html
    pairwise_feats, labels = random_case_set()
    X = pairwise_feats
    y = labels
    X_train, y_train = X[:600], y[:600]
    X_valid, y_valid = X[600:800], y[600:800]
    X_train_valid, y_train_valid = X[:800], y[:800]
    X_test, y_test = X[800:], y[800:]

    # Train uncalibrated random forest classifier on whole train and validation
    # data and evaluate on test data
    clf = RandomForestClassifier(n_estimators=25)
    clf.fit(X_train_valid, y_train_valid)
    clf_probs = clf.predict_proba(X_test)
    score = log_loss(y_test, clf_probs)
    print('score = %r' % (score,))

    # Train random forest classifier, calibrate on validation data and evaluate
    # on test data
    clf = RandomForestClassifier(n_estimators=25)
    clf.fit(X_train, y_train)
    clf_probs = clf.predict_proba(X_test)
    sig_clf = CalibratedClassifierCV(clf, method="sigmoid", cv="prefit")
    sig_clf.fit(X_valid, y_valid)
    sig_clf_probs = sig_clf.predict_proba(X_test)
    sig_score = log_loss(y_test, sig_clf_probs)
    print('sig_score = %r' % (sig_score,)) 
Example #27
Source File: linear_svm.py    From driverlessai-recipes with Apache License 2.0
def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
        X = dt.Frame(X)

        orig_cols = list(X.names)

        if self.num_classes >= 2:
            mod = linsvc(random_state=self.random_state, C=self.params["C"], penalty=self.params["penalty"],
                         loss=self.params["loss"], dual=self.params["dual"])
            kf = StratifiedKFold(n_splits=3, shuffle=True, random_state=self.random_state)
            model = CalibratedClassifierCV(base_estimator=mod, method='isotonic', cv=kf)
            lb = LabelEncoder()
            lb.fit(self.labels)
            y = lb.transform(y)
        else:
            model = LinearSVR(epsilon=self.params["epsilon"], C=self.params["C"], loss=self.params["loss"],
                              dual=self.params["dual"], random_state=self.random_state)
        self.means = dict()
        self.standard_scaler = StandardScaler()
        for col in X.names:
            XX = X[:, col]
            self.means[col] = XX.mean1()
            if self.means[col] is None:
                self.means[col] = 0
            XX.replace(None, self.means[col])
            X[:, col] = XX
            assert X[dt.isna(dt.f[col]), col].nrows == 0
        X = X.to_numpy()
        X = self.standard_scaler.fit_transform(X)
        model.fit(X, y, sample_weight=sample_weight)
        importances = np.array([0.0 for k in range(len(orig_cols))])
        if self.num_classes >= 2:
            for classifier in model.calibrated_classifiers_:
                importances += np.array(abs(classifier.base_estimator.get_coeff()))
        else:
            importances += np.array(abs(model.coef_[0]))

        self.set_model_properties(model=model,
                                  features=orig_cols,
                                  importances=importances.tolist(),  # abs(model.coef_[0])
                                  iterations=0) 
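A note on the importance loop above (general scikit-learn behavior; linsvc and get_coeff come from the recipe's own wrapper, not scikit-learn): after fitting, calibrated_classifiers_ holds one fitted calibrated classifier per CV fold, and a linear base estimator's weights can be read from its coef_ attribute. A minimal sketch with a plain LinearSVC:

import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=300, random_state=0)
model = CalibratedClassifierCV(LinearSVC(), cv=3).fit(X, y)

importances = np.zeros(X.shape[1])
for cc in model.calibrated_classifiers_:
    # The attribute holding the fitted base estimator is named
    # estimator in newer scikit-learn versions, base_estimator in older ones.
    est = getattr(cc, "estimator", None) or getattr(cc, "base_estimator")
    importances += np.abs(est.coef_[0])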
Example #28
Source File: test_calibration.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_calibration_prob_sum():
    # Test that sum of probabilities is 1. A non-regression test for
    # issue #7796
    num_classes = 2
    X, y = make_classification(n_samples=10, n_features=5,
                               n_classes=num_classes)
    clf = LinearSVC(C=1.0)
    clf_prob = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut())
    clf_prob.fit(X, y)

    probs = clf_prob.predict_proba(X)
    assert_array_almost_equal(probs.sum(axis=1), np.ones(probs.shape[0])) 
Example #29
Source File: test_calibration.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_calibration_nan_imputer():
    """Test that calibration can accept nan"""
    X, y = make_classification(n_samples=10, n_features=2,
                               n_informative=2, n_redundant=0,
                               random_state=42)
    X[0, 0] = np.nan
    clf = Pipeline(
        [('imputer', SimpleImputer()),
         ('rf', RandomForestClassifier(n_estimators=1))])
    clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic')
    clf_c.fit(X, y)
    clf_c.predict(X)