Python sklearn.base.ClassifierMixin() Examples
The following are 27 code examples of sklearn.base.ClassifierMixin().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.base, or try the search function.
Example #1
Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_template_4():
    """Check that a fitted TPOT run honours a template whose first step names a specific operator."""
    classifier = TPOTClassifier(
        population_size=5,
        generations=2,
        random_state=42,
        verbosity=0,
        config_dict='TPOT light',
        template='SelectPercentile-Transformer-Classifier',
    )
    classifier.fit(pretest_X, pretest_y)

    # Fitting must have produced an optimized individual and recorded a start time.
    assert isinstance(classifier._optimized_pipeline, creator.Individual)
    assert classifier._start_datetime is not None

    fitted = classifier.fitted_pipeline_
    n_operators = classifier._operator_count(classifier._optimized_pipeline)
    assert n_operators == 3

    # The first step is pinned to the named operator; each step must match
    # the mixin the template asks for, in order.
    assert fitted.steps[0][0] == 'SelectPercentile'.lower()
    for position, mixin in enumerate((SelectorMixin, TransformerMixin, ClassifierMixin)):
        assert issubclass(fitted.steps[position][1].__class__, mixin)
Example #2
Source File: test_multioutput.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_classifier_chain_fit_and_predict_with_logistic_regression():
    # Fit a classifier chain on correlated multilabel data and check that
    # predictions, probabilities and per-link coefficient sizes are consistent.
    X, Y = generate_multilabel_dataset_with_correlations()
    chain = ClassifierChain(LogisticRegression())
    chain.fit(X, Y)

    predicted = chain.predict(X)
    assert_equal(predicted.shape, Y.shape)

    # Thresholding the probabilities at 0.5 must reproduce predict().
    thresholded = chain.predict_proba(X) >= .5
    assert_array_equal(thresholded, predicted)

    # Coefficient sizes are expected to grow by one per estimator in the chain.
    n_features, n_labels = X.shape[1], Y.shape[1]
    coef_sizes = [link.coef_.size for link in chain.estimators_]
    assert_equal(coef_sizes, list(range(n_features, n_features + n_labels)))

    assert isinstance(chain, ClassifierMixin)
Example #3
Source File: stacking.py From stacked_generalization with Apache License 2.0 | 6 votes |
def _get_child_predict(self, clf, X, index=None):
    """Return the stage-0 output of ``clf`` on ``X`` in the form used for stacking.

    The branch taken depends on what ``clf`` offers:

    * ``self.stack_by_proba`` is truthy and ``clf`` has ``predict_proba``:
      the probabilities with the first column dropped (``proba[:, 1:]``).
      When ``self.save_stage0`` is truthy and ``index`` is given, the
      probabilities come from ``util.saving_predict_proba(clf, X, index)``.
    * ``clf`` has ``predict``: for a ``ClassifierMixin`` the predicted labels
      are binarized with a ``LabelBinarizer`` fitted on those predictions;
      otherwise the predictions are reshaped into a single column.
    * otherwise: the result of ``clf.fit_transform(X)``.

    :param clf: stage-0 estimator or transformer.
    :param X: input samples passed through to ``clf``.
    :param index: optional key forwarded to ``util.saving_predict_proba``.
    :return: the stage-0 output described above.
    """
    if self.stack_by_proba and hasattr(clf, 'predict_proba'):
        if self.save_stage0 and index is not None:
            proba = util.saving_predict_proba(clf, X, index)
        else:
            proba = clf.predict_proba(X)
        return proba[:, 1:]

    if hasattr(clf, 'predict'):
        predict_result = clf.predict(X)
        if isinstance(clf, ClassifierMixin):
            # fit_transform already fits the binarizer, so the separate
            # fit() call that used to precede it was redundant work.
            return LabelBinarizer().fit_transform(predict_result)
        return predict_result.reshape((predict_result.size, 1))

    return clf.fit_transform(X)
Example #4
Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_template_3():
    """Check that every individual of a fresh population follows a template naming a specific operator."""
    classifier = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='SelectPercentile-Transformer-Classifier',
    )
    classifier._fit_init()
    expected_mixins = (SelectorMixin, TransformerMixin, ClassifierMixin)

    for individual in classifier._toolbox.population(n=10):
        n_operators = classifier._operator_count(individual)
        compiled = classifier._toolbox.compile(expr=individual)

        assert n_operators == 3
        assert compiled.steps[0][0] == 'SelectPercentile'.lower()
        # Each step must be an instance of the mixin requested at that position.
        for position, mixin in enumerate(expected_mixins):
            assert issubclass(compiled.steps[position][1].__class__, mixin)
Example #5
Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_template_2():
    """Check that a template repeating an operator type (two selectors) is honoured by every individual."""
    classifier = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='Selector-Selector-Transformer-Classifier',
    )
    classifier._fit_init()
    expected_mixins = (SelectorMixin, SelectorMixin, TransformerMixin, ClassifierMixin)

    for individual in classifier._toolbox.population(n=10):
        n_operators = classifier._operator_count(individual)
        compiled = classifier._toolbox.compile(expr=individual)

        assert n_operators == 4
        # Each step must be an instance of the mixin requested at that position.
        for position, mixin in enumerate(expected_mixins):
            assert issubclass(compiled.steps[position][1].__class__, mixin)
Example #6
Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_template_1():
    """Check that a template made only of operator types is honoured by every individual."""
    classifier = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='Selector-Transformer-Classifier',
    )
    classifier._fit_init()
    expected_mixins = (SelectorMixin, TransformerMixin, ClassifierMixin)

    for individual in classifier._toolbox.population(n=10):
        n_operators = classifier._operator_count(individual)
        compiled = classifier._toolbox.compile(expr=individual)

        assert n_operators == 3
        # Each step must be an instance of the mixin requested at that position.
        for position, mixin in enumerate(expected_mixins):
            assert issubclass(compiled.steps[position][1].__class__, mixin)
Example #7
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_base_chain_fit_and_predict():
    # Fit a regressor chain and a classifier chain on the same data and check
    # prediction shapes and per-link coefficient sizes for both.
    X, Y = generate_multilabel_dataset_with_correlations()
    regressor_chain = RegressorChain(Ridge())
    classifier_chain = ClassifierChain(LogisticRegression())
    expected_sizes = list(range(X.shape[1], X.shape[1] + Y.shape[1]))

    for chain in (regressor_chain, classifier_chain):
        chain.fit(X, Y)
        Y_pred = chain.predict(X)
        assert_equal(Y_pred.shape, Y.shape)
        assert_equal([link.coef_.size for link in chain.estimators_],
                     expected_sizes)

    # Y_pred now holds the classifier chain's predictions (last loop pass);
    # thresholding its probabilities at 0.5 must reproduce them.
    thresholded = classifier_chain.predict_proba(X) >= .5
    assert_array_equal(thresholded, Y_pred)

    assert isinstance(classifier_chain, ClassifierMixin)
Example #8
Source File: mis_classifier.py From autoimpute with MIT License | 6 votes |
def __init__(self, classifier=None, predictors="all"):
    """Create an instance of the MissingnessClassifier.

    The MissingnessClassifier inherits from sklearn BaseEstimator and
    ClassifierMixin. This inheritance and this class' implementation
    ensure that the MissingnessClassifier is a valid classifier that
    will work in an sklearn pipeline.

    The constructor only stores its arguments; nothing is validated here.

    Args:
        classifier (classifier, optional): valid classifier from sklearn.
            If None, default is xgboost. Note that classifier must
            conform to sklearn style. This means it must implement the
            `predict_proba` method and act as a proper classifier.
        predictors (str, iter, dict, optional): defaults to all, i.e.
            use all predictors. If all, every column will be used for
            every class prediction. If a list, subset of columns used for
            all predictions. If a dict, specify which columns to use as
            predictors for each imputation. Columns not specified in dict
            will receive `all` by default.
    """
    self.predictors = predictors
    self.classifier = classifier
Example #9
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def verify(self, X, predict_params = {}, predict_proba_params = {}, precision = 1e-13, zeroThreshold = 1e-13):
	"""Build verification data from ``X`` and store it on ``self.verification``.

	Predictions for ``X`` are computed with ``self.predict`` and, for
	classifiers exposing ``predict_proba``, with ``self.predict_proba``.
	They are stored together with the input values in a ``_Verification``
	object.

	:param X: verification data; its column names must match ``self.active_fields``.
	:param predict_params: keyword arguments forwarded to ``self.predict``.
	:param predict_proba_params: keyword arguments forwarded to ``self.predict_proba``.
	:param precision: passed through to ``_Verification``.
	:param zeroThreshold: passed through to ``_Verification``.
	:raises ValueError: if either set of column names is unavailable, or if they differ.

	NOTE(review): the dict defaults are shared between calls; they are only
	unpacked here, never mutated, so this is harmless as written.
	"""
	active_fields = _get_column_names(X)
	if self.active_fields is None or active_fields is None:
		raise ValueError("Cannot perform model validation with anonymous data")
	if self.active_fields.tolist() != active_fields.tolist():
		raise ValueError("The columns between training data {} and verification data {} do not match".format(self.active_fields, active_fields))
	active_values = _get_values(X)
	y = self.predict(X, **predict_params)
	target_values = _get_values(y)
	estimator = self._final_estimator
	if isinstance(estimator, BaseEstimator):
		# scikit-learn style estimators: regressors store predictions only,
		# classifiers additionally store probabilities when available.
		if isinstance(estimator, RegressorMixin):
			self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
		elif isinstance(estimator, ClassifierMixin):
			self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
			if hasattr(estimator, "predict_proba"):
				try:
					y_proba = self.predict_proba(X, **predict_proba_params)
					self.verification.probability_values = _get_values(y_proba)
				except AttributeError:
					# Best effort: probabilities are simply left out of the
					# verification data when AttributeError is raised here.
					pass
	# elif isinstance(estimator, H2OEstimator):
	elif hasattr(estimator, "_estimator_type") and hasattr(estimator, "download_mojo"):
		# Duck-typed check standing in for the commented-out H2OEstimator test.
		if estimator._estimator_type == "regressor":
			self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
		elif estimator._estimator_type == "classifier":
			# Column 0 is taken as the predicted value and the remaining
			# columns as the probabilities.
			probability_values = target_values[:, 1:]
			target_values = target_values[:, 0]
			self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
			self.verification.probability_values = probability_values
Example #10
Source File: test_voting.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_sample_weight_kwargs():
    """VotingClassifier must hand ``sample_weight`` to estimators as a keyword argument."""

    class MockClassifier(BaseEstimator, ClassifierMixin):
        """Stub estimator whose ``fit`` only passes when ``sample_weight`` arrives as a keyword."""

        # NOTE(review): the catch-all is deliberately named ``sample_weight``;
        # presumably the name is what the ensemble inspects -- confirm before renaming.
        def fit(self, X, y, *args, **sample_weight):
            assert 'sample_weight' in sample_weight

    unit_weights = np.ones((len(y),))
    ensemble = VotingClassifier(estimators=[('mock', MockClassifier())],
                                voting='soft')

    # Should not raise an error.
    ensemble.fit(X, y, sample_weight=unit_weights)
Example #11
Source File: test_voting_classifier.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_sample_weight_kwargs():
    """VotingClassifier must hand ``sample_weight`` to estimators as a keyword argument."""

    class MockClassifier(BaseEstimator, ClassifierMixin):
        """Stub estimator whose ``fit`` only passes when ``sample_weight`` arrives as a keyword."""

        # NOTE(review): the catch-all is deliberately named ``sample_weight``;
        # presumably the name is what the ensemble inspects -- confirm before renaming.
        def fit(self, X, y, *args, **sample_weight):
            assert_true('sample_weight' in sample_weight)

    unit_weights = np.ones((len(y),))
    ensemble = VotingClassifier(estimators=[('mock', MockClassifier())],
                                voting='soft')

    # Should not raise an error.
    ensemble.fit(X, y, sample_weight=unit_weights)
Example #12
Source File: tree.py From sklearn-pmml with MIT License | 5 votes |
def __init__(self, estimator, context, mode):
    """
    Validate that the estimator and output schema suit a tree converter and
    set up the internal schema.

    :param estimator: fitted estimator; must expose a ``tree_`` attribute
    :param context: transformation context holding the schemas
    :param mode: ``ModelMode.CLASSIFICATION`` or ``ModelMode.REGRESSION``
    """
    super(DecisionTreeConverter, self).__init__(estimator, context, mode)

    assert len(self.context.schemas[Schema.OUTPUT]) == 1, 'Only one-label trees are supported'
    assert hasattr(estimator, 'tree_'), 'Estimator has no tree_ attribute'

    if mode == ModelMode.CLASSIFICATION:
        # A categorical output field means the label is predicted;
        # anything else is treated as a probability output.
        output_is_categorical = isinstance(self.context.schemas[Schema.OUTPUT][0], CategoricalFeature)
        self.prediction_output = self.OUTPUT_LABEL if output_is_categorical else self.OUTPUT_PROBABILITY
        assert isinstance(self.estimator, ClassifierMixin), \
            'Only a classifier can be serialized in classification mode'

    if mode == ModelMode.REGRESSION:
        assert isinstance(self.context.schemas[Schema.OUTPUT][0], NumericFeature), \
            'Only a numeric feature can be an output of regression'
        assert isinstance(self.estimator, RegressorMixin), \
            'Only a regressor can be serialized in regression mode'

    assert estimator.tree_.value.shape[1] == len(self.context.schemas[Schema.OUTPUT]), \
        'Tree outputs {} results while the schema specifies {} output fields'.format(
            estimator.tree_.value.shape[1], len(self.context.schemas[Schema.OUTPUT]))

    # create hidden variables for each categorical output
    # TODO: this code is copied from the ClassifierConverter. To make things right, we need an abstract tree
    # TODO: converter and subclasses for classifier and regression converters
    self.context.schemas[Schema.INTERNAL] = [
        field for field in self.context.schemas[Schema.OUTPUT]
        if isinstance(field, CategoricalFeature)
    ]
Example #13
Source File: model.py From sklearn-pmml with MIT License | 5 votes |
def __init__(self, estimator, context):
    """
    Set up a converter in classification mode and check that it is applied
    to a classifier with categorical outputs only.

    :param estimator: Estimator to convert
    :type estimator: BaseEstimator
    :param context: context to work with
    :type context: TransformationContext
    """
    super(ClassifierConverter, self).__init__(estimator, context, ModelMode.CLASSIFICATION)
    assert isinstance(estimator, ClassifierMixin), 'Classifier converter should only be applied to the classification models'

    for output_field in context.schemas[Schema.OUTPUT]:
        assert isinstance(output_field, CategoricalFeature), 'Only categorical outputs are supported for classification task'

    # create hidden variables for each categorical output
    self.context.schemas[Schema.INTERNAL] = [
        field for field in self.context.schemas[Schema.OUTPUT]
        if isinstance(field, CategoricalFeature)
    ]
Example #14
Source File: stacking.py From stacked_generalization with Apache License 2.0 | 5 votes |
def _get_blend_init(self, y_train, clf):
    """Allocate a zero-filled array for the stage-0 output of ``clf``.

    The array has ``y_train.size`` rows. The number of columns depends on
    what ``clf`` offers:

    * ``self.stack_by_proba`` and ``predict_proba``: ``self.n_classes_ - 1``
    * ``predict`` on a ``ClassifierMixin``: ``self.n_classes_``
    * ``predict`` on anything else: ``1``
    * an ``n_components`` attribute: ``clf.n_components``

    :raises Exception: if ``clf`` matches none of the cases above.
    """
    if self.stack_by_proba and hasattr(clf, 'predict_proba'):
        n_columns = self.n_classes_ - 1
    elif hasattr(clf, 'predict'):
        n_columns = self.n_classes_ if isinstance(clf, ClassifierMixin) else 1
    elif hasattr(clf, 'n_components'):
        n_columns = clf.n_components
    else:
        raise Exception('Unimplemented for {0}'.format(type(clf)))

    return np.zeros((y_train.size, n_columns))
Example #15
Source File: classification.py From decoding-brain-challenge-2016 with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, metric='riemann', tsupdate=False,
             clf=LogisticRegression()):
    """Store the parameters and reject a ``clf`` that is not a ClassifierMixin.

    :raises TypeError: if ``clf`` is not an instance of ``ClassifierMixin``.
    """
    self.metric, self.tsupdate, self.clf = metric, tsupdate, clf

    if not isinstance(clf, ClassifierMixin):
        raise TypeError('clf must be a ClassifierMixin')

    # NOTE(review): this instance is built and immediately discarded;
    # presumably it only serves to exercise the constructor -- confirm.
    TangentSpace(metric=self.metric, tsupdate=self.tsupdate)
Example #16
Source File: ABuMLGrid.py From abu with GNU General Public License v3.0 | 5 votes |
def _scoring_grid(estimator, scoring):
    """
    Pick the scoring to use for a supervised estimator.

    Unsupervised estimators are filtered out. When ``scoring`` is not given,
    classifiers are measured with accuracy and regressors with
    ``explained_variance_score`` wrapped by ``make_scorer``.

    :param estimator: estimator object
    :param scoring: metric to use; when ``None`` a default is chosen from the
                    estimator type as described above
    :return: the scoring to use, or ``None`` when none applies
    """
    if not isinstance(estimator, (ClassifierMixin, RegressorMixin)):
        logging.info('only support supervised learning')
        # TODO: scoring metrics and GridSearchCV for unsupervised learning
        return None

    # An explicitly supplied scoring always wins.
    if scoring is not None:
        return scoring

    if isinstance(estimator, ClassifierMixin):
        # classifiers are measured with accuracy
        return 'accuracy'

    if isinstance(estimator, RegressorMixin):
        # Regressors use explained_variance_score wrapped by make_scorer.
        # make_scorer assigns the sign of the returned value through
        # greater_is_better, e.g.: sign = 1 if greater_is_better else -1
        return make_scorer(explained_variance_score, greater_is_better=True)

    return None
Example #17
Source File: ABuML.py From abu with GNU General Public License v3.0 | 5 votes |
def plot_graphviz_tree(self, **kwargs):
    """
    Draw the decision diagram of a decision tree or tree-based estimator.

    Per the original docstring this method is wrapped by
    entry_wrapper(support=(EMLFitType.E_FIT_CLF, EMLFitType.E_FIT_REG)),
    i.e. it supports supervised classification and regression. If the
    current estimator has no ``tree_`` attribute, a decision tree is
    substituted for it.

    :param kwargs: callers may pass x and y, read through
                        x = kwargs.pop('x', self.x)
                        y = kwargs.pop('y', self.y)
                   and the fiter_type used by the decorator, eg:
                        ttn_abu = AbuML.create_test_more_fiter()
                        ttn_abu.plot_graphviz_tree(fiter_type=ml.EMLFitType.E_FIT_CLF)
    """
    x = kwargs.pop('x', self.x)
    y = kwargs.pop('y', self.y)
    fiter = self.get_fiter()

    if not hasattr(fiter, 'tree_'):
        self.log_func('{} not hasattr tree_, use decision tree replace'.format(
            fiter.__class__.__name__))

        # FIXME: better not to decide the estimator type through
        # ClassifierMixin / RegressorMixin, since that ties the check to
        # sklearn; AbuMLCreater also references third-party libraries such
        # as hmmlearn and xgboost.
        is_pure_regressor = (not isinstance(fiter, ClassifierMixin)
                             and isinstance(fiter, RegressorMixin))
        if is_pure_regressor:
            fiter = self.estimator.decision_tree_regressor(assign=False)
        else:
            # classifiers and unrecognised estimators both get a tree classifier
            fiter = self.estimator.decision_tree_classifier(assign=False)

    # self.df.columns has to be passed in as the names
    return ABuMLExecute.graphviz_tree(fiter, self.df.columns, x, y)
Example #18
Source File: elm.py From SVM-CNN with Apache License 2.0 | 5 votes |
def score(self, X, y):
    """Return the accuracy of ``self.predict(X)`` against ``y``.

    Accuracy is computed explicitly because this class does not inherit
    from ClassifierMixin.
    """
    from sklearn.metrics import accuracy_score

    predictions = self.predict(X)
    return accuracy_score(y, predictions)
Example #19
Source File: stacking_estimator.py From tpot with GNU Lesser General Public License v3.0 | 5 votes |
def transform(self, X):
    """Transform data by prepending the estimator's outputs as synthetic features.

    Parameters
    ----------
    X: numpy ndarray, {n_samples, n_components}
        New data, where n_samples is the number of samples and n_components
        is the number of components.

    Returns
    -------
    X_transformed: array-like
        ``X`` with the class prediction prepended as one column. For a
        classifier with a ``predict_proba`` attribute whose probabilities
        are all finite, the class probabilities are inserted between the
        prediction column and ``X``.
    """
    X = check_array(X)
    estimator = self.estimator
    features = np.copy(X)

    # add class probabilities as a synthetic feature
    if issubclass(estimator.__class__, ClassifierMixin) and hasattr(estimator, 'predict_proba'):
        probabilities = estimator.predict_proba(X)
        # only use them when no value is infinite or NaN
        if np.all(np.isfinite(probabilities)):
            features = np.hstack((probabilities, X))

    # add class prediction as a synthetic feature
    prediction_column = np.reshape(estimator.predict(X), (-1, 1))
    return np.hstack((prediction_column, features))
Example #20
Source File: test_voting.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_sample_weight():
    """Exercise the sample_weight parameter of VotingClassifier."""
    logreg = LogisticRegression(random_state=123)
    forest = RandomForestClassifier(random_state=123)
    svc = SVC(gamma='scale', probability=True, random_state=123)

    def soft_voter(*named_estimators):
        # Each call builds a fresh estimator list for the ensemble.
        return VotingClassifier(estimators=list(named_estimators),
                                voting='soft')

    # Uniform weights must give the same result as no weights at all.
    weighted = soft_voter(('lr', logreg), ('rf', forest), ('svc', svc)).fit(
        X, y, sample_weight=np.ones((len(y),)))
    unweighted = soft_voter(('lr', logreg), ('rf', forest), ('svc', svc)).fit(X, y)
    assert_array_equal(weighted.predict(X), unweighted.predict(X))
    assert_array_almost_equal(weighted.predict_proba(X),
                              unweighted.predict_proba(X))

    # A single-estimator ensemble must agree with the estimator fitted alone
    # on the same random weights.
    sample_weight = np.random.RandomState(123).uniform(size=(len(y),))
    single = soft_voter(('lr', logreg))
    single.fit(X, y, sample_weight)
    logreg.fit(X, y, sample_weight)
    assert_array_equal(single.predict(X), logreg.predict(X))
    assert_array_almost_equal(single.predict_proba(X),
                              logreg.predict_proba(X))

    # check that an error is raised and indicative if sample_weight is not
    # supported.
    knn = KNeighborsClassifier()
    with_knn = soft_voter(('lr', logreg), ('svc', svc), ('knn', knn))
    msg = ('Underlying estimator KNeighborsClassifier does not support '
           'sample weights.')
    with pytest.raises(ValueError, match=msg):
        with_knn.fit(X, y, sample_weight)

    # check that _parallel_fit_estimator will raise the right error
    # it should raise the original error if this is not linked to sample_weight
    class ClassifierErrorFit(BaseEstimator, ClassifierMixin):
        def fit(self, X, y, sample_weight):
            raise TypeError('Error unrelated to sample_weight.')

    failing = ClassifierErrorFit()
    with pytest.raises(TypeError, match='Error unrelated to sample_weight'):
        failing.fit(X, y, sample_weight=sample_weight)
Example #21
Source File: _test.py From ibex with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _generate_bases_test(est, pd_est):
    """Build a test method comparing the sklearn mixins of ``est`` and ``pd_est``.

    The returned function takes the test case as ``self`` and asserts that:

    * ``pd_est`` is a ``FrameMixin`` and a ``base.BaseEstimator``, while
      ``est`` is not a ``FrameMixin``;
    * for every mixin checked, ``pd_est`` and ``est`` agree on whether they
      are an instance of it.

    ``base.DensityMixin`` is included when the ``base`` module provides it.
    If it is missing, the error is re-raised only when ``_sklearn_ver > 17``.

    :param est: the original estimator.
    :param pd_est: the wrapped estimator to compare against ``est``.
    :return: a function suitable for use as a test method.
    """
    def test(self):
        self.assertTrue(isinstance(pd_est, FrameMixin), pd_est)
        self.assertFalse(isinstance(est, FrameMixin))
        self.assertTrue(isinstance(pd_est, base.BaseEstimator))

        mixins = [
            base.ClassifierMixin,
            base.ClusterMixin,
            base.BiclusterMixin,
            base.TransformerMixin,
            base.MetaEstimatorMixin,
            base.RegressorMixin,
        ]
        try:
            mixins.append(base.DensityMixin)
        except AttributeError:
            # Only a missing attribute is tolerated here; a bare ``except``
            # would also swallow KeyboardInterrupt and SystemExit.
            if _sklearn_ver > 17:
                raise

        for mixin in mixins:
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                mixin)

    return test
Example #22
Source File: util.py From jh-kaggle-util with Apache License 2.0 | 5 votes |
def transform(self, X):
    """Return ``X`` with the estimator's outputs prepended as synthetic features.

    The class prediction is always prepended as one column. For a classifier
    with a ``predict_proba`` attribute, the class probabilities are inserted
    between the prediction column and ``X``.
    """
    X = check_array(X)
    estimator = self.estimator
    stacked = np.copy(X)

    # add class probabilities as a synthetic feature
    if issubclass(estimator.__class__, ClassifierMixin) and hasattr(estimator, 'predict_proba'):
        stacked = np.hstack((estimator.predict_proba(X), X))

    # add class prediction as a synthetic feature
    prediction_column = np.reshape(estimator.predict(X), (-1, 1))
    return np.hstack((prediction_column, stacked))
Example #23
Source File: model.py From ebonite with Apache License 2.0 | 5 votes |
def _exposed_methods_mapping(self) -> Dict[str, str]:
    """Return a mapping of method names to expose for the model.

    ``predict`` is always present; ``predict_proba`` is added when
    ``self.model`` is a ``ClassifierMixin``.
    """
    mapping = {'predict': 'predict'}
    if isinstance(self.model, ClassifierMixin):
        mapping.update({'predict_proba': 'predict_proba'})
    return mapping
Example #24
Source File: investigate.py From sklearn-onnx with MIT License | 4 votes |
def enumerate_pipeline_models(pipe, coor=None, vs=None):
    """
    Enumerates all the models within a pipeline.

    Yields ``(coordinates, model, columns)`` triples: first one for *pipe*
    itself, then, recursively, for every model nested inside it. A nested
    model's coordinates are the parent's with the child index appended.

    :param pipe: pipeline or model to walk through
    :param coor: coordinates of *pipe*, ``(0,)`` when not given
    :param vs: columns associated with *pipe*, yielded back unchanged
    :raises NotImplementedError: for containers which are not handled yet
    :raises TypeError: if *pipe* is not a recognised scikit-learn object
    """
    if coor is None:
        coor = (0,)
    yield coor, pipe, vs

    if hasattr(pipe, 'transformer_and_mapper_list') and len(
            pipe.transformer_and_mapper_list):
        # azureml DataTransformer
        raise NotImplementedError("Unable to handle this specific case.")
    elif hasattr(pipe, 'mapper') and pipe.mapper:
        # azureml DataTransformer
        yield from enumerate_pipeline_models(pipe.mapper, coor + (0,))
    elif hasattr(pipe, 'built_features'):
        # sklearn_pandas.dataframe_mapper.DataFrameMapper
        for index, (columns, transformers, _) in enumerate(pipe.built_features):
            if isinstance(columns, str):
                columns = (columns,)
            child_coor = coor + (index,)
            if transformers is None:
                yield child_coor, None, columns
            else:
                yield from enumerate_pipeline_models(
                    transformers, child_coor, columns)
    elif isinstance(pipe, Pipeline):
        for index, (_, model) in enumerate(pipe.steps):
            yield from enumerate_pipeline_models(model, coor + (index,))
    elif ColumnTransformer is not None and isinstance(pipe, ColumnTransformer):
        for index, (_, fitted_transformer, column) in enumerate(pipe.transformers):
            yield from enumerate_pipeline_models(
                fitted_transformer, coor + (index,), column)
    elif isinstance(pipe, FeatureUnion):
        for index, (_, model) in enumerate(pipe.transformer_list):
            yield from enumerate_pipeline_models(model, coor + (index,))
    elif TransformedTargetRegressor is not None and isinstance(
            pipe, TransformedTargetRegressor):
        raise NotImplementedError(
            "Not yet implemented for TransformedTargetRegressor.")
    elif not isinstance(pipe, (TransformerMixin, ClassifierMixin,
                               RegressorMixin, BaseEstimator)):
        # Plain estimators are leaves and need no further work; anything
        # else is not a scikit-learn object.
        raise TypeError(
            "Parameter pipe is not a scikit-learn object: {}\n{}".format(
                type(pipe), pipe))
Example #25
Source File: calibration.py From carl with BSD 3-Clause "New" or "Revised" License | 4 votes |
def __init__(self, base_estimator, method="histogram", bins="auto",
             interpolation=None, variable_width=False, cv=1):
    """Constructor.

    The arguments are stored as given; nothing is validated here.

    Parameters
    ----------
    * `base_estimator` [`ClassifierMixin`]:
        The classifier whose output decision function needs to be
        calibrated to offer more accurate predict_proba outputs. If
        `cv=prefit`, the classifier must have been fit already on data.

    * `method` [string]:
        The method to use for calibration. Supported methods include
        `"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"`
        and `"sigmoid"`.

    * `bins` [int, default="auto"]:
        The number of bins, if `method` is `"histogram"`.

    * `interpolation` [string, optional]
        Specifies the kind of interpolation between bins as a string
        (`"linear"`, `"nearest"`, `"zero"`, `"slinear"`, `"quadratic"`,
        `"cubic"`), if `method` is `"histogram"`.

    * `variable_width` [boolean, optional]
        If True use equal probability variable length bins, if `method`
        is `"histogram"`.

    * `cv` [integer, cross-validation generator, iterable or `"prefit"`]:
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - integer, to specify the number of folds.
        - An object to be used as a cross-validation generator.
        - An iterable yielding train/test splits.

        If `"prefit"` is passed, it is assumed that base_estimator has been
        fitted already and all data is used for calibration. If `cv=1`,
        the training data is used for both training and calibration.
    """
    self.base_estimator, self.method = base_estimator, method
    self.bins, self.interpolation = bins, interpolation
    self.variable_width, self.cv = variable_width, cv
Example #26
Source File: base.py From carl with BSD 3-Clause "New" or "Revised" License | 4 votes |
def as_classifier(regressor):
    """Wrap a Scikit-Learn regressor into a binary classifier.

    This function can be used to solve a binary classification problem as a
    regression problem, where output labels {0,1} are treated as real values.
    The wrapped regressor exhibits the classifier API, with the corresponding
    `predict`, `predict_proba` and `score` methods.

    Parameters
    ----------
    * `regressor` [`RegressorMixin`]:
        The regressor object.

    Returns
    -------
    * `clf` [`ClassifierMixin`]:
        The wrapped regressor, but with a classifier API.
    """
    class Wrapper(BaseEstimator, ClassifierMixin):
        def __init__(self, base_estimator):
            self.base_estimator = base_estimator

        def fit(self, X, y, **kwargs):
            """Encode ``y`` as 0/1 floats and fit a clone of the regressor.

            Raises ``ValueError`` unless ``y`` holds exactly two classes.
            """
            # Check inputs
            X, y = check_X_y(X, y)

            # Convert y. The builtin ``float`` is used because the
            # ``np.float`` alias no longer exists in current NumPy.
            label_encoder = LabelEncoder()
            y = label_encoder.fit_transform(y).astype(float)

            n_classes = len(label_encoder.classes_)
            if n_classes != 2:
                raise ValueError(
                    "as_classifier supports binary problems only, "
                    "got {} classes".format(n_classes))

            self.classes_ = label_encoder.classes_

            # Fit regressor
            self.regressor_ = clone(self.base_estimator).fit(X, y, **kwargs)

            return self

        def predict(self, X):
            """Return the class whose clipped regression output wins at 0.5."""
            return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                            self.classes_[1],
                            self.classes_[0])

        def predict_proba(self, X):
            """Return two columns: ``1 - p`` and ``p``.

            ``p`` is the regressor output clipped to the range [0, 1].
            """
            X = check_array(X)

            df = self.regressor_.predict(X)
            df = np.clip(df, 0., 1.)
            probas = np.zeros((len(X), 2))
            probas[:, 0] = 1. - df
            probas[:, 1] = df

            return probas

        def score(self, X, y):
            # Delegates to the regressor's own score with ``y`` as given.
            return self.regressor_.score(X, y)

    return Wrapper(regressor)
Example #27
Source File: bench_ml.py From scikit-optimize with BSD 3-Clause "New" or "Revised" License | 4 votes |
def evaluate(self, point):
    """
    Fits model using the particular setting of hyperparameters and
    evaluates the model validation data.

    Parameters
    ----------
    * `point`: dict
        A mapping of parameter names to the corresponding values

    Returns
    -------
    * `score`: float
        Score (more is better!) for some specific point. Regressors are
        scored with ``r2_score`` and classifiers with the negated
        ``log_loss``. The result is never below ``-5.0``, which is also
        returned when fitting or scoring raises an ``Exception``.
    """
    X_train, y_train, X_test, y_test = (
        self.X_train, self.y_train, self.X_test, self.y_test)

    # apply transformation to model parameters, for example exp transformation
    point_mapped = {
        param: self.space[param][1](val) for param, val in point.items()
    }

    model_instance = self.model(**point_mapped)

    if 'random_state' in model_instance.get_params():
        model_instance.set_params(random_state=self.random_state)

    min_obj_val = -5.0

    # Infeasible parameters are expected to raise an exception, thus the try
    # catch below, infeasible parameters yield assumed smallest objective.
    try:
        model_instance.fit(X_train, y_train)
        if isinstance(model_instance, RegressorMixin):  # r^2 metric
            y_predicted = model_instance.predict(X_test)
            score = r2_score(y_test, y_predicted)
        elif isinstance(model_instance, ClassifierMixin):  # log loss
            y_predicted = model_instance.predict_proba(X_test)
            # in the context of this function, the higher score is better
            score = -log_loss(y_test, y_predicted)
        else:
            # Neither regressor nor classifier: there is nothing to score
            # with, so fall back to the smallest objective.
            score = min_obj_val

        # avoid any kind of singularities, eg probability being zero, and
        # thus breaking the log_loss
        if math.isnan(score):
            score = min_obj_val
        score = max(score, min_obj_val)  # this is necessary to avoid -inf or NaN
    except Exception:
        # Deliberately not BaseException: KeyboardInterrupt and SystemExit
        # must still be able to stop a benchmark run.
        score = min_obj_val  # on error: return assumed smallest value of objective function

    return score