Python Examples of sklearn.base.RegressorMixin

Source File: test_common.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def _tested_estimators():
    for name, Estimator in all_estimators():
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.startswith("_"):
            continue
        # FIXME _skip_test should be used here (if we could)

        required_parameters = getattr(Estimator, "_required_parameters", [])
        if len(required_parameters):
            if required_parameters in (["estimator"], ["base_estimator"]):
                if issubclass(Estimator, RegressorMixin):
                    estimator = Estimator(Ridge())
                else:
                    estimator = Estimator(LinearDiscriminantAnalysis())
            else:
                warnings.warn("Can't instantiate estimator {} which requires "
                              "parameters {}".format(name,
                                                     required_parameters),
                              SkipTestWarning)
                continue
        else:
            estimator = Estimator()
        yield name, estimator

Source File: _test.py From ibex with BSD 3-Clause "New" or "Revised" License

5 votes

def _generate_bases_test(est, pd_est):
    def test(self):
        self.assertTrue(isinstance(pd_est, FrameMixin), pd_est)
        self.assertFalse(isinstance(est, FrameMixin))
        self.assertTrue(isinstance(pd_est, base.BaseEstimator))
        try:
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.DensityMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        except:
            if _sklearn_ver > 17:
                raise
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        for mixin in mixins:
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                mixin)

    return test

Source File: baseestimator.py From bartpy with MIT License

5 votes

def __init__(self,
                 base_estimator: RegressorMixin = None,
                 **kwargs):

        if base_estimator is not None:
            self.base_estimator = clone(base_estimator)
        else:
            base_estimator = LinearRegression()
        self.base_estimator = base_estimator
        super().__init__(**kwargs)

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def verify(self, X, predict_params = {}, predict_proba_params = {}, precision = 1e-13, zeroThreshold = 1e-13):
		active_fields = _get_column_names(X)
		if self.active_fields is None or active_fields is None:
			raise ValueError("Cannot perform model validation with anonymous data")
		if self.active_fields.tolist() != active_fields.tolist():
			raise ValueError("The columns between training data {} and verification data {} do not match".format(self.active_fields, active_fields))
		active_values = _get_values(X)
		y = self.predict(X, **predict_params)
		target_values = _get_values(y)
		estimator = self._final_estimator
		if isinstance(estimator, BaseEstimator):
			if isinstance(estimator, RegressorMixin):
				self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
			elif isinstance(estimator, ClassifierMixin):
				self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
				if hasattr(estimator, "predict_proba"):
					try:
						y_proba = self.predict_proba(X, **predict_proba_params)
						self.verification.probability_values = _get_values(y_proba)
					except AttributeError:
						pass
		# elif isinstance(estimator, H2OEstimator):
		elif hasattr(estimator, "_estimator_type") and hasattr(estimator, "download_mojo"):
			if estimator._estimator_type == "regressor":
				self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
			elif estimator._estimator_type == "classifier":
				probability_values = target_values[:, 1:]
				target_values = target_values[:, 0]
				self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
				self.verification.probability_values = probability_values

Source File: ABuML.py From abu with GNU General Public License v3.0

5 votes

def plot_graphviz_tree(self, **kwargs):
        """
        被装饰器entry_wrapper(support=(EMLFitType.E_FIT_CLF, EMLFitType.E_FIT_REG))装饰，
        即支持有监督学习回归和分类，绘制决策树或者core基于树的分类回归算法的决策示意图绘制，查看
        学习器本身hasattr(fiter, 'tree_')是否有tree_属性，如果没有使用决策树替换

        :param kwargs:  外部可以传递x, y, 通过
                                x = kwargs.pop('x', self.x)
                                y = kwargs.pop('y', self.y)
                        装饰器使用的fiter_type，
                        eg:
                            ttn_abu = AbuML.create_test_more_fiter()
                            ttn_abu.plot_graphviz_tree(fiter_type=ml.EMLFitType.E_FIT_CLF)
        """
        x = kwargs.pop('x', self.x)
        y = kwargs.pop('y', self.y)
        fiter = self.get_fiter()

        if not hasattr(fiter, 'tree_'):
            self.log_func('{} not hasattr tree_, use decision tree replace'.format(
                fiter.__class__.__name__))

            if isinstance(fiter, ClassifierMixin):
                # FIXME 最好不要使用ClassifierMixin判定学习器类型，因为限定了sklearn
                fiter = self.estimator.decision_tree_classifier(assign=False)
            elif isinstance(fiter, RegressorMixin):
                # # FIXME 最好不要使用RegressorMixin, AbuMLCreater中引用了hmmlearn，xgboost等第三方库
                fiter = self.estimator.decision_tree_regressor(assign=False)
            else:
                fiter = self.estimator.decision_tree_classifier(assign=False)
        # 这里需要将self.df.columns做为名字传入
        return ABuMLExecute.graphviz_tree(fiter, self.df.columns, x, y)

Source File: ABuMLGrid.py From abu with GNU General Public License v3.0

5 votes

def _scoring_grid(estimator, scoring):
    """
    只针对有监督学习过滤无监督学习，对scoring未赋予的情况根据
    学习器分类器使用accuracy进行度量，回归器使用可释方差值explained_variance_score，
    使用make_scorer对函数进行score封装

    :param estimator: 学习器对象
    :param scoring: 度量使用的方法，未赋予的情况根据
                    学习器分类器使用accuracy进行度量，回归器使用explained_variance_score进行度量
    :return: scoring
    """

    if not isinstance(estimator, (ClassifierMixin, RegressorMixin)):
        logging.info('only support supervised learning')
        # TODO 无监督学习的scoring度量以及GridSearchCV
        return None

    if scoring is None:
        if isinstance(estimator, ClassifierMixin):
            # 分类器使用accuracy
            return 'accuracy'
        elif isinstance(estimator, RegressorMixin):
            # 回归器使用可释方差值explained_variance_score，使用make_scorer对函数进行score封装
            """
                make_scorer中通过greater_is_better对返回值进行正负分配
                eg: sign = 1 if greater_is_better else -1
            """
            return make_scorer(explained_variance_score, greater_is_better=True)
        return None
    return scoring

Source File: tree.py From sklearn-pmml with MIT License

5 votes

def __init__(self, estimator, context, mode):
        super(DecisionTreeConverter, self).__init__(estimator, context, mode)

        assert len(self.context.schemas[Schema.OUTPUT]) == 1, 'Only one-label trees are supported'
        assert hasattr(estimator, 'tree_'), 'Estimator has no tree_ attribute'
        if mode == ModelMode.CLASSIFICATION:
            if isinstance(self.context.schemas[Schema.OUTPUT][0], CategoricalFeature):
                self.prediction_output = self.OUTPUT_LABEL
            else:
                self.prediction_output = self.OUTPUT_PROBABILITY
            assert isinstance(self.estimator, ClassifierMixin), \
                'Only a classifier can be serialized in classification mode'
        if mode == ModelMode.REGRESSION:
            assert isinstance(self.context.schemas[Schema.OUTPUT][0], NumericFeature), \
                'Only a numeric feature can be an output of regression'
            assert isinstance(self.estimator, RegressorMixin), \
                'Only a regressor can be serialized in regression mode'
        assert estimator.tree_.value.shape[1] == len(self.context.schemas[Schema.OUTPUT]), \
            'Tree outputs {} results while the schema specifies {} output fields'.format(
                estimator.tree_.value.shape[1], len(self.context.schemas[Schema.OUTPUT]))

        # create hidden variables for each categorical output
        # TODO: this code is copied from the ClassifierConverter. To make things right, we need an abstract tree
        # TODO: converter and subclasses for classifier and regression converters
        internal_schema = list(filter(lambda x: isinstance(x, CategoricalFeature), self.context.schemas[Schema.OUTPUT]))
        self.context.schemas[Schema.INTERNAL] = internal_schema

Source File: bench_ml.py From scikit-optimize with BSD 3-Clause "New" or "Revised" License

4 votes

def evaluate(self, point):
        """
        Fits model using the particular setting of hyperparameters and
        evaluates the model validation data.

        Parameters
        ----------
        * `point`: dict
            A mapping of parameter names to the corresponding values

        Returns
        -------
        * `score`: float
            Score (more is better!) for some specific point
        """
        X_train, y_train, X_test, y_test = (
            self.X_train, self.y_train, self.X_test, self.y_test)

        # apply transformation to model parameters, for example exp transformation
        point_mapped = {}
        for param, val in point.items():
            point_mapped[param] = self.space[param][1](val)

        model_instance = self.model(**point_mapped)

        if 'random_state' in model_instance.get_params():
            model_instance.set_params(random_state=self.random_state)

        min_obj_val = -5.0

        # Infeasible parameters are expected to raise an exception, thus the try
        # catch below, infeasible parameters yield assumed smallest objective.
        try:
            model_instance.fit(X_train, y_train)
            if isinstance(model_instance, RegressorMixin): # r^2 metric
                y_predicted = model_instance.predict(X_test)
                score = r2_score(y_test, y_predicted)
            elif isinstance(model_instance, ClassifierMixin): # log loss
                y_predicted = model_instance.predict_proba(X_test)
                score = -log_loss(y_test, y_predicted) # in the context of this function, the higher score is better
            # avoid any kind of singularitites, eg probability being zero, and thus breaking the log_loss
            if math.isnan(score):
                score = min_obj_val
            score = max(score, min_obj_val) # this is necessary to avoid -inf or NaN
        except BaseException as ex:
            score = min_obj_val # on error: return assumed smallest value of objective function

        return score

# this is necessary to generate table for README in the end

Source File: investigate.py From sklearn-onnx with MIT License

4 votes

def enumerate_pipeline_models(pipe, coor=None, vs=None):
    """
    Enumerates all the models within a pipeline.
    """
    if coor is None:
        coor = (0,)
    yield coor, pipe, vs
    if hasattr(pipe, 'transformer_and_mapper_list') and len(
            pipe.transformer_and_mapper_list):
        # azureml DataTransformer
        raise NotImplementedError("Unable to handle this specific case.")
    elif hasattr(pipe, 'mapper') and pipe.mapper:
        # azureml DataTransformer
        for couple in enumerate_pipeline_models(pipe.mapper, coor + (0,)):
            yield couple
    elif hasattr(pipe, 'built_features'):
        # sklearn_pandas.dataframe_mapper.DataFrameMapper
        for i, (columns, transformers, _) in enumerate(pipe.built_features):
            if isinstance(columns, str):
                columns = (columns,)
            if transformers is None:
                yield (coor + (i,)), None, columns
            else:
                for couple in enumerate_pipeline_models(transformers,
                                                        coor + (i,),
                                                        columns):
                    yield couple
    elif isinstance(pipe, Pipeline):
        for i, (_, model) in enumerate(pipe.steps):
            for couple in enumerate_pipeline_models(model, coor + (i,)):
                yield couple
    elif ColumnTransformer is not None and isinstance(pipe, ColumnTransformer):
        for i, (_, fitted_transformer, column) in enumerate(pipe.transformers):
            for couple in enumerate_pipeline_models(
                    fitted_transformer, coor + (i,), column):
                yield couple
    elif isinstance(pipe, FeatureUnion):
        for i, (_, model) in enumerate(pipe.transformer_list):
            for couple in enumerate_pipeline_models(model, coor + (i,)):
                yield couple
    elif TransformedTargetRegressor is not None and isinstance(
            pipe, TransformedTargetRegressor):
        raise NotImplementedError(
            "Not yet implemented for TransformedTargetRegressor.")
    elif isinstance(pipe, (TransformerMixin, ClassifierMixin, RegressorMixin)):
        pass
    elif isinstance(pipe, BaseEstimator):
        pass
    else:
        raise TypeError(
            "Parameter pipe is not a scikit-learn object: {}\n{}".format(
                type(pipe), pipe))

Python sklearn.base.RegressorMixin() Examples