Python sklearn.pipeline() Examples

The following are 15 code examples of sklearn.pipeline(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn, or try the search function.
Example #1
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 6 votes vote down vote up
def make_pmml_pipeline(obj, active_fields = None, target_fields = None):
	"""Wraps a regular Scikit-Learn estimator or pipeline into a PMML pipeline.

	The steps are extracted from ``obj`` with ``_get_steps``, passed through
	``_filter_steps`` and handed to the ``PMMLPipeline`` constructor.

	Parameters:
	----------
	obj: BaseEstimator
		The estimator or pipeline to convert.

	active_fields: list of strings, optional
		Feature names. If missing, "x1", "x2", .., "xn" are assumed.

	target_fields: list of strings, optional
		Label name(s). If missing, "y" is assumed.

	Returns:
	-------
	PMMLPipeline
		The newly built pipeline; field names are attached as numpy arrays
		only when they were given.

	"""
	pmml_pipeline = PMMLPipeline(_filter_steps(_get_steps(obj)))
	# Only override the field attributes that the caller actually supplied.
	supplied_fields = (
		("active_fields", active_fields),
		("target_fields", target_fields),
	)
	for attr_name, field_names in supplied_fields:
		if field_names is not None:
			setattr(pmml_pipeline, attr_name, numpy.asarray(field_names))
	return pmml_pipeline
Example #2
Source File: classifiers.py    From seizure-prediction with MIT License 5 votes vote down vote up
def make_svm(gamma, C):
    """Build a standard-scaled SVC pipeline and a name describing it.

    Returns a ``(pipeline, name)`` tuple; the name encodes ``gamma`` with
    four decimals and ``C`` with one decimal.
    """
    scaler = StandardScaler()
    # probability=True so the classifier can produce probability estimates
    svc = SVC(gamma=gamma, C=C, probability=True, cache_size=500, random_state=0)
    scaled_svc = sklearn.pipeline.make_pipeline(scaler, svc)
    label = 'ss-svc-g%.4f-C%.1f' % (gamma, C)
    return scaled_svc, label
Example #3
Source File: classifiers.py    From seizure-prediction with MIT License 5 votes vote down vote up
def make_lr(C):
    """Build a standard-scaled logistic regression pipeline and its name.

    Returns a ``(pipeline, name)`` tuple; the name encodes ``C`` with four
    decimals.
    """
    scaler = StandardScaler()
    regression = LogisticRegression(C=C)
    scaled_lr = sklearn.pipeline.make_pipeline(scaler, regression)
    label = 'ss-lr-C%.4f' % C
    return scaled_lr, label
Example #4
Source File: classifiers.py    From seizure-prediction with MIT License 5 votes vote down vote up
def make_simple_lr():
    """Build a standard-scaled ``SimpleLogisticRegression`` pipeline.

    Returns a ``(pipeline, name)`` tuple with the fixed name ``'ss-slr'``.
    """
    scaler = StandardScaler()
    regression = SimpleLogisticRegression()
    scaled_slr = sklearn.pipeline.make_pipeline(scaler, regression)
    return scaled_slr, 'ss-slr'
Example #5
Source File: test_base.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_set_params():
    """Check that nested parameter setting rejects unknown names."""
    pipe = Pipeline([("svc", SVC())])
    invalid_settings = (
        # parameter that does not exist on the "svc" step
        {"svc__stupid_param": True},
        # "svm" does not name any parameter/step of the pipeline
        {"svm__stupid_param": True},
    )
    for bad_kwargs in invalid_settings:
        assert_raises(ValueError, pipe.set_params, **bad_kwargs)
    # NOTE: whether the things in the pipeline are estimators is not
    # currently caught, so the following check stays disabled:
    # bad_pipeline = Pipeline([("bad", NoEstimator())])
    # assert_raises(AttributeError, bad_pipeline.set_params,
    #               bad__stupid_param=True)
Example #6
Source File: sklearn_patches.py    From tslearn with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def check_pipeline_consistency(name, estimator_orig):
    """Check that ``make_pipeline(est)`` gives the same results as ``est``.

    A clone of ``estimator_orig`` is fitted both directly and wrapped in a
    single-step pipeline; for each of ``score`` and ``fit_transform`` that
    the estimator provides, the two outputs are compared.

    Raises ``SkipTest`` when the estimator is tagged as non deterministic.
    """
    if estimator_orig._get_tags()['non_deterministic']:
        raise SkipTest(name + ' is non deterministic')

    blob_centers = [[0, 0, 0], [1, 1, 1]]
    X, y = make_blobs(n_samples=30, centers=blob_centers, random_state=0,
                      n_features=2, cluster_std=0.1)
    # Shift the data so that its smallest value becomes zero.
    X -= X.min()
    X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)

    bare = clone(estimator_orig)
    y = multioutput_estimator_convert_y_2d(bare, y)
    set_random_state(bare)
    # The pipeline wraps the very same estimator instance that is fitted below.
    wrapped = make_pipeline(bare)
    bare.fit(X, y)
    wrapped.fit(X, y)

    for method_name in ("score", "fit_transform"):
        bare_method = getattr(bare, method_name, None)
        if bare_method is None:
            # The estimator does not implement this method; nothing to compare.
            continue
        wrapped_method = getattr(wrapped, method_name)
        assert_allclose_dense_sparse(bare_method(X, y), wrapped_method(X, y))
Example #7
Source File: pipeline.py    From timeserio with MIT License 5 votes vote down vote up
def __init__(self, groupby, pipeline, errors='raise'):
        """Store the constructor arguments unchanged as attributes.

        ``groupby``, ``pipeline`` and ``errors`` are kept as given; no
        validation or copying happens here.
        """
        self.groupby, self.pipeline, self.errors = groupby, pipeline, errors
Example #8
Source File: pipeline.py    From timeserio with MIT License 5 votes vote down vote up
def _fit_subdf(self, sub_df, y=None):
        """Fit a fresh clone of ``self.pipeline`` on ``sub_df`` and return the result of ``fit``."""
        # Clone first so the template pipeline stored on self is not the one being fitted.
        fresh_pipeline = clone(self.pipeline)
        return fresh_pipeline.fit(sub_df, y=y)
Example #9
Source File: pipeline.py    From timeserio with MIT License 5 votes vote down vote up
def _call_pipeline(self, df, y=None, attr=None):
        """Apply the fitted per-group pipelines to ``df`` and stitch the results together.

        Each group yielded by ``self._iter_groups`` is passed to
        ``self._call_pipeline_subdf``; the outputs are concatenated and
        re-ordered to match ``df.index``.

        Raises ``KeyError`` when ``self.one_transformed`` is still false after
        all groups were processed and ``self.errors == 'return_empty'``.
        Returns the underlying ``.values`` when both ``self.one_transformed``
        and ``self.cast_to_numpy`` are truthy, otherwise the concatenated object.
        """
        check_is_fitted(self, 'pipelines_')
        # Reset the flag; presumably _call_pipeline_subdf sets it on success - verify there.
        self.one_transformed = False
        pieces = [
            self._call_pipeline_subdf(group_key, group_df, attr=attr)
            for group_key, group_df, _ in self._iter_groups(df, y=y)
        ]
        nothing_transformed = not self.one_transformed
        if nothing_transformed and self.errors == 'return_empty':
            raise KeyError('All keys missing in fitted pipelines')
        combined = pd.concat(pieces).reindex(df.index)
        # Convert back to np.array if the pipeline returns a np.array
        as_numpy = self.one_transformed and self.cast_to_numpy
        return combined.values if as_numpy else combined
Example #10
Source File: pipeline.py    From timeserio with MIT License 5 votes vote down vote up
def required_columns(self):
        """Return the wrapped pipeline's required columns plus the group-by key(s).

        ``self.groupby`` may be a single column name or an iterable of names.
        The result is the union of ``self.pipeline.required_columns`` with
        those names.
        """
        # isinstance (not an exact type check) so that str subclasses are also
        # treated as one column name instead of being split into characters.
        groupby = [self.groupby] if isinstance(self.groupby, str) else self.groupby
        return self.pipeline.required_columns | set(groupby)
Example #11
Source File: pipeline.py    From timeserio with MIT License 5 votes vote down vote up
def transformed_columns(self, input_columns):
        """Delegate to ``self.pipeline.transformed_columns`` and return its result."""
        wrapped_pipeline = self.pipeline
        return wrapped_pipeline.transformed_columns(input_columns)
Example #12
Source File: test_base.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_set_params():
    """Verify that ``set_params`` raises ``ValueError`` for unknown nested names."""
    pipe = Pipeline([("svc", SVC())])
    # "svc" is a real step but has no such parameter; "svm" is not a
    # parameter/step of the pipeline at all.
    for bad_name in ("svc__stupid_param", "svm__stupid_param"):
        assert_raises(ValueError, pipe.set_params, **{bad_name: True})
    # NOTE: whether the things in the pipeline are estimators is not
    # currently caught, so the following check stays disabled:
    # bad_pipeline = Pipeline([("bad", NoEstimator())])
    # assert_raises(AttributeError, bad_pipeline.set_params,
    #               bad__stupid_param=True)
Example #13
Source File: build_model.py    From gordo with GNU Affero General Public License v3.0 4 votes vote down vote up
def _extract_metadata_from_model(
        model: BaseEstimator, metadata: dict = dict()
    ) -> dict:
        """
        Collect metadata from any :class:`gordo.machine.model.base.GordoBase`
        found in ``model``, searching recursively. This covers a ``GordoBase``
        that is the final step of a :class:`sklearn.pipeline.Pipeline`, even
        when that pipeline is itself an attribute of another
        :class:`sklearn.base.BaseEstimator`.

        Parameters
        ----------
        model: BaseEstimator
            The object to inspect.
        metadata: dict
            Initial metadata to start from; mainly used by the recursive calls
            to accumulate the metadata of several ``GordoBase`` models. The
            passed dict is copied, never modified.

        Notes
        -----
        Only the final step of a ``Pipeline`` is inspected, so a ``GordoBase``
        sitting in an earlier step will not be found.

        Returns
        -------
        dict
            Accumulated results of
            :meth:`gordo.machine.model.base.GordoBase.get_metadata`
        """
        # Work on a copy so the caller's dict (and the shared default) stay untouched.
        collected = metadata.copy()

        # For a Pipeline only the last step can carry metadata; recurse into it and stop.
        if isinstance(model, Pipeline):
            last_estimator = model.steps[-1][1]
            collected.update(
                ModelBuilder._extract_metadata_from_model(last_estimator)
            )
            return collected

        # A GordoBase reports its own metadata directly.
        if isinstance(model, GordoBase):
            collected.update(model.get_metadata())

        # Attributes may themselves hold pipelines or estimators (for instance a
        # GordoBase passed as a parameter of another estimator); look into those too.
        # The Pipeline test must come first: a Pipeline is also a BaseEstimator.
        for attribute in model.__dict__.values():
            if isinstance(attribute, Pipeline):
                nested_last = attribute.steps[-1][1]
                collected.update(
                    ModelBuilder._extract_metadata_from_model(nested_last)
                )
            elif isinstance(attribute, (GordoBase, BaseEstimator)):
                collected.update(
                    ModelBuilder._extract_metadata_from_model(attribute)
                )
        return collected
Example #14
Source File: clf_helpers.py    From ibeis with Apache License 2.0 4 votes vote down vote up
def _get_estimator(pblm, clf_key):
        """
        Returns a zero-argument factory that builds the sklearn classifier
        described by ``clf_key``.

        ``clf_key`` has the form ``'<est_type>'`` or ``'<est_type>-<wrap_type>'``
        where ``est_type`` is one of 'RF', 'SVC', 'Logit', 'MLP' and
        ``wrap_type`` is 'OVR' or 'OVO'. Unknown keys raise ``KeyError``.
        """
        key_parts = clf_key.split('-')
        est_type = key_parts[0]
        wrap_type = key_parts[1] if len(key_parts) != 1 else None

        wrappers = {
            None: ut.identity,
            'OVR': sklearn.multiclass.OneVsRestClassifier,
            'OVO': sklearn.multiclass.OneVsOneClassifier,
        }
        multiclass_wrapper = wrappers[wrap_type]

        estimator_classes = {
            'RF': sklearn.ensemble.RandomForestClassifier,
            'SVC': sklearn.svm.SVC,
            'Logit': sklearn.linear_model.LogisticRegression,
            'MLP': sklearn.neural_network.MLPClassifier,
        }
        est_class = estimator_classes[est_type]

        first_kw, second_kw = pblm._estimator_params(est_type)
        est_params = ut.merge_dicts(first_kw, second_kw)

        # MLP and Logit estimators are preceded by a mean-imputation step;
        # every other estimator is used directly.
        needs_imputation = est_type in ('MLP', 'Logit')

        def clf_partial():
            if not needs_imputation:
                return multiclass_wrapper(est_class(**est_params))
            pipe = sklearn.pipeline.Pipeline([
                ('inputer', sklearn.preprocessing.Imputer(
                    missing_values='NaN', strategy='mean', axis=0)),
                ('est', est_class(**est_params)),
            ])
            return multiclass_wrapper(pipe)

        return clf_partial
Example #15
Source File: training.py    From sigver with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def train_wdclassifier_user(training_set: Tuple[np.ndarray, np.ndarray],
                            svmType: str,
                            C: float,
                            gamma: Optional[float]) -> pipeline.Pipeline:
    """ Trains an SVM classifier for a user

    Parameters
    ----------
    training_set: Tuple (x, y)
        The training set (features and labels). y should have labels -1 and 1
    svmType: string ('linear' or 'rbf')
        The SVM type
    C: float
        Regularization for the SVM optimization
    gamma: float
        Hyperparameter for the RBF kernel; only used when svmType is 'rbf'

    Returns
    -------
    pipeline.Pipeline:
        The fitted pipeline: a ``StandardScaler(with_mean=False)`` step named
        'scaler' followed by the SVC step named 'classifier'

    Raises
    ------
    AssertionError
        If svmType is neither 'linear' nor 'rbf'
    ZeroDivisionError
        If the training labels contain no genuine (label 1) samples

    """

    assert svmType in ['linear', 'rbf'], "svmType must be 'linear' or 'rbf'"

    train_x = training_set[0]
    train_y = training_set[1]

    # Adjust for the skew between positive and negative classes: the genuine
    # class (label 1) is weighted by the forgery-to-genuine ratio.
    n_genuine = sum(1 for label in train_y if label == 1)
    n_forg = sum(1 for label in train_y if label == -1)
    skew = n_forg / float(n_genuine)

    # Build the model for the requested kernel
    if svmType == 'rbf':
        model = sklearn.svm.SVC(C=C, gamma=gamma, class_weight={1: skew})
    else:
        model = sklearn.svm.SVC(kernel='linear', C=C, class_weight={1: skew})

    # Scale the features (without centering) before they reach the classifier
    model_with_scaler = pipeline.Pipeline([('scaler', preprocessing.StandardScaler(with_mean=False)),
                                           ('classifier', model)])

    model_with_scaler.fit(train_x, train_y)

    return model_with_scaler