Python sklearn.pipeline Examples
The following are 15 code examples of the sklearn.pipeline module, collected from open-source projects. Each example lists the project and source file it was taken from. You may also want to check out the other available functions and classes of the sklearn module.
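
Before turning to the examples, here is a minimal sketch of the module's two main entry points: sklearn.pipeline.Pipeline chains explicitly named steps, while sklearn.pipeline.make_pipeline derives the step names from the class names. The data below is randomly generated purely for illustration.

import numpy as np
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

X = np.random.rand(20, 3)
y = np.tile([0, 1], 10)

# Explicit step names; these are used for nested parameter access, e.g. svc__C.
pipe = Pipeline([("scaler", StandardScaler()), ("svc", SVC())])

# Equivalent, with names derived automatically ("standardscaler", "svc").
pipe2 = make_pipeline(StandardScaler(), SVC())

pipe.fit(X, y)
pipe2.fit(X, y)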
Example #1
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0
def make_pmml_pipeline(obj, active_fields=None, target_fields=None):
    """Translates a regular Scikit-Learn estimator or pipeline to a PMML pipeline.

    Parameters:
    ----------
    obj: BaseEstimator
        The object.

    active_fields: list of strings, optional
        Feature names. If missing, "x1", "x2", .., "xn" are assumed.

    target_fields: list of strings, optional
        Label name(s). If missing, "y" is assumed.
    """
    steps = _filter_steps(_get_steps(obj))
    pipeline = PMMLPipeline(steps)
    if active_fields is not None:
        pipeline.active_fields = numpy.asarray(active_fields)
    if target_fields is not None:
        pipeline.target_fields = numpy.asarray(target_fields)
    return pipeline
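
A hedged usage sketch of the helper above; the estimator and field names are hypothetical and stand in for any Scikit-Learn object:

# Hypothetical call; "age", "income" and "approved" are illustrative field names.
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()
pmml_pipeline = make_pmml_pipeline(
    clf,
    active_fields=["age", "income"],
    target_fields=["approved"],
)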
Example #2
Source File: classifiers.py From seizure-prediction with MIT License
def make_svm(gamma, C):
    cls = sklearn.pipeline.make_pipeline(
        StandardScaler(),
        SVC(gamma=gamma, C=C, probability=True, cache_size=500, random_state=0),
    )
    name = 'ss-svc-g%.4f-C%.1f' % (gamma, C)
    return (cls, name)
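
A short usage sketch: the returned tuple pairs an unfitted pipeline with a descriptive name (the hyperparameter values here are arbitrary).

cls, name = make_svm(gamma=0.01, C=1.0)
print(name)                # 'ss-svc-g0.0100-C1.0'
cls.fit(X_train, y_train)  # X_train, y_train assumed to exist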
Example #3
Source File: classifiers.py From seizure-prediction with MIT License
def make_lr(C):
    cls = sklearn.pipeline.make_pipeline(StandardScaler(), LogisticRegression(C=C))
    name = 'ss-lr-C%.4f' % C
    return (cls, name)
Example #4
Source File: classifiers.py From seizure-prediction with MIT License
def make_simple_lr():
    return (sklearn.pipeline.make_pipeline(StandardScaler(), SimpleLogisticRegression()),
            'ss-slr')
Example #5
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License
def test_set_params():
    # test nested estimator parameter setting
    clf = Pipeline([("svc", SVC())])
    # non-existing parameter in svc
    assert_raises(ValueError, clf.set_params, svc__stupid_param=True)
    # non-existing parameter of pipeline
    assert_raises(ValueError, clf.set_params, svm__stupid_param=True)

    # we don't currently catch if the things in pipeline are estimators
    # bad_pipeline = Pipeline([("bad", NoEstimator())])
    # assert_raises(AttributeError, bad_pipeline.set_params,
    #               bad__stupid_param=True)
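
The test above exercises scikit-learn's &lt;step name&gt;__&lt;parameter&gt; convention for nested parameters; for contrast, a valid call looks like this:

# "svc" is the step name and "C" a real parameter of SVC, so this succeeds.
clf = Pipeline([("svc", SVC())])
clf.set_params(svc__C=10.0)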
Example #6
Source File: sklearn_patches.py From tslearn with BSD 2-Clause "Simplified" License
def check_pipeline_consistency(name, estimator_orig):
    if estimator_orig._get_tags()['non_deterministic']:
        msg = name + ' is non deterministic'
        raise SkipTest(msg)

    # check that make_pipeline(est) gives same score as est
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    X -= X.min()
    X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
    estimator = clone(estimator_orig)
    y = multioutput_estimator_convert_y_2d(estimator, y)
    set_random_state(estimator)
    pipeline = make_pipeline(estimator)
    estimator.fit(X, y)
    pipeline.fit(X, y)

    funcs = ["score", "fit_transform"]

    for func_name in funcs:
        func = getattr(estimator, func_name, None)
        if func is not None:
            func_pipeline = getattr(pipeline, func_name)
            result = func(X, y)
            result_pipe = func_pipeline(X, y)
            assert_allclose_dense_sparse(result, result_pipe)
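
The invariant being checked is that a one-step pipeline delegates to the wrapped estimator; a minimal self-contained sketch of the same idea:

# A single-step pipeline should score identically to the bare estimator.
from sklearn.datasets import make_blobs
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC

X, y = make_blobs(n_samples=30, random_state=0)
est = SVC(random_state=0).fit(X, y)
piped = make_pipeline(SVC(random_state=0)).fit(X, y)
assert est.score(X, y) == piped.score(X, y)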
Example #7
Source File: pipeline.py From timeserio with MIT License
def __init__(self, groupby, pipeline, errors='raise'):
    self.groupby = groupby
    self.pipeline = pipeline
    self.errors = errors
Example #8
Source File: pipeline.py From timeserio with MIT License
def _fit_subdf(self, sub_df, y=None):
    return clone(self.pipeline).fit(sub_df, y=y)
Example #9
Source File: pipeline.py From timeserio with MIT License
def _call_pipeline(self, df, y=None, attr=None):
    check_is_fitted(self, 'pipelines_')
    self.one_transformed = False
    transformed = [
        self._call_pipeline_subdf(key, sub_df, attr=attr)
        for key, sub_df, sub_y in self._iter_groups(df, y=y)
    ]
    if not self.one_transformed and self.errors == 'return_empty':
        raise KeyError('All keys missing in fitted pipelines')
    out = pd.concat(transformed).reindex(df.index)
    # Convert back to np.array if the pipeline returns a np.array
    if self.one_transformed and self.cast_to_numpy:
        return out.values
    return out
Example #10
Source File: pipeline.py From timeserio with MIT License
def required_columns(self):
    groupby = [self.groupby] if isinstance(self.groupby, str) else self.groupby
    return self.pipeline.required_columns | set(groupby)
Example #11
Source File: pipeline.py From timeserio with MIT License
def transformed_columns(self, input_columns):
    return self.pipeline.transformed_columns(input_columns)
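
Examples #7 through #11 are methods of a single grouped-pipeline class from timeserio. The underlying pattern (clone one pipeline per group key, then recombine the transformed groups in the original row order) can be sketched without the library; the column names here are made up:

import pandas as pd
from sklearn.base import clone
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

df = pd.DataFrame({"key": ["a", "a", "b", "b"], "x": [1.0, 2.0, 3.0, 4.0]})

# Fit one cloned pipeline per group key (compare Example #8).
pipelines = {
    key: clone(make_pipeline(StandardScaler())).fit(sub_df[["x"]])
    for key, sub_df in df.groupby("key")
}

# Transform each group, then reassemble in the original row order (compare Example #9).
transformed = pd.concat(
    pd.DataFrame(pipelines[key].transform(sub_df[["x"]]), index=sub_df.index)
    for key, sub_df in df.groupby("key")
).reindex(df.index)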
Example #12
Source File: test_base.py From twitter-stock-recommendation with MIT License
def test_set_params():
    # test nested estimator parameter setting
    clf = Pipeline([("svc", SVC())])
    # non-existing parameter in svc
    assert_raises(ValueError, clf.set_params, svc__stupid_param=True)
    # non-existing parameter of pipeline
    assert_raises(ValueError, clf.set_params, svm__stupid_param=True)

    # we don't currently catch if the things in pipeline are estimators
    # bad_pipeline = Pipeline([("bad", NoEstimator())])
    # assert_raises(AttributeError, bad_pipeline.set_params,
    #               bad__stupid_param=True)
Example #13
Source File: build_model.py From gordo with GNU Affero General Public License v3.0
def _extract_metadata_from_model(
    model: BaseEstimator, metadata: dict = dict()
) -> dict:
    """
    Recursively check for :class:`gordo.machine.model.base.GordoBase` in a
    given ``model``. If such a model exists buried inside of a
    :class:`sklearn.pipeline.Pipeline` which is then part of another
    :class:`sklearn.base.BaseEstimator`, this function will return its metadata.

    Parameters
    ----------
    model: BaseEstimator
    metadata: dict
        Any initial starting metadata, but is mainly meant to be used during
        the recursive calls to accumulate any multiple
        :class:`gordo.machine.model.base.GordoBase` models found in this model

    Notes
    -----
    If there is a ``GordoBase`` model inside of a ``Pipeline`` which is not
    the final step, this function will not find it.

    Returns
    -------
    dict
        Dictionary representing accumulated calls to
        :meth:`gordo.machine.model.base.GordoBase.get_metadata`
    """
    metadata = metadata.copy()

    # If it's a Pipeline, only need to get the last step, which potentially has metadata
    if isinstance(model, Pipeline):
        final_step = model.steps[-1][1]
        metadata.update(ModelBuilder._extract_metadata_from_model(final_step))
        return metadata

    # GordoBase is simple, having a .get_metadata()
    if isinstance(model, GordoBase):
        metadata.update(model.get_metadata())

    # Continue to look at object values in case we decided to have a GordoBase
    # which also had a GordoBase as a parameter/attribute, but will satisfy BaseEstimators
    # which can take a GordoBase model as a parameter, which will then have metadata to get
    for val in model.__dict__.values():
        if isinstance(val, Pipeline):
            metadata.update(
                ModelBuilder._extract_metadata_from_model(val.steps[-1][1])
            )
        elif isinstance(val, GordoBase) or isinstance(val, BaseEstimator):
            metadata.update(ModelBuilder._extract_metadata_from_model(val))
    return metadata
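
The Pipeline detail this function leans on is that pipeline.steps is a list of (name, estimator) tuples, so steps[-1][1] is the final estimator; a quick illustration:

# steps is a list of (name, estimator) pairs; [-1][1] picks the last estimator.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

pipe = Pipeline([("scaler", StandardScaler()), ("svc", SVC())])
final_step = pipe.steps[-1][1]
print(type(final_step).__name__)  # SVC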
Example #14
Source File: clf_helpers.py From ibeis with Apache License 2.0
def _get_estimator(pblm, clf_key):
    """
    Returns sklearn classifier
    """
    tup = clf_key.split('-')
    wrap_type = None if len(tup) == 1 else tup[1]
    est_type = tup[0]
    multiclass_wrapper = {
        None: ut.identity,
        'OVR': sklearn.multiclass.OneVsRestClassifier,
        'OVO': sklearn.multiclass.OneVsOneClassifier,
    }[wrap_type]
    est_class = {
        'RF': sklearn.ensemble.RandomForestClassifier,
        'SVC': sklearn.svm.SVC,
        'Logit': sklearn.linear_model.LogisticRegression,
        'MLP': sklearn.neural_network.MLPClassifier,
    }[est_type]
    est_kw1, est_kw2 = pblm._estimator_params(est_type)
    est_params = ut.merge_dicts(est_kw1, est_kw2)
    # steps = []
    # steps.append((est_type, est_class(**est_params)))
    # if wrap_type is not None:
    #     steps.append((wrap_type, multiclass_wrapper))
    if est_type == 'MLP':
        def clf_partial():
            pipe = sklearn.pipeline.Pipeline([
                ('inputer', sklearn.preprocessing.Imputer(
                    missing_values='NaN', strategy='mean', axis=0)),
                # ('scale', sklearn.preprocessing.StandardScaler),
                ('est', est_class(**est_params)),
            ])
            return multiclass_wrapper(pipe)
    elif est_type == 'Logit':
        def clf_partial():
            pipe = sklearn.pipeline.Pipeline([
                ('inputer', sklearn.preprocessing.Imputer(
                    missing_values='NaN', strategy='mean', axis=0)),
                ('est', est_class(**est_params)),
            ])
            return multiclass_wrapper(pipe)
    else:
        def clf_partial():
            return multiclass_wrapper(est_class(**est_params))
    return clf_partial
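
The clf_key grammar is &lt;estimator&gt;[-&lt;wrapper&gt;]; a hypothetical call, assuming a pblm object that provides _estimator_params:

# Hypothetical usage; 'RF-OVR' selects a RandomForest wrapped in OneVsRestClassifier.
clf_partial = _get_estimator(pblm, 'RF-OVR')
clf = clf_partial()
clf.fit(X_train, y_train)  # X_train, y_train assumed to exist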
Example #15
Source File: training.py From sigver with BSD 3-Clause "New" or "Revised" License
def train_wdclassifier_user(training_set: Tuple[np.ndarray, np.ndarray],
                            svmType: str,
                            C: float,
                            gamma: Optional[float]) -> pipeline.Pipeline:
    """ Trains an SVM classifier for a user

    Parameters
    ----------
    training_set: Tuple (x, y)
        The training set (features and labels). y should have labels -1 and 1
    svmType: string ('linear' or 'rbf')
        The SVM type
    C: float
        Regularization for the SVM optimization
    gamma: float
        Hyperparameter for the RBF kernel

    Returns
    -------
    sklearn.pipeline.Pipeline:
        The learned classifier (a StandardScaler followed by the SVC)
    """
    assert svmType in ['linear', 'rbf']

    train_x = training_set[0]
    train_y = training_set[1]

    # Adjust for the skew between positive and negative classes
    n_genuine = len([x for x in train_y if x == 1])
    n_forg = len([x for x in train_y if x == -1])
    skew = n_forg / float(n_genuine)

    # Train the model
    if svmType == 'rbf':
        model = sklearn.svm.SVC(C=C, gamma=gamma, class_weight={1: skew})
    else:
        model = sklearn.svm.SVC(kernel='linear', C=C, class_weight={1: skew})

    model_with_scaler = pipeline.Pipeline([
        ('scaler', preprocessing.StandardScaler(with_mean=False)),
        ('classifier', model),
    ])
    model_with_scaler.fit(train_x, train_y)

    return model_with_scaler
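
A hedged usage sketch with random data; per the docstring, the labels must use the -1/1 coding:

# Illustrative call; feature dimensions and hyperparameters are arbitrary.
import numpy as np

x = np.random.rand(40, 10)
y = np.concatenate([np.ones(20), -np.ones(20)])
classifier = train_wdclassifier_user((x, y), svmType='rbf', C=1.0, gamma=2 ** -5)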