Python sklearn.pipeline.make_pipeline() Examples
The following are 30 code examples of sklearn.pipeline.make_pipeline(), drawn from open-source projects.
You may also want to check out all available functions and classes of the sklearn.pipeline module.
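Before the examples, a minimal, self-contained sketch of what make_pipeline() does may help: it builds a Pipeline from the estimators you pass it, generating each step's name automatically from the lowercased class name. This snippet is illustrative only and is not taken from any of the projects below.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# make_pipeline names each step after its lowercased class name, so this
# pipeline has the steps "standardscaler" and "logisticregression".
pipe = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))

X, y = load_iris(return_X_y=True)
pipe.fit(X, y)
print([name for name, _ in pipe.steps])  # ['standardscaler', 'logisticregression']
print(pipe.score(X, y))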
Example #1
Source File: 2_transform_solution.py From pandas-feature-union with MIT License
def main():
    raw_data = load_iris()
    data = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])
    data.loc[:, "class"] = raw_data["target"]

    pipeline = FeatureUnion([
        ("1", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal length (cm)"]]),
            # other transformations
        )),
        ("2", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal width (cm)"]]),
            # other transformations
        ))
    ])

    X = pipeline.fit_transform(data)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean())
Example #2
Source File: 1_problem.py From pandas-feature-union with MIT License
def main():
    raw_data = load_iris()
    data = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])

    pipeline = FeatureUnion([
        ("1", make_pipeline(
            FunctionTransformer(lambda X: X.loc[:, ["sepal length (cm)"]]),
            # other transformations
        )),
        ("2", make_pipeline(
            FunctionTransformer(lambda X: X.loc[:, ["sepal width (cm)"]]),
            # other transformations
        ))
    ])

    X = pipeline.fit_transform(data)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean())
Example #3
Source File: test_pipeline.py From Mastering-Elasticsearch-7.0 with MIT License
def test_make_pipeline():
    t1 = Transf()
    t2 = Transf()

    pipe = make_pipeline(t1, t2)
    assert isinstance(pipe, Pipeline)
    assert_equal(pipe.steps[0][0], "transf-1")
    assert_equal(pipe.steps[1][0], "transf-2")

    pipe = make_pipeline(t1, t2, FitParamT())
    assert isinstance(pipe, Pipeline)
    assert_equal(pipe.steps[0][0], "transf-1")
    assert_equal(pipe.steps[1][0], "transf-2")
    assert_equal(pipe.steps[2][0], "fitparamt")

    assert_raise_message(
        TypeError,
        'Unknown keyword arguments: "random_parameter"',
        make_pipeline, t1, t2, random_parameter='rnd'
    )
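This test also documents make_pipeline()'s naming scheme: each step is named after the lowercased estimator class, a class that appears more than once gets a numeric suffix ("transf-1", "transf-2"), and a class that appears once keeps the bare name ("fitparamt").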
Example #4
Source File: benchmark_test.py From nni with MIT License
def test_time(pipeline_name, name, path):
    if pipeline_name == "LR":
        pipeline = make_pipeline(LogisticRegression())

    if pipeline_name == "FGS":
        pipeline = make_pipeline(FeatureGradientSelector(), LogisticRegression())

    if pipeline_name == "Tree":
        pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)),
                                 LogisticRegression())

    test_benchmark = Benchmark()
    print("Dataset:\t", name)
    print("Pipeline:\t", pipeline_name)
    starttime = datetime.datetime.now()
    test_benchmark.run_test(pipeline, name, path)
    endtime = datetime.datetime.now()
    print("Used time: ", (endtime - starttime).microseconds / 1000)
    print("")
Example #5
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License
def test_gradient_boosting_with_init_pipeline():
    # Check that the init estimator can be a pipeline (see issue #13466)
    X, y = make_regression(random_state=0)
    init = make_pipeline(LinearRegression())
    gb = GradientBoostingRegressor(init=init)
    gb.fit(X, y)  # pipeline without sample_weight works fine

    with pytest.raises(
            ValueError,
            match='The initial estimator Pipeline does not support sample '
                  'weights'):
        gb.fit(X, y, sample_weight=np.ones(X.shape[0]))

    # Passing sample_weight to a pipeline raises a ValueError. This test makes
    # sure we make the distinction between ValueError raised by a pipeline that
    # was passed sample_weight, and a ValueError raised by a regular estimator
    # whose input checking failed.
    with pytest.raises(
            ValueError,
            match='nu <= 0 or nu > 1'):
        # Note that NuSVR properly supports sample_weight
        init = NuSVR(gamma='auto', nu=1.5)
        gb = GradientBoostingRegressor(init=init)
        gb.fit(X, y, sample_weight=np.ones(X.shape[0]))
Example #6
Source File: test_pprint.py From Mastering-Elasticsearch-7.0 with MIT License
def test_pipeline():
    # Render a pipeline object
    pipeline = make_pipeline(StandardScaler(), LogisticRegression(C=999))
    expected = """
Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('logisticregression',
                 LogisticRegression(C=999, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=100,
                                    multi_class='warn', n_jobs=None,
                                    penalty='l2', random_state=None,
                                    solver='warn', tol=0.0001, verbose=0,
                                    warm_start=False))],
         verbose=False)"""

    expected = expected[1:]  # remove first \n
    assert pipeline.__repr__() == expected
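Note that the expected repr is tied to the estimator defaults of the scikit-learn version this test was written for (solver='warn' and multi_class='warn' were transitional LogisticRegression defaults around scikit-learn 0.20/0.21); on other versions the rendered parameter list will differ.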
Example #7
Source File: test_core_pipeline.py From lale with Apache License 2.0
def test_import_from_sklearn_pipeline_feature_union(self):
    from sklearn.pipeline import FeatureUnion
    from sklearn.decomposition import PCA
    from sklearn.kernel_approximation import Nystroem
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.pipeline import make_pipeline
    union = FeatureUnion([("pca", PCA(n_components=1)),
                          ("nys", Nystroem(n_components=2, random_state=42))])
    sklearn_pipeline = make_pipeline(union, KNeighborsClassifier())
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
    self.assertEqual(len(lale_pipeline.edges()), 3)
    from lale.lib.sklearn.pca import PCAImpl
    from lale.lib.sklearn.nystroem import NystroemImpl
    from lale.lib.lale.concat_features import ConcatFeaturesImpl
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
    self.assertEqual(lale_pipeline.edges()[0][0]._impl_class(), PCAImpl)
    self.assertEqual(lale_pipeline.edges()[0][1]._impl_class(), ConcatFeaturesImpl)
    self.assertEqual(lale_pipeline.edges()[1][0]._impl_class(), NystroemImpl)
    self.assertEqual(lale_pipeline.edges()[1][1]._impl_class(), ConcatFeaturesImpl)
    self.assertEqual(lale_pipeline.edges()[2][0]._impl_class(), ConcatFeaturesImpl)
    self.assertEqual(lale_pipeline.edges()[2][1]._impl_class(), KNeighborsClassifierImpl)
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Example #8
Source File: 3_feature_union_solution.py From pandas-feature-union with MIT License
def main():
    raw_data = load_iris()
    data = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])
    data.loc[:, "class"] = raw_data["target"]

    pipeline = PandasFeatureUnion([
        ("1", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal length (cm)"]]),
            # other transformations
        )),
        ("2", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal width (cm)"]]),
            # other transformations
        ))
    ])

    X = pipeline.fit_transform(data)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean())
Example #9
Source File: test_core_pipeline.py From lale with Apache License 2.0
def test_compare_with_sklearn(self):
    from lale.operators import make_pipeline
    tfm = PCA()
    clf = LogisticRegression(LogisticRegression.solver.lbfgs,
                             LogisticRegression.multi_class.auto)
    trainable = make_pipeline(tfm, clf)
    digits = sklearn.datasets.load_digits()
    trained = trainable.fit(digits.data, digits.target)
    predicted = trained.predict(digits.data)

    from sklearn.pipeline import make_pipeline as scikit_make_pipeline
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.linear_model import LogisticRegression as SklearnLR
    sklearn_pipeline = scikit_make_pipeline(
        SklearnPCA(), SklearnLR(solver="lbfgs", multi_class="auto"))
    sklearn_pipeline.fit(digits.data, digits.target)
    predicted_sklearn = sklearn_pipeline.predict(digits.data)

    from sklearn.metrics import accuracy_score
    lale_score = accuracy_score(digits.target, predicted)
    scikit_score = accuracy_score(digits.target, predicted_sklearn)
    self.assertEqual(lale_score, scikit_score)
Example #10
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License
def test_lasso_cv_with_some_model_selection():
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import StratifiedKFold
    from sklearn import datasets
    from sklearn.linear_model import LassoCV

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target

    pipe = make_pipeline(
        StandardScaler(),
        LassoCV(cv=StratifiedKFold(n_splits=5))
    )
    pipe.fit(X, y)
Example #11
Source File: language_detector.py From text-mining-class with MIT License
def build_language_classifier(texts, labels, verbose=False, random_state=None):
    """Train a text classifier with scikit-learn.

    The text classifier is composed of two elements assembled in a pipeline:

    - A text feature extractor (`TfidfVectorizer`) that extracts the relative
      frequencies of unigrams, bigrams and trigrams of characters in the text.

    - An instance of `SGDClassifier` for the classification itself. To speed
      up training it is recommended to enable early stopping.

    `random_state` is passed to the underlying `SGDClassifier` instance.
    """
    language_classifier = make_pipeline(
        TfidfVectorizer(analyzer="char", ngram_range=(1, 3),
                        min_df=2, max_df=0.9, norm="l2",
                        dtype=np.float32),
        SGDClassifier(early_stopping=True, validation_fraction=0.2,
                      n_iter_no_change=3, max_iter=1000, tol=1e-3,
                      alpha=1e-5, penalty="l2", verbose=verbose,
                      random_state=random_state)
    )
    return language_classifier.fit(texts, labels)
Example #12
Source File: tests.py From scikit-mdr with MIT License
def test_mdr_sklearn_pipeline_parallel():
    """Ensure that MDR can be used as a transformer in a parallelized scikit-learn pipeline"""
    features = np.array([[2, 0], [0, 0], [0, 1], [0, 0], [0, 0], [0, 0],
                         [0, 1], [0, 0], [0, 0], [0, 1], [0, 0], [0, 0],
                         [0, 0], [1, 1], [1, 1]])
    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    clf = make_pipeline(MDR(), LogisticRegression())
    cv_scores = cross_val_score(clf, features, classes,
                                cv=StratifiedKFold(n_splits=5, shuffle=True),
                                n_jobs=-1)
    assert np.mean(cv_scores) > 0.
Example #13
Source File: tests.py From scikit-mdr with MIT License
def test_mdr_sklearn_pipeline():
    """Ensure that MDR can be used as a transformer in a scikit-learn pipeline"""
    features = np.array([[2, 0], [0, 0], [0, 1], [0, 0], [0, 0], [0, 0],
                         [0, 1], [0, 0], [0, 0], [0, 1], [0, 0], [0, 0],
                         [0, 0], [1, 1], [1, 1]])
    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    clf = make_pipeline(MDR(), LogisticRegression())
    cv_scores = cross_val_score(clf, features, classes,
                                cv=StratifiedKFold(n_splits=5, shuffle=True))
    assert np.mean(cv_scores) > 0.
Example #14
Source File: sklearn_test.py From nni with MIT License
def test():
    url_zip_train = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2'
    urllib.request.urlretrieve(url_zip_train, filename='train.bz2')

    f_svm = open('train.svm', 'wt')
    with bz2.open('train.bz2', 'rb') as f_zip:
        data = f_zip.read()
        f_svm.write(data.decode('utf-8'))
    f_svm.close()

    X, y = load_svmlight_file('train.svm')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    pipeline = make_pipeline(FeatureGradientSelector(n_epochs=1, n_features=10),
                             LogisticRegression())
    # pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)),
    #                          LogisticRegression())
    pipeline.fit(X_train, y_train)

    print("Pipeline Score: ", pipeline.score(X_train, y_train))
Example #15
Source File: test_core_operators.py From lale with Apache License 2.0
def test_comparison_with_scikit(self):
    import warnings
    warnings.filterwarnings("ignore")
    from lale.lib.sklearn import PCA
    import sklearn.datasets
    from lale.helpers import cross_val_score
    pca = PCA(n_components=3, random_state=42, svd_solver='arpack')
    nys = Nystroem(n_components=10, random_state=42)
    concat = ConcatFeatures()
    lr = LogisticRegression(random_state=42, C=0.1)
    trainable = (pca & nys) >> concat >> lr
    digits = sklearn.datasets.load_digits()
    X, y = sklearn.utils.shuffle(digits.data, digits.target, random_state=42)
    cv_results = cross_val_score(trainable, X, y)
    cv_results = ['{0:.1%}'.format(score) for score in cv_results]

    from sklearn.pipeline import make_pipeline, FeatureUnion
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.linear_model import LogisticRegression as SklearnLR
    from sklearn.model_selection import cross_val_score
    union = FeatureUnion([("pca", SklearnPCA(n_components=3, random_state=42,
                                             svd_solver='arpack')),
                          ("nys", SklearnNystroem(n_components=10, random_state=42))])
    lr = SklearnLR(random_state=42, C=0.1)
    pipeline = make_pipeline(union, lr)
    scikit_cv_results = cross_val_score(pipeline, X, y, cv=5)
    scikit_cv_results = ['{0:.1%}'.format(score) for score in scikit_cv_results]
    self.assertEqual(cv_results, scikit_cv_results)
    warnings.resetwarnings()
Example #16
Source File: ols.py From bartpy with MIT License
def run(n: int = 10000, k_true: int = 3, k_null: int = 2):
    b_true = np.random.uniform(2, 0.1, size=k_true)
    b_true = np.array(list(b_true) + [0.0] * k_null)
    print(b_true)

    x = np.random.normal(0, 1, size=n * (k_true + k_null)).reshape(n, (k_true + k_null))
    X = pd.DataFrame(x)
    y = np.random.normal(0, 0.1, size=n) + np.array(X.multiply(b_true, axis=1).sum(axis=1))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=42, shuffle=True)

    model = SklearnModel(n_samples=50,
                         n_burn=50,
                         n_trees=20,
                         store_in_sample_predictions=False,
                         n_jobs=3,
                         n_chains=1)
    pipeline = make_pipeline(SelectNullDistributionThreshold(model, n_permutations=20), model)
    pipeline_model = pipeline.fit(X_train, y_train)
    print("Thresholds", pipeline_model.named_steps["selectnulldistributionthreshold"].thresholds)
    print("Feature Proportions", pipeline_model.named_steps["selectnulldistributionthreshold"].feature_proportions)
    print("Is Kept", pipeline_model.named_steps["selectnulldistributionthreshold"]._get_support_mask())
    pipeline_model.named_steps["selectnulldistributionthreshold"].plot()
Example #17
Source File: test_core_operators.py From lale with Apache License 2.0
def test_concat_with_hyperopt2(self):
    from lale.operators import make_pipeline, make_union
    from lale.lib.lale import Hyperopt
    pca = PCA(n_components=3)
    nys = Nystroem(n_components=10)
    concat = ConcatFeatures()
    lr = LogisticRegression(random_state=42, C=0.1)

    trainable = make_pipeline(make_union(pca, nys), lr)
    clf = Hyperopt(estimator=trainable, max_evals=2)
    from sklearn.datasets import load_iris
    iris_data = load_iris()
    clf.fit(iris_data.data, iris_data.target)
    clf.predict(iris_data.data)
Example #18
Source File: sklearn_patches.py From tslearn with BSD 2-Clause "Simplified" License
def check_pipeline_consistency(name, estimator_orig):
    if estimator_orig._get_tags()['non_deterministic']:
        msg = name + ' is non deterministic'
        raise SkipTest(msg)

    # check that make_pipeline(est) gives same score as est
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    X -= X.min()
    X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
    estimator = clone(estimator_orig)
    y = multioutput_estimator_convert_y_2d(estimator, y)
    set_random_state(estimator)
    pipeline = make_pipeline(estimator)
    estimator.fit(X, y)
    pipeline.fit(X, y)

    funcs = ["score", "fit_transform"]

    for func_name in funcs:
        func = getattr(estimator, func_name, None)
        if func is not None:
            func_pipeline = getattr(pipeline, func_name)
            result = func(X, y)
            result_pipe = func_pipeline(X, y)
            assert_allclose_dense_sparse(result, result_pipe)
Example #19
Source File: tests.py From scikit-rebate with MIT License
def test_multisurf_pipeline_missing_values():
    """Check: Data (Missing Values): MultiSURF works in a sklearn pipeline"""
    np.random.seed(320931)

    clf = make_pipeline(MultiSURF(n_features_to_select=2),
                        Imputer(),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_missing_values, labels_missing_values, cv=3, n_jobs=-1)) > 0.7
Example #20
Source File: tests.py From scikit-rebate with MIT License
def test_multisurfstar_pipeline_missing_values():
    """Check: Data (Missing Values): MultiSURF* works in a sklearn pipeline"""
    np.random.seed(320931)

    clf = make_pipeline(MultiSURFstar(n_features_to_select=2),
                        Imputer(),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_missing_values, labels_missing_values, cv=3, n_jobs=-1)) > 0.7
Example #21
Source File: tests.py From scikit-rebate with MIT License
def test_surfstar_pipeline_missing_values():
    """Check: Data (Missing Values): SURF* works in a sklearn pipeline"""
    np.random.seed(9238745)

    clf = make_pipeline(SURFstar(n_features_to_select=2),
                        Imputer(),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_missing_values, labels_missing_values, cv=3, n_jobs=-1)) > 0.7
Example #22
Source File: tests.py From scikit-rebate with MIT License
def test_surf_pipeline_missing_values():
    """Check: Data (Missing Values): SURF works in a sklearn pipeline"""
    np.random.seed(240932)

    clf = make_pipeline(SURF(n_features_to_select=2),
                        Imputer(),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_missing_values, labels_missing_values, cv=3, n_jobs=-1)) > 0.7
Example #23
Source File: tests.py From scikit-rebate with MIT License
def test_relieff_pipeline_missing_values():
    """Check: Data (Missing Values): ReliefF works in a sklearn pipeline"""
    np.random.seed(49082)

    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=10),
                        Imputer(),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_missing_values, labels_missing_values, cv=3, n_jobs=-1)) > 0.7
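A caveat for the five missing-values examples above: Imputer here is the old sklearn.preprocessing.Imputer, which was deprecated in scikit-learn 0.20 and removed in 0.22, so these tests assume an older scikit-learn release. On current versions the equivalent step is sklearn.impute.SimpleImputer.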
Example #24
Source File: tests.py From scikit-rebate with MIT License
def test_multisurfstar_pipeline_mixed_attributes():
    """Check: Data (Mixed Attributes): MultiSURF* works in a sklearn pipeline"""
    np.random.seed(320931)

    clf = make_pipeline(MultiSURFstar(n_features_to_select=2),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_mixed_attributes, labels_mixed_attributes, cv=3, n_jobs=-1)) > 0.7
Example #25
Source File: tests.py From scikit-rebate with MIT License
def test_surfstar_pipeline_mixed_attributes():
    """Check: Data (Mixed Attributes): SURF* works in a sklearn pipeline"""
    np.random.seed(9238745)

    clf = make_pipeline(SURFstar(n_features_to_select=2),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_mixed_attributes, labels_mixed_attributes, cv=3, n_jobs=-1)) > 0.7
Example #26
Source File: tests.py From scikit-rebate with MIT License
def test_surf_pipeline_mixed_attributes():
    """Check: Data (Mixed Attributes): SURF works in a sklearn pipeline"""
    np.random.seed(240932)

    clf = make_pipeline(SURF(n_features_to_select=2),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_mixed_attributes, labels_mixed_attributes, cv=3, n_jobs=-1)) > 0.7
Example #27
Source File: tests.py From scikit-rebate with MIT License
def test_relieff_pipeline_mixed_attributes():
    """Check: Data (Mixed Attributes): ReliefF works in a sklearn pipeline"""
    np.random.seed(49082)

    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=10),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_mixed_attributes, labels_mixed_attributes, cv=3, n_jobs=-1)) > 0.7
Example #28
Source File: tests.py From scikit-rebate with MIT License
def test_multisurf_pipeline_cont_endpoint():
    """Check: Data (Continuous Endpoint): MultiSURF works in a sklearn pipeline"""
    np.random.seed(320931)

    clf = make_pipeline(MultiSURF(n_features_to_select=2),
                        RandomForestRegressor(n_estimators=100, n_jobs=-1))

    assert abs(np.mean(cross_val_score(clf, features_cont_endpoint, labels_cont_endpoint, cv=3, n_jobs=-1))) < 0.5

# Test Mixed Attribute Data ------------------------------------------------------------------------------------
Example #29
Source File: test_core_pipeline.py From lale with Apache License 2.0
def test_import_from_sklearn_pipeline1(self):
    from sklearn.decomposition import PCA
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.pipeline import make_pipeline
    sklearn_pipeline = make_pipeline(PCA(n_components=3), KNeighborsClassifier())
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
    for i, pipeline_step in enumerate(sklearn_pipeline.named_steps):
        sklearn_step_params = sklearn_pipeline.named_steps[pipeline_step].get_params()
        lale_sklearn_params = lale_pipeline.steps()[i]._impl._wrapped_model.get_params()
        self.assertEqual(sklearn_step_params, lale_sklearn_params)
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Example #30
Source File: tests.py From scikit-rebate with MIT License
def test_surfstar_pipeline_cont_endpoint():
    """Check: Data (Continuous Endpoint): SURF* works in a sklearn pipeline"""
    np.random.seed(9238745)

    clf = make_pipeline(SURFstar(n_features_to_select=2),
                        RandomForestRegressor(n_estimators=100, n_jobs=-1))

    assert abs(np.mean(cross_val_score(clf, features_cont_endpoint, labels_cont_endpoint, cv=3, n_jobs=-1))) < 0.5