Python sklearn.pipeline.FeatureUnion() Examples
The following are 30 code examples of sklearn.pipeline.FeatureUnion().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.pipeline, or try the search function.
Example #1
Source File: 1_problem.py From pandas-feature-union with MIT License | 7 votes |
def main():
    """Select two iris columns through a FeatureUnion and print their means."""
    iris = load_iris()
    frame = pd.DataFrame(iris["data"], columns=iris["feature_names"])

    # Each branch of the union keeps exactly one column of the input frame.
    length_branch = make_pipeline(
        FunctionTransformer(lambda X: X.loc[:, ["sepal length (cm)"]]),
        # other transformations
    )
    width_branch = make_pipeline(
        FunctionTransformer(lambda X: X.loc[:, ["sepal width (cm)"]]),
        # other transformations
    )
    union = FeatureUnion([("1", length_branch), ("2", width_branch)])

    X = union.fit_transform(frame)
    # NOTE(review): label-based indexing below presumably requires the union
    # output to still be a DataFrame -- confirm against the FeatureUnion used.
    for column in ("sepal length (cm)", "sepal width (cm)"):
        print(X[column].mean())
Example #2
Source File: 2_transform_solution.py From pandas-feature-union with MIT License | 7 votes |
def main():
    """Select two iris columns with PandasTransform branches and print their means."""
    iris = load_iris()
    frame = pd.DataFrame(iris["data"], columns=iris["feature_names"])
    frame.loc[:, "class"] = iris["target"]

    # Each branch of the union keeps exactly one column of the input frame.
    length_branch = make_pipeline(
        PandasTransform(lambda X: X.loc[:, ["sepal length (cm)"]]),
        # other transformations
    )
    width_branch = make_pipeline(
        PandasTransform(lambda X: X.loc[:, ["sepal width (cm)"]]),
        # other transformations
    )
    union = FeatureUnion([("1", length_branch), ("2", width_branch)])

    X = union.fit_transform(frame)
    for column in ("sepal length (cm)", "sepal width (cm)"):
        print(X[column].mean())
Example #3
Source File: train.py From skorch with BSD 3-Clause "New" or "Revised" License | 7 votes |
def get_model(with_pipeline=False):
    """Return a multi-layer perceptron classifier.

    When ``with_pipeline`` is true, the classifier is wrapped in a pipeline
    that first scales the data and then selects ``N_FEATURES`` features.
    """
    net = NeuralNetClassifier(MLPClassifier)
    if not with_pipeline:
        return net

    scaling = FeatureUnion([
        ('minmax', MinMaxScaler()),
        ('normalize', Normalizer()),
    ])
    return Pipeline([
        ('scale', scaling),
        ('select', SelectKBest(k=N_FEATURES)),  # keep input size constant
        ('net', net),
    ])
Example #4
Source File: transform_utils.py From professional-services with Apache License 2.0 | 6 votes |
def transform(self, X):
    """Apply ``self.func`` element-wise (or per gufunc signature) to ``X``.

    Parameters
    ----------
    X : array-like
        Input data. Returned unchanged when ``self.func`` is ``None``.

    Returns
    -------
    The result of ``np.vectorize(self.func, ...)`` applied to ``X`` with a
    float output type, or ``X`` itself when no function is configured.
    """
    if self.func is None:
        return X

    if self.signature:
        input_dims, output_dims = _parse_gufunc_signature(
            signature=self.signature)
    else:
        input_dims, output_dims = [()], [()]

    # This below ensures FeatureUnion's concatenation (hstack) does not fail
    # because of resulting arrays having different number of dims
    if len(input_dims[0]) == 1 and len(output_dims[0]) == 0:
        X = np.expand_dims(X, axis=1)  # Add one extra dimension if (n)->()
    elif len(input_dims[0]) == 0 and len(output_dims[0]) == 1:
        X = np.squeeze(X, axis=1)  # Remove singleton dimension if ()->(n)

    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin yields the same float64 output dtype.
    return np.vectorize(self.func, otypes=[float],
                        signature=self.signature)(X)
Example #5
Source File: feature_union.py From mercari-solution with MIT License | 6 votes |
def fit(self, X, y=None):
    """Fit every transformer of the union in a process pool.

    Parameters
    ----------
    X : indexable by column selection
        Input data; each transformer is fitted on ``X[cols]`` where ``cols``
        is the ``columns`` attribute of the first step of that transformer.
    y : optional
        Targets, forwarded unchanged to every fit call.

    Returns
    -------
    self : FeatureUnion
        This estimator
    """
    self.transformer_list = list(self.transformer_list)
    self._validate_transformers()

    with Pool(self.n_jobs) as workers:
        # One (transformer, column subset, target) job per transformer.
        jobs = (
            (branch, X[branch.steps[0][1].columns], y)
            for _, branch, _ in self._iter()
        )
        fitted = workers.starmap(_fit_one_transformer, jobs)

    self._update_transformer_list(fitted)
    return self
Example #6
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 6 votes |
def _filter(obj):
    """Recursively filter the steps held by known container estimators.

    Container objects (mapper, column transformer, feature union, pipeline)
    are updated in place and returned; a ``SelectorMixin`` is replaced by a
    ``SelectorProxy``; a list is filtered element-wise into a new list; any
    other object is returned untouched.
    """
    if isinstance(obj, DataFrameMapper):
        obj.features = _filter_steps(obj.features)
        if getattr(obj, "built_features", None) is not None:
            obj.built_features = _filter_steps(obj.built_features)
        return obj

    if isinstance(obj, ColumnTransformer):
        obj.transformers = _filter_steps(obj.transformers)
        obj.remainder = _filter(obj.remainder)
        # The fitted attribute is only filtered when it is present.
        if hasattr(obj, "transformers_"):
            obj.transformers_ = _filter_steps(obj.transformers_)
        return obj

    if isinstance(obj, FeatureUnion):
        obj.transformer_list = _filter_steps(obj.transformer_list)
        return obj

    if isinstance(obj, Pipeline):
        obj.steps = _filter_steps(obj.steps)
        return obj

    if isinstance(obj, SelectorMixin):
        return SelectorProxy(obj)

    if isinstance(obj, list):
        return [_filter(item) for item in obj]

    return obj
Example #7
Source File: test_core_pipeline.py From lale with Apache License 2.0 | 6 votes |
def test_import_from_sklearn_pipeline_feature_union(self):
    """Import a sklearn pipeline containing a FeatureUnion and check its edges."""
    from sklearn.pipeline import FeatureUnion
    from sklearn.decomposition import PCA
    from sklearn.kernel_approximation import Nystroem
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.pipeline import make_pipeline

    union = FeatureUnion([
        ("pca", PCA(n_components=1)),
        ("nys", Nystroem(n_components=2, random_state=42)),
    ])
    sklearn_pipeline = make_pipeline(union, KNeighborsClassifier())
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
    self.assertEqual(len(lale_pipeline.edges()), 3)

    from lale.lib.sklearn.pca import PCAImpl
    from lale.lib.sklearn.nystroem import NystroemImpl
    from lale.lib.lale.concat_features import ConcatFeaturesImpl
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl

    # Expected (source, destination) implementation class of each edge.
    expected_edges = [
        (PCAImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, KNeighborsClassifierImpl),
    ]
    for position, (source_impl, target_impl) in enumerate(expected_edges):
        self.assertEqual(
            lale_pipeline.edges()[position][0]._impl_class(), source_impl)
        self.assertEqual(
            lale_pipeline.edges()[position][1]._impl_class(), target_impl)

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Example #8
Source File: field_types.py From lexpredict-contraxsuite with GNU Affero General Public License v3.0 | 6 votes |
def build_vectorization_pipeline(self) -> Tuple[List[Tuple[str, Any]], Callable[[], List[str]]]:
    """Build the vectorization steps for a numerator/denominator value.

    Returns a pair: the list of pipeline steps (a single ``'vect'`` feature
    union over the two parts) and a zero-argument callable that produces the
    prefixed feature names of both vectorizers.
    """
    vect_numerator = vectorizers.NumberVectorizer()
    vect_denominator = vectorizers.NumberVectorizer()

    def feature_names():
        # Evaluated lazily, when the caller invokes the returned callable.
        numerator_names = ['numerator_' + str(c)
                           for c in vect_numerator.get_feature_names()]
        denominator_names = ['denominator_' + str(c)
                             for c in vect_denominator.get_feature_names()]
        return numerator_names + denominator_names

    numerator_branch = Pipeline([
        ('selector', vectorizers.DictItemSelector(item='numerator')),
        ('vect', vect_numerator),
    ])
    denominator_branch = Pipeline([
        ('selector', vectorizers.DictItemSelector(item='denominator')),
        ('vect', vect_denominator),
    ])
    union = FeatureUnion(transformer_list=[
        ('numerator', numerator_branch),
        ('denominator', denominator_branch),
    ])
    return [('vect', union)], feature_names
Example #9
Source File: test_investigate.py From sklearn-onnx with MIT License | 6 votes |
def test_simple_feature_union(self):
    """Compare each collected ONNX step of a fitted union with its recorded output."""
    data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
                       dtype=numpy.float32)
    union = FeatureUnion([
        ("scaler1", StandardScaler()),
        ("scaler2", RobustScaler()),
    ])
    union.fit(data)

    all_models = list(enumerate_pipeline_models(union))
    steps = collect_intermediate_steps(
        union, "feature union", [("input", FloatTensorType([None, 2]))])

    assert len(steps) == 2
    assert len(all_models) == 3

    union.transform(data)
    for step in steps:
        session = onnxruntime.InferenceSession(
            step['onnx_step'].SerializeToString())
        onnx_output = session.run(None, {'input': data})[0]
        skl_outputs = step['model']._debug.outputs['transform']
        assert_almost_equal(onnx_output, skl_outputs)
        compare_objects(onnx_output, skl_outputs)
Example #10
Source File: test_sklearn_feature_union.py From sklearn-onnx with MIT License | 6 votes |
def test_feature_union_default(self):
    """Convert a default FeatureUnion of two scalers and dump the result."""
    iris = load_iris()
    features = iris.data.astype(np.float32)
    X_train, X_test, *_ = train_test_split(
        features, iris.target, test_size=0.5, random_state=42)

    union = FeatureUnion([
        ('standard', StandardScaler()),
        ('minmax', MinMaxScaler()),
    ])
    model = union.fit(X_train)

    input_spec = [('input', FloatTensorType([None, X_test.shape[1]]))]
    model_onnx = convert_sklearn(model, 'feature union', input_spec)
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(X_test, model, model_onnx,
                        basename="SklearnFeatureUnionDefault")
Example #11
Source File: test_sklearn_feature_union.py From sklearn-onnx with MIT License | 6 votes |
def test_feature_union_transformer_weights_1(self):
    """Convert a weighted PCA/SVD union fitted on int64 digits data."""
    digits = load_digits()
    features = digits.data.astype(np.int64)
    X_train, X_test, *_ = train_test_split(
        features, digits.target, test_size=0.5, random_state=42)

    union = FeatureUnion(
        [('pca', PCA()), ('svd', TruncatedSVD())],
        transformer_weights={'pca': 10, 'svd': 3},
    )
    model = union.fit(X_train)

    input_spec = [('input', Int64TensorType([None, X_test.shape[1]]))]
    model_onnx = convert_sklearn(model, 'feature union', input_spec)
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnFeatureUnionTransformerWeights1-Dec4",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #12
Source File: test_sklearn_feature_union.py From sklearn-onnx with MIT License | 6 votes |
def test_feature_union_transformer_weights_2(self):
    """Convert a PCA/SVD union whose weight keys differ from the step names."""
    digits = load_digits()
    features = digits.data.astype(np.float32)
    X_train, X_test, *_ = train_test_split(
        features, digits.target, test_size=0.5, random_state=42)

    # NOTE(review): the weight keys ('pca1', 'svd2') do not match the
    # transformer names ('pca', 'svd') -- presumably deliberate; confirm.
    union = FeatureUnion(
        [('pca', PCA()), ('svd', TruncatedSVD())],
        transformer_weights={'pca1': 10, 'svd2': 3},
    )
    model = union.fit(X_train)

    input_spec = [('input', FloatTensorType([None, X_test.shape[1]]))]
    model_onnx = convert_sklearn(model, 'feature union', input_spec)
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnFeatureUnionTransformerWeights2-Dec4",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #13
Source File: _base.py From ibex with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __add__(self, other):
    """Combine ``self`` and ``other`` into a single feature union.

    An operand that already is a ``FeatureUnion`` contributes its member
    transformers; any other operand contributes itself.

    Returns:

        :py:class:`ibex.sklearn.pipeline.FeatureUnion`
    """
    def _members(estimator):
        # Flatten an existing union into its transformers (names dropped).
        if isinstance(estimator, FeatureUnion):
            return [entry[1] for entry in estimator.transformer_list]
        return [estimator]

    merged = _members(self) + _members(other)
    return FeatureUnion(_make_pipeline_steps(merged))
Example #14
Source File: test_pipeline.py From sktime with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_FeatureUnion_pipeline():
    """Fit a segment -> (mean, std) union -> tree pipeline and check predictions."""
    # pipeline with segmentation plus multiple feature extraction
    mean_rows = RowTransformer(
        FunctionTransformer(func=np.mean, validate=False))
    std_rows = RowTransformer(
        FunctionTransformer(func=np.std, validate=False))
    clf = Pipeline([
        ('segment', RandomIntervalSegmenter(n_intervals=3)),
        ('transform', FeatureUnion([('mean', mean_rows), ('std', std_rows)])),
        ('clf', DecisionTreeClassifier()),
    ])

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # One prediction per test sample, covering the same set of labels.
    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test))
Example #15
Source File: test_debug_pipeline.py From scikit-lego with MIT License | 6 votes |
def test_feature_union(caplog, named_steps):
    """Check that DebugPipelines nested in a FeatureUnion log their steps.

    Two pipelines are built from the same ``named_steps`` (one with the
    default log callback, one with a custom one), combined in a union and
    fitted; every non-final step must then appear exactly twice in the log.
    """
    pipe_w_default_log_callback = DebugPipeline(named_steps, log_callback="default")
    pipe_w_custom_log_callback = DebugPipeline(named_steps, log_callback=custom_log_callback)

    pipe_union = FeatureUnion(
        [
            ("pipe_w_default_log_callback", pipe_w_default_log_callback),
            ("pipe_w_custom_log_callback", pipe_w_custom_log_callback),
        ]
    )

    caplog.clear()
    with caplog.at_level(logging.INFO):
        pipe_union.fit(IRIS.data, IRIS.target)
        assert caplog.text, f"Log should be non-empty: {caplog.text}"
        for pipe in [pipe_w_default_log_callback, pipe_w_custom_log_callback]:
            for _, step in pipe.steps[:-1]:
                assert str(step) in caplog.text, f"{step} should be in: {caplog.text}"
                # Both pipelines share the same steps, so each step's text is
                # expected twice in the captured log -- the message says so.
                assert (
                    caplog.text.count(str(step)) == 2
                ), f"{step} should be twice in {caplog.text}"
Example #16
Source File: 04_sent.py From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License | 6 votes |
def create_union_model(params=None):
    """Build a tf-idf + linguistic-features Naive Bayes pipeline.

    Parameters
    ----------
    params : dict, optional
        Pipeline parameters applied with ``set_params`` when truthy.

    Returns
    -------
    Pipeline
        A pipeline with an ``'all'`` feature union followed by a ``'clf'``
        ``MultinomialNB`` classifier.
    """
    def preprocessor(tweet):
        # Lower-case, then apply the emoticon and regex replacement tables.
        tweet = tweet.lower()

        for k in emo_repl_order:
            tweet = tweet.replace(k, emo_repl[k])
        # ``dict.iteritems`` does not exist on Python 3; ``items`` works on
        # both Python 2 and 3.
        for r, repl in re_repl.items():
            tweet = re.sub(r, repl, tweet)

        return tweet.replace("-", " ").replace("_", " ")

    tfidf_ngrams = TfidfVectorizer(preprocessor=preprocessor,
                                   analyzer="word")
    ling_stats = LinguisticVectorizer()
    all_features = FeatureUnion(
        [('ling', ling_stats), ('tfidf', tfidf_ngrams)])
    #all_features = FeatureUnion([('tfidf', tfidf_ngrams)])
    #all_features = FeatureUnion([('ling', ling_stats)])
    clf = MultinomialNB()
    pipeline = Pipeline([('all', all_features), ('clf', clf)])

    if params:
        pipeline.set_params(**params)

    return pipeline
Example #17
Source File: test_pipeline.py From skits with MIT License | 6 votes |
def test_multiouput_forecast(self):
    """Smoke-test fitting and forecasting with a horizon of 4 on a noisy sine."""
    # TODO: Make this a real test
    feature_union = FeatureUnion(
        [("ar_transformer", AutoregressiveTransformer(num_lags=3))]
    )
    pipeline = ForecasterPipeline([
        ("pre_horizon", HorizonTransformer(horizon=4)),
        ("pre_imputer", ReversibleImputer(y_only=True)),
        ("features", feature_union),
        ("post_lag_imputer", ReversibleImputer()),
        ("regressor", LinearRegression()),
    ])

    grid = np.linspace(0, 1, 100)
    y = np.sin(2 * np.pi * 5 * grid) + np.random.normal(0, 0.1, size=100)
    column = y[:, np.newaxis]

    pipeline.fit(column, y)
    pipeline.forecast(column, 20)
Example #18
Source File: test_pipeline.py From skits with MIT License | 6 votes |
def test_multiouput_prediction(self):
    """Smoke-test fitting and predicting with a horizon of 4 on a noisy sine."""
    # TODO: Make this a real test
    feature_union = FeatureUnion(
        [("ar_transformer", AutoregressiveTransformer(num_lags=3))]
    )
    pipeline = ForecasterPipeline([
        ("pre_horizon", HorizonTransformer(horizon=4)),
        ("pre_imputer", ReversibleImputer(y_only=True)),
        ("features", feature_union),
        ("post_lag_imputer", ReversibleImputer()),
        ("regressor", LinearRegression()),
    ])

    grid = np.linspace(0, 1, 100)
    y = np.sin(2 * np.pi * 5 * grid) + np.random.normal(0, 0.1, size=100)
    column = y[:, np.newaxis]

    pipeline.fit(column, y)
    pipeline.predict(column, to_scale=True, refit=True)
Example #19
Source File: test_core_pipeline.py From lale with Apache License 2.0 | 6 votes |
def test_export_to_sklearn_pipeline3(self):
    """Export a three-branch lale pipeline and check the sklearn step types."""
    from lale.lib.lale import ConcatFeatures
    from lale.lib.sklearn import PCA
    from lale.lib.sklearn import KNeighborsClassifier, LogisticRegression, SVC
    from sklearn.feature_selection import SelectKBest
    from lale.lib.sklearn import Nystroem
    from sklearn.pipeline import FeatureUnion

    pca_branch = PCA() >> SelectKBest(k=2)
    nystroem_branch = Nystroem(random_state = 42) >> SelectKBest(k=3)
    select_branch = SelectKBest(k=3)
    lale_pipeline = (
        (pca_branch & nystroem_branch & select_branch)
        >> ConcatFeatures()
        >> SelectKBest(k=2)
        >> LogisticRegression()
    )

    trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
    sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
    exported_steps = sklearn_pipeline.named_steps

    self.assertIsInstance(exported_steps['featureunion'], FeatureUnion)
    self.assertIsInstance(exported_steps['selectkbest'], SelectKBest)
    # From here on the sklearn class replaces the lale operator of that name.
    from sklearn.linear_model import LogisticRegression
    self.assertIsInstance(exported_steps['logisticregression'], LogisticRegression)
    self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)
Example #20
Source File: pipeline.py From sparkit-learn with Apache License 2.0 | 6 votes |
def make_sparkunion(*transformers):
    """Build a SparkFeatureUnion from unnamed transformers.

    Shorthand for the union constructor: the transformers cannot be named
    by the caller; names are generated by ``_name_estimators``. Weighting
    is not supported either.

    Examples
    --------
    >>> from sklearn.decomposition import PCA, TruncatedSVD
    >>> make_union(PCA(), TruncatedSVD())    # doctest: +NORMALIZE_WHITESPACE
    FeatureUnion(n_jobs=1,
                 transformer_list=[('pca', PCA(copy=True, n_components=None,
                                               whiten=False)),
                                   ('truncatedsvd',
                                    TruncatedSVD(algorithm='randomized',
                                                 n_components=2, n_iter=5,
                                                 random_state=None, tol=0.0))],
                 transformer_weights=None)

    Returns
    -------
    f : FeatureUnion
    """
    named_transformers = _name_estimators(transformers)
    return SparkFeatureUnion(named_transformers)
Example #21
Source File: test_pipeline.py From sparkit-learn with Apache License 2.0 | 6 votes |
def test_same_result_weight(self):
    """Local and Spark weighted unions must produce identical matrices."""
    X, Z = self.make_text_rdd(2)

    loc_char = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    dist_char = SparkCountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    loc_word = CountVectorizer(analyzer="word")
    dist_word = SparkCountVectorizer(analyzer="word")

    loc_union = FeatureUnion(
        [("chars", loc_char), ("words", loc_word)],
        transformer_weights={"words": 10},
    )
    dist_union = SparkFeatureUnion(
        [("chars", dist_char), ("words", dist_word)],
        transformer_weights={"words": 10},
    )

    loc_union.fit(X)
    dist_union.fit(Z)

    X_transformed = loc_union.transform(X)
    # The distributed result is collected and stacked into one matrix.
    collected_blocks = dist_union.transform(Z).collect()
    Z_transformed = sp.vstack(collected_blocks)
    assert_array_equal(X_transformed.toarray(), Z_transformed.toarray())
Example #22
Source File: feature_extraction.py From mne-features with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _apply_extractor(extractor, X, return_as_df):
    """Fit-transform ``X`` with ``extractor`` and optionally fetch feature names.

    Parameters
    ----------
    extractor : Instance of :class:`~sklearn.pipeline.FeatureUnion` or
        :class:`~sklearn.pipeline.Pipeline`.

    X : ndarray, shape (n_channels, n_times)

    return_as_df : bool
        When true, the feature names are queried after the transform.

    Returns
    -------
    X : ndarray, shape (n_features,)

    feature_names : list of str | None
        Not None, only if ``return_as_df`` is True.
    """
    X = extractor.fit_transform(X)
    feature_names = extractor.get_feature_names() if return_as_df else None
    return X, feature_names
Example #23
Source File: test_pipeline.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_FeatureUnion():
    """A union of row-wise mean and std yields one column block per transformer."""
    X, y = load_gunpoint(return_X_y=True)

    mean_rows = RowTransformer(
        FunctionTransformer(func=np.mean, validate=False))
    std_rows = RowTransformer(
        FunctionTransformer(func=np.std, validate=False))
    fu = FeatureUnion([('mean', mean_rows), ('std', std_rows)])

    Xt = fu.fit_transform(X, y)

    n_rows, n_columns = X.shape[0], X.shape[1]
    assert Xt.shape == (n_rows, n_columns * len(fu.transformer_list))
Example #24
Source File: test_feature_importances_.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_feature_importances_single_feature_interval_and_estimator():
    """Forest importances must match an equivalent hand-built pipeline."""
    random_state = 1234

    # Compute using default method
    extractor = RandomIntervalFeatureExtractor(
        n_intervals=1, features=[np.mean], random_state=random_state)
    base_estimator = Pipeline([
        ('transform', extractor),
        ('clf', DecisionTreeClassifier()),
    ])
    clf1 = TimeSeriesForestClassifier(estimator=base_estimator,
                                      random_state=random_state,
                                      n_estimators=1)
    clf1.fit(X_train, y_train)

    # Extract the interval and the estimator, and compute using pipelines
    fitted_steps = clf1.estimators_[0].steps
    intervals = fitted_steps[0][1].intervals_
    mean_rows = RowTransformer(
        FunctionTransformer(func=np.mean, validate=False))
    clf2 = Pipeline([
        ('segment', IntervalSegmenter(intervals)),
        ('transform', FeatureUnion([('mean', mean_rows)])),
        ('clf', clone(fitted_steps[-1][1])),
    ])
    clf2.fit(X_train, y_train)

    # Check for feature importances obtained from the estimators
    fi_expected = clf1.estimators_[0].steps[-1][1].feature_importances_
    fi_actual = clf2.steps[-1][1].feature_importances_
    np.testing.assert_array_equal(fi_actual, fi_expected)


# Check for 4 more complex cases with 3 features, with both numbers of
# intervals and estimators varied from 1 to 2.
# Feature importances from each estimator on each interval, and
# normalised feature values of the time series are checked using
# different but equivalent implementations
Example #25
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def test_predict_transform(self):
    """predict_transform appends the union's outputs to each prediction."""
    predict_transformer = FeatureUnion([
        ("identity", FunctionTransformer(None)),
        ("log10", FunctionTransformer(numpy.log10))
    ])
    pipeline = PMMLPipeline(
        [("estimator", DummyRegressor())],
        predict_transformer = predict_transformer,
    )

    X = DataFrame([[1, 0], [2, 0], [3, 0]], columns = ["X1", "X2"])
    y = Series([0.5, 1.0, 1.5], name = "y")
    pipeline.fit(X, y)

    y_pred = [1.0, 1.0, 1.0]
    # Expected row: prediction, identity output, log10 output.
    y_predt = [1.0, 1.0, numpy.log10(1.0)]
    expected_rows = [y_predt] * 3

    self.assertEqual(y_pred, pipeline.predict(X).tolist())
    self.assertEqual(expected_rows, pipeline.predict_transform(X).tolist())
Example #26
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def test_timedelta_days(self):
    """The day difference between two date columns is computed per row."""
    X = DataFrame(
        [["2018-12-31", "2019-01-01"], ["2019-01-31", "2019-01-01"]],
        columns = ["left", "right"],
    )

    left_mapper = DataFrameMapper([
        ("left", [DateDomain(), DaysSinceYearTransformer(year = 2010)])
    ])
    right_mapper = DataFrameMapper([
        ("right", [DateDomain(), DaysSinceYearTransformer(year = 2010)])
    ])
    delta = Alias(
        ExpressionTransformer("X[0] - X[1]"),
        "delta(left, right)",
        prefit = True,
    )
    pipeline = clone(Pipeline([
        ("union", FeatureUnion([
            ("left_mapper", left_mapper),
            ("right_mapper", right_mapper),
        ])),
        ("expression", delta),
    ]))

    Xt = pipeline.fit_transform(X)
    self.assertEqual([[-1], [30]], Xt.tolist())
Example #27
Source File: methods.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def feature_union(names, steps, weights):
    """Reconstruct a FeatureUnion from names, steps, and weights.

    Returns a ``(estimator, fit_time)`` pair, where the estimator is
    ``FIT_FAILURE`` if any of the steps is ``FIT_FAILURE`` and ``fit_time``
    is the sum of the individual step times.
    """
    timed_steps = [_maybe_timed(step) for step in steps]
    fitted, durations = zip(*timed_steps)
    fit_time = sum(durations)

    if any(step is FIT_FAILURE for step in fitted):
        return FIT_FAILURE, fit_time

    union = FeatureUnion(list(zip(names, fitted)), transformer_weights=weights)
    return union, fit_time
Example #28
Source File: _parse.py From sklearn-onnx with MIT License | 5 votes |
def build_sklearn_parsers_map():
    """Return the mapping from sklearn model classes to their parser functions."""
    parsers = {
        pipeline.Pipeline: _parse_sklearn_pipeline,
        pipeline.FeatureUnion: _parse_sklearn_feature_union,
        GaussianProcessRegressor: _parse_sklearn_gaussian_process,
        GridSearchCV: _parse_sklearn_grid_search_cv,
    }

    # ColumnTransformer may be None here, in which case it gets no parser.
    if ColumnTransformer is not None:
        parsers[ColumnTransformer] = _parse_sklearn_column_transformer

    # Every listed classifier except LinearSVC uses the classifier parser.
    parsers.update({
        classifier: _parse_sklearn_classifier
        for classifier in sklearn_classifier_list
        if classifier not in [LinearSVC]
    })
    return parsers
Example #29
Source File: lr_model.py From autogluon with Apache License 2.0 | 5 votes |
def preprocess_train(self, X, feature_types, vect_max_features):
    """Build and fit ``self.pipeline`` from the non-empty feature groups.

    A sub-pipeline is added for each of the ``'language'``, ``'onehot'``,
    ``'continuous'`` and ``'skewed'`` entries of ``feature_types`` that is
    non-empty; the resulting FeatureUnion is stored and fitted on ``X``.
    """
    branches = []

    if len(feature_types['language']) > 0:
        text_pipeline = Pipeline(steps=[
            ("preparator", NlpDataPreprocessor(nlp_cols=feature_types['language'])),
            ("vectorizer", TfidfVectorizer(
                ngram_range=self.params['proc.ngram_range'],
                sublinear_tf=True,
                max_features=vect_max_features,
                tokenizer=self.tokenize)),
        ])
        branches.append(('vect', text_pipeline))

    if len(feature_types['onehot']) > 0:
        onehot_pipeline = Pipeline(steps=[
            ('generator', OheFeaturesGenerator(cats_cols=feature_types['onehot'])),
        ])
        branches.append(('cats', onehot_pipeline))

    if len(feature_types['continuous']) > 0:
        continuous_pipeline = Pipeline(steps=[
            ('generator', NumericDataPreprocessor(cont_cols=feature_types['continuous'])),
            ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
            ('scaler', StandardScaler()),
        ])
        branches.append(('cont', continuous_pipeline))

    if len(feature_types['skewed']) > 0:
        skewed_pipeline = Pipeline(steps=[
            ('generator', NumericDataPreprocessor(cont_cols=feature_types['skewed'])),
            ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
            ('quantile', QuantileTransformer(output_distribution='normal')),  # Or output_distribution = 'uniform'
        ])
        branches.append(('skew', skewed_pipeline))

    self.pipeline = FeatureUnion(transformer_list=branches)
    self.pipeline.fit(X)
Example #30
Source File: test_pipeline.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_feature_union_weights():
    """Transformer weights must scale the weighted transformer's output."""
    # test feature union with transformer weights
    iris = load_iris()
    X, y = iris.data, iris.target
    pca = PCA(n_components=2, svd_solver='randomized', random_state=0)
    select = SelectKBest(k=1)
    pca_weight = {"pca": 10}

    # test using fit followed by transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights=pca_weight)
    fs.fit(X, y)
    X_transformed = fs.transform(X)

    # test using fit_transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights=pca_weight)
    X_fit_transformed = fs.fit_transform(X, y)

    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", Transf()), ("pca", pca), ("select", select)],
                      transformer_weights={"mock": 10})
    X_fit_transformed_wo_method = fs.fit_transform(X, y)

    # check against expected result
    # We use a different pca object to control the random_state stream
    for result in (X_transformed, X_fit_transformed):
        assert_array_almost_equal(result[:, :-1], 10 * pca.fit_transform(X))
        assert_array_equal(result[:, -1],
                           select.fit_transform(X, y).ravel())
    assert_equal(X_fit_transformed_wo_method.shape, (X.shape[0], 7))