Python sklearn.preprocessing.FunctionTransformer() Examples
The following are 30 code examples of sklearn.preprocessing.FunctionTransformer(), drawn from open-source projects. Each example lists the project and source file it was taken from, so you can refer back to the original code. You may also want to look at the other functions and classes available in the sklearn.preprocessing module.
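Before working through the examples, a minimal sketch of the basic pattern may help: FunctionTransformer wraps an ordinary callable so that it behaves like any other scikit-learn transformer. The log1p/expm1 pair below is chosen only for illustration; this is a generic usage sketch, not code taken from the projects listed here.

import numpy as np
from sklearn.preprocessing import FunctionTransformer

# Wrap a plain numpy function as a stateless transformer; inverse_func
# enables inverse_transform, and check_inverse verifies the pair on fit.
log_transformer = FunctionTransformer(func=np.log1p, inverse_func=np.expm1,
                                      check_inverse=True, validate=True)

X = np.arange(10, dtype=float).reshape(5, 2)
Xt = log_transformer.fit_transform(X)           # elementwise log1p
X_back = log_transformer.inverse_transform(Xt)  # recovers X up to float error
assert np.allclose(X, X_back)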
Example #1
Source File: test_compose.py From sktime with BSD 3-Clause "New" or "Revised" License | 7 votes |
def test_ColumnTransformer_pipeline():
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, y_test = load_basic_motions(split="test", return_X_y=True)

    # using Identity function transformers (transform series to series)
    def id_func(X):
        return X

    column_transformer = ColumnTransformer([
        ('id0', FunctionTransformer(func=id_func, validate=False), ['dim_0']),
        ('id1', FunctionTransformer(func=id_func, validate=False), ['dim_1'])
    ])
    steps = [
        ('extract', column_transformer),
        ('tabularise', Tabularizer()),
        ('classify', RandomForestClassifier(n_estimators=2, random_state=1))]
    model = Pipeline(steps=steps)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test))
Example #2
Source File: test_RandomIntervalFeatureExtractor.py From sktime with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_different_implementations():
    random_state = 1233
    X_train, y_train = make_classification_problem()

    # Compare with chained transformations.
    tran1 = RandomIntervalSegmenter(n_intervals='sqrt', random_state=random_state)
    tran2 = RowTransformer(FunctionTransformer(func=np.mean, validate=False))
    A = tran2.fit_transform(tran1.fit_transform(X_train))

    tran = RandomIntervalFeatureExtractor(n_intervals='sqrt', features=[np.mean],
                                          random_state=random_state)
    B = tran.fit_transform(X_train)

    np.testing.assert_array_equal(A, B)

    # Compare with transformer pipeline using TSFeatureUnion.
Example #3
Source File: test_RandomIntervalFeatureExtractor.py From sktime with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_different_pipelines():
    random_state = 1233
    X_train, y_train = make_classification_problem()
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals='sqrt', random_state=random_state)),
        ('transform', FeatureUnion([
            ('mean', RowTransformer(FunctionTransformer(func=np.mean, validate=False))),
            ('std', RowTransformer(FunctionTransformer(func=np.std, validate=False))),
            ('slope', RowTransformer(FunctionTransformer(func=time_series_slope, validate=False))),
        ])),
    ]
    pipe = Pipeline(steps)
    a = pipe.fit_transform(X_train)
    tran = RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                          features=[np.mean, np.std, time_series_slope],
                                          random_state=random_state)
    b = tran.fit_transform(X_train)
    np.testing.assert_array_equal(a, b)
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_, tran.intervals_)
Example #4
Source File: sindy.py From sparsereg with MIT License | 6 votes |
def __init__(
    self,
    alpha=1.0,
    threshold=0.1,
    degree=3,
    operators=None,
    dt=1.0,
    n_jobs=1,
    derivative=None,
    feature_names=None,
    kw={},
):
    self.alpha = alpha
    self.threshold = threshold
    self.degree = degree
    self.operators = operators
    self.n_jobs = n_jobs
    self.derivative = derivative or FunctionTransformer(func=_derivative, kw_args={"dt": dt})
    self.feature_names = feature_names
    self.kw = kw
Example #5
Source File: common_tabular_tests.py From interpret-community with MIT License | 6 votes |
def _get_transformations_one_to_many_greater(self, feature_names):
    # results in number of features greater than original features
    # copy all features except last one. For last one, replicate columns to create 3 more features
    transformations = []
    feature_names = list(feature_names)
    index = 0
    for f in feature_names[:-1]:
        transformations.append(("{}".format(index), "passthrough", [f]))
        index += 1

    def copy_func(x):
        return np.tile(x, (1, 3))

    copy_transformer = FunctionTransformer(copy_func)
    transformations.append(("copy_transformer", copy_transformer, [feature_names[-1]]))
    return ColumnTransformer(transformations)
Example #6
Source File: dataloader.py From models with MIT License | 6 votes |
def __init__(self, pos_features, pipeline_obj_path):
    """
    Args:
        pos_features: list of positional features to use
        pipeline_obj_path: path to the serialized pipeline obj_path
    """
    self.pos_features = pos_features
    self.pipeline_obj_path = pipeline_obj_path

    # deserialize the pickle file
    with open(self.pipeline_obj_path, "rb") as f:
        pipeline_obj = pickle.load(f)
    self.POS_FEATURES = pipeline_obj[0]
    self.minmax_scaler = pipeline_obj[1]
    self.imp = pipeline_obj[2]

    self.funct_transform = FunctionTransformer(func=sign_log_func,
                                               inverse_func=sign_log_func_inverse)
    # for simplicity, assume all current pos_features are the
    # same as from before
    assert self.POS_FEATURES == self.pos_features
Example #7
Source File: test_preprocessing.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.preprocessing.Binarizer, pp.Binarizer)
    self.assertIs(df.preprocessing.FunctionTransformer, pp.FunctionTransformer)
    self.assertIs(df.preprocessing.Imputer, pp.Imputer)
    self.assertIs(df.preprocessing.KernelCenterer, pp.KernelCenterer)
    self.assertIs(df.preprocessing.LabelBinarizer, pp.LabelBinarizer)
    self.assertIs(df.preprocessing.LabelEncoder, pp.LabelEncoder)
    self.assertIs(df.preprocessing.MultiLabelBinarizer, pp.MultiLabelBinarizer)
    self.assertIs(df.preprocessing.MaxAbsScaler, pp.MaxAbsScaler)
    self.assertIs(df.preprocessing.MinMaxScaler, pp.MinMaxScaler)
    self.assertIs(df.preprocessing.Normalizer, pp.Normalizer)
    self.assertIs(df.preprocessing.OneHotEncoder, pp.OneHotEncoder)
    self.assertIs(df.preprocessing.PolynomialFeatures, pp.PolynomialFeatures)
    self.assertIs(df.preprocessing.RobustScaler, pp.RobustScaler)
    self.assertIs(df.preprocessing.StandardScaler, pp.StandardScaler)
Example #8
Source File: test_pipeline.py From sktime with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_FeatureUnion_pipeline():
    # pipeline with segmentation plus multiple feature extraction
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals=3)),
        ('transform', FeatureUnion([
            ('mean', RowTransformer(FunctionTransformer(func=np.mean, validate=False))),
            ('std', RowTransformer(FunctionTransformer(func=np.std, validate=False)))
        ])),
        ('clf', DecisionTreeClassifier())
    ]
    clf = Pipeline(steps)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test))
Example #9
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def test_predict_proba_transform(self):
    predict_proba_transformer = FunctionTransformer(numpy.log)
    pipeline = PMMLPipeline([("estimator", DummyClassifier(strategy = "prior"))],
                            predict_proba_transformer = predict_proba_transformer)
    X = DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, 1.0], columns = ["x"])
    y = Series(["green", "red", "yellow", "green", "red", "green"], name = "y")
    pipeline.fit(X, y)
    self.assertEqual(["green", "red", "yellow"], pipeline._final_estimator.classes_.tolist())
    y_proba = [3 / 6.0, 2 / 6.0, 1 / 6.0]
    y_probat = [numpy.log(x) for x in y_proba]
    self.assertEqual([y_proba for i in range(0, 6)], pipeline.predict_proba(X).tolist())
    self.assertEqual([y_proba + y_probat for i in range(0, 6)], pipeline.predict_proba_transform(X).tolist())
Example #10
Source File: test_compose.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_row_transformer_function_transformer_series_to_primitives():
    X, y = load_gunpoint(return_X_y=True)
    ft = FunctionTransformer(func=np.mean, validate=False)
    t = RowTransformer(ft)
    Xt = t.fit_transform(X, y)
    assert Xt.shape == X.shape
    assert isinstance(Xt.iloc[0, 0], float)  # check series-to-primitive transforms
Example #11
Source File: test_pipeline.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_FeatureUnion():
    X, y = load_gunpoint(return_X_y=True)
    ft = FunctionTransformer(func=np.mean, validate=False)
    t = RowTransformer(ft)
    fu = FeatureUnion([
        ('mean', t),
        ('std', RowTransformer(FunctionTransformer(func=np.std, validate=False)))
    ])
    Xt = fu.fit_transform(X, y)
    assert Xt.shape == (X.shape[0], X.shape[1] * len(fu.transformer_list))
Example #12
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def test_predict_transform(self):
    predict_transformer = FeatureUnion([
        ("identity", FunctionTransformer(None)),
        ("log10", FunctionTransformer(numpy.log10))
    ])
    pipeline = PMMLPipeline([("estimator", DummyRegressor())],
                            predict_transformer = predict_transformer)
    X = DataFrame([[1, 0], [2, 0], [3, 0]], columns = ["X1", "X2"])
    y = Series([0.5, 1.0, 1.5], name = "y")
    pipeline.fit(X, y)
    y_pred = [1.0, 1.0, 1.0]
    y_predt = [1.0, 1.0, numpy.log10(1.0)]
    self.assertEqual(y_pred, pipeline.predict(X).tolist())
    self.assertEqual([y_predt for i in range(0, 3)], pipeline.predict_transform(X).tolist())
Example #13
Source File: test_compose.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_row_transformer_function_transformer_series_to_series():
    X, y = load_gunpoint(return_X_y=True)

    # series-to-series transform function
    def powerspectrum(x):
        fft = np.fft.fft(x)
        ps = fft.real * fft.real + fft.imag * fft.imag
        return ps[:ps.shape[0] // 2]

    ft = FunctionTransformer(func=powerspectrum, validate=False)
    t = RowTransformer(ft)
    Xt = t.fit_transform(X, y)
    assert Xt.shape == X.shape
    assert isinstance(Xt.iloc[0, 0],
                      (pd.Series, np.ndarray))  # check series-to-series transforms
Example #14
Source File: test_sklearn_model_export.py From mlflow with Apache License 2.0 | 5 votes |
def sklearn_custom_transformer_model(sklearn_knn_model):
    def transform(vec):
        print("Invoking custom transformer!")
        return vec + 1

    transformer = SKFunctionTransformer(transform, validate=True)
    pipeline = SKPipeline([("custom_transformer", transformer), ("knn", sklearn_knn_model.model)])
    return ModelWithData(pipeline, inference_data=datasets.load_iris().data[:, :2])
Example #15
Source File: test_preprocessing.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_FunctionTransformer(self):
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    mod1 = df.pp.FunctionTransformer(func=lambda x: x + 1)
    df.fit(mod1)
    result = df.transform(mod1)

    exp = df.copy()
    exp.data = exp.data + 1

    self.assertIsInstance(result, pdml.ModelFrame)
    tm.assert_frame_equal(result, exp)
Example #16
Source File: test_function_transformer.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_np_log():
    X = np.arange(10).reshape((5, 2))

    # Test that the numpy.log example still works.
    assert_array_equal(
        FunctionTransformer(np.log1p).transform(X),
        np.log1p(X),
    )
Example #17
Source File: test_function_transformer.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_kw_arg():
    X = np.linspace(0, 1, num=10).reshape((5, 2))

    F = FunctionTransformer(np.around, kw_args=dict(decimals=3))

    # Test that rounding is correct
    assert_array_equal(F.transform(X), np.around(X, decimals=3))
Example #18
Source File: test_function_transformer.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_kw_arg_update():
    X = np.linspace(0, 1, num=10).reshape((5, 2))

    F = FunctionTransformer(np.around, kw_args=dict(decimals=3))
    F.kw_args['decimals'] = 1

    # Test that rounding is correct
    assert_array_equal(F.transform(X), np.around(X, decimals=1))
Example #19
Source File: test_function_transformer.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_kw_arg_reset():
    X = np.linspace(0, 1, num=10).reshape((5, 2))

    F = FunctionTransformer(np.around, kw_args=dict(decimals=3))
    F.kw_args = dict(decimals=1)

    # Test that rounding is correct
    assert_array_equal(F.transform(X), np.around(X, decimals=1))
Example #20
Source File: mercari_golf.py From mercari-solution with MIT License | 5 votes |
def main():
    vectorizer = make_union(
        on_field('name', Tfidf(max_features=100000, token_pattern='\w+')),
        on_field('text', Tfidf(max_features=100000, token_pattern='\w+', ngram_range=(1, 2))),
        on_field(['shipping', 'item_condition_id'],
                 FunctionTransformer(to_records, validate=False), DictVectorizer()),
        n_jobs=4)
    y_scaler = StandardScaler()
    with timer('process train'):
        train = pd.read_table('../input/train.tsv')
        train = train[train['price'] > 0].reset_index(drop=True)
        cv = KFold(n_splits=20, shuffle=True, random_state=42)
        train_ids, valid_ids = next(cv.split(train))
        train, valid = train.iloc[train_ids], train.iloc[valid_ids]
        y_train = y_scaler.fit_transform(np.log1p(train['price'].values.reshape(-1, 1)))
        X_train = vectorizer.fit_transform(preprocess(train)).astype(np.float32)
        print(f'X_train: {X_train.shape} of {X_train.dtype}')
        del train
    with timer('process valid'):
        X_valid = vectorizer.transform(preprocess(valid)).astype(np.float32)
    with ThreadPool(processes=4) as pool:
        Xb_train, Xb_valid = [x.astype(np.bool).astype(np.float32) for x in [X_train, X_valid]]
        xs = [[Xb_train, Xb_valid], [X_train, X_valid]] * 2
        y_pred = np.mean(pool.map(partial(fit_predict, y_train=y_train), xs), axis=0)
    y_pred = np.expm1(y_scaler.inverse_transform(y_pred.reshape(-1, 1))[:, 0])
    print('Valid RMSLE: {:.4f}'.format(np.sqrt(mean_squared_log_error(valid['price'], y_pred))))
Example #21
Source File: test_transformers.py From gordo with GNU Affero General Public License v3.0 | 5 votes |
def test_multiply_by_function_transformer(self):
    from gordo.machine.model.transformer_funcs.general import multiply_by

    # Provide a require argument
    tf = FunctionTransformer(func=multiply_by, kw_args={"factor": 2})
    self._validate_transformer(tf)

    # Ignore the required argument
    tf = FunctionTransformer(func=multiply_by)
    with self.assertRaises(TypeError):
        self._validate_transformer(tf)
Example #22
Source File: test_target.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_transform_target_regressor_1d_transformer(X, y):
    # All transformer in scikit-learn expect 2D data. FunctionTransformer with
    # validate=False lift this constraint without checking that the input is a
    # 2D vector. We check the consistency of the data shape using a 1D and 2D y
    # array.
    transformer = FunctionTransformer(func=lambda x: x + 1,
                                      inverse_func=lambda x: x - 1,
                                      validate=False)
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=transformer)
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape
    # consistency forward transform
    y_tran = regr.transformer_.transform(y)
    _check_shifted_by_one(y, y_tran)
    assert y.shape == y_pred.shape
    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())
    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)
Example #23
Source File: test_function_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_np_log():
    X = np.arange(10).reshape((5, 2))

    # Test that the numpy.log example still works.
    assert_array_equal(
        FunctionTransformer(np.log1p).transform(X),
        np.log1p(X),
    )
Example #24
Source File: test_function_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_kw_arg():
    X = np.linspace(0, 1, num=10).reshape((5, 2))

    F = FunctionTransformer(np.around, kw_args=dict(decimals=3))

    # Test that rounding is correct
    assert_array_equal(F.transform(X), np.around(X, decimals=3))
Example #25
Source File: test_function_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_kw_arg_update():
    X = np.linspace(0, 1, num=10).reshape((5, 2))

    F = FunctionTransformer(np.around, kw_args=dict(decimals=3))
    F.kw_args['decimals'] = 1

    # Test that rounding is correct
    assert_array_equal(F.transform(X), np.around(X, decimals=1))
Example #26
Source File: test_function_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_inverse_transform():
    X = np.array([1, 4, 9, 16]).reshape((2, 2))

    # Test that inverse_transform works correctly
    F = FunctionTransformer(
        func=np.sqrt,
        inverse_func=np.around, inv_kw_args=dict(decimals=3),
    )
    assert_array_equal(
        F.inverse_transform(F.transform(X)),
        np.around(np.sqrt(X), decimals=3),
    )
Example #27
Source File: test_function_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_check_inverse():
    X_dense = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2))

    X_list = [X_dense, sparse.csr_matrix(X_dense), sparse.csc_matrix(X_dense)]

    for X in X_list:
        if sparse.issparse(X):
            accept_sparse = True
        else:
            accept_sparse = False
        trans = FunctionTransformer(func=np.sqrt,
                                    inverse_func=np.around,
                                    accept_sparse=accept_sparse,
                                    check_inverse=True,
                                    validate=True)
        assert_warns_message(UserWarning,
                             "The provided functions are not strictly"
                             " inverse of each other. If you are sure you"
                             " want to proceed regardless, set"
                             " 'check_inverse=False'.",
                             trans.fit, X)

        trans = FunctionTransformer(func=np.expm1,
                                    inverse_func=np.log1p,
                                    accept_sparse=accept_sparse,
                                    check_inverse=True,
                                    validate=True)
        Xt = assert_no_warnings(trans.fit_transform, X)
        assert_allclose_dense_sparse(X, trans.inverse_transform(Xt))

    # check that we don't check inverse when one of the func or inverse is not
    # provided.
    trans = FunctionTransformer(func=np.expm1, inverse_func=None,
                                check_inverse=True, validate=True)
    assert_no_warnings(trans.fit, X_dense)
    trans = FunctionTransformer(func=None, inverse_func=np.expm1,
                                check_inverse=True, validate=True)
    assert_no_warnings(trans.fit, X_dense)
Example #28
Source File: test_function_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_function_transformer_future_warning(validate, expected_warning):
    # FIXME: to be removed in 0.22
    X = np.random.randn(100, 10)
    transformer = FunctionTransformer(validate=validate)
    with pytest.warns(expected_warning) as results:
        transformer.fit_transform(X)
    if expected_warning is None:
        assert len(results) == 0
Example #29
Source File: test_function_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_function_transformer_frame():
    pd = pytest.importorskip('pandas')
    X_df = pd.DataFrame(np.random.randn(100, 10))
    transformer = FunctionTransformer(validate=False)
    X_df_trans = transformer.fit_transform(X_df)
    assert hasattr(X_df_trans, 'loc')
Example #30
Source File: feature_extraction.py From mne-features with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_params(self, deep=True):
    """Get the parameters (if any) of the given feature function.

    Parameters
    ----------
    deep : bool (default: True)
        If True, the method will get the parameters of the transformer.
        (See :class:`~sklearn.preprocessing.FunctionTransformer`).
    """
    func_to_inspect = _get_python_func(self.func)
    # Get code object from the function
    if hasattr(func_to_inspect, 'func_code'):
        func_code = func_to_inspect.func_code
    else:
        func_code = func_to_inspect.__code__
    args, _, _ = getargs(func_code)
    # Get defaults from the function
    if hasattr(func_to_inspect, 'defaults'):
        defaults = func_to_inspect.func_defaults
    else:
        defaults = func_to_inspect.__defaults__
    if defaults is None:
        return dict()
    else:
        n_defaults = len(defaults)
        func_params = {key: value for key, value in
                       zip(args[-n_defaults:], defaults)}
    if self.params is not None:
        func_params.update(self.params)
    return func_params