Python sklearn.compose.make_column_transformer() Examples
The following are 9 code examples of sklearn.compose.make_column_transformer().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.compose, or try the search function.
Example #1
Source File: estimator.py From ramp-workflow with BSD 3-Clause "New" or "Revised" License | 8 votes |
def get_estimator():
    """Assemble and return the preprocessing + classification pipeline.

    The pipeline has two steps:

    * ``'transformer'`` -- a column transformer that applies a
      ``OneHotEncoder(handle_unknown='ignore')`` to the categorical columns
      and a ``SimpleImputer(strategy='constant', fill_value=-1)`` to the
      numerical columns.
    * ``'classifier'`` -- a ``LogisticRegression`` with default settings.

    Returns:
        The unfitted ``Pipeline`` instance.
    """
    one_hot = OneHotEncoder(handle_unknown='ignore')
    constant_imputer = SimpleImputer(strategy='constant', fill_value=-1)

    # Each (transformer, columns) pair routes a subset of columns
    # through its own transformer.
    column_transformer = make_column_transformer(
        (one_hot, ['Sex', 'Pclass', 'Embarked']),
        (constant_imputer, ['Age', 'SibSp', 'Parch', 'Fare']),
    )

    steps = [
        ('transformer', column_transformer),
        ('classifier', LogisticRegression()),
    ]
    return Pipeline(steps)
Example #2
Source File: estimator.py From ramp-workflow with BSD 3-Clause "New" or "Revised" License | 7 votes |
def get_estimator():
    """Return an unfitted pipeline: column preprocessing, then a classifier.

    Categorical columns ('Sex', 'Pclass', 'Embarked') go through a
    ``OneHotEncoder`` configured with ``handle_unknown='ignore'``; numerical
    columns ('Age', 'SibSp', 'Parch', 'Fare') go through a ``SimpleImputer``
    that fills with the constant ``-1``. The transformed output feeds a
    default ``LogisticRegression``.
    """
    # Map each transformer to the columns it is responsible for.
    encoder_spec = (
        OneHotEncoder(handle_unknown='ignore'),
        ['Sex', 'Pclass', 'Embarked'],
    )
    imputer_spec = (
        SimpleImputer(strategy='constant', fill_value=-1),
        ['Age', 'SibSp', 'Parch', 'Fare'],
    )
    features = make_column_transformer(encoder_spec, imputer_spec)

    return Pipeline([
        ('transformer', features),
        ('classifier', LogisticRegression()),
    ])
Example #3
Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_make_column_transformer_kwargs():
    """Keyword arguments are stored on the result; unknown ones raise."""
    scaler = StandardScaler()
    norm = Normalizer()
    specs = ((scaler, 'first'), (norm, ['second']))

    ct = make_column_transformer(
        *specs, n_jobs=3, remainder='drop', sparse_threshold=0.5
    )

    # The extra keywords must not alter the generated transformer list.
    baseline = make_column_transformer(*specs)
    assert_equal(ct.transformers, baseline.transformers)

    # Each keyword ends up as the same-named attribute.
    expected_attrs = (
        ('n_jobs', 3),
        ('remainder', 'drop'),
        ('sparse_threshold', 0.5),
    )
    for attr_name, expected in expected_attrs:
        assert_equal(getattr(ct, attr_name), expected)

    # invalid keyword parameters should raise an error message
    bad_kwargs = {'transformer_weights': {'pca': 10, 'Transf': 1}}
    assert_raise_message(
        TypeError,
        'Unknown keyword arguments: "transformer_weights"',
        make_column_transformer,
        *specs,
        **bad_kwargs
    )
Example #4
Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_column_transformer_mixed_cols_sparse():
    """Sparse stacking of mixed-dtype passthrough columns.

    With ``sparse_threshold=1.0`` the first case asserts a CSR result when
    the passthrough columns are an int and a bool column; the second case
    asserts a ``ValueError`` when the passthrough column holds strings.
    """
    data = np.array([['a', 1, True],
                     ['b', 2, False]], dtype='O')

    numeric_passthrough = make_column_transformer(
        (OneHotEncoder(), [0]),
        ('passthrough', [1, 2]),
        sparse_threshold=1.0,
    )
    # this shouldn't fail, since boolean can be coerced into a numeric
    # See: https://github.com/scikit-learn/scikit-learn/issues/11912
    stacked = numeric_passthrough.fit_transform(data)
    assert stacked.getformat() == 'csr'
    expected_dense = np.array([[1, 0, 1, 1],
                               [0, 1, 2, 0]])
    assert_array_equal(stacked.toarray(), expected_dense)

    string_passthrough = make_column_transformer(
        (OneHotEncoder(), [0]),
        ('passthrough', [0]),
        sparse_threshold=1.0,
    )
    with pytest.raises(ValueError,
                       match="For a sparse output, all columns should"):
        # this fails since strings `a` and `b` cannot be
        # coerced into a numeric.
        string_passthrough.fit_transform(data)
Example #5
Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_make_column_transformer():
    """Check generated names and the deprecated ``(columns, transformer)`` order."""
    scaler = StandardScaler()
    norm = Normalizer()
    ct = make_column_transformer((scaler, 'first'), (norm, ['second']))

    # Split the (name, transformer, columns) triples into parallel tuples.
    triples = ct.transformers
    names = tuple(name for name, _, _ in triples)
    transformers = tuple(trans for _, trans, _ in triples)
    columns = tuple(cols for _, _, cols in triples)
    assert_equal(names, ("standardscaler", "normalizer"))
    assert_equal(transformers, (scaler, norm))
    assert_equal(columns, ('first', ['second']))

    # XXX remove in v0.22
    deprecation_match = '`make_column_transformer` now expects'

    # The old order must warn and give the same result as the new order.
    with pytest.warns(DeprecationWarning, match=deprecation_match):
        ct1 = make_column_transformer(([0], norm))
    ct2 = make_column_transformer((norm, [0]))
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
    old_order_result = ct1.fit_transform(X_array)
    new_order_result = ct2.fit_transform(X_array)
    assert_almost_equal(old_order_result, new_order_result)

    # The old order with the special string transformers warns as well.
    with pytest.warns(DeprecationWarning, match=deprecation_match):
        make_column_transformer(('first', 'drop'))

    with pytest.warns(DeprecationWarning, match=deprecation_match):
        make_column_transformer(('passthrough', 'passthrough'),
                                ('first', 'drop'))
Example #6
Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_make_column_transformer_pandas():
    """Deprecated and current argument orders agree on a DataFrame input.

    Skipped when pandas is not importable.
    """
    pd = pytest.importorskip('pandas')
    frame = pd.DataFrame(
        np.array([[0, 1, 2], [2, 4, 6]]).T,
        columns=['first', 'second'],
    )
    norm = Normalizer()

    # XXX remove in v0.22
    with pytest.warns(DeprecationWarning,
                      match='`make_column_transformer` now expects'):
        legacy_ct = make_column_transformer((frame.columns, norm))
    current_ct = make_column_transformer((norm, frame.columns))

    legacy_out = legacy_ct.fit_transform(frame)
    current_out = current_ct.fit_transform(frame)
    assert_almost_equal(legacy_out, current_out)
Example #7
Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_make_column_transformer_remainder_transformer():
    """An estimator passed as ``remainder`` is exposed as ``ct.remainder``."""
    remainder_estimator = StandardScaler()
    column_specs = [
        (StandardScaler(), 'first'),
        (Normalizer(), ['second']),
    ]
    ct = make_column_transformer(*column_specs, remainder=remainder_estimator)
    assert ct.remainder == remainder_estimator
Example #8
Source File: estimator.py From ramp-workflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_estimator():
    """Build the merge -> column-transform -> random-forest pipeline.

    Steps, in order:

    * ``'merge'`` -- ``FunctionTransformer`` wrapping ``_merge_external_data``
      with ``validate=False``.
    * ``'transfomer'`` -- column transformer that one-hot encodes
      'Arrival' and 'Departure', drops 'DateOfDeparture', and passes every
      other column through unchanged.
    * ``'regressor'`` -- ``RandomForestRegressor`` with 10 estimators,
      ``max_depth=10`` and ``max_features=10``.

    Returns:
        The unfitted ``Pipeline`` instance.
    """
    merge_step = FunctionTransformer(_merge_external_data, validate=False)

    column_transformer = make_column_transformer(
        (OneHotEncoder(handle_unknown='ignore'), ['Arrival', 'Departure']),
        ('drop', ['DateOfDeparture']),
        remainder='passthrough',
    )

    forest = RandomForestRegressor(
        n_estimators=10, max_depth=10, max_features=10
    )

    # NOTE(review): the step name 'transfomer' is misspelt but is kept as-is,
    # since callers may address the step by this exact name.
    return Pipeline(steps=[
        ('merge', merge_step),
        ('transfomer', column_transformer),
        ('regressor', forest),
    ])
Example #9
Source File: sklearn_mnist_preprocessor.py From aws-step-functions-data-science-sdk-python with Apache License 2.0 | 5 votes |
def create_preprocessing_pipeline(num_columns):
    """Create a column transformer that standardizes the leading columns.

    Args:
        num_columns: Number of leading columns (positional indices
            ``0 .. num_columns - 1``) to pass through ``StandardScaler``.

    Returns:
        An unfitted column transformer; columns not selected are passed
        through unchanged (``remainder='passthrough'``).
    """
    preprocessor = make_column_transformer(
        # Tuples must be (transformer, columns). The reversed
        # (columns, transformer) order used previously is deprecated and
        # was scheduled for removal in scikit-learn 0.22.
        (StandardScaler(), np.arange(num_columns)),
        remainder='passthrough'
    )
    return preprocessor