Python sklearn.compose.make_column_transformer() Examples

The following are 9 code examples of sklearn.compose.make_column_transformer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.compose, or try the search function.

Example #1

Source File: estimator.py From ramp-workflow with BSD 3-Clause "New" or "Revised" License

8 votes

vote down

vote up

def get_estimator():
    """Build the unfitted classification pipeline.

    The pipeline has two steps:

    * ``'transformer'`` -- a column transformer that one-hot encodes the
      categorical columns and imputes the numerical columns with the
      constant ``-1``.
    * ``'classifier'`` -- a ``LogisticRegression`` with default settings.

    Returns
    -------
    Pipeline
        The assembled, unfitted pipeline.
    """
    one_hot_features = ['Sex', 'Pclass', 'Embarked']
    imputed_features = ['Age', 'SibSp', 'Parch', 'Fare']

    encoder = OneHotEncoder(handle_unknown='ignore')
    imputer = SimpleImputer(strategy='constant', fill_value=-1)
    column_transformer = make_column_transformer(
        (encoder, one_hot_features),
        (imputer, imputed_features),
    )

    steps = [
        ('transformer', column_transformer),
        ('classifier', LogisticRegression()),
    ]
    return Pipeline(steps)

Example #2

Source File: estimator.py From ramp-workflow with BSD 3-Clause "New" or "Revised" License

7 votes

vote down

vote up

def get_estimator():
    """Return an unfitted pipeline: column preprocessing, then a classifier.

    Categorical columns go through a ``OneHotEncoder`` configured with
    ``handle_unknown='ignore'``; numerical columns go through a
    ``SimpleImputer`` that fills with the constant ``-1``. The result feeds a
    default ``LogisticRegression``.
    """
    # Each entry pairs a transformer with the columns it is applied to.
    column_specs = (
        (OneHotEncoder(handle_unknown='ignore'),
         ['Sex', 'Pclass', 'Embarked']),
        (SimpleImputer(strategy='constant', fill_value=-1),
         ['Age', 'SibSp', 'Parch', 'Fare']),
    )
    features = make_column_transformer(*column_specs)

    return Pipeline([
        ('transformer', features),
        ('classifier', LogisticRegression()),
    ])

Example #3

Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

vote down

vote up

def test_make_column_transformer_kwargs():
    """Check keyword handling of ``make_column_transformer``.

    Supported keywords (``n_jobs``, ``remainder``, ``sparse_threshold``) must
    be stored on the result without changing the transformer list, and an
    unsupported keyword must raise ``TypeError``.
    """
    scaler = StandardScaler()
    norm = Normalizer()
    specs = ((scaler, 'first'), (norm, ['second']))

    with_kwargs = make_column_transformer(
        *specs, n_jobs=3, remainder='drop', sparse_threshold=0.5)
    without_kwargs = make_column_transformer(*specs)

    # The extra keywords must not affect which transformers are registered.
    assert_equal(with_kwargs.transformers, without_kwargs.transformers)
    assert_equal(with_kwargs.n_jobs, 3)
    assert_equal(with_kwargs.remainder, 'drop')
    assert_equal(with_kwargs.sparse_threshold, 0.5)

    # invalid keyword parameters should raise an error message
    expected_message = 'Unknown keyword arguments: "transformer_weights"'
    assert_raise_message(
        TypeError,
        expected_message,
        make_column_transformer,
        *specs,
        transformer_weights={'pca': 10, 'Transf': 1}
    )

Example #4

Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

vote down

vote up

def test_column_transformer_mixed_cols_sparse():
    """Check sparse output when passthrough columns have mixed dtypes.

    Passing through integer and boolean columns next to a one-hot encoded
    column must succeed and give a CSR matrix. Passing through a string
    column must raise ``ValueError``.
    """
    rows = [['a', 1, True],
            ['b', 2, False]]
    df = np.array(rows, dtype='O')

    numeric_passthrough = make_column_transformer(
        (OneHotEncoder(), [0]),
        ('passthrough', [1, 2]),
        sparse_threshold=1.0
    )

    # this shouldn't fail, since boolean can be coerced into a numeric
    # See: https://github.com/scikit-learn/scikit-learn/issues/11912
    X_trans = numeric_passthrough.fit_transform(df)
    assert X_trans.getformat() == 'csr'
    expected = np.array([[1, 0, 1, 1],
                         [0, 1, 2, 0]])
    assert_array_equal(X_trans.toarray(), expected)

    string_passthrough = make_column_transformer(
        (OneHotEncoder(), [0]),
        ('passthrough', [0]),
        sparse_threshold=1.0
    )
    error_pattern = "For a sparse output, all columns should"
    with pytest.raises(ValueError, match=error_pattern):
        # this fails since strings `a` and `b` cannot be
        # coerced into a numeric.
        string_passthrough.fit_transform(df)

Example #5

Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

vote down

vote up

def test_make_column_transformer():
    """Check naming and the deprecated tuple order of the factory.

    The first part verifies that step names are derived from the transformer
    objects and that transformers and columns are kept as given. The second
    part verifies that the legacy ``(columns, transformer)`` order still works
    but emits a ``DeprecationWarning``.
    """
    scaler = StandardScaler()
    norm = Normalizer()
    ct = make_column_transformer((scaler, 'first'), (norm, ['second']))
    names, transformers, columns = zip(*ct.transformers)
    assert_equal(names, ("standardscaler", "normalizer"))
    assert_equal(transformers, (scaler, norm))
    assert_equal(columns, ('first', ['second']))

    # XXX remove in v0.22
    deprecation_pattern = '`make_column_transformer` now expects'

    with pytest.warns(DeprecationWarning, match=deprecation_pattern):
        legacy_ct = make_column_transformer(([0], norm))
    current_ct = make_column_transformer((norm, [0]))
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
    # Both argument orders must produce the same transformed output.
    assert_almost_equal(legacy_ct.fit_transform(X_array),
                        current_ct.fit_transform(X_array))

    with pytest.warns(DeprecationWarning, match=deprecation_pattern):
        make_column_transformer(('first', 'drop'))

    with pytest.warns(DeprecationWarning, match=deprecation_pattern):
        make_column_transformer(('passthrough', 'passthrough'),
                                ('first', 'drop'))

Example #6

Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

vote down

vote up

def test_make_column_transformer_pandas():
    """Check the deprecated tuple order with a pandas column index.

    The legacy ``(columns, transformer)`` order must warn and must transform
    the DataFrame identically to the current ``(transformer, columns)`` order.
    Skipped when pandas is not installed.
    """
    pd = pytest.importorskip('pandas')
    values = np.array([[0, 1, 2], [2, 4, 6]]).T
    X_df = pd.DataFrame(values, columns=['first', 'second'])
    norm = Normalizer()

    # XXX remove in v0.22
    deprecation_pattern = '`make_column_transformer` now expects'
    with pytest.warns(DeprecationWarning, match=deprecation_pattern):
        legacy_ct = make_column_transformer((X_df.columns, norm))
    current_ct = make_column_transformer((norm, X_df.columns))

    legacy_result = legacy_ct.fit_transform(X_df)
    current_result = current_ct.fit_transform(X_df)
    assert_almost_equal(legacy_result, current_result)

Example #7

Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

vote down

vote up

def test_make_column_transformer_remainder_transformer():
    """Check that an estimator passed as ``remainder`` is stored on the result."""
    remainder_estimator = StandardScaler()
    transformer_specs = [
        (StandardScaler(), 'first'),
        (Normalizer(), ['second']),
    ]
    ct = make_column_transformer(*transformer_specs,
                                 remainder=remainder_estimator)
    assert ct.remainder == remainder_estimator

Example #8

Source File: estimator.py From ramp-workflow with BSD 3-Clause "New" or "Revised" License

5 votes

vote down

vote up

def get_estimator():
    """Build the unfitted regression pipeline.

    Steps, in order:

    * ``'merge'`` -- a ``FunctionTransformer`` wrapping
      ``_merge_external_data`` with input validation disabled.
    * ``'transfomer'`` -- a column transformer that one-hot encodes the
      categorical columns, drops ``'DateOfDeparture'`` and passes every other
      column through unchanged.
    * ``'regressor'`` -- a ``RandomForestRegressor``.

    Returns
    -------
    Pipeline
        The assembled, unfitted pipeline.
    """
    one_hot_columns = ['Arrival', 'Departure']
    dropped_columns = ['DateOfDeparture']

    preprocessor = make_column_transformer(
        (OneHotEncoder(handle_unknown='ignore'), one_hot_columns),
        ('drop', dropped_columns),
        remainder='passthrough'
    )
    merger = FunctionTransformer(_merge_external_data, validate=False)
    forest = RandomForestRegressor(n_estimators=10, max_depth=10,
                                   max_features=10)

    # NOTE: the step name 'transfomer' (sic) is kept as-is; it is a
    # runtime-visible key that callers may use to look the step up.
    return Pipeline(steps=[
        ('merge', merger),
        ('transfomer', preprocessor),
        ('regressor', forest),
    ])

Example #9

Source File: sklearn_mnist_preprocessor.py From aws-step-functions-data-science-sdk-python with Apache License 2.0

5 votes

vote down

vote up

def create_preprocessing_pipeline(num_columns):
    """Build a column transformer that standard-scales the leading columns.

    Parameters
    ----------
    num_columns : int
        Number of leading columns (positional indices ``0 .. num_columns - 1``)
        to pass through ``StandardScaler``.

    Returns
    -------
    ColumnTransformer
        Unfitted transformer. Columns outside the selected range are passed
        through unchanged (``remainder='passthrough'``).

    Notes
    -----
    The tuple is given as ``(transformer, columns)``. The previous
    ``(columns, transformer)`` order is deprecated in scikit-learn and was
    scheduled for removal in v0.22, so it fails on current releases.
    """
    preprocessor = make_column_transformer(
        (StandardScaler(), np.arange(num_columns)),
        remainder='passthrough'
    )
    return preprocessor