Python sklearn.preprocessing.KBinsDiscretizer() Examples

The following are 26 code examples of sklearn.preprocessing.KBinsDiscretizer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.preprocessing , or try the search function .
Example #1
Source File: test_sklearn_k_bins_discretiser_converter.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_onehot_dense_quantile_int(self):
        X = np.array([[1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9]])
        model = KBinsDiscretizer(n_bins=[3, 2, 3, 4],
                                 encode="onehot-dense",
                                 strategy="quantile").fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn KBinsDiscretiser",
            [("input", Int64TensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.int64),
            model,
            model_onnx,
            basename="SklearnKBinsDiscretiserOneHotDenseQuantileInt",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #2
Source File: test_sklearn_k_bins_discretiser_converter.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_onehot_dense_uniform_int(self):
        X = np.array([[1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9]])
        model = KBinsDiscretizer(n_bins=[3, 2, 3, 4],
                                 encode="onehot-dense",
                                 strategy="uniform").fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn KBinsDiscretiser",
            [("input", Int64TensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.int64),
            model,
            model_onnx,
            basename="SklearnKBinsDiscretiserOneHotDenseUniformInt",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #3
Source File: test_sklearn_k_bins_discretiser_converter.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_ordinal_kmeans_int(self):
        X = np.array([
            [1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9],
            [-1, 0, 1, -16], [31, -5, 15, 10], [12, -2, 8, -19]
            ])
        model = KBinsDiscretizer(n_bins=3, encode="ordinal",
                                 strategy="kmeans").fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn KBinsDiscretiser",
            [("input", Int64TensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.int64),
            model,
            model_onnx,
            basename="SklearnKBinsDiscretiserOrdinalKMeansInt",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #4
Source File: test_sklearn_k_bins_discretiser_converter.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_ordinal_quantile_int(self):
        X = np.array([
            [1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9],
            [-1, 0, 1, -16], [31, -5, 15, 10], [12, -2, 8, -19],
            [12, 13, 31, -16], [0, -21, 15, 30], [10, 22, 71, -91]
            ])
        model = KBinsDiscretizer(n_bins=[3, 2, 3, 4],
                                 encode="ordinal",
                                 strategy="quantile").fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn KBinsDiscretiser",
            [("input", Int64TensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.int64),
            model,
            model_onnx,
            basename="SklearnKBinsDiscretiserOrdinalQuantileInt",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #5
Source File: test_sklearn_k_bins_discretiser_converter.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_ordinal_uniform_int(self):
        X = np.array([[1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9]])
        model = KBinsDiscretizer(n_bins=3,
                                 encode="ordinal",
                                 strategy="uniform").fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn KBinsDiscretiser",
            [("input", Int64TensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.int64),
            model,
            model_onnx,
            basename="SklearnKBinsDiscretiserOrdinalUniformInt",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #6
Source File: test_sklearn_k_bins_discretiser_converter.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_onehot_dense_uniform(self):
        X = np.array([[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
                      [0, 3.2, 4.7, -8.9]])
        model = KBinsDiscretizer(n_bins=[3, 2, 3, 4],
                                 encode="onehot-dense",
                                 strategy="uniform").fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn KBinsDiscretiser",
            [("input", FloatTensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.float32),
            model,
            model_onnx,
            basename="SklearnKBinsDiscretiserOneHotDenseUniform",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #7
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_encode_options():
    est = KBinsDiscretizer(n_bins=[2, 3, 3, 3],
                           encode='ordinal').fit(X)
    Xt_1 = est.transform(X)
    est = KBinsDiscretizer(n_bins=[2, 3, 3, 3],
                           encode='onehot-dense').fit(X)
    Xt_2 = est.transform(X)
    assert not sp.issparse(Xt_2)
    assert_array_equal(OneHotEncoder(
                           categories=[np.arange(i) for i in [2, 3, 3, 3]],
                           sparse=False)
                       .fit_transform(Xt_1), Xt_2)
    est = KBinsDiscretizer(n_bins=[2, 3, 3, 3],
                           encode='onehot').fit(X)
    Xt_3 = est.transform(X)
    assert sp.issparse(Xt_3)
    assert_array_equal(OneHotEncoder(
                           categories=[np.arange(i) for i in [2, 3, 3, 3]],
                           sparse=True)
                       .fit_transform(Xt_1).toarray(),
                       Xt_3.toarray()) 
Example #8
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_nonuniform_strategies(
        strategy, expected_2bins, expected_3bins, expected_5bins):
    X = np.array([0, 0.5, 2, 3, 9, 10]).reshape(-1, 1)

    # with 2 bins
    est = KBinsDiscretizer(n_bins=2, strategy=strategy, encode='ordinal')
    Xt = est.fit_transform(X)
    assert_array_equal(expected_2bins, Xt.ravel())

    # with 3 bins
    est = KBinsDiscretizer(n_bins=3, strategy=strategy, encode='ordinal')
    Xt = est.fit_transform(X)
    assert_array_equal(expected_3bins, Xt.ravel())

    # with 5 bins
    est = KBinsDiscretizer(n_bins=5, strategy=strategy, encode='ordinal')
    Xt = est.fit_transform(X)
    assert_array_equal(expected_5bins, Xt.ravel()) 
Example #9
Source File: test_sklearn_k_bins_discretiser_converter.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_ordinal_uniform(self):
        X = np.array([[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
                      [0, 3.2, 4.7, -8.9]])
        model = KBinsDiscretizer(n_bins=3,
                                 encode="ordinal",
                                 strategy="uniform").fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn KBinsDiscretiser",
            [("input", FloatTensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.float32),
            model,
            model_onnx,
            basename="SklearnKBinsDiscretiserOrdinalUniform",
            allow_failure="StrictVersion("
            "onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #10
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_overwrite():
    X = np.array([0, 1, 2, 3])[:, None]
    X_before = X.copy()

    est = KBinsDiscretizer(n_bins=3, encode="ordinal")
    Xt = est.fit_transform(X)
    assert_array_equal(X, X_before)

    Xt_before = Xt.copy()
    Xinv = est.inverse_transform(Xt)
    assert_array_equal(Xt, Xt_before)
    assert_array_equal(Xinv, np.array([[0.5], [1.5], [2.5], [2.5]])) 
Example #11
Source File: test_sklearn_k_bins_discretiser_converter.py    From sklearn-onnx with MIT License 5 votes vote down vote up
def test_model_k_bins_discretiser_onehot_dense_quantile(self):
        X = np.array([
            [1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
            [0, 3.2, 4.7, -8.9], [0.2, 1.3, 0.6, -9.4],
            [0.8, 4.2, -14.7, -28.9], [8.2, 1.9, 2.6, -5.4],
            [4.8, -9.2, 33.7, 3.9], [81.2, 1., 0.6, 12.4],
            [6.8, 11.2, -1.7, -2.9], [11.2, 12.9, 4.3, -1.4],
            ])
        model = KBinsDiscretizer(n_bins=[3, 2, 3, 4],
                                 encode="onehot-dense",
                                 strategy="quantile").fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn KBinsDiscretiser",
            [("input", FloatTensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.float32),
            model,
            model_onnx,
            basename="SklearnKBinsDiscretiserOneHotDenseQuantile",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #12
Source File: test_sklearn_k_bins_discretiser_converter.py    From sklearn-onnx with MIT License 5 votes vote down vote up
def test_model_k_bins_discretiser_ordinal_kmeans(self):
        X = np.array([
            [1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
            [0, 3.2, 4.7, -8.9], [0.2, 1.3, 0.6, -9.4],
            [0.8, 4.2, -14.7, -28.9], [8.2, 1.9, 2.6, -5.4],
            [4.8, -9.2, 33.7, 3.9], [81.2, 1., 0.6, 12.4],
            [6.8, 11.2, -1.7, -2.9], [11.2, 12.9, 4.3, -1.4],
            ])
        model = KBinsDiscretizer(n_bins=3, encode="ordinal",
                                 strategy="kmeans").fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn KBinsDiscretiser",
            [("input", FloatTensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.float32),
            model,
            model_onnx,
            basename="SklearnKBinsDiscretiserOrdinalKMeans",
            allow_failure="StrictVersion("
            "onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #13
Source File: test_sklearn_k_bins_discretiser_converter.py    From sklearn-onnx with MIT License 5 votes vote down vote up
def test_model_k_bins_discretiser_ordinal_quantile(self):
        X = np.array([
            [1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
            [0, 3.2, 4.7, -8.9], [0.2, 1.3, 0.6, -9.4],
            [0.8, 4.2, -14.7, -28.9], [8.2, 1.9, 2.6, -5.4],
            [4.8, -9.2, 33.7, 3.9], [81.2, 1., 0.6, 12.4],
            [6.8, 11.2, -1.7, -2.9], [11.2, 12.9, 4.3, -1.4],
            ])
        model = KBinsDiscretizer(n_bins=[3, 2, 3, 4],
                                 encode="ordinal",
                                 strategy="quantile").fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn KBinsDiscretiser",
            [("input", FloatTensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.float32),
            model,
            model_onnx,
            basename="SklearnKBinsDiscretiserOrdinalQuantile",
            allow_failure="StrictVersion("
            "onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #14
Source File: utils.py    From SDGym with MIT License 5 votes vote down vote up
def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
        self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)
        self.column_index = [
            index for index, info in enumerate(self.meta) if info['type'] == CONTINUOUS]

        self.discretizer = KBinsDiscretizer(
            n_bins=self.n_bins, encode='ordinal', strategy='uniform')

        if not self.column_index:
            return

        self.discretizer.fit(data[:, self.column_index]) 
Example #15
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_redundant_bins(strategy, expected_bin_edges):
    X = [[0], [0], [0], [0], [3], [3]]
    kbd = KBinsDiscretizer(n_bins=3, strategy=strategy)
    msg = ("Bins whose width are too small (i.e., <= 1e-8) in feature 0 "
           "are removed. Consider decreasing the number of bins.")
    assert_warns_message(UserWarning, msg, kbd.fit, X)
    assert_array_almost_equal(kbd.bin_edges_[0], expected_bin_edges) 
Example #16
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_fit_transform(strategy, expected):
    est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy=strategy)
    est.fit(X)
    assert_array_equal(expected, est.transform(X)) 
Example #17
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_transform_outside_fit_range(strategy):
    X = np.array([0, 1, 2, 3])[:, None]
    kbd = KBinsDiscretizer(n_bins=4, strategy=strategy, encode='ordinal')
    kbd.fit(X)

    X2 = np.array([-2, 5])[:, None]
    X2t = kbd.transform(X2)
    assert_array_equal(X2t.max(axis=0) + 1, kbd.n_bins_)
    assert_array_equal(X2t.min(axis=0), [0]) 
Example #18
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_inverse_transform(strategy, encode, expected_inv):
    kbd = KBinsDiscretizer(n_bins=3, strategy=strategy, encode=encode)
    Xt = kbd.fit_transform(X)
    Xinv = kbd.inverse_transform(Xt)
    assert_array_almost_equal(expected_inv, Xinv) 
Example #19
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_invalid_encode_option():
    est = KBinsDiscretizer(n_bins=[2, 3, 3, 3], encode='invalid-encode')
    assert_raise_message(ValueError, "Valid options for 'encode' are "
                         "('onehot', 'onehot-dense', 'ordinal'). "
                         "Got encode='invalid-encode' instead.",
                         est.fit, X) 
Example #20
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_numeric_stability():
    X_init = np.array([2., 4., 6., 8., 10.]).reshape(-1, 1)
    Xt_expected = np.array([0, 0, 1, 1, 1]).reshape(-1, 1)

    # Test up to discretizing nano units
    for i in range(1, 9):
        X = X_init / 10**i
        Xt = KBinsDiscretizer(n_bins=2, encode='ordinal').fit_transform(X)
        assert_array_equal(Xt_expected, Xt) 
Example #21
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_transform_1d_behavior():
    X = np.arange(4)
    est = KBinsDiscretizer(n_bins=2)
    assert_raises(ValueError, est.fit, X)

    est = KBinsDiscretizer(n_bins=2)
    est.fit(X.reshape(-1, 1))
    assert_raises(ValueError, est.transform, X) 
Example #22
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_same_min_max(strategy):
    warnings.simplefilter("always")
    X = np.array([[1, -2],
                  [1, -1],
                  [1, 0],
                  [1, 1]])
    est = KBinsDiscretizer(strategy=strategy, n_bins=3, encode='ordinal')
    assert_warns_message(UserWarning,
                         "Feature 0 is constant and will be replaced "
                         "with 0.", est.fit, X)
    assert est.n_bins_[0] == 1
    # replace the feature with zeros
    Xt = est.transform(X)
    assert_array_equal(Xt[:, 0], np.zeros(X.shape[0])) 
Example #23
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_fit_transform_n_bins_array(strategy, expected):
    est = KBinsDiscretizer(n_bins=[2, 3, 3, 3], encode='ordinal',
                           strategy=strategy).fit(X)
    assert_array_equal(expected, est.transform(X))

    # test the shape of bin_edges_
    n_features = np.array(X).shape[1]
    assert est.bin_edges_.shape == (n_features, )
    for bin_edges, n_bins in zip(est.bin_edges_, est.n_bins_):
        assert bin_edges.shape == (n_bins + 1, ) 
Example #24
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_invalid_n_bins_array():
    # Bad shape
    n_bins = np.full((2, 4), 2.)
    est = KBinsDiscretizer(n_bins=n_bins)
    assert_raise_message(ValueError,
                         "n_bins must be a scalar or array of shape "
                         "(n_features,).", est.fit_transform, X)

    # Incorrect number of features
    n_bins = [1, 2, 2]
    est = KBinsDiscretizer(n_bins=n_bins)
    assert_raise_message(ValueError,
                         "n_bins must be a scalar or array of shape "
                         "(n_features,).", est.fit_transform, X)

    # Bad bin values
    n_bins = [1, 2, 2, 1]
    est = KBinsDiscretizer(n_bins=n_bins)
    assert_raise_message(ValueError,
                         "KBinsDiscretizer received an invalid number of bins "
                         "at indices 0, 3. Number of bins must be at least 2, "
                         "and must be an int.",
                         est.fit_transform, X)

    # Float bin values
    n_bins = [2.1, 2, 2.1, 2]
    est = KBinsDiscretizer(n_bins=n_bins)
    assert_raise_message(ValueError,
                         "KBinsDiscretizer received an invalid number of bins "
                         "at indices 0, 2. Number of bins must be at least 2, "
                         "and must be an int.",
                         est.fit_transform, X) 
Example #25
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_invalid_n_bins():
    est = KBinsDiscretizer(n_bins=1)
    assert_raise_message(ValueError, "KBinsDiscretizer received an invalid "
                         "number of bins. Received 1, expected at least 2.",
                         est.fit_transform, X)

    est = KBinsDiscretizer(n_bins=1.1)
    assert_raise_message(ValueError, "KBinsDiscretizer received an invalid "
                         "n_bins type. Received float, expected int.",
                         est.fit_transform, X) 
Example #26
Source File: test_discretization.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_valid_n_bins():
    KBinsDiscretizer(n_bins=2).fit_transform(X)
    KBinsDiscretizer(n_bins=np.array([2])[0]).fit_transform(X)
    assert KBinsDiscretizer(n_bins=2).fit(X).n_bins_.dtype == np.dtype(np.int)