Python sklearn.preprocessing.KBinsDiscretizer() Examples
The following are 26 code examples of sklearn.preprocessing.KBinsDiscretizer().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.
Example #1
Source File: test_sklearn_k_bins_discretiser_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_k_bins_discretiser_onehot_dense_quantile_int(self):
    # Fit a quantile / onehot-dense KBinsDiscretizer on a small integer
    # array, convert it with convert_sklearn using an Int64 input
    # declaration, check that something was returned, then pass the data and
    # both models to dump_data_and_model (helper defined outside this block).
    samples = np.array([[1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9]])
    discretiser = KBinsDiscretizer(
        n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="quantile")
    fitted = discretiser.fit(samples)

    # Declared input shape: [None, number of columns in `samples`].
    input_spec = [("input", Int64TensorType([None, samples.shape[1]]))]
    onnx_model = convert_sklearn(
        fitted,
        "scikit-learn KBinsDiscretiser",
        input_spec,
        target_opset=TARGET_OPSET,
    )
    self.assertTrue(onnx_model is not None)

    # NOTE(review): allow_failure is passed as a string expression;
    # presumably the helper evaluates it to tolerate old onnxruntime
    # versions -- confirm against dump_data_and_model.
    dump_data_and_model(
        samples.astype(np.int64),
        fitted,
        onnx_model,
        basename="SklearnKBinsDiscretiserOneHotDenseQuantileInt",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #2
Source File: test_sklearn_k_bins_discretiser_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_k_bins_discretiser_onehot_dense_uniform_int(self):
    # Fit a uniform / onehot-dense KBinsDiscretizer on a small integer
    # array, convert it with convert_sklearn using an Int64 input
    # declaration, check that something was returned, then pass the data and
    # both models to dump_data_and_model (helper defined outside this block).
    samples = np.array([[1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9]])
    discretiser = KBinsDiscretizer(
        n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="uniform")
    fitted = discretiser.fit(samples)

    # Declared input shape: [None, number of columns in `samples`].
    input_spec = [("input", Int64TensorType([None, samples.shape[1]]))]
    onnx_model = convert_sklearn(
        fitted,
        "scikit-learn KBinsDiscretiser",
        input_spec,
        target_opset=TARGET_OPSET,
    )
    self.assertTrue(onnx_model is not None)

    # NOTE(review): allow_failure is passed as a string expression;
    # presumably the helper evaluates it to tolerate old onnxruntime
    # versions -- confirm against dump_data_and_model.
    dump_data_and_model(
        samples.astype(np.int64),
        fitted,
        onnx_model,
        basename="SklearnKBinsDiscretiserOneHotDenseUniformInt",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #3
Source File: test_sklearn_k_bins_discretiser_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_k_bins_discretiser_ordinal_kmeans_int(self):
    # Fit a kmeans / ordinal KBinsDiscretizer (3 bins) on an integer array,
    # convert it with convert_sklearn using an Int64 input declaration,
    # check that something was returned, then pass the data and both models
    # to dump_data_and_model (helper defined outside this block).
    rows = [
        [1, 3, 3, -6],
        [3, -2, 5, 0],
        [0, 2, 7, -9],
        [-1, 0, 1, -16],
        [31, -5, 15, 10],
        [12, -2, 8, -19],
    ]
    samples = np.array(rows)
    discretiser = KBinsDiscretizer(
        n_bins=3, encode="ordinal", strategy="kmeans")
    fitted = discretiser.fit(samples)

    # Declared input shape: [None, number of columns in `samples`].
    input_spec = [("input", Int64TensorType([None, samples.shape[1]]))]
    onnx_model = convert_sklearn(
        fitted,
        "scikit-learn KBinsDiscretiser",
        input_spec,
        target_opset=TARGET_OPSET,
    )
    self.assertTrue(onnx_model is not None)

    # NOTE(review): allow_failure is passed as a string expression;
    # presumably the helper evaluates it to tolerate old onnxruntime
    # versions -- confirm against dump_data_and_model.
    dump_data_and_model(
        samples.astype(np.int64),
        fitted,
        onnx_model,
        basename="SklearnKBinsDiscretiserOrdinalKMeansInt",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #4
Source File: test_sklearn_k_bins_discretiser_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_k_bins_discretiser_ordinal_quantile_int(self):
    # Fit a quantile / ordinal KBinsDiscretizer (per-feature bin counts) on
    # an integer array, convert it with convert_sklearn using an Int64 input
    # declaration, check that something was returned, then pass the data and
    # both models to dump_data_and_model (helper defined outside this block).
    rows = [
        [1, 3, 3, -6],
        [3, -2, 5, 0],
        [0, 2, 7, -9],
        [-1, 0, 1, -16],
        [31, -5, 15, 10],
        [12, -2, 8, -19],
        [12, 13, 31, -16],
        [0, -21, 15, 30],
        [10, 22, 71, -91],
    ]
    samples = np.array(rows)
    discretiser = KBinsDiscretizer(
        n_bins=[3, 2, 3, 4], encode="ordinal", strategy="quantile")
    fitted = discretiser.fit(samples)

    # Declared input shape: [None, number of columns in `samples`].
    input_spec = [("input", Int64TensorType([None, samples.shape[1]]))]
    onnx_model = convert_sklearn(
        fitted,
        "scikit-learn KBinsDiscretiser",
        input_spec,
        target_opset=TARGET_OPSET,
    )
    self.assertTrue(onnx_model is not None)

    # NOTE(review): allow_failure is passed as a string expression;
    # presumably the helper evaluates it to tolerate old onnxruntime
    # versions -- confirm against dump_data_and_model.
    dump_data_and_model(
        samples.astype(np.int64),
        fitted,
        onnx_model,
        basename="SklearnKBinsDiscretiserOrdinalQuantileInt",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #5
Source File: test_sklearn_k_bins_discretiser_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_k_bins_discretiser_ordinal_uniform_int(self):
    # Fit a uniform / ordinal KBinsDiscretizer (3 bins) on a small integer
    # array, convert it with convert_sklearn using an Int64 input
    # declaration, check that something was returned, then pass the data and
    # both models to dump_data_and_model (helper defined outside this block).
    samples = np.array([[1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9]])
    discretiser = KBinsDiscretizer(
        n_bins=3, encode="ordinal", strategy="uniform")
    fitted = discretiser.fit(samples)

    # Declared input shape: [None, number of columns in `samples`].
    input_spec = [("input", Int64TensorType([None, samples.shape[1]]))]
    onnx_model = convert_sklearn(
        fitted,
        "scikit-learn KBinsDiscretiser",
        input_spec,
        target_opset=TARGET_OPSET,
    )
    self.assertTrue(onnx_model is not None)

    # NOTE(review): allow_failure is passed as a string expression;
    # presumably the helper evaluates it to tolerate old onnxruntime
    # versions -- confirm against dump_data_and_model.
    dump_data_and_model(
        samples.astype(np.int64),
        fitted,
        onnx_model,
        basename="SklearnKBinsDiscretiserOrdinalUniformInt",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #6
Source File: test_sklearn_k_bins_discretiser_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_k_bins_discretiser_onehot_dense_uniform(self):
    # Fit a uniform / onehot-dense KBinsDiscretizer on a small float array,
    # convert it with convert_sklearn using a Float input declaration,
    # check that something was returned, then pass the float32 data and
    # both models to dump_data_and_model (helper defined outside this block).
    rows = [
        [1.2, 3.2, 1.3, -5.6],
        [4.3, -3.2, 5.7, 1.0],
        [0, 3.2, 4.7, -8.9],
    ]
    samples = np.array(rows)
    discretiser = KBinsDiscretizer(
        n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="uniform")
    fitted = discretiser.fit(samples)

    # Declared input shape: [None, number of columns in `samples`].
    input_spec = [("input", FloatTensorType([None, samples.shape[1]]))]
    onnx_model = convert_sklearn(
        fitted,
        "scikit-learn KBinsDiscretiser",
        input_spec,
        target_opset=TARGET_OPSET,
    )
    self.assertTrue(onnx_model is not None)

    # NOTE(review): allow_failure is passed as a string expression;
    # presumably the helper evaluates it to tolerate old onnxruntime
    # versions -- confirm against dump_data_and_model.
    dump_data_and_model(
        samples.astype(np.float32),
        fitted,
        onnx_model,
        basename="SklearnKBinsDiscretiserOneHotDenseUniform",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #7
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_encode_options():
    """Compare the three ``encode`` options against ``OneHotEncoder``.

    The ordinal output is one-hot encoded with ``OneHotEncoder`` and compared
    with the ``'onehot-dense'`` output (which must not be sparse) and with
    the ``'onehot'`` output (which must be sparse). ``X`` is the module-level
    fixture defined outside this block.
    """
    bin_counts = (2, 3, 3, 3)

    def fitted_output(encoding):
        # A fresh n_bins list is built for every estimator.
        est = KBinsDiscretizer(n_bins=list(bin_counts), encode=encoding)
        return est.fit(X).transform(X)

    def reference_encoder(sparse):
        # One category range per feature, matching the requested bin counts.
        categories = [np.arange(count) for count in bin_counts]
        return OneHotEncoder(categories=categories, sparse=sparse)

    ordinal_out = fitted_output('ordinal')

    dense_out = fitted_output('onehot-dense')
    assert not sp.issparse(dense_out)
    expected_dense = reference_encoder(False).fit_transform(ordinal_out)
    assert_array_equal(expected_dense, dense_out)

    sparse_out = fitted_output('onehot')
    assert sp.issparse(sparse_out)
    expected_sparse = reference_encoder(True).fit_transform(ordinal_out)
    assert_array_equal(expected_sparse.toarray(), sparse_out.toarray())
Example #8
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_nonuniform_strategies(
        strategy, expected_2bins, expected_3bins, expected_5bins):
    """Check ordinal output of ``strategy`` for 2, 3 and 5 bins.

    A single-column array is discretized with each bin count in turn and the
    flattened result is compared with the matching ``expected_*`` argument.
    """
    X = np.array([0, 0.5, 2, 3, 9, 10]).reshape(-1, 1)

    cases = (
        (2, expected_2bins),
        (3, expected_3bins),
        (5, expected_5bins),
    )
    for bin_count, expected in cases:
        est = KBinsDiscretizer(
            n_bins=bin_count, strategy=strategy, encode='ordinal')
        Xt = est.fit_transform(X)
        assert_array_equal(expected, Xt.ravel())
Example #9
Source File: test_sklearn_k_bins_discretiser_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_k_bins_discretiser_ordinal_uniform(self):
    # Fit a uniform / ordinal KBinsDiscretizer (3 bins) on a small float
    # array, convert it with convert_sklearn using a Float input
    # declaration, check that something was returned, then pass the float32
    # data and both models to dump_data_and_model (helper defined outside
    # this block).
    rows = [
        [1.2, 3.2, 1.3, -5.6],
        [4.3, -3.2, 5.7, 1.0],
        [0, 3.2, 4.7, -8.9],
    ]
    samples = np.array(rows)
    discretiser = KBinsDiscretizer(
        n_bins=3, encode="ordinal", strategy="uniform")
    fitted = discretiser.fit(samples)

    # Declared input shape: [None, number of columns in `samples`].
    input_spec = [("input", FloatTensorType([None, samples.shape[1]]))]
    onnx_model = convert_sklearn(
        fitted,
        "scikit-learn KBinsDiscretiser",
        input_spec,
        target_opset=TARGET_OPSET,
    )
    self.assertTrue(onnx_model is not None)

    # NOTE(review): allow_failure is passed as a string expression;
    # presumably the helper evaluates it to tolerate old onnxruntime
    # versions -- confirm against dump_data_and_model.
    dump_data_and_model(
        samples.astype(np.float32),
        fitted,
        onnx_model,
        basename="SklearnKBinsDiscretiserOrdinalUniform",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #10
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_overwrite():
    """Check that neither transform direction modifies its input array.

    Snapshots are taken before ``fit_transform`` and before
    ``inverse_transform`` and compared with the arrays afterwards; the
    inverse-transformed values are also checked.
    """
    original = np.array([0, 1, 2, 3])[:, np.newaxis]
    original_snapshot = original.copy()

    discretizer = KBinsDiscretizer(n_bins=3, encode="ordinal")
    binned = discretizer.fit_transform(original)
    assert_array_equal(original, original_snapshot)

    binned_snapshot = binned.copy()
    restored = discretizer.inverse_transform(binned)
    assert_array_equal(binned, binned_snapshot)

    expected_restored = np.array([[0.5], [1.5], [2.5], [2.5]])
    assert_array_equal(restored, expected_restored)
Example #11
Source File: test_sklearn_k_bins_discretiser_converter.py From sklearn-onnx with MIT License | 5 votes |
def test_model_k_bins_discretiser_onehot_dense_quantile(self):
    # Fit a quantile / onehot-dense KBinsDiscretizer on a float array,
    # convert it with convert_sklearn using a Float input declaration,
    # check that something was returned, then pass the float32 data and
    # both models to dump_data_and_model (helper defined outside this block).
    rows = [
        [1.2, 3.2, 1.3, -5.6],
        [4.3, -3.2, 5.7, 1.0],
        [0, 3.2, 4.7, -8.9],
        [0.2, 1.3, 0.6, -9.4],
        [0.8, 4.2, -14.7, -28.9],
        [8.2, 1.9, 2.6, -5.4],
        [4.8, -9.2, 33.7, 3.9],
        [81.2, 1.0, 0.6, 12.4],
        [6.8, 11.2, -1.7, -2.9],
        [11.2, 12.9, 4.3, -1.4],
    ]
    samples = np.array(rows)
    discretiser = KBinsDiscretizer(
        n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="quantile")
    fitted = discretiser.fit(samples)

    # Declared input shape: [None, number of columns in `samples`].
    input_spec = [("input", FloatTensorType([None, samples.shape[1]]))]
    onnx_model = convert_sklearn(
        fitted,
        "scikit-learn KBinsDiscretiser",
        input_spec,
        target_opset=TARGET_OPSET,
    )
    self.assertTrue(onnx_model is not None)

    # NOTE(review): allow_failure is passed as a string expression;
    # presumably the helper evaluates it to tolerate old onnxruntime
    # versions -- confirm against dump_data_and_model.
    dump_data_and_model(
        samples.astype(np.float32),
        fitted,
        onnx_model,
        basename="SklearnKBinsDiscretiserOneHotDenseQuantile",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #12
Source File: test_sklearn_k_bins_discretiser_converter.py From sklearn-onnx with MIT License | 5 votes |
def test_model_k_bins_discretiser_ordinal_kmeans(self):
    # Fit a kmeans / ordinal KBinsDiscretizer (3 bins) on a float array,
    # convert it with convert_sklearn using a Float input declaration,
    # check that something was returned, then pass the float32 data and
    # both models to dump_data_and_model (helper defined outside this block).
    rows = [
        [1.2, 3.2, 1.3, -5.6],
        [4.3, -3.2, 5.7, 1.0],
        [0, 3.2, 4.7, -8.9],
        [0.2, 1.3, 0.6, -9.4],
        [0.8, 4.2, -14.7, -28.9],
        [8.2, 1.9, 2.6, -5.4],
        [4.8, -9.2, 33.7, 3.9],
        [81.2, 1.0, 0.6, 12.4],
        [6.8, 11.2, -1.7, -2.9],
        [11.2, 12.9, 4.3, -1.4],
    ]
    samples = np.array(rows)
    discretiser = KBinsDiscretizer(
        n_bins=3, encode="ordinal", strategy="kmeans")
    fitted = discretiser.fit(samples)

    # Declared input shape: [None, number of columns in `samples`].
    input_spec = [("input", FloatTensorType([None, samples.shape[1]]))]
    onnx_model = convert_sklearn(
        fitted,
        "scikit-learn KBinsDiscretiser",
        input_spec,
        target_opset=TARGET_OPSET,
    )
    self.assertTrue(onnx_model is not None)

    # NOTE(review): allow_failure is passed as a string expression;
    # presumably the helper evaluates it to tolerate old onnxruntime
    # versions -- confirm against dump_data_and_model.
    dump_data_and_model(
        samples.astype(np.float32),
        fitted,
        onnx_model,
        basename="SklearnKBinsDiscretiserOrdinalKMeans",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #13
Source File: test_sklearn_k_bins_discretiser_converter.py From sklearn-onnx with MIT License | 5 votes |
def test_model_k_bins_discretiser_ordinal_quantile(self):
    # Fit a quantile / ordinal KBinsDiscretizer (per-feature bin counts) on
    # a float array, convert it with convert_sklearn using a Float input
    # declaration, check that something was returned, then pass the float32
    # data and both models to dump_data_and_model (helper defined outside
    # this block).
    rows = [
        [1.2, 3.2, 1.3, -5.6],
        [4.3, -3.2, 5.7, 1.0],
        [0, 3.2, 4.7, -8.9],
        [0.2, 1.3, 0.6, -9.4],
        [0.8, 4.2, -14.7, -28.9],
        [8.2, 1.9, 2.6, -5.4],
        [4.8, -9.2, 33.7, 3.9],
        [81.2, 1.0, 0.6, 12.4],
        [6.8, 11.2, -1.7, -2.9],
        [11.2, 12.9, 4.3, -1.4],
    ]
    samples = np.array(rows)
    discretiser = KBinsDiscretizer(
        n_bins=[3, 2, 3, 4], encode="ordinal", strategy="quantile")
    fitted = discretiser.fit(samples)

    # Declared input shape: [None, number of columns in `samples`].
    input_spec = [("input", FloatTensorType([None, samples.shape[1]]))]
    onnx_model = convert_sklearn(
        fitted,
        "scikit-learn KBinsDiscretiser",
        input_spec,
        target_opset=TARGET_OPSET,
    )
    self.assertTrue(onnx_model is not None)

    # NOTE(review): allow_failure is passed as a string expression;
    # presumably the helper evaluates it to tolerate old onnxruntime
    # versions -- confirm against dump_data_and_model.
    dump_data_and_model(
        samples.astype(np.float32),
        fitted,
        onnx_model,
        basename="SklearnKBinsDiscretiserOrdinalQuantile",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #14
Source File: utils.py From SDGym with MIT License | 5 votes |
def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
    """Record column metadata and fit a discretizer on continuous columns.

    Stores the result of ``self.get_metadata`` in ``self.meta``, the
    positions of entries whose ``'type'`` equals ``CONTINUOUS`` in
    ``self.column_index``, and an ordinal, uniform-strategy
    ``KBinsDiscretizer`` with ``self.n_bins`` bins in ``self.discretizer``.
    The discretizer is fitted only when at least one such column exists.

    Args:
        data: Indexed as ``data[:, columns]``; presumably a 2-D array
            (verify against callers).
        categorical_columns: Forwarded unchanged to ``self.get_metadata``.
        ordinal_columns: Forwarded unchanged to ``self.get_metadata``.

    Returns:
        None.
    """
    self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)

    continuous_positions = []
    for position, column_info in enumerate(self.meta):
        if column_info['type'] == CONTINUOUS:
            continuous_positions.append(position)
    self.column_index = continuous_positions

    self.discretizer = KBinsDiscretizer(
        n_bins=self.n_bins, encode='ordinal', strategy='uniform')

    # The discretizer stays unfitted when there is nothing continuous.
    if self.column_index:
        self.discretizer.fit(data[:, self.column_index])
Example #15
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_redundant_bins(strategy, expected_bin_edges):
    """Check the warning and resulting edges when bins are too narrow.

    Fitting 3 bins on a column holding only the values 0 and 3 must emit the
    ``UserWarning`` below, and the edges stored for feature 0 must be almost
    equal to ``expected_bin_edges``.
    """
    column_values = (0, 0, 0, 0, 3, 3)
    X = [[value] for value in column_values]

    discretizer = KBinsDiscretizer(n_bins=3, strategy=strategy)
    expected_warning = (
        "Bins whose width are too small (i.e., <= 1e-8) in feature 0 "
        "are removed. Consider decreasing the number of bins."
    )
    assert_warns_message(UserWarning, expected_warning, discretizer.fit, X)

    first_feature_edges = discretizer.bin_edges_[0]
    assert_array_almost_equal(first_feature_edges, expected_bin_edges)
Example #16
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_fit_transform(strategy, expected):
    """Check 3-bin ordinal output for ``strategy`` on the module-level ``X``.

    ``X`` is a fixture defined outside this block.
    """
    discretizer = KBinsDiscretizer(
        n_bins=3, encode='ordinal', strategy=strategy)
    discretizer.fit(X)
    binned = discretizer.transform(X)
    assert_array_equal(expected, binned)
Example #17
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_transform_outside_fit_range(strategy):
    """Check bin indices for values below and above the fitted range.

    After fitting on 0..3 with 4 bins, transforming -2 and 5 must give a
    column maximum equal to ``n_bins_ - 1`` and a column minimum of 0.
    """
    train = np.array([0, 1, 2, 3])[:, np.newaxis]
    discretizer = KBinsDiscretizer(
        n_bins=4, strategy=strategy, encode='ordinal')
    discretizer.fit(train)

    outside = np.array([-2, 5])[:, np.newaxis]
    outside_binned = discretizer.transform(outside)

    highest_plus_one = outside_binned.max(axis=0) + 1
    assert_array_equal(highest_plus_one, discretizer.n_bins_)

    lowest = outside_binned.min(axis=0)
    assert_array_equal(lowest, [0])
Example #18
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_inverse_transform(strategy, encode, expected_inv):
    """Check ``inverse_transform`` of the 3-bin output against ``expected_inv``.

    ``X`` is the module-level fixture defined outside this block.
    """
    discretizer = KBinsDiscretizer(
        n_bins=3, strategy=strategy, encode=encode)
    binned = discretizer.fit_transform(X)
    restored = discretizer.inverse_transform(binned)
    assert_array_almost_equal(expected_inv, restored)
Example #19
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_invalid_encode_option():
    """Check that an unknown ``encode`` value raises ``ValueError`` on fit.

    ``X`` is the module-level fixture defined outside this block.
    """
    discretizer = KBinsDiscretizer(
        n_bins=[2, 3, 3, 3], encode='invalid-encode')
    expected_message = (
        "Valid options for 'encode' are "
        "('onehot', 'onehot-dense', 'ordinal'). "
        "Got encode='invalid-encode' instead."
    )
    assert_raise_message(ValueError, expected_message, discretizer.fit, X)
Example #20
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_numeric_stability():
    """Check that 2-bin ordinal output is unchanged when the data is scaled down.

    The same five values are divided by 10**1 through 10**8 and must always
    produce the same bin assignment.
    """
    base_values = np.array([2., 4., 6., 8., 10.]).reshape(-1, 1)
    expected_bins = np.array([0, 0, 1, 1, 1]).reshape(-1, 1)

    # Test up to discretizing nano units
    for exponent in range(1, 9):
        scaled = base_values / 10**exponent
        discretizer = KBinsDiscretizer(n_bins=2, encode='ordinal')
        binned = discretizer.fit_transform(scaled)
        assert_array_equal(expected_bins, binned)
Example #21
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_transform_1d_behavior():
    """Check that 1-D input raises ``ValueError`` in both fit and transform."""
    flat = np.arange(4)

    # Fitting directly on the 1-D array must fail.
    unfitted = KBinsDiscretizer(n_bins=2)
    assert_raises(ValueError, unfitted.fit, flat)

    # After a valid fit on the column-shaped data, 1-D input to transform
    # must fail as well.
    fitted = KBinsDiscretizer(n_bins=2)
    fitted.fit(flat.reshape(-1, 1))
    assert_raises(ValueError, fitted.transform, flat)
Example #22
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_same_min_max(strategy):
    """Check handling of a feature whose values are all identical.

    Fitting must warn about feature 0, record a single bin for it, and
    transform it to all zeros.
    """
    warnings.simplefilter("always")
    rows = [[1, -2], [1, -1], [1, 0], [1, 1]]
    X = np.array(rows)

    discretizer = KBinsDiscretizer(
        strategy=strategy, n_bins=3, encode='ordinal')
    expected_warning = ("Feature 0 is constant and will be replaced "
                        "with 0.")
    assert_warns_message(UserWarning, expected_warning, discretizer.fit, X)
    assert discretizer.n_bins_[0] == 1

    # replace the feature with zeros
    binned = discretizer.transform(X)
    n_samples = X.shape[0]
    assert_array_equal(binned[:, 0], np.zeros(n_samples))
Example #23
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_fit_transform_n_bins_array(strategy, expected):
    """Check output and ``bin_edges_`` shapes for per-feature bin counts.

    ``X`` is the module-level fixture defined outside this block.
    """
    discretizer = KBinsDiscretizer(
        n_bins=[2, 3, 3, 3], encode='ordinal', strategy=strategy)
    fitted = discretizer.fit(X)
    assert_array_equal(expected, fitted.transform(X))

    # test the shape of bin_edges_
    feature_count = np.array(X).shape[1]
    assert fitted.bin_edges_.shape == (feature_count, )
    # Each feature needs one more edge than it has bins.
    for edges, bin_count in zip(fitted.bin_edges_, fitted.n_bins_):
        assert edges.shape == (bin_count + 1, )
Example #24
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_invalid_n_bins_array():
    """Check the ``ValueError`` messages raised for invalid ``n_bins`` arrays.

    Each case pairs an ``n_bins`` value with the message expected from
    ``fit_transform`` on the module-level ``X`` (defined outside this block).
    """
    shape_message = ("n_bins must be a scalar or array of shape "
                     "(n_features,).")
    cases = [
        # Bad shape
        (np.full((2, 4), 2.), shape_message),
        # Incorrect number of features
        ([1, 2, 2], shape_message),
        # Bad bin values
        ([1, 2, 2, 1],
         "KBinsDiscretizer received an invalid number of bins "
         "at indices 0, 3. Number of bins must be at least 2, "
         "and must be an int."),
        # Float bin values
        ([2.1, 2, 2.1, 2],
         "KBinsDiscretizer received an invalid number of bins "
         "at indices 0, 2. Number of bins must be at least 2, "
         "and must be an int."),
    ]

    for bad_n_bins, expected_message in cases:
        discretizer = KBinsDiscretizer(n_bins=bad_n_bins)
        assert_raise_message(
            ValueError, expected_message, discretizer.fit_transform, X)
Example #25
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_invalid_n_bins():
    """Check the ``ValueError`` messages for a too-small and a float ``n_bins``.

    ``X`` is the module-level fixture defined outside this block.
    """
    too_few = KBinsDiscretizer(n_bins=1)
    too_few_message = ("KBinsDiscretizer received an invalid "
                       "number of bins. Received 1, expected at least 2.")
    assert_raise_message(
        ValueError, too_few_message, too_few.fit_transform, X)

    wrong_type = KBinsDiscretizer(n_bins=1.1)
    wrong_type_message = ("KBinsDiscretizer received an invalid "
                          "n_bins type. Received float, expected int.")
    assert_raise_message(
        ValueError, wrong_type_message, wrong_type.fit_transform, X)
Example #26
Source File: test_discretization.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_valid_n_bins():
    """Check that valid scalar ``n_bins`` values are accepted.

    Both a Python int and a NumPy integer scalar must work with
    ``fit_transform``, and the fitted ``n_bins_`` must have the dtype that
    NumPy derives from the builtin ``int``. ``X`` is the module-level fixture
    defined outside this block.
    """
    KBinsDiscretizer(n_bins=2).fit_transform(X)
    KBinsDiscretizer(n_bins=np.array([2])[0]).fit_transform(X)
    # ``np.int`` was only an alias of the builtin ``int``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    assert KBinsDiscretizer(n_bins=2).fit(X).n_bins_.dtype == np.dtype(int)