Python sklearn.exceptions.DataConversionWarning() Examples
The following are 10 code examples of sklearn.exceptions.DataConversionWarning(), taken from open-source projects. The source file, originating project, and license are noted above each example. You may also want to check out the other functions and classes available in the sklearn.exceptions module.
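Before diving into the project-specific examples, here is a minimal, self-contained sketch of the most common way DataConversionWarning shows up in practice: an estimator converts a column-vector y into the 1d array it expects and warns about it. The choice of RandomForestClassifier and the toy data are illustrative assumptions, not taken from any of the examples below.

import warnings

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.exceptions import DataConversionWarning

# Toy data: 10 samples, 3 features, and binary labels shaped as a
# column vector (10, 1) instead of the expected 1d array (10,).
X = np.random.RandomState(0).rand(10, 3)
y_column = (np.arange(10) % 2)[:, np.newaxis]

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always", DataConversionWarning)
    RandomForestClassifier(n_estimators=5, random_state=0).fit(X, y_column)

# The estimator ravels the column vector to 1d and warns about it.
messages = [str(w.message) for w in caught
            if issubclass(w.category, DataConversionWarning)]
assert messages and "column-vector y" in messages[0]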
Example #1
Source File: pairwise.py From mars with Apache License 2.0
def pairwise_distances(X, Y=None, metric="euclidean", **kwds):
    if (metric not in _VALID_METRICS and
            not callable(metric) and metric != "precomputed"):
        raise ValueError("Unknown metric %s. "
                         "Valid metrics are %s, or 'precomputed', or a "
                         "callable" % (metric, _VALID_METRICS))

    if metric == "precomputed":
        X, _ = PairwiseDistances.check_pairwise_arrays(X, Y, precomputed=True)

        whom = ("`pairwise_distances`. Precomputed distance "
                " need to have non-negative values.")
        X = check_non_negative(X, whom=whom)
        return X
    elif metric in PAIRWISE_DISTANCE_FUNCTIONS:
        func = PAIRWISE_DISTANCE_FUNCTIONS[metric]
    else:
        # including when metric is callable
        dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else None

        if (dtype == bool and
                (X.dtype != bool or (Y is not None and Y.dtype != bool)) and
                DataConversionWarning is not None):
            msg = "Data was converted to boolean for metric %s" % metric
            warnings.warn(msg, DataConversionWarning)

        X, Y = PairwiseDistances.check_pairwise_arrays(X, Y, dtype=dtype)
        if X is Y:
            return distance.squareform(distance.pdist(X, metric=metric,
                                                      **kwds))
        func = partial(distance.cdist, metric=metric, **kwds)

    return func(X, Y, **kwds)
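The mars function above mirrors scikit-learn's sklearn.metrics.pairwise_distances, which emits the same "Data was converted to boolean" warning when floating-point data is passed to a boolean metric. A minimal sketch against scikit-learn itself (the toy array is an illustrative assumption):

import warnings

import numpy as np
from sklearn.exceptions import DataConversionWarning
from sklearn.metrics import pairwise_distances

# Float data passed to a boolean metric gets cast to bool, with a warning.
X = np.random.RandomState(0).rand(4, 3)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always", DataConversionWarning)
    pairwise_distances(X, metric="jaccard")

assert any("converted to boolean" in str(w.message) for w in caught)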
Example #2
Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License
def test_shape_y():
    # Test with float class labels.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    y_ = np.asarray(y, dtype=np.int32)
    y_ = y_[:, np.newaxis]

    # This will raise a DataConversionWarning that we want to
    # "always" raise, elsewhere the warnings gets ignored in the
    # later tests, and the tests that check for this warning fail
    assert_warns(DataConversionWarning, clf.fit, X, y_)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_))
Example #3
Source File: test_pairwise.py From Mastering-Elasticsearch-7.0 with MIT License
def test_pairwise_boolean_distance(metric):
    # test that we convert to boolean arrays for boolean distances
    rng = np.random.RandomState(0)
    X = rng.randn(5, 4)
    Y = X.copy()
    Y[0, 0] = 1 - Y[0, 0]

    # ignore conversion to boolean in pairwise_distances
    with ignore_warnings(category=DataConversionWarning):
        for Z in [Y, None]:
            res = pairwise_distances(X, Z, metric=metric)
            res[np.isnan(res)] = 0
            assert np.sum(res != 0) == 0

    # non-boolean arrays are converted to boolean for boolean
    # distance metrics with a data conversion warning
    msg = "Data was converted to boolean for metric %s" % metric
    with pytest.warns(DataConversionWarning, match=msg):
        pairwise_distances(X, metric=metric)

    # Check that the warning is raised if X is boolean but Y is not boolean:
    with pytest.warns(DataConversionWarning, match=msg):
        pairwise_distances(X.astype(bool), Y=Y, metric=metric)

    # Check that no warning is raised if X is already boolean and Y is None:
    with pytest.warns(None) as records:
        pairwise_distances(X.astype(bool), metric=metric)
    assert len(records) == 0
Example #4
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License
def test_check_dataframe_warns_on_dtype():
    # Check that warn_on_dtype also works for DataFrames.
    # https://github.com/scikit-learn/scikit-learn/issues/10948
    pd = importorskip("pandas")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object)
    assert_warns_message(DataConversionWarning,
                         "Data with input dtype object were all converted to "
                         "float64.",
                         check_array, df, dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df,
                 dtype='numeric', warn_on_dtype=True)
    with pytest.warns(None) as record:
        warnings.simplefilter("ignore", DeprecationWarning)  # 0.23
        check_array(df, dtype='object', warn_on_dtype=True)
    assert len(record) == 0

    # Also check that it raises a warning for mixed dtypes in a DataFrame.
    df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype='numeric', warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=object, warn_on_dtype=True)

    # Even with numerical dtypes, a conversion can be made because dtypes are
    # uniformized throughout the array.
    df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed_numeric,
                 dtype='numeric', warn_on_dtype=True)
    with pytest.warns(None) as record:
        warnings.simplefilter("ignore", DeprecationWarning)  # 0.23
        check_array(df_mixed_numeric.astype(int),
                    dtype='numeric', warn_on_dtype=True)
    assert len(record) == 0
Example #5
Source File: sklearn_patches.py From tslearn with BSD 2-Clause "Simplified" License
def check_supervised_y_2d(name, estimator_orig):
    tags = estimator_orig._get_tags()
    X, y = _create_small_ts_dataset()
    if tags['binary_only']:
        X = X[y != 2]
        y = y[y != 2]

    estimator = clone(estimator_orig)
    set_random_state(estimator)
    # fit
    estimator.fit(X, y)
    y_pred = estimator.predict(X)

    set_random_state(estimator)
    # Check that when a 2D y is given, a DataConversionWarning is
    # raised
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DataConversionWarning)
        warnings.simplefilter("ignore", RuntimeWarning)
        estimator.fit(X, y[:, np.newaxis])
    y_pred_2d = estimator.predict(X)
    msg = "expected 1 DataConversionWarning, got: %s" % (
        ", ".join([str(w_x) for w_x in w]))

    if not tags['multioutput'] and name not in ['TimeSeriesSVR']:
        # check that we warned if we don't support multi-output
        assert len(w) > 0, msg
        assert "DataConversionWarning('A column-vector y" \
               " was passed when a 1d array was expected" in msg
    assert_allclose(y_pred.ravel(), y_pred_2d.ravel())
Example #6
Source File: estimator_checks.py From Splunking-Crime with GNU Affero General Public License v3.0
def check_supervised_y_2d(name, estimator_orig):
    if "MultiTask" in name:
        # These only work on 2d, so this test makes no sense
        return
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 3))
    y = np.arange(10) % 3
    estimator = clone(estimator_orig)
    set_random_state(estimator)
    # fit
    estimator.fit(X, y)
    y_pred = estimator.predict(X)

    set_random_state(estimator)
    # Check that when a 2D y is given, a DataConversionWarning is
    # raised
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DataConversionWarning)
        warnings.simplefilter("ignore", RuntimeWarning)
        estimator.fit(X, y[:, np.newaxis])
    y_pred_2d = estimator.predict(X)
    msg = "expected 1 DataConversionWarning, got: %s" % (
        ", ".join([str(w_x) for w_x in w]))
    if name not in MULTI_OUTPUT:
        # check that we warned if we don't support multi-output
        assert_greater(len(w), 0, msg)
        assert_true("DataConversionWarning('A column-vector y"
                    " was passed when a 1d array was expected" in msg)
    assert_allclose(y_pred.ravel(), y_pred_2d.ravel())
Example #7
Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License
def test_shape_y():
    # Test with float class labels.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    y_ = np.asarray(y, dtype=np.int32)
    y_ = y_[:, np.newaxis]

    # This will raise a DataConversionWarning that we want to
    # "always" raise, elsewhere the warnings gets ignored in the
    # later tests, and the tests that check for this warning fail
    assert_warns(DataConversionWarning, clf.fit, X, y_)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_))
Example #8
Source File: estimator_checks.py From twitter-stock-recommendation with MIT License
def check_supervised_y_2d(name, estimator_orig):
    if "MultiTask" in name:
        # These only work on 2d, so this test makes no sense
        return
    if name == "GaussianProcess":
        # Workaround: https://github.com/scikit-learn/scikit-learn/issues/10562
        return
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 3))
    y = np.arange(10) % 3
    estimator = clone(estimator_orig)
    set_random_state(estimator)
    # fit
    estimator.fit(X, y)
    y_pred = estimator.predict(X)

    set_random_state(estimator)
    # Check that when a 2D y is given, a DataConversionWarning is
    # raised
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DataConversionWarning)
        warnings.simplefilter("ignore", RuntimeWarning)
        estimator.fit(X, y[:, np.newaxis])
    y_pred_2d = estimator.predict(X)
    msg = "expected 1 DataConversionWarning, got: %s" % (
        ", ".join([str(w_x) for w_x in w]))
    if name not in MULTI_OUTPUT:
        # check that we warned if we don't support multi-output
        assert_greater(len(w), 0, msg)
        assert_true("DataConversionWarning('A column-vector y"
                    " was passed when a 1d array was expected" in msg)
    assert_allclose(y_pred.ravel(), y_pred_2d.ravel())
Example #9
Source File: test_data.py From twitter-stock-recommendation with MIT License
def test_warning_scaling_integers():
    # Check warning when scaling integer data
    X = np.array([[1, 2, 0],
                  [0, 0, 0]], dtype=np.uint8)

    w = "Data with input dtype uint8 was converted to float64"

    clean_warning_registry()
    assert_warns_message(DataConversionWarning, w, scale, X)
    assert_warns_message(DataConversionWarning, w, StandardScaler().fit, X)
    assert_warns_message(DataConversionWarning, w, MinMaxScaler().fit, X)
Example #10
Source File: test_pariwise_distances.py From mars with Apache License 2.0
def testPairwiseDistancesExecution(self):
    raw_x = np.random.rand(20, 5)
    raw_y = np.random.rand(21, 5)

    x = mt.tensor(raw_x, chunk_size=11)
    y = mt.tensor(raw_y, chunk_size=12)

    d = pairwise_distances(x, y)
    result = self.executor.execute_tensor(d, concat=True)[0]
    expected = sk_pairwise_distances(raw_x, raw_y)
    np.testing.assert_almost_equal(result, expected)

    # test precomputed
    d2 = d.copy()
    d2[0, 0] = -1
    d2 = pairwise_distances(d2, y, metric='precomputed')
    with self.assertRaises(ValueError):
        _ = self.executor.execute_tensor(d2, concat=True)[0]

    # test cdist
    weight = np.random.rand(5)
    d = pairwise_distances(x, y, metric='wminkowski', p=3, w=weight)
    result = self.executor.execute_tensor(d, concat=True)[0]
    expected = sk_pairwise_distances(raw_x, raw_y, metric='wminkowski',
                                     p=3, w=weight)
    np.testing.assert_almost_equal(result, expected)

    # test pdist
    d = pairwise_distances(x, metric='hamming')
    result = self.executor.execute_tensor(d, concat=True)[0]
    expected = sk_pairwise_distances(raw_x, metric='hamming')
    np.testing.assert_almost_equal(result, expected)

    # test function metric
    m = lambda u, v: np.sqrt(((u - v) ** 2).sum())
    d = pairwise_distances(x, y, metric=m)
    result = self.executor.execute_tensor(d, concat=True)[0]
    expected = sk_pairwise_distances(raw_x, raw_y, metric=m)
    np.testing.assert_almost_equal(result, expected)

    assert_warns(DataConversionWarning, pairwise_distances,
                 x, y, metric='jaccard')

    with self.assertRaises(ValueError):
        _ = pairwise_distances(x, y, metric='unknown')