Python sklearn.exceptions.DataConversionWarning() Examples

The following are 10 code examples of sklearn.exceptions.DataConversionWarning(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.exceptions , or try the search function .
Example #1
Source File: pairwise.py    From mars with Apache License 2.0 5 votes vote down vote up
def pairwise_distances(X, Y=None, metric="euclidean", **kwds):
    if (metric not in _VALID_METRICS and
            not callable(metric) and metric != "precomputed"):
        raise ValueError("Unknown metric %s. "
                         "Valid metrics are %s, or 'precomputed', or a "
                         "callable" % (metric, _VALID_METRICS))

    if metric == "precomputed":
        X, _ = PairwiseDistances.check_pairwise_arrays(X, Y, precomputed=True)

        whom = ("`pairwise_distances`. Precomputed distance "
                " need to have non-negative values.")
        X = check_non_negative(X, whom=whom)
        return X
    elif metric in PAIRWISE_DISTANCE_FUNCTIONS:
        func = PAIRWISE_DISTANCE_FUNCTIONS[metric]
    else:
        # including when metric is callable
        dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else None

        if (dtype == bool and
                (X.dtype != bool or (Y is not None and Y.dtype != bool)) and
                DataConversionWarning is not None):
            msg = "Data was converted to boolean for metric %s" % metric
            warnings.warn(msg, DataConversionWarning)

        X, Y = PairwiseDistances.check_pairwise_arrays(X, Y, dtype=dtype)
        if X is Y:
            return distance.squareform(distance.pdist(X, metric=metric, **kwds))
        func = partial(distance.cdist, metric=metric, **kwds)

    return func(X, Y, **kwds) 
Example #2
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_shape_y():
    # Test with float class labels.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    y_ = np.asarray(y, dtype=np.int32)
    y_ = y_[:, np.newaxis]

    # This will raise a DataConversionWarning that we want to
    # "always" raise, elsewhere the warnings gets ignored in the
    # later tests, and the tests that check for this warning fail
    assert_warns(DataConversionWarning, clf.fit, X, y_)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_)) 
Example #3
Source File: test_pairwise.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_pairwise_boolean_distance(metric):
    # test that we convert to boolean arrays for boolean distances
    rng = np.random.RandomState(0)
    X = rng.randn(5, 4)
    Y = X.copy()
    Y[0, 0] = 1 - Y[0, 0]

    # ignore conversion to boolean in pairwise_distances
    with ignore_warnings(category=DataConversionWarning):
        for Z in [Y, None]:
            res = pairwise_distances(X, Z, metric=metric)
            res[np.isnan(res)] = 0
            assert np.sum(res != 0) == 0

    # non-boolean arrays are converted to boolean for boolean
    # distance metrics with a data conversion warning
    msg = "Data was converted to boolean for metric %s" % metric
    with pytest.warns(DataConversionWarning, match=msg):
        pairwise_distances(X, metric=metric)

    # Check that the warning is raised if X is boolean by Y is not boolean:
    with pytest.warns(DataConversionWarning, match=msg):
        pairwise_distances(X.astype(bool), Y=Y, metric=metric)

    # Check that no warning is raised if X is already boolean and Y is None:
    with pytest.warns(None) as records:
        pairwise_distances(X.astype(bool), metric=metric)
    assert len(records) == 0 
Example #4
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_check_dataframe_warns_on_dtype():
    # Check that warn_on_dtype also works for DataFrames.
    # https://github.com/scikit-learn/scikit-learn/issues/10948
    pd = importorskip("pandas")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object)
    assert_warns_message(DataConversionWarning,
                         "Data with input dtype object were all converted to "
                         "float64.",
                         check_array, df, dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df,
                 dtype='numeric', warn_on_dtype=True)
    with pytest.warns(None) as record:
        warnings.simplefilter("ignore", DeprecationWarning)  # 0.23
        check_array(df, dtype='object', warn_on_dtype=True)
    assert len(record) == 0

    # Also check that it raises a warning for mixed dtypes in a DataFrame.
    df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype='numeric', warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=object, warn_on_dtype=True)

    # Even with numerical dtypes, a conversion can be made because dtypes are
    # uniformized throughout the array.
    df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed_numeric,
                 dtype='numeric', warn_on_dtype=True)
    with pytest.warns(None) as record:
        warnings.simplefilter("ignore", DeprecationWarning)  # 0.23
        check_array(df_mixed_numeric.astype(int),
                    dtype='numeric', warn_on_dtype=True)
    assert len(record) == 0 
Example #5
Source File: sklearn_patches.py    From tslearn with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def check_supervised_y_2d(name, estimator_orig):
    tags = estimator_orig._get_tags()
    X, y = _create_small_ts_dataset()
    if tags['binary_only']:
        X = X[y != 2]
        y = y[y != 2]

    estimator = clone(estimator_orig)
    set_random_state(estimator)
    # fit
    estimator.fit(X, y)
    y_pred = estimator.predict(X)

    set_random_state(estimator)
    # Check that when a 2D y is given, a DataConversionWarning is
    # raised
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DataConversionWarning)
        warnings.simplefilter("ignore", RuntimeWarning)
        estimator.fit(X, y[:, np.newaxis])
    y_pred_2d = estimator.predict(X)
    msg = "expected 1 DataConversionWarning, got: %s" % (
        ", ".join([str(w_x) for w_x in w]))

    if not tags['multioutput'] and name not in ['TimeSeriesSVR']:
        # check that we warned if we don't support multi-output
        assert len(w) > 0, msg
        assert "DataConversionWarning('A column-vector y" \
               " was passed when a 1d array was expected" in msg
        assert_allclose(y_pred.ravel(), y_pred_2d.ravel()) 
Example #6
Source File: estimator_checks.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def check_supervised_y_2d(name, estimator_orig):
    if "MultiTask" in name:
        # These only work on 2d, so this test makes no sense
        return
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 3))
    y = np.arange(10) % 3
    estimator = clone(estimator_orig)
    set_random_state(estimator)
    # fit
    estimator.fit(X, y)
    y_pred = estimator.predict(X)

    set_random_state(estimator)
    # Check that when a 2D y is given, a DataConversionWarning is
    # raised
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DataConversionWarning)
        warnings.simplefilter("ignore", RuntimeWarning)
        estimator.fit(X, y[:, np.newaxis])
    y_pred_2d = estimator.predict(X)
    msg = "expected 1 DataConversionWarning, got: %s" % (
        ", ".join([str(w_x) for w_x in w]))
    if name not in MULTI_OUTPUT:
        # check that we warned if we don't support multi-output
        assert_greater(len(w), 0, msg)
        assert_true("DataConversionWarning('A column-vector y"
                    " was passed when a 1d array was expected" in msg)
    assert_allclose(y_pred.ravel(), y_pred_2d.ravel()) 
Example #7
Source File: test_gradient_boosting.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_shape_y():
    # Test with float class labels.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    y_ = np.asarray(y, dtype=np.int32)
    y_ = y_[:, np.newaxis]

    # This will raise a DataConversionWarning that we want to
    # "always" raise, elsewhere the warnings gets ignored in the
    # later tests, and the tests that check for this warning fail
    assert_warns(DataConversionWarning, clf.fit, X, y_)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_)) 
Example #8
Source File: estimator_checks.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def check_supervised_y_2d(name, estimator_orig):
    if "MultiTask" in name:
        # These only work on 2d, so this test makes no sense
        return
    if name == "GaussianProcess":
        # Workaround: https://github.com/scikit-learn/scikit-learn/issues/10562
        return
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 3))
    y = np.arange(10) % 3
    estimator = clone(estimator_orig)
    set_random_state(estimator)
    # fit
    estimator.fit(X, y)
    y_pred = estimator.predict(X)

    set_random_state(estimator)
    # Check that when a 2D y is given, a DataConversionWarning is
    # raised
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DataConversionWarning)
        warnings.simplefilter("ignore", RuntimeWarning)
        estimator.fit(X, y[:, np.newaxis])
    y_pred_2d = estimator.predict(X)
    msg = "expected 1 DataConversionWarning, got: %s" % (
        ", ".join([str(w_x) for w_x in w]))
    if name not in MULTI_OUTPUT:
        # check that we warned if we don't support multi-output
        assert_greater(len(w), 0, msg)
        assert_true("DataConversionWarning('A column-vector y"
                    " was passed when a 1d array was expected" in msg)
    assert_allclose(y_pred.ravel(), y_pred_2d.ravel()) 
Example #9
Source File: test_data.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_warning_scaling_integers():
    # Check warning when scaling integer data
    X = np.array([[1, 2, 0],
                  [0, 0, 0]], dtype=np.uint8)

    w = "Data with input dtype uint8 was converted to float64"

    clean_warning_registry()
    assert_warns_message(DataConversionWarning, w, scale, X)
    assert_warns_message(DataConversionWarning, w, StandardScaler().fit, X)
    assert_warns_message(DataConversionWarning, w, MinMaxScaler().fit, X) 
Example #10
Source File: test_pariwise_distances.py    From mars with Apache License 2.0 4 votes vote down vote up
def testPairwiseDistancesExecution(self):
        raw_x = np.random.rand(20, 5)
        raw_y = np.random.rand(21, 5)

        x = mt.tensor(raw_x, chunk_size=11)
        y = mt.tensor(raw_y, chunk_size=12)

        d = pairwise_distances(x, y)
        result = self.executor.execute_tensor(d, concat=True)[0]
        expected = sk_pairwise_distances(raw_x, raw_y)
        np.testing.assert_almost_equal(result, expected)

        # test precomputed
        d2 = d.copy()
        d2[0, 0] = -1
        d2 = pairwise_distances(d2, y, metric='precomputed')
        with self.assertRaises(ValueError):
            _ = self.executor.execute_tensor(d2, concat=True)[0]

        # test cdist
        weight = np.random.rand(5)
        d = pairwise_distances(x, y, metric='wminkowski', p=3,
                               w=weight)
        result = self.executor.execute_tensor(d, concat=True)[0]
        expected = sk_pairwise_distances(raw_x, raw_y, metric='wminkowski',
                                         p=3, w=weight)
        np.testing.assert_almost_equal(result, expected)

        # test pdist
        d = pairwise_distances(x, metric='hamming')
        result = self.executor.execute_tensor(d, concat=True)[0]
        expected = sk_pairwise_distances(raw_x, metric='hamming')
        np.testing.assert_almost_equal(result, expected)

        # test function metric
        m = lambda u, v: np.sqrt(((u-v)**2).sum())
        d = pairwise_distances(x, y, metric=m)
        result = self.executor.execute_tensor(d, concat=True)[0]
        expected = sk_pairwise_distances(raw_x, raw_y, metric=m)
        np.testing.assert_almost_equal(result, expected)

        assert_warns(DataConversionWarning,
                     pairwise_distances, x, y, metric='jaccard')

        with self.assertRaises(ValueError):
            _ = pairwise_distances(x, y, metric='unknown')