Python Examples of sklearn.exceptions.DataConversionWarning

Source File: pairwise.py From mars with Apache License 2.0

5 votes

def pairwise_distances(X, Y=None, metric="euclidean", **kwds):
    if (metric not in _VALID_METRICS and
            not callable(metric) and metric != "precomputed"):
        raise ValueError("Unknown metric %s. "
                         "Valid metrics are %s, or 'precomputed', or a "
                         "callable" % (metric, _VALID_METRICS))

    if metric == "precomputed":
        X, _ = PairwiseDistances.check_pairwise_arrays(X, Y, precomputed=True)

        whom = ("`pairwise_distances`. Precomputed distance "
                " need to have non-negative values.")
        X = check_non_negative(X, whom=whom)
        return X
    elif metric in PAIRWISE_DISTANCE_FUNCTIONS:
        func = PAIRWISE_DISTANCE_FUNCTIONS[metric]
    else:
        # including when metric is callable
        dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else None

        if (dtype == bool and
                (X.dtype != bool or (Y is not None and Y.dtype != bool)) and
                DataConversionWarning is not None):
            msg = "Data was converted to boolean for metric %s" % metric
            warnings.warn(msg, DataConversionWarning)

        X, Y = PairwiseDistances.check_pairwise_arrays(X, Y, dtype=dtype)
        if X is Y:
            return distance.squareform(distance.pdist(X, metric=metric, **kwds))
        func = partial(distance.cdist, metric=metric, **kwds)

    return func(X, Y, **kwds)

Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_shape_y():
    # Test with float class labels.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    y_ = np.asarray(y, dtype=np.int32)
    y_ = y_[:, np.newaxis]

    # This will raise a DataConversionWarning that we want to
    # "always" raise, elsewhere the warnings gets ignored in the
    # later tests, and the tests that check for this warning fail
    assert_warns(DataConversionWarning, clf.fit, X, y_)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_))

Source File: test_pairwise.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_pairwise_boolean_distance(metric):
    # test that we convert to boolean arrays for boolean distances
    rng = np.random.RandomState(0)
    X = rng.randn(5, 4)
    Y = X.copy()
    Y[0, 0] = 1 - Y[0, 0]

    # ignore conversion to boolean in pairwise_distances
    with ignore_warnings(category=DataConversionWarning):
        for Z in [Y, None]:
            res = pairwise_distances(X, Z, metric=metric)
            res[np.isnan(res)] = 0
            assert np.sum(res != 0) == 0

    # non-boolean arrays are converted to boolean for boolean
    # distance metrics with a data conversion warning
    msg = "Data was converted to boolean for metric %s" % metric
    with pytest.warns(DataConversionWarning, match=msg):
        pairwise_distances(X, metric=metric)

    # Check that the warning is raised if X is boolean by Y is not boolean:
    with pytest.warns(DataConversionWarning, match=msg):
        pairwise_distances(X.astype(bool), Y=Y, metric=metric)

    # Check that no warning is raised if X is already boolean and Y is None:
    with pytest.warns(None) as records:
        pairwise_distances(X.astype(bool), metric=metric)
    assert len(records) == 0

Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_check_dataframe_warns_on_dtype():
    # Check that warn_on_dtype also works for DataFrames.
    # https://github.com/scikit-learn/scikit-learn/issues/10948
    pd = importorskip("pandas")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object)
    assert_warns_message(DataConversionWarning,
                         "Data with input dtype object were all converted to "
                         "float64.",
                         check_array, df, dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df,
                 dtype='numeric', warn_on_dtype=True)
    with pytest.warns(None) as record:
        warnings.simplefilter("ignore", DeprecationWarning)  # 0.23
        check_array(df, dtype='object', warn_on_dtype=True)
    assert len(record) == 0

    # Also check that it raises a warning for mixed dtypes in a DataFrame.
    df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype='numeric', warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=object, warn_on_dtype=True)

    # Even with numerical dtypes, a conversion can be made because dtypes are
    # uniformized throughout the array.
    df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed_numeric,
                 dtype='numeric', warn_on_dtype=True)
    with pytest.warns(None) as record:
        warnings.simplefilter("ignore", DeprecationWarning)  # 0.23
        check_array(df_mixed_numeric.astype(int),
                    dtype='numeric', warn_on_dtype=True)
    assert len(record) == 0

Source File: sklearn_patches.py From tslearn with BSD 2-Clause "Simplified" License

5 votes

def check_supervised_y_2d(name, estimator_orig):
    tags = estimator_orig._get_tags()
    X, y = _create_small_ts_dataset()
    if tags['binary_only']:
        X = X[y != 2]
        y = y[y != 2]

    estimator = clone(estimator_orig)
    set_random_state(estimator)
    # fit
    estimator.fit(X, y)
    y_pred = estimator.predict(X)

    set_random_state(estimator)
    # Check that when a 2D y is given, a DataConversionWarning is
    # raised
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DataConversionWarning)
        warnings.simplefilter("ignore", RuntimeWarning)
        estimator.fit(X, y[:, np.newaxis])
    y_pred_2d = estimator.predict(X)
    msg = "expected 1 DataConversionWarning, got: %s" % (
        ", ".join([str(w_x) for w_x in w]))

    if not tags['multioutput'] and name not in ['TimeSeriesSVR']:
        # check that we warned if we don't support multi-output
        assert len(w) > 0, msg
        assert "DataConversionWarning('A column-vector y" \
               " was passed when a 1d array was expected" in msg
        assert_allclose(y_pred.ravel(), y_pred_2d.ravel())

Source File: estimator_checks.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def check_supervised_y_2d(name, estimator_orig):
    if "MultiTask" in name:
        # These only work on 2d, so this test makes no sense
        return
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 3))
    y = np.arange(10) % 3
    estimator = clone(estimator_orig)
    set_random_state(estimator)
    # fit
    estimator.fit(X, y)
    y_pred = estimator.predict(X)

    set_random_state(estimator)
    # Check that when a 2D y is given, a DataConversionWarning is
    # raised
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DataConversionWarning)
        warnings.simplefilter("ignore", RuntimeWarning)
        estimator.fit(X, y[:, np.newaxis])
    y_pred_2d = estimator.predict(X)
    msg = "expected 1 DataConversionWarning, got: %s" % (
        ", ".join([str(w_x) for w_x in w]))
    if name not in MULTI_OUTPUT:
        # check that we warned if we don't support multi-output
        assert_greater(len(w), 0, msg)
        assert_true("DataConversionWarning('A column-vector y"
                    " was passed when a 1d array was expected" in msg)
    assert_allclose(y_pred.ravel(), y_pred_2d.ravel())

Source File: test_gradient_boosting.py From twitter-stock-recommendation with MIT License

5 votes

def test_shape_y():
    # Test with float class labels.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    y_ = np.asarray(y, dtype=np.int32)
    y_ = y_[:, np.newaxis]

    # This will raise a DataConversionWarning that we want to
    # "always" raise, elsewhere the warnings gets ignored in the
    # later tests, and the tests that check for this warning fail
    assert_warns(DataConversionWarning, clf.fit, X, y_)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_))

Source File: estimator_checks.py From twitter-stock-recommendation with MIT License

5 votes

def check_supervised_y_2d(name, estimator_orig):
    if "MultiTask" in name:
        # These only work on 2d, so this test makes no sense
        return
    if name == "GaussianProcess":
        # Workaround: https://github.com/scikit-learn/scikit-learn/issues/10562
        return
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 3))
    y = np.arange(10) % 3
    estimator = clone(estimator_orig)
    set_random_state(estimator)
    # fit
    estimator.fit(X, y)
    y_pred = estimator.predict(X)

    set_random_state(estimator)
    # Check that when a 2D y is given, a DataConversionWarning is
    # raised
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DataConversionWarning)
        warnings.simplefilter("ignore", RuntimeWarning)
        estimator.fit(X, y[:, np.newaxis])
    y_pred_2d = estimator.predict(X)
    msg = "expected 1 DataConversionWarning, got: %s" % (
        ", ".join([str(w_x) for w_x in w]))
    if name not in MULTI_OUTPUT:
        # check that we warned if we don't support multi-output
        assert_greater(len(w), 0, msg)
        assert_true("DataConversionWarning('A column-vector y"
                    " was passed when a 1d array was expected" in msg)
    assert_allclose(y_pred.ravel(), y_pred_2d.ravel())

Source File: test_data.py From twitter-stock-recommendation with MIT License

5 votes

def test_warning_scaling_integers():
    # Check warning when scaling integer data
    X = np.array([[1, 2, 0],
                  [0, 0, 0]], dtype=np.uint8)

    w = "Data with input dtype uint8 was converted to float64"

    clean_warning_registry()
    assert_warns_message(DataConversionWarning, w, scale, X)
    assert_warns_message(DataConversionWarning, w, StandardScaler().fit, X)
    assert_warns_message(DataConversionWarning, w, MinMaxScaler().fit, X)

Source File: test_pariwise_distances.py From mars with Apache License 2.0

4 votes

def testPairwiseDistancesExecution(self):
        raw_x = np.random.rand(20, 5)
        raw_y = np.random.rand(21, 5)

        x = mt.tensor(raw_x, chunk_size=11)
        y = mt.tensor(raw_y, chunk_size=12)

        d = pairwise_distances(x, y)
        result = self.executor.execute_tensor(d, concat=True)[0]
        expected = sk_pairwise_distances(raw_x, raw_y)
        np.testing.assert_almost_equal(result, expected)

        # test precomputed
        d2 = d.copy()
        d2[0, 0] = -1
        d2 = pairwise_distances(d2, y, metric='precomputed')
        with self.assertRaises(ValueError):
            _ = self.executor.execute_tensor(d2, concat=True)[0]

        # test cdist
        weight = np.random.rand(5)
        d = pairwise_distances(x, y, metric='wminkowski', p=3,
                               w=weight)
        result = self.executor.execute_tensor(d, concat=True)[0]
        expected = sk_pairwise_distances(raw_x, raw_y, metric='wminkowski',
                                         p=3, w=weight)
        np.testing.assert_almost_equal(result, expected)

        # test pdist
        d = pairwise_distances(x, metric='hamming')
        result = self.executor.execute_tensor(d, concat=True)[0]
        expected = sk_pairwise_distances(raw_x, metric='hamming')
        np.testing.assert_almost_equal(result, expected)

        # test function metric
        m = lambda u, v: np.sqrt(((u-v)**2).sum())
        d = pairwise_distances(x, y, metric=m)
        result = self.executor.execute_tensor(d, concat=True)[0]
        expected = sk_pairwise_distances(raw_x, raw_y, metric=m)
        np.testing.assert_almost_equal(result, expected)

        assert_warns(DataConversionWarning,
                     pairwise_distances, x, y, metric='jaccard')

        with self.assertRaises(ValueError):
            _ = pairwise_distances(x, y, metric='unknown')

Python sklearn.exceptions.DataConversionWarning() Examples