Python sklearn.utils.testing.ignore_warnings() Examples

The following are 30 code examples of sklearn.utils.testing.ignore_warnings(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils.testing, or try the search function.
Example #1
Source File: test_approximate.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_sparse_input():
    """Check that LSHForest answers queries identically for sparse and
    dense versions of the same data."""
    # note: Fixed random state in sp.rand is not supported in older scipy.
    #       The test should succeed regardless.
    train_sparse = sp.rand(50, 100)
    query_sparse = sp.rand(10, 100)

    # Wrap the constructor once; the wrapper silences DeprecationWarning
    # each time it is called.
    make_forest = ignore_warnings(LSHForest, category=DeprecationWarning)
    forest_sparse = make_forest(radius=1, random_state=0).fit(train_sparse)
    forest_dense = make_forest(radius=1, random_state=0).fit(
        train_sparse.toarray())

    # k-neighbours queries: distances and indices must agree.
    d_sparse, i_sparse = forest_sparse.kneighbors(
        query_sparse, return_distance=True)
    d_dense, i_dense = forest_dense.kneighbors(
        query_sparse.toarray(), return_distance=True)

    assert_almost_equal(d_sparse, d_dense)
    assert_almost_equal(i_sparse, i_dense)

    # Radius queries: compare the per-query results one by one, distances
    # first and then indices.
    d_sparse, i_sparse = forest_sparse.radius_neighbors(
        query_sparse, return_distance=True)
    d_dense, i_dense = forest_dense.radius_neighbors(
        query_sparse.toarray(), return_distance=True)
    assert_equal(d_sparse.shape, d_dense.shape)
    for from_sparse, from_dense in ((d_sparse, d_dense), (i_sparse, i_dense)):
        for a, b in zip(from_sparse, from_dense):
            assert_almost_equal(a, b)
Example #2
Source File: test_least_angle.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_collinearity():
    """Check that lars_path is robust to collinearity in its input."""
    # The first two columns are identical, which makes the input collinear.
    design = np.array([[3., 3., 1.],
                       [2., 2., 0.],
                       [1., 1., 0]])
    target = np.array([1., 0., 0])
    rng = np.random.RandomState(0)

    quiet_lars_path = ignore_warnings(linear_model.lars_path)
    _, _, coef_path_ = quiet_lars_path(design, target, alpha_min=0.01)
    contains_nan = np.isnan(coef_path_).any()
    assert_true(not contains_nan)
    # Residual at the last point of the path: just make sure it's bounded.
    residual = np.dot(design, coef_path_[:, -1]) - target
    assert_less((residual ** 2).sum(), 1.)

    # With an all-zero target the whole coefficient path must be zero.
    n_samples = 10
    random_design = rng.rand(n_samples, 5)
    zero_target = np.zeros(n_samples)
    lasso_options = dict(Gram='auto', copy_X=False, copy_Gram=False,
                         alpha_min=0., method='lasso', verbose=0,
                         max_iter=500)
    _, _, coef_path_ = linear_model.lars_path(random_design, zero_target,
                                              **lasso_options)
    assert_array_almost_equal(coef_path_, np.zeros_like(coef_path_))
Example #3
Source File: test_discriminant_analysis.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_qda_regularization():
    """Check the effect of ``reg_param`` on QuadraticDiscriminantAnalysis."""
    # The default reg_param=0. causes issues when there is a constant
    # variable, so at least one prediction is expected to be wrong.
    unregularized = QuadraticDiscriminantAnalysis()
    with ignore_warnings():
        predicted = unregularized.fit(X2, y6).predict(X2)
    assert_true(np.any(predicted != y6))

    # A little regularization fixes the problem.
    lightly_regularized = QuadraticDiscriminantAnalysis(reg_param=0.01)
    with ignore_warnings():
        lightly_regularized.fit(X2, y6)
    predicted = lightly_regularized.predict(X2)
    assert_array_equal(predicted, y6)

    # Case n_samples_in_a_class < n_features
    regularized = QuadraticDiscriminantAnalysis(reg_param=0.1)
    with ignore_warnings():
        regularized.fit(X5, y5)
    predicted5 = regularized.predict(X5)
    assert_array_equal(predicted5, y5)
Example #4
Source File: test_feature_select.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_selectkbest_tiebreaking():
    """Check that SelectKBest selects exactly k features in case of ties."""
    # Prior to 0.11, SelectKBest would return more features than requested.

    def dummy_score(X, y):
        # Use the single sample itself as both elements of the returned
        # pair, which produces tied scores.
        return X[0], X[0]

    samples = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]]
    y = [1]
    for sample in samples:
        for k in (1, 2):
            selector = SelectKBest(dummy_score, k=k)
            reduced = ignore_warnings(selector.fit_transform)([sample], y)
            assert_equal(reduced.shape[1], k)
            assert_best_scores_kept(selector)
Example #5
Source File: test_discriminant_analysis.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_qda_regularization():
    """Check the effect of ``reg_param`` on QuadraticDiscriminantAnalysis."""
    # The default reg_param=0. causes issues when there is a constant
    # variable, so at least one prediction is expected to be wrong.
    unregularized = QuadraticDiscriminantAnalysis()
    with ignore_warnings():
        predicted = unregularized.fit(X2, y6).predict(X2)
    assert np.any(predicted != y6)

    # A little regularization fixes the problem.
    lightly_regularized = QuadraticDiscriminantAnalysis(reg_param=0.01)
    with ignore_warnings():
        lightly_regularized.fit(X2, y6)
    predicted = lightly_regularized.predict(X2)
    assert_array_equal(predicted, y6)

    # Case n_samples_in_a_class < n_features
    regularized = QuadraticDiscriminantAnalysis(reg_param=0.1)
    with ignore_warnings():
        regularized.fit(X5, y5)
    predicted5 = regularized.predict(X5)
    assert_array_equal(predicted5, y5)
Example #6
Source File: test_feature_select.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_selectkbest_tiebreaking():
    """Check that SelectKBest selects exactly k features in case of ties."""
    # Prior to 0.11, SelectKBest would return more features than requested.

    def dummy_score(X, y):
        # Use the single sample itself as both elements of the returned
        # pair, which produces tied scores.
        return X[0], X[0]

    samples = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]]
    y = [1]
    for sample in samples:
        for k in (1, 2):
            selector = SelectKBest(dummy_score, k=k)
            reduced = ignore_warnings(selector.fit_transform)([sample], y)
            assert_equal(reduced.shape[1], k)
            assert_best_scores_kept(selector)
Example #7
Source File: test_encoders.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_one_hot_encoder_dense():
    """Check OneHotEncoder output when ``sparse=False``."""
    X = [[3, 2, 1], [0, 1, 1]]
    encoder = OneHotEncoder(sparse=False)
    expected_active = np.where([1, 0, 0, 1, 0, 1, 1, 0, 1])[0]
    expected_dense = np.array([[0., 1., 0., 1., 1.],
                               [1., 0., 1., 0., 1.]])

    with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
        # No explicit value ranges are passed; the encoder discovers the
        # max values automatically.
        X_trans = encoder.fit_transform(X)
        assert_equal(X_trans.shape, (2, 5))
        assert_array_equal(encoder.active_features_, expected_active)
        assert_array_equal(encoder.feature_indices_, [0, 4, 7, 9])

    # check outcome
    assert_array_equal(X_trans, expected_dense)
Example #8
Source File: test_approximate.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_distances():
    """Check that returned neighbors are ordered from closest to farthest."""
    n_samples, n_features = 12, 2
    n_iter = 10
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)

    forest = ignore_warnings(LSHForest, category=DeprecationWarning)()
    ignore_warnings(forest.fit)(X)

    for _ in range(n_iter):
        # Draw the neighbor count first and the query row second, so the
        # random stream is consumed in a fixed order.
        n_neighbors = rng.randint(0, n_samples)
        query_row = rng.randint(0, n_samples)
        query = X[query_row].reshape(1, -1)
        distances, neighbors = forest.kneighbors(
            query, n_neighbors=n_neighbors, return_distance=True)

        # Closest-to-farthest ordering means the distance values never
        # decrease.
        steps = np.diff(distances[0])
        assert_true(np.all(steps >= 0))

        # Note: the radius_neighbors method does not guarantee the order of
        # the results.
Example #9
Source File: keyword_based.py    From conversational-datasets with Apache License 2.0 6 votes vote down vote up
def train(self, contexts, responses):
        """Fit the tf-idf transform on all texts and store idf statistics.

        Sets ``_vectorizer``, ``_tfidf_transform``, ``_idf_diag`` and
        ``_average_document_length`` on the instance, then prints the
        average document length.
        """
        with ignore_warnings():
            # Ignore deprecated `non_negative` warning.
            self._vectorizer = HashingVectorizer(non_negative=True)
        self._tfidf_transform = TfidfTransformer()

        corpus = contexts + responses
        hashed = self._vectorizer.transform(corpus)
        count_matrix = self._tfidf_transform.fit_transform(hashed)
        n_samples, n_features = count_matrix.shape

        # idf = log((N - df + 0.5) / (df + 0.5)), kept as a diagonal matrix.
        df = _document_frequency(count_matrix)
        idf = np.log((n_samples - df + 0.5) / (df + 0.5))
        self._idf_diag = sp.spdiags(
            idf, diags=0, m=n_features, n=n_features
        )

        # Row sums of the transformed matrix serve as document lengths.
        document_lengths = count_matrix.sum(axis=1)
        self._average_document_length = np.mean(document_lengths)
        print(self._average_document_length)
Example #10
Source File: test_approximate.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_graphs():
    """Smoke test the graph methods of LSHForest."""
    n_features = 3
    rng = np.random.RandomState(42)

    for n_samples in (5, 10, 20):
        X = rng.rand(n_samples, n_features)
        forest = ignore_warnings(LSHForest, category=DeprecationWarning)(
            min_hash_match=0)
        ignore_warnings(forest.fit)(X)

        knn_graph = forest.kneighbors_graph(X)
        radius_graph = forest.radius_neighbors_graph(X)

        # Both graphs must be square with one row/column per sample.
        assert_equal(knn_graph.shape[0], n_samples)
        assert_equal(knn_graph.shape[1], n_samples)
        assert_equal(radius_graph.shape[0], n_samples)
        assert_equal(radius_graph.shape[1], n_samples)
Example #11
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_cross_val_predict_input_types():
    """Exercise cross_val_predict with sparse, multioutput, list and 3d
    inputs."""

    def is_list(x):
        return isinstance(x, list)

    def is_3d(x):
        return x.ndim == 3

    ridge = Ridge()
    # Smoke test
    predictions = cval.cross_val_predict(ridge, X, y)
    assert_equal(predictions.shape, (10,))

    # Sparse X with multioutput y (X itself is used as the target).
    with ignore_warnings(category=ConvergenceWarning):
        predictions = cval.cross_val_predict(ridge, X_sparse, X)
    assert_equal(predictions.shape, (10, 2))

    # Sparse X with the one-dimensional y.
    predictions = cval.cross_val_predict(ridge, X_sparse, y)
    assert_array_equal(predictions.shape, (10,))

    # Multioutput y once more, this time checked with assert_array_equal.
    with ignore_warnings(category=ConvergenceWarning):
        predictions = cval.cross_val_predict(ridge, X_sparse, X)
    assert_array_equal(predictions.shape, (10, 2))

    # X and y as lists: the checking classifier validates what it receives.
    x_checker = CheckingClassifier(check_X=is_list)
    predictions = cval.cross_val_predict(x_checker, X.tolist(), y.tolist())

    y_checker = CheckingClassifier(check_y=is_list)
    predictions = cval.cross_val_predict(y_checker, X, y.tolist())

    # 3d X
    X_3d = X[:, :, np.newaxis]
    ndim_checker = CheckingClassifier(check_X=is_3d)
    predictions = cval.cross_val_predict(ndim_checker, X_3d, y)
    assert_array_equal(predictions.shape, (10,))
Example #12
Source File: test_coordinate_descent.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_enet_l1_ratio():
    """Check l1_ratio=0 handling in estimators that use _alpha_grid.

    Without an explicit alpha grid a ValueError with a specific message is
    expected; with a manually supplied grid, l1_ratio=0 must be accepted and
    give coefficients close to those for a tiny positive l1_ratio.
    """
    msg = ("Automatic alpha grid generation is not supported for l1_ratio=0. "
           "Please supply a grid by providing your estimator with the "
           "appropriate `alphas=` argument.")
    X = np.array([[1, 2, 4, 5, 8], [3, 5, 7, 7, 8]]).T
    y = np.array([12, 10, 11, 21, 5])
    y_2d = y[:, None]

    failing_single = ElasticNetCV(l1_ratio=0, random_state=42)
    assert_raise_message(ValueError, msg, failing_single.fit, X, y)
    failing_multi = MultiTaskElasticNetCV(l1_ratio=0, random_state=42)
    assert_raise_message(ValueError, msg, failing_multi.fit, X, y_2d)

    # Test that l1_ratio=0 is allowed if we supply a grid manually
    estkwds = dict(alphas=[0.1, 10], random_state=42)

    est_desired = ElasticNetCV(l1_ratio=0.00001, **estkwds)
    est = ElasticNetCV(l1_ratio=0, **estkwds)
    with ignore_warnings():
        est_desired.fit(X, y)
        est.fit(X, y)
    assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)

    est_desired = MultiTaskElasticNetCV(l1_ratio=0.00001, **estkwds)
    est = MultiTaskElasticNetCV(l1_ratio=0, **estkwds)
    with ignore_warnings():
        est.fit(X, y_2d)
        est_desired.fit(X, y_2d)
    assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)
Example #13
Source File: test_coordinate_descent.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_warm_start():
    """Check that two warm-started 5-iteration fits match one 10-iteration
    fit."""
    X, y, _, _ = build_dataset()

    warm = ElasticNet(alpha=0.1, max_iter=5, warm_start=True)
    quiet_warm_fit = ignore_warnings(warm.fit)
    quiet_warm_fit(X, y)
    quiet_warm_fit(X, y)  # second round with another 5 iterations

    cold = ElasticNet(alpha=0.1, max_iter=10)
    ignore_warnings(cold.fit)(X, y)
    assert_array_almost_equal(cold.coef_, warm.coef_)
Example #14
Source File: test_dict_learning.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_sparse_encode_error_default_sparsity():
    """Check the code shape from sparse_encode with OMP when
    ``n_nonzero_coefs`` is None."""
    rng = np.random.RandomState(0)
    data = rng.randn(100, 64)
    dictionary = rng.randn(2, 64)
    quiet_encode = ignore_warnings(sparse_encode)
    code = quiet_encode(data, dictionary, algorithm='omp',
                        n_nonzero_coefs=None)
    # One row per sample, one column per dictionary atom.
    assert_equal(code.shape, (100, 2))
Example #15
Source File: test_approximate.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_neighbors_accuracy_with_n_candidates():
    """Check that accuracy increases as `n_candidates` increases."""
    n_candidates_values = np.array([.1, 50, 500])
    n_samples, n_features = 100, 10
    n_iter = 10
    n_points = 5
    rng = np.random.RandomState(42)
    accuracies = np.zeros(n_candidates_values.shape[0], dtype=float)
    X = rng.rand(n_samples, n_features)

    for i, n_candidates in enumerate(n_candidates_values):
        forest = ignore_warnings(LSHForest, category=DeprecationWarning)(
            n_candidates=n_candidates)
        ignore_warnings(forest.fit)(X)
        for _ in range(n_iter):
            query = X[rng.randint(0, n_samples)].reshape(1, -1)

            approx = forest.kneighbors(query, n_neighbors=n_points,
                                       return_distance=False)
            # Exact answer: the n_points smallest cosine distances.
            exact_distances = pairwise_distances(query, X, metric='cosine')
            exact = np.argsort(exact_distances)[0, :n_points]

            # Fraction of the exact neighbors that the forest also found.
            n_common = np.intersect1d(exact, approx).shape[0]
            accuracies[i] += n_common / float(n_points)

        accuracies[i] /= float(n_iter)
    # Sorted accuracies should be equal to original accuracies
    print('accuracies:', accuracies)
    assert_true(np.all(np.diff(accuracies) >= 0),
                msg="Accuracies are not non-decreasing.")
    # Highest accuracy should be strictly greater than the lowest
    assert_true(np.ptp(accuracies) > 0,
                msg="Highest accuracy is not strictly greater than lowest.")
Example #16
Source File: test_approximate.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_neighbors_accuracy_with_n_estimators():
    """Check that accuracy increases as `n_estimators` increases."""
    n_estimators = np.array([1, 10, 100])
    n_samples, n_features = 100, 10
    n_iter = 10
    n_points = 5
    rng = np.random.RandomState(42)
    accuracies = np.zeros(n_estimators.shape[0], dtype=float)
    X = rng.rand(n_samples, n_features)

    for i, t in enumerate(n_estimators):
        forest = ignore_warnings(LSHForest, category=DeprecationWarning)(
            n_candidates=500, n_estimators=t)
        ignore_warnings(forest.fit)(X)
        for _ in range(n_iter):
            query = X[rng.randint(0, n_samples)].reshape(1, -1)
            approx = forest.kneighbors(query, n_neighbors=n_points,
                                       return_distance=False)
            # Exact answer: the n_points smallest cosine distances.
            exact_distances = pairwise_distances(query, X, metric='cosine')
            exact = np.argsort(exact_distances)[0, :n_points]

            # Fraction of the exact neighbors that the forest also found.
            n_common = np.intersect1d(exact, approx).shape[0]
            accuracies[i] += n_common / float(n_points)

        accuracies[i] /= float(n_iter)
    # Sorted accuracies should be equal to original accuracies
    assert_true(np.all(np.diff(accuracies) >= 0),
                msg="Accuracies are not non-decreasing.")
    # Highest accuracy should be strictly greater than the lowest
    assert_true(np.ptp(accuracies) > 0,
                msg="Highest accuracy is not strictly greater than lowest.")
Example #17
Source File: test_approximate.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_fit():
    """Check that `fit` sets all attribute values correctly."""
    n_samples, n_features = 12, 2
    n_estimators = 5
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)

    forest = ignore_warnings(LSHForest, category=DeprecationWarning)(
        n_estimators=n_estimators)
    ignore_warnings(forest.fit)(X)

    # The fitted data is stored unchanged.
    assert_array_equal(X, forest._fit_X)

    # One hash function g(p) per tree, each with hash length 32.
    assert_equal(n_estimators, len(forest.hash_functions_))
    assert_equal(32, forest.hash_functions_[0].components_.shape[0])

    # One tree per estimator, each holding an entry for every data point.
    assert_equal(n_estimators, len(forest.trees_))
    assert_equal(n_samples, len(forest.trees_[0]))

    # One array of original indices (after sorting the hashes) per tree,
    # again with an entry for every data point.
    assert_equal(n_estimators, len(forest.original_indices_))
    assert_equal(n_samples, len(forest.original_indices_[0]))
Example #18
Source File: test_approximate.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_partial_fit():
    """Check that inserting an array is consistent with the fitted data.

    `partial_fit` should set all attribute values correctly.
    """
    n_samples = 12
    n_samples_partial_fit = 3
    n_features = 2
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)
    X_partial_fit = rng.rand(n_samples_partial_fit, n_features)
    expected_total = n_samples + n_samples_partial_fit

    forest = ignore_warnings(LSHForest, category=DeprecationWarning)()

    # partial_fit on an unfitted estimator stores the data as-is.
    ignore_warnings(forest.partial_fit)(X)
    assert_array_equal(X, forest._fit_X)

    ignore_warnings(forest.fit)(X)

    # Inserting data with the wrong number of features must be rejected.
    wrong_width = np.random.randn(n_samples_partial_fit, n_features - 1)
    assert_raises(ValueError, forest.partial_fit, wrong_width)

    ignore_warnings(forest.partial_fit)(X_partial_fit)

    # After insertion the stored data, the original indices of the first
    # tree and the second tree itself must each have grown by
    # n_samples_partial_fit entries.
    assert_equal(forest._fit_X.shape[0], expected_total)
    assert_equal(len(forest.original_indices_[0]), expected_total)
    assert_equal(len(forest.trees_[1]), expected_total)
Example #19
Source File: test_approximate.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_hash_functions():
    """Check the randomness of the hash functions.

    The variance and mean of each hash function (projection vector) should
    differ from those of the collection of all hash functions. If the hash
    functions were not randomly built (seeded with the same value), the
    variances and means of all functions would be equal.
    """
    n_samples, n_features = 12, 2
    n_estimators = 5
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)

    forest = ignore_warnings(LSHForest, category=DeprecationWarning)(
        n_estimators=n_estimators,
        random_state=rng.randint(0, np.iinfo(np.int32).max))
    ignore_warnings(forest.fit)(X)

    all_components = [forest.hash_functions_[i].components_
                      for i in range(n_estimators)]

    # Variances first ...
    overall_var = np.var(all_components)
    for components in all_components:
        assert_not_equal(overall_var, np.var(components))

    # ... then means, in the same estimator order.
    overall_mean = np.mean(all_components)
    for components in all_components:
        assert_not_equal(overall_mean, np.mean(components))
Example #20
Source File: test_dict_learning.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_dict_learning_lassocd_readonly_data():
    """Check DictionaryLearning with 'lasso_cd' on memory-mapped input."""
    n_components = 12
    with TempMemmap(X) as X_read_only:
        learner = DictionaryLearning(
            n_components, transform_algorithm='lasso_cd',
            transform_alpha=0.001, random_state=0, n_jobs=-1)
        with ignore_warnings(category=ConvergenceWarning):
            code = learner.fit(X_read_only).transform(X_read_only)
        # The code times the learned components should reconstruct the data.
        reconstruction = np.dot(code, learner.components_)
        assert_array_almost_equal(reconstruction, X_read_only, decimal=2)
Example #21
Source File: estimator_checks.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def check_estimator_sparse_data(name, estimator_orig):
    """Check that an estimator handles sparse input or fails gracefully.

    The estimator is fitted on the same data in several sparse formats.
    A TypeError whose repr mentions 'sparse' is accepted as a graceful
    refusal; any other exception is reported and re-raised.

    Parameters
    ----------
    name : str
        Estimator name, used in the diagnostic messages and to special-case
        'Scaler' / 'StandardScaler'.
    estimator_orig : estimator
        Estimator to check; it is cloned before use.
    """
    rng = np.random.RandomState(0)
    X = rng.rand(40, 10)
    X[X < .8] = 0
    X_csr = sparse.csr_matrix(X)
    # Use the builtin ``int``: the ``np.int`` alias was deprecated in
    # NumPy 1.20 and removed in 1.24. It was an alias of ``int``, so the
    # resulting labels (integers in 0..3) are unchanged.
    y = (4 * rng.rand(40)).astype(int)
    # catch deprecation warnings
    with ignore_warnings(category=DeprecationWarning):
        estimator = clone(estimator_orig)
    y = multioutput_estimator_convert_y_2d(estimator, y)
    for sparse_format in ['csr', 'csc', 'dok', 'lil', 'coo', 'dia', 'bsr']:
        X = X_csr.asformat(sparse_format)
        # catch deprecation warnings
        with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
            if name in ['Scaler', 'StandardScaler']:
                estimator = clone(estimator).set_params(with_mean=False)
            else:
                estimator = clone(estimator)
        # fit and predict
        try:
            with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
                estimator.fit(X, y)
            if hasattr(estimator, "predict"):
                pred = estimator.predict(X)
                assert_equal(pred.shape, (X.shape[0],))
            if hasattr(estimator, 'predict_proba'):
                probs = estimator.predict_proba(X)
                # Four columns: presumably one per label value 0..3.
                assert_equal(probs.shape, (X.shape[0], 4))
        except TypeError as e:
            # A TypeError that mentions 'sparse' is the accepted way to
            # refuse sparse input; it is swallowed and the loop continues.
            if 'sparse' not in repr(e):
                print("Estimator %s doesn't seem to fail gracefully on "
                      "sparse data: error message state explicitly that "
                      "sparse input is not supported if this is not the case."
                      % name)
                raise
        except Exception:
            print("Estimator %s doesn't seem to fail gracefully on "
                  "sparse data: it should raise a TypeError if sparse input "
                  "is explicitly not supported." % name)
            raise
Example #22
Source File: keyword_based.py    From conversational-datasets with Apache License 2.0 5 votes vote down vote up
def _vectorize(self, strings):
        """Turn the given strings into BM25-weighted sparse vectors."""
        with ignore_warnings():
            # Ignore deprecated `non_negative` warning.
            hashed = self._vectorizer.transform(strings)
            tf_idf_vectors = self._tfidf_transform.transform(hashed)
        tf_idf_vectors = sp.csr_matrix(
            tf_idf_vectors, dtype=np.float64, copy=True)
        values = tf_idf_vectors.data
        indptr = tf_idf_vectors.indptr

        # Per-row sum of the entries, used as the document length.
        # Shape is (n_samples, 1)
        document_lengths = tf_idf_vectors.sum(axis=1)

        # Stored (non-zero) entries per row, from consecutive indptr offsets.
        # Shape is (n_samples, )
        num_terms = indptr[1:] - indptr[0:-1]

        # Each row's document length repeated once per stored entry, so it
        # lines up with `values`.
        # Shape is (sum(num_terms), )
        rep = np.repeat(np.asarray(document_lengths), num_terms)

        # Compute BM25 score only for non-zero elements
        length_norm = (
            1 - self._b + self._b * rep / self._average_document_length)
        denominator = values + self._k1 * length_norm
        data = values * (self._k1 + 1) / denominator

        # Rebuild a CSR matrix with the same sparsity pattern, then scale
        # the columns by the stored idf diagonal.
        vectors = sp.csr_matrix(
            (data, tf_idf_vectors.indices, indptr),
            shape=tf_idf_vectors.shape)
        vectors = vectors * self._idf_diag

        return vectors
Example #23
Source File: test_feature_select.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_selectpercentile_tiebreaking():
    """Check that SelectPercentile selects the right n_features in case of
    ties."""

    def dummy_score(X, y):
        # Use the single sample itself as both elements of the returned
        # pair, which produces tied scores.
        return X[0], X[0]

    samples = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]]
    y = [1]
    # (percentile, expected number of selected features out of three)
    cases = ((34, 1), (67, 2))
    for sample in samples:
        for percentile, n_expected in cases:
            selector = SelectPercentile(dummy_score, percentile=percentile)
            reduced = ignore_warnings(selector.fit_transform)([sample], y)
            assert_equal(reduced.shape[1], n_expected)
            assert_best_scores_kept(selector)
Example #24
Source File: test_iforest.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_iforest():
    """Check Isolation Forest for various parameter settings."""
    X_train = np.array([[0, 1], [1, 2]])
    X_test = np.array([[2, 1], [1, 1]])

    param_space = {"n_estimators": [3],
                   "max_samples": [0.5, 1.0, 3],
                   "bootstrap": [True, False]}
    grid = ParameterGrid(param_space)

    # Smoke test only: every combination must fit and predict without
    # raising.
    with ignore_warnings():
        for params in grid:
            forest = IsolationForest(random_state=rng, **params)
            forest.fit(X_train).predict(X_test)
Example #25
Source File: estimator_checks.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def check_classifiers_one_label(name, classifier_orig):
    """Check a classifier's behavior when only one class is present.

    Fitting may raise a ValueError whose repr mentions 'class', in which
    case the check ends. Otherwise the classifier must predict the single
    training label for every test sample.
    """
    fit_failure_msg = "Classifier can't train when only one class is present."
    predict_failure_msg = ("Classifier can't predict when only one class is "
                           "present.")
    rnd = np.random.RandomState(0)
    X_train = rnd.uniform(size=(10, 3))
    X_test = rnd.uniform(size=(10, 3))
    y = np.ones(10)
    # catch deprecation warnings
    with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
        classifier = clone(classifier_orig)
        # try to fit
        try:
            classifier.fit(X_train, y)
        except ValueError as e:
            if 'class' in repr(e):
                # Accepted: the classifier refuses single-class data.
                return
            print(fit_failure_msg, classifier, e)
            traceback.print_exc(file=sys.stdout)
            raise e
        except Exception as exc:
            print(fit_failure_msg, classifier, exc)
            traceback.print_exc(file=sys.stdout)
            raise exc
        # predict
        try:
            predicted = classifier.predict(X_test)
            assert_array_equal(predicted, y)
        except Exception as exc:
            print(predict_failure_msg, classifier, exc)
            raise exc
Example #26
Source File: estimator_checks.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def check_classifiers_one_label(name, classifier_orig):
    """Check a classifier's behavior when only one class is present.

    Fitting may raise a ValueError whose repr mentions 'class', in which
    case the check ends. Otherwise the classifier must predict the single
    training label for every test sample.
    """
    fit_failure_msg = "Classifier can't train when only one class is present."
    predict_failure_msg = ("Classifier can't predict when only one class is "
                           "present.")
    rnd = np.random.RandomState(0)
    X_train = rnd.uniform(size=(10, 3))
    X_test = rnd.uniform(size=(10, 3))
    y = np.ones(10)
    # catch deprecation warnings
    with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
        classifier = clone(classifier_orig)
        # try to fit
        try:
            classifier.fit(X_train, y)
        except ValueError as e:
            if 'class' in repr(e):
                # Accepted: the classifier refuses single-class data.
                return
            print(fit_failure_msg, classifier, e)
            traceback.print_exc(file=sys.stdout)
            raise e
        except Exception as exc:
            print(fit_failure_msg, classifier, exc)
            traceback.print_exc(file=sys.stdout)
            raise exc
        # predict
        try:
            predicted = classifier.predict(X_test)
            assert_array_equal(predicted, y)
        except Exception as exc:
            print(predict_failure_msg, classifier, exc)
            raise exc
Example #27
Source File: estimator_checks.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def check_estimator_sparse_data(name, estimator_orig):
    """Check that an estimator handles sparse input or fails gracefully.

    The estimator is fitted on the same data in several sparse formats.
    A TypeError whose repr mentions 'sparse' is accepted as a graceful
    refusal; any other exception is reported and re-raised.

    Parameters
    ----------
    name : str
        Estimator name, used in the diagnostic messages and to special-case
        'Scaler' / 'StandardScaler'.
    estimator_orig : estimator
        Estimator to check; it is cloned before use.
    """
    rng = np.random.RandomState(0)
    X = rng.rand(40, 10)
    X[X < .8] = 0
    X_csr = sparse.csr_matrix(X)
    # Use the builtin ``int``: the ``np.int`` alias was deprecated in
    # NumPy 1.20 and removed in 1.24. It was an alias of ``int``, so the
    # resulting labels (integers in 0..3) are unchanged.
    y = (4 * rng.rand(40)).astype(int)
    # catch deprecation warnings
    with ignore_warnings(category=DeprecationWarning):
        estimator = clone(estimator_orig)
    y = multioutput_estimator_convert_y_2d(estimator, y)
    for sparse_format in ['csr', 'csc', 'dok', 'lil', 'coo', 'dia', 'bsr']:
        X = X_csr.asformat(sparse_format)
        # catch deprecation warnings
        with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
            if name in ['Scaler', 'StandardScaler']:
                estimator = clone(estimator).set_params(with_mean=False)
            else:
                estimator = clone(estimator)
        # fit and predict
        try:
            with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
                estimator.fit(X, y)
            if hasattr(estimator, "predict"):
                pred = estimator.predict(X)
                assert_equal(pred.shape, (X.shape[0],))
            if hasattr(estimator, 'predict_proba'):
                probs = estimator.predict_proba(X)
                # Four columns: presumably one per label value 0..3.
                assert_equal(probs.shape, (X.shape[0], 4))
        except TypeError as e:
            # A TypeError that mentions 'sparse' is the accepted way to
            # refuse sparse input; it is swallowed and the loop continues.
            if 'sparse' not in repr(e):
                print("Estimator %s doesn't seem to fail gracefully on "
                      "sparse data: error message state explicitly that "
                      "sparse input is not supported if this is not the case."
                      % name)
                raise
        except Exception:
            print("Estimator %s doesn't seem to fail gracefully on "
                  "sparse data: it should raise a TypeError if sparse input "
                  "is explicitly not supported." % name)
            raise
Example #28
Source File: test_iforest.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_iforest():
    """Check Isolation Forest for various parameter settings."""
    X_train = np.array([[0, 1], [1, 2]])
    X_test = np.array([[2, 1], [1, 1]])

    param_space = {"n_estimators": [3],
                   "max_samples": [0.5, 1.0, 3],
                   "bootstrap": [True, False]}
    grid = ParameterGrid(param_space)

    # Smoke test only: every combination must fit and predict without
    # raising.
    with ignore_warnings():
        for params in grid:
            forest = IsolationForest(random_state=rng, **params)
            forest.fit(X_train).predict(X_test)
Example #29
Source File: test_knorae.py    From DESlib with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_check_estimator():
    """Run the estimator checks on KNORAE with RuntimeWarning silenced."""
    silence_runtime_warnings = ignore_warnings(category=RuntimeWarning)
    with silence_runtime_warnings:
        check_estimator(KNORAE)
Example #30
Source File: test_sklearn_mlp_converter.py    From sklearn-onnx with MIT License 5 votes vote down vote up
def ignore_warnings(category=Warning):
    """Return a no-op decorator; ``category`` is accepted but ignored.

    The returned callable hands back its argument unchanged, so no
    warnings are actually filtered.
    """
    def _passthrough(x):
        return x

    return _passthrough