Python sklearn.utils.validation.check_random_state() Examples
The following are 30 code examples of sklearn.utils.validation.check_random_state(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils.validation, or try the search function.
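Before the examples, here is a minimal sketch of what check_random_state itself does, following the scikit-learn convention that all of the examples below rely on: an int seed produces a new RandomState, an existing RandomState is returned unchanged, and None maps to NumPy's global RandomState singleton.

from sklearn.utils.validation import check_random_state
import numpy as np

rng = check_random_state(42)        # int seed -> new RandomState seeded with 42
same = check_random_state(rng)      # RandomState instance -> returned unchanged
default = check_random_state(None)  # None -> the global numpy RandomState singleton

assert isinstance(rng, np.random.RandomState)
assert same is rng
print(rng.uniform(size=3))          # reproducible draws from the seeded generator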
Example #1
Source File: test_ranking.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def check_zero_or_all_relevant_labels(lrap_score):
    random_state = check_random_state(0)

    for n_labels in range(2, 5):
        y_score = random_state.uniform(size=(1, n_labels))
        y_score_ties = np.zeros_like(y_score)

        # No relevant labels
        y_true = np.zeros((1, n_labels))
        assert_equal(lrap_score(y_true, y_score), 1.)
        assert_equal(lrap_score(y_true, y_score_ties), 1.)

        # Only relevant labels
        y_true = np.ones((1, n_labels))
        assert_equal(lrap_score(y_true, y_score), 1.)
        assert_equal(lrap_score(y_true, y_score_ties), 1.)

    # Degenerate case: only one label
    assert_almost_equal(lrap_score([[1], [0], [1], [0]],
                                   [[0.5], [0.5], [0.5], [0.5]]), 1.)
Example #2
Source File: test_common.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_thresholded_multilabel_multioutput_permutations_invariance(name):
    random_state = check_random_state(0)
    n_samples, n_classes = 20, 4
    y_true = random_state.randint(0, 2, size=(n_samples, n_classes))
    y_score = random_state.normal(size=y_true.shape)

    # Makes sure all samples have at least one label. This works around errors
    # when running metrics where average="sample"
    y_true[y_true.sum(1) == 4, 0] = 0
    y_true[y_true.sum(1) == 0, 0] = 1

    metric = ALL_METRICS[name]
    score = metric(y_true, y_score)

    for perm in permutations(range(n_classes), n_classes):
        y_score_perm = y_score[:, perm]
        y_true_perm = y_true[:, perm]

        current_score = metric(y_true_perm, y_score_perm)
        assert_almost_equal(score, current_score)
Example #3
Source File: test_neighbors.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_KNeighborsRegressor_multioutput_uniform_weight(algorithm, weights):
    # Test k-neighbors in multi-output regression with uniform weight
    rng = check_random_state(0)
    n_features = 5
    n_samples = 40
    n_output = 4

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_output)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    knn = neighbors.KNeighborsRegressor(weights=weights, algorithm=algorithm)
    knn.fit(X_train, y_train)

    neigh_idx = knn.kneighbors(X_test, return_distance=False)
    y_pred_idx = np.array([np.mean(y_train[idx], axis=0)
                           for idx in neigh_idx])

    y_pred = knn.predict(X_test)

    assert_equal(y_pred.shape, y_test.shape)
    assert_equal(y_pred_idx.shape, y_test.shape)
    assert_array_almost_equal(y_pred, y_pred_idx)
Example #4
Source File: test_common.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_multilabel_sample_weight_invariance(name):
    # multilabel indicator
    random_state = check_random_state(0)
    _, ya = make_multilabel_classification(n_features=1, n_classes=20,
                                           random_state=0, n_samples=100,
                                           allow_unlabeled=False)
    _, yb = make_multilabel_classification(n_features=1, n_classes=20,
                                           random_state=1, n_samples=100,
                                           allow_unlabeled=False)
    y_true = np.vstack([ya, yb])
    y_pred = np.vstack([ya, ya])
    y_score = random_state.randint(1, 4, size=y_true.shape)

    metric = ALL_METRICS[name]
    if name in THRESHOLDED_METRICS:
        check_sample_weight_invariance(name, metric, y_true, y_score)
    else:
        check_sample_weight_invariance(name, metric, y_true, y_pred)
Example #5
Source File: data.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _check_inputs(
    self,
    X: Union[ArrayLike, DataFrameType],
    accept_sparse_negative: bool = False,
    copy: bool = False,
    in_fit: bool = True,
) -> Union[ArrayLike, DataFrameType]:
    if isinstance(X, (pd.DataFrame, dd.DataFrame)):
        X = X.values
    if isinstance(X, np.ndarray):
        C = len(X) // min(multiprocessing.cpu_count(), 2)
        X = da.from_array(X, chunks=C)

    rng = check_random_state(self.random_state)
    # TODO: non-float dtypes?
    # TODO: sparse arrays?
    # TODO: mix of sparse, dense?
    sample = rng.uniform(size=(5, X.shape[1])).astype(X.dtype)
    super(QuantileTransformer, self)._check_inputs(
        sample,
        accept_sparse_negative=accept_sparse_negative,
        copy=copy,
        in_fit=in_fit,
    )
    return X
Example #6
Source File: test_common.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_multilabel_label_permutations_invariance(name):
    random_state = check_random_state(0)
    n_samples, n_classes = 20, 4
    y_true = random_state.randint(0, 2, size=(n_samples, n_classes))
    y_score = random_state.randint(0, 2, size=(n_samples, n_classes))

    metric = ALL_METRICS[name]
    score = metric(y_true, y_score)

    for perm in permutations(range(n_classes), n_classes):
        y_score_perm = y_score[:, perm]
        y_true_perm = y_true[:, perm]

        current_score = metric(y_true_perm, y_score_perm)
        assert_almost_equal(score, current_score)
Example #7
Source File: cart.py From Hands-on-Supervised-Machine-Learning-with-Python with MIT License | 6 votes |
def __init__(self, X, y, criterion, min_samples_split, max_depth,
             n_val_sample, random_state):
    # make sure max_depth > 1
    if max_depth < 2:
        raise ValueError("max depth must be > 1")

    # check the input arrays, and if it's classification validate the
    # target values in y
    X, y = check_X_y(X, y, accept_sparse=False, dtype=None, copy=True)
    if is_classifier(self):
        check_classification_targets(y)

    # hyper parameters so we can later inspect attributes of the model
    self.min_samples_split = min_samples_split
    self.max_depth = max_depth
    self.n_val_sample = n_val_sample
    self.random_state = random_state

    # create the splitting class
    random_state = check_random_state(random_state)
    self.splitter = RandomSplitter(random_state, criterion, n_val_sample)

    # grow the tree depth first
    self.tree = self._find_next_split(X, y, 0)
Example #8
Source File: test_skewness.py From skoot with MIT License | 6 votes |
def test_yj_fit_transform():
    yj = YeoJohnsonTransformer(cols=X.columns[:2])  # just first two cols
    trans = yj.fit_transform(X)
    assert isinstance(trans, pd.DataFrame)

    # Test it on a random...
    m, n = 1000, 5
    random_state = check_random_state(42)
    x = random_state.rand(m, n)

    # make some random signs
    mask = random_state.rand(m, n) % 2 < 0.5
    signs = np.ones((m, n))
    signs[~mask] = -1
    x *= signs

    YeoJohnsonTransformer().fit(x)
Example #9
Source File: test_ranking.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def check_alternative_lrap_implementation(lrap_score, n_classes=5,
                                          n_samples=20, random_state=0):
    _, y_true = make_multilabel_classification(n_features=1,
                                               allow_unlabeled=False,
                                               random_state=random_state,
                                               n_classes=n_classes,
                                               n_samples=n_samples)

    # Score with ties
    y_score = sparse_random_matrix(n_components=y_true.shape[0],
                                   n_features=y_true.shape[1],
                                   random_state=random_state)

    if hasattr(y_score, "toarray"):
        y_score = y_score.toarray()
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)

    # Uniform score
    random_state = check_random_state(random_state)
    y_score = random_state.uniform(size=(n_samples, n_classes))
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)
Example #10
Source File: test_forest.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_parallel_train():
    rng = check_random_state(12321)
    n_samples, n_features = 80, 30
    X_train = rng.randn(n_samples, n_features)
    y_train = rng.randint(0, 2, n_samples)

    clfs = [
        RandomForestClassifier(n_estimators=20, n_jobs=n_jobs,
                               random_state=12345).fit(X_train, y_train)
        for n_jobs in [1, 2, 3, 8, 16, 32]
    ]

    X_test = rng.randn(n_samples, n_features)
    probas = [clf.predict_proba(X_test) for clf in clfs]
    for proba1, proba2 in zip(probas, probas[1:]):
        assert_array_almost_equal(proba1, proba2)
Example #11
Source File: rand.py From revrand with Apache License 2.0 | 6 votes |
def endless_permutations(N, random_state=None):
    """
    Generate an endless sequence of random integers from permutations of the
    set [0, ..., N).

    If we call this N times, we will sweep through the entire set without
    replacement, on the (N+1)th call a new permutation will be created, etc.

    Parameters
    ----------
    N: int
        the length of the set
    random_state: int or RandomState, optional
        random seed

    Yields
    ------
    int:
        a random int from the set [0, ..., N)
    """
    generator = check_random_state(random_state)
    while True:
        batch_inds = generator.permutation(N)
        for b in batch_inds:
            yield b
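A brief usage sketch of the generator above, assuming endless_permutations is defined as shown (the values drawn are illustrative, not from the revrand project): consuming N items yields one full permutation of the set, and the next N items form a fresh permutation.

from itertools import islice

gen = endless_permutations(5, random_state=0)
first_sweep = list(islice(gen, 5))
second_sweep = list(islice(gen, 5))

# Each sweep covers {0, ..., 4} exactly once, without replacement.
assert sorted(first_sweep) == [0, 1, 2, 3, 4]
assert sorted(second_sweep) == [0, 1, 2, 3, 4]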
Example #12
Source File: test_genetic.py From gplearn with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_get_subtree():
    """Check that get subtree does the same thing for self and new programs"""
    params = {'function_set': [add2, sub2, mul2, div2],
              'arities': {2: [add2, sub2, mul2, div2]},
              'init_depth': (2, 6),
              'init_method': 'half and half',
              'n_features': 10,
              'const_range': (-1.0, 1.0),
              'metric': 'mean absolute error',
              'p_point_replace': 0.05,
              'parsimony_coefficient': 0.1}
    random_state = check_random_state(415)

    # Test for a small program
    test_gp = [mul2, div2, 8, 1, sub2, 9, .5]
    gp = _Program(random_state=random_state, program=test_gp, **params)

    self_test = gp.get_subtree(check_random_state(0))
    external_test = gp.get_subtree(check_random_state(0), test_gp)

    assert_equal(self_test, external_test)
Example #13
Source File: test_forest.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_parallel_train():
    rng = check_random_state(12321)
    n_samples, n_features = 80, 30
    X_train = rng.randn(n_samples, n_features)
    y_train = rng.randint(0, 2, n_samples)

    clfs = [
        RandomForestClassifier(n_estimators=20, n_jobs=n_jobs,
                               random_state=12345).fit(X_train, y_train)
        for n_jobs in [1, 2, 3, 8, 16, 32]
    ]

    X_test = rng.randn(n_samples, n_features)
    probas = [clf.predict_proba(X_test) for clf in clfs]
    for proba1, proba2 in zip(probas, probas[1:]):
        assert_array_almost_equal(proba1, proba2)
Example #14
Source File: test_ranking.py From twitter-stock-recommendation with MIT License | 6 votes |
def check_zero_or_all_relevant_labels(lrap_score):
    random_state = check_random_state(0)

    for n_labels in range(2, 5):
        y_score = random_state.uniform(size=(1, n_labels))
        y_score_ties = np.zeros_like(y_score)

        # No relevant labels
        y_true = np.zeros((1, n_labels))
        assert_equal(lrap_score(y_true, y_score), 1.)
        assert_equal(lrap_score(y_true, y_score_ties), 1.)

        # Only relevant labels
        y_true = np.ones((1, n_labels))
        assert_equal(lrap_score(y_true, y_score), 1.)
        assert_equal(lrap_score(y_true, y_score_ties), 1.)

    # Degenerate case: only one label
    assert_almost_equal(lrap_score([[1], [0], [1], [0]],
                                   [[0.5], [0.5], [0.5], [0.5]]), 1.)
Example #15
Source File: test_ranking.py From twitter-stock-recommendation with MIT License | 6 votes |
def check_alternative_lrap_implementation(lrap_score, n_classes=5,
                                          n_samples=20, random_state=0):
    _, y_true = make_multilabel_classification(n_features=1,
                                               allow_unlabeled=False,
                                               random_state=random_state,
                                               n_classes=n_classes,
                                               n_samples=n_samples)

    # Score with ties
    y_score = sparse_random_matrix(n_components=y_true.shape[0],
                                   n_features=y_true.shape[1],
                                   random_state=random_state)

    if hasattr(y_score, "toarray"):
        y_score = y_score.toarray()
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)

    # Uniform score
    random_state = check_random_state(random_state)
    y_score = random_state.uniform(size=(n_samples, n_classes))
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)
Example #16
Source File: test_genetic.py From gplearn with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_execute():
    """Check executing the program works"""
    params = {'function_set': [add2, sub2, mul2, div2],
              'arities': {2: [add2, sub2, mul2, div2]},
              'init_depth': (2, 6),
              'init_method': 'half and half',
              'n_features': 10,
              'const_range': (-1.0, 1.0),
              'metric': 'mean absolute error',
              'p_point_replace': 0.05,
              'parsimony_coefficient': 0.1}
    random_state = check_random_state(415)

    # Test for a small program
    test_gp = [mul2, div2, 8, 1, sub2, 9, .5]
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    gp = _Program(random_state=random_state, program=test_gp, **params)
    result = gp.execute(X)
    expected = [-0.19656208, 0.78197782, -1.70123845, -0.60175969,
                -0.01082618]
    assert_array_almost_equal(result, expected)
Example #17
Source File: test_tree.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_only_constant_features():
    random_state = check_random_state(0)
    X = np.zeros((10, 20))
    y = random_state.randint(0, 2, (10, ))
    for name, TreeEstimator in ALL_TREES.items():
        est = TreeEstimator(random_state=0)
        est.fit(X, y)
        assert_equal(est.tree_.max_depth, 0)
Example #18
Source File: test_forest.py From twitter-stock-recommendation with MIT License | 5 votes |
def check_importances(name, criterion, dtype, tolerance):
    # cast as dtype
    X = X_large.astype(dtype, copy=False)
    y = y_large.astype(dtype, copy=False)

    ForestEstimator = FOREST_ESTIMATORS[name]

    est = ForestEstimator(n_estimators=10, criterion=criterion,
                          random_state=0)
    est.fit(X, y)
    importances = est.feature_importances_

    # The forest estimator can detect that only the first 3 features of the
    # dataset are informative:
    n_important = np.sum(importances > 0.1)
    assert_equal(importances.shape[0], 10)
    assert_equal(n_important, 3)
    assert np.all(importances[:3] > 0.1)

    # Check with parallel
    importances = est.feature_importances_
    est.set_params(n_jobs=2)
    importances_parallel = est.feature_importances_
    assert_array_almost_equal(importances, importances_parallel)

    # Check with sample weights
    sample_weight = check_random_state(0).randint(1, 10, len(X))
    est = ForestEstimator(n_estimators=10, random_state=0,
                          criterion=criterion)
    est.fit(X, y, sample_weight=sample_weight)
    importances = est.feature_importances_
    assert_true(np.all(importances >= 0.0))

    for scale in [0.5, 100]:
        est = ForestEstimator(n_estimators=10, random_state=0,
                              criterion=criterion)
        est.fit(X, y, sample_weight=scale * sample_weight)
        importances_bis = est.feature_importances_
        assert_less(np.abs(importances - importances_bis).mean(), tolerance)
Example #19
Source File: base.py From DESlib with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit(self, X, y):
    """Fit the model according to the given training data.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Data used to fit the model.

    y : array of shape (n_samples)
        class labels of each example in X.

    Returns
    -------
    self : object
        Returns self.
    """
    self.random_state_ = check_random_state(self.random_state)

    # Check if the pool of classifiers is None. If yes, use a
    # BaggingClassifier for the pool.
    if self.pool_classifiers is None:
        self.pool_classifiers_ = BaggingClassifier(
            random_state=self.random_state_)
        self.pool_classifiers_.fit(X, y)
    else:
        self.pool_classifiers_ = self.pool_classifiers

    self.n_classifiers_ = len(self.pool_classifiers_)

    # dealing with label encoder
    self.check_label_encoder()
    self.y_enc_ = self._setup_label_encoder(y)

    self.n_classes_ = self.classes_.size
    self.n_features_ = X.shape[1]

    return self
Example #20
Source File: datasets.py From DESlib with BSD 3-Clause "New" or "Revised" License | 5 votes |
def make_xor(n_samples, random_state=None):
    """Generate the exclusive-or (XOR) dataset.

    Parameters
    ----------
    n_samples : int
        Number of generated data points.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Returns
    -------
    X : array of shape = [size_classes, 2]
        The generated data points.

    y : array of shape = [size_classes]
        Class labels associated with each class.
    """
    rng = check_random_state(random_state)

    X = rng.uniform(low=0, high=1, size=(n_samples, 2))
    y = np.logical_xor(X[:, 0] > 0.5, X[:, 1] > 0.5)
    return X, y
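A short usage sketch for make_xor, assuming the function is available as defined above: the labels are the exclusive-or of whether each coordinate exceeds 0.5, so the sketch simply checks shapes and re-derives the labels from the same rule.

import numpy as np

X, y = make_xor(n_samples=200, random_state=0)

assert X.shape == (200, 2)
# Labels follow the XOR rule on the two coordinates.
expected = np.logical_xor(X[:, 0] > 0.5, X[:, 1] > 0.5)
assert np.array_equal(y, expected)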
Example #21
Source File: test_neighbors.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_RadiusNeighborsRegressor_multioutput_with_uniform_weight(algorithm, weights):
    # Test radius neighbors in multi-output regression (uniform weight)
    rng = check_random_state(0)
    n_features = 5
    n_samples = 40
    n_output = 4

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_output)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    rnn = neighbors.RadiusNeighborsRegressor(weights=weights,
                                             algorithm=algorithm)
    rnn.fit(X_train, y_train)

    neigh_idx = rnn.radius_neighbors(X_test, return_distance=False)
    y_pred_idx = np.array([np.mean(y_train[idx], axis=0)
                           for idx in neigh_idx])

    y_pred_idx = np.array(y_pred_idx)
    y_pred = rnn.predict(X_test)

    assert_equal(y_pred_idx.shape, y_test.shape)
    assert_equal(y_pred.shape, y_test.shape)
    assert_array_almost_equal(y_pred, y_pred_idx)
Example #22
Source File: test_ranking.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_auc_score_non_binary_class():
    # Test that roc_auc_score function returns an error when trying
    # to compute AUC for non-binary class values.
    rng = check_random_state(404)
    y_pred = rng.rand(10)
    # y_true contains only one class value
    y_true = np.zeros(10, dtype="int")
    assert_raise_message(ValueError, "ROC AUC score is not defined",
                         roc_auc_score, y_true, y_pred)
    y_true = np.ones(10, dtype="int")
    assert_raise_message(ValueError, "ROC AUC score is not defined",
                         roc_auc_score, y_true, y_pred)
    y_true = -np.ones(10, dtype="int")
    assert_raise_message(ValueError, "ROC AUC score is not defined",
                         roc_auc_score, y_true, y_pred)
    # y_true contains three different class values
    y_true = rng.randint(0, 3, size=10)
    assert_raise_message(ValueError, "multiclass format is not supported",
                         roc_auc_score, y_true, y_pred)

    clean_warning_registry()
    with warnings.catch_warnings(record=True):
        rng = check_random_state(404)
        y_pred = rng.rand(10)
        # y_true contains only one class value
        y_true = np.zeros(10, dtype="int")
        assert_raise_message(ValueError, "ROC AUC score is not defined",
                             roc_auc_score, y_true, y_pred)
        y_true = np.ones(10, dtype="int")
        assert_raise_message(ValueError, "ROC AUC score is not defined",
                             roc_auc_score, y_true, y_pred)
        y_true = -np.ones(10, dtype="int")
        assert_raise_message(ValueError, "ROC AUC score is not defined",
                             roc_auc_score, y_true, y_pred)
        # y_true contains three different class values
        y_true = rng.randint(0, 3, size=10)
        assert_raise_message(ValueError, "multiclass format is not supported",
                             roc_auc_score, y_true, y_pred)
Example #23
Source File: series_as_features.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def make_regression_problem(n_instances=20, n_columns=1, n_timepoints=20,
                            random_state=None):
    rng = check_random_state(random_state)
    y = pd.Series(rng.normal(size=n_instances))
    X = _make_series_as_features_X(y, n_columns, n_timepoints,
                                   random_state=random_state)
    return X, y
Example #24
Source File: series_as_features.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def make_classification_problem(n_instances=20, n_columns=1, n_timepoints=20,
                                n_classes=2, random_state=None):
    rng = check_random_state(random_state)
    y = pd.Series(np.hstack([np.arange(n_classes),
                             rng.randint(0, n_classes,
                                         size=n_instances - n_classes)]))
    X = _make_series_as_features_X(y, n_columns, n_timepoints,
                                   random_state=random_state)
    return X, y
Example #25
Source File: series_as_features.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _make_series_as_features_X(y, n_columns, n_timepoints, random_state=None):
    n_instances = len(y)
    rng = check_random_state(random_state)

    columns = []
    for i in range(n_columns):
        rows = []
        for j in range(n_instances):
            # we use the y value for the mean of the generated time series
            row = pd.Series(rng.normal(loc=y.iloc[j] * 20, scale=0.5,
                                       size=n_timepoints))
            rows.append(row)
        column = pd.Series(rows)
        columns.append(column)

    return pd.DataFrame(columns).T
Example #26
Source File: forecasting.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def make_forecasting_problem(n_timepoints=50, random_state=None):
    rng = check_random_state(random_state)
    return pd.Series(rng.random(size=n_timepoints),
                     index=pd.Int64Index(np.arange(n_timepoints)))
Example #27
Source File: randomadder.py From scikit-lego with MIT License | 5 votes |
def transform_train(self, X):
    rs = check_random_state(self.random_state)
    check_is_fitted(self, ["dim_"])

    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)

    return X + rs.normal(0, self.noise, size=X.shape)
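A small sketch of the pattern above outside the class context: resolving a stored random_state into a RandomState at transform time makes the added noise reproducible across calls with the same seed. The helper add_noise below is hypothetical and not part of scikit-lego.

import numpy as np
from sklearn.utils.validation import check_random_state

def add_noise(X, noise=1.0, random_state=None):
    # Hypothetical helper mirroring the pattern in transform_train above.
    rs = check_random_state(random_state)
    return X + rs.normal(0, noise, size=X.shape)

X = np.zeros((3, 2))
out1 = add_noise(X, noise=0.1, random_state=0)
out2 = add_noise(X, noise=0.1, random_state=0)
np.testing.assert_allclose(out1, out2)  # same seed -> identical noise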
Example #28
Source File: test_genetic.py From gplearn with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_indices():
    """Check that indices are stable when generated on the fly."""
    params = {'function_set': [add2, sub2, mul2, div2],
              'arities': {2: [add2, sub2, mul2, div2]},
              'init_depth': (2, 6),
              'init_method': 'half and half',
              'n_features': 10,
              'const_range': (-1.0, 1.0),
              'metric': 'mean absolute error',
              'p_point_replace': 0.05,
              'parsimony_coefficient': 0.1}
    random_state = check_random_state(415)
    test_gp = [mul2, div2, 8, 1, sub2, 9, .5]
    gp = _Program(random_state=random_state, program=test_gp, **params)

    assert_raises(ValueError, gp.get_all_indices)
    assert_raises(ValueError, gp._indices)

    def get_indices_property():
        return gp.indices_
    assert_raises(ValueError, get_indices_property)

    indices, _ = gp.get_all_indices(10, 7, random_state)

    assert_array_equal(indices, gp.get_all_indices()[0])
    assert_array_equal(indices, gp._indices())
    assert_array_equal(indices, gp.indices_)
Example #29
Source File: test_genetic.py From gplearn with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_transformer_iterable():
    """Check that the transformer is iterable"""
    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)
    function_set = ['add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg',
                    'inv', 'max', 'min']
    est = SymbolicTransformer(population_size=500, generations=2,
                              function_set=function_set, random_state=0)

    # Check unfitted
    unfitted_len = len(est)
    unfitted_iter = [gp.length_ for gp in est]
    expected_iter = []

    assert(unfitted_len == 0)
    assert(unfitted_iter == expected_iter)

    # Check fitted
    est.fit(X, y)
    fitted_len = len(est)
    fitted_iter = [gp.length_ for gp in est]
    expected_iter = [8, 12, 2, 29, 9, 33, 9, 8, 4, 22]

    assert(fitted_len == 10)
    assert(fitted_iter == expected_iter)

    # Check IndexError
    assert_raises(IndexError, est.__getitem__, 10)
Example #30
Source File: test_genetic.py From gplearn with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_output_shape():
    """Check output shape is as expected"""
    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)

    # Check the transformer
    est = SymbolicTransformer(population_size=100, generations=2,
                              n_components=5, random_state=0)
    est.fit(X, y)
    assert(est.transform(X).shape == (5, 5))