Python sklearn.utils.validation.check_random_state() Examples
The following are 30 code examples of sklearn.utils.validation.check_random_state(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils.validation, or try the search function.
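Before the examples, here is a minimal sketch of what check_random_state itself does, following the scikit-learn convention that all of the examples below rely on: an int seed produces a new RandomState, an existing RandomState is returned unchanged, and None maps to NumPy's global RandomState singleton.

from sklearn.utils.validation import check_random_state
import numpy as np

rng = check_random_state(42)        # int seed -> new RandomState seeded with 42
same = check_random_state(rng)      # RandomState instance -> returned unchanged
default = check_random_state(None)  # None -> the global numpy RandomState singleton

assert isinstance(rng, np.random.RandomState)
assert same is rng
print(rng.uniform(size=3))          # reproducible draws from the seeded generator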
Example #1
Source File: test_ranking.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def check_zero_or_all_relevant_labels(lrap_score):
    random_state = check_random_state(0)

    for n_labels in range(2, 5):
        y_score = random_state.uniform(size=(1, n_labels))
        y_score_ties = np.zeros_like(y_score)

        # No relevant labels
        y_true = np.zeros((1, n_labels))
        assert_equal(lrap_score(y_true, y_score), 1.)
        assert_equal(lrap_score(y_true, y_score_ties), 1.)

        # Only relevant labels
        y_true = np.ones((1, n_labels))
        assert_equal(lrap_score(y_true, y_score), 1.)
        assert_equal(lrap_score(y_true, y_score_ties), 1.)

    # Degenerate case: only one label
    assert_almost_equal(lrap_score([[1], [0], [1], [0]],
                                   [[0.5], [0.5], [0.5], [0.5]]), 1.)
Example #2
Source File: test_common.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_thresholded_multilabel_multioutput_permutations_invariance(name):
    random_state = check_random_state(0)
    n_samples, n_classes = 20, 4
    y_true = random_state.randint(0, 2, size=(n_samples, n_classes))
    y_score = random_state.normal(size=y_true.shape)

    # Makes sure all samples have at least one label. This works around errors
    # when running metrics where average="sample"
    y_true[y_true.sum(1) == 4, 0] = 0
    y_true[y_true.sum(1) == 0, 0] = 1

    metric = ALL_METRICS[name]
    score = metric(y_true, y_score)

    for perm in permutations(range(n_classes), n_classes):
        y_score_perm = y_score[:, perm]
        y_true_perm = y_true[:, perm]

        current_score = metric(y_true_perm, y_score_perm)
        assert_almost_equal(score, current_score)
Example #3
Source File: test_neighbors.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_KNeighborsRegressor_multioutput_uniform_weight(algorithm, weights):
    # Test k-neighbors in multi-output regression with uniform weight
    rng = check_random_state(0)
    n_features = 5
    n_samples = 40
    n_output = 4

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_output)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    knn = neighbors.KNeighborsRegressor(weights=weights, algorithm=algorithm)
    knn.fit(X_train, y_train)

    neigh_idx = knn.kneighbors(X_test, return_distance=False)
    y_pred_idx = np.array([np.mean(y_train[idx], axis=0)
                           for idx in neigh_idx])

    y_pred = knn.predict(X_test)

    assert_equal(y_pred.shape, y_test.shape)
    assert_equal(y_pred_idx.shape, y_test.shape)
    assert_array_almost_equal(y_pred, y_pred_idx)
Example #4
Source File: test_common.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_multilabel_sample_weight_invariance(name):
    # multilabel indicator
    random_state = check_random_state(0)
    _, ya = make_multilabel_classification(n_features=1, n_classes=20,
                                           random_state=0, n_samples=100,
                                           allow_unlabeled=False)
    _, yb = make_multilabel_classification(n_features=1, n_classes=20,
                                           random_state=1, n_samples=100,
                                           allow_unlabeled=False)
    y_true = np.vstack([ya, yb])
    y_pred = np.vstack([ya, ya])
    y_score = random_state.randint(1, 4, size=y_true.shape)

    metric = ALL_METRICS[name]
    if name in THRESHOLDED_METRICS:
        check_sample_weight_invariance(name, metric, y_true, y_score)
    else:
        check_sample_weight_invariance(name, metric, y_true, y_pred)
Example #5
Source File: data.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _check_inputs(
    self,
    X: Union[ArrayLike, DataFrameType],
    accept_sparse_negative: bool = False,
    copy: bool = False,
    in_fit: bool = True,
) -> Union[ArrayLike, DataFrameType]:
    if isinstance(X, (pd.DataFrame, dd.DataFrame)):
        X = X.values
    if isinstance(X, np.ndarray):
        C = len(X) // min(multiprocessing.cpu_count(), 2)
        X = da.from_array(X, chunks=C)

    rng = check_random_state(self.random_state)
    # TODO: non-float dtypes?
    # TODO: sparse arrays?
    # TODO: mix of sparse, dense?
    sample = rng.uniform(size=(5, X.shape[1])).astype(X.dtype)
    super(QuantileTransformer, self)._check_inputs(
        sample,
        accept_sparse_negative=accept_sparse_negative,
        copy=copy,
        in_fit=in_fit,
    )
    return X
Example #6
Source File: test_common.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_multilabel_label_permutations_invariance(name):
    random_state = check_random_state(0)
    n_samples, n_classes = 20, 4
    y_true = random_state.randint(0, 2, size=(n_samples, n_classes))
    y_score = random_state.randint(0, 2, size=(n_samples, n_classes))

    metric = ALL_METRICS[name]
    score = metric(y_true, y_score)

    for perm in permutations(range(n_classes), n_classes):
        y_score_perm = y_score[:, perm]
        y_true_perm = y_true[:, perm]

        current_score = metric(y_true_perm, y_score_perm)
        assert_almost_equal(score, current_score)
Example #7
Source File: cart.py From Hands-on-Supervised-Machine-Learning-with-Python with MIT License | 6 votes |
def __init__(self, X, y, criterion, min_samples_split, max_depth,
             n_val_sample, random_state):
    # make sure max_depth > 1
    if max_depth < 2:
        raise ValueError("max depth must be > 1")

    # check the input arrays, and if it's classification validate the
    # target values in y
    X, y = check_X_y(X, y, accept_sparse=False, dtype=None, copy=True)
    if is_classifier(self):
        check_classification_targets(y)

    # hyper parameters so we can later inspect attributes of the model
    self.min_samples_split = min_samples_split
    self.max_depth = max_depth
    self.n_val_sample = n_val_sample
    self.random_state = random_state

    # create the splitting class
    random_state = check_random_state(random_state)
    self.splitter = RandomSplitter(random_state, criterion, n_val_sample)

    # grow the tree depth first
    self.tree = self._find_next_split(X, y, 0)
Example #8
Source File: test_skewness.py From skoot with MIT License | 6 votes |
def test_yj_fit_transform():
    yj = YeoJohnsonTransformer(cols=X.columns[:2])  # just first two cols
    trans = yj.fit_transform(X)
    assert isinstance(trans, pd.DataFrame)

    # Test it on a random...
    m, n = 1000, 5
    random_state = check_random_state(42)
    x = random_state.rand(m, n)

    # make some random signs
    mask = random_state.rand(m, n) % 2 < 0.5
    signs = np.ones((m, n))
    signs[~mask] = -1
    x *= signs

    YeoJohnsonTransformer().fit(x)
Example #9
Source File: test_ranking.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def check_alternative_lrap_implementation(lrap_score, n_classes=5,
                                          n_samples=20, random_state=0):
    _, y_true = make_multilabel_classification(n_features=1,
                                               allow_unlabeled=False,
                                               random_state=random_state,
                                               n_classes=n_classes,
                                               n_samples=n_samples)

    # Score with ties
    y_score = sparse_random_matrix(n_components=y_true.shape[0],
                                   n_features=y_true.shape[1],
                                   random_state=random_state)

    if hasattr(y_score, "toarray"):
        y_score = y_score.toarray()
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)

    # Uniform score
    random_state = check_random_state(random_state)
    y_score = random_state.uniform(size=(n_samples, n_classes))
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)
Example #10
Source File: test_forest.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_parallel_train():
    rng = check_random_state(12321)
    n_samples, n_features = 80, 30
    X_train = rng.randn(n_samples, n_features)
    y_train = rng.randint(0, 2, n_samples)

    clfs = [
        RandomForestClassifier(n_estimators=20, n_jobs=n_jobs,
                               random_state=12345).fit(X_train, y_train)
        for n_jobs in [1, 2, 3, 8, 16, 32]
    ]

    X_test = rng.randn(n_samples, n_features)
    probas = [clf.predict_proba(X_test) for clf in clfs]
    for proba1, proba2 in zip(probas, probas[1:]):
        assert_array_almost_equal(proba1, proba2)
Example #11
Source File: rand.py From revrand with Apache License 2.0 | 6 votes |
def endless_permutations(N, random_state=None):
    """
    Generate an endless sequence of random integers from permutations of the
    set [0, ..., N).

    If we call this N times, we will sweep through the entire set without
    replacement, on the (N+1)th call a new permutation will be created, etc.

    Parameters
    ----------
    N: int
        the length of the set
    random_state: int or RandomState, optional
        random seed

    Yields
    ------
    int:
        a random int from the set [0, ..., N)
    """
    generator = check_random_state(random_state)
    while True:
        batch_inds = generator.permutation(N)
        for b in batch_inds:
            yield b
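A brief usage sketch of the generator above, assuming endless_permutations is defined as shown (the values drawn are illustrative, not from the revrand project): consuming N items yields one full permutation of the set, and the next N items form a fresh permutation.

from itertools import islice

gen = endless_permutations(5, random_state=0)
first_sweep = list(islice(gen, 5))
second_sweep = list(islice(gen, 5))

# Each sweep covers {0, ..., 4} exactly once, without replacement.
assert sorted(first_sweep) == [0, 1, 2, 3, 4]
assert sorted(second_sweep) == [0, 1, 2, 3, 4]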
Example #12
Source File: test_genetic.py From gplearn with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_get_subtree():
    """Check that get subtree does the same thing for self and new programs"""
    params = {'function_set': [add2, sub2, mul2, div2],
              'arities': {2: [add2, sub2, mul2, div2]},
              'init_depth': (2, 6),
              'init_method': 'half and half',
              'n_features': 10,
              'const_range': (-1.0, 1.0),
              'metric': 'mean absolute error',
              'p_point_replace': 0.05,
              'parsimony_coefficient': 0.1}
    random_state = check_random_state(415)

    # Test for a small program
    test_gp = [mul2, div2, 8, 1, sub2, 9, .5]
    gp = _Program(random_state=random_state, program=test_gp, **params)

    self_test = gp.get_subtree(check_random_state(0))
    external_test = gp.get_subtree(check_random_state(0), test_gp)

    assert_equal(self_test, external_test)
Example #13
Source File: test_forest.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_parallel_train():
    rng = check_random_state(12321)
    n_samples, n_features = 80, 30
    X_train = rng.randn(n_samples, n_features)
    y_train = rng.randint(0, 2, n_samples)

    clfs = [
        RandomForestClassifier(n_estimators=20, n_jobs=n_jobs,
                               random_state=12345).fit(X_train, y_train)
        for n_jobs in [1, 2, 3, 8, 16, 32]
    ]

    X_test = rng.randn(n_samples, n_features)
    probas = [clf.predict_proba(X_test) for clf in clfs]
    for proba1, proba2 in zip(probas, probas[1:]):
        assert_array_almost_equal(proba1, proba2)
Example #14
Source File: test_ranking.py From twitter-stock-recommendation with MIT License | 6 votes |
def check_zero_or_all_relevant_labels(lrap_score):
    random_state = check_random_state(0)

    for n_labels in range(2, 5):
        y_score = random_state.uniform(size=(1, n_labels))
        y_score_ties = np.zeros_like(y_score)

        # No relevant labels
        y_true = np.zeros((1, n_labels))
        assert_equal(lrap_score(y_true, y_score), 1.)
        assert_equal(lrap_score(y_true, y_score_ties), 1.)

        # Only relevant labels
        y_true = np.ones((1, n_labels))
        assert_equal(lrap_score(y_true, y_score), 1.)
        assert_equal(lrap_score(y_true, y_score_ties), 1.)

    # Degenerate case: only one label
    assert_almost_equal(lrap_score([[1], [0], [1], [0]],
                                   [[0.5], [0.5], [0.5], [0.5]]), 1.)
Example #15
Source File: test_ranking.py From twitter-stock-recommendation with MIT License | 6 votes |
def check_alternative_lrap_implementation(lrap_score, n_classes=5,
                                          n_samples=20, random_state=0):
    _, y_true = make_multilabel_classification(n_features=1,
                                               allow_unlabeled=False,
                                               random_state=random_state,
                                               n_classes=n_classes,
                                               n_samples=n_samples)

    # Score with ties
    y_score = sparse_random_matrix(n_components=y_true.shape[0],
                                   n_features=y_true.shape[1],
                                   random_state=random_state)

    if hasattr(y_score, "toarray"):
        y_score = y_score.toarray()
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)

    # Uniform score
    random_state = check_random_state(random_state)
    y_score = random_state.uniform(size=(n_samples, n_classes))
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)
Example #16
Source File: test_genetic.py From gplearn with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_execute():
    """Check executing the program works"""
    params = {'function_set': [add2, sub2, mul2, div2],
              'arities': {2: [add2, sub2, mul2, div2]},
              'init_depth': (2, 6),
              'init_method': 'half and half',
              'n_features': 10,
              'const_range': (-1.0, 1.0),
              'metric': 'mean absolute error',
              'p_point_replace': 0.05,
              'parsimony_coefficient': 0.1}
    random_state = check_random_state(415)

    # Test for a small program
    test_gp = [mul2, div2, 8, 1, sub2, 9, .5]
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    gp = _Program(random_state=random_state, program=test_gp, **params)
    result = gp.execute(X)
    expected = [-0.19656208, 0.78197782, -1.70123845, -0.60175969,
                -0.01082618]
    assert_array_almost_equal(result, expected)
Example #17
Source File: test_tree.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_only_constant_features():
    random_state = check_random_state(0)
    X = np.zeros((10, 20))
    y = random_state.randint(0, 2, (10, ))
    for name, TreeEstimator in ALL_TREES.items():
        est = TreeEstimator(random_state=0)
        est.fit(X, y)
        assert_equal(est.tree_.max_depth, 0)
Example #18
Source File: test_forest.py From twitter-stock-recommendation with MIT License | 5 votes |
def check_importances(name, criterion, dtype, tolerance):
    # cast as dtype
    X = X_large.astype(dtype, copy=False)
    y = y_large.astype(dtype, copy=False)

    ForestEstimator = FOREST_ESTIMATORS[name]

    est = ForestEstimator(n_estimators=10, criterion=criterion,
                          random_state=0)
    est.fit(X, y)
    importances = est.feature_importances_

    # The forest estimator can detect that only the first 3 features of the
    # dataset are informative:
    n_important = np.sum(importances > 0.1)
    assert_equal(importances.shape[0], 10)
    assert_equal(n_important, 3)
    assert np.all(importances[:3] > 0.1)

    # Check with parallel
    importances = est.feature_importances_
    est.set_params(n_jobs=2)
    importances_parallel = est.feature_importances_
    assert_array_almost_equal(importances, importances_parallel)

    # Check with sample weights
    sample_weight = check_random_state(0).randint(1, 10, len(X))
    est = ForestEstimator(n_estimators=10, random_state=0,
                          criterion=criterion)
    est.fit(X, y, sample_weight=sample_weight)
    importances = est.feature_importances_
    assert_true(np.all(importances >= 0.0))

    for scale in [0.5, 100]:
        est = ForestEstimator(n_estimators=10, random_state=0,
                              criterion=criterion)
        est.fit(X, y, sample_weight=scale * sample_weight)
        importances_bis = est.feature_importances_
        assert_less(np.abs(importances - importances_bis).mean(), tolerance)
Example #19
Source File: base.py From DESlib with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit(self, X, y):
    """Fit the model according to the given training data.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Data used to fit the model.

    y : array of shape (n_samples)
        class labels of each example in X.

    Returns
    -------
    self : object
        Returns self.
    """
    self.random_state_ = check_random_state(self.random_state)

    # Check if the pool of classifiers is None. If yes, use a
    # BaggingClassifier for the pool.
    if self.pool_classifiers is None:
        self.pool_classifiers_ = BaggingClassifier(
            random_state=self.random_state_)
        self.pool_classifiers_.fit(X, y)
    else:
        self.pool_classifiers_ = self.pool_classifiers

    self.n_classifiers_ = len(self.pool_classifiers_)

    # dealing with label encoder
    self.check_label_encoder()
    self.y_enc_ = self._setup_label_encoder(y)

    self.n_classes_ = self.classes_.size
    self.n_features_ = X.shape[1]

    return self
Example #20
Source File: datasets.py From DESlib with BSD 3-Clause "New" or "Revised" License | 5 votes |
def make_xor(n_samples, random_state=None):
    """Generate the exclusive-or (XOR) dataset.

    Parameters
    ----------
    n_samples : int
        Number of generated data points.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Returns
    -------
    X : array of shape = [size_classes, 2]
        The generated data points.

    y : array of shape = [size_classes]
        Class labels associated with each class.
    """
    rng = check_random_state(random_state)

    X = rng.uniform(low=0, high=1, size=(n_samples, 2))
    y = np.logical_xor(X[:, 0] > 0.5, X[:, 1] > 0.5)
    return X, y
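A short usage sketch for make_xor, assuming the function is available as defined above: the labels are the exclusive-or of whether each coordinate exceeds 0.5, so the sketch simply checks shapes and re-derives the labels from the same rule.

import numpy as np

X, y = make_xor(n_samples=200, random_state=0)

assert X.shape == (200, 2)
# Labels follow the XOR rule on the two coordinates.
expected = np.logical_xor(X[:, 0] > 0.5, X[:, 1] > 0.5)
assert np.array_equal(y, expected)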
Example #21
Source File: test_neighbors.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_RadiusNeighborsRegressor_multioutput_with_uniform_weight(algorithm, weights):
    # Test radius neighbors in multi-output regression (uniform weight)
    rng = check_random_state(0)
    n_features = 5
    n_samples = 40
    n_output = 4

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_output)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    rnn = neighbors.RadiusNeighborsRegressor(weights=weights,
                                             algorithm=algorithm)
    rnn.fit(X_train, y_train)

    neigh_idx = rnn.radius_neighbors(X_test, return_distance=False)
    y_pred_idx = np.array([np.mean(y_train[idx], axis=0)
                           for idx in neigh_idx])

    y_pred_idx = np.array(y_pred_idx)
    y_pred = rnn.predict(X_test)

    assert_equal(y_pred_idx.shape, y_test.shape)
    assert_equal(y_pred.shape, y_test.shape)
    assert_array_almost_equal(y_pred, y_pred_idx)
Example #22
Source File: test_ranking.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_auc_score_non_binary_class():
    # Test that roc_auc_score function returns an error when trying
    # to compute AUC for non-binary class values.
    rng = check_random_state(404)
    y_pred = rng.rand(10)
    # y_true contains only one class value
    y_true = np.zeros(10, dtype="int")
    assert_raise_message(ValueError, "ROC AUC score is not defined",
                         roc_auc_score, y_true, y_pred)
    y_true = np.ones(10, dtype="int")
    assert_raise_message(ValueError, "ROC AUC score is not defined",
                         roc_auc_score, y_true, y_pred)
    y_true = -np.ones(10, dtype="int")
    assert_raise_message(ValueError, "ROC AUC score is not defined",
                         roc_auc_score, y_true, y_pred)
    # y_true contains three different class values
    y_true = rng.randint(0, 3, size=10)
    assert_raise_message(ValueError, "multiclass format is not supported",
                         roc_auc_score, y_true, y_pred)

    clean_warning_registry()
    with warnings.catch_warnings(record=True):
        rng = check_random_state(404)
        y_pred = rng.rand(10)
        # y_true contains only one class value
        y_true = np.zeros(10, dtype="int")
        assert_raise_message(ValueError, "ROC AUC score is not defined",
                             roc_auc_score, y_true, y_pred)
        y_true = np.ones(10, dtype="int")
        assert_raise_message(ValueError, "ROC AUC score is not defined",
                             roc_auc_score, y_true, y_pred)
        y_true = -np.ones(10, dtype="int")
        assert_raise_message(ValueError, "ROC AUC score is not defined",
                             roc_auc_score, y_true, y_pred)
        # y_true contains three different class values
        y_true = rng.randint(0, 3, size=10)
        assert_raise_message(ValueError, "multiclass format is not supported",
                             roc_auc_score, y_true, y_pred)
Example #23
Source File: series_as_features.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def make_regression_problem(n_instances=20, n_columns=1, n_timepoints=20,
                            random_state=None):
    rng = check_random_state(random_state)
    y = pd.Series(rng.normal(size=n_instances))
    X = _make_series_as_features_X(y, n_columns, n_timepoints,
                                   random_state=random_state)
    return X, y
Example #24
Source File: series_as_features.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def make_classification_problem(n_instances=20, n_columns=1, n_timepoints=20,
                                n_classes=2, random_state=None):
    rng = check_random_state(random_state)
    y = pd.Series(np.hstack([np.arange(n_classes),
                             rng.randint(0, n_classes,
                                         size=n_instances - n_classes)]))
    X = _make_series_as_features_X(y, n_columns, n_timepoints,
                                   random_state=random_state)
    return X, y
Example #25
Source File: series_as_features.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _make_series_as_features_X(y, n_columns, n_timepoints, random_state=None):
    n_instances = len(y)
    rng = check_random_state(random_state)

    columns = []
    for i in range(n_columns):
        rows = []
        for j in range(n_instances):
            # we use the y value for the mean of the generated time series
            row = pd.Series(rng.normal(loc=y.iloc[j] * 20, scale=0.5,
                                       size=n_timepoints))
            rows.append(row)
        column = pd.Series(rows)
        columns.append(column)

    return pd.DataFrame(columns).T
Example #26
Source File: forecasting.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def make_forecasting_problem(n_timepoints=50, random_state=None):
    rng = check_random_state(random_state)
    return pd.Series(rng.random(size=n_timepoints),
                     index=pd.Int64Index(np.arange(n_timepoints)))
Example #27
Source File: randomadder.py From scikit-lego with MIT License | 5 votes |
def transform_train(self, X):
    rs = check_random_state(self.random_state)
    check_is_fitted(self, ["dim_"])

    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)

    return X + rs.normal(0, self.noise, size=X.shape)
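A small sketch of the pattern above outside the class context: resolving a stored random_state into a RandomState at transform time makes the added noise reproducible across calls with the same seed. The helper add_noise below is hypothetical and not part of scikit-lego.

import numpy as np
from sklearn.utils.validation import check_random_state

def add_noise(X, noise=1.0, random_state=None):
    # Hypothetical helper mirroring the pattern in transform_train above.
    rs = check_random_state(random_state)
    return X + rs.normal(0, noise, size=X.shape)

X = np.zeros((3, 2))
out1 = add_noise(X, noise=0.1, random_state=0)
out2 = add_noise(X, noise=0.1, random_state=0)
np.testing.assert_allclose(out1, out2)  # same seed -> identical noise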
Example #28
Source File: test_genetic.py From gplearn with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_indices():
    """Check that indices are stable when generated on the fly."""
    params = {'function_set': [add2, sub2, mul2, div2],
              'arities': {2: [add2, sub2, mul2, div2]},
              'init_depth': (2, 6),
              'init_method': 'half and half',
              'n_features': 10,
              'const_range': (-1.0, 1.0),
              'metric': 'mean absolute error',
              'p_point_replace': 0.05,
              'parsimony_coefficient': 0.1}
    random_state = check_random_state(415)
    test_gp = [mul2, div2, 8, 1, sub2, 9, .5]
    gp = _Program(random_state=random_state, program=test_gp, **params)

    assert_raises(ValueError, gp.get_all_indices)
    assert_raises(ValueError, gp._indices)

    def get_indices_property():
        return gp.indices_
    assert_raises(ValueError, get_indices_property)

    indices, _ = gp.get_all_indices(10, 7, random_state)

    assert_array_equal(indices, gp.get_all_indices()[0])
    assert_array_equal(indices, gp._indices())
    assert_array_equal(indices, gp.indices_)
Example #29
Source File: test_genetic.py From gplearn with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_transformer_iterable():
    """Check that the transformer is iterable"""
    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)
    function_set = ['add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg',
                    'inv', 'max', 'min']
    est = SymbolicTransformer(population_size=500, generations=2,
                              function_set=function_set, random_state=0)

    # Check unfitted
    unfitted_len = len(est)
    unfitted_iter = [gp.length_ for gp in est]
    expected_iter = []

    assert(unfitted_len == 0)
    assert(unfitted_iter == expected_iter)

    # Check fitted
    est.fit(X, y)
    fitted_len = len(est)
    fitted_iter = [gp.length_ for gp in est]
    expected_iter = [8, 12, 2, 29, 9, 33, 9, 8, 4, 22]

    assert(fitted_len == 10)
    assert(fitted_iter == expected_iter)

    # Check IndexError
    assert_raises(IndexError, est.__getitem__, 10)
Example #30
Source File: test_genetic.py From gplearn with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_output_shape():
    """Check output shape is as expected"""
    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)

    # Check the transformer
    est = SymbolicTransformer(population_size=100, generations=2,
                              n_components=5, random_state=0)
    est.fit(X, y)
    assert(est.transform(X).shape == (5, 5))