Python sklearn.utils.check_random_state() Examples
The following are 29 code examples of sklearn.utils.check_random_state(), collected from open-source projects.
You may also want to check out all available functions and classes of the module sklearn.utils.
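For orientation before the examples, here is a minimal sketch (not taken from any of the projects below) of the contract check_random_state implements, per sklearn's documented behavior: None yields numpy's global RandomState, an int seeds a fresh RandomState, and an existing RandomState instance is passed through unchanged.

    from numpy.random import RandomState
    from sklearn.utils import check_random_state

    rng_none = check_random_state(None)     # the global numpy RandomState
    rng_int = check_random_state(42)        # a fresh RandomState seeded with 42
    rng_same = check_random_state(rng_int)  # an instance is returned as-is
    assert rng_same is rng_int
    assert isinstance(rng_none, RandomState)
    # Any other input (e.g. a string) raises ValueError.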
Example #1
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License

def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR(gamma='scale')]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test)
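An aside on this test (not part of the original file): seeding with an integer rebuilds an identical generator on every call, while passing one shared RandomState instance, as random_state=rng does above, advances a single stream so each consumer draws different but reproducible values. A minimal illustration:

    from sklearn.utils import check_random_state

    # An int seed builds a fresh generator each time, so the draw repeats:
    assert check_random_state(0).rand() == check_random_state(0).rand()

    # A shared instance advances its state, so successive draws differ:
    rng = check_random_state(0)
    first, second = rng.rand(), rng.rand()
    assert first != second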
Example #2
Source File: test_randomized_lasso.py From stability-selection with BSD 3-Clause "New" or "Revised" License

def generate_experiment_data(n=200, p=200, rho=0.6, random_state=3245):
    rng = check_random_state(random_state)

    sigma = np.eye(p)
    sigma[0, 2] = rho
    sigma[2, 0] = rho
    sigma[1, 2] = rho
    sigma[2, 1] = rho

    X = rng.multivariate_normal(mean=np.zeros(p), cov=sigma, size=(n,))
    beta = np.zeros(p)
    beta[:2] = 1.0
    epsilon = rng.normal(0.0, 0.25, size=(n,))

    y = np.matmul(X, beta) + epsilon

    return X, y
Example #3
Source File: test_graphical_lasso.py From Mastering-Elasticsearch-7.0 with MIT License

def test_graphical_lasso_cv(random_state=1):
    # Sample data from a sparse multivariate normal
    dim = 5
    n_samples = 6
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.96,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov,
                                         size=n_samples)
    # Capture stdout, to smoke test the verbose mode
    orig_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        # We need verbose very high so that Parallel prints on stdout
        GraphicalLassoCV(verbose=100, alphas=5, tol=1e-1).fit(X)
    finally:
        sys.stdout = orig_stdout

    # Smoke test with specified alphas
    GraphicalLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1).fit(X)
Example #4
Source File: slm.py From revrand with Apache License 2.0

def __init__(self,
             basis=LinearBasis(),
             var=Parameter(gamma(1.), Positive()),
             tol=1e-8,
             maxiter=1000,
             nstarts=100,
             random_state=None
             ):
    """See class docstring."""
    self.basis = basis
    self.var = var
    self.tol = tol
    self.maxiter = maxiter
    self.nstarts = nstarts
    self.random_state = random_state
    self.random_ = check_random_state(random_state)
Example #5
Source File: test_iforest.py From Mastering-Elasticsearch-7.0 with MIT License

def test_iforest_warm_start():
    """Test iterative addition of iTrees to an iForest"""
    rng = check_random_state(0)
    X = rng.randn(20, 2)

    # fit first 10 trees
    clf = IsolationForest(n_estimators=10, max_samples=20,
                          random_state=rng, warm_start=True)
    clf.fit(X)
    # remember the 1st tree
    tree_1 = clf.estimators_[0]
    # fit another 10 trees
    clf.set_params(n_estimators=20)
    clf.fit(X)
    # expecting 20 fitted trees and no overwritten trees
    assert len(clf.estimators_) == 20
    assert clf.estimators_[0] is tree_1
Example #6
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License

def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC(gamma="scale")]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)
Example #7
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License

def test_bootstrap_samples():
    # Test that bootstrapping samples generate non-perfect base estimators.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    base_estimator = DecisionTreeRegressor().fit(X_train, y_train)

    # without bootstrap, all trees are perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=False,
                                random_state=rng).fit(X_train, y_train)

    assert_equal(base_estimator.score(X_train, y_train),
                 ensemble.score(X_train, y_train))

    # with bootstrap, trees are no longer perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=True,
                                random_state=rng).fit(X_train, y_train)

    assert_greater(base_estimator.score(X_train, y_train),
                   ensemble.score(X_train, y_train))

    # check that each sampling correspond to a complete bootstrap resample.
    # the size of each bootstrap should be the same as the input data but
    # the data should be different (checked using the hash of the data).
    ensemble = BaggingRegressor(base_estimator=DummySizeEstimator(),
                                bootstrap=True).fit(X_train, y_train)
    training_hash = []
    for estimator in ensemble.estimators_:
        assert estimator.training_size_ == X_train.shape[0]
        training_hash.append(estimator.training_hash_)
    assert len(set(training_hash)) == len(training_hash)
Example #8
Source File: test_truncated_svd.py From mars with Apache License 2.0

def setUp(self):
    # Make an X that looks somewhat like a small tf-idf matrix.
    # XXX newer versions of SciPy >0.16 have scipy.sparse.rand for this.
    shape = 60, 55
    n_samples, n_features = shape
    rng = check_random_state(42)
    X = rng.randint(-100, 20, np.product(shape)).reshape(shape)
    X = sp.csr_matrix(np.maximum(X, 0), dtype=np.float64)
    X.data[:] = 1 + np.log(X.data)
    self.X = X
    self.Xdense = X.A
    self.n_samples = n_samples
    self.n_features = n_features

    self.session = new_session().as_default()
    self._old_executor = self.session._sess._executor
    self.executor = self.session._sess._executor = \
        ExecutorForTest('numpy', storage=self.session._sess._context)
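An aside on the XXX comment above: on SciPy 0.17+ the random sparse matrix can be drawn directly with scipy.sparse.random, which accepts the same style of random_state argument. A minimal sketch, with the density chosen arbitrarily:

    import scipy.sparse as sp
    from sklearn.utils import check_random_state

    rng = check_random_state(42)
    # Draw a 60x55 CSR matrix with ~25% non-zeros straight from scipy,
    # reusing the validated RandomState for reproducibility.
    X = sp.random(60, 55, density=0.25, format='csr', random_state=rng)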
Example #9
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License

def test_bootstrap_features():
    # Test that bootstrapping features may generate duplicate features.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=False,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_equal(boston.data.shape[1], np.unique(features).shape[0])

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=True,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_greater(boston.data.shape[1], np.unique(features).shape[0])
Example #10
Source File: test_iforest.py From Mastering-Elasticsearch-7.0 with MIT License

def test_iforest_performance():
    """Test Isolation Forest performs well"""
    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    X_train = np.r_[X + 2, X - 2]
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # predict scores (the lower, the more normal)
    y_pred = - clf.decision_function(X_test)

    # check that there is at most 6 errors (false positive or false negative)
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)
Example #11
Source File: word2vec_helpers.py From question-classification-cnn-rnn-attention with Apache License 2.0

def __init__(self, test_model=False, verify_model=True):
    model = Word2Vec.load(modelfile)
    if(test_model):
        acc = model.accuracy(questionfile)
        logger.info("Test model " + modelfile + " in " + questionfile)
    self.vector_size = model.vector_size
    self.vocab_size = len(model.wv.vocab) + 1
    self.word2index = self.GetWord2Index(model)
    self.index2word = self.GetIndex2Word(model)
    self.wordvector = self.GetWordVector(model)

    if(verify_model):
        logger.info("Verifing imported word2vec model")
        random_state = check_random_state(12)
        check_index = random_state.randint(low=0, high=self.vocab_size-2, size=1000)
        for index in check_index:
            word_wv = model.wv.index2word[index]
            word_our = self.index2word[index+1]
            #print(index, word_wv, word_our)
            assert word_wv == word_our
            assert model.wv.vocab[word_our].index == self.word2index[word_our] - 1
            assert np.array_equal(model.wv[word_our], self.wordvector[self.word2index[word_our]])
        logger.info("Imported word2vec model is verified")
Example #12
Source File: plot_randomized_lasso_path.py From stability-selection with BSD 3-Clause "New" or "Revised" License

def generate_experiment_data(n=200, p=200, rho=0.6, random_state=3245):
    rng = check_random_state(random_state)

    sigma = np.eye(p)
    sigma[0, 2] = rho
    sigma[2, 0] = rho
    sigma[1, 2] = rho
    sigma[2, 1] = rho

    X = rng.multivariate_normal(mean=np.zeros(p), cov=sigma, size=(n,))
    beta = np.zeros(p)
    beta[:2] = 1.0
    epsilon = rng.normal(0.0, 0.25, size=(n,))

    y = np.matmul(X, beta) + epsilon

    return X, y
Example #13
Source File: test_graph_lasso.py From Mastering-Elasticsearch-7.0 with MIT License

def test_graph_lasso_cv(random_state=1):
    # Sample data from a sparse multivariate normal
    dim = 5
    n_samples = 6
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.96,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov,
                                         size=n_samples)
    # Capture stdout, to smoke test the verbose mode
    orig_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        # We need verbose very high so that Parallel prints on stdout
        GraphLassoCV(verbose=100, alphas=5, tol=1e-1).fit(X)
    finally:
        sys.stdout = orig_stdout

    # Smoke test with specified alphas
    GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1).fit(X)
Example #14
Source File: diagnostics.py From yatsm with MIT License

def __init__(self, roi, n_folds=3, mask_values=[0], shuffle=False,
             random_state=None):
    self.roi = roi
    self.n_folds = n_folds
    if isinstance(mask_values, (float, int)):
        self.mask_values = np.array([mask_values])
    elif isinstance(mask_values, (list, tuple)):
        self.mask_values = np.array(mask_values)
    elif isinstance(mask_values, np.ndarray):
        self.mask_values = mask_values
    else:
        raise TypeError('mask_values must be float, int, list, tuple,'
                        ' or np.ndarray')

    if shuffle:
        self.shuffle = True
        self.rng = check_random_state(random_state)

    self._label_roi()
Example #15
Source File: test_pls.py From Mastering-Elasticsearch-7.0 with MIT License

def test_pls_scaling():
    # sanity check for scale=True
    n_samples = 1000
    n_targets = 5
    n_features = 10

    rng = check_random_state(0)

    Q = rng.randn(n_targets, n_features)
    Y = rng.randn(n_samples, n_targets)
    X = np.dot(Y, Q) + 2 * rng.randn(n_samples, n_features) + 1
    X *= 1000
    X_scaled = StandardScaler().fit_transform(X)

    pls = pls_.PLSRegression(n_components=5, scale=True)

    pls.fit(X, Y)
    score = pls.score(X, Y)

    pls.fit(X_scaled, Y)
    score_scaled = pls.score(X_scaled, Y)

    assert_approx_equal(score, score_scaled)
Example #16
Source File: basis_functions.py From revrand with Apache License 2.0

def __init__(self, nbases, Xdim,
             mean=Parameter(norm_dist(), Bound()),
             lenscale=Parameter(gamma(1.), Positive()),
             regularizer=None,
             random_state=None
             ):
    """See this class's docstring."""
    self.random_state = random_state  # for repr
    self._random = check_random_state(random_state)
    self._init_dims(nbases, Xdim)
    self._params = [self._init_param(mean),
                    self._init_param(lenscale)]
    self._init_matrices()
    super(_LengthScaleBasis, self).__init__(regularizer)
Example #17
Source File: lmdd.py From pyod with BSD 2-Clause "Simplified" License

def _check_params(n_iter, dis_measure, random_state):
    """Internal function to check for and validate class parameters.
    Also, to return random state instance and the appropriate dissimilarity
    measure if valid.
    """
    if isinstance(n_iter, int):
        check_parameter(n_iter, low=1, param_name='n_iter')
    else:
        raise TypeError("n_iter should be int, got %s" % n_iter)

    if isinstance(dis_measure, str):
        if dis_measure not in ('aad', 'var', 'iqr'):
            raise ValueError("Unknown dissimilarity measure type, "
                             "dis_measure should be in "
                             "(\'aad\', \'var\', \'iqr\'), "
                             "got %s" % dis_measure)
        # TO-DO: 'mad': Median Absolute Deviation to be added
        # once Scipy stats version 1.3.0 is released
    else:
        raise TypeError("dis_measure should be str, got %s" % dis_measure)

    return check_random_state(random_state), _aad if dis_measure == 'aad' \
        else (np.var if dis_measure == 'var'
              else (stats.iqr if dis_measure == 'iqr' else None))
Example #18
Source File: generate.py From opt-mmd with BSD 3-Clause "New" or "Revised" License

def sample_blobs(n, ratio, rows=5, cols=5, sep=10, rs=None):
    rs = check_random_state(rs)
    # ratio is eigenvalue ratio
    correlation = (ratio - 1) / (ratio + 1)

    # generate within-blob variation
    mu = np.zeros(2)
    sigma = np.eye(2)
    X = rs.multivariate_normal(mu, sigma, size=n)
    corr_sigma = np.array([[1, correlation], [correlation, 1]])
    Y = rs.multivariate_normal(mu, corr_sigma, size=n)

    # assign to blobs
    X[:, 0] += rs.randint(rows, size=n) * sep
    X[:, 1] += rs.randint(cols, size=n) * sep
    Y[:, 0] += rs.randint(rows, size=n) * sep
    Y[:, 1] += rs.randint(cols, size=n) * sep

    return X, Y


################################################################################
### Sample images from GANs
Example #19
Source File: test_iforest.py From Mastering-Elasticsearch-7.0 with MIT License

def test_iforest_parallel_regression():
    """Check parallel regression."""
    rng = check_random_state(0)

    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = IsolationForest(n_jobs=3,
                               random_state=0).fit(X_train)

    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = IsolationForest(n_jobs=1,
                               random_state=0).fit(X_train)

    y3 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y3)
Example #20
Source File: glm.py From revrand with Apache License 2.0

def __init__(self,
             likelihood=Gaussian(),
             basis=LinearBasis(),
             K=10,
             maxiter=3000,
             batch_size=10,
             updater=None,
             nsamples=50,
             nstarts=500,
             random_state=None
             ):
    """See class docstring."""
    self.likelihood = likelihood
    self.basis = basis
    self.K = K
    self.maxiter = maxiter
    self.batch_size = batch_size
    self.updater = updater
    self.nsamples = nsamples
    self.nstarts = nstarts
    self.random_state = random_state  # For clone compatibility
    self.random_ = check_random_state(self.random_state)
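Examples #4 and #20 follow the same constructor convention: store random_state verbatim, so that sklearn's clone() and get_params() can round-trip the original argument, and keep the resolved generator in a separate trailing-underscore attribute. A minimal sketch of that pattern (ToyEstimator is a hypothetical name; strict sklearn style would resolve the generator in fit() rather than __init__, but this mirrors the revrand examples above):

    from sklearn.base import BaseEstimator, clone
    from sklearn.utils import check_random_state

    class ToyEstimator(BaseEstimator):  # hypothetical illustration
        def __init__(self, random_state=None):
            self.random_state = random_state                 # stored untouched
            self.random_ = check_random_state(random_state)  # resolved generator

        def sample(self, n):
            return self.random_.rand(n)

    est = ToyEstimator(random_state=7)
    est2 = clone(est)  # clone sees the original seed, not the mutated generator
    assert est2.get_params()["random_state"] == 7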
Example #21
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License

def test_parallel_regression():
    # Check parallel regression.
    rng = check_random_state(0)

    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=1,
                                random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y3)
Example #22
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License

def test_single_estimator():
    # Check singleton ensembles.
    rng = check_random_state(0)

    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    clf1 = BaggingRegressor(base_estimator=KNeighborsRegressor(),
                            n_estimators=1,
                            bootstrap=False,
                            bootstrap_features=False,
                            random_state=rng).fit(X_train, y_train)

    clf2 = KNeighborsRegressor().fit(X_train, y_train)

    assert_array_almost_equal(clf1.predict(X_test), clf2.predict(X_test))
Example #23
Source File: generators.py From sklearn-theano with BSD 3-Clause "New" or "Revised" License

def fetch_cifar_fully_connected_generated(n_samples=1000, random_state=None):
    rng = check_random_state(random_state)
    generator_func = _get_cifar_fully_connected_fprop()
    X = rng.randn(n_samples, 100).astype('float32')
    X_tf = generator_func(X)[0]
    X_tf = _normalize(X_tf)
    X_tf = X_tf.reshape(len(X_tf), 3, 32, 32).transpose(0, 2, 3, 1)
    return (255 * X_tf).astype('uint8')
Example #24
Source File: garules.py From fylearn with MIT License

def __init__(self, n_iterations=10, n_models=3, random_state=None,
             sample_size=10, n_iterations_weights=10):
    self.n_iterations = n_iterations
    self.n_models = n_models
    self.random_state = check_random_state(random_state)
    self.sample_size = sample_size
    self.n_iterations_weights = n_iterations_weights
Example #25
Source File: ga.py From fylearn with MIT License

def __call__(self, A, B, random_state):
    random_state = check_random_state(random_state)

    A, B = np.array(A, copy=False), np.array(B, copy=False)
    is_1d = len(A.shape) == 1
    A, B = np.atleast_2d(A), np.atleast_2d(B)

    C = np.zeros(A.shape)

    def pick(a, b, i):
        r = a if i[2] == 0 else b
        return r[i[0]:i[1]].tolist()

    start = [0]
    end = [A.shape[1]]
    for idx, a in enumerate(A):
        b = B[idx]
        selected = np.sort(random_state.choice(self.crossover_locations,
                                               self.n_crossovers))
        # use python to merge
        selected = start + selected.tolist() + end
        index = zip(selected, selected[1:], [0, 1] * len(selected))
        merged = np.array([item for i in index for item in pick(a, b, i)])
        # add merged child
        C[idx, :] = merged

    if is_1d:
        return C.ravel()
    else:
        return C
Example #26
Source File: local_search.py From fylearn with MIT License

def __init__(self, f, lower_bound, upper_bound, lower_init=None,
             upper_init=None, random_state=None, max_evaluations=100):
    self.f = f
    self.lower_bound = np.array(lower_bound)
    self.upper_bound = np.array(upper_bound)
    self.lower_init = np.array(lower_bound) if lower_init is None else np.array(lower_init)
    self.upper_init = np.array(upper_bound) if upper_init is None else np.array(upper_init)
    self.random_state = check_random_state(random_state)
    self.max_evaluations = int(max_evaluations)
    self.optimize_function_args = {}
Example #27
Source File: garules.py From fylearn with MIT License

def __init__(self, n_iterations=10, df=stoean_f, random_state=None):
    self.n_iterations = n_iterations
    self.df = df
    self.random_state = check_random_state(random_state)
Example #28
Source File: tlbo.py From fylearn with MIT License

def __init__(self, f, lower_bound, upper_bound, n_population=50, random_state=None):
    """
    Constructor

    Parameters:
    -----------
    f : function to minimize.
    lower_bound : Vector with lower bound of the search space.
    upper_bound : Vector with upper bound of the search space.
    n_population : Number of individuals in the population [Default: 50].
    random_state : Specific random state to use [Default: None]
    """
    self.f = f
    self.lower_bound = lower_bound
    self.upper_bound = upper_bound
    self.pidx = list(range(n_population))
    self.m = lower_bound.shape[0]  # columns
    self.random_state = check_random_state(random_state)
    # init population and fitness
    self.population_ = self.random_state.rand(n_population, self.m) * \
        (upper_bound - lower_bound) + lower_bound
    self.fitness_ = np.apply_along_axis(self.f, 1, self.population_)
    # init bestidx
    self.bestidx_ = np.argmin(self.fitness_)
    self.bestcosts_ = [self.fitness_[self.bestidx_]]
Example #29
Source File: test_utility.py From pyod with BSD 2-Clause "Simplified" License

def setUp(self):
    random_state = check_random_state(42)
    self.X_train = random_state.rand(500, 5)
    self.X_test = random_state.rand(100, 5)
    self.X_test_diff = random_state.rand(100, 10)
    self.scores1 = [0.1, 0.3, 0.5, 0.7, 0.2, 0.1]
    self.scores2 = np.array([0.1, 0.3, 0.5, 0.7, 0.2, 0.1])