Python sklearn.utils.check_random_state() Examples
The following are 29 code examples of sklearn.utils.check_random_state(), collected from open-source projects.
You may also want to check out all available functions and classes of the module sklearn.utils.
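For orientation before the examples, here is a minimal sketch (not taken from any of the projects below) of the contract check_random_state implements, per sklearn's documented behavior: None yields numpy's global RandomState, an int seeds a fresh RandomState, and an existing RandomState instance is passed through unchanged.

    from numpy.random import RandomState
    from sklearn.utils import check_random_state

    rng_none = check_random_state(None)     # the global numpy RandomState
    rng_int = check_random_state(42)        # a fresh RandomState seeded with 42
    rng_same = check_random_state(rng_int)  # an instance is returned as-is
    assert rng_same is rng_int
    assert isinstance(rng_none, RandomState)
    # Any other input (e.g. a string) raises ValueError.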
Example #1
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License

def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR(gamma='scale')]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test)
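An aside on this test (not part of the original file): seeding with an integer rebuilds an identical generator on every call, while passing one shared RandomState instance, as random_state=rng does above, advances a single stream so each consumer draws different but reproducible values. A minimal illustration:

    from sklearn.utils import check_random_state

    # An int seed builds a fresh generator each time, so the draw repeats:
    assert check_random_state(0).rand() == check_random_state(0).rand()

    # A shared instance advances its state, so successive draws differ:
    rng = check_random_state(0)
    first, second = rng.rand(), rng.rand()
    assert first != second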
Example #2
Source File: test_randomized_lasso.py From stability-selection with BSD 3-Clause "New" or "Revised" License

def generate_experiment_data(n=200, p=200, rho=0.6, random_state=3245):
    rng = check_random_state(random_state)

    sigma = np.eye(p)
    sigma[0, 2] = rho
    sigma[2, 0] = rho
    sigma[1, 2] = rho
    sigma[2, 1] = rho

    X = rng.multivariate_normal(mean=np.zeros(p), cov=sigma, size=(n,))
    beta = np.zeros(p)
    beta[:2] = 1.0
    epsilon = rng.normal(0.0, 0.25, size=(n,))

    y = np.matmul(X, beta) + epsilon

    return X, y
Example #3
Source File: test_graphical_lasso.py From Mastering-Elasticsearch-7.0 with MIT License

def test_graphical_lasso_cv(random_state=1):
    # Sample data from a sparse multivariate normal
    dim = 5
    n_samples = 6
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.96,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov,
                                         size=n_samples)
    # Capture stdout, to smoke test the verbose mode
    orig_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        # We need verbose very high so that Parallel prints on stdout
        GraphicalLassoCV(verbose=100, alphas=5, tol=1e-1).fit(X)
    finally:
        sys.stdout = orig_stdout

    # Smoke test with specified alphas
    GraphicalLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1).fit(X)
Example #4
Source File: slm.py From revrand with Apache License 2.0

def __init__(self,
             basis=LinearBasis(),
             var=Parameter(gamma(1.), Positive()),
             tol=1e-8,
             maxiter=1000,
             nstarts=100,
             random_state=None
             ):
    """See class docstring."""
    self.basis = basis
    self.var = var
    self.tol = tol
    self.maxiter = maxiter
    self.nstarts = nstarts
    self.random_state = random_state
    self.random_ = check_random_state(random_state)
Example #5
Source File: test_iforest.py From Mastering-Elasticsearch-7.0 with MIT License

def test_iforest_warm_start():
    """Test iterative addition of iTrees to an iForest"""
    rng = check_random_state(0)
    X = rng.randn(20, 2)

    # fit first 10 trees
    clf = IsolationForest(n_estimators=10, max_samples=20,
                          random_state=rng, warm_start=True)
    clf.fit(X)
    # remember the 1st tree
    tree_1 = clf.estimators_[0]
    # fit another 10 trees
    clf.set_params(n_estimators=20)
    clf.fit(X)
    # expecting 20 fitted trees and no overwritten trees
    assert len(clf.estimators_) == 20
    assert clf.estimators_[0] is tree_1
Example #6
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License

def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC(gamma="scale")]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)
Example #7
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License

def test_bootstrap_samples():
    # Test that bootstrapping samples generate non-perfect base estimators.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    base_estimator = DecisionTreeRegressor().fit(X_train, y_train)

    # without bootstrap, all trees are perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=False,
                                random_state=rng).fit(X_train, y_train)

    assert_equal(base_estimator.score(X_train, y_train),
                 ensemble.score(X_train, y_train))

    # with bootstrap, trees are no longer perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=True,
                                random_state=rng).fit(X_train, y_train)

    assert_greater(base_estimator.score(X_train, y_train),
                   ensemble.score(X_train, y_train))

    # check that each sampling correspond to a complete bootstrap resample.
    # the size of each bootstrap should be the same as the input data but
    # the data should be different (checked using the hash of the data).
    ensemble = BaggingRegressor(base_estimator=DummySizeEstimator(),
                                bootstrap=True).fit(X_train, y_train)
    training_hash = []
    for estimator in ensemble.estimators_:
        assert estimator.training_size_ == X_train.shape[0]
        training_hash.append(estimator.training_hash_)
    assert len(set(training_hash)) == len(training_hash)
Example #8
Source File: test_truncated_svd.py From mars with Apache License 2.0

def setUp(self):
    # Make an X that looks somewhat like a small tf-idf matrix.
    # XXX newer versions of SciPy >0.16 have scipy.sparse.rand for this.
    shape = 60, 55
    n_samples, n_features = shape
    rng = check_random_state(42)
    X = rng.randint(-100, 20, np.product(shape)).reshape(shape)
    X = sp.csr_matrix(np.maximum(X, 0), dtype=np.float64)
    X.data[:] = 1 + np.log(X.data)
    self.X = X
    self.Xdense = X.A
    self.n_samples = n_samples
    self.n_features = n_features

    self.session = new_session().as_default()
    self._old_executor = self.session._sess._executor
    self.executor = self.session._sess._executor = \
        ExecutorForTest('numpy', storage=self.session._sess._context)
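An aside on the XXX comment above: on SciPy 0.17+ the random sparse matrix can be drawn directly with scipy.sparse.random, which accepts the same style of random_state argument. A minimal sketch, with the density chosen arbitrarily:

    import scipy.sparse as sp
    from sklearn.utils import check_random_state

    rng = check_random_state(42)
    # Draw a 60x55 CSR matrix with ~25% non-zeros straight from scipy,
    # reusing the validated RandomState for reproducibility.
    X = sp.random(60, 55, density=0.25, format='csr', random_state=rng)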
Example #9
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License

def test_bootstrap_features():
    # Test that bootstrapping features may generate duplicate features.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=False,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_equal(boston.data.shape[1], np.unique(features).shape[0])

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=True,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_greater(boston.data.shape[1], np.unique(features).shape[0])
Example #10
Source File: test_iforest.py From Mastering-Elasticsearch-7.0 with MIT License

def test_iforest_performance():
    """Test Isolation Forest performs well"""
    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    X_train = np.r_[X + 2, X - 2]
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # predict scores (the lower, the more normal)
    y_pred = - clf.decision_function(X_test)

    # check that there is at most 6 errors (false positive or false negative)
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)
Example #11
Source File: word2vec_helpers.py From question-classification-cnn-rnn-attention with Apache License 2.0

def __init__(self, test_model=False, verify_model=True):
    model = Word2Vec.load(modelfile)
    if(test_model):
        acc = model.accuracy(questionfile)
        logger.info("Test model " + modelfile + " in " + questionfile)
    self.vector_size = model.vector_size
    self.vocab_size = len(model.wv.vocab) + 1
    self.word2index = self.GetWord2Index(model)
    self.index2word = self.GetIndex2Word(model)
    self.wordvector = self.GetWordVector(model)

    if(verify_model):
        logger.info("Verifing imported word2vec model")
        random_state = check_random_state(12)
        check_index = random_state.randint(low=0, high=self.vocab_size-2, size=1000)
        for index in check_index:
            word_wv = model.wv.index2word[index]
            word_our = self.index2word[index+1]
            #print(index, word_wv, word_our)
            assert word_wv == word_our
            assert model.wv.vocab[word_our].index == self.word2index[word_our] - 1
            assert np.array_equal(model.wv[word_our], self.wordvector[self.word2index[word_our]])
        logger.info("Imported word2vec model is verified")
Example #12
Source File: plot_randomized_lasso_path.py From stability-selection with BSD 3-Clause "New" or "Revised" License

def generate_experiment_data(n=200, p=200, rho=0.6, random_state=3245):
    rng = check_random_state(random_state)

    sigma = np.eye(p)
    sigma[0, 2] = rho
    sigma[2, 0] = rho
    sigma[1, 2] = rho
    sigma[2, 1] = rho

    X = rng.multivariate_normal(mean=np.zeros(p), cov=sigma, size=(n,))
    beta = np.zeros(p)
    beta[:2] = 1.0
    epsilon = rng.normal(0.0, 0.25, size=(n,))

    y = np.matmul(X, beta) + epsilon

    return X, y
Example #13
Source File: test_graph_lasso.py From Mastering-Elasticsearch-7.0 with MIT License

def test_graph_lasso_cv(random_state=1):
    # Sample data from a sparse multivariate normal
    dim = 5
    n_samples = 6
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.96,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov,
                                         size=n_samples)
    # Capture stdout, to smoke test the verbose mode
    orig_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        # We need verbose very high so that Parallel prints on stdout
        GraphLassoCV(verbose=100, alphas=5, tol=1e-1).fit(X)
    finally:
        sys.stdout = orig_stdout

    # Smoke test with specified alphas
    GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1).fit(X)
Example #14
Source File: diagnostics.py From yatsm with MIT License

def __init__(self, roi, n_folds=3, mask_values=[0], shuffle=False,
             random_state=None):
    self.roi = roi
    self.n_folds = n_folds
    if isinstance(mask_values, (float, int)):
        self.mask_values = np.array([mask_values])
    elif isinstance(mask_values, (list, tuple)):
        self.mask_values = np.array(mask_values)
    elif isinstance(mask_values, np.ndarray):
        self.mask_values = mask_values
    else:
        raise TypeError('mask_values must be float, int, list, tuple,'
                        ' or np.ndarray')

    if shuffle:
        self.shuffle = True
        self.rng = check_random_state(random_state)

    self._label_roi()
Example #15
Source File: test_pls.py From Mastering-Elasticsearch-7.0 with MIT License

def test_pls_scaling():
    # sanity check for scale=True
    n_samples = 1000
    n_targets = 5
    n_features = 10

    rng = check_random_state(0)

    Q = rng.randn(n_targets, n_features)
    Y = rng.randn(n_samples, n_targets)
    X = np.dot(Y, Q) + 2 * rng.randn(n_samples, n_features) + 1
    X *= 1000
    X_scaled = StandardScaler().fit_transform(X)

    pls = pls_.PLSRegression(n_components=5, scale=True)

    pls.fit(X, Y)
    score = pls.score(X, Y)

    pls.fit(X_scaled, Y)
    score_scaled = pls.score(X_scaled, Y)

    assert_approx_equal(score, score_scaled)
Example #16
Source File: basis_functions.py From revrand with Apache License 2.0

def __init__(self, nbases, Xdim,
             mean=Parameter(norm_dist(), Bound()),
             lenscale=Parameter(gamma(1.), Positive()),
             regularizer=None,
             random_state=None
             ):
    """See this class's docstring."""
    self.random_state = random_state  # for repr
    self._random = check_random_state(random_state)
    self._init_dims(nbases, Xdim)
    self._params = [self._init_param(mean),
                    self._init_param(lenscale)]
    self._init_matrices()
    super(_LengthScaleBasis, self).__init__(regularizer)
Example #17
Source File: lmdd.py From pyod with BSD 2-Clause "Simplified" License

def _check_params(n_iter, dis_measure, random_state):
    """Internal function to check for and validate class parameters.
    Also, to return random state instance and the appropriate dissimilarity
    measure if valid.
    """
    if isinstance(n_iter, int):
        check_parameter(n_iter, low=1, param_name='n_iter')
    else:
        raise TypeError("n_iter should be int, got %s" % n_iter)

    if isinstance(dis_measure, str):
        if dis_measure not in ('aad', 'var', 'iqr'):
            raise ValueError("Unknown dissimilarity measure type, "
                             "dis_measure should be in "
                             "(\'aad\', \'var\', \'iqr\'), "
                             "got %s" % dis_measure)
        # TO-DO: 'mad': Median Absolute Deviation to be added
        # once Scipy stats version 1.3.0 is released
    else:
        raise TypeError("dis_measure should be str, got %s" % dis_measure)

    return check_random_state(random_state), _aad if dis_measure == 'aad' \
        else (np.var if dis_measure == 'var'
              else (stats.iqr if dis_measure == 'iqr' else None))
Example #18
Source File: generate.py From opt-mmd with BSD 3-Clause "New" or "Revised" License

def sample_blobs(n, ratio, rows=5, cols=5, sep=10, rs=None):
    rs = check_random_state(rs)
    # ratio is eigenvalue ratio
    correlation = (ratio - 1) / (ratio + 1)

    # generate within-blob variation
    mu = np.zeros(2)
    sigma = np.eye(2)
    X = rs.multivariate_normal(mu, sigma, size=n)
    corr_sigma = np.array([[1, correlation], [correlation, 1]])
    Y = rs.multivariate_normal(mu, corr_sigma, size=n)

    # assign to blobs
    X[:, 0] += rs.randint(rows, size=n) * sep
    X[:, 1] += rs.randint(cols, size=n) * sep
    Y[:, 0] += rs.randint(rows, size=n) * sep
    Y[:, 1] += rs.randint(cols, size=n) * sep

    return X, Y


################################################################################
### Sample images from GANs
Example #19
Source File: test_iforest.py From Mastering-Elasticsearch-7.0 with MIT License

def test_iforest_parallel_regression():
    """Check parallel regression."""
    rng = check_random_state(0)

    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = IsolationForest(n_jobs=3,
                               random_state=0).fit(X_train)

    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = IsolationForest(n_jobs=1,
                               random_state=0).fit(X_train)

    y3 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y3)
Example #20
Source File: glm.py From revrand with Apache License 2.0

def __init__(self,
             likelihood=Gaussian(),
             basis=LinearBasis(),
             K=10,
             maxiter=3000,
             batch_size=10,
             updater=None,
             nsamples=50,
             nstarts=500,
             random_state=None
             ):
    """See class docstring."""
    self.likelihood = likelihood
    self.basis = basis
    self.K = K
    self.maxiter = maxiter
    self.batch_size = batch_size
    self.updater = updater
    self.nsamples = nsamples
    self.nstarts = nstarts
    self.random_state = random_state  # For clone compatibility
    self.random_ = check_random_state(self.random_state)
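Examples #4 and #20 follow the same constructor convention: store random_state verbatim, so that sklearn's clone() and get_params() can round-trip the original argument, and keep the resolved generator in a separate trailing-underscore attribute. A minimal sketch of that pattern (ToyEstimator is a hypothetical name; strict sklearn style would resolve the generator in fit() rather than __init__, but this mirrors the revrand examples above):

    from sklearn.base import BaseEstimator, clone
    from sklearn.utils import check_random_state

    class ToyEstimator(BaseEstimator):  # hypothetical illustration
        def __init__(self, random_state=None):
            self.random_state = random_state                 # stored untouched
            self.random_ = check_random_state(random_state)  # resolved generator

        def sample(self, n):
            return self.random_.rand(n)

    est = ToyEstimator(random_state=7)
    est2 = clone(est)  # clone sees the original seed, not the mutated generator
    assert est2.get_params()["random_state"] == 7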
Example #21
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License

def test_parallel_regression():
    # Check parallel regression.
    rng = check_random_state(0)

    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=1,
                                random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y3)
Example #22
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License

def test_single_estimator():
    # Check singleton ensembles.
    rng = check_random_state(0)

    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    clf1 = BaggingRegressor(base_estimator=KNeighborsRegressor(),
                            n_estimators=1,
                            bootstrap=False,
                            bootstrap_features=False,
                            random_state=rng).fit(X_train, y_train)

    clf2 = KNeighborsRegressor().fit(X_train, y_train)

    assert_array_almost_equal(clf1.predict(X_test), clf2.predict(X_test))
Example #23
Source File: generators.py From sklearn-theano with BSD 3-Clause "New" or "Revised" License

def fetch_cifar_fully_connected_generated(n_samples=1000, random_state=None):
    rng = check_random_state(random_state)
    generator_func = _get_cifar_fully_connected_fprop()
    X = rng.randn(n_samples, 100).astype('float32')
    X_tf = generator_func(X)[0]
    X_tf = _normalize(X_tf)
    X_tf = X_tf.reshape(len(X_tf), 3, 32, 32).transpose(0, 2, 3, 1)
    return (255 * X_tf).astype('uint8')
Example #24
Source File: garules.py From fylearn with MIT License

def __init__(self, n_iterations=10, n_models=3, random_state=None,
             sample_size=10, n_iterations_weights=10):
    self.n_iterations = n_iterations
    self.n_models = n_models
    self.random_state = check_random_state(random_state)
    self.sample_size = sample_size
    self.n_iterations_weights = n_iterations_weights
Example #25
Source File: ga.py From fylearn with MIT License

def __call__(self, A, B, random_state):
    random_state = check_random_state(random_state)

    A, B = np.array(A, copy=False), np.array(B, copy=False)
    is_1d = len(A.shape) == 1
    A, B = np.atleast_2d(A), np.atleast_2d(B)

    C = np.zeros(A.shape)

    def pick(a, b, i):
        r = a if i[2] == 0 else b
        return r[i[0]:i[1]].tolist()

    start = [0]
    end = [A.shape[1]]
    for idx, a in enumerate(A):
        b = B[idx]
        selected = np.sort(random_state.choice(self.crossover_locations,
                                               self.n_crossovers))
        # use python to merge
        selected = start + selected.tolist() + end
        index = zip(selected, selected[1:], [0, 1] * len(selected))
        merged = np.array([item for i in index for item in pick(a, b, i)])
        # add merged child
        C[idx, :] = merged

    if is_1d:
        return C.ravel()
    else:
        return C
Example #26
Source File: local_search.py From fylearn with MIT License

def __init__(self, f, lower_bound, upper_bound, lower_init=None,
             upper_init=None, random_state=None, max_evaluations=100):
    self.f = f
    self.lower_bound = np.array(lower_bound)
    self.upper_bound = np.array(upper_bound)
    self.lower_init = np.array(lower_bound) if lower_init is None else np.array(lower_init)
    self.upper_init = np.array(upper_bound) if upper_init is None else np.array(upper_init)
    self.random_state = check_random_state(random_state)
    self.max_evaluations = int(max_evaluations)
    self.optimize_function_args = {}
Example #27
Source File: garules.py From fylearn with MIT License

def __init__(self, n_iterations=10, df=stoean_f, random_state=None):
    self.n_iterations = n_iterations
    self.df = df
    self.random_state = check_random_state(random_state)
Example #28
Source File: tlbo.py From fylearn with MIT License

def __init__(self, f, lower_bound, upper_bound, n_population=50, random_state=None):
    """
    Constructor

    Parameters:
    -----------
    f : function to minimize.
    lower_bound : Vector with lower bound of the search space.
    upper_bound : Vector with upper bound of the search space.
    n_population : Number of individuals in the population [Default: 50].
    random_state : Specific random state to use [Default: None]
    """
    self.f = f
    self.lower_bound = lower_bound
    self.upper_bound = upper_bound
    self.pidx = list(range(n_population))
    self.m = lower_bound.shape[0]  # columns
    self.random_state = check_random_state(random_state)
    # init population and fitness
    self.population_ = self.random_state.rand(n_population, self.m) * \
        (upper_bound - lower_bound) + lower_bound
    self.fitness_ = np.apply_along_axis(self.f, 1, self.population_)
    # init bestidx
    self.bestidx_ = np.argmin(self.fitness_)
    self.bestcosts_ = [self.fitness_[self.bestidx_]]
Example #29
Source File: test_utility.py From pyod with BSD 2-Clause "Simplified" License

def setUp(self):
    random_state = check_random_state(42)
    self.X_train = random_state.rand(500, 5)
    self.X_test = random_state.rand(100, 5)
    self.X_test_diff = random_state.rand(100, 10)
    self.scores1 = [0.1, 0.3, 0.5, 0.7, 0.2, 0.1]
    self.scores2 = np.array([0.1, 0.3, 0.5, 0.7, 0.2, 0.1])