Python numpy.RandomState() Examples
The following are 24 code examples of numpy.random.RandomState().
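Before diving into the examples, a quick orientation: numpy.random.RandomState is NumPy's seedable generator object, and constructing it with an integer seed makes every subsequent draw reproducible. A minimal sketch of the basic API:

import numpy as np

rng = np.random.RandomState(42)    # seeded: draws are reproducible
print(rng.randint(0, 10, size=5))  # five integers drawn from [0, 10)
print(rng.randn(3))                # three standard-normal samples

# Identical seeds yield identical streams.
a = np.random.RandomState(7).randn(4)
b = np.random.RandomState(7).randn(4)
assert (a == b).all()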
Example #1
Source File: shared_randomstreams.py From D-VAE with MIT License

def randomstate_constructor(value, name=None, strict=False,
                            allow_downcast=None, borrow=False):
    """
    SharedVariable Constructor for RandomState.
    """
    if not isinstance(value, numpy.random.RandomState):
        raise TypeError
    if not borrow:
        value = copy.deepcopy(value)
    return RandomStateSharedVariable(type=raw_random.random_state_type,
                                     value=value,
                                     name=name,
                                     strict=strict,
                                     allow_downcast=allow_downcast)
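The `borrow` flag above controls whether the shared variable aliases the caller's RandomState or receives an independent deep copy. A plain-NumPy illustration of that distinction (the variable names are for demonstration only, not Theano API):

import copy
import numpy as np

state = np.random.RandomState(0)
alias = state                 # borrow=True behaviour: same underlying object
clone = copy.deepcopy(state)  # borrow=False behaviour: independent copy

state.randint(100)            # advances `state`, and `alias` along with it
# `clone` was copied before any draws, so it still starts from seed 0:
assert clone.randint(100) == np.random.RandomState(0).randint(100)
assert alias is state and clone is not state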
Example #2
Source File: shared_randomstreams.py From D-VAE with MIT License

def seed(self, seed=None):
    """
    Re-initialize each random stream.

    Parameters
    ----------
    seed : None or integer in range 0 to 2**30
        Each random stream will be assigned a unique state that depends
        deterministically on this value.

    Returns
    -------
    None

    """
    if seed is None:
        seed = self.default_instance_seed

    seedgen = numpy.random.RandomState(seed)
    for old_r, new_r in self.state_updates:
        old_r_seed = seedgen.randint(2 ** 30)
        old_r.set_value(numpy.random.RandomState(int(old_r_seed)),
                        borrow=True)
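The seeding pattern above (one master RandomState whose `randint` draws provide seeds for many child RandomStates) is a standard way to derive reproducible streams from a single seed. A standalone sketch:

import numpy as np

master = np.random.RandomState(123)            # the seed generator
child_seeds = master.randint(2 ** 30, size=3)  # one seed per stream
streams = [np.random.RandomState(int(s)) for s in child_seeds]

# Re-running with the same master seed reproduces every child stream.
master_again = np.random.RandomState(123)
assert (master_again.randint(2 ** 30, size=3) == child_seeds).all()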
Example #3
Source File: shared_randomstreams.py From D-VAE with MIT License

def __getitem__(self, item):
    """
    Retrieve the numpy RandomState instance associated with a particular
    stream.

    Parameters
    ----------
    item
        A variable of type RandomStateType, associated with this
        RandomStream.

    Returns
    -------
    numpy RandomState (or None, before initialize)

    Notes
    -----
    This is kept for compatibility with
    `tensor.randomstreams.RandomStreams`. The simpler syntax
    ``item.rng.get_value()`` is also valid.

    """
    return item.get_value(borrow=True)
Example #4
Source File: shared_randomstreams.py From D-VAE with MIT License

def __setitem__(self, item, val):
    """
    Set the numpy RandomState instance associated with a particular stream.

    Parameters
    ----------
    item
        A variable of type RandomStateType, associated with this
        RandomStream.
    val : numpy RandomState
        The new value.

    Returns
    -------
    None

    Notes
    -----
    This is kept for compatibility with
    `tensor.randomstreams.RandomStreams`. The simpler syntax
    ``item.rng.set_value(val)`` is also valid.

    """
    item.set_value(val, borrow=True)
Example #5
Source File: shared_randomstreams.py From attention-lvcsr with MIT License

def randomstate_constructor(value, name=None, strict=False,
                            allow_downcast=None, borrow=False):
    """
    SharedVariable Constructor for RandomState.
    """
    if not isinstance(value, numpy.random.RandomState):
        raise TypeError
    if not borrow:
        value = copy.deepcopy(value)
    return RandomStateSharedVariable(type=raw_random.random_state_type,
                                     value=value,
                                     name=name,
                                     strict=strict,
                                     allow_downcast=allow_downcast)
Example #6
Source File: shared_randomstreams.py From attention-lvcsr with MIT License

def seed(self, seed=None):
    """
    Re-initialize each random stream.

    Parameters
    ----------
    seed : None or integer in range 0 to 2**30
        Each random stream will be assigned a unique state that depends
        deterministically on this value.

    Returns
    -------
    None

    """
    if seed is None:
        seed = self.default_instance_seed

    seedgen = numpy.random.RandomState(seed)
    for old_r, new_r in self.state_updates:
        old_r_seed = seedgen.randint(2 ** 30)
        old_r.set_value(numpy.random.RandomState(int(old_r_seed)),
                        borrow=True)
Example #7
Source File: shared_randomstreams.py From attention-lvcsr with MIT License

def __getitem__(self, item):
    """
    Retrieve the numpy RandomState instance associated with a particular
    stream.

    Parameters
    ----------
    item
        A variable of type RandomStateType, associated with this
        RandomStream.

    Returns
    -------
    numpy RandomState (or None, before initialize)

    Notes
    -----
    This is kept for compatibility with
    `tensor.randomstreams.RandomStreams`. The simpler syntax
    ``item.rng.get_value()`` is also valid.

    """
    return item.get_value(borrow=True)
Example #8
Source File: shared_randomstreams.py From attention-lvcsr with MIT License

def __setitem__(self, item, val):
    """
    Set the numpy RandomState instance associated with a particular stream.

    Parameters
    ----------
    item
        A variable of type RandomStateType, associated with this
        RandomStream.
    val : numpy RandomState
        The new value.

    Returns
    -------
    None

    Notes
    -----
    This is kept for compatibility with
    `tensor.randomstreams.RandomStreams`. The simpler syntax
    ``item.rng.set_value(val)`` is also valid.

    """
    item.set_value(val, borrow=True)
Example #9
Source File: utils.py From pymer4 with MIT License

def _check_random_state(seed):
    """Turn seed into a np.random.RandomState instance.

    Note: credit for this code goes entirely to
    sklearn.utils.check_random_state. Using the source here simply avoids an
    unnecessary dependency.

    Args:
        seed (None, int, np.random.RandomState): If seed is None, return the
            RandomState singleton used by np.random. If seed is an int,
            return a new RandomState instance seeded with seed. If seed is
            already a RandomState instance, return it. Otherwise raise
            ValueError.
    """
    import numbers

    if seed is None or seed is np.random:
        return np.random.mtrand._rand
    if isinstance(seed, (numbers.Integral, np.integer)):
        return np.random.RandomState(seed)
    if isinstance(seed, np.random.RandomState):
        return seed
    raise ValueError(
        "%r cannot be used to seed a numpy.random.RandomState"
        " instance" % seed
    )
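A quick usage sketch, assuming `_check_random_state` is in scope, showing the three accepted input types:

import numpy as np

rng_a = _check_random_state(None)                      # the global singleton
rng_b = _check_random_state(7)                         # fresh seeded generator
rng_c = _check_random_state(np.random.RandomState(7))  # passed through as-is
assert rng_c.randint(100) == rng_b.randint(100)        # same seed, same draw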
Example #10
Source File: feature_utilities.py From vessel-classification with Apache License 2.0

def np_zero_pad_slice(slice, window_size, random_state):
    """Pads slice to the specified window size.

    Series that are shorter than window_size are repeated into unfilled
    space.

    Args:
        slice: np.array.
        window_size: int
            Size the array must be padded to.
        random_state: np.RandomState

    Returns:
        a numpy array of length window_size in the first dimension.

    TODO: this function has an inaccurate name; really this is doing
    pad_repeat_slice with a random offset for data augmentation. Rename or
    remove at next cleanup since it doesn't appear to be used.
    """
    slice_length = len(slice)
    delta = window_size - slice_length
    assert delta >= 0
    offset = random_state.randint(0, delta + 1)
    # `reps` was undefined in the original listing; repeating enough times
    # to cover both the offset and the window restores a working version.
    reps = int(np.ceil((offset + window_size) / float(slice_length)))
    return np.concatenate([slice] * reps, axis=0)[offset:offset + window_size]
Example #11
Source File: shared_randomstreams.py From D-VAE with MIT License

def __init__(self, seed=None):
    super(RandomStreams, self).__init__()
    # A list of pairs of the form (input_r, output_r). This will be
    # over-ridden by the module instance to contain stream generators.
    self.state_updates = []
    # Instance variable should take None or integer value. Used to seed the
    # random number generator that provides seeds for member streams.
    self.default_instance_seed = seed
    # numpy.RandomState instance that gen() uses to seed new streams.
    self.gen_seedgen = numpy.random.RandomState(seed)
Example #12
Source File: utils.py From pymer4 with MIT License

def _permute_sign(data, seed, return_stat="mean"):
    """Given a list/array of data, randomly sign flip the values and compute
    a new mean. For use in a one-sample permutation test. Returns a 'mean'
    or 't-stat'."""

    random_state = np.random.RandomState(seed)
    new_dat = data * random_state.choice([1, -1], len(data))
    # The original listing compared against the misspelled string "ceof";
    # the docstring and the default argument indicate "mean" is intended.
    if return_stat == "mean":
        return np.mean(new_dat)
    elif return_stat == "t-stat":
        return np.mean(new_dat) / (np.std(new_dat, ddof=1)
                                   / np.sqrt(len(new_dat)))
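A sketch of how a helper like this drives a one-sample sign-flip permutation test: the observed mean is ranked against a null distribution of sign-flipped means. The setup below is illustrative, not taken from pymer4:

import numpy as np

data = np.random.RandomState(0).randn(30) + 0.5   # sample with true mean 0.5
observed = np.mean(data)
null = [_permute_sign(data, seed=s) for s in range(5000)]
p_value = np.mean(np.abs(null) >= np.abs(observed))  # two-sided p-value
print(p_value)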
Example #13
Source File: feature_utilities.py From vessel-classification with Apache License 2.0

def np_pad_repeat_slice_2(slice, window_size, random_state):
    """Pads slice to the specified window size, then rolls it.

    Series that are shorter than window_size are repeated into unfilled
    space, then the series is randomly rolled along the time axis to
    generate more training diversity. This has the side effect of adding a
    non-physical seam in the data, but in practice seems to work better than
    not rolling.

    Similar to `np_pad_repeat_slice` except for rolling the sequence along
    the time axis.

    Args:
        slice: a numpy array.
        window_size: the size the array must be padded to.
        random_state: a numpy RandomState object.

    Returns:
        a numpy array of length window_size in the first dimension.
    """
    slice_length = len(slice)
    delta = window_size - slice_length
    assert delta >= 0
    slice = slice.copy()
    # Write a large sentinel into the first timestep's log(dt) column so
    # that repetition seams are flagged in the data.
    GAP_LOGDT = 100
    slice[0, 1] = GAP_LOGDT
    reps = int(np.ceil(window_size / float(slice_length)))
    repeated = np.concatenate([slice] * reps, axis=0)
    offset = random_state.randint(0, window_size)
    return np.roll(repeated, offset, axis=0)[:window_size]
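A small usage sketch, assuming `np_pad_repeat_slice_2` is in scope. Note the input must be 2-D with at least two columns, because the function writes the gap marker into `slice[0, 1]`:

import numpy as np

# A short 2-column series (e.g. [feature, log-dt]) padded out to 8 rows.
series = np.arange(10, dtype=float).reshape(5, 2)
rng = np.random.RandomState(0)
padded = np_pad_repeat_slice_2(series, window_size=8, random_state=rng)
assert padded.shape == (8, 2)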
Example #14
Source File: util.py From BayesianOptimization with MIT License

def ensure_rng(random_state=None):
    """
    Creates a random number generator based on an optional seed. This can be
    an integer or another random state for a seeded rng, or None for an
    unseeded rng.
    """
    if random_state is None:
        random_state = np.random.RandomState()
    elif isinstance(random_state, int):
        random_state = np.random.RandomState(random_state)
    else:
        assert isinstance(random_state, np.random.RandomState)
    return random_state
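Usage is straightforward; all three accepted inputs normalize to a RandomState:

rng_unseeded = ensure_rng()           # fresh, unseeded RandomState
rng_seeded = ensure_rng(42)           # seeded for reproducibility
passthrough = ensure_rng(rng_seeded)  # existing instances pass through
assert passthrough is rng_seeded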
Example #15
Source File: metrics.py From tslearn with BSD 2-Clause "Simplified" License

def gamma_soft_dtw(dataset, n_samples=100, random_state=None):
    r"""Compute gamma value to be used for GAK/Soft-DTW.

    This method was originally presented in [1]_.

    Parameters
    ----------
    dataset
        A dataset of time series
    n_samples : int (default: 100)
        Number of samples on which median distance should be estimated
    random_state : integer or numpy.RandomState or None (default: None)
        The generator used to draw the samples. If an integer is given, it
        fixes the seed. Defaults to the global numpy random number generator.

    Returns
    -------
    float
        Suggested :math:`\gamma` parameter for the Soft-DTW

    Examples
    --------
    >>> dataset = [[1, 2, 2, 3], [1., 2., 3., 4.]]
    >>> gamma_soft_dtw(dataset=dataset,
    ...                n_samples=200,
    ...                random_state=0)  # doctest: +ELLIPSIS
    8.0...

    See Also
    --------
    sigma_gak : Compute sigma parameter for Global Alignment kernel

    References
    ----------
    .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011.
    """
    return 2. * sigma_gak(dataset=dataset,
                          n_samples=n_samples,
                          random_state=random_state) ** 2
Example #16
Source File: shared_randomstreams.py From attention-lvcsr with MIT License

def __init__(self, seed=None):
    super(RandomStreams, self).__init__()
    # A list of pairs of the form (input_r, output_r). This will be
    # over-ridden by the module instance to contain stream generators.
    self.state_updates = []
    # Instance variable should take None or integer value. Used to seed the
    # random number generator that provides seeds for member streams.
    self.default_instance_seed = seed
    # numpy.RandomState instance that gen() uses to seed new streams.
    self.gen_seedgen = numpy.random.RandomState(seed)
Example #17
Source File: generators.py From tslearn with BSD 2-Clause "Simplified" License

def random_walk_blobs(n_ts_per_blob=100, sz=256, d=1, n_blobs=2,
                      noise_level=1., random_state=None):
    """Blob-based random walk time series generator.

    Generate n_ts_per_blob * n_blobs time series of size sz and
    dimensionality d. Generated time series follow the model:

    .. math::

        ts[t] = ts[t - 1] + a

    where :math:`a` is drawn from a normal distribution of mean 0 and
    standard deviation 1.

    Each blob contains time series derived from the same seed time series
    with added white noise.

    Parameters
    ----------
    n_ts_per_blob : int (default: 100)
        Number of time series in each blob
    sz : int (default: 256)
        Length of time series (number of time instants)
    d : int (default: 1)
        Dimensionality of time series
    n_blobs : int (default: 2)
        Number of blobs
    noise_level : float (default: 1.)
        Standard deviation of white noise added to time series in each blob
    random_state : integer or numpy.RandomState or None (default: None)
        Generator used to draw the time series. If an integer is given, it
        fixes the seed. Defaults to the global numpy random number generator.

    Returns
    -------
    numpy.ndarray
        A dataset of random walk time series
    numpy.ndarray
        Labels associated to random walk time series (blob id)

    Examples
    --------
    >>> X, y = random_walk_blobs(n_ts_per_blob=100, sz=256, d=5, n_blobs=3)
    >>> X.shape
    (300, 256, 5)
    >>> y.shape
    (300,)
    """
    rs = check_random_state(random_state)
    base_ts = random_walks(n_ts=n_blobs, sz=sz, d=d, std=1.0, random_state=rs)
    rnd = rs.randn(n_ts_per_blob * n_blobs, sz, d) * noise_level
    ts = numpy.repeat(base_ts, repeats=n_ts_per_blob, axis=0)
    y = numpy.repeat(range(n_blobs), repeats=n_ts_per_blob)
    return ts + rnd, y
Example #18
Source File: decomposition.py From tensorlib with BSD 3-Clause "New" or "Revised" License

def cp(X, n_components=None, tol=1E-4, max_iter=500, init_type="hosvd",
       random_state=None):
    """
    CANDECOMP/PARAFAC decomposition using an alternating least squares
    algorithm.

    Parameters
    ----------
    X : ndarray
        Input data to decompose

    n_components : int
        The number of components in the decomposition. Note that unlike PCA
        or SVD, the decomposition of n_components + 1 DOES NOT contain the
        basis from the decomposition of n_components.

    tol : float, optional (default=1E-4)
        Stopping tolerance for reconstruction error.

    max_iter : int, optional (default=500)
        Maximum number of iterations to perform before exiting.

    init_type : string, optional (default="hosvd")
        How to initialize the decomposition. Choices are "random" or
        "hosvd", where "random" is initialized with uniform random values,
        and "hosvd" is initialized by the high order SVD of the dataset.

    random_state : int, None, or np.RandomState instance
        Random seed information to use when ``init_type`` == "random"

    Returns
    -------
    components : list, length = X.ndim
        Basis functions for X, each of shape [X.shape[idx], n_components]
        where idx is the index into ``components``.

    References
    ----------
    Kolda, T. G. & Bader, B. W. Tensor Decompositions and Applications.
    SIAM Rev. 51, 455-500 (2009).

    J.M. Landsberg, Tensors: Geometry and Applications. American
    Mathematical Society (2011).

    G. Golub and C. Van Loan. Matrix Computations, Third Edition, Chapter 5,
    Section 5.4.4, pp. 252-253.
    """
    if n_components is None:
        raise ValueError("n_components is a required argument!")
    check_tensor(X)
    return _cpN(X, n_components, tol=tol, max_iter=max_iter,
                init_type=init_type, random_state=random_state)
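A hypothetical call, assuming `cp` is importable from tensorlib's decomposition module (an untested sketch, not from the project's docs):

import numpy as np
# from tensorlib.decomposition import cp  # assumed import path

X = np.random.RandomState(0).rand(10, 8, 6)        # a random 3-way tensor
components = cp(X, n_components=3, random_state=0)
for factor in components:  # one factor matrix per mode:
    print(factor.shape)    # (10, 3), (8, 3), (6, 3)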
Example #19
Source File: decomposition.py From tensorlib with BSD 3-Clause "New" or "Revised" License

def tucker(X, n_components=None, tol=1E-6, max_iter=500, init_type="hosvd",
           random_state=None):
    """
    Tucker decomposition using an alternating least squares algorithm.

    Parameters
    ----------
    X : ndarray
        Input data to decompose

    n_components : int
        The number of components in the decomposition. Note that unlike PCA
        or SVD, the decomposition of n_components + 1 DOES NOT contain the
        basis from the decomposition of n_components.

    tol : float, optional (default=1E-6)
        Stopping tolerance for reconstruction error.

    max_iter : int, optional (default=500)
        Maximum number of iterations to perform before exiting.

    init_type : string, optional (default="hosvd")
        How to initialize the decomposition. Choices are "random" or
        "hosvd", where "random" is initialized with uniform random values,
        and "hosvd" is initialized by the high order SVD of the dataset.

    random_state : int, None, or np.RandomState instance
        Random seed information to use when ``init_type`` == "random"

    Returns
    -------
    components : list, length = X.ndim
        Basis functions for X, each of shape [X.shape[idx], n_components]
        where idx is the index into ``components``. First component is a
        multiplier G, followed by components for each mode.

    References
    ----------
    Kolda, T. G. & Bader, B. W. Tensor Decompositions and Applications.
    SIAM Rev. 51, 455-500 (2009).

    J.M. Landsberg, Tensors: Geometry and Applications. American
    Mathematical Society (2011).

    G. Golub and C. Van Loan. Matrix Computations, Third Edition, Chapter 5,
    Section 5.4.4, pp. 252-253.
    """
    if n_components is None:
        raise ValueError("n_components is a required argument!")
    check_tensor(X)
    return _tuckerN(X, n_components, tol=tol, max_iter=max_iter,
                    init_type=init_type, random_state=random_state)
Example #20
Source File: generators.py From tslearn with BSD 2-Clause "Simplified" License

def random_walks(n_ts=100, sz=256, d=1, mu=0., std=1., random_state=None):
    """Random walk time series generator.

    Generate n_ts time series of size sz and dimensionality d. Generated
    time series follow the model:

    .. math::

        ts[t] = ts[t - 1] + a

    where :math:`a` is drawn from a normal distribution of mean mu and
    standard deviation std.

    Parameters
    ----------
    n_ts : int (default: 100)
        Number of time series.
    sz : int (default: 256)
        Length of time series (number of time instants).
    d : int (default: 1)
        Dimensionality of time series.
    mu : float (default: 0.)
        Mean of the normal distribution from which random walk steps are
        drawn.
    std : float (default: 1.)
        Standard deviation of the normal distribution from which random walk
        steps are drawn.
    random_state : integer or numpy.RandomState or None (default: None)
        Generator used to draw the time series. If an integer is given, it
        fixes the seed. Defaults to the global numpy random number generator.

    Returns
    -------
    numpy.ndarray
        A dataset of random walk time series

    Examples
    --------
    >>> random_walks(n_ts=100, sz=256, d=5, mu=0., std=1.).shape
    (100, 256, 5)
    """
    rs = check_random_state(random_state)
    ts = numpy.empty((n_ts, sz, d))
    rnd = rs.randn(n_ts, sz, d) * std + mu
    ts[:, 0, :] = rnd[:, 0, :]
    for t in range(1, sz):
        ts[:, t, :] = ts[:, t - 1, :] + rnd[:, t, :]
    return ts
Example #21
Source File: metrics.py From tslearn with BSD 2-Clause "Simplified" License

def sigma_gak(dataset, n_samples=100, random_state=None):
    r"""Compute sigma value to be used for GAK.

    This method was originally presented in [1]_.

    Parameters
    ----------
    dataset
        A dataset of time series
    n_samples : int (default: 100)
        Number of samples on which median distance should be estimated
    random_state : integer or numpy.RandomState or None (default: None)
        The generator used to draw the samples. If an integer is given, it
        fixes the seed. Defaults to the global numpy random number generator.

    Returns
    -------
    float
        Suggested bandwidth (:math:`\sigma`) for the Global Alignment kernel

    Examples
    --------
    >>> dataset = [[1, 2, 2, 3], [1., 2., 3., 4.]]
    >>> sigma_gak(dataset=dataset,
    ...           n_samples=200,
    ...           random_state=0)  # doctest: +ELLIPSIS
    2.0...

    See Also
    --------
    gak : Compute Global Alignment kernel
    cdist_gak : Compute cross-similarity matrix using Global Alignment kernel

    References
    ----------
    .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011.
    """
    random_state = check_random_state(random_state)
    dataset = to_time_series_dataset(dataset)
    n_ts, sz, d = dataset.shape
    if not check_equal_size(dataset):
        sz = numpy.min([ts_size(ts) for ts in dataset])
    # Sample with replacement only when there are fewer points than samples.
    replace = n_ts * sz < n_samples
    sample_indices = random_state.choice(n_ts * sz,
                                         size=n_samples,
                                         replace=replace)
    dists = pdist(dataset[:, :sz, :].reshape((-1, d))[sample_indices],
                  metric="euclidean")
    return numpy.median(dists) * numpy.sqrt(sz)
Example #22
Source File: ops.py From enspara with GNU General Public License v3.0

def randind(local_array, random_state=None):
    """Given the local fragment of an assumed-larger array, give the
    location of a randomly chosen element of the array (uniformly
    distributed).

    Parameters
    ----------
    local_array : ndarray
        An array that's striped across multiple nodes in an MPI swarm.
    random_state : int or np.RandomState
        State of the RNG to use for the randomized part of the choice.

    Returns
    -------
    owner_rank : int
        Rank of the node that owns the element that's chosen.
    local_index : int
        Index within the owner node's local array.
    """
    random_state = check_random_state(random_state)

    # First thing, we need to find out how long all the local arrays are.
    n_states = np.array(mpi.comm.allgather(len(local_array)))
    assert np.all(n_states >= 0)

    if sum(n_states) < 1:
        raise DataInvalid(
            "Random choice requires a non-empty array. Got shapes: %s"
            % n_states)

    # Then, we select a random index from amongst the total lengths.
    if mpi.rank() == 0:
        # This is modeled after numpy.random.choice and gives the same
        # results.
        global_index = random_state.randint(sum(n_states))
    else:
        global_index = None
    global_index = mpi.comm.bcast(global_index, root=0)

    # This computation is the same as finding global_index % mpi.size() and
    # global_index // mpi.size() iff our data are 'packed' on nodes, but not
    # otherwise.
    concat = np.concatenate([np.arange(sum(n_states))[r::mpi.size()]
                             for r in range(mpi.size())])
    a = ra.RaggedArray(concat, lengths=n_states, error_checking=False)

    owner_rank, local_index = ra.where(a == global_index)
    owner_rank, local_index = owner_rank[0], local_index[0]

    assert local_index >= 0

    return (owner_rank, local_index)
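The RaggedArray lookup above can be hard to picture. Below is a pure-NumPy sketch of the same global-index-to-(owner, local) mapping without MPI; the helper name is hypothetical, and it assumes the strided layout the code describes (rank r owns global ids r, r + n_ranks, ...):

import numpy as np

def locate_global_index(global_index, lengths, n_ranks):
    """Map a global element id to (owner_rank, local_index)."""
    total = int(np.sum(lengths))
    strided = np.concatenate([np.arange(total)[r::n_ranks]
                              for r in range(n_ranks)])
    flat_pos = int(np.flatnonzero(strided == global_index)[0])
    bounds = np.cumsum(lengths)  # partition flat positions by rank lengths
    owner = int(np.searchsorted(bounds, flat_pos, side='right'))
    local = flat_pos - (bounds[owner - 1] if owner > 0 else 0)
    return owner, local

# Rank 2 owns global ids 2 and 5; id 5 sits at its local index 1.
print(locate_global_index(5, lengths=[3, 3, 2], n_ranks=3))  # (2, 1)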
Example #23
Source File: threshold_policies.py From ml-fairness-gym with Apache License 2.0

def _threshold_from_tpr(roc, tpr_target, rng):
    """Returns a `RandomizedThreshold` that achieves `tpr_target`.

    For an arbitrary value of tpr_target in [0, 1], there may not be a
    single threshold that achieves that tpr_value on our data. In this case,
    we interpolate between the two closest achievable points on the discrete
    ROC curve.

    See e.g., Theorem 1 of Scott et al (1998)
    "Maximum realisable performance: a principled method for enhancing
    performance by using multiple classifiers in variable cost problem
    domains"
    http://mi.eng.cam.ac.uk/reports/svr-ftp/auto-pdf/Scott_tr320.pdf

    Args:
      roc: A tuple (fpr, tpr, thresholds) as returned by sklearn's roc_curve
        function.
      tpr_target: A float between [0, 1], the target value of TPR that we
        would like to achieve.
      rng: A `np.RandomState` object that will be used in the returned
        RandomizedThreshold.

    Returns:
      A RandomizedThreshold that achieves the target TPR value.
    """
    # First filter out points that are not on the convex hull.
    _, tpr_list, thresh_list = convex_hull_roc(roc)

    idx = bisect.bisect_left(tpr_list, tpr_target)

    # TPR target is larger than any of the TPR values in the list. In this
    # case, take the highest threshold possible.
    if idx == len(tpr_list):
        return RandomizedThreshold(
            weights=[1], values=[thresh_list[-1]], rng=rng,
            tpr_target=tpr_target)

    # TPR target is exactly achievable by an existing threshold. In this
    # case, do not randomize between two different thresholds. Use a single
    # threshold with probability 1.
    if tpr_list[idx] == tpr_target:
        return RandomizedThreshold(
            weights=[1], values=[thresh_list[idx]], rng=rng,
            tpr_target=tpr_target)

    # Interpolate between adjacent thresholds. Since we are only considering
    # points on the convex hull of the roc curve, we only need to consider
    # interpolating between pairs of adjacent points.
    alpha = _interpolate(x=tpr_target, low=tpr_list[idx - 1],
                         high=tpr_list[idx])
    return RandomizedThreshold(
        weights=[alpha, 1 - alpha],
        values=[thresh_list[idx - 1], thresh_list[idx]],
        rng=rng,
        tpr_target=tpr_target)
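The listing depends on an `_interpolate` helper that is not shown. For the weights `[alpha, 1 - alpha]` to give an expected TPR of exactly `tpr_target`, it must solve `alpha * low + (1 - alpha) * high = x`. A reconstruction under that assumption, not the project's actual code:

def _interpolate(x, low, high):
    """Weight on `low` such that the convex combination hits `x`."""
    assert low <= x <= high
    return (high - x) / (high - low)

# Check: alpha * low + (1 - alpha) * high recovers the target x.
alpha = _interpolate(x=0.75, low=0.5, high=1.0)
assert abs(alpha * 0.5 + (1 - alpha) * 1.0 - 0.75) < 1e-12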
Example #24
Source File: feta_linear.py From cs-ranking with Apache License 2.0

def __init__(
    self,
    learning_rate=1e-3,
    batch_size=256,
    loss_function=binary_crossentropy,
    epochs_drop=50,
    drop=0.01,
    random_state=None,
    **kwargs,
):
    """
    Parameters
    ----------
    learning_rate : float
        The learning rate used by the gradient descent optimizer.
    batch_size : int
        The size of the mini-batches used to train the Neural Network.
    loss_function
        The loss function to minimize when training the Neural Network. See
        the functions offered in the keras.losses module for more details.
    epochs_drop : int
        The number of training epochs after which the learning rate is
        decreased by a factor of `drop`.
    drop : float
        The factor by which to decrease the learning rate every
        `epochs_drop` epochs.
    random_state : np.RandomState
        The random state to use in this object.
    """
    self.learning_rate = learning_rate
    self.batch_size = batch_size
    self.random_state = random_state
    self.loss_function = loss_function
    self.epochs_drop = epochs_drop
    self.drop = drop
    self.current_lr = None
    self.weight1 = None
    self.bias1 = None
    self.weight2 = None
    self.bias2 = None
    self.w_out = None
    self.optimizer = None
    self.W_last = None
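The `epochs_drop`/`drop` pair describes a step-decay learning-rate schedule. A common formulation consistent with those parameters (an assumption, since the class's schedule code is not shown here):

import numpy as np

def step_decay(initial_lr, epoch, drop, epochs_drop):
    """Multiply the learning rate by `drop` once every `epochs_drop` epochs."""
    return initial_lr * drop ** np.floor(epoch / epochs_drop)

# With the defaults above, the rate falls by 100x at epochs 50, 100, ...
for epoch in (0, 49, 50, 100):
    print(epoch, step_decay(1e-3, epoch, drop=0.01, epochs_drop=50))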