Python Examples of chainer.Variable

Source File: test_action_value.py From chainerrl with MIT License

6 votes

def test_max_unbounded(self):
        """Check ``max`` of a quadratic action value built without bounds.

        The test expects ``max`` to be a ``chainer.Variable`` whose values
        match the ``v`` array the action value was constructed with.
        """
        batch, action_dim = 7, 3
        centers = np.random.randn(batch, action_dim).astype(np.float32)
        # One identity matrix per sample, shared through broadcasting.
        identity = np.eye(action_dim, dtype=np.float32)[None]
        mats = np.broadcast_to(identity, (batch, action_dim, action_dim))
        peaks = np.random.randn(batch).astype(np.float32)

        q_out = action_value.QuadraticActionValue(
            chainer.Variable(centers),
            chainer.Variable(mats),
            chainer.Variable(peaks))

        maximum = q_out.max
        self.assertIsInstance(maximum, chainer.Variable)
        np.testing.assert_almost_equal(maximum.array, peaks)

Source File: chainer_alex.py From mlimages with MIT License

6 votes

def predict(limit):
    """Print predicted vs. actual labels for generated images and show them.

    Args:
        limit (int): Maximum number of images to process. A value that is
            not greater than zero falls back to 5.
    """
    max_items = limit
    if not limit > 0:
        max_items = 5

    td = TrainingData(LABEL_FILE, img_root=IMAGES_ROOT, mean_image_file=MEAN_IMAGE_FILE, image_property=IMAGE_PROP)
    label_def = LabelingMachine.read_label_def(LABEL_DEF_FILE)
    model = alex.Alex(len(label_def))
    serializers.load_npz(MODEL_FILE, model)

    for count, (arr, im) in enumerate(td.generate(), start=1):
        # Wrap the single sample in a batch of size one.
        batch = np.ndarray(shape=(1,) + arr.shape, dtype=arr.dtype)
        batch[0] = arr
        # NOTE(review): the ``volatile`` argument was removed in Chainer v2;
        # confirm which Chainer version this script targets.
        x = chainer.Variable(np.asarray(batch), volatile="on")
        scores = model.predict(x)
        best = np.argmax(scores.data)
        print("predict {0}, actual {1}".format(label_def[best], label_def[im.label]))
        im.image.show()
        if count >= max_items:
            break

Source File: empirical_normalization.py From chainerrl with MIT License

6 votes

def __call__(self, x, update=True):
        """Normalize mean and variance of values based on empirical values.

        Args:
            x (ndarray or Variable): Input values
            update (bool): If true, ``self.experience(x)`` is called before
                the output is computed.

        Returns:
            ndarray or Variable: Normalized output values, clipped to
                ``[-clip_threshold, clip_threshold]`` when a threshold is set.
        """
        xp = self.xp
        target_shape = x.shape
        # The statistics are read before ``experience`` gets to see ``x``.
        mean = xp.broadcast_to(self._mean, target_shape)
        std_inv = xp.broadcast_to(self._std_inverse, target_shape)

        if update:
            self.experience(x)

        normalized = (x - mean) * std_inv
        limit = self.clip_threshold
        if limit is None:
            return normalized
        return xp.clip(normalized, -limit, limit)

Source File: bound_by_tanh.py From chainerrl with MIT License

6 votes

def bound_by_tanh(x, low, high):
    """Squash a value into [low, high] with a scaled and shifted tanh.

    Args:
        x (chainer.Variable): value to bound
        low (numpy.ndarray): lower bound
        high (numpy.ndarray): upper bound
    Returns: chainer.Variable
    """
    assert isinstance(x, chainer.Variable)
    assert low is not None
    assert high is not None
    xp = cuda.get_array_module(x.array)

    def with_leading_axis(values):
        # Convert with x's array module and prepend a length-1 axis.
        return xp.expand_dims(xp.asarray(values), axis=0)

    half_width = with_leading_axis((high - low) / 2)
    center = with_leading_axis((high + low) / 2)
    return F.tanh(x) * half_width + center

Source File: stateless_recurrent.py From chainerrl with MIT License

6 votes

def concatenate_sequences(sequences):
    """Concatenate sequences along the first axis.

    Args:
        sequences (list): List of sequences. The following two cases are
            supported:
                - (a) Each sequence is a Variable or ndarray.
                - (b) Each sequence is tuple of a Variable or ndarray.

    Returns:
        chainer.Variable, ndarray or tuple: Concatenated sequences. In case
            (b) the result is a tuple with one concatenated entry per tuple
            position.
    """
    # The first element decides which of the two cases applies.
    if isinstance(sequences[0], tuple):
        tuple_size = len(sequences[0])
        # Concatenate position by position across all sequences.
        return tuple(
            F.concat([seq[i] for seq in sequences], axis=0)
            for i in range(tuple_size))
    return F.concat(sequences, axis=0)

Source File: stateless_recurrent.py From chainerrl with MIT License

6 votes

def __call__(self, x, recurrent_state):
        """Run a one-step batched forward pass through ``n_step_forward``.

        Args:
            x (chainer.Variable or ndarray): One-step batched input.
                NOTE(review): tuple inputs fail the assertion below, although
                ``split_one_step_batch_input`` may accept them - confirm
                which is intended.
            recurrent_state (object): Batched recurrent state.

        Returns:
            chainer.Variable, ndarray, or tuple: One-step batched output.
            object: New batched recurrent state.
        """
        accepted_types = (chainer.Variable, self.xp.ndarray)
        assert isinstance(x, accepted_types)
        one_step_sequences = split_one_step_batch_input(x)
        return self.n_step_forward(
            one_step_sequences, recurrent_state, output_mode='concat')

Source File: test_trpo.py From chainerrl with MIT License

6 votes

def test(self):
        """Check what ``trpo._find_old_style_function`` reports for graphs
        containing zero, one and three ``old_style_identity`` calls."""
        a = chainer.Variable(np.random.rand(1).astype(np.float32))
        b = chainer.Variable(np.random.rand(1).astype(np.float32))

        def assert_only_old_style(found, expected_count):
            self.assertEqual(len(found), expected_count)
            self.assertTrue(
                all(isinstance(func, OldStyleIdentity) for func in found))

        # Graph without any old-style function
        plain = 2 * a + b
        self.assertEqual(trpo._find_old_style_function([plain]), [])

        # Graph with a single old-style function
        single = 2 * old_style_identity(a) + b
        assert_only_old_style(trpo._find_old_style_function([single]), 1)

        # Graph with three old-style functions, two of them chained
        chained = old_style_identity(old_style_identity(a))
        triple = 2 * chained + old_style_identity(b)
        assert_only_old_style(trpo._find_old_style_function([triple]), 3)

Source File: test_copy_param.py From chainerrl with MIT License

6 votes

def test_copy_param(self):
        """Check that ``copy_param.copy_param(a, b)`` makes link ``a``
        reproduce the outputs of link ``b`` while ``b`` stays unchanged."""
        a = L.Linear(1, 5)
        b = L.Linear(1, 5)
        s = chainer.Variable(np.random.rand(1, 1).astype(np.float32))

        def flat_output(link):
            # Flattened output of ``link`` for the shared input, as a list
            return list(link(s).array.ravel())

        a_before = flat_output(a)
        b_before = flat_output(b)
        self.assertNotEqual(a_before, b_before)

        # Overwrite a's parameters with b's
        copy_param.copy_param(a, b)

        a_after = flat_output(a)
        b_after = flat_output(b)
        self.assertEqual(a_after, b_before)
        self.assertEqual(b_after, b_before)

Source File: test_boltzmann.py From chainerrl with MIT License

6 votes

def test_boltzmann(self):
        """Compare action selection counts at temperatures 1 and 0.5."""
        q_values = chainer.Variable(np.asarray([[-1, 1, 0]], dtype=np.float32))

        def assert_ordered_by_value(counts):
            # Actions with larger values must be selected more often
            self.assertGreater(counts[1], counts[2])
            self.assertGreater(counts[2], counts[0])

        # T=1
        counts_t1 = count_actions_selected_by_boltzmann(1, q_values)
        print('T=1', counts_t1)
        assert_ordered_by_value(counts_t1)

        # T=0.5
        counts_t05 = count_actions_selected_by_boltzmann(0.5, q_values)
        print('T=0.5', counts_t05)
        assert_ordered_by_value(counts_t05)

        # The lower temperature must pick the best action more often
        self.assertGreater(counts_t05[1], counts_t1[1])

Source File: test_action_value.py From chainerrl with MIT License

6 votes

def test_compute_advantage(self):
        """Check ``compute_advantage`` against ``q_values`` for random
        discrete actions: zero for greedy actions, ``q - v`` otherwise."""
        sample_actions = np.random.randint(self.action_size,
                                           size=self.batch_size)
        greedy_actions = self.q_values.argmax(axis=1)
        ret = self.qout.compute_advantage(sample_actions)
        self.assertIsInstance(ret, chainer.Variable)
        for b in range(self.batch_size):
            sampled, greedy = sample_actions[b], greedy_actions[b]
            advantage = ret.array[b]
            if sampled == greedy:
                self.assertAlmostEqual(advantage, 0)
                continue
            # A non-greedy action must have a strictly negative advantage
            self.assertLess(advantage, 0)
            expected = self.q_values[b, sampled] - self.q_values[b, greedy]
            self.assertAlmostEqual(advantage, expected)

Source File: test_action_value.py From chainerrl with MIT License

6 votes

def setUp(self):
        """Build a ``SingleActionValue`` fixture from an evaluator and, when
        ``has_maximizer`` is set, a maximizer that returns all-zero actions."""

        def evaluator(actions):
            # negative square norm of actions
            return -F.sum(actions ** 2, axis=1)

        def zero_actions():
            zeros = np.zeros(
                (self.batch_size, self.action_size), dtype=np.float32)
            return chainer.Variable(zeros)

        maximizer = zero_actions if self.has_maximizer else None

        self.evaluator = evaluator
        self.maximizer = maximizer
        self.av = action_value.SingleActionValue(
            evaluator=evaluator, maximizer=maximizer)

Source File: test_action_value.py From chainerrl with MIT License

6 votes

def test_compute_advantage(self):
        """Verify advantages of randomly sampled discrete actions.

        A greedy action must have (almost) zero advantage; any other action
        must have a negative advantage equal to its Q-value minus the greedy
        Q-value taken from ``self.q_values``.
        """
        sample_actions = np.random.randint(self.action_size,
                                           size=self.batch_size)
        greedy_actions = self.q_values.argmax(axis=1)
        ret = self.qout.compute_advantage(sample_actions)
        self.assertIsInstance(ret, chainer.Variable)
        for row in range(self.batch_size):
            chosen = sample_actions[row]
            best = greedy_actions[row]
            actual = ret.array[row]
            if chosen != best:
                # Any non-greedy action is worse than the greedy one
                self.assertLess(actual, 0)
                q_chosen = self.q_values[row, chosen]
                q_best = self.q_values[row, best]
                self.assertAlmostEqual(actual, q_chosen - q_best)
            else:
                self.assertAlmostEqual(actual, 0)

Source File: test_action_value.py From chainerrl with MIT License

6 votes

def test_getitem(self):
        """Check that slicing a ``QuadraticActionValue`` slices ``mu``,
        ``mat`` and ``v`` and keeps the action bounds."""
        batch, action_dim = 7, 3
        mu = np.random.randn(batch, action_dim).astype(np.float32)
        identity = np.eye(action_dim, dtype=np.float32)[None]
        mat = np.broadcast_to(identity, (batch, action_dim, action_dim))
        v = np.random.randn(batch).astype(np.float32)
        min_action = -1
        max_action = 1
        qout = action_value.QuadraticActionValue(
            chainer.Variable(mu),
            chainer.Variable(mat),
            chainer.Variable(v),
            min_action,
            max_action,
        )

        head = slice(None, 3)
        sliced = qout[head]
        # Array members must be sliced the same way as the inputs
        np.testing.assert_equal(sliced.mu.array, mu[head])
        np.testing.assert_equal(sliced.mat.array, mat[head])
        np.testing.assert_equal(sliced.v.array, v[head])
        # Bounds are expected to be carried over as they are
        np.testing.assert_equal(sliced.min_action, min_action)
        np.testing.assert_equal(sliced.max_action, max_action)

Source File: distribution.py From chainerrl with MIT License

5 votes

def most_probable(self):
        """Return the argmax of ``all_prob`` along axis 1 as an int32
        ``chainer.Variable``."""
        best_indices = np.argmax(self.all_prob.array, axis=1)
        return chainer.Variable(best_indices.astype(np.int32))

Source File: distribution.py From chainerrl with MIT License

5 votes

def sample(self):
        """Return the result of ``sample_discrete_actions`` applied to the
        ``all_prob`` array, wrapped in a ``chainer.Variable``."""
        drawn = sample_discrete_actions(self.all_prob.array)
        return chainer.Variable(drawn)

Source File: test_async.py From chainerrl with MIT License

5 votes

def _assert_different_pointers_to_param_grad(a, b):
    """Assert that same-named parameters of two links have grad arrays at
    different ``ctypes.data`` addresses.

    Both arguments must be ``chainer.Link`` objects with identical
    parameter names.
    """
    for link in (a, b):
        assert isinstance(link, chainer.Link)
    a_params = dict(a.namedparams())
    b_params = dict(b.namedparams())
    assert set(a_params) == set(b_params)
    for name, a_param in a_params.items():
        b_param = b_params[name]
        assert isinstance(a_param, chainer.Variable)
        assert isinstance(b_param, chainer.Variable)
        a_address = a_param.grad.ctypes.data
        b_address = b_param.grad.ctypes.data
        assert a_address != b_address

Source File: test_async.py From chainerrl with MIT License

5 votes

def _assert_same_pointers_to_param_data(a, b):
    """Assert that same-named parameters of two links have data arrays at
    the same ``ctypes.data`` address.

    Both arguments must be ``chainer.Link`` objects with identical
    parameter names.
    """
    for link in (a, b):
        assert isinstance(link, chainer.Link)
    a_params = dict(a.namedparams())
    b_params = dict(b.namedparams())
    assert set(a_params) == set(b_params)
    for name, a_param in a_params.items():
        b_param = b_params[name]
        assert isinstance(a_param, chainer.Variable)
        assert isinstance(b_param, chainer.Variable)
        a_address = a_param.array.ctypes.data
        b_address = b_param.array.ctypes.data
        assert a_address == b_address

Source File: test_draw_computational_graph.py From chainerrl with MIT License

5 votes

def test_draw_computational_graph(self):
        """Check which files ``draw_computational_graph`` writes.

        A ``.gv`` file must always be produced; a ``.png`` file must exist
        exactly when ``chainerrl.misc.is_graphviz_available()`` is true.
        """
        # Imported locally so this block does not rely on a file-level import.
        import shutil

        x = chainer.Variable(np.zeros(5))
        y = x ** 2 + chainer.Variable(np.ones(5))
        dirname = tempfile.mkdtemp()
        try:
            filepath = os.path.join(dirname, 'graph')
            chainerrl.misc.draw_computational_graph(y, filepath)
            self.assertTrue(os.path.exists(filepath + '.gv'))
            if chainerrl.misc.is_graphviz_available():
                self.assertTrue(os.path.exists(filepath + '.png'))
            else:
                self.assertFalse(os.path.exists(filepath + '.png'))
        finally:
            # mkdtemp() never cleans up after itself; remove the directory
            # even when an assertion above fails.
            shutil.rmtree(dirname, ignore_errors=True)

Source File: test_draw_computational_graph.py From chainerrl with MIT License

5 votes

def _assert_eq_var_list(self, a, b):
        """Assert that two lists hold the very same Variable objects.

        Both arguments must be lists of equal length that contain only
        ``chainer.Variable`` instances, and corresponding elements must be
        identical objects (not merely equal).

        Args:
            a (list): First list of Variables.
            b (list): Second list of Variables.
        """
        # Check the container type first so that len() below is meaningful.
        self.assertIsInstance(a, list)
        self.assertIsInstance(b, list)
        self.assertEqual(len(a), len(b))
        for item in a:
            self.assertIsInstance(item, chainer.Variable)
        for item in b:
            self.assertIsInstance(item, chainer.Variable)
        for va, vb in zip(a, b):
            # Identity check; reports the objects instead of raw id() values.
            self.assertIs(va, vb)

Source File: action_value.py From chainerrl with MIT License

5 votes

def greedy_actions(self):
        """Return the argmax of ``q_values`` along axis 1 as an int32
        ``chainer.Variable``."""
        best_indices = self.q_values.array.argmax(axis=1)
        return chainer.Variable(best_indices.astype(np.int32))

Source File: action_value.py From chainerrl with MIT License

5 votes

def evaluate_actions_as_distribution(self, actions):
        """Return the return distributions of given actions.

        Args:
            actions (chainer.Variable or ndarray): Array of action indices.
                Its shape must be (batch_size,).
                NOTE(review): the value is used as an index as-is, without
                unwrapping a Variable - confirm that Variable indices are
                supported here.

        Returns:
            chainer.Variable: Return distributions. Its shape will be
                (batch_size, n_atoms).
        """
        # Pair every batch row with the action chosen for it.
        batch_indices = self.xp.arange(self.q_values.shape[0])
        return self.q_dist[batch_indices, actions]

Source File: test_deterministic_policy.py From chainerrl with MIT License

5 votes

def _test_call(self, gpu):
        """Smoke-test the model's call interface on random input.

        This only checks that the model accepts random input data and
        returns output with the correct interface: the distribution type,
        the sampled action's type, shape and array module and, when
        ``bound_action`` is set, that samples stay within the bounds.

        Args:
            gpu (int): Device id. The model, the input and the bounds are
                moved to that GPU when it is non-negative.
        """
        nonlinearity = getattr(F, self.nonlinearity)
        min_action = np.full((self.action_size,), -0.01, dtype=np.float32)
        max_action = np.full((self.action_size,), 0.01, dtype=np.float32)
        model = self._make_model(
            n_input_channels=self.n_input_channels,
            action_size=self.action_size,
            bound_action=self.bound_action,
            min_action=min_action,
            max_action=max_action,
            nonlinearity=nonlinearity,
        )

        batch_size = 7
        x = np.random.rand(
            batch_size, self.n_input_channels).astype(np.float32)
        if gpu >= 0:
            model.to_gpu(gpu)
            x = chainer.cuda.to_gpu(x)
            # Bounds are moved as well so the comparisons below stay on
            # the same device as the sampled actions.
            min_action = chainer.cuda.to_gpu(min_action)
            max_action = chainer.cuda.to_gpu(max_action)
        y = model(x)
        # assertIsInstance reports the offending type on failure.
        self.assertIsInstance(
            y, chainerrl.distribution.ContinuousDeterministicDistribution)
        a = y.sample()
        self.assertIsInstance(a, chainer.Variable)
        self.assertEqual(a.shape, (batch_size, self.action_size))
        self.assertEqual(chainer.cuda.get_array_module(a),
                         chainer.cuda.get_array_module(x))
        if self.bound_action:
            self.assertTrue((a.array <= max_action).all())
            self.assertTrue((a.array >= min_action).all())

Source File: test_action_value.py From chainerrl with MIT License

5 votes

def test_compute_advantage(self):
        """Check that ``compute_advantage`` equals the evaluator's value of
        the sampled actions minus its value of the maximizer's actions.

        Returns immediately when the fixture has no maximizer.
        """
        if not self.has_maximizer:
            return
        sample_actions = np.random.randn(
            self.batch_size, self.action_size).astype(np.float32)
        ret = self.av.compute_advantage(sample_actions)
        self.assertIsInstance(ret, chainer.Variable)

        sampled_value = self.evaluator(sample_actions).array
        best_value = self.evaluator(self.maximizer()).array
        np.testing.assert_equal(ret.array, sampled_value - best_value)

Source File: test_action_value.py From chainerrl with MIT License

5 votes

def test_evaluate_actions(self):
        """Check that ``evaluate_actions`` returns a ``chainer.Variable``
        equal to the evaluator applied to the same actions."""
        action_shape = (self.batch_size, self.action_size)
        sample_actions = np.random.randn(*action_shape).astype(np.float32)
        ret = self.av.evaluate_actions(sample_actions)
        self.assertIsInstance(ret, chainer.Variable)
        expected = self.evaluator(sample_actions).array
        np.testing.assert_equal(ret.array, expected)

Source File: test_action_value.py From chainerrl with MIT License

5 votes

def test_greedy_actions(self):
        """Check that ``greedy_actions`` is a ``chainer.Variable`` equal to
        the maximizer's output; nothing is checked without a maximizer."""
        if self.has_maximizer:
            self.assertIsInstance(self.av.greedy_actions, chainer.Variable)
            actual = self.av.greedy_actions.array
            expected = self.maximizer().array
            np.testing.assert_equal(actual, expected)

Source File: action_value.py From chainerrl with MIT License

5 votes

def evaluate_actions_as_quantiles(self, actions):
        """Return the return quantiles of given actions.

        Args:
            actions (chainer.Variable or ndarray): Array of action indices.
                Its shape must be (batch_size,).

        Returns:
            chainer.Variable: Return quantiles. Its shape will be
                (batch_size, n_taus).
        """
        # Index with the raw array when a Variable is given.
        indices = (
            actions.array if isinstance(actions, chainer.Variable)
            else actions)
        batch_indices = self.xp.arange(self.quantiles.shape[0])
        return self.quantiles[batch_indices, :, indices]

Source File: gaussian_policy.py From chainerrl with MIT License

5 votes

def __call__(self, mean):
        """Build a Gaussian distribution around the given mean.

        Args:
            mean (chainer.Variable or ndarray): Mean of Gaussian.

        Returns:
            chainerrl.distribution.Distribution: Gaussian whose mean is the
                mean argument and whose variance is ``var_func`` applied to
                this link's ``var_param``, broadcast to the mean's shape.
        """
        raw_var = self.var_func(self.var_param)
        var = F.broadcast_to(raw_var, mean.shape)
        return distribution.GaussianDistribution(mean, var)

Source File: test_action_value.py From chainerrl with MIT License

5 votes

def test_max_bounded(self):
        """Check ``max`` of a quadratic action value with action bounds.

        Where ``mu`` lies strictly inside the bounds the maximum must match
        ``v``; where ``mu`` lies outside them (with a 1e-2 margin) the
        maximum must be smaller than ``v``.
        """
        batch, action_dim = 20, 3
        mu = np.random.randn(batch, action_dim).astype(np.float32)
        identity = np.eye(action_dim, dtype=np.float32)[None]
        mat = np.broadcast_to(identity, (batch, action_dim, action_dim))
        v = np.random.randn(batch).astype(np.float32)
        min_action = -1.3
        max_action = 1.3
        q_out = action_value.QuadraticActionValue(
            chainer.Variable(mu),
            chainer.Variable(mat),
            chainer.Variable(v),
            min_action, max_action)

        maximum = q_out.max
        self.assertIsInstance(maximum, chainer.Variable)
        v_out = maximum.array

        # If mu[i] is a valid action, v_out[i] should be v[i]
        inside = np.all((min_action < mu) & (mu < max_action), axis=1)
        np.testing.assert_almost_equal(v_out[inside], v[inside])

        # Otherwise, v_out[i] should be less than v[i]
        margin = 1e-2
        within_margin = np.all(
            (min_action - margin < mu) & (mu < max_action + margin),
            axis=1)
        outside = ~within_margin
        np.testing.assert_array_less(v_out[outside], v[outside])

Source File: test_action_value.py From chainerrl with MIT License

5 votes

def setUp(self):
        """Create random float32 quantiles, the action value built from
        them, and reference Q-values (the quantile mean over axis 1)."""
        self.batch_size, self.action_size, self.n_taus = 30, 3, 5
        quantile_shape = (self.batch_size, self.n_taus, self.action_size)
        self.quantiles = np.random.normal(
            size=quantile_shape).astype(np.float32)
        wrapped = chainer.Variable(self.quantiles)
        self.av = action_value.QuantileDiscreteActionValue(wrapped)
        self.q_values = self.quantiles.mean(axis=1)

Source File: action_value.py From chainerrl with MIT License

5 votes

def __init__(self, q_dist, z_values, q_values_formatter=lambda x: x):
        """Store the distribution and derive Q-values from it.

        Args:
            q_dist (chainer.Variable): 3-dimensional Variable whose last
                axis has the same length as ``z_values``; axis 1 is taken
                as the number of actions.
                (presumably shaped (batch, n_actions, n_atoms) - confirm)
            z_values (ndarray): 1-dimensional array, must not be a Variable.
            q_values_formatter (callable): Stored as-is on the instance.
        """
        assert isinstance(q_dist, chainer.Variable)
        assert not isinstance(z_values, chainer.Variable)
        assert q_dist.ndim == 3
        assert z_values.ndim == 1
        assert q_dist.shape[2] == z_values.shape[0]

        self.q_dist = q_dist
        self.z_values = z_values
        self.q_values_formatter = q_values_formatter
        self.xp = cuda.get_array_module(q_dist.array)
        self.n_actions = q_dist.array.shape[1]
        # Q-values: q_dist scaled by z_values along axis 2, then summed
        # over that axis.
        weighted = F.scale(q_dist, z_values, axis=2)
        self.q_values = F.sum(weighted, axis=2)

Python chainer.Variable() Examples