Python chainer.optimizers.SGD Examples

The following are 15 code examples of chainer.optimizers.SGD(), collected from open-source projects. Each example lists its source file, the project it comes from, and that project's license; see the original repositories for full context.
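Before the examples, here is a minimal, self-contained sketch of the pattern most of them share: construct the optimizer, attach it to a link with setup(), register any hooks with add_hook(), and call update() after backpropagation. The toy model and data below are purely illustrative and not taken from any of the projects listed.

import numpy as np
import chainer.functions as F
import chainer.links as L
from chainer import optimizers

# A tiny model: a single linear layer (3 inputs -> 1 output).
model = L.Linear(3, 1)

# Create the optimizer and attach it to the model.
optimizer = optimizers.SGD(lr=0.01)
optimizer.setup(model)

# One update step: clear old gradients, compute a loss, backprop, update.
x = np.random.rand(4, 3).astype(np.float32)
t = np.random.rand(4, 1).astype(np.float32)

model.cleargrads()
loss = F.mean_squared_error(model(x), t)
loss.backward()
optimizer.update()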
Example #1
Source File: test_optimizer.py    From chainer with MIT License
def test_cleargrad(self, backend_config):

        class CleargradHook(object):

            name = 'Cleargrad'
            timing = 'pre'

            def __init__(self, _):
                pass

            def __call__(self, opt):
                for param in opt.target.params():
                    # Clear all grads
                    param.cleargrad()

        target = self.target
        target.to_device(backend_config.device)
        # TODO(niboshi): Do not use SGD in GradientMethod test
        opt = optimizers.SGD(lr=1)
        opt.setup(target)
        opt.add_hook(CleargradHook(self))
        opt.add_hook(DummyHook(self))

        opt.update() 
Example #2
Source File: test_gradient_hard_clipping.py    From chainer with MIT License
def check_hardclipping(self, backend_configs):
        target = self.target
        assert len(backend_configs) == len(list(target.params()))
        devices = [bc.device for bc in backend_configs]

        lower_bound = -0.9
        upper_bound = 1.1
        expects = []
        # Compute expected
        for param, device in zip(target.params(), devices):
            expects.append(param.array - np.clip(param.grad,
                                                 lower_bound, upper_bound))
            param.to_device(device)

        # Apply optimizer_hook
        opt = optimizers.SGD(lr=1)
        opt.setup(self.target)
        opt.add_hook(
            optimizer_hooks.GradientHardClipping(lower_bound, upper_bound))
        opt.update()

        # Validate
        for expect, param in zip(expects, target.params()):
            testing.assert_allclose(expect, param.array) 
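Since the optimizer runs with lr=1, a plain SGD step is param.array - param.grad; GradientHardClipping clamps each gradient element to [lower_bound, upper_bound] before the step, which is exactly the expected value param.array - np.clip(param.grad, lower_bound, upper_bound) computed above.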
Example #3
Source File: test_gradient_clipping.py    From chainer with MIT License
def check_clipping(self, backend_configs, rate):
        target = self.target
        norm = self.norm
        assert len(backend_configs) == len(list(target.params()))
        devices = [bc.device for bc in backend_configs]

        threshold = norm * rate

        expects = []
        for param, device in zip(target.params(), devices):
            expects.append(param.array - param.grad * min(1, rate))
            param.to_device(device)

        opt = optimizers.SGD(lr=1)
        opt.setup(target)
        opt.add_hook(
            optimizer_hooks.GradientClipping(threshold))
        opt.update()

        for expect, param in zip(expects, target.params()):
            testing.assert_allclose(expect, param.array) 
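Here self.norm is evidently the overall gradient norm of the target, so with threshold = norm * rate the GradientClipping hook rescales the gradients by min(1, threshold / norm) = min(1, rate); with lr=1 the expected update is therefore param.array - param.grad * min(1, rate).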
Example #4
Source File: test_weight_decay.py    From chainer with MIT License
def check_weight_decay(self, backend_configs):
        target = self.target
        assert len(backend_configs) == len(list(target.params()))
        devices = [bc.device for bc in backend_configs]

        decay = 0.2

        # Compute expected
        expects = []
        for param, device in zip(target.params(), devices):
            expects.append(param.array - param.grad - decay * param.array)
            param.to_device(device)

        opt = optimizers.SGD(lr=1)
        opt.setup(self.target)
        opt.add_hook(optimizer_hooks.WeightDecay(decay))
        opt.update()

        # Validate
        for expect, param in zip(expects, target.params()):
            testing.assert_allclose(expect, param.array) 
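With lr=1 a plain SGD step would subtract the raw gradient; the WeightDecay hook adds decay * param.array to each gradient beforehand, so the expected result is param.array - param.grad - decay * param.array, as computed above.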
Example #5
Source File: plot_chainer_MLP.py    From soft-dtw with BSD 2-Clause "Simplified" License
def train(network, loss, X_tr, Y_tr, X_te, Y_te, n_epochs=30, gamma=1):
    model = Objective(network, loss=loss, gamma=gamma)

    #optimizer = optimizers.SGD()
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    train = tuple_dataset.TupleDataset(X_tr, Y_tr)
    test = tuple_dataset.TupleDataset(X_te, Y_te)

    train_iter = iterators.SerialIterator(train, batch_size=1, shuffle=True)
    test_iter = iterators.SerialIterator(test, batch_size=1, repeat=False,
                                         shuffle=False)
    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (n_epochs, 'epoch'))

    trainer.run() 
Example #6
Source File: test_optimizer.py    From chainer with MIT License
def setUp(self):
        param0_data = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
        param0_grad = np.copy(param0_data)
        param1_data = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
        param1_grad = np.copy(param1_data)
        self.target = chainer.ChainList(
            SimpleLink(param0_data, param0_grad),
            SimpleLink(param1_data, param1_grad))
        lr = 1.0
        if self.loss_scale is not None:
            lr = self.loss_scale
            for i in range(2):
                self.target[i].param._loss_scale = self.loss_scale
        # TODO(niboshi): Do not use SGD in GradientMethod test
        self.optimizer = chainer.optimizers.SGD(lr) 
Example #7
Source File: test_optimizers_by_linear_model.py    From chainer with MIT License
def create(self):
        return optimizers.SGD(0.1) 
Example #8
Source File: test_gradient_noise.py    From chainer with MIT License
def check_gradient_noise(self, backend_configs):
        target = self.target
        assert len(backend_configs) == len(list(target.params()))
        devices = [bc.device for bc in backend_configs]
        noise_value = np.asarray(self.noise_value)
        expects = []
        # Compute expected
        for param, device in zip(target.params(), devices):
            expects.append(param.array - param.grad - noise_value)
            param.to_device(device)

        def test_noise(xp, shape, dtype, hook, opt):
            # Make noise value an array of current backend
            return xp.array(noise_value)

        noise = mock.Mock(side_effect=test_noise)
        opt = optimizers.SGD(lr=1)
        opt.setup(self.target)
        hook = optimizer_hooks.GradientNoise(self.eta, noise_func=noise)
        opt.add_hook(hook)
        opt.update()

        # Validate
        for expect, param in zip(expects, target.params()):
            testing.assert_allclose(expect, param.array)

        self.assertEqual(noise.call_count, len(tuple(self.target.params())))

        calls = []
        for param in target.params():
            xp = param.device.xp
            calls.append(mock.call(xp, (2, 3), np.dtype('float32'), hook,
                                   param.update_rule))

        # Order does not matter
        assert(any([noise.mock_calls == list(permuted_calls)
                    for permuted_calls in itertools.permutations(calls)])) 
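The mocked noise function returns the fixed noise_value, which the GradientNoise hook adds to each gradient before the update; with lr=1 the expected result is param.array - param.grad - noise_value. The final assertion only verifies that the noise function was called once per parameter with the expected arguments, in any order.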
Example #9
Source File: test_gradient_lars.py    From chainer with MIT License
def check_LARS(self, backend_configs):
        target = self.target
        devices = [bc.device for bc in backend_configs]
        assert len(backend_configs) == len(list(target[0].params()))
        assert len(backend_configs) == len(list(target[1].params()))
        threshold = 1e-2
        weight_decay = 0.2
        eps = 1e-9

        expects0 = []
        expects1 = []
        # Compute expected
        for param, device in zip(target[0].params(), devices):
            p0_norm = np.linalg.norm(param.array)
            g0_norm = np.linalg.norm(param.grad)
            clip_rate = p0_norm / (eps + g0_norm + weight_decay * p0_norm)
            expects0.append(param.array - clip_rate
                            * (param.grad + weight_decay * param.array))
            param.to_device(device)

        for param, device in zip(target[1].params(), devices):
            expects1.append(param.array - 1.0
                            * (param.grad + weight_decay * param.array))

        opt = optimizers.SGD(lr=1)
        opt.setup(self.target)
        opt.add_hook(optimizer_hooks.GradientLARS(threshold=threshold,
                                                  weight_decay=weight_decay,
                                                  eps=eps))
        opt.update()
        for expect, param in zip(expects0, target[0].params()):
            testing.assert_allclose(expect, param.array)
        for expect, param in zip(expects1, target[1].params()):
            testing.assert_allclose(expect, param.array) 
Example #10
Source File: test_weight_decay.py    From chainer with MIT License
def _updated_array(self, backend_config, loss_scale):
        arr = np.arange(3, dtype=np.float32)
        param = chainer.Parameter(arr)
        link = chainer.Link()
        with link.init_scope():
            link.p = param
        link.to_device(backend_config.device)
        opt = optimizers.SGD(lr=1)
        opt.setup(link)
        opt.add_hook(optimizer_hooks.WeightDecay(1/8.))
        loss = F.sum(link.p ** 3)
        loss.backward(loss_scale=loss_scale)
        opt.update()
        return link.p.array 
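For arr = [0, 1, 2] the gradient of sum(p ** 3) is 3 * p ** 2, and WeightDecay(1/8.) adds p / 8 to it, so with lr=1 the returned array should be p - 3 * p ** 2 - p / 8 regardless of loss_scale (the update rule is expected to compensate for the scaling applied during backward).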
Example #11
Source File: chainer_backend.py    From Chimp with Apache License 2.0
def set_params(self, params):

        self.gpu = params.get('gpu',False)
        self.learning_rate = params.get('learning_rate',0.00025)
        self.decay_rate = params.get('decay_rate',0.95)
        self.discount = params.get('discount',0.95)
        self.clip_err = params.get('clip_err',False)
        self.target_net_update = params.get('target_net_update',10000)
        self.double_DQN = params.get('double_DQN',False)

        # setting up various possible gradient update algorithms
        opt = params.get('optim_name', 'ADAM')
        if opt == 'RMSprop':
            self.optimizer = optimizers.RMSprop(lr=self.learning_rate, alpha=self.decay_rate)

        elif opt == 'ADADELTA':
            print("Supplied learning rate not used with ADADELTA gradient update method")
            self.optimizer = optimizers.AdaDelta()

        elif opt == 'ADAM':
            self.optimizer = optimizers.Adam(alpha=self.learning_rate)

        elif opt == 'SGD':
            self.optimizer = optimizers.SGD(lr=self.learning_rate)

        else:
            print('The requested optimizer is not supported!!!')
            exit()

        if self.clip_err is not False:
            self.optimizer.add_hook(chainer.optimizer.GradientClipping(self.clip_err))

        self.optim_name = params['optim_name'] 
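For context, here is a minimal sketch of how such a parameter dictionary might select SGD; the learner instance and the values shown are hypothetical, and only the keys read by set_params above are taken from the example.

# Hypothetical call; 'optim_name' and 'learning_rate' are keys read by
# set_params above, everything else falls back to its default.
params = {
    'optim_name': 'SGD',
    'learning_rate': 0.01,
    'clip_err': 1.0,  # any non-False value enables the GradientClipping hook
}
learner.set_params(params)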
Example #12
Source File: test_gradient_scaling.py    From chainercv with MIT License
def check_gradient_scaling(self):
        w = self.target.param.array
        g = self.target.param.grad

        rate = 0.2
        expect = w - g * rate

        opt = optimizers.SGD(lr=1)
        opt.setup(self.target)
        opt.add_hook(GradientScaling(rate))
        opt.update()

        testing.assert_allclose(expect, w) 
Example #13
Source File: train_cg.py    From yukarin with MIT License
def create_optimizer(model):
        cp: Dict[str, Any] = copy(config.train.optimizer)
        n = cp.pop('name').lower()

        if n == 'adam':
            optimizer = optimizers.Adam(**cp)
        elif n == 'sgd':
            optimizer = optimizers.SGD(**cp)
        else:
            raise ValueError(n)

        optimizer.setup(model)
        return optimizer 
Example #14
Source File: train.py    From yukarin with MIT License
def create_optimizer(model):
        cp: Dict[str, Any] = copy(config.train.optimizer)
        n = cp.pop('name').lower()

        if n == 'adam':
            optimizer = optimizers.Adam(**cp)
        elif n == 'sgd':
            optimizer = optimizers.SGD(**cp)
        else:
            raise ValueError(n)

        optimizer.setup(model)
        return optimizer 
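Examples #13 and #14 share the same factory pattern: the optimizer section of the config is copied, its 'name' entry selects the optimizer class, and the remaining entries are forwarded as keyword arguments. A hypothetical config such as {'name': 'sgd', 'lr': 0.01} would therefore produce optimizers.SGD(lr=0.01).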
Example #15
Source File: __init__.py    From deel with MIT License
def __init__(self,optimizer=None,vocab=None,n_input_units=1000,
					n_units=650,grad_clip=5,bproplen=35):

		if vocab is None:
			vocab=BatchTrainer.vocab
		self.vocab=vocab
		n_vocab = len(vocab)
		super(LSTM,self).__init__('LSTM')

		self.func = deel.model.lstm.RNNLM(n_input_units=n_input_units,n_vocab=n_vocab,n_units=n_units)
		self.func.compute_accuracy = False 
		for param in self.func.params():
			data = param.data
			data[:] = np.random.uniform(-0.1, 0.1, data.shape)


		if Deel.gpu>=0:
			self.func.to_gpu()


		if optimizer is None:
			# Fall back to plain SGD when no optimizer is supplied.
			optimizer = optimizers.SGD(lr=1.)
		self.optimizer = optimizer
		self.optimizer.setup(self.func)
		self.clip = chainer.optimizer.GradientClipping(grad_clip)
		self.optimizer.add_hook(self.clip)

		self.accum_loss = 0
		self.cur_log_perp =  Deel.xp.zeros(())