Python chainer.optimizers.SGD Examples
The following are 15 code examples of chainer.optimizers.SGD(), drawn from open-source projects.
You may also want to check out all available functions and classes of the chainer.optimizers module.
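Before the examples, here is a minimal sketch of the typical chainer.optimizers.SGD workflow: construct the optimizer, attach it to a link with setup(), optionally register an optimizer hook with add_hook(), then run the backward pass and call update(). The toy Linear model, random data, and WeightDecay coefficient below are illustrative assumptions and are not taken from any of the projects listed.

import numpy as np
import chainer.functions as F
import chainer.links as L
from chainer import optimizers, optimizer_hooks

# A toy model and data, used only to illustrate the optimizer API.
model = L.Linear(3, 1)
x = np.random.rand(8, 3).astype(np.float32)
t = np.random.rand(8, 1).astype(np.float32)

# Plain SGD with a fixed learning rate; hooks such as WeightDecay are optional.
opt = optimizers.SGD(lr=0.01)
opt.setup(model)
opt.add_hook(optimizer_hooks.WeightDecay(1e-4))

# One manual training step: clear grads, compute the loss, backprop, update.
model.cleargrads()
loss = F.mean_squared_error(model(x), t)
loss.backward()
opt.update()

The examples below show the same pattern inside real test suites and training scripts, usually with SGD(lr=1) so that a single update can be checked against a hand-computed expectation.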
Example #1
Source File: test_optimizer.py From chainer with MIT License | 6 votes |
def test_cleargrad(self, backend_config):
    class CleargradHook(object):

        name = 'Cleargrad'
        timing = 'pre'

        def __init__(self, _):
            pass

        def __call__(self, opt):
            for param in opt.target.params():
                # Clear all grads
                param.cleargrad()

    target = self.target
    target.to_device(backend_config.device)

    # TODO(niboshi): Do not use SGD in GradientMethod test
    opt = optimizers.SGD(lr=1)
    opt.setup(target)
    opt.add_hook(CleargradHook(self))
    opt.add_hook(DummyHook(self))
    opt.update()
Example #2
Source File: test_gradient_hard_clipping.py From chainer with MIT License | 6 votes |
def check_hardclipping(self, backend_configs):
    target = self.target
    assert len(backend_configs) == len(list(target.params()))
    devices = [bc.device for bc in backend_configs]

    lower_bound = -0.9
    upper_bound = 1.1

    expects = []
    # Compute expected
    for param, device in zip(target.params(), devices):
        expects.append(param.array
                       - np.clip(param.grad, lower_bound, upper_bound))
        param.to_device(device)

    # Apply optimizer_hook
    opt = optimizers.SGD(lr=1)
    opt.setup(self.target)
    opt.add_hook(
        optimizer_hooks.GradientHardClipping(lower_bound, upper_bound))
    opt.update()

    # Validate
    for expect, param in zip(expects, target.params()):
        testing.assert_allclose(expect, param.array)
Example #3
Source File: test_gradient_clipping.py From chainer with MIT License | 6 votes |
def check_clipping(self, backend_configs, rate):
    target = self.target
    norm = self.norm
    assert len(backend_configs) == len(list(target.params()))
    devices = [bc.device for bc in backend_configs]

    threshold = norm * rate

    expects = []
    for param, device in zip(target.params(), devices):
        expects.append(param.array - param.grad * min(1, rate))
        param.to_device(device)

    opt = optimizers.SGD(lr=1)
    opt.setup(target)
    opt.add_hook(
        optimizer_hooks.GradientClipping(threshold))
    opt.update()

    for expect, param in zip(expects, target.params()):
        testing.assert_allclose(expect, param.array)
Example #4
Source File: test_weight_decay.py From chainer with MIT License | 6 votes |
def check_weight_decay(self, backend_configs):
    target = self.target
    assert len(backend_configs) == len(list(target.params()))
    devices = [bc.device for bc in backend_configs]

    decay = 0.2

    # Compute expected
    expects = []
    for param, device in zip(target.params(), devices):
        expects.append(param.array - param.grad - decay * param.array)
        param.to_device(device)

    opt = optimizers.SGD(lr=1)
    opt.setup(self.target)
    opt.add_hook(optimizer_hooks.WeightDecay(decay))
    opt.update()

    # Validate
    for expect, param in zip(expects, target.params()):
        testing.assert_allclose(expect, param.array)
Example #5
Source File: plot_chainer_MLP.py From soft-dtw with BSD 2-Clause "Simplified" License | 6 votes |
def train(network, loss, X_tr, Y_tr, X_te, Y_te, n_epochs=30, gamma=1):
    # Wrap the network and loss into a single objective to optimize.
    model = Objective(network, loss=loss, gamma=gamma)

    # optimizer = optimizers.SGD()
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    # Build datasets and iterators (batch size 1).
    train = tuple_dataset.TupleDataset(X_tr, Y_tr)
    test = tuple_dataset.TupleDataset(X_te, Y_te)
    train_iter = iterators.SerialIterator(train, batch_size=1, shuffle=True)
    test_iter = iterators.SerialIterator(test, batch_size=1, repeat=False,
                                         shuffle=False)

    # Run the Chainer training loop for n_epochs epochs.
    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (n_epochs, 'epoch'))
    trainer.run()
Example #6
Source File: test_optimizer.py From chainer with MIT License | 5 votes |
def setUp(self):
    param0_data = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
    param0_grad = np.copy(param0_data)
    param1_data = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
    param1_grad = np.copy(param1_data)
    self.target = chainer.ChainList(
        SimpleLink(param0_data, param0_grad),
        SimpleLink(param1_data, param1_grad))
    lr = 1.0
    if self.loss_scale is not None:
        lr = self.loss_scale
        for i in range(2):
            self.target[i].param._loss_scale = self.loss_scale

    # TODO(niboshi): Do not use SGD in GradientMethod test
    self.optimizer = chainer.optimizers.SGD(lr)
Example #7
Source File: test_optimizers_by_linear_model.py From chainer with MIT License | 5 votes |
def create(self):
    return optimizers.SGD(0.1)
Example #8
Source File: test_gradient_noise.py From chainer with MIT License | 5 votes |
def check_gradient_noise(self, backend_configs):
    target = self.target
    assert len(backend_configs) == len(list(target.params()))
    devices = [bc.device for bc in backend_configs]

    noise_value = np.asarray(self.noise_value)

    expects = []
    # Compute expected
    for param, device in zip(target.params(), devices):
        expects.append(param.array - param.grad - noise_value)
        param.to_device(device)

    def test_noise(xp, shape, dtype, hook, opt):
        # Make noise value an array of current backend
        return xp.array(noise_value)

    noise = mock.Mock(side_effect=test_noise)
    opt = optimizers.SGD(lr=1)
    opt.setup(self.target)
    hook = optimizer_hooks.GradientNoise(self.eta, noise_func=noise)
    opt.add_hook(hook)
    opt.update()

    # Validate
    for expect, param in zip(expects, target.params()):
        testing.assert_allclose(expect, param.array)

    self.assertEqual(noise.call_count, len(tuple(self.target.params())))
    calls = []
    for param in target.params():
        xp = param.device.xp
        calls.append(mock.call(xp, (2, 3), np.dtype('float32'),
                               hook, param.update_rule))
    # Order does not matter
    assert any(noise.mock_calls == list(permuted_calls)
               for permuted_calls in itertools.permutations(calls))
Example #9
Source File: test_gradient_lars.py From chainer with MIT License | 5 votes |
def check_LARS(self, backend_configs):
    target = self.target
    devices = [bc.device for bc in backend_configs]
    assert len(backend_configs) == len(list(target[0].params()))
    assert len(backend_configs) == len(list(target[1].params()))

    threshold = 1e-2
    weight_decay = 0.2
    eps = 1e-9

    expects0 = []
    expects1 = []
    # Compute expected
    for param, device in zip(target[0].params(), devices):
        p0_norm = np.linalg.norm(param.array)
        g0_norm = np.linalg.norm(param.grad)
        clip_rate = p0_norm / (eps + g0_norm + weight_decay * p0_norm)
        expects0.append(param.array - clip_rate
                        * (param.grad + weight_decay * param.array))
        param.to_device(device)
    for param, device in zip(target[1].params(), devices):
        expects1.append(param.array - 1.0
                        * (param.grad + weight_decay * param.array))

    opt = optimizers.SGD(lr=1)
    opt.setup(self.target)
    opt.add_hook(optimizer_hooks.GradientLARS(threshold=threshold,
                                              weight_decay=weight_decay,
                                              eps=eps))
    opt.update()

    for expect, param in zip(expects0, target[0].params()):
        testing.assert_allclose(expect, param.array)
    for expect, param in zip(expects1, target[1].params()):
        testing.assert_allclose(expect, param.array)
Example #10
Source File: test_weight_decay.py From chainer with MIT License | 5 votes |
def _updated_array(self, backend_config, loss_scale):
    # A link with a single 3-element parameter.
    arr = np.arange(3, dtype=np.float32)
    param = chainer.Parameter(arr)
    link = chainer.Link()
    with link.init_scope():
        link.p = param
    link.to_device(backend_config.device)

    # Plain SGD with a WeightDecay hook.
    opt = optimizers.SGD(lr=1)
    opt.setup(link)
    opt.add_hook(optimizer_hooks.WeightDecay(1 / 8.))

    # Backward pass (optionally with loss scaling), then update.
    loss = F.sum(link.p ** 3)
    loss.backward(loss_scale=loss_scale)
    opt.update()

    return link.p.array
Example #11
Source File: chainer_backend.py From Chimp with Apache License 2.0 | 5 votes |
def set_params(self, params):

    self.gpu = params.get('gpu', False)
    self.learning_rate = params.get('learning_rate', 0.00025)
    self.decay_rate = params.get('decay_rate', 0.95)
    self.discount = params.get('discount', 0.95)
    self.clip_err = params.get('clip_err', False)
    self.target_net_update = params.get('target_net_update', 10000)
    self.double_DQN = params.get('double_DQN', False)

    # setting up various possible gradient update algorithms
    opt = params.get('optim_name', 'ADAM')
    if opt == 'RMSprop':
        self.optimizer = optimizers.RMSprop(lr=self.learning_rate,
                                            alpha=self.decay_rate)
    elif opt == 'ADADELTA':
        print("Supplied learning rate not used with ADADELTA gradient update method")
        self.optimizer = optimizers.AdaDelta()
    elif opt == 'ADAM':
        self.optimizer = optimizers.Adam(alpha=self.learning_rate)
    elif opt == 'SGD':
        self.optimizer = optimizers.SGD(lr=self.learning_rate)
    else:
        print('The requested optimizer is not supported!!!')
        exit()

    if self.clip_err is not False:
        self.optimizer.add_hook(
            chainer.optimizer.GradientClipping(self.clip_err))

    self.optim_name = params['optim_name']
Example #12
Source File: test_gradient_scaling.py From chainercv with MIT License | 5 votes |
def check_gradient_scaling(self):
    w = self.target.param.array
    g = self.target.param.grad

    # Expected result of one SGD(lr=1) step with gradients scaled by `rate`.
    rate = 0.2
    expect = w - g * rate

    opt = optimizers.SGD(lr=1)
    opt.setup(self.target)
    opt.add_hook(GradientScaling(rate))
    opt.update()

    testing.assert_allclose(expect, w)
Example #13
Source File: train_cg.py From yukarin with MIT License | 5 votes |
def create_optimizer(model):
    # Pop the optimizer name from the config; the remaining entries are kwargs.
    cp: Dict[str, Any] = copy(config.train.optimizer)
    n = cp.pop('name').lower()

    if n == 'adam':
        optimizer = optimizers.Adam(**cp)
    elif n == 'sgd':
        optimizer = optimizers.SGD(**cp)
    else:
        raise ValueError(n)

    optimizer.setup(model)
    return optimizer
Example #14
Source File: train.py From yukarin with MIT License | 5 votes |
def create_optimizer(model):
    # Same pattern as above: pop the optimizer name, pass the rest as kwargs.
    cp: Dict[str, Any] = copy(config.train.optimizer)
    n = cp.pop('name').lower()

    if n == 'adam':
        optimizer = optimizers.Adam(**cp)
    elif n == 'sgd':
        optimizer = optimizers.SGD(**cp)
    else:
        raise ValueError(n)

    optimizer.setup(model)
    return optimizer
Example #15
Source File: __init__.py From deel with MIT License | 5 votes |
def __init__(self, optimizer=None, vocab=None, n_input_units=1000,
             n_units=650, grad_clip=5, bproplen=35):
    if vocab is None:
        vocab = BatchTrainer.vocab
    self.vocab = vocab
    n_vocab = len(vocab)
    super(LSTM, self).__init__('LSTM')

    # Build the RNN language model and initialize its parameters uniformly.
    self.func = deel.model.lstm.RNNLM(n_input_units=n_input_units,
                                      n_vocab=n_vocab, n_units=n_units)
    self.func.compute_accuracy = False
    for param in self.func.params():
        data = param.data
        data[:] = np.random.uniform(-0.1, 0.1, data.shape)
    if Deel.gpu >= 0:
        self.func.to_gpu()

    # Default to SGD(lr=1.) with gradient clipping when no optimizer is given.
    if optimizer is None:
        self.optimizer = optimizers.SGD(lr=1.)
    self.optimizer.setup(self.func)
    self.clip = chainer.optimizer.GradientClipping(grad_clip)
    self.optimizer.add_hook(self.clip)

    self.accum_loss = 0
    self.cur_log_perp = Deel.xp.zeros(())