Python torch.optim.Adagrad() Examples
The following are 30 code examples of torch.optim.Adagrad(), drawn from open-source projects. Each example lists its source file, project, and license.
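Before the project-specific examples, here is a minimal, self-contained sketch of constructing and stepping torch.optim.Adagrad directly. The model, loss, and data below are placeholders chosen for illustration, not taken from any of the projects listed.

import torch
import torch.nn as nn
import torch.optim as optim

# Toy model and data, purely illustrative.
model = nn.Linear(10, 1)
criterion = nn.MSELoss()

# Adagrad's main constructor knobs are lr, lr_decay, weight_decay,
# initial_accumulator_value, and eps.
optimizer = optim.Adagrad(model.parameters(), lr=0.01, weight_decay=0.0)

x, y = torch.randn(32, 10), torch.randn(32, 1)
for _ in range(100):
    optimizer.zero_grad()          # clear accumulated gradients
    loss = criterion(model(x), y)  # forward pass
    loss.backward()                # backward pass
    optimizer.step()               # Adagrad update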
Example #1
Source File: optimizer.py From ACAN with MIT License | 7 votes |
def create_optimizer(args, optim_params):
    if args.optimizer == 'sgd':
        return optim.SGD(optim_params, args.lr, momentum=args.momentum,
                         weight_decay=args.weight_decay)
    elif args.optimizer == 'adagrad':
        return optim.Adagrad(optim_params, args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        return optim.Adam(optim_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay)
    elif args.optimizer == 'amsgrad':
        return optim.Adam(optim_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay, amsgrad=True)
    elif args.optimizer == 'adabound':
        from adabound import AdaBound
        return AdaBound(optim_params, args.lr, betas=(args.beta1, args.beta2),
                        final_lr=args.final_lr, gamma=args.gamma,
                        weight_decay=args.weight_decay)
    else:
        assert args.optimizer == 'amsbound'
        from adabound import AdaBound
        return AdaBound(optim_params, args.lr, betas=(args.beta1, args.beta2),
                        final_lr=args.final_lr, gamma=args.gamma,
                        weight_decay=args.weight_decay, amsbound=True)
Example #2
Source File: optimization.py From SeaRNN-open with MIT License | 6 votes |
def create_optimizer(parameters, opt):
    lr = opt.learning_rate
    # default learning rates:
    # sgd - 0.5, adagrad - 0.01, adadelta - 1, adam - 0.001,
    # adamax - 0.002, asgd - 0.01, rmsprop - 0.01, rprop - 0.01
    optim_method = opt.optim_method.casefold()
    if optim_method == 'sgd':
        optimizer = optim.SGD(parameters, lr=lr if lr else 0.5,
                              weight_decay=opt.weight_decay)
    elif optim_method == 'adagrad':
        optimizer = optim.Adagrad(parameters, lr=lr if lr else 0.01,
                                  weight_decay=opt.weight_decay)
    elif optim_method == 'adadelta':
        optimizer = optim.Adadelta(parameters, lr=lr if lr else 1,
                                   weight_decay=opt.weight_decay)
    elif optim_method == 'adam':
        optimizer = optim.Adam(parameters, lr=lr if lr else 0.001,
                               weight_decay=opt.weight_decay)
    elif optim_method == 'adamax':
        optimizer = optim.Adamax(parameters, lr=lr if lr else 0.002,
                                 weight_decay=opt.weight_decay)
    elif optim_method == 'asgd':
        optimizer = optim.ASGD(parameters, lr=lr if lr else 0.01, t0=5000,
                               weight_decay=opt.weight_decay)
    elif optim_method == 'rmsprop':
        optimizer = optim.RMSprop(parameters, lr=lr if lr else 0.01,
                                  weight_decay=opt.weight_decay)
    elif optim_method == 'rprop':
        optimizer = optim.Rprop(parameters, lr=lr if lr else 0.01)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim_method)
    return optimizer
Example #3
Source File: optim.py From PaperRobot with MIT License | 6 votes |
def get_optimizer(model, lr_method, lr_rate):
    """
    parse optimization method parameters, and initialize optimizer function
    """
    lr_method_name = lr_method
    # initialize optimizer function
    if lr_method_name == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=lr_rate, momentum=0.9)
        # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    elif lr_method_name == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=lr_rate)
        # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    elif lr_method_name == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=lr_rate)
        # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.001)
    else:
        raise Exception('unknown optimization method.')
    return optimizer  # , scheduler
Example #4
Source File: optimizers.py From PreSumm with MIT License | 6 votes |
def set_parameters(self, params):
    """ ? """
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.learning_rate,
                                    betas=self.betas, eps=1e-9)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
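Example #4 and several of the other OpenNMT-derived snippets below fill Adagrad's 'sum' state by hand right after construction. In recent PyTorch releases the constructor's initial_accumulator_value argument starts the squared-gradient accumulator at the same value, so a hedged equivalent sketch (the model and the adagrad_accum value here are illustrative stand-ins, not code from PreSumm) is:

import torch.nn as nn
import torch.optim as optim

model = nn.Linear(10, 1)   # placeholder model
adagrad_accum = 0.1        # stand-in for self.adagrad_accum

# In recent PyTorch this single call initializes the accumulator to
# adagrad_accum, matching the manual fill_() loop shown above.
optimizer = optim.Adagrad(model.parameters(), lr=0.15,
                          initial_accumulator_value=adagrad_accum)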
Example #5
Source File: Optim.py From DC-NeuralConversation with MIT License | 6 votes |
def set_parameters(self, params):
    self.params = [p for p in params if p.requires_grad]
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    betas=self.betas, eps=1e-9)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #6
Source File: train.py From Reinforce-Paraphrase-Generation with MIT License | 6 votes |
def setup_train(self, model_file_path=None):
    self.model = Model(model_file_path)

    params = list(self.model.encoder.parameters()) + list(self.model.decoder.parameters()) + \
             list(self.model.reduce_state.parameters())
    initial_lr = config.lr_coverage if config.is_coverage else config.lr
    if config.mode == 'MLE':
        self.optimizer = Adagrad(params, lr=0.15, initial_accumulator_value=0.1)
    else:
        self.optimizer = Adam(params, lr=initial_lr)

    start_iter, start_loss = 0, 0

    if model_file_path is not None:
        state = torch.load(model_file_path, map_location=lambda storage, location: storage)
        start_iter = state['iter']
        start_loss = state['current_loss']

    return start_iter, start_loss
Example #7
Source File: main.py From AdaBound with Apache License 2.0 | 6 votes |
def create_optimizer(args, model_params):
    if args.optim == 'sgd':
        return optim.SGD(model_params, args.lr, momentum=args.momentum,
                         weight_decay=args.weight_decay)
    elif args.optim == 'adagrad':
        return optim.Adagrad(model_params, args.lr, weight_decay=args.weight_decay)
    elif args.optim == 'adam':
        return optim.Adam(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay)
    elif args.optim == 'amsgrad':
        return optim.Adam(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay, amsgrad=True)
    elif args.optim == 'adabound':
        return AdaBound(model_params, args.lr, betas=(args.beta1, args.beta2),
                        final_lr=args.final_lr, gamma=args.gamma,
                        weight_decay=args.weight_decay)
    else:
        assert args.optim == 'amsbound'
        return AdaBound(model_params, args.lr, betas=(args.beta1, args.beta2),
                        final_lr=args.final_lr, gamma=args.gamma,
                        weight_decay=args.weight_decay, amsbound=True)
Example #8
Source File: Optim.py From pytorch-nlp with MIT License | 6 votes |
def set_parameters(self, params):
    self.params = [p for p in params if p.requires_grad]
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    betas=self.betas, eps=1e-9)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #9
Source File: train_utils.py From gnn-model-explainer with Apache License 2.0 | 6 votes |
def build_optimizer(args, params, weight_decay=0.0):
    filter_fn = filter(lambda p: p.requires_grad, params)
    if args.opt == 'adam':
        optimizer = optim.Adam(filter_fn, lr=args.lr, weight_decay=weight_decay)
    elif args.opt == 'sgd':
        optimizer = optim.SGD(filter_fn, lr=args.lr, momentum=0.95,
                              weight_decay=weight_decay)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(filter_fn, lr=args.lr, weight_decay=weight_decay)
    elif args.opt == 'adagrad':
        optimizer = optim.Adagrad(filter_fn, lr=args.lr, weight_decay=weight_decay)
    if args.opt_scheduler == 'none':
        return None, optimizer
    elif args.opt_scheduler == 'step':
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.opt_decay_step,
                                              gamma=args.opt_decay_rate)
    elif args.opt_scheduler == 'cos':
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.opt_restart)
    return scheduler, optimizer
Example #10
Source File: Config.py From OpenNRE-PyTorch with MIT License | 6 votes |
def set_train_model(self, model):
    print("Initializing training model...")
    self.model = model
    self.trainModel = self.model(config=self)
    if self.pretrain_model != None:
        self.trainModel.load_state_dict(torch.load(self.pretrain_model))
    self.trainModel.cuda()
    if self.optimizer != None:
        pass
    elif self.opt_method == "Adagrad" or self.opt_method == "adagrad":
        self.optimizer = optim.Adagrad(self.trainModel.parameters(), lr=self.learning_rate,
                                       lr_decay=self.lr_decay, weight_decay=self.weight_decay)
    elif self.opt_method == "Adadelta" or self.opt_method == "adadelta":
        self.optimizer = optim.Adadelta(self.trainModel.parameters(), lr=self.learning_rate,
                                        weight_decay=self.weight_decay)
    elif self.opt_method == "Adam" or self.opt_method == "adam":
        self.optimizer = optim.Adam(self.trainModel.parameters(), lr=self.learning_rate,
                                    weight_decay=self.weight_decay)
    else:
        self.optimizer = optim.SGD(self.trainModel.parameters(), lr=self.learning_rate,
                                   weight_decay=self.weight_decay)
    print("Finish initializing")
Example #11
Source File: Optim.py From reversible-rnn with MIT License | 6 votes |
def set_parameters(self, params):
    self.params = [p for p in params if p.requires_grad]
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    betas=self.betas, eps=1e-9)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #12
Source File: misc.py From ImageCaptioning.pytorch with MIT License | 6 votes |
def build_optimizer(params, opt):
    if opt.optim == 'rmsprop':
        return optim.RMSprop(params, opt.learning_rate, opt.optim_alpha,
                             opt.optim_epsilon, weight_decay=opt.weight_decay)
    elif opt.optim == 'adagrad':
        return optim.Adagrad(params, opt.learning_rate, weight_decay=opt.weight_decay)
    elif opt.optim == 'sgd':
        return optim.SGD(params, opt.learning_rate, weight_decay=opt.weight_decay)
    elif opt.optim == 'sgdm':
        return optim.SGD(params, opt.learning_rate, opt.optim_alpha,
                         weight_decay=opt.weight_decay)
    elif opt.optim == 'sgdmom':
        return optim.SGD(params, opt.learning_rate, opt.optim_alpha,
                         weight_decay=opt.weight_decay, nesterov=True)
    elif opt.optim == 'adam':
        return optim.Adam(params, opt.learning_rate, (opt.optim_alpha, opt.optim_beta),
                          opt.optim_epsilon, weight_decay=opt.weight_decay)
    elif opt.optim == 'adamw':
        return optim.AdamW(params, opt.learning_rate, (opt.optim_alpha, opt.optim_beta),
                           opt.optim_epsilon, weight_decay=opt.weight_decay)
    else:
        raise Exception("bad option opt.optim: {}".format(opt.optim))
Example #13
Source File: optim.py From ASER with MIT License | 6 votes |
def set_parameters(self, params):
    """ ? """
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.learning_rate,
                                    betas=self.betas, eps=self.adam_eps)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #14
Source File: make_optimizer.py From MAMS-for-ABSA with Apache License 2.0 | 6 votes |
def make_optimizer(config, model):
    mode = config['mode']
    config = config['aspect_' + mode + '_model'][config['aspect_' + mode + '_model']['type']]
    lr = config['learning_rate']
    weight_decay = config['weight_decay']
    opt = {
        'sgd': optim.SGD,
        'adadelta': optim.Adadelta,
        'adam': optim.Adam,
        'adamax': optim.Adamax,
        'adagrad': optim.Adagrad,
        'asgd': optim.ASGD,
        'rmsprop': optim.RMSprop,
        'adabound': adabound.AdaBound
    }
    if 'momentum' in config:
        optimizer = opt[config['optimizer']](model.parameters(), lr=lr,
                                             weight_decay=weight_decay,
                                             momentum=config['momentum'])
    else:
        optimizer = opt[config['optimizer']](model.parameters(), lr=lr,
                                             weight_decay=weight_decay)
    return optimizer
Example #15
Source File: Config.py From NeuralTripleTranslation with Apache License 2.0 | 6 votes |
def set_model(self, model):
    self.model = model
    self.trainModel = self.model(config=self)
    if self.use_cuda:
        self.trainModel = self.trainModel.cuda()
    if self.optimizer is not None:
        pass
    elif self.opt_method == "Adagrad" or self.opt_method == "adagrad":
        self.optimizer = optim.Adagrad(self.trainModel.parameters(), lr=self.alpha,
                                       lr_decay=self.lr_decay, weight_decay=self.weight_decay)
    elif self.opt_method == "Adadelta" or self.opt_method == "adadelta":
        self.optimizer = optim.Adadelta(self.trainModel.parameters(), lr=self.alpha)
    elif self.opt_method == "Adam" or self.opt_method == "adam":
        self.optimizer = optim.Adam(self.trainModel.parameters(), lr=self.alpha)
    else:
        self.optimizer = optim.SGD(self.trainModel.parameters(), lr=self.alpha)
Example #16
Source File: utils.py From VSUA-Captioning with MIT License | 6 votes |
def build_optimizer(params, opt):
    if opt.optim == 'rmsprop':
        return optim.RMSprop(params, opt.learning_rate, opt.optim_alpha,
                             opt.optim_epsilon, weight_decay=opt.weight_decay)
    elif opt.optim == 'adagrad':
        return optim.Adagrad(params, opt.learning_rate, weight_decay=opt.weight_decay)
    elif opt.optim == 'sgd':
        return optim.SGD(params, opt.learning_rate, weight_decay=opt.weight_decay)
    elif opt.optim == 'sgdm':
        return optim.SGD(params, opt.learning_rate, opt.optim_alpha,
                         weight_decay=opt.weight_decay)
    elif opt.optim == 'sgdmom':
        return optim.SGD(params, opt.learning_rate, opt.optim_alpha,
                         weight_decay=opt.weight_decay, nesterov=True)
    elif opt.optim == 'adam':
        return optim.Adam(params, opt.learning_rate, (opt.optim_alpha, opt.optim_beta),
                          opt.optim_epsilon, weight_decay=opt.weight_decay)
    else:
        raise Exception("bad option opt.optim: {}".format(opt.optim))

# batch_size * feat_size -> (batch_size * count) * feat_size
Example #17
Source File: misc.py From self-critical.pytorch with MIT License | 6 votes |
def build_optimizer(params, opt):
    if opt.optim == 'rmsprop':
        return optim.RMSprop(params, opt.learning_rate, opt.optim_alpha,
                             opt.optim_epsilon, weight_decay=opt.weight_decay)
    elif opt.optim == 'adagrad':
        return optim.Adagrad(params, opt.learning_rate, weight_decay=opt.weight_decay)
    elif opt.optim == 'sgd':
        return optim.SGD(params, opt.learning_rate, weight_decay=opt.weight_decay)
    elif opt.optim == 'sgdm':
        return optim.SGD(params, opt.learning_rate, opt.optim_alpha,
                         weight_decay=opt.weight_decay)
    elif opt.optim == 'sgdmom':
        return optim.SGD(params, opt.learning_rate, opt.optim_alpha,
                         weight_decay=opt.weight_decay, nesterov=True)
    elif opt.optim == 'adam':
        return optim.Adam(params, opt.learning_rate, (opt.optim_alpha, opt.optim_beta),
                          opt.optim_epsilon, weight_decay=opt.weight_decay)
    elif opt.optim == 'adamw':
        return optim.AdamW(params, opt.learning_rate, (opt.optim_alpha, opt.optim_beta),
                           opt.optim_epsilon, weight_decay=opt.weight_decay)
    else:
        raise Exception("bad option opt.optim: {}".format(opt.optim))
Example #18
Source File: optimizer.py From XenonPy with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __init__(self, *, lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0):
    """Implements Adagrad algorithm.

    It has been proposed in `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.

    Arguments:
        lr (float, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)

    .. _Adaptive Subgradient Methods for Online Learning and Stochastic Optimization:
        http://jmlr.org/papers/v12/duchi11a.html
    """
    super().__init__(optim.Adagrad, lr=lr, lr_decay=lr_decay, weight_decay=weight_decay,
                     initial_accumulator_value=initial_accumulator_value)
Example #19
Source File: train.py From pointer_summarizer with Apache License 2.0 | 6 votes |
def setup_train(self, model_file_path=None):
    self.model = Model(model_file_path)

    params = list(self.model.encoder.parameters()) + list(self.model.decoder.parameters()) + \
             list(self.model.reduce_state.parameters())
    initial_lr = config.lr_coverage if config.is_coverage else config.lr
    self.optimizer = Adagrad(params, lr=initial_lr,
                             initial_accumulator_value=config.adagrad_init_acc)

    start_iter, start_loss = 0, 0

    if model_file_path is not None:
        state = torch.load(model_file_path, map_location=lambda storage, location: storage)
        start_iter = state['iter']
        start_loss = state['current_loss']

        if not config.is_coverage:
            self.optimizer.load_state_dict(state['optimizer'])
            if use_cuda:
                for state in self.optimizer.state.values():
                    for k, v in state.items():
                        if torch.is_tensor(v):
                            state[k] = v.cuda()

    return start_iter, start_loss
Example #20
Source File: RHINE.py From OpenHINE with MIT License | 6 votes |
def set_model(self, model):
    self.model = model
    self.trainModel = self.model(config=self)
    self.trainModel.cuda()
    if self.optimizer is not None:
        pass
    elif self.opt_method == "Adagrad" or self.opt_method == "adagrad":
        self.optimizer = optim.Adagrad(self.trainModel.parameters(), lr=self.alpha,
                                       lr_decay=self.lr_decay,
                                       weight_decay=self.weight_decay)
    elif self.opt_method == "Adadelta" or self.opt_method == "adadelta":
        self.optimizer = optim.Adadelta(
            self.trainModel.parameters(), lr=self.alpha)
    elif self.opt_method == "Adam" or self.opt_method == "adam":
        self.optimizer = optim.Adam(
            self.trainModel.parameters(), lr=self.alpha)
    else:
        self.optimizer = optim.SGD(
            self.trainModel.parameters(), lr=self.alpha)
Example #21
Source File: optimizers.py From nlp-recipes with MIT License | 5 votes |
def set_parameters(self, params):
    """ ? """
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != "sparseadam" or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == "sgd":
        self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
    elif self.method == "adagrad":
        self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
        for group in self.optimizer.param_groups:
            for p in group["params"]:
                self.optimizer.state[p]["sum"] = self.optimizer.state[p][
                    "sum"
                ].fill_(self.adagrad_accum)
    elif self.method == "adadelta":
        self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
    elif self.method == "adam":
        self.optimizer = optim.Adam(
            self.params, lr=self.learning_rate, betas=self.betas, eps=1e-9
        )
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
    self.param_groups = self.optimizer.param_groups
    self.state = self.optimizer.state
Example #22
Source File: optimizer.py From stog with MIT License | 5 votes |
def set_parameters(self, params):
    """ ? """
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.learning_rate,
                                    betas=self.betas, eps=1e-8,
                                    weight_decay=3e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.learning_rate,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.learning_rate,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #23
Source File: Optim.py From QG-Net with MIT License | 5 votes |
def set_parameters(self, params):
    self.params = [p for p in params if p.requires_grad]
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    betas=self.betas, eps=1e-9)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)

# We use the default parameters for Adam that are suggested by
# the original paper https://arxiv.org/pdf/1412.6980.pdf
# These values are also used by other established implementations,
# e.g. https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer
# https://keras.io/optimizers/
# Recently there are slightly different values used in the paper
# "Attention is all you need"
# https://arxiv.org/pdf/1706.03762.pdf, particularly the value beta2=0.98
# was used there however, beta2=0.999 is still arguably the more
# established value, so we use that here as well
Example #24
Source File: optimizer.py From hiersumm with Apache License 2.0 | 5 votes |
def set_parameters(self, params):
    """ ? """
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.learning_rate,
                                    betas=self.betas, eps=1e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.learning_rate,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.learning_rate,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #25
Source File: optimizers.py From BiSET with MIT License | 5 votes |
def set_parameters(self, params):
    """ ? """
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.learning_rate,
                                    betas=self.betas, eps=1e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.learning_rate,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.learning_rate,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #26
Source File: helper.py From transferable_sent2vec with MIT License | 5 votes |
def get_optimizer(s):
    """
    Parse optimizer parameters.
    Input should be of the form:
        - "sgd,lr=0.01"
        - "adagrad,lr=0.1,lr_decay=0.05"
    """
    if "," in s:
        method = s[:s.find(',')]
        optim_params = {}
        for x in s[s.find(',') + 1:].split(','):
            split = x.split('=')
            assert len(split) == 2
            assert re.match("^[+-]?(\d+(\.\d*)?|\.\d+)$", split[1]) is not None
            optim_params[split[0]] = float(split[1])
    else:
        method = s
        optim_params = {}

    if method == 'adadelta':
        optim_fn = optim.Adadelta
    elif method == 'adagrad':
        optim_fn = optim.Adagrad
    elif method == 'adam':
        optim_fn = optim.Adam
    elif method == 'rmsprop':
        optim_fn = optim.RMSprop
    elif method == 'sgd':
        optim_fn = optim.SGD
        assert 'lr' in optim_params
    else:
        raise Exception('Unknown optimization method: "%s"' % method)

    # check that we give good parameters to the optimizer
    expected_args = list(inspect.signature(optim_fn.__init__).parameters.keys())
    assert expected_args[:2] == ['self', 'params']
    if not all(k in expected_args[2:] for k in optim_params.keys()):
        raise Exception('Unexpected parameters: expected "%s", got "%s"' % (
            str(expected_args[2:]), str(optim_params.keys())))

    return optim_fn, optim_params
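A hedged usage sketch for the helper above: it returns the optimizer class and the parsed keyword arguments rather than a ready optimizer, so the caller instantiates it. The model here is a placeholder, and the snippet assumes the helper and its imports (re, inspect, torch.optim) are already in scope.

import torch.nn as nn

model = nn.Linear(128, 2)  # placeholder model, not from the original project
optim_fn, optim_params = get_optimizer("adagrad,lr=0.1,lr_decay=0.05")
optimizer = optim_fn(model.parameters(), **optim_params)  # Adagrad(lr=0.1, lr_decay=0.05)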
Example #27
Source File: Optim.py From graph-2-text with MIT License | 5 votes |
def set_parameters(self, params):
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    betas=self.betas, eps=1e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.lr,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.lr,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #28
Source File: Optim.py From var-attn with MIT License | 5 votes |
def set_parameters(self, params):
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
                print("Sparse parameter {}".format(k))
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    betas=self.betas, eps=self.eps)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.lr,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.lr,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #29
Source File: optim.py From klcpd_code with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _makeOptimizer(self):
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr,
                                   weight_decay=self.weight_decay,
                                   momentum=self.momentum)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr,
                                       weight_decay=self.weight_decay)
    elif self.method == 'rmsprop':
        self.optimizer = optim.RMSprop(self.params, lr=self.lr,
                                       weight_decay=self.weight_decay,
                                       momentum=self.momentum)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    weight_decay=self.weight_decay)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #30
Source File: NeuralStyleTransfer.py From Neural-Style-Transfer-Audio with MIT License | 5 votes |
def get_input_param_optimizer(input_float):
    input_param = nn.Parameter(input_float.data)
    # optimizer = optim.Adagrad([input_param], lr=learning_rate_initial, lr_decay=0.0001, weight_decay=0)
    optimizer = optim.Adam([input_param], lr=learning_rate_initial,
                           betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
    return input_param, optimizer