Python torch.optim.SparseAdam() Examples
The following are 17 code examples of torch.optim.SparseAdam(), collected from open-source projects. The original project and source file are noted above each example. You may also want to check out the other functions and classes available in the torch.optim module.
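Before the project examples, here is a minimal, self-contained sketch (not taken from any project listed below) of how SparseAdam is typically used: the optimizer expects parameters that receive sparse gradients, such as the weight of an nn.Embedding created with sparse=True.

import torch
import torch.nn as nn
import torch.optim as optim

# An embedding created with sparse=True produces sparse gradients, which SparseAdam expects.
embedding = nn.Embedding(num_embeddings=1000, embedding_dim=64, sparse=True)
optimizer = optim.SparseAdam(embedding.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-8)

indices = torch.randint(0, 1000, (32,))  # a dummy batch of token ids
loss = embedding(indices).pow(2).sum()   # any scalar loss works for this sketch
optimizer.zero_grad()
loss.backward()                          # the embedding weight receives a sparse gradient
optimizer.step()                         # only the rows touched by `indices` are updated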
Example #1
Source File: optimizer.py From XenonPy with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __init__(self, *, lr=0.001, betas=(0.9, 0.999), eps=1e-08):
    r"""Implements lazy version of Adam algorithm suitable for sparse tensors.

    In this variant, only moments that show up in the gradient get updated, and
    only those portions of the gradient get applied to the parameters.

    Arguments:
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)

    .. _Adam\: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    """
    super().__init__(optim.SparseAdam, lr=lr, betas=betas, eps=eps)
Example #2
Source File: optimizers.py From BertSum with Apache License 2.0 | 5 votes |
def set_parameters(self, params):
    """ ? """
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.learning_rate,
                                    betas=self.betas, eps=1e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.learning_rate,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.learning_rate,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
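This example (and several others below, which derive from OpenNMT-py) wraps a dense Adam and a SparseAdam in a MultipleOptimizer helper defined elsewhere in those projects. As an illustration only, a minimal wrapper with the behaviour these call sites rely on might look like the sketch below; the exact implementation in each project may differ.

class MultipleOptimizer(object):
    """Apply zero_grad/step to a list of optimizers as if they were one."""

    def __init__(self, op):
        self.optimizers = op

    def zero_grad(self):
        for op in self.optimizers:
            op.zero_grad()

    def step(self):
        for op in self.optimizers:
            op.step()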
Example #3
Source File: optimizers.py From BiSET with MIT License | 5 votes |
def set_parameters(self, params):
    """ ? """
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.learning_rate,
                                    betas=self.betas, eps=1e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.learning_rate,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.learning_rate,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #4
Source File: optimizer.py From stog with MIT License | 5 votes |
def set_parameters(self, params):
    """ ? """
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.learning_rate,
                                    betas=self.betas, eps=1e-8,
                                    weight_decay=3e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.learning_rate,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.learning_rate,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #5
Source File: optimizer.py From hiersumm with Apache License 2.0 | 5 votes |
def set_parameters(self, params):
    """ ? """
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.learning_rate,
                                    betas=self.betas, eps=1e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.learning_rate,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.learning_rate,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #6
Source File: Optim.py From var-attn with MIT License | 5 votes |
def set_parameters(self, params):
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
                print("Sparse parameter {}".format(k))
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    betas=self.betas, eps=self.eps)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.lr,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.lr,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #7
Source File: Optim.py From graph-2-text with MIT License | 5 votes |
def set_parameters(self, params):
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    betas=self.betas, eps=1e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.lr,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.lr,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #8
Source File: Metapath2vec.py From OpenHINE with MIT License | 5 votes |
def train(self):
    for iteration in range(self.iterations):
        # print("\nIteration: " + str(iteration + 1))
        optimizer = optim.SparseAdam(self.skip_gram_model.parameters(), lr=self.initial_lr)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, len(self.dataloader))

        running_loss = 0.0
        epoch_loss = 0.0
        n = 0
        for i, sample_batched in enumerate(self.dataloader):
            if len(sample_batched[0]) > 1:
                pos_u = sample_batched[0].to(self.device)
                pos_v = sample_batched[1].to(self.device)
                neg_v = sample_batched[2].to(self.device)

                scheduler.step()
                optimizer.zero_grad()
                loss = self.skip_gram_model.forward(pos_u, pos_v, neg_v)
                loss.backward()
                optimizer.step()

                # running_loss = running_loss * 0.9 + loss.item() * 0.1
                epoch_loss += loss.item()
                # if i > 0 and i % 50 == 0:
                #     print(" Loss: " + str(running_loss))
            n = i
        print("epoch:" + str(iteration) + " Loss: " + str(epoch_loss / n))
        self.skip_gram_model.save_embedding(self.data.id2word, self.output_file_name)
Example #9
Source File: Optim.py From data2text-entity-py with MIT License | 5 votes |
def set_parameters(self, params):
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    betas=self.betas, eps=1e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.lr,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.lr,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #10
Source File: Optim.py From video-caption-openNMT.pytorch with MIT License | 5 votes |
def set_parameters(self, params):
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    betas=self.betas, eps=1e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.lr,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.lr,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #11
Source File: optimizer.py From gtos with MIT License | 5 votes |
def set_parameters(self, params):
    """ ? """
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.learning_rate,
                                    betas=self.betas, eps=1e-8,
                                    weight_decay=3e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.learning_rate,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.learning_rate,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #12
Source File: get_optimizer.py From PyMIC with Apache License 2.0 | 5 votes |
def get_optimiser(name, net_params, optim_params):
    lr = optim_params['learning_rate']
    momentum = optim_params['momentum']
    weight_decay = optim_params['weight_decay']
    if(name == "SGD"):
        return optim.SGD(net_params, lr,
                         momentum = momentum, weight_decay = weight_decay)
    elif(name == "Adam"):
        return optim.Adam(net_params, lr, weight_decay = 1e-5)
    elif(name == "SparseAdam"):
        return optim.SparseAdam(net_params, lr)
    elif(name == "Adadelta"):
        return optim.Adadelta(net_params, lr, weight_decay = weight_decay)
    elif(name == "Adagrad"):
        return optim.Adagrad(net_params, lr, weight_decay = weight_decay)
    elif(name == "Adamax"):
        return optim.Adamax(net_params, lr, weight_decay = weight_decay)
    elif(name == "ASGD"):
        return optim.ASGD(net_params, lr, weight_decay = weight_decay)
    elif(name == "LBFGS"):
        return optim.LBFGS(net_params, lr)
    elif(name == "RMSprop"):
        return optim.RMSprop(net_params, lr, momentum = momentum,
                             weight_decay = weight_decay)
    elif(name == "Rprop"):
        return optim.Rprop(net_params, lr)
    else:
        raise ValueError("unsupported optimizer {0:}".format(name))
Example #13
Source File: utils.py From OpenKiwi with GNU Affero General Public License v3.0 | 5 votes |
def optimizer_class(name):
    if name == 'sgd':
        OptimizerClass = optim.SGD
    elif name == 'adagrad':
        OptimizerClass = optim.Adagrad
    elif name == 'adadelta':
        OptimizerClass = optim.Adadelta
    elif name == 'adam':
        OptimizerClass = optim.Adam
    elif name == 'sparseadam':
        OptimizerClass = optim.SparseAdam
    else:
        raise RuntimeError("Invalid optim method: " + name)
    return OptimizerClass
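A hypothetical use of this helper (the embedding below is illustrative, not part of OpenKiwi): the returned class is instantiated later with the parameters and hyperparameters of choice.

import torch.nn as nn

OptimizerClass = optimizer_class('sparseadam')     # resolves to optim.SparseAdam
embedding = nn.Embedding(5000, 128, sparse=True)   # SparseAdam needs sparse gradients
optimizer = OptimizerClass(embedding.parameters(), lr=1e-3)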
Example #14
Source File: optimizers.py From ITDD with MIT License | 5 votes |
def set_parameters(self, model):
    """ ? """
    params = [p for p in model.parameters() if p.requires_grad]
    if self.method == 'sgd':
        self.optimizer = optim.SGD(params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(
            self.params,
            lr=self.learning_rate,
            initial_accumulator_value=self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(params, lr=self.learning_rate)
    elif self.method == 'adafactor':
        self.optimizer = AdaFactor(params, non_constant_decay=True,
                                   enable_factorization=True,
                                   weight_decay=0)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(params, lr=self.learning_rate,
                                    betas=self.betas, eps=1e-9)
    elif self.method == 'sparseadam':
        dense = []
        sparse = []
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            # TODO: Find a better way to check for sparse gradients.
            if 'embed' in name:
                sparse.append(param)
            else:
                dense.append(param)
        self.optimizer = MultipleOptimizer(
            [optim.Adam(dense, lr=self.learning_rate,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(sparse, lr=self.learning_rate,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
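The TODO in this example notes that matching "embed" in parameter names is a fragile way to find parameters with sparse gradients. One alternative, sketched here as an assumption rather than ITDD's actual code, is to inspect module types instead of names: an nn.Embedding (or nn.EmbeddingBag) constructed with sparse=True is the usual source of sparse gradients.

import torch.nn as nn

def split_dense_sparse(model):
    """Partition trainable parameters by whether they will receive sparse gradients."""
    sparse_weights = set()
    for module in model.modules():
        if isinstance(module, (nn.Embedding, nn.EmbeddingBag)) and module.sparse:
            sparse_weights.add(module.weight)
    dense, sparse = [], []
    for param in model.parameters():
        if not param.requires_grad:
            continue
        (sparse if param in sparse_weights else dense).append(param)
    return dense, sparse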
Example #15
Source File: metapath2vec.py From dgl with Apache License 2.0 | 5 votes |
def train(self):
    for iteration in range(self.iterations):
        print("\n\n\nIteration: " + str(iteration + 1))
        optimizer = optim.SparseAdam(self.skip_gram_model.parameters(), lr=self.initial_lr)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, len(self.dataloader))

        running_loss = 0.0
        for i, sample_batched in enumerate(tqdm(self.dataloader)):
            if len(sample_batched[0]) > 1:
                pos_u = sample_batched[0].to(self.device)
                pos_v = sample_batched[1].to(self.device)
                neg_v = sample_batched[2].to(self.device)

                scheduler.step()
                optimizer.zero_grad()
                loss = self.skip_gram_model.forward(pos_u, pos_v, neg_v)
                loss.backward()
                optimizer.step()

                running_loss = running_loss * 0.9 + loss.item() * 0.1
                if i > 0 and i % 500 == 0:
                    print(" Loss: " + str(running_loss))

        self.skip_gram_model.save_embedding(self.data.id2word, self.output_file_name)
Example #16
Source File: learner.py From emmental with MIT License | 4 votes |
def _set_optimizer(self, model: EmmentalModel) -> None:
    """Set optimizer for learning process.

    Args:
      model: The model to set up the optimizer.
    """
    optimizer_config = Meta.config["learner_config"]["optimizer_config"]
    opt = optimizer_config["optimizer"]

    # If Meta.config["learner_config"]["optimizer_config"]["parameters"] is None,
    # create a parameter group with all parameters in the model, else load user
    # specified parameter groups.
    if optimizer_config["parameters"] is None:
        parameters = filter(lambda p: p.requires_grad, model.parameters())
    else:
        parameters = optimizer_config["parameters"](model)

    optim_dict = {
        # PyTorch optimizer
        "asgd": optim.ASGD,  # type: ignore
        "adadelta": optim.Adadelta,  # type: ignore
        "adagrad": optim.Adagrad,  # type: ignore
        "adam": optim.Adam,  # type: ignore
        "adamw": optim.AdamW,  # type: ignore
        "adamax": optim.Adamax,  # type: ignore
        "lbfgs": optim.LBFGS,  # type: ignore
        "rms_prop": optim.RMSprop,  # type: ignore
        "r_prop": optim.Rprop,  # type: ignore
        "sgd": optim.SGD,  # type: ignore
        "sparse_adam": optim.SparseAdam,  # type: ignore
        # Customize optimizer
        "bert_adam": BertAdam,
    }

    if opt in ["lbfgs", "r_prop", "sparse_adam"]:
        optimizer = optim_dict[opt](
            parameters,
            lr=optimizer_config["lr"],
            **optimizer_config[f"{opt}_config"],
        )
    elif opt in optim_dict.keys():
        optimizer = optim_dict[opt](
            parameters,
            lr=optimizer_config["lr"],
            weight_decay=optimizer_config["l2"],
            **optimizer_config[f"{opt}_config"],
        )
    elif isinstance(opt, optim.Optimizer):  # type: ignore
        optimizer = opt(parameters)
    else:
        raise ValueError(f"Unrecognized optimizer option '{opt}'")

    self.optimizer = optimizer

    if Meta.config["meta_config"]["verbose"]:
        logger.info(f"Using optimizer {self.optimizer}")
Example #17
Source File: classifier.py From metal with Apache License 2.0 | 4 votes |
def _set_optimizer(self, train_config):
    optimizer_config = train_config["optimizer_config"]
    opt = optimizer_config["optimizer"]

    # We set L2 here if the class does not implement its own L2 reg
    l2 = 0 if self.implements_l2 else train_config.get("l2", 0)

    parameters = filter(lambda p: p.requires_grad, self.parameters())
    if opt == "sgd":
        optimizer = optim.SGD(
            parameters,
            **optimizer_config["optimizer_common"],
            **optimizer_config["sgd_config"],
            weight_decay=l2,
        )
    elif opt == "rmsprop":
        optimizer = optim.RMSprop(
            parameters,
            **optimizer_config["optimizer_common"],
            **optimizer_config["rmsprop_config"],
            weight_decay=l2,
        )
    elif opt == "adam":
        optimizer = optim.Adam(
            parameters,
            **optimizer_config["optimizer_common"],
            **optimizer_config["adam_config"],
            weight_decay=l2,
        )
    elif opt == "sparseadam":
        optimizer = optim.SparseAdam(
            parameters,
            **optimizer_config["optimizer_common"],
            **optimizer_config["adam_config"],
        )
        if l2:
            raise Exception(
                "SparseAdam optimizer does not support weight_decay (l2 penalty)."
            )
    else:
        raise ValueError(f"Did not recognize optimizer option '{opt}'")
    self.optimizer = optimizer