Python apex.amp() Examples
The following are 13 code examples of apex.amp(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the apex module.
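Before diving into the examples, it may help to see the pattern most of them share. The sketch below is a minimal illustration of the usual apex.amp flow (amp.initialize followed by amp.scale_loss) and is not taken from any of the projects listed here; the model, optimizer, and data are toy placeholders, and it assumes apex and a CUDA device are available.

# A minimal sketch of the usual apex.amp training flow (toy names, not from the examples below).
import torch
from apex import amp

model = torch.nn.Linear(10, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# Patch the model and optimizer for mixed precision ("O1" casts ops to fp16 where safe).
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

for _ in range(10):
    inputs = torch.randn(4, 10, device="cuda")
    targets = torch.randint(0, 2, (4,), device="cuda")
    optimizer.zero_grad()
    loss = torch.nn.functional.cross_entropy(model(inputs), targets)
    # Scale the loss so fp16 gradients don't underflow, then backprop through the scaled value.
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()
    optimizer.step()

The examples that follow wrap this same pattern in project-specific training loops.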
Example #1
Source File: argus_models.py From argus-freesound with MIT License
def __init__(self, params):
    super().__init__(params)
    if 'aux' in params:
        self.aux_weights = params['aux']['weights']
    else:
        self.aux_weights = None
    self.use_amp = not config.kernel and 'amp' in params
    if self.use_amp:
        from apex import amp
        self.amp = amp
        self.nn_module, self.optimizer = self.amp.initialize(
            self.nn_module, self.optimizer,
            opt_level=params['amp']['opt_level'],
            keep_batchnorm_fp32=params['amp']['keep_batchnorm_fp32'],
            loss_scale=params['amp']['loss_scale']
        )
Example #2
Source File: distrib_parts.py From pytorch-lightning with Apache License 2.0
def single_gpu_train(self, model):
    # call setup
    self.setup('fit')
    if self.is_function_implemented('setup', model):
        model.setup('fit')

    model.cuda(self.root_gpu)

    # CHOOSE OPTIMIZER
    # allow for lr schedulers as well
    self.optimizers, self.lr_schedulers, self.optimizer_frequencies = self.init_optimizers(model)

    # TODO: remove with dropping NVIDIA AMP support
    if self.use_amp and not NATIVE_AMP_AVALAIBLE:
        # An example
        model, optimizers = model.configure_apex(amp, model, self.optimizers, self.amp_level)
        self.optimizers = optimizers
        self.reinit_scheduler_properties(self.optimizers, self.lr_schedulers)

    self.run_pretrain_routine(model)
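The model.configure_apex(...) call above is a hook on the LightningModule that users can override. As a rough illustration only (the class name is hypothetical and the hook body simply mirrors the call signature used above, rather than quoting the library), an override might look like this:

# Hypothetical LightningModule with a configure_apex override; assumes an older
# pytorch-lightning release in which this hook is still called for NVIDIA apex AMP.
import pytorch_lightning as pl


class MyModule(pl.LightningModule):
    def configure_apex(self, amp, model, optimizers, amp_level):
        # `amp` is the apex.amp module handed in by the trainer.
        model, optimizers = amp.initialize(model, optimizers, opt_level=amp_level)
        return model, optimizers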
Example #3
Source File: argus_models.py From argus-freesound with MIT License
def train_step(self, batch) -> dict:
    if not self.nn_module.training:
        self.nn_module.train()
    self.optimizer.zero_grad()
    input, target, noisy = self.prepare_batch(batch, self.device)
    prediction = self.nn_module(input)
    if self.aux_weights is not None:
        loss = 0
        for pred, weight in zip(prediction, self.aux_weights):
            loss += self.loss(pred, target, noisy) * weight
    else:
        loss = self.loss(prediction, target, noisy)
    if self.use_amp:
        with self.amp.scale_loss(loss, self.optimizer) as scaled_loss:
            scaled_loss.backward()
    else:
        loss.backward()
    self.optimizer.step()
    prediction = deep_detach(prediction)
    target = deep_detach(target)
    return {
        'prediction': self.prediction_transform(prediction[0]),
        'target': target,
        'loss': loss.item(),
        'noisy': noisy
    }
Example #4
Source File: classification_task.py From ClassyVision with MIT License
def set_amp_args(self, amp_args: Optional[Dict[str, Any]]):
    """Disable / enable apex.amp and set the automatic mixed precision parameters.

    apex.amp can be utilized for mixed / half precision training.

    Args:
        amp_args: Dictionary containing arguments to be passed to
            amp.initialize. Set to None to disable amp. To enable mixed
            precision training, pass amp_args={"opt_level": "O1"} here.
            See https://nvidia.github.io/apex/amp.html for more info.

    Raises:
        RuntimeError: If opt_level is not None and apex is not installed.

    Warning: apex needs to be installed to utilize this feature.
    """
    self.amp_args = amp_args

    if amp_args is None:
        logging.info(f"AMP disabled")
    else:
        if not apex_available:
            raise RuntimeError("apex is not installed, cannot enable amp")

        logging.info(f"AMP enabled with args {amp_args}")
    return self
Example #5
Source File: classification_task.py From ClassyVision with MIT License
def get_classy_state(self, deep_copy: bool = False):
    """Returns serializable state of task

    Args:
        deep_copy: If true, does a deep copy of state before returning.
    """
    optimizer_state = {}
    if self.optimizer is not None:
        optimizer_state = self.optimizer.get_classy_state()

    classy_state_dict = {
        "train": self.train,
        "base_model": self.base_model.get_classy_state(),
        "meters": [meter.get_classy_state() for meter in self.meters],
        "optimizer": optimizer_state,
        "phase_idx": self.phase_idx,
        "train_phase_idx": self.train_phase_idx,
        "num_updates": self.num_updates,
        "losses": self.losses,
        "hooks": {hook.name(): hook.get_classy_state() for hook in self.hooks},
        "loss": {},
    }
    if "train" in self.datasets and self._is_checkpointable_dataset(
        self.datasets["train"]
    ):
        classy_state_dict["train_dataset_iterator"] = self.datasets[
            "train"
        ].get_classy_state()

    if isinstance(self.loss, ClassyLoss):
        classy_state_dict["loss"] = self.loss.get_classy_state()
    if self.amp_args is not None:
        classy_state_dict["amp"] = apex.amp.state_dict()

    if deep_copy:
        classy_state_dict = copy.deepcopy(classy_state_dict)
    return classy_state_dict
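Example #5 stores apex.amp.state_dict() alongside the rest of the task state so the loss scaler survives checkpointing. Outside ClassyVision, the same idea looks roughly like the sketch below; the model, optimizer, and file name are toy placeholders, and it assumes apex is installed and that amp.initialize has been called before both saving and loading.

# Minimal checkpoint sketch that also saves/restores apex.amp state (toy names).
import torch
from apex import amp

model = torch.nn.Linear(10, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

# Save model, optimizer, and amp (loss-scaler) state together.
torch.save({
    "model": model.state_dict(),
    "optimizer": optimizer.state_dict(),
    "amp": amp.state_dict(),
}, "checkpoint.pt")

# Restore: amp.initialize() must have been called on the fresh model/optimizer first.
checkpoint = torch.load("checkpoint.pt")
model.load_state_dict(checkpoint["model"])
optimizer.load_state_dict(checkpoint["optimizer"])
amp.load_state_dict(checkpoint["amp"])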
Example #6
Source File: utils.py From fastNLP with Apache License 2.0
def _check_fp16():
    if amp is None:
        raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
    if not torch.backends.cudnn.enabled:
        raise RuntimeError("Amp requires cudnn backend to be enabled.")
Example #7
Source File: infer.py From NLP_Toolkit with Apache License 2.0
def evaluate_corpus_bleu(args, early_stopping=True, stop_no=1000):
    args.batch_size = 1
    train_iter, FR, EN, train_length = load_dataloaders(args)
    src_vocab = len(EN.vocab)
    trg_vocab = len(FR.vocab)

    cuda = torch.cuda.is_available()
    if args.fp16:
        from apex import amp
    else:
        amp = None
    net, _, _, _, _, _ = load_model_and_optimizer(args, src_vocab, \
                                                  trg_vocab, cuda, amp=amp)
    net.eval()
    trg_init = FR.vocab.stoi["<sos>"]
    trg_init = Variable(torch.LongTensor([trg_init])).unsqueeze(0)

    logger.info("Evaluating corpus bleu...")
    refs = []; hyps = []
    with torch.no_grad():
        for i, data in tqdm(enumerate(train_iter), total=len(train_iter)):
            trg_input = trg_init
            labels = data.FR[:,1:].contiguous().view(-1)
            src_mask, trg_mask = create_masks(data.EN, trg_input)
            if cuda:
                data.EN = data.EN.cuda(); trg_input = trg_input.cuda(); labels = labels.cuda()
                src_mask = src_mask.cuda(); trg_mask = trg_mask.cuda()
            stepwise_translated_words, final_step_words = net(data.EN, trg_input, src_mask, None,\
                                                              infer=True, trg_vocab_obj=FR)
            refs.append([stepwise_translated_words])  # need to remove <eos> tokens
            hyps.append([FR.vocab.itos[i] for i in labels[:-1]])

            if early_stopping and ((i + 1) % stop_no == 0):
                print(refs); print(hyps)
                break

    score = calculate_bleu(refs, hyps, corpus_level=True)
    print("Corpus bleu score: %.5f" % score)
    return score
Example #8
Source File: classification_task.py From ClassyVision with MIT License
def set_classy_state(self, state):
    """Set task state

    Args:
        state: Dict containing state of a task
    """
    # some settings are different in test only
    self.train = False if self.test_only else state["train"]
    if not self.test_only:
        self.phase_idx = state["phase_idx"]
        self.num_updates = state["num_updates"]
        self.train_phase_idx = state["train_phase_idx"]
        self.losses = state["losses"]

    for meter, meter_state in zip(self.meters, state["meters"]):
        meter.set_classy_state(meter_state)
    self.base_model.set_classy_state(state["base_model"])
    if self.optimizer is not None:
        self.optimizer.set_classy_state(state["optimizer"])
    if state.get("loss") and isinstance(self.loss, ClassyLoss):
        self.loss.set_classy_state(state["loss"])

    if "amp" in state:
        apex.amp.load_state_dict(state["amp"])

    for hook in self.hooks:
        # we still want to be able to run when new hooks are added or old
        # hooks are removed
        if hook.name() in state["hooks"]:
            hook.set_classy_state(state["hooks"][hook.name()])
        else:
            logging.warn(f"No state found for hook: {hook.name()}")

    if "train" in self.datasets and self._is_checkpointable_dataset(
        self.datasets["train"]
    ):
        self.datasets["train"].set_classy_state(state.get("train_dataset_iterator"))

    # TODO (mannatsingh): Figure out how to set the state of the dataloaders
    # Re-build dataloader & re-create iterator.
    self._recreate_data_loader_from_dataset()
    self.create_data_iterator()
    # Set up pytorch module in train vs eval mode, update optimizer.
    self._set_model_train_mode()
Example #9
Source File: classification_task.py From ClassyVision with MIT License
def train_step(self):
    """Train step to be executed in train loop."""

    self.last_batch = None

    # Process next sample
    sample = next(self.get_data_iterator())

    assert isinstance(sample, dict) and "input" in sample and "target" in sample, (
        f"Returned sample [{sample}] is not a map with 'input' and"
        + "'target' keys"
    )

    # Copy sample to GPU
    target = sample["target"]
    if self.use_gpu:
        sample = recursive_copy_to_gpu(sample, non_blocking=True)

    if self.mixup_transform is not None:
        sample = self.mixup_transform(sample)

    with torch.enable_grad():
        # Forward pass
        output = self.model(sample["input"])

        local_loss = self.compute_loss(output, sample)

        loss = local_loss.detach().clone()

        self.losses.append(loss.data.cpu().item() * target.size(0))

        self.update_meters(output, sample)

    # Run backwards pass / update optimizer
    if self.amp_args is not None:
        self.optimizer.zero_grad()
        with apex.amp.scale_loss(
            local_loss, self.optimizer.optimizer
        ) as scaled_loss:
            scaled_loss.backward()
    else:
        self.optimizer.backward(local_loss)

    self.check_inf_nan(loss)

    self.optimizer.update_schedule_on_step(self.where)
    self.optimizer.step()

    self.num_updates += self.get_global_batchsize()

    # Move some data to the task so hooks get a chance to access it
    self.last_batch = LastBatchInfo(
        loss=loss, output=output, target=target, sample=sample
    )
Example #10
Source File: infer.py From NLP_Toolkit with Apache License 2.0
def __init__(self, args=None):
    if args is None:
        self.args = load_pickle("args.pkl")
    else:
        self.args = args
    self.cuda = torch.cuda.is_available()
    self.args.batch_size = 1

    if self.args.model_no != 1:
        logger.info("Loading tokenizer and model...")
        self.tokenizer_en = tokener(args.src_lang)
        train_iter, FR, EN, train_length = load_dataloaders(self.args)
        self.FR = FR
        self.EN = EN
        self.train_iter = train_iter
        self.train_length = train_length
        self.src_vocab = len(EN.vocab)
        self.trg_vocab = len(FR.vocab)

        if self.args.fp16:
            from apex import amp
        else:
            amp = None
        self.amp = amp
        net, _, _, _, _, _ = load_model_and_optimizer(self.args, self.src_vocab, \
                                                      self.trg_vocab, self.cuda, amp=amp)
        self.net = net
        self.net.eval()
        trg_init = FR.vocab.stoi["<sos>"]
        self.trg_init = Variable(torch.LongTensor([trg_init])).unsqueeze(0)

    elif self.args.model_no == 1:
        from .mass.interactive import Translator
        src, tgt = "zh-en".split('-')
        logger.info("Loading translator, tokenizer...")
        self.translator = Translator(data_path='./data/data-bin/processed_data_%s_%s' % (src, tgt),\
                                     checkpoint_path="./data/checkpoints/%s_%s/checkpoint50.pt" % (src, tgt),\
                                     task='translation',\
                                     user_dir='',\
                                     s=src, t=tgt,\
                                     langs='%s,%s' % (src, tgt),\
                                     mt_steps='%s-%s' % (src, tgt),\
                                     source_langs=src,\
                                     target_langs=tgt,\
                                     beam=5,\
                                     use_cuda=args.cuda)
Example #11
Source File: training_loop.py From pytorch-lightning with Apache License 2.0
def call_optimizer_step(self, optimizer, opt_idx, batch_idx, split_batch):
    # calls .step(), .zero_grad()
    # override function to modify this behavior
    model = self.get_model()

    with self.profiler.profile('optimizer_step'):
        lambda_closure = lambda: self.optimizer_closure(
            split_batch,
            batch_idx,
            opt_idx,
            optimizer,
            self.hiddens
        ).loss

        # apply TPU optimizer
        if self.use_tpu and XLA_AVAILABLE:
            model.optimizer_step(self.current_epoch, batch_idx,
                                 optimizer, opt_idx, lambda_closure, on_tpu=True)

        # for LBFGS do something a bit different
        elif isinstance(optimizer, torch.optim.LBFGS):

            # native amp + lbfgs is a no go right now
            if self.use_amp and NATIVE_AMP_AVALAIBLE:
                raise MisconfigurationException(
                    'native PyTorch amp and lbfgs are not compatible.'
                    ' To request, please file a Github issue in PyTorch and tag @mcarilli')
            model.optimizer_step(self.current_epoch, batch_idx, optimizer, opt_idx, lambda_closure,
                                 using_lbfgs=True)

        # when using 16-bit
        else:
            native_amp = self.use_amp and NATIVE_AMP_AVALAIBLE
            model.optimizer_step(self.current_epoch, batch_idx, optimizer, opt_idx, lambda_closure,
                                 using_native_amp=native_amp)

        # in native 16-bit we need to update scaler after optimizer step
        if self.use_amp and NATIVE_AMP_AVALAIBLE:
            self.scaler.update()

        # model hook
        model.on_before_zero_grad(optimizer)

        # clear gradients
        model.optimizer_zero_grad(self.current_epoch, batch_idx, optimizer, opt_idx)
Example #12
Source File: distrib_parts.py From pytorch-lightning with Apache License 2.0
def dp_train(self, model):
    # call setup after the ddp process has connected
    self.setup('fit')
    if self.is_function_implemented('setup', model):
        model.setup('fit')

    # CHOOSE OPTIMIZER
    # allow for lr schedulers as well
    self.optimizers, self.lr_schedulers, self.optimizer_frequencies = self.init_optimizers(model)

    model.cuda(self.root_gpu)

    # hack forward to do autocast for the user
    model_autocast_original_forward = model.forward
    if self.use_amp and NATIVE_AMP_AVALAIBLE:
        # wrap the user's forward in autocast and give it back at the end
        model.forward = torch.cuda.amp.autocast()(model.forward)

    # TODO: remove with dropping NVIDIA AMP support
    # check for this bug (amp + dp + !01 doesn't work)
    # https://github.com/NVIDIA/apex/issues/227
    if self.use_dp and self.use_amp and not NATIVE_AMP_AVALAIBLE:
        if self.amp_level == 'O2':
            raise MisconfigurationException(
                f'Amp level {self.amp_level} with DataParallel is not supported.'
                f' See this note from NVIDIA for more info: https://github.com/NVIDIA/apex/issues/227.'
                f' We recommend you switch to ddp if you want to use amp')
        else:
            model, optimizers = model.configure_apex(amp, model, self.optimizers, self.amp_level)
            self.reinit_scheduler_properties(optimizers, self.lr_schedulers)

    # create list of device ids
    device_ids = self.data_parallel_device_ids
    if isinstance(device_ids, int):
        device_ids = list(range(device_ids))

    # set dp device
    torch.cuda.set_device(self.root_gpu)

    model = LightningDataParallel(model, device_ids=device_ids)

    self.run_pretrain_routine(model)

    model.forward = model_autocast_original_forward
Example #13
Source File: distrib_parts.py From pytorch-lightning with Apache License 2.0
def horovod_train(self, model):
    # call setup after the ddp process has connected
    self.setup('fit')
    if self.is_function_implemented('setup', model):
        model.setup('fit')

    if torch.cuda.is_available() and self.on_gpu:
        # Horovod: pin GPU to local rank
        assert self.root_gpu == hvd.local_rank()
        torch.cuda.set_device(self.root_gpu)
        model.cuda(self.root_gpu)

    # avoid duplicating progress bar
    if hvd.rank() != 0 and self.progress_bar_callback is not None:
        self.progress_bar_callback.disable()

    # CHOOSE OPTIMIZER
    # allow for lr schedulers as well
    self.optimizers, self.lr_schedulers, self.optimizer_frequencies = self.init_optimizers(model)

    # Horovod: scale the learning rate by the number of workers to account for
    # increased total batch size
    for optimizer in self.optimizers:
        for param_group in optimizer.param_groups:
            param_group['lr'] *= hvd.size()

    if self.use_amp:
        # An example
        model, optimizers = model.configure_apex(amp, model, self.optimizers, self.amp_level)
        self.optimizers = optimizers
        self.reinit_scheduler_properties(self.optimizers, self.lr_schedulers)

    # Horovod: broadcast parameters & optimizer state to ensure consistent initialization
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    for optimizer in self.optimizers:
        hvd.broadcast_optimizer_state(optimizer, root_rank=0)

    def filter_named_parameters(model, optimizer):
        opt_params = set([p for group in optimizer.param_groups for p in group.get('params', [])])
        return [(name, p) for name, p in model.named_parameters() if p in opt_params]

    # Horovod: wrap optimizers to perform gradient aggregation via allreduce
    self.optimizers = [
        hvd.DistributedOptimizer(optimizer, named_parameters=filter_named_parameters(model, optimizer))
        for optimizer in self.optimizers
    ]

    # Update logger rank info from Horovod to avoid race conditions from different ranks
    # creating directories / writing files in the same locations.
    self.global_rank = hvd.rank()
    rank_zero_only.rank = self.global_rank

    with ExitStack() as stack:
        for optimizer in self.optimizers:
            # Synchronization will be performed explicitly following backward()
            stack.enter_context(optimizer.skip_synchronize())

        self.run_pretrain_routine(model)

    # Make sure all workers have finished training before returning to the user
    hvd.join()