Python apex.amp.initialize() Examples
The following are 30 code examples of apex.amp.initialize().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the module apex.amp, or try the search function.
Example #1
Source File: test_larc.py From apex with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_larc_mixed_precision(self):
    """Run one LARC-wrapped SGD step under every amp opt level (O0-O3)."""
    for level in ("O0", "O1", "O2", "O3"):
        model = MyModel(1)
        base_opt = torch.optim.SGD(
            [{"params": model.parameters(), "lr": 0.25}], momentum=0.125
        )
        optimizer = LARC(base_opt)
        model, optimizer = amp.initialize(
            model, optimizer, opt_level=level, verbosity=0
        )
        optimizer.zero_grad()
        loss = model(self.x)
        # Scale the loss so reduced-precision gradients do not underflow.
        with amp.scale_loss(loss, optimizer) as scaled:
            scaled.backward()
        optimizer.step()
Example #2
Source File: test_lr_finder.py From pytorch-lr-finder with MIT License | 6 votes |
def test_gradient_accumulation_with_apex_amp(self, mocker):
    """`amp.scale_loss` must be entered once per accumulation micro-step."""
    desired_bs, accum_steps = 32, 4
    real_bs = desired_bs // accum_steps
    num_iter = 10
    task = mod_task.XORTask(batch_size=real_bs)
    # `amp.initialize` requires a CUDA-resident model, so move it first.
    model, optimizer, device = task.model, task.optimizer, task.device
    task.model, task.optimizer = amp.initialize(model.to(device), optimizer)

    lr_finder = prepare_lr_finder(task)
    spy = mocker.spy(amp, "scale_loss")
    lr_finder.range_test(
        task.train_loader, num_iter=num_iter, accumulation_steps=accum_steps
    )
    # Every micro-batch of every iteration goes through amp.scale_loss.
    assert spy.call_count == accum_steps * num_iter
Example #3
Source File: test_lr_finder.py From pytorch-lr-finder with MIT License | 6 votes |
def test_mixed_precision(self, mocker):
    """amp-wrapped run: `amp.scale_loss` fires exactly once per iteration."""
    batch_size = 32
    num_iter = 10
    task = mod_task.XORTask(batch_size=batch_size)
    # `amp.initialize` requires a CUDA-resident model, so move it first.
    model, optimizer, device = task.model, task.optimizer, task.device
    task.model, task.optimizer = amp.initialize(model.to(device), optimizer)
    assert hasattr(task.optimizer, "_amp_stash")

    lr_finder = prepare_lr_finder(task)
    spy = mocker.spy(amp, "scale_loss")
    lr_finder.range_test(task.train_loader, num_iter=num_iter)
    # No gradient accumulation here, so scale_loss is hit once per iteration.
    assert spy.call_count == num_iter
Example #4
Source File: mixed_precision.py From amdim-public with MIT License | 6 votes |
def initialize(model, optimizers):
    """Wrap *model* (and *optimizers*, if given) for mixed-precision training.

    Arguments:
        model {nn.Module} -- The model to convert
        optimizers -- The model's optimizer(s), or None

    Returns:
        [nn.Module, Optimizer] -- Converted model and optimizer
    """
    if not is_mixed_precision():
        # Mixed precision disabled: hand everything back untouched.
        return model, optimizers
    from apex import amp
    level = get_optim_level()
    if optimizers is None:
        # amp returns only the model when no optimizer is supplied.
        model = amp.initialize(model, opt_level=level)
    else:
        model, optimizers = amp.initialize(model, optimizers, opt_level=level)
    return model, optimizers
Example #5
Source File: trace.py From catalyst with Apache License 2.0 | 5 votes |
def load_traced_model(
    model_path: Union[str, Path],
    device: Device = "cpu",
    opt_level: str = None,
) -> jit.ScriptModule:
    """Loads a traced model.

    Args:
        model_path: Path to traced model
        device (str): Torch device
        opt_level (str): Apex FP16 init level, optional

    Returns:
        ScriptModule: Traced model
    """
    # jit.load does not accept pathlib.Path objects, only strings.
    model_path = str(model_path)

    use_fp16 = opt_level is not None
    if use_fp16:
        # Apex amp only runs on GPU, so force a CUDA device.
        device = "cuda"

    model = jit.load(model_path, map_location=device)

    if use_fp16:
        assert_fp16_available()
        from apex import amp
        model = amp.initialize(model, optimizers=None, opt_level=opt_level)
    return model
Example #6
Source File: utils.py From machina with MIT License | 5 votes |
def make_model_distributed(model, optim,
                           use_apex=False,
                           apex_opt_level="O0",
                           apex_keep_batchnorm_fp32=True,
                           apex_sync_bn=False,
                           apex_loss_scale=None,
                           device_ids=None,
                           output_device=None,
                           ):
    """Return model for distributed trainings.

    Note that returned model shares parameters with the original model.

    Args:
        model: network to wrap for distributed training.
        optim: its optimizer; returned amp-wrapped when ``use_apex`` is True.
        use_apex: use NVIDIA apex (amp + apex DDP) instead of torch DDP.
        apex_opt_level, apex_keep_batchnorm_fp32, apex_loss_scale:
            forwarded to ``amp.initialize``.
        apex_sync_bn: convert BatchNorm layers to apex synchronized BN.
        device_ids, output_device: forwarded to
            ``nn.parallel.DistributedDataParallel`` (non-apex path only).

    Returns:
        (ddp_model, optim) tuple.
    """
    if use_apex:
        global amp
        global apex
        import apex.parallel
        from apex import amp
        ddp_model, optim = amp.initialize(
            model, optim,
            opt_level=apex_opt_level,
            keep_batchnorm_fp32=apex_keep_batchnorm_fp32,
            loss_scale=apex_loss_scale)
        if apex_sync_bn:
            # BUGFIX: the original converted the raw ``model`` and assigned it
            # over the DDP wrapper, silently discarding both amp initialization
            # and distributed gradient sync. Convert the amp-initialized model
            # *before* wrapping it in DistributedDataParallel instead.
            ddp_model = apex.parallel.convert_syncbn_model(ddp_model)
        ddp_cls = wrap_ddp(apex.parallel.DistributedDataParallel)
        ddp_model = ddp_cls(ddp_model)
    else:
        ddp_cls = wrap_ddp(nn.parallel.DistributedDataParallel)
        ddp_model = ddp_cls(model, device_ids, output_device)
    return ddp_model, optim
Example #7
Source File: network_trainer.py From nnUNet with Apache License 2.0 | 5 votes |
def initialize(self, training=True):
    """
    Hook that concrete trainers are expected to implement. According to the
    original docstring it must:
    - create self.output_folder
    - modify self.output_folder if you are doing cross-validation (one folder per fold)
    - set self.tr_gen and self.val_gen
    - call self.initialize_network and self.initialize_optimizer_and_scheduler (important!)
    - finally set self.was_initialized to True
    :param training: presumably selects training vs. inference setup — confirm in subclasses
    :return:
    """
Example #8
Source File: network_trainer.py From nnUNet with Apache License 2.0 | 5 votes |
def load_checkpoint(self, fname, train=True):
    """Read a checkpoint from *fname* and restore it into this trainer."""
    self.print_to_log_file("loading checkpoint", fname, "train=", train)
    if not self.was_initialized:
        self.initialize(train)
    # Deserialize onto CPU; device placement presumably happens downstream
    # in load_checkpoint_ram (the old cuda map_location was already disabled).
    checkpoint = torch.load(fname, map_location=torch.device('cpu'))
    self.load_checkpoint_ram(checkpoint, train)
Example #9
Source File: network_trainer.py From nnUNet with Apache License 2.0 | 5 votes |
def initialize_network(self):
    """
    Subclass hook: build self.network here. The base implementation
    intentionally does nothing.
    :return:
    """
    pass
Example #10
Source File: network_trainer.py From nnUNet with Apache License 2.0 | 5 votes |
def initialize_optimizer_and_scheduler(self):
    """
    Subclass hook: build self.optimizer and self.lr_scheduler (if applicable)
    here. The base implementation intentionally does nothing.
    :return:
    """
    pass
Example #11
Source File: nnUNetTrainerV2_Mish.py From nnUNet with Apache License 2.0 | 5 votes |
def _maybe_init_amp(self): """ In O1 mish will result in super super high memory usage. I believe that may be because amp decides to be save and use fp32 for all activation functions. By using O2 we reduce memory comsumption by a lot :return: """ # we use fp16 for training only, not inference if self.fp16 and torch.cuda.is_available(): if not self.amp_initialized: if amp is not None: self.network, self.optimizer = amp.initialize(self.network, self.optimizer, opt_level="O2") self.amp_initialized = True else: self.print_to_log_file("WARNING: FP16 training was requested but nvidia apex is not installed. " "Install it from https://github.com/NVIDIA/apex")
Example #12
Source File: nnUNetTrainerV2_O2.py From nnUNet with Apache License 2.0 | 5 votes |
def _maybe_init_amp(self):
    # Lazily wrap network + optimizer with NVIDIA apex amp for fp16 training.
    # NOTE(review): this file is named nnUNetTrainerV2_O2.py but the opt_level
    # passed below is "O1" — confirm whether "O2" was intended here.
    if self.fp16 and torch.cuda.is_available():
        if not self.amp_initialized:
            if amp is not None:
                self.network, self.optimizer = amp.initialize(self.network, self.optimizer, opt_level="O1")
                self.amp_initialized = True
            else:
                # Unlike the sibling trainer that only logs, this variant hard-fails.
                raise RuntimeError("WARNING: FP16 training was requested but nvidia apex is not installed. "
                                   "Install it from https://github.com/NVIDIA/apex")
Example #13
Source File: optimization.py From FARM with Apache License 2.0 | 5 votes |
def _init_amp(model, device, optimizer=None, use_amp=None): model = model.to(device) if use_amp and optimizer: if AMP_AVAILABLE: model, optimizer = amp.initialize(model, optimizer, opt_level=use_amp) else: logger.warning(f"Can't find AMP although you specificed to use amp with level {use_amp}. Will continue without AMP ...") return model, optimizer
Example #14
Source File: training.py From ignite with BSD 3-Clause "New" or "Revised" License | 5 votes |
def initialize(config):
    """Build model, optimizer and criterion from *config*, amp-wrap the model
    and adapt it to the current distributed configuration."""
    model = config.model.to(config.device)
    optimizer = config.optimizer
    # Nvidia/Apex automatic mixed precision; opt level defaults to "O2".
    opt_level = getattr(config, "fp16_opt_level", "O2")
    model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level, num_losses=1)
    # Adapt model to dist conf
    model = idist.auto_model(model)
    criterion = config.criterion.to(config.device)
    return model, optimizer, criterion
Example #15
Source File: training.py From ignite with BSD 3-Clause "New" or "Revised" License | 5 votes |
def initialize(config):
    """Prepare model, optimizer and criterion for (possibly distributed)
    mixed-precision training driven by *config*."""
    device = config.device
    model = config.model.to(device)
    optimizer = config.optimizer
    # Wrap with Nvidia/Apex AMP; the opt level comes from config (default "O2").
    model, optimizer = amp.initialize(
        model,
        optimizer,
        opt_level=getattr(config, "fp16_opt_level", "O2"),
        num_losses=1,
    )
    # Let ignite adapt the model to the distributed configuration.
    model = idist.auto_model(model)
    criterion = config.criterion.to(device)
    return model, optimizer, criterion
Example #16
Source File: test_checkpointing.py From apex with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_state_dict(self):
    """After amp.initialize the exported state_dict contains no Half tensors
    and the model is still trainable (loss decreases over dummy steps)."""
    def run_step(model, optimizer, data, target):
        # One optimization step under amp loss scaling; returns the raw loss.
        optimizer.zero_grad()
        loss = F.mse_loss(model(data), target)
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        optimizer.step()
        return loss.item()

    for opt_level in self.test_opt_levels:
        # Skip O3
        if opt_level == 'O3':
            continue
        model = MyModel().to('cuda')
        optimizer = optim.Adam(model.parameters(), lr=1e-3)
        model, optimizer = amp.initialize(
            model, optimizer, opt_level=opt_level, verbosity=0)

        # Export state_dict and check that nothing was stored as Half.
        state_dict = model.state_dict()
        for key in state_dict:
            self.assertFalse('Half' in state_dict[key].type())

        # Dummy data to verify the model still trains.
        data = torch.randn(10, 3, 4, 4, device='cuda')
        target = torch.randn(10, 6, 4, 4, device='cuda')

        last_loss = run_step(model, optimizer, data, target)
        for _ in range(10):
            current = run_step(model, optimizer, data, target)
            self.assertTrue(current < last_loss)
            last_loss = current
Example #17
Source File: keker.py From kekas with MIT License | 5 votes |
def to_fp16(self, **amp_params):
    """Use NVIDIA apex library for mixed precision training.

    After calling this method, all operations will be used in mixed precision.

    Returns:
        self
    """
    # Caller-supplied params override the defaults (O1, silent).
    params = dict({"opt_level": "O1", "verbosity": 0}, **amp_params)
    core = self.state.core
    core.amp_params = params
    core.model = amp.initialize(core.model, **params)
    core.use_fp16 = True
    return self
Example #18
Source File: training.py From tape with BSD 3-Clause "New" or "Revised" License | 5 votes |
def initialize_fp16(self):
    """If fp16 is enabled, wrap model + optimizer with apex amp
    (O2, dynamic loss scaling, fp32 master weights)."""
    if not self.fp16:
        return
    self.model, self.optimizer = amp.initialize(
        self.model, self.optimizer, opt_level="O2",
        loss_scale="dynamic", master_weights=True)
    # Override amp's starting dynamic loss scale with 2**20.
    _amp_state.loss_scalers[0]._loss_scale = 2 ** 20
Example #19
Source File: main.py From kaggle-rcic-1st with MIT License | 5 votes |
def predict(args, model):
    """Entrypoint for predict mode"""
    test_loader = dataset.get_test_loader(args)
    train_loader, val_loader = dataset.get_train_val_loader(args, predict=True)

    if args.fp16:
        # Inference-only amp wrap: no optimizer involved.
        model = amp.initialize(model, opt_level='O1')

    logging.info('Starting prediction')
    output = {}
    for split, loader in (('test', test_loader), ('val', val_loader)):
        per_name = {}
        for idx, value in infer(args, model, loader).items():
            d = loader.dataset.data[idx]
            name = '{}_{}_{}'.format(d[0], d[1], d[2])
            per_name.setdefault(name, []).append(value)
        output[split] = per_name

    dest = args.load + '.output' + args.pred_suffix
    logging.info('Saving predictions to {}'.format(dest))
    with open(dest, 'wb') as file:
        pickle.dump(output, file)
Example #20
Source File: train_ddp.py From space_time_pde with MIT License | 5 votes |
def setup(rank, world_size, offset=0):
    """Join the 'gloo' process group for this rank and seed torch."""
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = str(12355 + offset)

    # initialize the process group
    dist.init_process_group("gloo", rank=rank, world_size=world_size)

    # Explicitly setting seed to make sure that models created in two processes
    # start from same random weights and biases.
    torch.manual_seed(42)
Example #21
Source File: run_seq2seq.py From unilm with MIT License | 5 votes |
def prepare_for_training(args, model, checkpoint_state_dict, amp):
    """Build the AdamW optimizer (no weight decay on bias/LayerNorm weights),
    optionally amp-wrap, restore checkpoint state, then apply DataParallel or
    DistributedDataParallel as configured."""
    no_decay = ['bias', 'LayerNorm.weight']
    decay_params, no_decay_params = [], []
    for n, p in model.named_parameters():
        bucket = no_decay_params if any(nd in n for nd in no_decay) else decay_params
        bucket.append(p)
    optimizer_grouped_parameters = [
        {'params': decay_params, 'weight_decay': args.weight_decay},
        {'params': no_decay_params, 'weight_decay': 0.0},
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate, eps=args.adam_epsilon)

    if amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)
        if checkpoint_state_dict:
            amp.load_state_dict(checkpoint_state_dict['amp'])

    if checkpoint_state_dict:
        optimizer.load_state_dict(checkpoint_state_dict['optimizer'])
        model.load_state_dict(checkpoint_state_dict['model'])

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank],
            output_device=args.local_rank, find_unused_parameters=True)
    return model, optimizer
Example #22
Source File: wrapper.py From pytorch-tools with MIT License | 5 votes |
def __init__(
    self,
    model,
    optimizer,
    criterion,
    metrics=None,
    callbacks=None,
    gradient_clip_val=0,
    accumulate_steps=1,
):
    """Create a runner around model/optimizer/criterion.

    Args:
        model: network to run.
        optimizer: optimizer, or None.
        criterion: loss function.
        metrics: optional metrics, passed through to RunnerState.
        callbacks: callback or list of callbacks; defaults to a fresh
            ConsoleLogger(). BUGFIX: the default used to be the mutable
            ``callbacks=ConsoleLogger()`` evaluated once at definition time,
            so every runner created without explicit callbacks shared a
            single logger instance (and ``set_state`` rebound its state).
        gradient_clip_val: clip value stored on the runner; 0 disables it.
        accumulate_steps: gradient accumulation count stored on the runner.
    """
    super().__init__()
    if callbacks is None:
        callbacks = ConsoleLogger()
    # Only run amp.initialize once: amp's internal state records whether
    # opt_properties were already set up.
    if not hasattr(amp._amp_state, "opt_properties"):
        model_optimizer = amp.initialize(model, optimizer, enabled=False)
        # With no optimizer amp returns only the model; normalize to a pair.
        model, optimizer = (model_optimizer, None) if optimizer is None else model_optimizer
    self.state = RunnerState(
        model=model, optimizer=optimizer, criterion=criterion, metrics=metrics,
    )
    self.callbacks = Callbacks(callbacks)
    self.callbacks.set_state(self.state)
    self.gradient_clip_val = gradient_clip_val
    self.accumulate_steps = accumulate_steps
Example #23
Source File: train.py From DeepPrivacy with MIT License | 5 votes |
def load_checkpoint(self):
    """Restore full trainer state from the checkpoint directory.

    Returns:
        bool: True on success, False when no checkpoint file exists.
    """
    try:
        # Load to GPU 0 when available, otherwise CPU.
        map_location = "cuda:0" if torch.cuda.is_available() else "cpu"
        ckpt = load_checkpoint(self.checkpoint_dir, map_location=map_location)
        # Transition settings
        self.is_transitioning = ckpt["is_transitioning"]
        self.transition_step = ckpt["transition_step"]
        self.current_imsize = ckpt["current_imsize"]
        self.latest_switch = ckpt["latest_switch"]
        # Tracking stats
        self.global_step = ckpt["global_step"]
        # total_time appears to be stored in minutes (hence * 60) — confirm.
        self.start_time = time.time() - ckpt["total_time"] * 60
        self.num_skipped_steps = ckpt["num_skipped_steps"]
        # Models
        self.discriminator.load_state_dict(ckpt['D'])
        self.generator.load_state_dict(ckpt['G'])
        self.running_average_generator.load_state_dict(
            ckpt["running_average_generator"])
        to_cuda([self.generator, self.discriminator,
                 self.running_average_generator])
        # amp-wrap the running-average generator; no optimizer is attached.
        self.running_average_generator = amp.initialize(self.running_average_generator,
                                                        None, opt_level=self.opt_level)
        # Optimizers are (re)built before loading their saved state.
        self.init_optimizers()
        self.d_optimizer.load_state_dict(ckpt['d_optimizer'])
        self.g_optimizer.load_state_dict(ckpt['g_optimizer'])
        return True
    except FileNotFoundError as e:
        print(e)
        print(' [*] No checkpoint!')
        return False
Example #24
Source File: train.py From DeepPrivacy with MIT License | 5 votes |
def init_running_average_generator(self):
    """Create the running-average twin of the generator and amp-wrap it."""
    self.running_average_generator = Generator(
        self.pose_size, self.start_channel_size, self.image_channels)
    self.running_average_generator = wrap_models(
        self.running_average_generator)
    to_cuda(self.running_average_generator)
    # No optimizer is attached to the averaged copy, hence `None`.
    self.running_average_generator = amp.initialize(
        self.running_average_generator, None, opt_level=self.opt_level)
Example #25
Source File: train.py From DeepPrivacy with MIT License | 5 votes |
def extend_running_average_generator(self):
    """Grow the averaged generator in step with self.generator and seed its
    newly created parameters from the current generator's values."""
    avg_gen = self.running_average_generator
    avg_gen.extend()
    pairs = zip(avg_gen.new_parameters(), self.generator.new_parameters())
    for avg_param, cur_param in pairs:
        # Shapes must line up before copying the fresh weights over.
        assert avg_param.data.shape == cur_param.data.shape, \
            "AVG param: {}, cur_param: {}".format(
                avg_param.shape, cur_param.shape)
        avg_param.data = cur_param.data
    to_cuda(avg_gen)
    self.running_average_generator = amp.initialize(
        self.running_average_generator, None, opt_level=self.opt_level)
Example #26
Source File: train.py From DeepPrivacy with MIT License | 5 votes |
def initialize_amp(self):
    """Wrap both networks and both optimizers with apex amp in a single call."""
    to_cuda([self.generator, self.discriminator])
    models = [self.generator, self.discriminator]
    optimizers = [self.g_optimizer, self.d_optimizer]
    # num_losses=4 lets amp maintain one loss scaler per loss (per apex docs).
    [self.generator, self.discriminator], \
        [self.g_optimizer, self.d_optimizer] = amp.initialize(
            models, optimizers, opt_level=self.opt_level, num_losses=4)
Example #27
Source File: torch_runner.py From ray with Apache License 2.0 | 5 votes |
def _try_setup_apex(self): """Sets up the model for fp16 training via apex if available.""" if self.use_fp16 and amp: self.models, self.optimizers = amp.initialize( self.models, self.optimizers, **self.apex_args)
Example #28
Source File: inference.py From waveglow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def main(mel_files, waveglow_path, sigma, output_dir, sampling_rate, is_fp16,
         denoiser_strength):
    """Synthesize a 16-bit wav for every mel spectrogram listed in *mel_files*
    using a pretrained WaveGlow model, optionally in fp16 and with denoising.
    """
    mel_files = files_to_list(mel_files)
    waveglow = torch.load(waveglow_path)['model']
    # Weight norm is only needed during training; fold it away for inference.
    waveglow = waveglow.remove_weightnorm(waveglow)
    waveglow.cuda().eval()
    if is_fp16:
        from apex import amp
        # Empty optimizer list: inference only. O3 runs the model in fp16.
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O3")

    if denoiser_strength > 0:
        denoiser = Denoiser(waveglow).cuda()

    for i, file_path in enumerate(mel_files):
        file_name = os.path.splitext(os.path.basename(file_path))[0]
        mel = torch.load(file_path)
        mel = torch.autograd.Variable(mel.cuda())
        # Add a batch dimension; halve the input to match the fp16 model.
        mel = torch.unsqueeze(mel, 0)
        mel = mel.half() if is_fp16 else mel
        with torch.no_grad():
            audio = waveglow.infer(mel, sigma=sigma)
            if denoiser_strength > 0:
                audio = denoiser(audio, denoiser_strength)
            # Scale [-1, 1] float audio to the int16 range.
            audio = audio * MAX_WAV_VALUE
        audio = audio.squeeze()
        audio = audio.cpu().numpy()
        audio = audio.astype('int16')
        audio_path = os.path.join(
            output_dir, "{}_synthesis.wav".format(file_name))
        write(audio_path, sampling_rate, audio)
        print(audio_path)
Example #29
Source File: trainer.py From CAIL2019 with MIT License | 5 votes |
def _reset(self):
    """Reset training counters, optionally resume from a checkpoint, then
    amp-wrap the model/optimizer and set up multi-GPU placement."""
    self.start_epoch = 0
    self.global_step = 0
    self.model = self.model.to(self.device)

    if self.resume:
        # restore(...) returns at least: model, optimizer, start epoch, best value.
        restored = self.model_checkpoint.restore(self.model, self.optimizer)
        self.model = restored[0]
        self.optimizer = restored[1]
        self.start_epoch = restored[2]
        self.model_checkpoint.best = restored[3]
        self.logger.info(
            f"Checkpoint (epoch {self.start_epoch}, best {self.model_checkpoint.best}) loaded"
        )
        self.global_step = self.start_epoch * len(self.train_loader)

    self.model, self.optimizer = amp.initialize(
        self.model, self.optimizer, opt_level="O1", verbosity=0
    )
    if self.n_gpus > 1:
        self.model, self.device = model_device(
            self.model, self.n_gpus, self.logger
        )
Example #30
Source File: network_trainer.py From nnUNet with Apache License 2.0 | 4 votes |
def manage_patience(self):
    """Early-stopping bookkeeping based on the moving averages of the train
    loss and the validation evaluation criterion.

    (Fix: the original scraped text split the 'Now see if the moving average
    of the train loss has improved' comment mid-sentence across two physical
    lines, corrupting the source; the comment is reattached here and the
    commented-out debug logging removed.)

    Returns:
        bool: True if training should continue, False once patience ran out
        and the learning rate is already below self.lr_threshold.
    """
    continue_training = True
    if self.patience is not None:
        # Lazily initialize the best-so-far trackers on first call.
        if self.best_MA_tr_loss_for_patience is None:
            self.best_MA_tr_loss_for_patience = self.train_loss_MA

        if self.best_epoch_based_on_MA_tr_loss is None:
            self.best_epoch_based_on_MA_tr_loss = self.epoch

        if self.best_val_eval_criterion_MA is None:
            self.best_val_eval_criterion_MA = self.val_eval_criterion_MA

        # Check if the current epoch is the best one according to the moving
        # average of the validation criterion; if so, save the 'best' model.
        # Do not use this for validation. This is intended for test set
        # prediction only.
        if self.val_eval_criterion_MA > self.best_val_eval_criterion_MA:
            self.best_val_eval_criterion_MA = self.val_eval_criterion_MA
            self.save_checkpoint(join(self.output_folder, "model_best.model"))

        # Now see if the moving average of the train loss has improved.
        # If yes then reset patience, else increase patience.
        if self.train_loss_MA + self.train_loss_MA_eps < self.best_MA_tr_loss_for_patience:
            self.best_MA_tr_loss_for_patience = self.train_loss_MA
            self.best_epoch_based_on_MA_tr_loss = self.epoch

        # If patience has reached its maximum, finish training — but only
        # once the learning rate has dropped below the threshold; otherwise
        # grant half the patience budget again and let the lr schedule work.
        if self.epoch - self.best_epoch_based_on_MA_tr_loss > self.patience:
            if self.optimizer.param_groups[0]['lr'] > self.lr_threshold:
                self.best_epoch_based_on_MA_tr_loss = self.epoch - self.patience // 2
            else:
                continue_training = False

    return continue_training