Python apex.amp.initialize() Examples

The following are 30 code examples of apex.amp.initialize(), taken from open-source projects. The original project and source file are noted above each example. You may also want to check out all available functions/classes of the module apex.amp.
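Most of the snippets below follow the same basic pattern, so a minimal sketch is shown here for orientation. MyModel, loader, and the chosen opt_level are illustrative placeholders, not taken from any of the projects below:

from apex import amp
import torch
import torch.nn.functional as F

model = MyModel().cuda()          # placeholder model; must be on the GPU before amp.initialize
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Patch model and optimizer in place for mixed precision; "O1" is the most commonly used level.
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

for data, target in loader:       # `loader` is assumed to yield CUDA tensors
    optimizer.zero_grad()
    loss = F.mse_loss(model(data), target)
    # Scale the loss so FP16 gradients do not underflow, then backprop on the scaled loss.
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()
    optimizer.step()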
Example #1
Source File: test_larc.py    From apex with BSD 3-Clause "New" or "Revised" License
def test_larc_mixed_precision(self):
        for opt_level in ["O0", "O1", "O2", "O3"]:
            model = MyModel(1)

            optimizer = LARC(
                torch.optim.SGD(
                    [{"params": model.parameters(), "lr": 0.25}], momentum=0.125
                )
            )

            model, optimizer = amp.initialize(
                model, optimizer, opt_level=opt_level, verbosity=0
            )

            optimizer.zero_grad()
            loss = model(self.x)
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            optimizer.step() 
Example #2
Source File: test_lr_finder.py    From pytorch-lr-finder with MIT License
def test_gradient_accumulation_with_apex_amp(self, mocker):
        desired_bs, accum_steps = 32, 4
        real_bs = desired_bs // accum_steps
        num_iter = 10
        task = mod_task.XORTask(batch_size=real_bs)

        # Wrap model and optimizer with `amp.initialize`. Besides, `amp` requires
        # a CUDA GPU, so we have to move the model to GPU first.
        model, optimizer, device = task.model, task.optimizer, task.device
        model = model.to(device)
        task.model, task.optimizer = amp.initialize(model, optimizer)

        lr_finder = prepare_lr_finder(task)
        spy = mocker.spy(amp, "scale_loss")

        lr_finder.range_test(
            task.train_loader, num_iter=num_iter, accumulation_steps=accum_steps
        )
        assert spy.call_count == accum_steps * num_iter 
Example #3
Source File: test_lr_finder.py    From pytorch-lr-finder with MIT License
def test_mixed_precision(self, mocker):
        batch_size = 32
        num_iter = 10
        task = mod_task.XORTask(batch_size=batch_size)

        # Wrap model and optimizer with `amp.initialize`. Besides, `amp` requires
        # a CUDA GPU, so we have to move the model to GPU first.
        model, optimizer, device = task.model, task.optimizer, task.device
        model = model.to(device)
        task.model, task.optimizer = amp.initialize(model, optimizer)
        assert hasattr(task.optimizer, "_amp_stash")

        lr_finder = prepare_lr_finder(task)
        spy = mocker.spy(amp, "scale_loss")

        lr_finder.range_test(task.train_loader, num_iter=num_iter)
        # NOTE: Here we did not perform gradient accumulation, so the call count
        # of `amp.scale_loss` should be equal to `num_iter`.
        assert spy.call_count == num_iter 
Example #4
Source File: mixed_precision.py    From amdim-public with MIT License
def initialize(model, optimizers):
    """Initialize mixed precision

    Arguments:
        model {nn.Module} -- The model to convert
        optimizers -- The optimizer(s) to convert (may be None)

    Returns:
        [nn.Module, Optimizer] -- Converted model and optimizer
    """
    if is_mixed_precision():
        from apex import amp
        if optimizers is not None:
            model, optimizers = \
                amp.initialize(model, optimizers, opt_level=get_optim_level())
        else:
            model = amp.initialize(model, opt_level=get_optim_level())
    return model, optimizers 
Example #5
Source File: trace.py    From catalyst with Apache License 2.0
def load_traced_model(
    model_path: Union[str, Path],
    device: Device = "cpu",
    opt_level: str = None,
) -> jit.ScriptModule:
    """Loads a traced model.

    Args:
        model_path: Path to traced model
        device (str): Torch device
        opt_level (str): Apex FP16 init level, optional

    Returns:
        ScriptModule: Traced model
    """
    # jit.load doesn't work with pathlib.Path
    model_path = str(model_path)

    if opt_level is not None:
        device = "cuda"

    model = jit.load(model_path, map_location=device)

    if opt_level is not None:
        assert_fp16_available()
        from apex import amp

        model = amp.initialize(model, optimizers=None, opt_level=opt_level)

    return model 
Example #6
Source File: utils.py    From machina with MIT License
def make_model_distributed(model, optim,
                           use_apex=False,
                           apex_opt_level="O0",
                           apex_keep_batchnorm_fp32=True,
                           apex_sync_bn=False,
                           apex_loss_scale=None,
                           device_ids=None, output_device=None,
                           ):
    """Return model for distributed trainings.
    Note that returned model shares parameters with the original model.
    """
    if use_apex:
        global amp
        global apex
        import apex.parallel
        from apex import amp
        ddp_model, optim = amp.initialize(model, optim,
                                          opt_level=apex_opt_level,
                                          keep_batchnorm_fp32=apex_keep_batchnorm_fp32,
                                          loss_scale=apex_loss_scale)
        ddp_cls = wrap_ddp(apex.parallel.DistributedDataParallel)
        ddp_model = ddp_cls(ddp_model)
        if apex_sync_bn:
            ddp_model = apex.parallel.convert_syncbn_model(model)
    else:
        ddp_cls = wrap_ddp(nn.parallel.DistributedDataParallel)
        ddp_model = ddp_cls(model, device_ids, output_device)

    return ddp_model, optim 
Example #7
Source File: network_trainer.py    From nnUNet with Apache License 2.0
def initialize(self, training=True):
        """
        create self.output_folder

        modify self.output_folder if you are doing cross-validation (one folder per fold)

        set self.tr_gen and self.val_gen

        call self.initialize_network and self.initialize_optimizer_and_scheduler (important!)

        finally set self.was_initialized to True
        :param training:
        :return:
        """ 
Example #8
Source File: network_trainer.py    From nnUNet with Apache License 2.0
def load_checkpoint(self, fname, train=True):
        self.print_to_log_file("loading checkpoint", fname, "train=", train)
        if not self.was_initialized:
            self.initialize(train)
        # saved_model = torch.load(fname, map_location=torch.device('cuda', torch.cuda.current_device()))
        saved_model = torch.load(fname, map_location=torch.device('cpu'))
        self.load_checkpoint_ram(saved_model, train) 
Example #9
Source File: network_trainer.py    From nnUNet with Apache License 2.0
def initialize_network(self):
        """
        initialize self.network here
        :return:
        """
        pass 
Example #10
Source File: network_trainer.py    From nnUNet with Apache License 2.0
def initialize_optimizer_and_scheduler(self):
        """
        initialize self.optimizer and self.lr_scheduler (if applicable) here
        :return:
        """
        pass 
Example #11
Source File: nnUNetTrainerV2_Mish.py    From nnUNet with Apache License 2.0
def _maybe_init_amp(self):
        """
        In O1, Mish will result in very high memory usage. I believe that may be because amp decides to be safe
        and use fp32 for all activation functions. By using O2 we reduce memory consumption by a lot.
        :return:
        """
        # we use fp16 for training only, not inference
        if self.fp16 and torch.cuda.is_available():
            if not self.amp_initialized:
                if amp is not None:
                    self.network, self.optimizer = amp.initialize(self.network, self.optimizer, opt_level="O2")
                    self.amp_initialized = True
                else:
                    self.print_to_log_file("WARNING: FP16 training was requested but nvidia apex is not installed. "
                                           "Install it from https://github.com/NVIDIA/apex") 
Example #12
Source File: nnUNetTrainerV2_O2.py    From nnUNet with Apache License 2.0
def _maybe_init_amp(self):
        if self.fp16 and torch.cuda.is_available():
            if not self.amp_initialized:
                if amp is not None:
                    self.network, self.optimizer = amp.initialize(self.network, self.optimizer, opt_level="O1")
                    self.amp_initialized = True
                else:
                    raise RuntimeError("WARNING: FP16 training was requested but nvidia apex is not installed. "
                                       "Install it from https://github.com/NVIDIA/apex") 
Example #13
Source File: optimization.py    From FARM with Apache License 2.0
def _init_amp(model, device, optimizer=None, use_amp=None):
    model = model.to(device)
    if use_amp and optimizer:
        if AMP_AVAILABLE:
            model, optimizer = amp.initialize(model, optimizer, opt_level=use_amp)
        else:
            logger.warning(f"Can't find AMP although you specificed to use amp with level {use_amp}. Will continue without AMP ...")

    return model, optimizer 
Example #14
Source File: training.py    From ignite with BSD 3-Clause "New" or "Revised" License
def initialize(config):

    model = config.model.to(config.device)
    optimizer = config.optimizer
    # Setup Nvidia/Apex AMP
    model, optimizer = amp.initialize(model, optimizer, opt_level=getattr(config, "fp16_opt_level", "O2"), num_losses=1)

    # Adapt model to dist conf
    model = idist.auto_model(model)

    criterion = config.criterion.to(config.device)

    return model, optimizer, criterion 
Example #15
Source File: training.py    From ignite with BSD 3-Clause "New" or "Revised" License
def initialize(config):

    model = config.model.to(config.device)
    optimizer = config.optimizer
    # Setup Nvidia/Apex AMP
    model, optimizer = amp.initialize(model, optimizer, opt_level=getattr(config, "fp16_opt_level", "O2"), num_losses=1)

    # Adapt model to dist conf
    model = idist.auto_model(model)

    criterion = config.criterion.to(config.device)

    return model, optimizer, criterion 
Example #16
Source File: test_checkpointing.py    From apex with BSD 3-Clause "New" or "Revised" License
def test_state_dict(self):
        for opt_level in self.test_opt_levels:
            # Skip O3
            if opt_level == 'O3':
                continue

            model = MyModel().to('cuda')
            optimizer = optim.Adam(model.parameters(), lr=1e-3)
            model, optimizer = amp.initialize(
                model, optimizer, opt_level=opt_level, verbosity=0)

            # Export state_dict and check for Half
            state_dict = model.state_dict()
            for key in state_dict:
                self.assertFalse('Half' in state_dict[key].type())

            # Check if the model is still trainable
            # Create dummy data
            data = torch.randn(10, 3, 4, 4, device='cuda')
            target = torch.randn(10, 6, 4, 4, device='cuda')
            
            # Get initial loss
            optimizer.zero_grad()
            output = model(data)
            loss = F.mse_loss(output, target)
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            optimizer.step()
            last_loss = loss.item()

            # train for some epochs
            for epoch in range(10):
                optimizer.zero_grad()
                output = model(data)
                loss = F.mse_loss(output, target)
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                optimizer.step()
                self.assertTrue(loss.item() < last_loss)
                last_loss = loss.item() 
Example #17
Source File: keker.py    From kekas with MIT License
def to_fp16(self, **amp_params):
        """Use NVIDIA apex library for mixed precision training.
        After calling this method, all operations will be used in mixed precision.

        Returns:
            self
        """
        amp_params = dict({"opt_level": "O1", "verbosity": 0}, **amp_params)
        self.state.core.amp_params = amp_params
        self.state.core.model = amp.initialize(self.state.core.model, **amp_params)
        self.state.core.use_fp16 = True
        return self 
Example #18
Source File: training.py    From tape with BSD 3-Clause "New" or "Revised" License
def initialize_fp16(self):
        if self.fp16:
            self.model, self.optimizer = amp.initialize(
                self.model, self.optimizer, opt_level="O2", loss_scale="dynamic",
                master_weights=True)
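            # Set the initial dynamic loss scale through apex's private _amp_state (not a public API).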
            _amp_state.loss_scalers[0]._loss_scale = 2 ** 20 
Example #19
Source File: main.py    From kaggle-rcic-1st with MIT License
def predict(args, model):
    """Entrypoint for predict mode"""

    test_loader = dataset.get_test_loader(args)
    train_loader, val_loader = dataset.get_train_val_loader(args, predict=True)

    if args.fp16:
        model = amp.initialize(model, opt_level='O1')

    logging.info('Starting prediction')

    output = {}
    for k, loader in [('test', test_loader),
                      ('val', val_loader)]:
        output[k] = {}
        res = infer(args, model, loader)

        for i, v in res.items():
            d = loader.dataset.data[i]
            name = '{}_{}_{}'.format(d[0], d[1], d[2])
            if name not in output[k]:
                output[k][name] = []
            output[k][name].append(v)

    logging.info('Saving predictions to {}'.format(args.load + '.output' + args.pred_suffix))
    with open(args.load + '.output' + args.pred_suffix, 'wb') as file:
        pickle.dump(output, file) 
Example #20
Source File: train_ddp.py    From space_time_pde with MIT License
def setup(rank, world_size, offset=0):
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = str(12355+offset)

    # initialize the process group
    dist.init_process_group("gloo", rank=rank, world_size=world_size)

    # Explicitly setting seed to make sure that models created in two processes
    # start from same random weights and biases.
    torch.manual_seed(42) 
Example #21
Source File: run_seq2seq.py    From unilm with MIT License
def prepare_for_training(args, model, checkpoint_state_dict, amp):
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
         'weight_decay': args.weight_decay},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)

    if amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)
        if checkpoint_state_dict:
            amp.load_state_dict(checkpoint_state_dict['amp'])

    if checkpoint_state_dict:
        optimizer.load_state_dict(checkpoint_state_dict['optimizer'])
        model.load_state_dict(checkpoint_state_dict['model'])

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank, find_unused_parameters=True)

    return model, optimizer 
Example #22
Source File: wrapper.py    From pytorch-tools with MIT License
def __init__(
        self, model, optimizer, criterion, metrics=None, callbacks=ConsoleLogger(), gradient_clip_val=0, accumulate_steps=1,
    ):
        super().__init__()

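        # amp._amp_state.opt_properties only exists once amp.initialize has run; calling it
        # here with enabled=False makes amp.scale_loss a no-op, so the same training loop
        # works whether or not mixed precision was requested elsewhere.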
        if not hasattr(amp._amp_state, "opt_properties"):
            model_optimizer = amp.initialize(model, optimizer, enabled=False)
            model, optimizer = (model_optimizer, None) if optimizer is None else model_optimizer

        self.state = RunnerState(model=model, optimizer=optimizer, criterion=criterion, metrics=metrics,)
        self.callbacks = Callbacks(callbacks)
        self.callbacks.set_state(self.state)
        self.gradient_clip_val = gradient_clip_val
        self.accumulate_steps = accumulate_steps 
Example #23
Source File: train.py    From DeepPrivacy with MIT License
def load_checkpoint(self):
        try:
            map_location = "cuda:0" if torch.cuda.is_available() else "cpu"
            ckpt = load_checkpoint(self.checkpoint_dir,
                                   map_location=map_location)
            # Transition settings
            self.is_transitioning = ckpt["is_transitioning"]
            self.transition_step = ckpt["transition_step"]
            self.current_imsize = ckpt["current_imsize"]
            self.latest_switch = ckpt["latest_switch"]

            # Tracking stats
            self.global_step = ckpt["global_step"]
            self.start_time = time.time() - ckpt["total_time"] * 60
            self.num_skipped_steps = ckpt["num_skipped_steps"]

            # Models
            self.discriminator.load_state_dict(ckpt['D'])

            self.generator.load_state_dict(ckpt['G'])
            self.running_average_generator.load_state_dict(
                ckpt["running_average_generator"])
            to_cuda([self.generator, self.discriminator,
                     self.running_average_generator])
            self.running_average_generator = amp.initialize(self.running_average_generator,
                                                            None, opt_level=self.opt_level)
            self.init_optimizers()
            self.d_optimizer.load_state_dict(ckpt['d_optimizer'])
            self.g_optimizer.load_state_dict(ckpt['g_optimizer'])
            return True
        except FileNotFoundError as e:
            print(e)
            print(' [*] No checkpoint!')
            return False 
Example #24
Source File: train.py    From DeepPrivacy with MIT License
def init_running_average_generator(self):
        self.running_average_generator = Generator(self.pose_size,
                                                   self.start_channel_size,
                                                   self.image_channels)
        self.running_average_generator = wrap_models(
            self.running_average_generator)
        to_cuda(self.running_average_generator)
        self.running_average_generator = amp.initialize(self.running_average_generator,
                                                        None, opt_level=self.opt_level) 
Example #25
Source File: train.py    From DeepPrivacy with MIT License
def extend_running_average_generator(self):
        g = self.running_average_generator
        g.extend()

        for avg_param, cur_param in zip(g.new_parameters(), self.generator.new_parameters()):
            assert avg_param.data.shape == cur_param.data.shape, "AVG param: {}, cur_param: {}".format(
                avg_param.shape, cur_param.shape)
            avg_param.data = cur_param.data
        to_cuda(g)
        self.running_average_generator = amp.initialize(
            self.running_average_generator, None, opt_level=self.opt_level) 
Example #26
Source File: train.py    From DeepPrivacy with MIT License
def initialize_amp(self):
        to_cuda([self.generator, self.discriminator])
        [self.generator, self.discriminator], [self.g_optimizer, self.d_optimizer] = amp.initialize(
            [self.generator, self.discriminator],
            [self.g_optimizer, self.d_optimizer],
            opt_level=self.opt_level,
            num_losses=4) 
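With multiple models and optimizers and num_losses set as above, each loss is then scaled through amp.scale_loss with its own loss_id during the backward pass. A minimal sketch follows, assuming generator and discriminator losses named g_loss and d_loss; these names and their pairing with the optimizers are illustrative, not DeepPrivacy's actual training step:

# Sketch only: g_loss / d_loss and the optimizer pairing are assumptions.
with amp.scale_loss(g_loss, self.g_optimizer, loss_id=0) as scaled_loss:
    scaled_loss.backward()
with amp.scale_loss(d_loss, self.d_optimizer, loss_id=1) as scaled_loss:
    scaled_loss.backward()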
Example #27
Source File: torch_runner.py    From ray with Apache License 2.0
def _try_setup_apex(self):
        """Sets up the model for fp16 training via apex if available."""
        if self.use_fp16 and amp:
            self.models, self.optimizers = amp.initialize(
                self.models, self.optimizers, **self.apex_args) 
Example #28
Source File: inference.py    From waveglow with BSD 3-Clause "New" or "Revised" License
def main(mel_files, waveglow_path, sigma, output_dir, sampling_rate, is_fp16,
         denoiser_strength):
    mel_files = files_to_list(mel_files)
    waveglow = torch.load(waveglow_path)['model']
    waveglow = waveglow.remove_weightnorm(waveglow)
    waveglow.cuda().eval()
    if is_fp16:
        from apex import amp
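        # Inference only: pass an empty optimizer list; opt_level="O3" casts the model to pure FP16.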
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O3")

    if denoiser_strength > 0:
        denoiser = Denoiser(waveglow).cuda()

    for i, file_path in enumerate(mel_files):
        file_name = os.path.splitext(os.path.basename(file_path))[0]
        mel = torch.load(file_path)
        mel = torch.autograd.Variable(mel.cuda())
        mel = torch.unsqueeze(mel, 0)
        mel = mel.half() if is_fp16 else mel
        with torch.no_grad():
            audio = waveglow.infer(mel, sigma=sigma)
            if denoiser_strength > 0:
                audio = denoiser(audio, denoiser_strength)
            audio = audio * MAX_WAV_VALUE
        audio = audio.squeeze()
        audio = audio.cpu().numpy()
        audio = audio.astype('int16')
        audio_path = os.path.join(
            output_dir, "{}_synthesis.wav".format(file_name))
        write(audio_path, sampling_rate, audio)
        print(audio_path) 
Example #29
Source File: trainer.py    From CAIL2019 with MIT License
def _reset(self):
        self.start_epoch = 0
        self.global_step = 0

        self.model = self.model.to(self.device)

        if self.resume:
            resume_list = self.model_checkpoint.restore(
                self.model, self.optimizer
            )

            self.model = resume_list[0]
            self.optimizer = resume_list[1]
            self.start_epoch = resume_list[2]
            self.model_checkpoint.best = resume_list[3]
            self.logger.info(
                f"Checkpoint (epoch {self.start_epoch}, best {self.model_checkpoint.best}) loaded"
            )
            self.global_step = self.start_epoch * len(self.train_loader)

        self.model, self.optimizer = amp.initialize(
            self.model, self.optimizer, opt_level="O1", verbosity=0
        )
        if self.n_gpus > 1:
            self.model, self.device = model_device(
                self.model, self.n_gpus, self.logger
            ) 
Example #30
Source File: network_trainer.py    From nnUNet with Apache License 2.0
def manage_patience(self):
        # update patience
        continue_training = True
        if self.patience is not None:
            # if best_MA_tr_loss_for_patience and best_epoch_based_on_MA_tr_loss were not yet initialized,
            # initialize them
            if self.best_MA_tr_loss_for_patience is None:
                self.best_MA_tr_loss_for_patience = self.train_loss_MA

            if self.best_epoch_based_on_MA_tr_loss is None:
                self.best_epoch_based_on_MA_tr_loss = self.epoch

            if self.best_val_eval_criterion_MA is None:
                self.best_val_eval_criterion_MA = self.val_eval_criterion_MA

            # check if the current epoch is the best one according to moving average of validation criterion. If so
            # then save 'best' model
            # Do not use this for validation. This is intended for test set prediction only.
            #self.print_to_log_file("current best_val_eval_criterion_MA is %.4f0" % self.best_val_eval_criterion_MA)
            #self.print_to_log_file("current val_eval_criterion_MA is %.4f" % self.val_eval_criterion_MA)

            if self.val_eval_criterion_MA > self.best_val_eval_criterion_MA:
                self.best_val_eval_criterion_MA = self.val_eval_criterion_MA
                #self.print_to_log_file("saving best epoch checkpoint...")
                self.save_checkpoint(join(self.output_folder, "model_best.model"))

            # Now see if the moving average of the train loss has improved. If yes then reset patience, else
            # increase patience
            if self.train_loss_MA + self.train_loss_MA_eps < self.best_MA_tr_loss_for_patience:
                self.best_MA_tr_loss_for_patience = self.train_loss_MA
                self.best_epoch_based_on_MA_tr_loss = self.epoch
                #self.print_to_log_file("New best epoch (train loss MA): %03.4f" % self.best_MA_tr_loss_for_patience)
            else:
                pass
                #self.print_to_log_file("No improvement: current train MA %03.4f, best: %03.4f, eps is %03.4f" %
                #                       (self.train_loss_MA, self.best_MA_tr_loss_for_patience, self.train_loss_MA_eps))

            # if patience has reached its maximum then finish training (provided lr is low enough)
            if self.epoch - self.best_epoch_based_on_MA_tr_loss > self.patience:
                if self.optimizer.param_groups[0]['lr'] > self.lr_threshold:
                    #self.print_to_log_file("My patience ended, but I believe I need more time (lr > 1e-6)")
                    self.best_epoch_based_on_MA_tr_loss = self.epoch - self.patience // 2
                else:
                    #self.print_to_log_file("My patience ended")
                    continue_training = False
            else:
                pass
                #self.print_to_log_file(
                #    "Patience: %d/%d" % (self.epoch - self.best_epoch_based_on_MA_tr_loss, self.patience))

        return continue_training