Python apex.amp Examples

The following are 13 code examples of apex.amp, the automatic mixed precision (AMP) module from NVIDIA's apex library for PyTorch. The source file and project for each example are listed above it, so you can follow them back to the original code. You may also want to check out the other available functions and classes of the apex module.
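
Before the project-specific examples, here is a minimal sketch of the two-step apex.amp pattern most of them build on: patch the model and optimizer with amp.initialize, then wrap each backward pass in amp.scale_loss. The tiny model, optimizer, and data below are placeholders, and the snippet assumes apex is installed and a CUDA device is available.

import torch
from apex import amp

model = torch.nn.Linear(10, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# Patch the model and optimizer for mixed precision training.
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

inputs = torch.randn(4, 10).cuda()
targets = torch.randint(0, 2, (4,)).cuda()

optimizer.zero_grad()
loss = torch.nn.functional.cross_entropy(model(inputs), targets)
# Scale the loss so fp16 gradients do not underflow, then backpropagate.
with amp.scale_loss(loss, optimizer) as scaled_loss:
    scaled_loss.backward()
optimizer.step()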
Example #1
Source File: argus_models.py    From argus-freesound with MIT License
def __init__(self, params):
        super().__init__(params)

        if 'aux' in params:
            self.aux_weights = params['aux']['weights']
        else:
            self.aux_weights = None

        self.use_amp = not config.kernel and 'amp' in params
        if self.use_amp:
            from apex import amp
            self.amp = amp
            self.nn_module, self.optimizer = self.amp.initialize(
                self.nn_module, self.optimizer,
                opt_level=params['amp']['opt_level'],
                keep_batchnorm_fp32=params['amp']['keep_batchnorm_fp32'],
                loss_scale=params['amp']['loss_scale']
            ) 
Example #2
Source File: distrib_parts.py    From pytorch-lightning with Apache License 2.0
def single_gpu_train(self, model):
        # call setup
        self.setup('fit')
        if self.is_function_implemented('setup', model):
            model.setup('fit')

        model.cuda(self.root_gpu)

        # CHOOSE OPTIMIZER
        # allow for lr schedulers as well
        self.optimizers, self.lr_schedulers, self.optimizer_frequencies = self.init_optimizers(model)

        # TODO: remove with dropping NVIDIA AMP support
        if self.use_amp and not NATIVE_AMP_AVALAIBLE:
            # An example
            model, optimizers = model.configure_apex(amp, model, self.optimizers, self.amp_level)
            self.optimizers = optimizers
            self.reinit_scheduler_properties(self.optimizers, self.lr_schedulers)

        self.run_pretrain_routine(model) 
Example #3
Source File: argus_models.py    From argus-freesound with MIT License
def train_step(self, batch) -> dict:
        if not self.nn_module.training:
            self.nn_module.train()
        self.optimizer.zero_grad()
        input, target, noisy = self.prepare_batch(batch, self.device)
        prediction = self.nn_module(input)
        if self.aux_weights is not None:
            loss = 0
            for pred, weight in zip(prediction, self.aux_weights):
                loss += self.loss(pred, target, noisy) * weight
        else:
            loss = self.loss(prediction, target, noisy)
        if self.use_amp:
            with self.amp.scale_loss(loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        self.optimizer.step()

        prediction = deep_detach(prediction)
        target = deep_detach(target)
        return {
            'prediction': self.prediction_transform(prediction[0]),
            'target': target,
            'loss': loss.item(),
            'noisy': noisy
        } 
Example #4
Source File: classification_task.py    From ClassyVision with MIT License
def set_amp_args(self, amp_args: Optional[Dict[str, Any]]):
        """Disable / enable apex.amp and set the automatic mixed precision parameters.

        apex.amp can be utilized for mixed / half precision training.

        Args:
            amp_args: Dictionary containing arguments to be passed to
            amp.initialize. Set to None to disable amp.  To enable mixed
            precision training, pass amp_args={"opt_level": "O1"} here.
            See https://nvidia.github.io/apex/amp.html for more info.

        Raises:
            RuntimeError: If opt_level is not None and apex is not installed.

        Warning: apex needs to be installed to utilize this feature.
        """
        self.amp_args = amp_args

        if amp_args is None:
            logging.info(f"AMP disabled")
        else:
            if not apex_available:
                raise RuntimeError("apex is not installed, cannot enable amp")

            logging.info(f"AMP enabled with args {amp_args}")
        return self 
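
The docstring above suggests enabling mixed precision with amp_args={"opt_level": "O1"}. As a rough sketch (not the actual ClassyVision code), such a dict is ultimately forwarded to apex.amp.initialize; the small model and optimizer here are placeholders.

import torch
import apex

model = torch.nn.Linear(8, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

amp_args = {"opt_level": "O1"}  # as recommended in the docstring above
if amp_args is not None:
    # Unpack the stored arguments straight into apex.amp.initialize.
    model, optimizer = apex.amp.initialize(model, optimizer, **amp_args)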
Example #5
Source File: classification_task.py    From ClassyVision with MIT License
def get_classy_state(self, deep_copy: bool = False):
        """Returns serialiable state of task

        Args:
            deep_copy: If true, does a deep copy of state before returning.
        """
        optimizer_state = {}
        if self.optimizer is not None:
            optimizer_state = self.optimizer.get_classy_state()

        classy_state_dict = {
            "train": self.train,
            "base_model": self.base_model.get_classy_state(),
            "meters": [meter.get_classy_state() for meter in self.meters],
            "optimizer": optimizer_state,
            "phase_idx": self.phase_idx,
            "train_phase_idx": self.train_phase_idx,
            "num_updates": self.num_updates,
            "losses": self.losses,
            "hooks": {hook.name(): hook.get_classy_state() for hook in self.hooks},
            "loss": {},
        }
        if "train" in self.datasets and self._is_checkpointable_dataset(
            self.datasets["train"]
        ):
            classy_state_dict["train_dataset_iterator"] = self.datasets[
                "train"
            ].get_classy_state()

        if isinstance(self.loss, ClassyLoss):
            classy_state_dict["loss"] = self.loss.get_classy_state()
        if self.amp_args is not None:
            classy_state_dict["amp"] = apex.amp.state_dict()
        if deep_copy:
            classy_state_dict = copy.deepcopy(classy_state_dict)
        return classy_state_dict 
Example #6
Source File: utils.py    From fastNLP with Apache License 2.0
def _check_fp16():
    if amp is None:
        raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
    if not torch.backends.cudnn.enabled:
        raise RuntimeError("Amp requires cudnn backend to be enabled.") 
Example #7
Source File: infer.py    From NLP_Toolkit with Apache License 2.0
def evaluate_corpus_bleu(args, early_stopping=True, stop_no=1000):
    args.batch_size = 1
    train_iter, FR, EN, train_length = load_dataloaders(args)
    src_vocab = len(EN.vocab)
    trg_vocab = len(FR.vocab)
    
    cuda = torch.cuda.is_available()
    if args.fp16:    
        from apex import amp
    else:
        amp = None
    net, _, _, _, _, _ = load_model_and_optimizer(args, src_vocab, \
                                                  trg_vocab, cuda, amp=amp)
    
    net.eval()
    trg_init = FR.vocab.stoi["<sos>"]
    trg_init = Variable(torch.LongTensor([trg_init])).unsqueeze(0)
    
    logger.info("Evaluating corpus bleu...")
    refs = []; hyps = []
    with torch.no_grad():
        for i, data in tqdm(enumerate(train_iter), total=len(train_iter)):
            trg_input = trg_init
            labels = data.FR[:,1:].contiguous().view(-1)
            src_mask, trg_mask = create_masks(data.EN, trg_input)
            if cuda:
                data.EN = data.EN.cuda(); trg_input = trg_input.cuda(); labels = labels.cuda()
                src_mask = src_mask.cuda(); trg_mask = trg_mask.cuda()
            stepwise_translated_words, final_step_words = net(data.EN, trg_input, src_mask, None,\
                                                              infer=True, trg_vocab_obj=FR)
            refs.append([stepwise_translated_words]) # need to remove <eos> tokens
            hyps.append([FR.vocab.itos[i] for i in labels[:-1]])
            if early_stopping and ((i + 1) % stop_no == 0):
                print(refs); print(hyps)
                break
    score = calculate_bleu(refs, hyps, corpus_level=True)
    print("Corpus bleu score: %.5f" % score)
    return score 
Example #8
Source File: classification_task.py    From ClassyVision with MIT License
def set_classy_state(self, state):
        """Set task state

        Args:
            state: Dict containing state of a task
        """
        # some settings are different in test only
        self.train = False if self.test_only else state["train"]
        if not self.test_only:
            self.phase_idx = state["phase_idx"]
            self.num_updates = state["num_updates"]
            self.train_phase_idx = state["train_phase_idx"]
            self.losses = state["losses"]
            for meter, meter_state in zip(self.meters, state["meters"]):
                meter.set_classy_state(meter_state)

        self.base_model.set_classy_state(state["base_model"])
        if self.optimizer is not None:
            self.optimizer.set_classy_state(state["optimizer"])
        if state.get("loss") and isinstance(self.loss, ClassyLoss):
            self.loss.set_classy_state(state["loss"])

        if "amp" in state:
            apex.amp.load_state_dict(state["amp"])

        for hook in self.hooks:
            # we still want to be able to run when new hooks are added or old
            # hooks are removed
            if hook.name() in state["hooks"]:
                hook.set_classy_state(state["hooks"][hook.name()])
            else:
                logging.warn(f"No state found for hook: {hook.name()}")

        if "train" in self.datasets and self._is_checkpointable_dataset(
            self.datasets["train"]
        ):
            self.datasets["train"].set_classy_state(state.get("train_dataset_iterator"))

        # TODO (mannatsingh): Figure out how to set the state of the dataloaders
        # Re-build dataloader & re-create iterator.
        self._recreate_data_loader_from_dataset()
        self.create_data_iterator()
        # Set up pytorch module in train vs eval mode, update optimizer.
        self._set_model_train_mode() 
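
Examples #5 and #8 show that apex.amp keeps its own loss-scaling state, which has to be checkpointed and restored alongside the model and optimizer. A condensed sketch of that round trip with a placeholder model and optimizer; amp.initialize must have been called before both the save and the load.

import torch
from apex import amp

model = torch.nn.Linear(8, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

# Save model, optimizer, and amp loss-scaler state together.
torch.save({
    "model": model.state_dict(),
    "optimizer": optimizer.state_dict(),
    "amp": amp.state_dict(),
}, "checkpoint.pt")

# Restore, e.g. in a new process after amp.initialize has run again.
state = torch.load("checkpoint.pt")
model.load_state_dict(state["model"])
optimizer.load_state_dict(state["optimizer"])
amp.load_state_dict(state["amp"])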
Example #9
Source File: classification_task.py    From ClassyVision with MIT License
def train_step(self):
        """Train step to be executed in train loop."""

        self.last_batch = None

        # Process next sample
        sample = next(self.get_data_iterator())

        assert isinstance(sample, dict) and "input" in sample and "target" in sample, (
            f"Returned sample [{sample}] is not a map with 'input' and"
            + "'target' keys"
        )

        # Copy sample to GPU
        target = sample["target"]
        if self.use_gpu:
            sample = recursive_copy_to_gpu(sample, non_blocking=True)

        if self.mixup_transform is not None:
            sample = self.mixup_transform(sample)

        with torch.enable_grad():
            # Forward pass
            output = self.model(sample["input"])

            local_loss = self.compute_loss(output, sample)

            loss = local_loss.detach().clone()

            self.losses.append(loss.data.cpu().item() * target.size(0))

            self.update_meters(output, sample)

        # Run backwards pass / update optimizer
        if self.amp_args is not None:
            self.optimizer.zero_grad()
            with apex.amp.scale_loss(
                local_loss, self.optimizer.optimizer
            ) as scaled_loss:
                scaled_loss.backward()
        else:
            self.optimizer.backward(local_loss)

        self.check_inf_nan(loss)

        self.optimizer.update_schedule_on_step(self.where)
        self.optimizer.step()

        self.num_updates += self.get_global_batchsize()

        # Move some data to the task so hooks get a chance to access it
        self.last_batch = LastBatchInfo(
            loss=loss, output=output, target=target, sample=sample
        ) 
Example #10
Source File: infer.py    From NLP_Toolkit with Apache License 2.0
def __init__(self, args=None):
        if args is None:
            self.args = load_pickle("args.pkl")
        else:
            self.args = args
        self.cuda = torch.cuda.is_available()
        self.args.batch_size = 1
        
        if self.args.model_no != 1:
            logger.info("Loading tokenizer and model...")
            self.tokenizer_en = tokener(args.src_lang)
            train_iter, FR, EN, train_length = load_dataloaders(self.args)
            self.FR = FR
            self.EN = EN
            self.train_iter = train_iter
            self.train_length = train_length
            self.src_vocab = len(EN.vocab)
            self.trg_vocab = len(FR.vocab)
            
            if self.args.fp16:    
                from apex import amp
            else:
                amp = None
            self.amp = amp
            net, _, _, _, _, _ = load_model_and_optimizer(self.args, self.src_vocab, \
                                                          self.trg_vocab, self.cuda, amp=amp)
            self.net = net
            self.net.eval()
            trg_init = FR.vocab.stoi["<sos>"]
            self.trg_init = Variable(torch.LongTensor([trg_init])).unsqueeze(0)
        elif self.args.model_no == 1:
            from .mass.interactive import Translator
            src, tgt = "zh-en".split('-')
            logger.info("Loading translator, tokenizer...")
            self.translator = Translator(data_path='./data/data-bin/processed_data_%s_%s' % (src, tgt),\
                                         checkpoint_path="./data/checkpoints/%s_%s/checkpoint50.pt" % (src, tgt),\
                                         task='translation',\
                                         user_dir='',\
                                         s=src, t=tgt,\
                                         langs='%s,%s' % (src, tgt),\
                                         mt_steps='%s-%s' % (src, tgt),\
                                         source_langs=src,\
                                         target_langs=tgt,\
                                         beam=5,\
                                         use_cuda=args.cuda) 
Example #11
Source File: training_loop.py    From pytorch-lightning with Apache License 2.0
def call_optimizer_step(self, optimizer, opt_idx, batch_idx, split_batch):
        # calls .step(), .zero_grad()
        # override function to modify this behavior
        model = self.get_model()

        with self.profiler.profile('optimizer_step'):
            lambda_closure = lambda: self.optimizer_closure(
                split_batch,
                batch_idx,
                opt_idx,
                optimizer,
                self.hiddens
            ).loss

            # apply TPU optimizer
            if self.use_tpu and XLA_AVAILABLE:
                model.optimizer_step(self.current_epoch, batch_idx,
                                     optimizer, opt_idx, lambda_closure, on_tpu=True)

            # for LBFGS do something a bit different
            elif isinstance(optimizer, torch.optim.LBFGS):

                # native amp + lbfgs is a no go right now
                if self.use_amp and NATIVE_AMP_AVALAIBLE:
                    raise MisconfigurationException(
                        'native PyTorch amp and lbfgs are not compatible.'
                        ' To request, please file a Github issue in PyTorch and tag @mcarilli')
                model.optimizer_step(self.current_epoch, batch_idx, optimizer, opt_idx, lambda_closure,
                                     using_lbfgs=True)


            # when using 16-bit
            else:
                native_amp = self.use_amp and NATIVE_AMP_AVALAIBLE
                model.optimizer_step(self.current_epoch, batch_idx, optimizer, opt_idx, lambda_closure,
                                     using_native_amp=native_amp)

            # in native 16-bit we need to update scaler after optimizer step
            if self.use_amp and NATIVE_AMP_AVALAIBLE:
                self.scaler.update()

            # model hook
            model.on_before_zero_grad(optimizer)

            # clear gradients
            model.optimizer_zero_grad(self.current_epoch, batch_idx, optimizer, opt_idx) 
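
The NATIVE_AMP_AVALAIBLE branches above refer to PyTorch's built-in AMP (torch.cuda.amp), where a GradScaler owns the scaler.update() call that follows the optimizer step. For contrast with apex.amp, here is a minimal sketch of that native loop using placeholder model, optimizer, and data (PyTorch 1.6+).

import torch

model = torch.nn.Linear(8, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = torch.cuda.amp.GradScaler()

for _ in range(3):
    inputs = torch.randn(4, 8).cuda()
    targets = torch.randint(0, 2, (4,)).cuda()

    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = torch.nn.functional.cross_entropy(model(inputs), targets)
    scaler.scale(loss).backward()  # scale the loss before backward
    scaler.step(optimizer)         # unscale gradients, then optimizer.step()
    scaler.update()                # adjust the loss scale for the next step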
Example #12
Source File: distrib_parts.py    From pytorch-lightning with Apache License 2.0
def dp_train(self, model):
        # call setup after the ddp process has connected
        self.setup('fit')
        if self.is_function_implemented('setup', model):
            model.setup('fit')

        # CHOOSE OPTIMIZER
        # allow for lr schedulers as well
        self.optimizers, self.lr_schedulers, self.optimizer_frequencies = self.init_optimizers(model)

        model.cuda(self.root_gpu)

        # hack forward to do autocast for the user
        model_autocast_original_forward = model.forward
        if self.use_amp and NATIVE_AMP_AVALAIBLE:
            # wrap the user's forward in autocast and give it back at the end
            model.forward = torch.cuda.amp.autocast()(model.forward)

        # TODO: remove with dropping NVIDIA AMP support
        # check for this bug (amp + dp + opt_level other than O1 doesn't work)
        # https://github.com/NVIDIA/apex/issues/227
        if self.use_dp and self.use_amp and not NATIVE_AMP_AVALAIBLE:
            if self.amp_level == 'O2':
                raise MisconfigurationException(
                    f'Amp level {self.amp_level} with DataParallel is not supported.'
                    f' See this note from NVIDIA for more info: https://github.com/NVIDIA/apex/issues/227.'
                    f' We recommend you switch to ddp if you want to use amp')
            else:
                model, optimizers = model.configure_apex(amp, model, self.optimizers, self.amp_level)
                self.reinit_scheduler_properties(optimizers, self.lr_schedulers)

        # create list of device ids
        device_ids = self.data_parallel_device_ids
        if isinstance(device_ids, int):
            device_ids = list(range(device_ids))

        # set dp device
        torch.cuda.set_device(self.root_gpu)

        model = LightningDataParallel(model, device_ids=device_ids)

        self.run_pretrain_routine(model)

        model.forward = model_autocast_original_forward 
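
The "hack forward" trick in dp_train relies on torch.cuda.amp.autocast being usable as a function wrapper, so the original forward can be swapped out and restored afterwards. A small sketch of that pattern on a placeholder module:

import torch

model = torch.nn.Linear(8, 2).cuda()

original_forward = model.forward
# Wrap forward so every call runs under autocast, as dp_train() does above.
model.forward = torch.cuda.amp.autocast()(model.forward)

out = model.forward(torch.randn(4, 8).cuda())  # runs in mixed precision

# Give the original forward back at the end, mirroring the example.
model.forward = original_forward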
Example #13
Source File: distrib_parts.py    From pytorch-lightning with Apache License 2.0
def horovod_train(self, model):
        # call setup after the ddp process has connected
        self.setup('fit')
        if self.is_function_implemented('setup', model):
            model.setup('fit')

        if torch.cuda.is_available() and self.on_gpu:
            # Horovod: pin GPU to local rank
            assert self.root_gpu == hvd.local_rank()
            torch.cuda.set_device(self.root_gpu)
            model.cuda(self.root_gpu)

        # avoid duplicating progress bar
        if hvd.rank() != 0 and self.progress_bar_callback is not None:
            self.progress_bar_callback.disable()

        # CHOOSE OPTIMIZER
        # allow for lr schedulers as well
        self.optimizers, self.lr_schedulers, self.optimizer_frequencies = self.init_optimizers(model)

        # Horovod: scale the learning rate by the number of workers to account for
        # increased total batch size
        for optimizer in self.optimizers:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= hvd.size()

        if self.use_amp:
            # An example
            model, optimizers = model.configure_apex(amp, model, self.optimizers, self.amp_level)
            self.optimizers = optimizers
            self.reinit_scheduler_properties(self.optimizers, self.lr_schedulers)

        # Horovod: broadcast parameters & optimizer state to ensure consistent initialization
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)
        for optimizer in self.optimizers:
            hvd.broadcast_optimizer_state(optimizer, root_rank=0)

        def filter_named_parameters(model, optimizer):
            opt_params = set([p for group in optimizer.param_groups for p in group.get('params', [])])
            return [(name, p) for name, p in model.named_parameters() if p in opt_params]

        # Horovod: wrap optimizers to perform gradient aggregation via allreduce
        self.optimizers = [
            hvd.DistributedOptimizer(optimizer, named_parameters=filter_named_parameters(model, optimizer))
            for optimizer in self.optimizers
        ]

        # Update logger rank info from Horovod to avoid race conditions from  different ranks
        # creating directories / writing files in the same locations.
        self.global_rank = hvd.rank()
        rank_zero_only.rank = self.global_rank

        with ExitStack() as stack:
            for optimizer in self.optimizers:
                # Synchronization will be performed explicitly following backward()
                stack.enter_context(optimizer.skip_synchronize())

            self.run_pretrain_routine(model)

        # Make sure all workers have finished training before returning to the user
        hvd.join()