Python detectron.utils.c2.CudaScope() Examples

The following are 24 code examples of detectron.utils.c2.CudaScope(), drawn from Detectron and several of its forks. Each example lists the source file and project it comes from. You may also want to check out the other available functions and classes of the detectron.utils.c2 module.
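For context, CudaScope(gpu_id) is used throughout these examples as a context manager that pins the enclosed Caffe2 operations and blob feeds to a specific CUDA device. The snippet below is a minimal sketch of such a helper built on the standard Caffe2 DeviceScope/DeviceOption API; it is an illustration of the idea, not the verbatim detectron.utils.c2 source.

# Minimal sketch of a CudaScope-style helper, assuming the standard
# Caffe2 DeviceScope/DeviceOption API. Illustration only, not the
# detectron.utils.c2 source.
import contextlib

from caffe2.proto import caffe2_pb2
from caffe2.python import core


@contextlib.contextmanager
def CudaScope(gpu_id):
    """Run the enclosed ops and blob feeds on CUDA device `gpu_id`."""
    gpu_dev = core.DeviceOption(caffe2_pb2.CUDA, gpu_id)
    with core.DeviceScope(gpu_dev):
        yield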
Example #1
Source File: net.py    From Detectron with Apache License 2.0
def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter blobs
    on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            for i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()]) 
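The slicing all_blobs[i::blobs_per_gpu] above relies on the blob list being grouped per GPU; the hypothetical toy example below (blob names invented for illustration) shows how the stride picks out every GPU's copy of the same parameter.

# Hypothetical illustration (not from the project) of the slicing used above:
# with parameters grouped per GPU, all_blobs[i::blobs_per_gpu] selects the
# copies of the i-th parameter on every GPU.
all_blobs = ['gpu_0/conv_w', 'gpu_0/conv_b', 'gpu_1/conv_w', 'gpu_1/conv_b']
NUM_GPUS = 2
blobs_per_gpu = len(all_blobs) // NUM_GPUS  # 2
print(all_blobs[0::blobs_per_gpu])  # ['gpu_0/conv_w', 'gpu_1/conv_w']
print(all_blobs[1::blobs_per_gpu])  # ['gpu_0/conv_b', 'gpu_1/conv_b']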
Example #2
Source File: detector.py    From CBNet with Apache License 2.0
def _CorrectMomentum(self, correction):
        """The MomentumSGDUpdate op implements the update V as

            V := mu * V + lr * grad,

        where mu is the momentum factor, lr is the learning rate, and grad is
        the stochastic gradient. Since V is not defined independently of the
        learning rate (as it should ideally be), when the learning rate is
        changed we should scale the update history V in order to make it
        compatible in scale with lr * grad.
        """
        logger.info(
            'Scaling update history by {:.6f} (new lr / old lr)'.
            format(correction))
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                for param in self.TrainableParams(gpu_id=i):
                    op = core.CreateOperator(
                        'Scale', [param + '_momentum'], [param + '_momentum'],
                        scale=correction)
                    workspace.RunOperatorOnce(op) 
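A toy calculation in plain Python (hypothetical numbers, not project code) shows why the history is multiplied by new_lr / old_lr: V accumulates lr-scaled gradients, so rescaling it keeps it on the same scale as lr * grad after the learning rate changes.

# Toy illustration of the momentum correction (hypothetical values).
mu, old_lr, new_lr, grad = 0.9, 0.01, 0.001, 2.0
V = old_lr * grad                  # history built up under the old lr
correction = new_lr / old_lr       # 0.1, the factor applied by _CorrectMomentum
V *= correction                    # now comparable in scale with new_lr * grad
V = mu * V + new_lr * grad         # next MomentumSGD-style update
print(V)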
Example #3
Source File: optimizer.py    From CBNet with Apache License 2.0
def _add_allreduce_graph(model):
    """Construct the graph that performs Allreduce on the gradients."""
    # Need to all-reduce the per-GPU gradients if training with more than 1 GPU
    all_params = model.TrainableParams()
    assert len(all_params) % cfg.NUM_GPUS == 0
    # The model parameters are replicated on each GPU, get the number
    # distinct parameter blobs (i.e., the number of parameter blobs on
    # each GPU)
    params_per_gpu = int(len(all_params) / cfg.NUM_GPUS)
    with c2_utils.CudaScope(0):
        # Iterate over distinct parameter blobs
        for i in range(params_per_gpu):
            # Gradients from all GPUs for this parameter blob
            gradients = [
                model.param_to_grad[p] for p in all_params[i::params_per_gpu]
            ]
            if len(gradients) > 0:
                if cfg.USE_NCCL:
                    model.net.NCCLAllreduce(gradients, gradients)
                else:
                    muji.Allreduce(model.net, gradients, reduced_affix='') 
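For intuition, the allreduce step leaves every GPU's copy of a gradient equal to the sum over all GPUs; the plain-Python sketch below (invented blob names, no Caffe2 calls) illustrates that semantics.

# Hypothetical sketch of allreduce semantics (plain Python, not Caffe2):
# each GPU holds its own copy of a gradient; after allreduce every copy
# holds the sum over all GPUs.
per_gpu_grads = {'gpu_0/conv_w_grad': 1.0, 'gpu_1/conv_w_grad': 3.0}
total = sum(per_gpu_grads.values())
reduced = {name: total for name in per_gpu_grads}
print(reduced)  # both copies now hold 4.0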
Example #4
Source File: net.py    From CBNet with Apache License 2.0
def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter blobs
    on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            for i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()]) 
Example #5
Source File: detector.py    From Detectron-DA-Faster-RCNN with Apache License 2.0
def _CorrectMomentum(self, correction):
        """The MomentumSGDUpdate op implements the update V as

            V := mu * V + lr * grad,

        where mu is the momentum factor, lr is the learning rate, and grad is
        the stochastic gradient. Since V is not defined independently of the
        learning rate (as it should ideally be), when the learning rate is
        changed we should scale the update history V in order to make it
        compatible in scale with lr * grad.
        """
        logger.info(
            'Scaling update history by {:.6f} (new lr / old lr)'.
            format(correction))
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                for param in self.TrainableParams(gpu_id=i):
                    op = core.CreateOperator(
                        'Scale', [param + '_momentum'], [param + '_momentum'],
                        scale=correction)
                    workspace.RunOperatorOnce(op) 
Example #6
Source File: optimizer.py    From Detectron-DA-Faster-RCNN with Apache License 2.0
def _add_allreduce_graph(model):
    """Construct the graph that performs Allreduce on the gradients."""
    # Need to all-reduce the per-GPU gradients if training with more than 1 GPU
    all_params = model.TrainableParams()
    assert len(all_params) % cfg.NUM_GPUS == 0
    # The model parameters are replicated on each GPU, get the number
    # distinct parameter blobs (i.e., the number of parameter blobs on
    # each GPU)
    params_per_gpu = int(len(all_params) / cfg.NUM_GPUS)
    with c2_utils.CudaScope(0):
        # Iterate over distinct parameter blobs
        for i in range(params_per_gpu):
            # Gradients from all GPUs for this parameter blob
            gradients = [
                model.param_to_grad[p] for p in all_params[i::params_per_gpu]
            ]
            if len(gradients) > 0:
                if cfg.USE_NCCL:
                    model.net.NCCLAllreduce(gradients, gradients)
                else:
                    muji.Allreduce(model.net, gradients, reduced_affix='') 
Example #7
Source File: net.py    From Detectron-DA-Faster-RCNN with Apache License 2.0
def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter blobs
    on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            for i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()]) 
Example #8
Source File: detector.py    From Detectron with Apache License 2.0
def _CorrectMomentum(self, correction):
        """The MomentumSGDUpdate op implements the update V as

            V := mu * V + lr * grad,

        where mu is the momentum factor, lr is the learning rate, and grad is
        the stochastic gradient. Since V is not defined independently of the
        learning rate (as it should ideally be), when the learning rate is
        changed we should scale the update history V in order to make it
        compatible in scale with lr * grad.
        """
        logger.info(
            'Scaling update history by {:.6f} (new lr / old lr)'.
            format(correction))
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                for param in self.TrainableParams(gpu_id=i):
                    op = core.CreateOperator(
                        'Scale', [param + '_momentum'], [param + '_momentum'],
                        scale=correction)
                    workspace.RunOperatorOnce(op) 
Example #9
Source File: optimizer.py    From Detectron with Apache License 2.0
def _add_allreduce_graph(model):
    """Construct the graph that performs Allreduce on the gradients."""
    # Need to all-reduce the per-GPU gradients if training with more than 1 GPU
    all_params = model.TrainableParams()
    assert len(all_params) % cfg.NUM_GPUS == 0
    # The model parameters are replicated on each GPU, get the number
    # distinct parameter blobs (i.e., the number of parameter blobs on
    # each GPU)
    params_per_gpu = int(len(all_params) / cfg.NUM_GPUS)
    with c2_utils.CudaScope(0):
        # Iterate over distinct parameter blobs
        for i in range(params_per_gpu):
            # Gradients from all GPUs for this parameter blob
            gradients = [
                model.param_to_grad[p] for p in all_params[i::params_per_gpu]
            ]
            if len(gradients) > 0:
                if cfg.USE_NCCL:
                    model.net.NCCLAllreduce(gradients, gradients)
                else:
                    muji.Allreduce(model.net, gradients, reduced_affix='') 
Example #10
Source File: net.py    From KL-Loss with Apache License 2.0
def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter blobs
    on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            for i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()]) 
Example #11
Source File: detector.py    From Detectron-Cascade-RCNN with Apache License 2.0
def _CorrectMomentum(self, correction):
        """The MomentumSGDUpdate op implements the update V as

            V := mu * V + lr * grad,

        where mu is the momentum factor, lr is the learning rate, and grad is
        the stochastic gradient. Since V is not defined independently of the
        learning rate (as it should ideally be), when the learning rate is
        changed we should scale the update history V in order to make it
        compatible in scale with lr * grad.
        """
        logger.info(
            'Scaling update history by {:.6f} (new lr / old lr)'.
            format(correction))
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                for param in self.TrainableParams(gpu_id=i):
                    op = core.CreateOperator(
                        'Scale', [param + '_momentum'], [param + '_momentum'],
                        scale=correction)
                    workspace.RunOperatorOnce(op) 
Example #12
Source File: optimizer.py    From Detectron-Cascade-RCNN with Apache License 2.0
def _add_allreduce_graph(model):
    """Construct the graph that performs Allreduce on the gradients."""
    # Need to all-reduce the per-GPU gradients if training with more than 1 GPU
    all_params = model.TrainableParams()
    assert len(all_params) % cfg.NUM_GPUS == 0
    # The model parameters are replicated on each GPU, get the number
    # distinct parameter blobs (i.e., the number of parameter blobs on
    # each GPU)
    params_per_gpu = int(len(all_params) / cfg.NUM_GPUS)
    with c2_utils.CudaScope(0):
        # Iterate over distinct parameter blobs
        for i in range(params_per_gpu):
            # Gradients from all GPUs for this parameter blob
            gradients = [
                model.param_to_grad[p] for p in all_params[i::params_per_gpu]
            ]
            if len(gradients) > 0:
                if cfg.USE_NCCL:
                    model.net.NCCLAllreduce(gradients, gradients)
                else:
                    muji.Allreduce(model.net, gradients, reduced_affix='') 
Example #13
Source File: net.py    From Detectron-Cascade-RCNN with Apache License 2.0
def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter blobs
    on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            for i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()]) 
Example #14
Source File: detector.py    From Clustered-Object-Detection-in-Aerial-Image with Apache License 2.0
def _CorrectMomentum(self, correction):
        """The MomentumSGDUpdate op implements the update V as

            V := mu * V + lr * grad,

        where mu is the momentum factor, lr is the learning rate, and grad is
        the stochastic gradient. Since V is not defined independently of the
        learning rate (as it should ideally be), when the learning rate is
        changed we should scale the update history V in order to make it
        compatible in scale with lr * grad.
        """
        logger.info(
            'Scaling update history by {:.6f} (new lr / old lr)'.
            format(correction))
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                for param in self.TrainableParams(gpu_id=i):
                    op = core.CreateOperator(
                        'Scale', [param + '_momentum'], [param + '_momentum'],
                        scale=correction)
                    workspace.RunOperatorOnce(op) 
Example #15
Source File: optimizer.py    From Clustered-Object-Detection-in-Aerial-Image with Apache License 2.0
def _add_allreduce_graph(model):
    """Construct the graph that performs Allreduce on the gradients."""
    # Need to all-reduce the per-GPU gradients if training with more than 1 GPU
    all_params = model.TrainableParams()
    assert len(all_params) % cfg.NUM_GPUS == 0
    # The model parameters are replicated on each GPU, get the number
    # distinct parameter blobs (i.e., the number of parameter blobs on
    # each GPU)
    params_per_gpu = int(len(all_params) / cfg.NUM_GPUS)
    with c2_utils.CudaScope(0):
        # Iterate over distinct parameter blobs
        for i in range(params_per_gpu):
            # Gradients from all GPUs for this parameter blob
            gradients = [
                model.param_to_grad[p] for p in all_params[i::params_per_gpu]
            ]
            if len(gradients) > 0:
                if cfg.USE_NCCL:
                    model.net.NCCLAllreduce(gradients, gradients)
                else:
                    muji.Allreduce(model.net, gradients, reduced_affix='') 
Example #16
Source File: net.py    From Clustered-Object-Detection-in-Aerial-Image with Apache License 2.0
def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter blobs
    on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            for i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()]) 
Example #17
Source File: detector.py    From KL-Loss with Apache License 2.0
def _CorrectMomentum(self, correction):
        """The MomentumSGDUpdate op implements the update V as

            V := mu * V + lr * grad,

        where mu is the momentum factor, lr is the learning rate, and grad is
        the stochastic gradient. Since V is not defined independently of the
        learning rate (as it should ideally be), when the learning rate is
        changed we should scale the update history V in order to make it
        compatible in scale with lr * grad.
        """
        logger.info(
            'Scaling update history by {:.6f} (new lr / old lr)'.
            format(correction))
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                for param in self.TrainableParams(gpu_id=i):
                    op = core.CreateOperator(
                        'Scale', [param + '_momentum'], [param + '_momentum'],
                        scale=correction)
                    workspace.RunOperatorOnce(op) 
Example #18
Source File: optimizer.py    From KL-Loss with Apache License 2.0
def _add_allreduce_graph(model):
    """Construct the graph that performs Allreduce on the gradients."""
    # Need to all-reduce the per-GPU gradients if training with more than 1 GPU
    all_params = model.TrainableParams()
    assert len(all_params) % cfg.NUM_GPUS == 0
    # The model parameters are replicated on each GPU, get the number
    # distinct parameter blobs (i.e., the number of parameter blobs on
    # each GPU)
    params_per_gpu = int(len(all_params) / cfg.NUM_GPUS)
    with c2_utils.CudaScope(0):
        # Iterate over distinct parameter blobs
        for i in range(params_per_gpu):
            # Gradients from all GPUs for this parameter blob
            gradients = [
                model.param_to_grad[p] for p in all_params[i::params_per_gpu]
            ]
            if len(gradients) > 0:
                if cfg.USE_NCCL:
                    model.net.NCCLAllreduce(gradients, gradients)
                else:
                    muji.Allreduce(model.net, gradients, reduced_affix='') 
Example #19
Source File: detector.py    From Detectron-Cascade-RCNN with Apache License 2.0
def _SetNewLr(self, cur_lr, new_lr):
        """Do the actual work of updating the model and workspace blobs.
        """
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                workspace.FeedBlob(
                    'gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32))
        ratio = _get_lr_change_ratio(cur_lr, new_lr)
        if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \
                ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD:
            self._CorrectMomentum(new_lr / cur_lr) 
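_get_lr_change_ratio is referenced here but not shown in the excerpt. Below is a plausible sketch of that helper, assuming it returns the symmetric ratio max(new/old, old/new) so that both large increases and large decreases in the learning rate trigger the momentum correction.

# Plausible sketch of the _get_lr_change_ratio helper referenced above
# (assumption: symmetric ratio between the two rates, guarded against
# division by zero). Illustration only, not verified project source.
import numpy as np

def _get_lr_change_ratio(cur_lr, new_lr):
    eps = 1e-10
    return np.max(
        (new_lr / np.maximum(cur_lr, eps), cur_lr / np.maximum(new_lr, eps))
    )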
Example #20
Source File: detector.py    From Detectron with Apache License 2.0
def _SetNewLr(self, cur_lr, new_lr):
        """Do the actual work of updating the model and workspace blobs.
        """
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                workspace.FeedBlob(
                    'gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32))
        ratio = _get_lr_change_ratio(cur_lr, new_lr)
        if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \
                ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD:
            self._CorrectMomentum(new_lr / cur_lr) 
Example #21
Source File: detector.py    From Clustered-Object-Detection-in-Aerial-Image with Apache License 2.0
def _SetNewLr(self, cur_lr, new_lr):
        """Do the actual work of updating the model and workspace blobs.
        """
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                workspace.FeedBlob(
                    'gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32))
        ratio = _get_lr_change_ratio(cur_lr, new_lr)
        if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \
                ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD:
            self._CorrectMomentum(new_lr / cur_lr) 
Example #22
Source File: detector.py    From Detectron-DA-Faster-RCNN with Apache License 2.0
def _SetNewLr(self, cur_lr, new_lr):
        """Do the actual work of updating the model and workspace blobs.
        """
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                workspace.FeedBlob(
                    'gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32))
        ratio = _get_lr_change_ratio(cur_lr, new_lr)
        if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \
                ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD:
            self._CorrectMomentum(new_lr / cur_lr) 
Example #23
Source File: detector.py    From KL-Loss with Apache License 2.0
def _SetNewLr(self, cur_lr, new_lr):
        """Do the actual work of updating the model and workspace blobs.
        """
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                workspace.FeedBlob(
                    'gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32))
        ratio = _get_lr_change_ratio(cur_lr, new_lr)
        if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \
                ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD:
            self._CorrectMomentum(new_lr / cur_lr) 
Example #24
Source File: detector.py    From CBNet with Apache License 2.0
def _SetNewLr(self, cur_lr, new_lr):
        """Do the actual work of updating the model and workspace blobs.
        """
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                workspace.FeedBlob(
                    'gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32))
        ratio = _get_lr_change_ratio(cur_lr, new_lr)
        if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \
                ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD:
            self._CorrectMomentum(new_lr / cur_lr)