Python caffe2.python.workspace.FeedBlob() Examples
The following are 30 code examples of caffe2.python.workspace.FeedBlob(), drawn from open-source projects. The source file and project for each example are noted above its code.
You may also want to check out all available functions/classes of the module caffe2.python.workspace, or try the search function.
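At its core, workspace.FeedBlob(name, value) copies a Python value (typically a NumPy array) into a named blob in the current workspace, and workspace.FetchBlob(name) reads it back. A minimal round-trip sketch, assuming a CPU build of Caffe2:

import numpy as np
from caffe2.python import workspace

# Feed a NumPy array into the workspace under the name 'X'.
x = np.random.rand(4, 3).astype(np.float32)
workspace.FeedBlob('X', x)

# Fetch it back as a regular NumPy array.
x_back = workspace.FetchBlob('X')
assert np.array_equal(x, x_back)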
Example #1
Source File: test_batch_permutation_op.py From Detectron with Apache License 2.0

def _run_op_test(self, X, I, check_grad=False):
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
        op = core.CreateOperator('BatchPermutation', ['X', 'I'], ['Y'])
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('I', I)
    workspace.RunOperatorOnce(op)
    Y = workspace.FetchBlob('Y')

    if check_grad:
        gc = gradient_checker.GradientChecker(
            stepsize=0.1,
            threshold=0.001,
            device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
        )
        res, grad, grad_estimated = gc.CheckSimple(op, [X, I], 0, [0])
        self.assertTrue(res, 'Grad check failed')

    Y_ref = X[I]
    np.testing.assert_allclose(Y, Y_ref, rtol=1e-5, atol=1e-08)
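The helper above is driven by test methods that build the inputs; a hypothetical invocation (names and shapes illustrative, not Detectron's exact test code):

# Inside a unittest.TestCase method (hypothetical shapes):
N = 8
X = np.random.randn(N, 16, 7, 7).astype(np.float32)  # per-example feature maps
I = np.random.permutation(N).astype(np.int32)        # permutation of batch indices
self._run_op_test(X, I, check_grad=True)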
Example #2
Source File: test_batch_permutation_op.py From KL-Loss with Apache License 2.0

def _run_op_test(self, X, I, check_grad=False):
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
        op = core.CreateOperator('BatchPermutation', ['X', 'I'], ['Y'])
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('I', I)
    workspace.RunOperatorOnce(op)
    Y = workspace.FetchBlob('Y')

    if check_grad:
        gc = gradient_checker.GradientChecker(
            stepsize=0.1,
            threshold=0.001,
            device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
        )
        res, grad, grad_estimated = gc.CheckSimple(op, [X, I], 0, [0])
        self.assertTrue(res, 'Grad check failed')

    Y_ref = X[I]
    np.testing.assert_allclose(Y, Y_ref, rtol=1e-5, atol=1e-08)
Example #3
Source File: test_spatial_narrow_as_op.py From KL-Loss with Apache License 2.0

def _run_test(self, A, B, check_grad=False):
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
        op = core.CreateOperator('SpatialNarrowAs', ['A', 'B'], ['C'])
        workspace.FeedBlob('A', A)
        workspace.FeedBlob('B', B)
    workspace.RunOperatorOnce(op)
    C = workspace.FetchBlob('C')

    if check_grad:
        gc = gradient_checker.GradientChecker(
            stepsize=0.005,
            threshold=0.005,
            device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
        )
        res, grad, grad_estimated = gc.CheckSimple(op, [A, B], 0, [0])
        self.assertTrue(res, 'Grad check failed')

    dims = C.shape
    C_ref = A[:dims[0], :dims[1], :dims[2], :dims[3]]
    np.testing.assert_allclose(C, C_ref, rtol=1e-5, atol=1e-08)
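For reference, SpatialNarrowAs crops A's spatial extent down to B's; a hypothetical invocation consistent with the reference slice above (shapes illustrative):

# Inside a unittest.TestCase method (hypothetical shapes):
A = np.random.randn(2, 3, 14, 14).astype(np.float32)  # larger spatial extent
B = np.random.randn(2, 3, 12, 12).astype(np.float32)  # target spatial extent
self._run_test(A, B, check_grad=True)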
Example #4
Source File: net.py From KL-Loss with Apache License 2.0

def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter
    blobs on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            for i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()])
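The strided slice all_blobs[i::blobs_per_gpu] assumes the parameter list is ordered GPU-major with an identical per-GPU layout, so each slice picks up the same logical parameter across all GPUs. A small illustration with made-up blob names:

# Illustration only: 2 GPUs, 3 parameter blobs each, listed GPU-major.
all_blobs = [
    'gpu_0/conv1_w', 'gpu_0/conv1_b', 'gpu_0/fc_w',
    'gpu_1/conv1_w', 'gpu_1/conv1_b', 'gpu_1/fc_w',
]
blobs_per_gpu = len(all_blobs) // 2
for i in range(blobs_per_gpu):
    print(all_blobs[i::blobs_per_gpu])
# ['gpu_0/conv1_w', 'gpu_1/conv1_w']
# ['gpu_0/conv1_b', 'gpu_1/conv1_b']
# ['gpu_0/fc_w', 'gpu_1/fc_w']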
Example #5
Source File: loader.py From KL-Loss with Apache License 2.0

def enqueue_blobs(self, gpu_id, blob_names, blobs):
    """Put a mini-batch on a BlobsQueue."""
    assert len(blob_names) == len(blobs)
    t = time.time()
    dev = c2_utils.CudaDevice(gpu_id)
    queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
    blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
    for (blob_name, blob) in zip(blob_names, blobs):
        workspace.FeedBlob(blob_name, blob, device_option=dev)
    logger.debug(
        'enqueue_blobs {}: workspace.FeedBlob: {}'.
        format(gpu_id, time.time() - t)
    )
    t = time.time()
    op = core.CreateOperator(
        'SafeEnqueueBlobs',
        [queue_name] + blob_names,
        blob_names + [queue_name + '_enqueue_status'],
        device_option=dev
    )
    workspace.RunOperatorOnce(op)
    logger.debug(
        'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.
        format(gpu_id, time.time() - t)
    )
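SafeEnqueueBlobs assumes the BlobsQueue already exists in the workspace. A hedged sketch of that setup step, using Caffe2's CreateBlobsQueue operator (Detectron-style loaders perform an equivalent step elsewhere; the capacity value here is illustrative):

# Create a per-GPU BlobsQueue for SafeEnqueueBlobs to feed (sketch).
workspace.RunOperatorOnce(
    core.CreateOperator(
        'CreateBlobsQueue',
        [],
        [queue_name],
        num_blobs=len(blob_names),
        capacity=8,
    )
)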
Example #6
Source File: model_loader.py From VMZ with Apache License 2.0

def BroacastParameters(model, src_gpu, gpus):
    log.info("Broadcasting parameters from gpu {} to gpu: {}".format(
        src_gpu, ','.join([str(g) for g in gpus]))
    )
    for param in model.params:
        if 'gpu_{}'.format(gpus[0]) in str(param):
            for i in gpus:
                blob = workspace.FetchBlob(str(param))
                target_blob_name = str(param).replace(
                    'gpu_{}'.format(src_gpu),
                    'gpu_{}'.format(i)
                )
                log.info('broadcast {} -> {}'.format(
                    str(param), target_blob_name)
                )
                workspace.FetchBlob(str(param))
                with core.DeviceScope(
                        core.DeviceOption(caffe2_pb2.CUDA, i)):
                    workspace.FeedBlob(target_blob_name, blob)
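A hypothetical call, assuming the source GPU is included in gpus so that the gpus[0] membership test matches the source blobs:

# Broadcast from GPU 0 to GPUs 0-3 (hypothetical usage).
BroacastParameters(model, src_gpu=0, gpus=range(4))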
Example #7
Source File: bn_helper.py From video-long-term-feature-banks with Apache License 2.0

def _update_bn_stats_gpu(self):
    """
    Copy BN statistics to GPU. Note: the actual blobs used at test time
    are "rm" and "riv".
    """
    num_gpus = cfg.NUM_GPUS
    root_gpu_id = cfg.ROOT_GPU_ID
    for i in range(root_gpu_id, root_gpu_id + num_gpus):
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, i)):
            for bn_layer in self._bn_layers:
                workspace.FeedBlob(
                    'gpu_{}/'.format(i) + bn_layer + '_bn_rm',
                    np.array(self._meanX_dict[bn_layer], dtype=np.float32),
                )
                # Note: riv is actually the running var (not running inv var)!
                workspace.FeedBlob(
                    'gpu_{}/'.format(i) + bn_layer + '_bn_riv',
                    np.array(self._var_dict[bn_layer], dtype=np.float32),
                )
Example #8
Source File: dataloader.py From video-long-term-feature-banks with Apache License 2.0

def enqueue_blobs(
    self,
    gpu_id,
    enqueue_blobs_names,
    blob_values,
):
    enqueue_blobs_names = [
        'gpu_{}/{}'.format(gpu_id, enqueue_blob_name)
        for enqueue_blob_name in enqueue_blobs_names
    ]
    deviceOption = core.DeviceOption(caffe2_pb2.CUDA, gpu_id)
    for (blob_name, blob) in zip(enqueue_blobs_names, blob_values):
        workspace.FeedBlob(blob_name, blob, device_option=deviceOption)

    queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
    workspace.RunOperatorOnce(
        core.CreateOperator(
            'SafeEnqueueBlobs',
            [queue_name] + enqueue_blobs_names,
            enqueue_blobs_names + [queue_name + '_enqueue_status'],
            device_option=deviceOption,
        )
    )
Example #9
Source File: loader.py From Clustered-Object-Detection-in-Aerial-Image with Apache License 2.0

def enqueue_blobs(self, gpu_id, blob_names, blobs):
    """Put a mini-batch on a BlobsQueue."""
    assert len(blob_names) == len(blobs)
    t = time.time()
    dev = c2_utils.CudaDevice(gpu_id)
    queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
    blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
    for (blob_name, blob) in zip(blob_names, blobs):
        workspace.FeedBlob(blob_name, blob, device_option=dev)
    logger.debug(
        'enqueue_blobs {}: workspace.FeedBlob: {}'.
        format(gpu_id, time.time() - t)
    )
    t = time.time()
    op = core.CreateOperator(
        'SafeEnqueueBlobs',
        [queue_name] + blob_names,
        blob_names + [queue_name + '_enqueue_status'],
        device_option=dev
    )
    workspace.RunOperatorOnce(op)
    logger.debug(
        'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.
        format(gpu_id, time.time() - t)
    )
Example #10
Source File: net.py From Clustered-Object-Detection-in-Aerial-Image with Apache License 2.0

def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter
    blobs on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            for i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()])
Example #11
Source File: test_spatial_narrow_as_op.py From Clustered-Object-Detection-in-Aerial-Image with Apache License 2.0

def _run_test(self, A, B, check_grad=False):
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
        op = core.CreateOperator('SpatialNarrowAs', ['A', 'B'], ['C'])
        workspace.FeedBlob('A', A)
        workspace.FeedBlob('B', B)
    workspace.RunOperatorOnce(op)
    C = workspace.FetchBlob('C')

    if check_grad:
        gc = gradient_checker.GradientChecker(
            stepsize=0.005,
            threshold=0.005,
            device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
        )
        res, grad, grad_estimated = gc.CheckSimple(op, [A, B], 0, [0])
        self.assertTrue(res, 'Grad check failed')

    dims = C.shape
    C_ref = A[:dims[0], :dims[1], :dims[2], :dims[3]]
    np.testing.assert_allclose(C, C_ref, rtol=1e-5, atol=1e-08)
Example #12
Source File: test_batch_permutation_op.py From Clustered-Object-Detection-in-Aerial-Image with Apache License 2.0

def _run_op_test(self, X, I, check_grad=False):
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
        op = core.CreateOperator('BatchPermutation', ['X', 'I'], ['Y'])
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('I', I)
    workspace.RunOperatorOnce(op)
    Y = workspace.FetchBlob('Y')

    if check_grad:
        gc = gradient_checker.GradientChecker(
            stepsize=0.1,
            threshold=0.001,
            device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
        )
        res, grad, grad_estimated = gc.CheckSimple(op, [X, I], 0, [0])
        self.assertTrue(res, 'Grad check failed')

    Y_ref = X[I]
    np.testing.assert_allclose(Y, Y_ref, rtol=1e-5, atol=1e-08)
Example #13
Source File: run_add5_op.py From tutorials with Apache License 2.0

def run_add5_and_add5gradient_op(device):
    # clear the workspace before running the operator
    workspace.ResetWorkspace()
    add5 = core.CreateOperator("Add5", ["X"], ["Y"], device_option=device)
    print("==> Running Add5 op:")
    workspace.FeedBlob("X", (np.random.rand(5, 5)), device_option=device)
    print("Input of Add5: ", workspace.FetchBlob("X"))
    workspace.RunOperatorOnce(add5)
    print("Output of Add5: ", workspace.FetchBlob("Y"))

    print("\n\n==> Running Add5Gradient op:")
    print("Input of Add5Gradient: ", workspace.FetchBlob("Y"))
    add5gradient = core.CreateOperator(
        "Add5Gradient", ["Y"], ["Z"], device_option=device
    )
    workspace.RunOperatorOnce(add5gradient)
    print("Output of Add5Gradient: ", workspace.FetchBlob("Z"))
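The device argument is an ordinary Caffe2 DeviceOption; a hedged usage sketch running the custom op on CPU (assuming the Add5 operator from this tutorial has been built and registered):

from caffe2.proto import caffe2_pb2
from caffe2.python import core

# Run on CPU; use caffe2_pb2.CUDA for a GPU build.
cpu_device = core.DeviceOption(caffe2_pb2.CPU)
run_add5_and_add5gradient_op(cpu_device)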
Example #14
Source File: test_batch_permutation_op.py From seg_every_thing with Apache License 2.0

def _run_op_test(self, X, I, check_grad=False):
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
        op = core.CreateOperator('BatchPermutation', ['X', 'I'], ['Y'])
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('I', I)
    workspace.RunOperatorOnce(op)
    Y = workspace.FetchBlob('Y')

    if check_grad:
        gc = gradient_checker.GradientChecker(
            stepsize=0.1,
            threshold=0.001,
            device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
        )
        res, grad, grad_estimated = gc.CheckSimple(op, [X, I], 0, [0])
        self.assertTrue(res, 'Grad check failed')

    Y_ref = X[I]
    np.testing.assert_allclose(Y, Y_ref, rtol=1e-5, atol=1e-08)
Example #15
Source File: test_batch_permutation_op.py From seg_every_thing with Apache License 2.0

def _run_speed_test(self, iters=5, N=1024):
    """This function provides an example of how to benchmark custom
    operators using the Caffe2 'prof_dag' network execution type. Please
    note that for 'prof_dag' to work, Caffe2 must be compiled with
    profiling support using the `-DUSE_PROF=ON` option passed to `cmake`
    when building Caffe2.
    """
    net = core.Net('test')
    net.Proto().type = 'prof_dag'
    net.Proto().num_workers = 2
    Y = net.BatchPermutation(['X', 'I'], 'Y')
    Y_flat = net.FlattenToVec([Y], 'Y_flat')
    loss = net.AveragedLoss([Y_flat], 'loss')
    net.AddGradientOperators([loss])
    workspace.CreateNet(net)

    X = np.random.randn(N, 256, 14, 14)
    for _i in range(iters):
        I = np.random.permutation(N)
        workspace.FeedBlob('X', X.astype(np.float32))
        workspace.FeedBlob('I', I.astype(np.int32))
        workspace.RunNet(net.Proto().name)
        np.testing.assert_allclose(
            workspace.FetchBlob('Y'), X[I], rtol=1e-5, atol=1e-08
        )
Example #16
Source File: net.py From seg_every_thing with Apache License 2.0

def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter
    blobs on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            for i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()])
Example #17
Source File: test_spatial_narrow_as_op.py From masktextspotter.caffe2 with Apache License 2.0

def _run_test(self, A, B, check_grad=False):
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
        op = core.CreateOperator('SpatialNarrowAs', ['A', 'B'], ['C'])
        workspace.FeedBlob('A', A)
        workspace.FeedBlob('B', B)
    workspace.RunOperatorOnce(op)
    C = workspace.FetchBlob('C')

    if check_grad:
        gc = gradient_checker.GradientChecker(
            stepsize=0.005,
            threshold=0.005,
            device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
        )
        res, grad, grad_estimated = gc.CheckSimple(op, [A, B], 0, [0])
        self.assertTrue(res, 'Grad check failed')

    dims = C.shape
    C_ref = A[:dims[0], :dims[1], :dims[2], :dims[3]]
    np.testing.assert_allclose(C, C_ref, rtol=1e-5, atol=1e-08)
Example #18
Source File: test_batch_permutation_op.py From masktextspotter.caffe2 with Apache License 2.0

def _run_op_test(self, X, I, check_grad=False):
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
        op = core.CreateOperator('BatchPermutation', ['X', 'I'], ['Y'])
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('I', I)
    workspace.RunOperatorOnce(op)
    Y = workspace.FetchBlob('Y')

    if check_grad:
        gc = gradient_checker.GradientChecker(
            stepsize=0.1,
            threshold=0.001,
            device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
        )
        res, grad, grad_estimated = gc.CheckSimple(op, [X, I], 0, [0])
        self.assertTrue(res, 'Grad check failed')

    Y_ref = X[I]
    np.testing.assert_allclose(Y, Y_ref, rtol=1e-5, atol=1e-08)
Example #19
Source File: test_batch_permutation_op.py From masktextspotter.caffe2 with Apache License 2.0

def _run_speed_test(self, iters=5, N=1024):
    """This function provides an example of how to benchmark custom
    operators using the Caffe2 'prof_dag' network execution type. Please
    note that for 'prof_dag' to work, Caffe2 must be compiled with
    profiling support using the `-DUSE_PROF=ON` option passed to `cmake`
    when building Caffe2.
    """
    net = core.Net('test')
    net.Proto().type = 'prof_dag'
    net.Proto().num_workers = 2
    Y = net.BatchPermutation(['X', 'I'], 'Y')
    Y_flat = net.FlattenToVec([Y], 'Y_flat')
    loss = net.AveragedLoss([Y_flat], 'loss')
    net.AddGradientOperators([loss])
    workspace.CreateNet(net)

    X = np.random.randn(N, 256, 14, 14)
    for _i in range(iters):
        I = np.random.permutation(N)
        workspace.FeedBlob('X', X.astype(np.float32))
        workspace.FeedBlob('I', I.astype(np.int32))
        workspace.RunNet(net.Proto().name)
        np.testing.assert_allclose(
            workspace.FetchBlob('Y'), X[I], rtol=1e-5, atol=1e-08
        )
Example #20
Source File: mix_loader.py From masktextspotter.caffe2 with Apache License 2.0

def enqueue_blobs(self, gpu_id, blob_names, blobs):
    """Put a mini-batch on a BlobsQueue."""
    assert len(blob_names) == len(blobs)
    t = time.time()
    dev = c2_utils.CudaDevice(gpu_id)
    queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
    blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
    for (blob_name, blob) in zip(blob_names, blobs):
        workspace.FeedBlob(blob_name, blob, device_option=dev)
    logger.debug(
        'enqueue_blobs {}: workspace.FeedBlob: {}'.
        format(gpu_id, time.time() - t)
    )
    t = time.time()
    op = core.CreateOperator(
        'SafeEnqueueBlobs',
        [queue_name] + blob_names,
        blob_names + [queue_name + '_enqueue_status'],
        device_option=dev
    )
    workspace.RunOperatorOnce(op)
    logger.debug(
        'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.
        format(gpu_id, time.time() - t)
    )
Example #21
Source File: net.py From masktextspotter.caffe2 with Apache License 2.0

def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter
    blobs on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            for i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()])
Example #22
Source File: loader.py From Detectron-Cascade-RCNN with Apache License 2.0

def enqueue_blobs(self, gpu_id, blob_names, blobs):
    """Put a mini-batch on a BlobsQueue."""
    assert len(blob_names) == len(blobs)
    t = time.time()
    dev = c2_utils.CudaDevice(gpu_id)
    queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
    blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
    for (blob_name, blob) in zip(blob_names, blobs):
        workspace.FeedBlob(blob_name, blob, device_option=dev)
    logger.debug(
        'enqueue_blobs {}: workspace.FeedBlob: {}'.
        format(gpu_id, time.time() - t)
    )
    t = time.time()
    op = core.CreateOperator(
        'SafeEnqueueBlobs',
        [queue_name] + blob_names,
        blob_names + [queue_name + '_enqueue_status'],
        device_option=dev
    )
    workspace.RunOperatorOnce(op)
    logger.debug(
        'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.
        format(gpu_id, time.time() - t)
    )
Example #23
Source File: net.py From Detectron-Cascade-RCNN with Apache License 2.0

def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter
    blobs on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            for i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()])
Example #24
Source File: test_spatial_narrow_as_op.py From Detectron-Cascade-RCNN with Apache License 2.0

def _run_test(self, A, B, check_grad=False):
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
        op = core.CreateOperator('SpatialNarrowAs', ['A', 'B'], ['C'])
        workspace.FeedBlob('A', A)
        workspace.FeedBlob('B', B)
    workspace.RunOperatorOnce(op)
    C = workspace.FetchBlob('C')

    if check_grad:
        gc = gradient_checker.GradientChecker(
            stepsize=0.005,
            threshold=0.005,
            device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
        )
        res, grad, grad_estimated = gc.CheckSimple(op, [A, B], 0, [0])
        self.assertTrue(res, 'Grad check failed')

    dims = C.shape
    C_ref = A[:dims[0], :dims[1], :dims[2], :dims[3]]
    np.testing.assert_allclose(C, C_ref, rtol=1e-5, atol=1e-08)
Example #25
Source File: test_batch_permutation_op.py From Detectron-Cascade-RCNN with Apache License 2.0

def _run_op_test(self, X, I, check_grad=False):
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
        op = core.CreateOperator('BatchPermutation', ['X', 'I'], ['Y'])
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('I', I)
    workspace.RunOperatorOnce(op)
    Y = workspace.FetchBlob('Y')

    if check_grad:
        gc = gradient_checker.GradientChecker(
            stepsize=0.1,
            threshold=0.001,
            device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
        )
        res, grad, grad_estimated = gc.CheckSimple(op, [X, I], 0, [0])
        self.assertTrue(res, 'Grad check failed')

    Y_ref = X[I]
    np.testing.assert_allclose(Y, Y_ref, rtol=1e-5, atol=1e-08)
Example #26
Source File: loader.py From Detectron with Apache License 2.0

def enqueue_blobs(self, gpu_id, blob_names, blobs):
    """Put a mini-batch on a BlobsQueue."""
    assert len(blob_names) == len(blobs)
    t = time.time()
    dev = c2_utils.CudaDevice(gpu_id)
    queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
    blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
    for (blob_name, blob) in zip(blob_names, blobs):
        workspace.FeedBlob(blob_name, blob, device_option=dev)
    logger.debug(
        'enqueue_blobs {}: workspace.FeedBlob: {}'.
        format(gpu_id, time.time() - t)
    )
    t = time.time()
    op = core.CreateOperator(
        'SafeEnqueueBlobs',
        [queue_name] + blob_names,
        blob_names + [queue_name + '_enqueue_status'],
        device_option=dev
    )
    workspace.RunOperatorOnce(op)
    logger.debug(
        'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.
        format(gpu_id, time.time() - t)
    )
Example #27
Source File: net.py From Detectron with Apache License 2.0

def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter
    blobs on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            for i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()])
Example #28
Source File: test_spatial_narrow_as_op.py From Detectron with Apache License 2.0

def _run_test(self, A, B, check_grad=False):
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
        op = core.CreateOperator('SpatialNarrowAs', ['A', 'B'], ['C'])
        workspace.FeedBlob('A', A)
        workspace.FeedBlob('B', B)
    workspace.RunOperatorOnce(op)
    C = workspace.FetchBlob('C')

    if check_grad:
        gc = gradient_checker.GradientChecker(
            stepsize=0.005,
            threshold=0.005,
            device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
        )
        res, grad, grad_estimated = gc.CheckSimple(op, [A, B], 0, [0])
        self.assertTrue(res, 'Grad check failed')

    dims = C.shape
    C_ref = A[:dims[0], :dims[1], :dims[2], :dims[3]]
    np.testing.assert_allclose(C, C_ref, rtol=1e-5, atol=1e-08)
Example #29
Source File: test_spatial_narrow_as_op.py From seg_every_thing with Apache License 2.0

def _run_test(self, A, B, check_grad=False):
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
        op = core.CreateOperator('SpatialNarrowAs', ['A', 'B'], ['C'])
        workspace.FeedBlob('A', A)
        workspace.FeedBlob('B', B)
    workspace.RunOperatorOnce(op)
    C = workspace.FetchBlob('C')

    if check_grad:
        gc = gradient_checker.GradientChecker(
            stepsize=0.005,
            threshold=0.005,
            device_option=core.DeviceOption(caffe2_pb2.CUDA, 0)
        )
        res, grad, grad_estimated = gc.CheckSimple(op, [A, B], 0, [0])
        self.assertTrue(res, 'Grad check failed')

    dims = C.shape
    C_ref = A[:dims[0], :dims[1], :dims[2], :dims[3]]
    np.testing.assert_allclose(C, C_ref, rtol=1e-5, atol=1e-08)
Example #30
Source File: detector.py From masktextspotter.caffe2 with Apache License 2.0

def _SetNewLr(self, cur_lr, new_lr):
    """Do the actual work of updating the model and workspace blobs.
    """
    for i in range(cfg.NUM_GPUS):
        with c2_utils.CudaScope(i):
            workspace.FeedBlob(
                'gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32))
    ratio = _get_lr_change_ratio(cur_lr, new_lr)
    if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \
            ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD:
        self._CorrectMomentum(new_lr / cur_lr)
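_get_lr_change_ratio is a module-level helper not shown here; a hedged sketch of its likely shape (the symmetric ratio between the old and new learning rates, guarded against division by zero):

# Hedged sketch; the project defines an equivalent helper whose exact form may differ.
def _get_lr_change_ratio(cur_lr, new_lr):
    eps = 1e-10
    return np.max(
        (new_lr / np.maximum(eps, cur_lr), cur_lr / np.maximum(eps, new_lr))
    )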