Python mxnet.MXNetError() Examples

The following are 11 code examples of mxnet.MXNetError(), taken from open-source projects. The source file, project, and license are noted above each example.
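Before the project examples, here is a minimal sketch of the common pattern: wrap an MXNet call that can fail in the backend (for example, touching a GPU that is not present) in a try/except block and catch mx.MXNetError, the exception raised from the C++ engine. The GPU probe below is illustrative only and is not taken from any of the projects.

import mxnet as mx

try:
    # Copying back to NumPy synchronizes the engine, so a missing or
    # unusable gpu(0) surfaces here as mx.MXNetError.
    mx.nd.ones((2, 2), ctx=mx.gpu(0)).asnumpy()
    print('gpu(0) is usable')
except mx.MXNetError as err:
    print('gpu(0) unavailable:', err)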
Example #1
Source File: dist_sync_kvstore.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_invalid_operations():
    def check_invalid_gluon_trainer_reset():
        params = mx.gluon.ParameterDict()
        x = params.get('x', shape=(4, 2), lr_mult=1.0, stype='row_sparse')
        params.initialize(ctx=mx.cpu(0), init='zeros')
        trainer = mx.gluon.Trainer(params, 'sgd', {'learning_rate': 0.1}, kvstore=kv)
        params.save('test_gluon_trainer_reset_' + str(my_rank) + '.params')
        row_id = mx.nd.arange(0, 4)
        w = x.row_sparse_data(row_id)
        assert trainer._kv_initialized and trainer._update_on_kvstore
        mx.nd.waitall()
        # load would fail to reset kvstore since update_on_kvstore is True
        assert_exception(params.load, RuntimeError, 'test_gluon_trainer_reset_' + str(my_rank) + '.params')
        print('worker ' + str(my_rank) + ' passed check_invalid_gluon_trainer_reset')

    def check_invalid_pull():
        kv.init(keys_invalid[0], mx.nd.ones((2,2)).tostype('row_sparse'))
        out = mx.nd.ones((2,2)).tostype('row_sparse')
        assert_exception(kv.pull, mx.MXNetError, 'invalid_key', out=out, ignore_sparse=False)
        print('worker ' + str(my_rank) + ' passed check_invalid_pull')

    check_invalid_gluon_trainer_reset()
    check_invalid_pull() 
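Note that this example depends on module-level fixtures defined elsewhere in dist_sync_kvstore.py: kv (the distributed KVStore), my_rank, keys_invalid, and assert_exception. As a rough, hypothetical sketch of what such an assert_exception helper typically does (the original may differ):

def assert_exception(func, expected_exception, *args, **kwargs):
    # Call func with the given arguments and insist that it raises expected_exception.
    try:
        func(*args, **kwargs)
    except expected_exception:
        return
    raise AssertionError('%s did not raise %s' % (func, expected_exception))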
Example #2
Source File: test_metric_perf.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def run_metric(name, data_gen_cls, i, n, c, pred_ctx, label_ctx, **kwargs):
    """ Helper function for running one metric benchmark """
    metric = mx.metric.create(name, **kwargs)
    data_gen = data_gen_cls(n, c, pred_ctx, label_ctx)
    try:
        label, pred = data_gen.data()
        mx.nd.waitall()
        before = time.time()
        metric.update([label] * i, [pred] * i)
        mx.nd.waitall()
        elapsed = time.time() - before
        elapsed_str = "{:<.5}".format(elapsed)
    except mx.MXNetError:
        elapsed_str = "FAILED"
    print("{metric:<15}{pctx:<10}{lctx:<12}{niter:<12}{bs:<15}{out_dim:<15}{elapsed:<}".format(
        metric=name, pctx=str(pred_ctx), lctx=str(label_ctx), niter=i * n, bs=data_gen.batch_size,
        out_dim=data_gen.output_dim, elapsed=elapsed_str), file=sys.stderr) 
Example #3
Source File: utils.py    From sockeye with Apache License 2.0
def get_gpu_memory_usage(ctx: Union[mx.context.Context, List[mx.context.Context]]) -> Dict[int, Tuple[int, int]]:
    """
    Returns used and total memory for GPUs identified by the given context list.

    :param ctx: List of MXNet context devices.
    :return: Dictionary of device id mapping to a tuple of (memory used, memory total).
    """
    if isinstance(ctx, mx.context.Context):
        ctx = [ctx]
    ctx = [c for c in ctx if c.device_type == 'gpu']
    if not ctx:
        return {}

    memory_data = {}  # type: Dict[int, Tuple[int, int]]
    for c in ctx:
        try:
            free, total = mx.context.gpu_memory_info(device_id=c.device_id)  # in bytes
            used = total - free
            memory_data[c.device_id] = (used * 1e-06, total * 1e-06)  # convert bytes to MB
        except mx.MXNetError:
            logger.exception("Failed retrieving memory data for gpu%d", c.device_id)
            continue
    log_gpu_memory_usage(memory_data)
    return memory_data 
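A usage sketch, assuming the surrounding sockeye.utils module (which also defines logger and log_gpu_memory_usage) is importable:

import mxnet as mx
from sockeye.utils import get_gpu_memory_usage

usage = get_gpu_memory_usage([mx.gpu(0), mx.gpu(1)])  # e.g. {0: (used_mb, total_mb), 1: (...)}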
Example #4
Source File: utils.py    From datawig with Apache License 2.0
def get_context() -> mx.context:
    """

    Returns the a list of all available gpu contexts for a given machine.
    If no gpus are available, returns [mx.cpu()].
    Use it to automatically return MxNet contexts (uses max number of gpus or cpu)

    :return: List of mxnet contexts of a gpu or [mx.cpu()] if gpu not available

    """
    context_list = []
    for gpu_number in range(16):
        try:
            _ = mx.nd.array([1, 2, 3], ctx=mx.gpu(gpu_number))
            context_list.append(mx.gpu(gpu_number))
        except mx.MXNetError:
            pass

    if len(context_list) == 0:
        context_list.append(mx.cpu())

    return context_list 
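A possible usage sketch; the Gluon block below is hypothetical and only illustrates passing the returned context list to initialize():

import mxnet as mx
from mxnet.gluon import nn

ctx = get_context()                  # e.g. [mx.gpu(0), mx.gpu(1)] or [mx.cpu()]
net = nn.Dense(10)                   # hypothetical model for illustration
net.initialize(mx.init.Xavier(), ctx=ctx)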
Example #5
Source File: test_metric_perf.py    From SNIPER-mxnet with Apache License 2.0
def run_metric(name, data_gen_cls, i, n, c, pred_ctx, label_ctx, **kwargs):
    """ Helper function for running one metric benchmark """
    metric = mx.metric.create(name, **kwargs)
    data_gen = data_gen_cls(n, c, pred_ctx, label_ctx)
    try:
        label, pred = data_gen.data()
        mx.nd.waitall()
        before = time.time()
        metric.update([label] * i, [pred] * i)
        mx.nd.waitall()
        elapsed = time.time() - before
        elapsed_str = "{:<.5}".format(elapsed)
    except mx.MXNetError:
        elapsed_str = "FAILED"
    print("{metric:<15}{pctx:<10}{lctx:<12}{niter:<12}{bs:<15}{out_dim:<15}{elapsed:<}".format(
        metric=name, pctx=str(pred_ctx), lctx=str(label_ctx), niter=i * n, bs=data_gen.batch_size,
        out_dim=data_gen.output_dim, elapsed=elapsed_str), file=sys.stderr) 
Example #6
Source File: __init__.py    From dgl with Apache License 2.0
def is_cuda_available():
    # TODO: Does MXNet have a convenient function to test GPU availability/compilation?
    try:
        a = nd.array([1, 2, 3], ctx=mx.gpu())
        return True
    except mx.MXNetError:
        return False 
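On the TODO above: newer MXNet builds expose mx.context.num_gpus() (used in Example #7 below), so an alternative probe could look like the sketch here; note that on some builds num_gpus() itself raises MXNetError when CUDA is not installed.

import mxnet as mx

def is_cuda_available_via_num_gpus():
    try:
        return mx.context.num_gpus() > 0
    except mx.MXNetError:
        return False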
Example #7
Source File: utils.py    From sockeye with Apache License 2.0
def get_num_gpus() -> int:
    """
    Gets the number of GPUs available on the host.

    :return: The number of GPUs on the system.
    """
    try:
        return mx.context.num_gpus()
    except mx.MXNetError:
        # Some builds of MXNet will raise a CUDA error when CUDA is not
        # installed on the host.  In this case, zero GPUs are available.
        return 0 
Example #8
Source File: model_handler.py    From xfer with Apache License 2.0
def get_module(self, iterator, fixed_layer_parameters=None, random_layer_parameters=None):
    """
    Return MXNet Module using the model symbol and parameters.

    :param iterator: MXNet iterator to be used with model.
    :type iterator: :class:`mxnet.io.DataIter`
    :param list(str) fixed_layer_parameters: List of layer parameters to keep fixed.
    :param list(str) random_layer_parameters: List of layer parameters to randomise.
    :return: MXNet module
    :rtype: :class:`mx.module.Module`
    """
    if fixed_layer_parameters is not None:
        fixed_layer_parameters = self._prune_parameters(fixed_layer_parameters)
    if random_layer_parameters is None:
        arg_params, aux_params = self.arg_params.copy(), self.aux_params.copy()
    else:
        arg_params, aux_params = self._remove_random_parameters(random_layer_parameters)
    mod = mx.mod.Module(symbol=self.symbol, context=self.devices, fixed_param_names=fixed_layer_parameters,
                        label_names=(self.layer_names[-1] + "_label",), data_names=(self.data_name,))
    mod.bind(data_shapes=iterator.provide_data, label_shapes=iterator.provide_label)
    mod.init_params(mx.init.Xavier(rnd_type='gaussian', factor_type='in', magnitude=2))
    try:
        mod.set_params(arg_params, aux_params, allow_missing=True, force_init=True)
    except mx.MXNetError as e:
        exceptions._handle_mxnet_error(e)
    return mod
Example #9
Source File: test_random.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_sample_multinomial():
    for dtype in ['uint8', 'int32', 'float16', 'float32', 'float64']: # output array types
        for x in [mx.nd.array([[0,1,2,3,4],[4,3,2,1,0]])/10.0, mx.nd.array([0,1,2,3,4])/10.0]:
            dx = mx.nd.ones_like(x)
            mx.contrib.autograd.mark_variables([x], [dx])
            # Adding rtol and increasing samples needed to pass with seed 2951820647
            samples = 10000
            with mx.autograd.record():
                y, prob = mx.nd.random.multinomial(x, shape=samples, get_prob=True, dtype=dtype)
                r = prob * 5
                r.backward()

            assert(np.dtype(dtype) == y.dtype)
            y = y.asnumpy()
            x = x.asnumpy()
            dx = dx.asnumpy()
            if len(x.shape) == 1:
                x = x.reshape((1, x.shape[0]))
                dx = dx.reshape(1, dx.shape[0])
                y = y.reshape((1, y.shape[0]))
                prob = prob.reshape((1, prob.shape[0]))
            for i in range(x.shape[0]):
                freq = np.bincount(y[i,:].astype('int32'), minlength=5)/np.float32(samples)*x[i,:].sum()
                mx.test_utils.assert_almost_equal(freq, x[i], rtol=0.20, atol=1e-1)
                rprob = x[i][y[i].astype('int32')]/x[i].sum()
                mx.test_utils.assert_almost_equal(np.log(rprob), prob.asnumpy()[i], atol=1e-5)

                real_dx = np.zeros((5,))
                for j in range(samples):
                    real_dx[int(y[i][j])] += 5.0 / rprob[j]
                mx.test_utils.assert_almost_equal(real_dx, dx[i, :], rtol=1e-4, atol=1e-5)
    for dtype in ['uint8', 'float16', 'float32']:
        # Bound check for the output data types. 'int32' and 'float64' require large memory so are skipped.
        x = mx.nd.zeros(2 ** 25)  # Larger than the max integer in float32 without precision loss.
        bound_check = False
        try:
            y = mx.nd.random.multinomial(x, dtype=dtype)
        except mx.MXNetError as e:
            bound_check = True
        assert bound_check

# Test the generators with chi-square testing
Example #10
Source File: test_random.py    From SNIPER-mxnet with Apache License 2.0
def test_random_seed_setting_for_context():
    seed_to_test = 1234
    num_temp_seeds = 25
    probs = [0.125, 0.25, 0.25, 0.0625, 0.125, 0.1875]
    num_samples = 100000
    dev_type = mx.context.current_context().device_type
    for dtype in ['float16', 'float32', 'float64']:
        samples_imp = []
        samples_sym = []
        # Collect random number samples from the generators of all devices, each seeded with the same number.
        for dev_id in range(0, 16 if dev_type == 'gpu' else 1):
            # Currently the Python API does not provide a method to get the number of GPU devices.
            # Waiting for PR #10354, which provides the method, to be merged.
            # As a temporary workaround, try first and catch the exception caused by the absence of the device with `dev_id`.
            try:
                with mx.Context(dev_type, dev_id):
                    ctx = mx.context.current_context()
                    seed = set_seed_variously_for_context(ctx, 1, num_temp_seeds, seed_to_test)

                    # Check imperative. `multinomial` uses non-parallel rng.
                    rnds = mx.nd.random.multinomial(data=mx.nd.array(probs, dtype=dtype), shape=num_samples)
                    samples_imp.append(rnds.asnumpy())

                    # Check symbolic. `multinomial` uses non-parallel rng.
                    P = mx.sym.Variable("P")
                    X = mx.sym.random.multinomial(data=P, shape=num_samples, get_prob=False)
                    exe = X.bind(ctx, {"P": mx.nd.array(probs, dtype=dtype)})
                    set_seed_variously_for_context(ctx, seed, num_temp_seeds, seed_to_test)
                    exe.forward()
                    samples_sym.append(exe.outputs[0].asnumpy())
            except mx.MXNetError as e:
                if str(e).find("invalid device ordinal") != -1:
                    break
                else:
                    raise e
        # The samples should be identical across different gpu devices.
        for i in range(1, len(samples_imp)):
            assert same(samples_imp[i - 1], samples_imp[i])
        for i in range(1, len(samples_sym)):
            assert same(samples_sym[i - 1], samples_sym[i])

# Tests that seed setting of parallel rng for specific context is synchronous w.r.t. rng use before and after. 
Example #11
Source File: test_random.py    From SNIPER-mxnet with Apache License 2.0
def test_parallel_random_seed_setting_for_context():
    seed_to_test = 1234
    dev_type = mx.context.current_context().device_type
    for dtype in ['float16', 'float32', 'float64']:
        samples_imp = []
        samples_sym = []
        # Collect random number samples from the generators of all devices, each seeded with the same number.
        for dev_id in range(0, 16 if dev_type == 'gpu' else 1):
            # Currently the Python API does not provide a method to get the number of GPU devices.
            # Waiting for PR #10354, which provides the method, to be merged.
            # As a temporary workaround, try first and catch the exception caused by the absence of the device with `dev_id`.
            try:
                with mx.Context(dev_type, dev_id):
                    ctx = mx.context.current_context()
                    # Avoid excessive test cpu runtimes.
                    num_temp_seeds = 25 if dev_type == 'gpu' else 1
                    # To flush out a possible race condition, run multiple times.
                    for _ in range(20):
                        # Create enough samples such that we get a meaningful distribution.
                        shape = (200, 200)
                        params = { 'low': -1.5, 'high': 3.0 }
                        params.update(shape=shape, dtype=dtype)

                        # Check imperative. `uniform` uses parallel rng.
                        seed = set_seed_variously_for_context(ctx, 1, num_temp_seeds, seed_to_test)
                        rnds = mx.nd.random.uniform(**params)
                        samples_imp.append(rnds.asnumpy())

                        # Check symbolic. `uniform` uses parallel rng.
                        X = mx.sym.Variable("X")
                        Y = mx.sym.random.uniform(**params) + X
                        x = mx.nd.zeros(shape, dtype=dtype)
                        xgrad = mx.nd.zeros(shape, dtype=dtype)
                        yexec = Y.bind(ctx, {'X' : x}, {'X': xgrad})
                        set_seed_variously_for_context(ctx, seed, num_temp_seeds, seed_to_test)
                        yexec.forward(is_train=True)
                        yexec.backward(yexec.outputs[0])
                        samples_sym.append(yexec.outputs[0].asnumpy())
            except mx.MXNetError as e:
                if str(e).find("invalid device ordinal") != -1:
                    break
                else:
                    raise e
        # The samples should be identical across different gpu devices.
        for i in range(1, len(samples_imp)):
            assert same(samples_imp[i - 1], samples_imp[i])
        for i in range(1, len(samples_sym)):
            assert same(samples_sym[i - 1], samples_sym[i])