Python mxnet.MXNetError() Examples
Example #1
Source File: From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def test_invalid_operations():
    """Check that invalid trainer/kvstore operations raise the expected errors.

    Runs two independent checks, each printing a per-worker success line:

    * reloading parameters after a trainer has initialized its kvstore with
      ``update_on_kvstore`` enabled must raise ``RuntimeError``;
    * pulling a row_sparse value with ``ignore_sparse=False`` must raise
      ``mx.MXNetError``.

    Uses the module-level ``kv``, ``my_rank``, ``keys_invalid`` and
    ``assert_exception``.
    """
    def check_invalid_gluon_trainer_reset():
        params = mx.gluon.ParameterDict()
        x = params.get('x', shape=(4, 2), lr_mult=1.0, stype='row_sparse')
        params.initialize(ctx=mx.cpu(0), init='zeros')
        trainer = mx.gluon.Trainer(params, 'sgd', {'learning_rate': 0.1}, kvstore=kv)
        # Persist the parameters first so that the later `params.load` has a file to read.
        params.save('test_gluon_trainer_reset_' + str(my_rank) + '.params')
        row_id = mx.nd.arange(0, 4)
        # Pull the rows once before inspecting the trainer state; the returned
        # array itself is not needed.
        x.row_sparse_data(row_id)
        assert trainer._kv_initialized and trainer._update_on_kvstore
        mx.nd.waitall()
        # load would fail to reset kvstore since update_on_kvstore is True
        assert_exception(params.load, RuntimeError,
                         'test_gluon_trainer_reset_' + str(my_rank) + '.params')
        print('worker ' + str(my_rank) + ' passed check_invalid_gluon_trainer_reset')

    def check_invalid_pull():
        kv.init(keys_invalid[0], mx.nd.ones((2, 2)).tostype('row_sparse'))
        out = mx.nd.ones((2, 2)).tostype('row_sparse')
        assert_exception(kv.pull, mx.MXNetError, 'invalid_key', out=out, ignore_sparse=False)
        print('worker ' + str(my_rank) + ' passed check_invalid_pull')

    check_invalid_gluon_trainer_reset()
    check_invalid_pull()
Example #2
Source File: From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def run_metric(name, data_gen_cls, i, n, c, pred_ctx, label_ctx, **kwargs):
    """Helper function for running one metric benchmark.

    Creates the metric ``name`` (extra ``kwargs`` are forwarded to
    ``mx.metric.create``), builds a data generator from ``data_gen_cls``,
    times a single ``metric.update`` call on ``i`` copies of the generated
    label/prediction pair, and prints one formatted result row to stderr.

    :param name: Metric name passed to ``mx.metric.create``.
    :param data_gen_cls: Callable invoked as ``data_gen_cls(n, c, pred_ctx, label_ctx)``;
        the result must expose ``batch_size`` and ``output_dim``.
    :param i: Number of times the label/prediction pair is repeated in the update.
    :param n: First argument of the data generator; also used for the reported ``niter`` (``i * n``).
    :param c: Second argument of the data generator.
    :param pred_ctx: Context handed to the data generator and printed in the row.
    :param label_ctx: Context handed to the data generator and printed in the row.
    """
    metric = mx.metric.create(name, **kwargs)
    data_gen = data_gen_cls(n, c, pred_ctx, label_ctx)
    try:
        # NOTE(review): `data()` is assumed to be the generator method that
        # returns the (label, pred) pair -- confirm against data_gen_cls.
        label, pred = data_gen.data()
        # Synchronize before starting the clock so data generation is not timed.
        mx.nd.waitall()
        before = time.time()
        metric.update([label] * i, [pred] * i)
        # Synchronize again so the measured interval ends after the update completes.
        mx.nd.waitall()
        elapsed = time.time() - before
        elapsed_str = "{:<.5}".format(elapsed)
    except mx.MXNetError:
        elapsed_str = "FAILED"
    print("{metric:<15}{pctx:<10}{lctx:<12}{niter:<12}{bs:<15}{out_dim:<15}{elapsed:<}".format(
        metric=name, pctx=str(pred_ctx), lctx=str(label_ctx), niter=i * n,
        bs=data_gen.batch_size, out_dim=data_gen.output_dim, elapsed=elapsed_str), file=sys.stderr)
Example #3
Source File: From sockeye with Apache License 2.0 | 6 votes |
def get_gpu_memory_usage(ctx: Union[mx.context.Context, List[mx.context.Context]]) -> Dict[int, Tuple[int, int]]:
    """
    Collects used and total memory for every GPU device among the given contexts.

    :param ctx: A single MXNet context or a list of contexts. Entries whose device type
                is not 'gpu' are ignored.
    :return: Dictionary mapping device id to a tuple of (memory used, memory total), where each
             value is the figure reported by MXNet scaled by 1e-06. Empty when no GPU context
             was given. Devices whose query fails are logged and left out.
    """
    contexts = [ctx] if isinstance(ctx, mx.context.Context) else ctx
    gpu_contexts = [device for device in contexts if device.device_type == 'gpu']
    if len(gpu_contexts) == 0:
        return {}

    memory_data = dict()  # type: Dict[int, Tuple[int, int]]
    for device in gpu_contexts:
        try:
            free, total = mx.context.gpu_memory_info(device_id=device.device_id)  # in bytes
        except mx.MXNetError:
            logger.exception("Failed retrieving memory data for gpu%d", device.device_id)
            continue
        memory_data[device.device_id] = ((total - free) * 1e-06, total * 1e-06)

    log_gpu_memory_usage(memory_data)
    return memory_data
Example #4
Source File: From datawig with Apache License 2.0 | 6 votes |
def get_context() -> mx.context:
    """
    Build the list of usable MXNet contexts on this machine.

    GPU ids 0 through 15 are probed by allocating a small array on each one; every id
    for which the allocation does not raise ``mx.MXNetError`` is kept. When no GPU
    passes the probe, the result is ``[mx.cpu()]``.

    :return: List of mxnet gpu contexts, or [mx.cpu()] if no gpu is usable
    """
    available = []
    for index in range(16):
        try:
            # Probe allocation: only the success/failure matters, not the array.
            mx.nd.array([1, 2, 3], ctx=mx.gpu(index))
            available.append(mx.gpu(index))
        except mx.MXNetError:
            continue

    return available if available else [mx.cpu()]
Example #5
Source File: From SNIPER-mxnet with Apache License 2.0 | 6 votes |
def run_metric(name, data_gen_cls, i, n, c, pred_ctx, label_ctx, **kwargs):
    """Helper function for running one metric benchmark.

    Creates the metric ``name`` (extra ``kwargs`` are forwarded to
    ``mx.metric.create``), builds a data generator from ``data_gen_cls``,
    times a single ``metric.update`` call on ``i`` copies of the generated
    label/prediction pair, and prints one formatted result row to stderr.

    :param name: Metric name passed to ``mx.metric.create``.
    :param data_gen_cls: Callable invoked as ``data_gen_cls(n, c, pred_ctx, label_ctx)``;
        the result must expose ``batch_size`` and ``output_dim``.
    :param i: Number of times the label/prediction pair is repeated in the update.
    :param n: First argument of the data generator; also used for the reported ``niter`` (``i * n``).
    :param c: Second argument of the data generator.
    :param pred_ctx: Context handed to the data generator and printed in the row.
    :param label_ctx: Context handed to the data generator and printed in the row.
    """
    metric = mx.metric.create(name, **kwargs)
    data_gen = data_gen_cls(n, c, pred_ctx, label_ctx)
    try:
        # NOTE(review): `data()` is assumed to be the generator method that
        # returns the (label, pred) pair -- confirm against data_gen_cls.
        label, pred = data_gen.data()
        # Synchronize before starting the clock so data generation is not timed.
        mx.nd.waitall()
        before = time.time()
        metric.update([label] * i, [pred] * i)
        # Synchronize again so the measured interval ends after the update completes.
        mx.nd.waitall()
        elapsed = time.time() - before
        elapsed_str = "{:<.5}".format(elapsed)
    except mx.MXNetError:
        elapsed_str = "FAILED"
    print("{metric:<15}{pctx:<10}{lctx:<12}{niter:<12}{bs:<15}{out_dim:<15}{elapsed:<}".format(
        metric=name, pctx=str(pred_ctx), lctx=str(label_ctx), niter=i * n,
        bs=data_gen.batch_size, out_dim=data_gen.output_dim, elapsed=elapsed_str), file=sys.stderr)
Example #6
Source File: From dgl with Apache License 2.0 | 5 votes |
def is_cuda_available():
    """Report whether an array can be created on the default GPU context.

    Returns True when the probe allocation succeeds and False when it raises
    ``mx.MXNetError``.
    """
    # TODO: Does MXNet have a convenient function to test GPU availability/compilation?
    try:
        nd.array([1, 2, 3], ctx=mx.gpu())
    except mx.MXNetError:
        return False
    return True
Example #7
Source File: From sockeye with Apache License 2.0 | 5 votes |
def get_num_gpus() -> int:
    """
    Query MXNet for the number of GPUs on the host.

    :return: The count reported by ``mx.context.num_gpus()``, or 0 if that call
             raises ``mx.MXNetError``.
    """
    try:
        gpu_count = mx.context.num_gpus()
    except mx.MXNetError:
        # Some builds of MXNet will raise a CUDA error when CUDA is not
        # installed on the host. In this case, zero GPUs are available.
        gpu_count = 0
    return gpu_count
Example #8
Source File: From xfer with Apache License 2.0 | 5 votes |
def get_module(self, iterator, fixed_layer_parameters=None, random_layer_parameters=None):
    """
    Build, bind and initialise an MXNet Module from this model's symbol and parameters.

    :param iterator: MXNet iterator to be used with model; its ``provide_data`` and
        ``provide_label`` are used to bind the module.
    :param list(str) fixed_layer_parameters: List of layer parameters to keep fixed.
    :param list(str) random_layer_parameters: List of layer parameters to randomise.
    :return: MXNet module
    :rtype: :class:`mx.module.Module`
    """
    if fixed_layer_parameters is None:
        fixed_names = None
    else:
        fixed_names = self._prune_parameters(fixed_layer_parameters)

    if random_layer_parameters is not None:
        arg_params, aux_params = self._remove_random_parameters(random_layer_parameters)
    else:
        arg_params = self.arg_params.copy()
        aux_params = self.aux_params.copy()

    module = mx.mod.Module(
        symbol=self.symbol,
        context=self.devices,
        fixed_param_names=fixed_names,
        label_names=(self.layer_names[-1] + "_label",),
        data_names=(self.data_name,),
    )
    module.bind(data_shapes=iterator.provide_data, label_shapes=iterator.provide_label)

    # Initialise every parameter first; the stored values are then forced on top,
    # with missing entries allowed.
    initializer = mx.init.Xavier(rnd_type='gaussian', factor_type='in', magnitude=2)
    module.init_params(initializer)
    try:
        module.set_params(arg_params, aux_params, allow_missing=True, force_init=True)
    except mx.MXNetError as error:
        # Handling of the MXNet error is delegated to the project's exceptions module.
        exceptions._handle_mxnet_error(error)
    return module
Example #9
Source File: From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 4 votes |
def test_sample_multinomial():
    """Validate ``mx.nd.random.multinomial`` samples, log-probabilities and gradients.

    For every output dtype and for both a 2-D and a 1-D probability input, the test
    draws ``samples`` values with ``get_prob=True``, back-propagates ``prob * 5`` and
    checks that:

    * the sampled array has the requested dtype;
    * the empirical frequencies match the input weights within tolerance;
    * the returned log-probabilities equal ``log`` of the normalised weights;
    * the gradient written into ``dx`` matches a reference accumulated in NumPy.

    A second loop checks that sampling from an input of ``2 ** 25`` entries raises
    ``mx.MXNetError`` for the narrow output dtypes.
    """
    for dtype in ['uint8', 'int32', 'float16', 'float32', 'float64']:  # output array types
        for x in [mx.nd.array([[0, 1, 2, 3, 4], [4, 3, 2, 1, 0]]) / 10.0,
                  mx.nd.array([0, 1, 2, 3, 4]) / 10.0]:
            dx = mx.nd.ones_like(x)
            mx.contrib.autograd.mark_variables([x], [dx])
            # Adding rtol and increasing samples needed to pass with seed 2951820647
            samples = 10000
            with mx.autograd.record():
                y, prob = mx.nd.random.multinomial(x, shape=samples, get_prob=True, dtype=dtype)
                r = prob * 5
                r.backward()

            assert np.dtype(dtype) == y.dtype
            y = y.asnumpy()
            x = x.asnumpy()
            dx = dx.asnumpy()
            # Promote the 1-D case to a single-row 2-D layout so both inputs share
            # the per-row checks below. Compare by value: `is 1` would test identity.
            if len(x.shape) == 1:
                x = x.reshape((1, x.shape[0]))
                dx = dx.reshape(1, dx.shape[0])
                y = y.reshape((1, y.shape[0]))
                prob = prob.reshape((1, prob.shape[0]))
            for i in range(x.shape[0]):
                freq = np.bincount(y[i, :].astype('int32'), minlength=5) / np.float32(samples) * x[i, :].sum()
                mx.test_utils.assert_almost_equal(freq, x[i], rtol=0.20, atol=1e-1)
                rprob = x[i][y[i].astype('int32')] / x[i].sum()
                mx.test_utils.assert_almost_equal(np.log(rprob), prob.asnumpy()[i], atol=1e-5)

                # Reference gradient: each draw of category k contributes 5 / p(k).
                real_dx = np.zeros((5,))
                for j in range(samples):
                    real_dx[int(y[i][j])] += 5.0 / rprob[j]
                mx.test_utils.assert_almost_equal(real_dx, dx[i, :], rtol=1e-4, atol=1e-5)

    for dtype in ['uint8', 'float16', 'float32']:
        # Bound check for the output data types. 'int32' and 'float64' require large memory so are skipped.
        x = mx.nd.zeros(2 ** 25)  # Larger than the max integer in float32 without precision loss.
        bound_check = False
        try:
            mx.nd.random.multinomial(x, dtype=dtype)
        except mx.MXNetError:
            bound_check = True
        assert bound_check
# Test the generators with the chi-square testing
Example #10
Source File: From SNIPER-mxnet with Apache License 2.0 | 4 votes |
def test_random_seed_setting_for_context():
    """Check that seeding a specific context makes multinomial sampling reproducible.

    For each dtype, every candidate device of the current device type is seeded via
    ``set_seed_variously_for_context`` and sampled once imperatively and once through
    a bound symbol. All imperative samples must be identical to each other, and all
    symbolic samples must be identical to each other.
    """
    seed_to_test = 1234
    num_temp_seeds = 25
    probs = [0.125, 0.25, 0.25, 0.0625, 0.125, 0.1875]
    num_samples = 100000
    dev_type = mx.context.current_context().device_type
    # Currently python API does not provide a method to get the number of gpu devices.
    # Waiting for PR #10354, which provides the method, to be merged.
    # As a temporal workaround, try first and catch the exception caused by the
    # absence of the device with `dev_id`.
    max_devices = 16 if dev_type == 'gpu' else 1

    for dtype in ['float16', 'float32', 'float64']:
        imperative_samples = []
        symbolic_samples = []
        # Collect random number samples from the generators of all devices, each seeded with the same number.
        for dev_id in range(max_devices):
            try:
                with mx.Context(dev_type, dev_id):
                    ctx = mx.context.current_context()
                    seed = set_seed_variously_for_context(ctx, 1, num_temp_seeds, seed_to_test)

                    # Check imperative. `multinomial` uses non-parallel rng.
                    prob_array = mx.nd.array(probs, dtype=dtype)
                    drawn = mx.nd.random.multinomial(data=prob_array, shape=num_samples)
                    imperative_samples.append(drawn.asnumpy())

                    # Check symbolic. `multinomial` uses non-parallel rng.
                    prob_var = mx.sym.Variable("P")
                    sampler = mx.sym.random.multinomial(data=prob_var, shape=num_samples, get_prob=False)
                    executor = sampler.bind(ctx, {"P": mx.nd.array(probs, dtype=dtype)})
                    set_seed_variously_for_context(ctx, seed, num_temp_seeds, seed_to_test)
                    executor.forward()
                    symbolic_samples.append(executor.outputs[0].asnumpy())
            except mx.MXNetError as err:
                # Any error other than a missing device is a genuine failure.
                if "invalid device ordinal" not in str(err):
                    raise err
                break

        # The samples should be identical across different gpu devices.
        for previous, current in zip(imperative_samples, imperative_samples[1:]):
            assert same(previous, current)
        for previous, current in zip(symbolic_samples, symbolic_samples[1:]):
            assert same(previous, current)
# Tests that seed setting of parallel rng for specific context is synchronous w.r.t. rng use before and after.
Example #11
Source File: From SNIPER-mxnet with Apache License 2.0 | 4 votes |
def test_parallel_random_seed_setting_for_context():
    """Check that seeding a specific context makes uniform sampling reproducible.

    For each dtype and each candidate device of the current device type, the context
    is seeded via ``set_seed_variously_for_context`` and a (200, 200) uniform sample
    is drawn imperatively and through a bound symbol, repeated 20 times per device.
    All imperative samples must be identical to each other, and all symbolic samples
    must be identical to each other.
    """
    seed_to_test = 1234
    dev_type = mx.context.current_context().device_type
    # Currently python API does not provide a method to get the number of gpu devices.
    # Waiting for PR #10354, which provides the method, to be merged.
    # As a temporal workaround, try first and catch the exception caused by the
    # absence of the device with `dev_id`.
    max_devices = 16 if dev_type == 'gpu' else 1

    for dtype in ['float16', 'float32', 'float64']:
        imperative_samples = []
        symbolic_samples = []
        # Collect random number samples from the generators of all devices, each seeded with the same number.
        for dev_id in range(max_devices):
            try:
                with mx.Context(dev_type, dev_id):
                    ctx = mx.context.current_context()
                    # Avoid excessive test cpu runtimes.
                    num_temp_seeds = 25 if dev_type == 'gpu' else 1
                    # To flush out a possible race condition, run multiple times.
                    for _ in range(20):
                        # Create enough samples such that we get a meaningful distribution.
                        shape = (200, 200)
                        uniform_kwargs = {'low': -1.5, 'high': 3.0, 'shape': shape, 'dtype': dtype}

                        # Check imperative. `uniform` uses parallel rng.
                        seed = set_seed_variously_for_context(ctx, 1, num_temp_seeds, seed_to_test)
                        drawn = mx.nd.random.uniform(**uniform_kwargs)
                        imperative_samples.append(drawn.asnumpy())

                        # Check symbolic. `uniform` uses parallel rng.
                        input_var = mx.sym.Variable("X")
                        shifted = mx.sym.random.uniform(**uniform_kwargs) + input_var
                        zeros_in = mx.nd.zeros(shape, dtype=dtype)
                        grad_buf = mx.nd.zeros(shape, dtype=dtype)
                        executor = shifted.bind(ctx, {'X': zeros_in}, {'X': grad_buf})
                        set_seed_variously_for_context(ctx, seed, num_temp_seeds, seed_to_test)
                        executor.forward(is_train=True)
                        executor.backward(executor.outputs[0])
                        symbolic_samples.append(executor.outputs[0].asnumpy())
            except mx.MXNetError as err:
                # Any error other than a missing device is a genuine failure.
                if "invalid device ordinal" not in str(err):
                    raise err
                break

        # The samples should be identical across different gpu devices.
        for previous, current in zip(imperative_samples, imperative_samples[1:]):
            assert same(previous, current)
        for previous, current in zip(symbolic_samples, symbolic_samples[1:]):
            assert same(previous, current)