Python mxnet.ndarray.zeros() Examples
The following are 30 code examples of mxnet.ndarray.zeros(), collected from open-source projects. You can go to the original project or source file by following the reference noted above each example. You may also want to check out all available functions and classes of the module mxnet.ndarray.
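Before the project examples, a quick orientation: nd.zeros allocates an NDArray of a given shape filled with zeros, optionally on an explicit device context and with an explicit dtype. The following minimal sketch is not taken from any of the projects below; the 112x112 buffer shape is just an illustrative value. It shows the pattern that recurs throughout the examples: pre-allocating a zero buffer and then filling it in place.

import mxnet as mx
from mxnet import ndarray as nd

# A 2x3 float32 array of zeros on the default (CPU) context.
a = nd.zeros((2, 3))

# Shape, context, and dtype can be given explicitly; mx.gpu(0) would place
# the buffer on a GPU instead of mx.cpu().
buf = nd.zeros(shape=(1, 3, 112, 112), ctx=mx.cpu(), dtype='float32')

# Pre-allocated buffers are usually filled in place, as in the project
# examples below.
buf[0][:] = nd.ones((3, 112, 112))
print(a)
print(buf.shape, buf.context)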
Example #1
Source File: ytf.py From MaskInsightface with Apache License 2.0 | 6 votes |
def get_feature_set(name, vid, args):
    global feature_cache
    key = (name, vid)
    if key in feature_cache:
        return feature_cache[key]
    input_dir = os.path.join(args.image_dir, name, str(vid))
    data = nd.zeros((1, 3, image_size[0], image_size[1]))
    F = []
    for img in os.listdir(input_dir):
        img = os.path.join(input_dir, img)
        img = cv2.imread(img)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.transpose(img, (2, 0, 1))
        data[0][:] = img
        db = mx.io.DataBatch(data=(data,))
        model.forward(db, is_train=False)
        net_out = model.get_outputs()[0].asnumpy().flatten()
        F.append(net_out)
    F = np.array(F)
    F = sklearn.preprocessing.normalize(F)
    feature_cache[key] = F
    return F
Example #2
Source File: bdk_demo.py From training_results_v0.6 with Apache License 2.0 | 6 votes |
def run_toy_SGLD():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = 1
    teacher_noise_precision = 1.0 / 9.0
    net = get_toy_sym(True, teacher_noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    exe, params, _ = \
        SGLD(sym=net, data_inputs=data_inputs,
             X=X, Y=Y, X_test=X_test, Y_test=Y_test,
             total_iter_num=50000,
             initializer=initializer,
             learning_rate=1E-4,
             # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
             prior_precision=0.1,
             burn_in_iter_num=1000,
             thin_interval=10,
             task='regression',
             minibatch_size=minibatch_size, dev=dev())
Example #3
Source File: bdk_demo.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
    if grad is None:
        grad = nd.empty(theta.shape, theta.context)
    theta1 = theta.asnumpy()[0]
    theta2 = theta.asnumpy()[1]
    v1 = sigma1 ** 2
    v2 = sigma2 ** 2
    vx = sigmax ** 2
    denominator = numpy.exp(-(X - theta1) ** 2 / (2 * vx)) + numpy.exp(
        -(X - theta1 - theta2) ** 2 / (2 * vx))
    grad_npy = numpy.zeros(theta.shape)
    grad_npy[0] = -rescale_grad * ((numpy.exp(-(X - theta1) ** 2 / (2 * vx)) * (X - theta1) / vx
                                    + numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
                                    * (X - theta1 - theta2) / vx) / denominator).sum() \
                  + theta1 / v1
    grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
                                    * (X - theta1 - theta2) / vx) / denominator).sum() \
                  + theta2 / v2
    grad[:] = grad_npy
    return grad
Example #4
Source File: inference_time_evaluation_mxnet.py From MobileFace with MIT License | 6 votes |
def get_time(self, module):
    data = nd.zeros(self.input_shape)
    batch = mxnet.io.DataBatch(data=(data,))
    all_time = []
    symbol_name = self.symbol_file.split('/')[-1]
    print 'Start to evaluate: %s' % (symbol_name)
    for i in xrange(self.iteration):
        time_start = datetime.datetime.now()
        module.forward(batch, is_train=False)
        net_out = module.get_outputs()[0].asnumpy()
        time_end = datetime.datetime.now()
        one_time = time_end - time_start
        all_time.append(one_time.total_seconds())
    print 'Finish %d iterations in %f ms. Average infer time is [%f ms].' % (
        self.iteration, numpy.sum(all_time) * 1000, numpy.mean(all_time) * 1000)
Example #5
Source File: bdk_demo.py From training_results_v0.6 with Apache License 2.0 | 6 votes |
def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
    if grad is None:
        grad = nd.empty(theta.shape, theta.context)
    theta1 = theta.asnumpy()[0]
    theta2 = theta.asnumpy()[1]
    v1 = sigma1 ** 2
    v2 = sigma2 ** 2
    vx = sigmax ** 2
    denominator = numpy.exp(-(X - theta1) ** 2 / (2 * vx)) + numpy.exp(
        -(X - theta1 - theta2) ** 2 / (2 * vx))
    grad_npy = numpy.zeros(theta.shape)
    grad_npy[0] = -rescale_grad * ((numpy.exp(-(X - theta1) ** 2 / (2 * vx)) * (X - theta1) / vx
                                    + numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
                                    * (X - theta1 - theta2) / vx) / denominator).sum() \
                  + theta1 / v1
    grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
                                    * (X - theta1 - theta2) / vx) / denominator).sum() \
                  + theta2 / v2
    grad[:] = grad_npy
    return grad
Example #6
Source File: bdk_demo.py From SNIPER-mxnet with Apache License 2.0 | 6 votes |
def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
    if grad is None:
        grad = nd.empty(theta.shape, theta.context)
    theta1 = theta.asnumpy()[0]
    theta2 = theta.asnumpy()[1]
    v1 = sigma1 ** 2
    v2 = sigma2 ** 2
    vx = sigmax ** 2
    denominator = numpy.exp(-(X - theta1) ** 2 / (2 * vx)) + numpy.exp(
        -(X - theta1 - theta2) ** 2 / (2 * vx))
    grad_npy = numpy.zeros(theta.shape)
    grad_npy[0] = -rescale_grad * ((numpy.exp(-(X - theta1) ** 2 / (2 * vx)) * (X - theta1) / vx
                                    + numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
                                    * (X - theta1 - theta2) / vx) / denominator).sum() \
                  + theta1 / v1
    grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
                                    * (X - theta1 - theta2) / vx) / denominator).sum() \
                  + theta2 / v2
    grad[:] = grad_npy
    return grad
Example #7
Source File: test_contrib_text.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def test_download_embed():
    @text.embedding.register
    class Test(text.embedding._TokenEmbedding):
        # 33 bytes.
        pretrained_file_name_sha1 = \
            {'embedding_test.vec': '29b9a6511cf4b5aae293c44a9ec1365b74f2a2f8'}
        namespace = 'test'

        def __init__(self, embedding_root='embeddings',
                     init_unknown_vec=nd.zeros, **kwargs):
            pretrained_file_name = 'embedding_test.vec'
            Test._check_pretrained_file_names(pretrained_file_name)
            super(Test, self).__init__(**kwargs)
            pretrained_file_path = Test._get_pretrained_file(embedding_root,
                                                             pretrained_file_name)
            self._load_embedding(pretrained_file_path, ' ', init_unknown_vec)

    test_embed = text.embedding.create('test')
    assert test_embed.token_to_idx['hello'] == 1
    assert test_embed.token_to_idx['world'] == 2
    assert_almost_equal(test_embed.idx_to_vec[1].asnumpy(), (nd.arange(5) + 1).asnumpy())
    assert_almost_equal(test_embed.idx_to_vec[2].asnumpy(), (nd.arange(5) + 6).asnumpy())
    assert_almost_equal(test_embed.idx_to_vec[0].asnumpy(), nd.zeros((5,)).asnumpy())
Example #8
Source File: bdk_demo.py From SNIPER-mxnet with Apache License 2.0 | 6 votes |
def run_toy_SGLD():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = 1
    teacher_noise_precision = 1.0 / 9.0
    net = get_toy_sym(True, teacher_noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    exe, params, _ = \
        SGLD(sym=net, data_inputs=data_inputs,
             X=X, Y=Y, X_test=X_test, Y_test=Y_test,
             total_iter_num=50000,
             initializer=initializer,
             learning_rate=1E-4,
             # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
             prior_precision=0.1,
             burn_in_iter_num=1000,
             thin_interval=10,
             task='regression',
             minibatch_size=minibatch_size, dev=dev())
Example #9
Source File: test_contrib_text.py From SNIPER-mxnet with Apache License 2.0 | 6 votes |
def test_download_embed():
    @text.embedding.register
    class Test(text.embedding._TokenEmbedding):
        # 33 bytes.
        pretrained_file_name_sha1 = \
            {'embedding_test.vec': '29b9a6511cf4b5aae293c44a9ec1365b74f2a2f8'}
        namespace = 'test'

        def __init__(self, embedding_root='embeddings',
                     init_unknown_vec=nd.zeros, **kwargs):
            pretrained_file_name = 'embedding_test.vec'
            Test._check_pretrained_file_names(pretrained_file_name)
            super(Test, self).__init__(**kwargs)
            pretrained_file_path = Test._get_pretrained_file(embedding_root,
                                                             pretrained_file_name)
            self._load_embedding(pretrained_file_path, ' ', init_unknown_vec)

    test_embed = text.embedding.create('test')
    assert test_embed.token_to_idx['hello'] == 1
    assert test_embed.token_to_idx['world'] == 2
    assert_almost_equal(test_embed.idx_to_vec[1].asnumpy(), (nd.arange(5) + 1).asnumpy())
    assert_almost_equal(test_embed.idx_to_vec[2].asnumpy(), (nd.arange(5) + 6).asnumpy())
    assert_almost_equal(test_embed.idx_to_vec[0].asnumpy(), nd.zeros((5,)).asnumpy())
Example #10
Source File: bdk_demo.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def run_toy_SGLD():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = 1
    teacher_noise_precision = 1.0 / 9.0
    net = get_toy_sym(True, teacher_noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    exe, params, _ = \
        SGLD(sym=net, data_inputs=data_inputs,
             X=X, Y=Y, X_test=X_test, Y_test=Y_test,
             total_iter_num=50000,
             initializer=initializer,
             learning_rate=1E-4,
             # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
             prior_precision=0.1,
             burn_in_iter_num=1000,
             thin_interval=10,
             task='regression',
             minibatch_size=minibatch_size, dev=dev())
Example #11
Source File: bdk_demo.py From SNIPER-mxnet with Apache License 2.0 | 5 votes |
def run_mnist_SGD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, exe_params, _ = SGD(sym=net, dev=dev(), data_inputs=data_inputs,
                             X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                             total_iter_num=1000000,
                             initializer=initializer,
                             lr=5E-6, prior_precision=1.0, minibatch_size=100)
Example #12
Source File: bdk_demo.py From SNIPER-mxnet with Apache License 2.0 | 5 votes |
def run_toy_HMC():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = Y.shape[0]
    noise_precision = 1 / 9.0
    net = get_toy_sym(True, noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    sample_pool = HMC(net, data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                      sample_num=300000, initializer=initializer, prior_precision=1.0,
                      learning_rate=1E-3, L=10, dev=dev())
Example #13
Source File: bdk_demo.py From training_results_v0.6 with Apache License 2.0 | 5 votes |
def run_synthetic_SGLD():
    theta1 = 0
    theta2 = 1
    sigma1 = numpy.sqrt(10)
    sigma2 = 1
    sigmax = numpy.sqrt(2)
    X = load_synthetic(theta1=theta1, theta2=theta2, sigmax=sigmax, num=100)
    minibatch_size = 1
    total_iter_num = 1000000
    lr_scheduler = SGLDScheduler(begin_rate=0.01, end_rate=0.0001,
                                 total_iter_num=total_iter_num, factor=0.55)
    optimizer = mx.optimizer.create('sgld', learning_rate=None,
                                    rescale_grad=1.0, lr_scheduler=lr_scheduler, wd=0)
    updater = mx.optimizer.get_updater(optimizer)
    theta = mx.random.normal(0, 1, (2,), mx.cpu())
    grad = nd.empty((2,), mx.cpu())
    samples = numpy.zeros((2, total_iter_num))
    start = time.time()
    for i in xrange(total_iter_num):
        if (i + 1) % 100000 == 0:
            end = time.time()
            print("Iter:%d, Time spent: %f" % (i + 1, end - start))
            start = time.time()
        ind = numpy.random.randint(0, X.shape[0])
        synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
                       rescale_grad=X.shape[0] / float(minibatch_size), grad=grad)
        updater('theta', grad, theta)
        samples[:, i] = theta.asnumpy()
    plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
    plt.colorbar()
    plt.show()
Example #14
Source File: ytf.py From MaskInsightface with Apache License 2.0 | 5 votes |
def get_feature(name, vid, args):
    global feature_cache
    key = (name, vid)
    if key in feature_cache:
        return feature_cache[key]
    input_dir = os.path.join(args.image_dir, name, str(vid))
    data = nd.zeros((1, 3, image_size[0], image_size[1]))
    F = []
    for img in os.listdir(input_dir):
        img = os.path.join(input_dir, img)
        img = cv2.imread(img)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.transpose(img, (2, 0, 1))
        data[0][:] = img
        db = mx.io.DataBatch(data=(data,))
        model.forward(db, is_train=False)
        net_out = model.get_outputs()[0].asnumpy().flatten()
        F.append(net_out)
    F = np.array(F)
    F = sklearn.preprocessing.normalize(F)
    feature = np.mean(F, axis=0, keepdims=True)
    feature = sklearn.preprocessing.normalize(feature).flatten()
    feature_cache[key] = feature
    return feature
Example #15
Source File: ytf.py From 1.FaceRecognition with MIT License | 5 votes |
def get_feature(name, vid, args):
    global feature_cache
    key = (name, vid)
    if key in feature_cache:
        return feature_cache[key]
    input_dir = os.path.join(args.image_dir, name, str(vid))
    data = nd.zeros((1, 3, image_size[0], image_size[1]))
    F = []
    for img in os.listdir(input_dir):
        img = os.path.join(input_dir, img)
        img = cv2.imread(img)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.transpose(img, (2, 0, 1))
        data[0][:] = img
        db = mx.io.DataBatch(data=(data,))
        model.forward(db, is_train=False)
        net_out = model.get_outputs()[0].asnumpy().flatten()
        F.append(net_out)
    F = np.array(F)
    F = sklearn.preprocessing.normalize(F)
    feature = np.mean(F, axis=0, keepdims=True)
    feature = sklearn.preprocessing.normalize(feature).flatten()
    feature_cache[key] = feature
    return feature
Example #16
Source File: parall_module_local_v1.py From 1.FaceRecognition with MIT License | 5 votes |
def get_ndarray2(self, context, name, arr):
    key = "%s_%s" % (name, context)
    #print(key)
    if key not in self._nd_cache:
        v = nd.zeros(shape=arr.shape, ctx=context)
        self._nd_cache[key] = v
    else:
        v = self._nd_cache[key]
    arr.copyto(v)
    return v
Example #17
Source File: parall_module_local_v1.py From 1.FaceRecognition with MIT License | 5 votes |
def get_ndarray(self, context, name, shape):
    key = "%s_%s" % (name, context)
    #print(key)
    if key not in self._nd_cache:
        v = nd.zeros(shape=shape, ctx=context)
        self._nd_cache[key] = v
    else:
        v = self._nd_cache[key]
    return v
Example #18
Source File: train_cgan.py From panoptic-fpn-gluon with Apache License 2.0 | 5 votes |
def gan_loss(input, target_is_real):
    if target_is_real:
        target = nd.ones(input.shape, ctx=input.context)
    else:
        target = nd.zeros(input.shape, ctx=input.context)
    # mse loss for lsgan
    e = ((input - target) ** 2).mean(axis=0, exclude=True)
    return e
Example #19
Source File: train_cgan.py From panoptic-fpn-gluon with Apache License 2.0 | 5 votes |
def weights_init(layers):
    for layer in layers:
        classname = layer.__class__.__name__
        if hasattr(layer, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
            layer.weight.set_data(nd.random.normal(0.0, 0.02, shape=layer.weight.data().shape))
            if hasattr(layer, 'bias') and layer.bias is not None:
                layer.bias.set_data(nd.zeros(layer.bias.data().shape))
        elif classname.find('BatchNorm') != -1:
            layer.gamma.set_data(nd.random.normal(1.0, 0.02, shape=layer.gamma.data().shape))
            # BatchNorm layers have no bias parameter; zero-initialize beta from its own shape.
            layer.beta.set_data(nd.zeros(layer.beta.data().shape))
Example #20
Source File: train_srgan.py From panoptic-fpn-gluon with Apache License 2.0 | 5 votes |
def weights_init(params):
    for param_name in params:
        param = params[param_name]
        if param_name.find('conv') != -1:
            if param_name.find('weight') != -1:
                param.set_data(nd.random.normal(0.0, 0.02, shape=param.data().shape))
            elif param_name.find('bias') != -1:
                param.set_data(nd.zeros(param.data().shape))
        elif param_name.find('batchnorm') != -1:
            if param_name.find('gamma') != -1:
                param.set_data(nd.random.normal(1.0, 0.02, shape=param.data().shape))
            elif param_name.find('beta') != -1:
                param.set_data(nd.zeros(param.data().shape))
Example #21
Source File: bdk_demo.py From SNIPER-mxnet with Apache License 2.0 | 5 votes |
def run_mnist_SGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, sample_pool = SGLD(sym=net, dev=dev(), data_inputs=data_inputs,
                            X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                            total_iter_num=1000000,
                            initializer=initializer,
                            learning_rate=4E-6, prior_precision=1.0, minibatch_size=100,
                            thin_interval=100, burn_in_iter_num=1000)
Example #22
Source File: bdk_demo.py From SNIPER-mxnet with Apache License 2.0 | 5 votes |
def run_synthetic_SGLD():
    theta1 = 0
    theta2 = 1
    sigma1 = numpy.sqrt(10)
    sigma2 = 1
    sigmax = numpy.sqrt(2)
    X = load_synthetic(theta1=theta1, theta2=theta2, sigmax=sigmax, num=100)
    minibatch_size = 1
    total_iter_num = 1000000
    lr_scheduler = SGLDScheduler(begin_rate=0.01, end_rate=0.0001,
                                 total_iter_num=total_iter_num, factor=0.55)
    optimizer = mx.optimizer.create('sgld', learning_rate=None,
                                    rescale_grad=1.0, lr_scheduler=lr_scheduler, wd=0)
    updater = mx.optimizer.get_updater(optimizer)
    theta = mx.random.normal(0, 1, (2,), mx.cpu())
    grad = nd.empty((2,), mx.cpu())
    samples = numpy.zeros((2, total_iter_num))
    start = time.time()
    for i in xrange(total_iter_num):
        if (i + 1) % 100000 == 0:
            end = time.time()
            print("Iter:%d, Time spent: %f" % (i + 1, end - start))
            start = time.time()
        ind = numpy.random.randint(0, X.shape[0])
        synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
                       rescale_grad=X.shape[0] / float(minibatch_size), grad=grad)
        updater('theta', grad, theta)
        samples[:, i] = theta.asnumpy()
    plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
    plt.colorbar()
    plt.show()
Example #23
Source File: base.py From training_results_v0.6 with Apache License 2.0 | 5 votes |
def update_acc_grad(self):
    if self.acc_grad is None:
        self.acc_grad = OrderedDict([(n, nd.zeros(v.shape, ctx=self.ctx))
                                     for n, v in self.params_grad.items()])
    for k, v in self.acc_grad.items():
        v[:] = v + self.params_grad[k]
Example #24
Source File: base.py From SNIPER-mxnet with Apache License 2.0 | 5 votes |
def update_acc_grad(self):
    if self.acc_grad is None:
        self.acc_grad = OrderedDict([(n, nd.zeros(v.shape, ctx=self.ctx))
                                     for n, v in self.params_grad.items()])
    for k, v in self.acc_grad.items():
        v[:] = v + self.params_grad[k]
Example #25
Source File: bdk_demo.py From SNIPER-mxnet with Apache License 2.0 | 5 votes |
def run_mnist_DistilledSGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    if training_num >= 10000:
        num_hidden = 800
        total_iter_num = 1000000
        teacher_learning_rate = 1E-6
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.1
    else:
        num_hidden = 400
        total_iter_num = 20000
        teacher_learning_rate = 4E-5
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.001
    teacher_net = get_mnist_sym(num_hidden=num_hidden)
    logsoftmax = LogSoftmax()
    student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
    data_shape = (minibatch_size,) + X.shape[1::]
    teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
    teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_exe, student_params, _ = \
        DistilledSGLD(teacher_sym=teacher_net, student_sym=student_net,
                      teacher_data_inputs=teacher_data_inputs,
                      student_data_inputs=student_data_inputs,
                      X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=total_iter_num,
                      student_initializer=student_initializer,
                      teacher_initializer=teacher_initializer,
                      student_optimizing_algorithm="adam",
                      teacher_learning_rate=teacher_learning_rate,
                      student_learning_rate=student_learning_rate,
                      teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
                      perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev())
Example #26
Source File: bdk_demo.py From training_results_v0.6 with Apache License 2.0 | 5 votes |
def run_mnist_DistilledSGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    if training_num >= 10000:
        num_hidden = 800
        total_iter_num = 1000000
        teacher_learning_rate = 1E-6
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.1
    else:
        num_hidden = 400
        total_iter_num = 20000
        teacher_learning_rate = 4E-5
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.001
    teacher_net = get_mnist_sym(num_hidden=num_hidden)
    logsoftmax = LogSoftmax()
    student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
    data_shape = (minibatch_size,) + X.shape[1::]
    teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
    teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_exe, student_params, _ = \
        DistilledSGLD(teacher_sym=teacher_net, student_sym=student_net,
                      teacher_data_inputs=teacher_data_inputs,
                      student_data_inputs=student_data_inputs,
                      X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=total_iter_num,
                      student_initializer=student_initializer,
                      teacher_initializer=teacher_initializer,
                      student_optimizing_algorithm="adam",
                      teacher_learning_rate=teacher_learning_rate,
                      student_learning_rate=student_learning_rate,
                      teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
                      perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev())
Example #27
Source File: bdk_demo.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def run_mnist_SGD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, exe_params, _ = SGD(sym=net, dev=dev(), data_inputs=data_inputs,
                             X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                             total_iter_num=1000000,
                             initializer=initializer,
                             lr=5E-6, prior_precision=1.0, minibatch_size=100)
Example #28
Source File: bdk_demo.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def run_mnist_SGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, sample_pool = SGLD(sym=net, dev=dev(), data_inputs=data_inputs,
                            X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                            total_iter_num=1000000,
                            initializer=initializer,
                            learning_rate=4E-6, prior_precision=1.0, minibatch_size=100,
                            thin_interval=100, burn_in_iter_num=1000)
Example #29
Source File: bdk_demo.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def run_mnist_DistilledSGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    if training_num >= 10000:
        num_hidden = 800
        total_iter_num = 1000000
        teacher_learning_rate = 1E-6
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.1
    else:
        num_hidden = 400
        total_iter_num = 20000
        teacher_learning_rate = 4E-5
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.001
    teacher_net = get_mnist_sym(num_hidden=num_hidden)
    logsoftmax = LogSoftmax()
    student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
    data_shape = (minibatch_size,) + X.shape[1::]
    teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
    teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_exe, student_params, _ = \
        DistilledSGLD(teacher_sym=teacher_net, student_sym=student_net,
                      teacher_data_inputs=teacher_data_inputs,
                      student_data_inputs=student_data_inputs,
                      X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=total_iter_num,
                      student_initializer=student_initializer,
                      teacher_initializer=teacher_initializer,
                      student_optimizing_algorithm="adam",
                      teacher_learning_rate=teacher_learning_rate,
                      student_learning_rate=student_learning_rate,
                      teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
                      perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev())
Example #30
Source File: bdk_demo.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def run_toy_HMC():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = Y.shape[0]
    noise_precision = 1 / 9.0
    net = get_toy_sym(True, noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    sample_pool = HMC(net, data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                      sample_num=300000, initializer=initializer, prior_precision=1.0,
                      learning_rate=1E-3, L=10, dev=dev())