Python mxnet.ndarray.zeros() Examples

The following are 30 code examples of mxnet.ndarray.zeros(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module mxnet.ndarray, or try the search function.
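For orientation before the project examples, here is a minimal sketch of the call itself; the shapes, dtype, and context below are illustrative assumptions, not taken from any project on this page.

import mxnet as mx
from mxnet import ndarray as nd

# A 2x3 array of zeros; dtype defaults to float32 and ctx to the default (CPU) context.
a = nd.zeros((2, 3))

# Shape, context, and dtype can all be passed explicitly.
b = nd.zeros((4,), ctx=mx.cpu(), dtype='int32')

print(a.shape, a.dtype)  # (2, 3) <class 'numpy.float32'>
print(b.asnumpy())       # [0 0 0 0]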
Example #1
Source File: ytf.py    From MaskInsightface with Apache License 2.0
def get_feature_set(name, vid, args):
  global feature_cache
  key = (name,vid)
  if key in feature_cache:
    return feature_cache[key]

  input_dir = os.path.join(args.image_dir, name, str(vid))
  # Single-image NCHW input buffer; each frame is copied into it below.
  data = nd.zeros((1, 3, image_size[0], image_size[1]))
  F = []
  for img in os.listdir(input_dir):
    img = os.path.join(input_dir, img)
    img = cv2.imread(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.transpose(img, (2,0,1))
    data[0][:] = img
    db = mx.io.DataBatch(data=(data,))
    model.forward(db, is_train=False)
    net_out = model.get_outputs()[0].asnumpy().flatten()
    F.append(net_out)
  F = np.array(F)
  F = sklearn.preprocessing.normalize(F)

  feature_cache[key] = F
  return F 
Example #2
Source File: bdk_demo.py    From training_results_v0.6 with Apache License 2.0
def run_toy_SGLD():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = 1
    teacher_noise_precision = 1.0 / 9.0
    net = get_toy_sym(True, teacher_noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    exe, params, _ = \
        SGLD(sym=net, data_inputs=data_inputs,
             X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=50000,
             initializer=initializer,
             learning_rate=1E-4,
             # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
             prior_precision=0.1,
             burn_in_iter_num=1000,
             thin_interval=10,
             task='regression',
             minibatch_size=minibatch_size, dev=dev()) 
Example #3
Source File: bdk_demo.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
    if grad is None:
        grad = nd.empty(theta.shape, theta.context)
    theta1 = theta.asnumpy()[0]
    theta2 = theta.asnumpy()[1]
    v1 = sigma1 ** 2
    v2 = sigma2 ** 2
    vx = sigmax ** 2
    # The likelihood is a two-component Gaussian mixture; compute each
    # exponential once instead of repeating it inside both gradient terms.
    exp1 = numpy.exp(-(X - theta1) ** 2 / (2 * vx))
    exp2 = numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
    denominator = exp1 + exp2
    grad_npy = numpy.zeros(theta.shape)
    grad_npy[0] = -rescale_grad * ((exp1 * (X - theta1) / vx
                                    + exp2 * (X - theta1 - theta2) / vx)
                                   / denominator).sum() + theta1 / v1
    grad_npy[1] = -rescale_grad * ((exp2 * (X - theta1 - theta2) / vx)
                                   / denominator).sum() + theta2 / v2
    grad[:] = grad_npy
    return grad 
Example #4
Source File: inference_time_evaluation_mxnet.py    From MobileFace with MIT License
def get_time(self, module):
    data = nd.zeros(self.input_shape)
    batch = mxnet.io.DataBatch(data=(data,))

    all_time = []

    symbol_name = self.symbol_file.split('/')[-1]
    print('Start to evaluate: %s' % symbol_name)
    for i in range(self.iteration):
        time_start = datetime.datetime.now()

        module.forward(batch, is_train=False)
        net_out = module.get_outputs()[0].asnumpy()

        time_end = datetime.datetime.now()
        one_time = time_end - time_start
        all_time.append(one_time.total_seconds())

    print('Finish %d iterations in %f ms. Average infer time is [%f ms].' % (
        self.iteration, numpy.sum(all_time) * 1000, numpy.mean(all_time) * 1000)) 
Example #5
Source File: bdk_demo.py    From training_results_v0.6 with Apache License 2.0
def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
    if grad is None:
        grad = nd.empty(theta.shape, theta.context)
    theta1 = theta.asnumpy()[0]
    theta2 = theta.asnumpy()[1]
    v1 = sigma1 ** 2
    v2 = sigma2 ** 2
    vx = sigmax ** 2
    # The likelihood is a two-component Gaussian mixture; compute each
    # exponential once instead of repeating it inside both gradient terms.
    exp1 = numpy.exp(-(X - theta1) ** 2 / (2 * vx))
    exp2 = numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
    denominator = exp1 + exp2
    grad_npy = numpy.zeros(theta.shape)
    grad_npy[0] = -rescale_grad * ((exp1 * (X - theta1) / vx
                                    + exp2 * (X - theta1 - theta2) / vx)
                                   / denominator).sum() + theta1 / v1
    grad_npy[1] = -rescale_grad * ((exp2 * (X - theta1 - theta2) / vx)
                                   / denominator).sum() + theta2 / v2
    grad[:] = grad_npy
    return grad 
Example #6
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
    if grad is None:
        grad = nd.empty(theta.shape, theta.context)
    theta1 = theta.asnumpy()[0]
    theta2 = theta.asnumpy()[1]
    v1 = sigma1 ** 2
    v2 = sigma2 ** 2
    vx = sigmax ** 2
    # The likelihood is a two-component Gaussian mixture; compute each
    # exponential once instead of repeating it inside both gradient terms.
    exp1 = numpy.exp(-(X - theta1) ** 2 / (2 * vx))
    exp2 = numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
    denominator = exp1 + exp2
    grad_npy = numpy.zeros(theta.shape)
    grad_npy[0] = -rescale_grad * ((exp1 * (X - theta1) / vx
                                    + exp2 * (X - theta1 - theta2) / vx)
                                   / denominator).sum() + theta1 / v1
    grad_npy[1] = -rescale_grad * ((exp2 * (X - theta1 - theta2) / vx)
                                   / denominator).sum() + theta2 / v2
    grad[:] = grad_npy
    return grad 
Example #7
Source File: test_contrib_text.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_download_embed():
    @text.embedding.register
    class Test(text.embedding._TokenEmbedding):
        # 33 bytes.
        pretrained_file_name_sha1 = \
            {'embedding_test.vec': '29b9a6511cf4b5aae293c44a9ec1365b74f2a2f8'}
        namespace = 'test'

        def __init__(self, embedding_root='embeddings', init_unknown_vec=nd.zeros, **kwargs):
            pretrained_file_name = 'embedding_test.vec'
            Test._check_pretrained_file_names(pretrained_file_name)

            super(Test, self).__init__(**kwargs)

            pretrained_file_path = Test._get_pretrained_file(embedding_root, pretrained_file_name)

            self._load_embedding(pretrained_file_path, ' ', init_unknown_vec)

    test_embed = text.embedding.create('test')
    assert test_embed.token_to_idx['hello'] == 1
    assert test_embed.token_to_idx['world'] == 2
    assert_almost_equal(test_embed.idx_to_vec[1].asnumpy(), (nd.arange(5) + 1).asnumpy())
    assert_almost_equal(test_embed.idx_to_vec[2].asnumpy(), (nd.arange(5) + 6).asnumpy())
    assert_almost_equal(test_embed.idx_to_vec[0].asnumpy(), nd.zeros((5,)).asnumpy()) 
Example #8
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def run_toy_SGLD():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = 1
    teacher_noise_precision = 1.0 / 9.0
    net = get_toy_sym(True, teacher_noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    exe, params, _ = \
        SGLD(sym=net, data_inputs=data_inputs,
             X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=50000,
             initializer=initializer,
             learning_rate=1E-4,
             # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
             prior_precision=0.1,
             burn_in_iter_num=1000,
             thin_interval=10,
             task='regression',
             minibatch_size=minibatch_size, dev=dev()) 
Example #9
Source File: test_contrib_text.py    From SNIPER-mxnet with Apache License 2.0
def test_download_embed():
    @text.embedding.register
    class Test(text.embedding._TokenEmbedding):
        # 33 bytes.
        pretrained_file_name_sha1 = \
            {'embedding_test.vec': '29b9a6511cf4b5aae293c44a9ec1365b74f2a2f8'}
        namespace = 'test'

        def __init__(self, embedding_root='embeddings', init_unknown_vec=nd.zeros, **kwargs):
            pretrained_file_name = 'embedding_test.vec'
            Test._check_pretrained_file_names(pretrained_file_name)

            super(Test, self).__init__(**kwargs)

            pretrained_file_path = Test._get_pretrained_file(embedding_root, pretrained_file_name)

            self._load_embedding(pretrained_file_path, ' ', init_unknown_vec)

    test_embed = text.embedding.create('test')
    assert test_embed.token_to_idx['hello'] == 1
    assert test_embed.token_to_idx['world'] == 2
    assert_almost_equal(test_embed.idx_to_vec[1].asnumpy(), (nd.arange(5) + 1).asnumpy())
    assert_almost_equal(test_embed.idx_to_vec[2].asnumpy(), (nd.arange(5) + 6).asnumpy())
    assert_almost_equal(test_embed.idx_to_vec[0].asnumpy(), nd.zeros((5,)).asnumpy()) 
Example #10
Source File: bdk_demo.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def run_toy_SGLD():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = 1
    teacher_noise_precision = 1.0 / 9.0
    net = get_toy_sym(True, teacher_noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    exe, params, _ = \
        SGLD(sym=net, data_inputs=data_inputs,
             X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=50000,
             initializer=initializer,
             learning_rate=1E-4,
             # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
             prior_precision=0.1,
             burn_in_iter_num=1000,
             thin_interval=10,
             task='regression',
             minibatch_size=minibatch_size, dev=dev()) 
Example #11
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def run_mnist_SGD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, exe_params, _ = SGD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
                             X_test=X_test, Y_test=Y_test,
                             total_iter_num=1000000,
                             initializer=initializer,
                             lr=5E-6, prior_precision=1.0, minibatch_size=100) 
Example #12
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def run_toy_HMC():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = Y.shape[0]
    noise_precision = 1 / 9.0
    net = get_toy_sym(True, noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    sample_pool = HMC(net, data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                      sample_num=300000, initializer=initializer, prior_precision=1.0,
                      learning_rate=1E-3, L=10, dev=dev()) 
Example #13
Source File: bdk_demo.py    From training_results_v0.6 with Apache License 2.0
def run_synthetic_SGLD():
    theta1 = 0
    theta2 = 1
    sigma1 = numpy.sqrt(10)
    sigma2 = 1
    sigmax = numpy.sqrt(2)
    X = load_synthetic(theta1=theta1, theta2=theta2, sigmax=sigmax, num=100)
    minibatch_size = 1
    total_iter_num = 1000000
    lr_scheduler = SGLDScheduler(begin_rate=0.01, end_rate=0.0001, total_iter_num=total_iter_num,
                                 factor=0.55)
    optimizer = mx.optimizer.create('sgld',
                                    learning_rate=None,
                                    rescale_grad=1.0,
                                    lr_scheduler=lr_scheduler,
                                    wd=0)
    updater = mx.optimizer.get_updater(optimizer)
    theta = mx.random.normal(0, 1, (2,), mx.cpu())
    grad = nd.empty((2,), mx.cpu())
    samples = numpy.zeros((2, total_iter_num))
    start = time.time()
    for i in range(total_iter_num):
        if (i + 1) % 100000 == 0:
            end = time.time()
            print("Iter:%d, Time spent: %f" % (i + 1, end - start))
            start = time.time()
        ind = numpy.random.randint(0, X.shape[0])
        synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
                       rescale_grad=X.shape[0] / float(minibatch_size), grad=grad)
        updater('theta', grad, theta)
        samples[:, i] = theta.asnumpy()
    plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
    plt.colorbar()
    plt.show() 
Example #14
Source File: ytf.py    From MaskInsightface with Apache License 2.0
def get_feature(name, vid, args):
  global feature_cache
  key = (name,vid)
  if key in feature_cache:
    return feature_cache[key]

  input_dir = os.path.join(args.image_dir, name, str(vid))
  # Single-image NCHW input buffer; each frame is copied into it below.
  data = nd.zeros((1, 3, image_size[0], image_size[1]))
  F = []
  for img in os.listdir(input_dir):
    img = os.path.join(input_dir, img)
    img = cv2.imread(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.transpose(img, (2,0,1))
    data[0][:] = img
    db = mx.io.DataBatch(data=(data,))
    model.forward(db, is_train=False)
    net_out = model.get_outputs()[0].asnumpy().flatten()
    F.append(net_out)
  F = np.array(F)
  F = sklearn.preprocessing.normalize(F)
  feature = np.mean(F, axis=0, keepdims=True)
  feature = sklearn.preprocessing.normalize(feature).flatten()

  feature_cache[key] = feature
  return feature 
Example #15
Source File: ytf.py    From 1.FaceRecognition with MIT License
def get_feature(name, vid, args):
  global feature_cache
  key = (name,vid)
  if key in feature_cache:
    return feature_cache[key]

  input_dir = os.path.join(args.image_dir, name, str(vid))
  # Single-image NCHW input buffer; each frame is copied into it below.
  data = nd.zeros((1, 3, image_size[0], image_size[1]))
  F = []
  for img in os.listdir(input_dir):
    img = os.path.join(input_dir, img)
    img = cv2.imread(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.transpose(img, (2,0,1))
    data[0][:] = img
    db = mx.io.DataBatch(data=(data,))
    model.forward(db, is_train=False)
    net_out = model.get_outputs()[0].asnumpy().flatten()
    F.append(net_out)
  F = np.array(F)
  F = sklearn.preprocessing.normalize(F)
  feature = np.mean(F, axis=0, keepdims=True)
  feature = sklearn.preprocessing.normalize(feature).flatten()

  feature_cache[key] = feature
  return feature 
Example #16
Source File: parall_module_local_v1.py    From 1.FaceRecognition with MIT License
def get_ndarray2(self, context, name, arr):
    # Cache one zero-initialized buffer per (name, context) pair, then copy into it.
    key = "%s_%s" % (name, context)
    if key not in self._nd_cache:
        v = nd.zeros(shape=arr.shape, ctx=context)
        self._nd_cache[key] = v
    else:
        v = self._nd_cache[key]
    arr.copyto(v)
    return v 
Example #17
Source File: parall_module_local_v1.py    From 1.FaceRecognition with MIT License
def get_ndarray(self, context, name, shape):
    # Return a cached zero-initialized buffer for this (name, context), allocating on first use.
    key = "%s_%s" % (name, context)
    if key not in self._nd_cache:
        v = nd.zeros(shape=shape, ctx=context)
        self._nd_cache[key] = v
    else:
        v = self._nd_cache[key]
    return v 
Example #18
Source File: train_cgan.py    From panoptic-fpn-gluon with Apache License 2.0
def gan_loss(input, target_is_real):
    # LSGAN: MSE loss against a target of all ones (real) or all zeros (fake).
    if target_is_real:
        target = nd.ones(input.shape, ctx=input.context)
    else:
        target = nd.zeros(input.shape, ctx=input.context)
    e = ((input - target) ** 2).mean(axis=0, exclude=True)
    return e 
Example #19
Source File: train_cgan.py    From panoptic-fpn-gluon with Apache License 2.0
def weights_init(layers):
    for layer in layers:
        classname = layer.__class__.__name__
        if hasattr(layer, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
            layer.weight.set_data(nd.random.normal(0.0, 0.02, shape=layer.weight.data().shape))
            if hasattr(layer, 'bias') and layer.bias is not None:
                layer.bias.set_data(nd.zeros(layer.bias.data().shape))
        elif classname.find('BatchNorm') != -1:
            layer.gamma.set_data(nd.random.normal(1.0, 0.02, shape=layer.gamma.data().shape))
            # BatchNorm has gamma/beta but no bias; zero beta using its own shape.
            layer.beta.set_data(nd.zeros(layer.beta.data().shape)) 
Example #20
Source File: train_srgan.py    From panoptic-fpn-gluon with Apache License 2.0
def weights_init(params):
    for param_name in params:
        param = params[param_name]
        if param_name.find('conv') != -1:
            if param_name.find('weight') != -1:
                param.set_data(nd.random.normal(0.0, 0.02, shape=param.data().shape))
            elif param_name.find('bias') != -1:
                param.set_data(nd.zeros(param.data().shape))
        elif param_name.find('batchnorm') != -1:
            if param_name.find('gamma') != -1:
                param.set_data(nd.random.normal(1.0, 0.02, shape=param.data().shape))
            elif param_name.find('beta') != -1:
                param.set_data(nd.zeros(param.data().shape)) 
Example #21
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def run_mnist_SGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, sample_pool = SGLD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
                            X_test=X_test, Y_test=Y_test,
                            total_iter_num=1000000,
                            initializer=initializer,
                            learning_rate=4E-6, prior_precision=1.0, minibatch_size=100,
                            thin_interval=100, burn_in_iter_num=1000) 
Example #22
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def run_synthetic_SGLD():
    theta1 = 0
    theta2 = 1
    sigma1 = numpy.sqrt(10)
    sigma2 = 1
    sigmax = numpy.sqrt(2)
    X = load_synthetic(theta1=theta1, theta2=theta2, sigmax=sigmax, num=100)
    minibatch_size = 1
    total_iter_num = 1000000
    lr_scheduler = SGLDScheduler(begin_rate=0.01, end_rate=0.0001, total_iter_num=total_iter_num,
                                 factor=0.55)
    optimizer = mx.optimizer.create('sgld',
                                    learning_rate=None,
                                    rescale_grad=1.0,
                                    lr_scheduler=lr_scheduler,
                                    wd=0)
    updater = mx.optimizer.get_updater(optimizer)
    theta = mx.random.normal(0, 1, (2,), mx.cpu())
    grad = nd.empty((2,), mx.cpu())
    samples = numpy.zeros((2, total_iter_num))
    start = time.time()
    for i in range(total_iter_num):
        if (i + 1) % 100000 == 0:
            end = time.time()
            print("Iter:%d, Time spent: %f" % (i + 1, end - start))
            start = time.time()
        ind = numpy.random.randint(0, X.shape[0])
        synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
                       rescale_grad=X.shape[0] / float(minibatch_size), grad=grad)
        updater('theta', grad, theta)
        samples[:, i] = theta.asnumpy()
    plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
    plt.colorbar()
    plt.show() 
Example #23
Source File: base.py    From training_results_v0.6 with Apache License 2.0
def update_acc_grad(self):
        if self.acc_grad is None:
            self.acc_grad = OrderedDict([(n, nd.zeros(v.shape, ctx=self.ctx))
                                         for n, v in self.params_grad.items()])
        for k, v in self.acc_grad.items():
            v[:] = v + self.params_grad[k] 
Example #24
Source File: base.py    From SNIPER-mxnet with Apache License 2.0
def update_acc_grad(self):
        if self.acc_grad is None:
            self.acc_grad = OrderedDict([(n, nd.zeros(v.shape, ctx=self.ctx))
                                         for n, v in self.params_grad.items()])
        for k, v in self.acc_grad.items():
            v[:] = v + self.params_grad[k] 
Example #25
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def run_mnist_DistilledSGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    if training_num >= 10000:
        num_hidden = 800
        total_iter_num = 1000000
        teacher_learning_rate = 1E-6
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.1
    else:
        num_hidden = 400
        total_iter_num = 20000
        teacher_learning_rate = 4E-5
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.001
    teacher_net = get_mnist_sym(num_hidden=num_hidden)
    logsoftmax = LogSoftmax()
    student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
    data_shape = (minibatch_size,) + X.shape[1::]
    teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
    teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_exe, student_params, _ = \
        DistilledSGLD(teacher_sym=teacher_net, student_sym=student_net,
                      teacher_data_inputs=teacher_data_inputs,
                      student_data_inputs=student_data_inputs,
                      X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=total_iter_num,
                      student_initializer=student_initializer,
                      teacher_initializer=teacher_initializer,
                      student_optimizing_algorithm="adam",
                      teacher_learning_rate=teacher_learning_rate,
                      student_learning_rate=student_learning_rate,
                      teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
                      perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev()) 
Example #26
Source File: bdk_demo.py    From training_results_v0.6 with Apache License 2.0
def run_mnist_DistilledSGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    if training_num >= 10000:
        num_hidden = 800
        total_iter_num = 1000000
        teacher_learning_rate = 1E-6
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.1
    else:
        num_hidden = 400
        total_iter_num = 20000
        teacher_learning_rate = 4E-5
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.001
    teacher_net = get_mnist_sym(num_hidden=num_hidden)
    logsoftmax = LogSoftmax()
    student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
    data_shape = (minibatch_size,) + X.shape[1::]
    teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
    teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_exe, student_params, _ = \
        DistilledSGLD(teacher_sym=teacher_net, student_sym=student_net,
                      teacher_data_inputs=teacher_data_inputs,
                      student_data_inputs=student_data_inputs,
                      X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=total_iter_num,
                      student_initializer=student_initializer,
                      teacher_initializer=teacher_initializer,
                      student_optimizing_algorithm="adam",
                      teacher_learning_rate=teacher_learning_rate,
                      student_learning_rate=student_learning_rate,
                      teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
                      perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev()) 
Example #27
Source File: bdk_demo.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def run_mnist_SGD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, exe_params, _ = SGD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
                             X_test=X_test, Y_test=Y_test,
                             total_iter_num=1000000,
                             initializer=initializer,
                             lr=5E-6, prior_precision=1.0, minibatch_size=100) 
Example #28
Source File: bdk_demo.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def run_mnist_SGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, sample_pool = SGLD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
                            X_test=X_test, Y_test=Y_test,
                            total_iter_num=1000000,
                            initializer=initializer,
                            learning_rate=4E-6, prior_precision=1.0, minibatch_size=100,
                            thin_interval=100, burn_in_iter_num=1000) 
Example #29
Source File: bdk_demo.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def run_mnist_DistilledSGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    if training_num >= 10000:
        num_hidden = 800
        total_iter_num = 1000000
        teacher_learning_rate = 1E-6
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.1
    else:
        num_hidden = 400
        total_iter_num = 20000
        teacher_learning_rate = 4E-5
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.001
    teacher_net = get_mnist_sym(num_hidden=num_hidden)
    logsoftmax = LogSoftmax()
    student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
    data_shape = (minibatch_size,) + X.shape[1::]
    teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
    teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_exe, student_params, _ = \
        DistilledSGLD(teacher_sym=teacher_net, student_sym=student_net,
                      teacher_data_inputs=teacher_data_inputs,
                      student_data_inputs=student_data_inputs,
                      X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=total_iter_num,
                      student_initializer=student_initializer,
                      teacher_initializer=teacher_initializer,
                      student_optimizing_algorithm="adam",
                      teacher_learning_rate=teacher_learning_rate,
                      student_learning_rate=student_learning_rate,
                      teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
                      perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev()) 
Example #30
Source File: bdk_demo.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def run_toy_HMC():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = Y.shape[0]
    noise_precision = 1 / 9.0
    net = get_toy_sym(True, noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    sample_pool = HMC(net, data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                      sample_num=300000, initializer=initializer, prior_precision=1.0,
                      learning_rate=1E-3, L=10, dev=dev())