Python mxnet.ndarray.zeros() Examples

The following are 30 code examples of mxnet.ndarray.zeros(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module mxnet.ndarray, or try the search function.
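For orientation before the project examples, here is a minimal sketch of the call itself; the shapes, dtype, and context below are illustrative assumptions, not taken from any project on this page.

import mxnet as mx
from mxnet import ndarray as nd

# A 2x3 array of zeros; dtype defaults to float32 and ctx to the default (CPU) context.
a = nd.zeros((2, 3))

# Shape, context, and dtype can all be passed explicitly.
b = nd.zeros((4,), ctx=mx.cpu(), dtype='int32')

print(a.shape, a.dtype)  # (2, 3) <class 'numpy.float32'>
print(b.asnumpy())       # [0 0 0 0]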
Example #1
Source File: ytf.py    From MaskInsightface with Apache License 2.0
def get_feature_set(name, vid, args):
  global feature_cache
  key = (name,vid)
  if key in feature_cache:
    return feature_cache[key]

  input_dir = os.path.join(args.image_dir, name, str(vid))
  # Single-image NCHW input buffer; each frame is copied into it below.
  data = nd.zeros((1, 3, image_size[0], image_size[1]))
  F = []
  for img in os.listdir(input_dir):
    img = os.path.join(input_dir, img)
    img = cv2.imread(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.transpose(img, (2,0,1))
    data[0][:] = img
    db = mx.io.DataBatch(data=(data,))
    model.forward(db, is_train=False)
    net_out = model.get_outputs()[0].asnumpy().flatten()
    F.append(net_out)
  F = np.array(F)
  F = sklearn.preprocessing.normalize(F)

  feature_cache[key] = F
  return F 
Example #2
Source File: bdk_demo.py    From training_results_v0.6 with Apache License 2.0
def run_toy_SGLD():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = 1
    teacher_noise_precision = 1.0 / 9.0
    net = get_toy_sym(True, teacher_noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    exe, params, _ = \
        SGLD(sym=net, data_inputs=data_inputs,
             X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=50000,
             initializer=initializer,
             learning_rate=1E-4,
             # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
             prior_precision=0.1,
             burn_in_iter_num=1000,
             thin_interval=10,
             task='regression',
             minibatch_size=minibatch_size, dev=dev()) 
Example #3
Source File: bdk_demo.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
    if grad is None:
        grad = nd.empty(theta.shape, theta.context)
    theta1 = theta.asnumpy()[0]
    theta2 = theta.asnumpy()[1]
    v1 = sigma1 ** 2
    v2 = sigma2 ** 2
    vx = sigmax ** 2
    # The likelihood is a two-component Gaussian mixture; compute each
    # exponential once instead of repeating it inside both gradient terms.
    exp1 = numpy.exp(-(X - theta1) ** 2 / (2 * vx))
    exp2 = numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
    denominator = exp1 + exp2
    grad_npy = numpy.zeros(theta.shape)
    grad_npy[0] = -rescale_grad * ((exp1 * (X - theta1) / vx
                                    + exp2 * (X - theta1 - theta2) / vx)
                                   / denominator).sum() + theta1 / v1
    grad_npy[1] = -rescale_grad * ((exp2 * (X - theta1 - theta2) / vx)
                                   / denominator).sum() + theta2 / v2
    grad[:] = grad_npy
    return grad 
Example #4
Source File: inference_time_evaluation_mxnet.py    From MobileFace with MIT License
def get_time(self, module):
    data = nd.zeros(self.input_shape)
    batch = mxnet.io.DataBatch(data=(data,))

    all_time = []

    symbol_name = self.symbol_file.split('/')[-1]
    print('Start to evaluate: %s' % symbol_name)
    for i in range(self.iteration):
        time_start = datetime.datetime.now()

        module.forward(batch, is_train=False)
        net_out = module.get_outputs()[0].asnumpy()

        time_end = datetime.datetime.now()
        one_time = time_end - time_start
        all_time.append(one_time.total_seconds())

    print('Finish %d iterations in %f ms. Average infer time is [%f ms].' % (
        self.iteration, numpy.sum(all_time) * 1000, numpy.mean(all_time) * 1000)) 
Example #5
Source File: bdk_demo.py    From training_results_v0.6 with Apache License 2.0
def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
    if grad is None:
        grad = nd.empty(theta.shape, theta.context)
    theta1 = theta.asnumpy()[0]
    theta2 = theta.asnumpy()[1]
    v1 = sigma1 ** 2
    v2 = sigma2 ** 2
    vx = sigmax ** 2
    # The likelihood is a two-component Gaussian mixture; compute each
    # exponential once instead of repeating it inside both gradient terms.
    exp1 = numpy.exp(-(X - theta1) ** 2 / (2 * vx))
    exp2 = numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
    denominator = exp1 + exp2
    grad_npy = numpy.zeros(theta.shape)
    grad_npy[0] = -rescale_grad * ((exp1 * (X - theta1) / vx
                                    + exp2 * (X - theta1 - theta2) / vx)
                                   / denominator).sum() + theta1 / v1
    grad_npy[1] = -rescale_grad * ((exp2 * (X - theta1 - theta2) / vx)
                                   / denominator).sum() + theta2 / v2
    grad[:] = grad_npy
    return grad 
Example #6
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
    if grad is None:
        grad = nd.empty(theta.shape, theta.context)
    theta1 = theta.asnumpy()[0]
    theta2 = theta.asnumpy()[1]
    v1 = sigma1 ** 2
    v2 = sigma2 ** 2
    vx = sigmax ** 2
    # The likelihood is a two-component Gaussian mixture; compute each
    # exponential once instead of repeating it inside both gradient terms.
    exp1 = numpy.exp(-(X - theta1) ** 2 / (2 * vx))
    exp2 = numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
    denominator = exp1 + exp2
    grad_npy = numpy.zeros(theta.shape)
    grad_npy[0] = -rescale_grad * ((exp1 * (X - theta1) / vx
                                    + exp2 * (X - theta1 - theta2) / vx)
                                   / denominator).sum() + theta1 / v1
    grad_npy[1] = -rescale_grad * ((exp2 * (X - theta1 - theta2) / vx)
                                   / denominator).sum() + theta2 / v2
    grad[:] = grad_npy
    return grad 
Example #7
Source File: test_contrib_text.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_download_embed():
    @text.embedding.register
    class Test(text.embedding._TokenEmbedding):
        # 33 bytes.
        pretrained_file_name_sha1 = \
            {'embedding_test.vec': '29b9a6511cf4b5aae293c44a9ec1365b74f2a2f8'}
        namespace = 'test'

        def __init__(self, embedding_root='embeddings', init_unknown_vec=nd.zeros, **kwargs):
            pretrained_file_name = 'embedding_test.vec'
            Test._check_pretrained_file_names(pretrained_file_name)

            super(Test, self).__init__(**kwargs)

            pretrained_file_path = Test._get_pretrained_file(embedding_root, pretrained_file_name)

            self._load_embedding(pretrained_file_path, ' ', init_unknown_vec)

    test_embed = text.embedding.create('test')
    assert test_embed.token_to_idx['hello'] == 1
    assert test_embed.token_to_idx['world'] == 2
    assert_almost_equal(test_embed.idx_to_vec[1].asnumpy(), (nd.arange(5) + 1).asnumpy())
    assert_almost_equal(test_embed.idx_to_vec[2].asnumpy(), (nd.arange(5) + 6).asnumpy())
    assert_almost_equal(test_embed.idx_to_vec[0].asnumpy(), nd.zeros((5,)).asnumpy()) 
Example #8
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def run_toy_SGLD():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = 1
    teacher_noise_precision = 1.0 / 9.0
    net = get_toy_sym(True, teacher_noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    exe, params, _ = \
        SGLD(sym=net, data_inputs=data_inputs,
             X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=50000,
             initializer=initializer,
             learning_rate=1E-4,
             # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
             prior_precision=0.1,
             burn_in_iter_num=1000,
             thin_interval=10,
             task='regression',
             minibatch_size=minibatch_size, dev=dev()) 
Example #9
Source File: test_contrib_text.py    From SNIPER-mxnet with Apache License 2.0
def test_download_embed():
    @text.embedding.register
    class Test(text.embedding._TokenEmbedding):
        # 33 bytes.
        pretrained_file_name_sha1 = \
            {'embedding_test.vec': '29b9a6511cf4b5aae293c44a9ec1365b74f2a2f8'}
        namespace = 'test'

        def __init__(self, embedding_root='embeddings', init_unknown_vec=nd.zeros, **kwargs):
            pretrained_file_name = 'embedding_test.vec'
            Test._check_pretrained_file_names(pretrained_file_name)

            super(Test, self).__init__(**kwargs)

            pretrained_file_path = Test._get_pretrained_file(embedding_root, pretrained_file_name)

            self._load_embedding(pretrained_file_path, ' ', init_unknown_vec)

    test_embed = text.embedding.create('test')
    assert test_embed.token_to_idx['hello'] == 1
    assert test_embed.token_to_idx['world'] == 2
    assert_almost_equal(test_embed.idx_to_vec[1].asnumpy(), (nd.arange(5) + 1).asnumpy())
    assert_almost_equal(test_embed.idx_to_vec[2].asnumpy(), (nd.arange(5) + 6).asnumpy())
    assert_almost_equal(test_embed.idx_to_vec[0].asnumpy(), nd.zeros((5,)).asnumpy()) 
Example #10
Source File: bdk_demo.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def run_toy_SGLD():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = 1
    teacher_noise_precision = 1.0 / 9.0
    net = get_toy_sym(True, teacher_noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    exe, params, _ = \
        SGLD(sym=net, data_inputs=data_inputs,
             X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=50000,
             initializer=initializer,
             learning_rate=1E-4,
             # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
             prior_precision=0.1,
             burn_in_iter_num=1000,
             thin_interval=10,
             task='regression',
             minibatch_size=minibatch_size, dev=dev()) 
Example #11
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def run_mnist_SGD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, exe_params, _ = SGD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
                             X_test=X_test, Y_test=Y_test,
                             total_iter_num=1000000,
                             initializer=initializer,
                             lr=5E-6, prior_precision=1.0, minibatch_size=100) 
Example #12
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def run_toy_HMC():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = Y.shape[0]
    noise_precision = 1 / 9.0
    net = get_toy_sym(True, noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    sample_pool = HMC(net, data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                      sample_num=300000, initializer=initializer, prior_precision=1.0,
                      learning_rate=1E-3, L=10, dev=dev()) 
Example #13
Source File: bdk_demo.py    From training_results_v0.6 with Apache License 2.0
def run_synthetic_SGLD():
    theta1 = 0
    theta2 = 1
    sigma1 = numpy.sqrt(10)
    sigma2 = 1
    sigmax = numpy.sqrt(2)
    X = load_synthetic(theta1=theta1, theta2=theta2, sigmax=sigmax, num=100)
    minibatch_size = 1
    total_iter_num = 1000000
    lr_scheduler = SGLDScheduler(begin_rate=0.01, end_rate=0.0001, total_iter_num=total_iter_num,
                                 factor=0.55)
    optimizer = mx.optimizer.create('sgld',
                                    learning_rate=None,
                                    rescale_grad=1.0,
                                    lr_scheduler=lr_scheduler,
                                    wd=0)
    updater = mx.optimizer.get_updater(optimizer)
    theta = mx.random.normal(0, 1, (2,), mx.cpu())
    grad = nd.empty((2,), mx.cpu())
    samples = numpy.zeros((2, total_iter_num))
    start = time.time()
    for i in range(total_iter_num):
        if (i + 1) % 100000 == 0:
            end = time.time()
            print("Iter:%d, Time spent: %f" % (i + 1, end - start))
            start = time.time()
        ind = numpy.random.randint(0, X.shape[0])
        synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
                       rescale_grad=X.shape[0] / float(minibatch_size), grad=grad)
        updater('theta', grad, theta)
        samples[:, i] = theta.asnumpy()
    plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
    plt.colorbar()
    plt.show() 
Example #14
Source File: ytf.py    From MaskInsightface with Apache License 2.0
def get_feature(name, vid, args):
  global feature_cache
  key = (name,vid)
  if key in feature_cache:
    return feature_cache[key]

  input_dir = os.path.join(args.image_dir, name, str(vid))
  # Single-image NCHW input buffer; each frame is copied into it below.
  data = nd.zeros((1, 3, image_size[0], image_size[1]))
  F = []
  for img in os.listdir(input_dir):
    img = os.path.join(input_dir, img)
    img = cv2.imread(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.transpose(img, (2,0,1))
    data[0][:] = img
    db = mx.io.DataBatch(data=(data,))
    model.forward(db, is_train=False)
    net_out = model.get_outputs()[0].asnumpy().flatten()
    F.append(net_out)
  F = np.array(F)
  F = sklearn.preprocessing.normalize(F)
  feature = np.mean(F, axis=0, keepdims=True)
  feature = sklearn.preprocessing.normalize(feature).flatten()

  feature_cache[key] = feature
  return feature 
Example #15
Source File: ytf.py    From 1.FaceRecognition with MIT License
def get_feature(name, vid, args):
  global feature_cache
  key = (name,vid)
  if key in feature_cache:
    return feature_cache[key]

  input_dir = os.path.join(args.image_dir, name, str(vid))
  # Single-image NCHW input buffer; each frame is copied into it below.
  data = nd.zeros((1, 3, image_size[0], image_size[1]))
  F = []
  for img in os.listdir(input_dir):
    img = os.path.join(input_dir, img)
    img = cv2.imread(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.transpose(img, (2,0,1))
    data[0][:] = img
    db = mx.io.DataBatch(data=(data,))
    model.forward(db, is_train=False)
    net_out = model.get_outputs()[0].asnumpy().flatten()
    F.append(net_out)
  F = np.array(F)
  F = sklearn.preprocessing.normalize(F)
  feature = np.mean(F, axis=0, keepdims=True)
  feature = sklearn.preprocessing.normalize(feature).flatten()

  feature_cache[key] = feature
  return feature 
Example #16
Source File: parall_module_local_v1.py    From 1.FaceRecognition with MIT License
def get_ndarray2(self, context, name, arr):
    # Cache one zero-initialized buffer per (name, context) pair, then copy into it.
    key = "%s_%s" % (name, context)
    if key not in self._nd_cache:
        v = nd.zeros(shape=arr.shape, ctx=context)
        self._nd_cache[key] = v
    else:
        v = self._nd_cache[key]
    arr.copyto(v)
    return v 
Example #17
Source File: parall_module_local_v1.py    From 1.FaceRecognition with MIT License
def get_ndarray(self, context, name, shape):
    # Return a cached zero-initialized buffer for this (name, context), allocating on first use.
    key = "%s_%s" % (name, context)
    if key not in self._nd_cache:
        v = nd.zeros(shape=shape, ctx=context)
        self._nd_cache[key] = v
    else:
        v = self._nd_cache[key]
    return v 
Example #18
Source File: train_cgan.py    From panoptic-fpn-gluon with Apache License 2.0
def gan_loss(input, target_is_real):
    # LSGAN: MSE loss against a target of all ones (real) or all zeros (fake).
    if target_is_real:
        target = nd.ones(input.shape, ctx=input.context)
    else:
        target = nd.zeros(input.shape, ctx=input.context)
    e = ((input - target) ** 2).mean(axis=0, exclude=True)
    return e 
Example #19
Source File: train_cgan.py    From panoptic-fpn-gluon with Apache License 2.0
def weights_init(layers):
    for layer in layers:
        classname = layer.__class__.__name__
        if hasattr(layer, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
            layer.weight.set_data(nd.random.normal(0.0, 0.02, shape=layer.weight.data().shape))
            if hasattr(layer, 'bias') and layer.bias is not None:
                layer.bias.set_data(nd.zeros(layer.bias.data().shape))
        elif classname.find('BatchNorm') != -1:
            layer.gamma.set_data(nd.random.normal(1.0, 0.02, shape=layer.gamma.data().shape))
            # BatchNorm has gamma/beta but no bias; zero beta using its own shape.
            layer.beta.set_data(nd.zeros(layer.beta.data().shape)) 
Example #20
Source File: train_srgan.py    From panoptic-fpn-gluon with Apache License 2.0
def weights_init(params):
    for param_name in params:
        param = params[param_name]
        if param_name.find('conv') != -1:
            if param_name.find('weight') != -1:
                param.set_data(nd.random.normal(0.0, 0.02, shape=param.data().shape))
            elif param_name.find('bias') != -1:
                param.set_data(nd.zeros(param.data().shape))
        elif param_name.find('batchnorm') != -1:
            if param_name.find('gamma') != -1:
                param.set_data(nd.random.normal(1.0, 0.02, shape=param.data().shape))
            elif param_name.find('beta') != -1:
                param.set_data(nd.zeros(param.data().shape)) 
Example #21
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def run_mnist_SGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, sample_pool = SGLD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
                            X_test=X_test, Y_test=Y_test,
                            total_iter_num=1000000,
                            initializer=initializer,
                            learning_rate=4E-6, prior_precision=1.0, minibatch_size=100,
                            thin_interval=100, burn_in_iter_num=1000) 
Example #22
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def run_synthetic_SGLD():
    theta1 = 0
    theta2 = 1
    sigma1 = numpy.sqrt(10)
    sigma2 = 1
    sigmax = numpy.sqrt(2)
    X = load_synthetic(theta1=theta1, theta2=theta2, sigmax=sigmax, num=100)
    minibatch_size = 1
    total_iter_num = 1000000
    lr_scheduler = SGLDScheduler(begin_rate=0.01, end_rate=0.0001, total_iter_num=total_iter_num,
                                 factor=0.55)
    optimizer = mx.optimizer.create('sgld',
                                    learning_rate=None,
                                    rescale_grad=1.0,
                                    lr_scheduler=lr_scheduler,
                                    wd=0)
    updater = mx.optimizer.get_updater(optimizer)
    theta = mx.random.normal(0, 1, (2,), mx.cpu())
    grad = nd.empty((2,), mx.cpu())
    samples = numpy.zeros((2, total_iter_num))
    start = time.time()
    for i in range(total_iter_num):
        if (i + 1) % 100000 == 0:
            end = time.time()
            print("Iter:%d, Time spent: %f" % (i + 1, end - start))
            start = time.time()
        ind = numpy.random.randint(0, X.shape[0])
        synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
                       rescale_grad=X.shape[0] / float(minibatch_size), grad=grad)
        updater('theta', grad, theta)
        samples[:, i] = theta.asnumpy()
    plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
    plt.colorbar()
    plt.show() 
Example #23
Source File: base.py    From training_results_v0.6 with Apache License 2.0
def update_acc_grad(self):
        if self.acc_grad is None:
            self.acc_grad = OrderedDict([(n, nd.zeros(v.shape, ctx=self.ctx))
                                         for n, v in self.params_grad.items()])
        for k, v in self.acc_grad.items():
            v[:] = v + self.params_grad[k] 
Example #24
Source File: base.py    From SNIPER-mxnet with Apache License 2.0
def update_acc_grad(self):
        if self.acc_grad is None:
            self.acc_grad = OrderedDict([(n, nd.zeros(v.shape, ctx=self.ctx))
                                         for n, v in self.params_grad.items()])
        for k, v in self.acc_grad.items():
            v[:] = v + self.params_grad[k] 
Example #25
Source File: bdk_demo.py    From SNIPER-mxnet with Apache License 2.0
def run_mnist_DistilledSGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    if training_num >= 10000:
        num_hidden = 800
        total_iter_num = 1000000
        teacher_learning_rate = 1E-6
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.1
    else:
        num_hidden = 400
        total_iter_num = 20000
        teacher_learning_rate = 4E-5
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.001
    teacher_net = get_mnist_sym(num_hidden=num_hidden)
    logsoftmax = LogSoftmax()
    student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
    data_shape = (minibatch_size,) + X.shape[1::]
    teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
    teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_exe, student_params, _ = \
        DistilledSGLD(teacher_sym=teacher_net, student_sym=student_net,
                      teacher_data_inputs=teacher_data_inputs,
                      student_data_inputs=student_data_inputs,
                      X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=total_iter_num,
                      student_initializer=student_initializer,
                      teacher_initializer=teacher_initializer,
                      student_optimizing_algorithm="adam",
                      teacher_learning_rate=teacher_learning_rate,
                      student_learning_rate=student_learning_rate,
                      teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
                      perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev()) 
Example #26
Source File: bdk_demo.py    From training_results_v0.6 with Apache License 2.0
def run_mnist_DistilledSGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    if training_num >= 10000:
        num_hidden = 800
        total_iter_num = 1000000
        teacher_learning_rate = 1E-6
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.1
    else:
        num_hidden = 400
        total_iter_num = 20000
        teacher_learning_rate = 4E-5
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.001
    teacher_net = get_mnist_sym(num_hidden=num_hidden)
    logsoftmax = LogSoftmax()
    student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
    data_shape = (minibatch_size,) + X.shape[1::]
    teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
    teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_exe, student_params, _ = \
        DistilledSGLD(teacher_sym=teacher_net, student_sym=student_net,
                      teacher_data_inputs=teacher_data_inputs,
                      student_data_inputs=student_data_inputs,
                      X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=total_iter_num,
                      student_initializer=student_initializer,
                      teacher_initializer=teacher_initializer,
                      student_optimizing_algorithm="adam",
                      teacher_learning_rate=teacher_learning_rate,
                      student_learning_rate=student_learning_rate,
                      teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
                      perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev()) 
Example #27
Source File: bdk_demo.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def run_mnist_SGD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, exe_params, _ = SGD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
                             X_test=X_test, Y_test=Y_test,
                             total_iter_num=1000000,
                             initializer=initializer,
                             lr=5E-6, prior_precision=1.0, minibatch_size=100) 
Example #28
Source File: bdk_demo.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def run_mnist_SGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, sample_pool = SGLD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
                            X_test=X_test, Y_test=Y_test,
                            total_iter_num=1000000,
                            initializer=initializer,
                            learning_rate=4E-6, prior_precision=1.0, minibatch_size=100,
                            thin_interval=100, burn_in_iter_num=1000) 
Example #29
Source File: bdk_demo.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def run_mnist_DistilledSGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    if training_num >= 10000:
        num_hidden = 800
        total_iter_num = 1000000
        teacher_learning_rate = 1E-6
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.1
    else:
        num_hidden = 400
        total_iter_num = 20000
        teacher_learning_rate = 4E-5
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.001
    teacher_net = get_mnist_sym(num_hidden=num_hidden)
    logsoftmax = LogSoftmax()
    student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
    data_shape = (minibatch_size,) + X.shape[1::]
    teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
    teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_exe, student_params, _ = \
        DistilledSGLD(teacher_sym=teacher_net, student_sym=student_net,
                      teacher_data_inputs=teacher_data_inputs,
                      student_data_inputs=student_data_inputs,
                      X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=total_iter_num,
                      student_initializer=student_initializer,
                      teacher_initializer=teacher_initializer,
                      student_optimizing_algorithm="adam",
                      teacher_learning_rate=teacher_learning_rate,
                      student_learning_rate=student_learning_rate,
                      teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
                      perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev()) 
Example #30
Source File: bdk_demo.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def run_toy_HMC():
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = Y.shape[0]
    noise_precision = 1 / 9.0
    net = get_toy_sym(True, noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = mx.init.Uniform(0.07)
    sample_pool = HMC(net, data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                      sample_num=300000, initializer=initializer, prior_precision=1.0,
                      learning_rate=1E-3, L=10, dev=dev())