Python chainer.functions.average() Examples
The following are 30 code examples of chainer.functions.average().
You can go to the original project or source file by following the link above each example.
You may also want to check out all available functions and classes of the chainer.functions module.
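Before diving into the examples, here is a minimal, self-contained usage sketch of the function (our own illustration, not taken from any of the projects below); it assumes only that numpy and chainer are installed:

import numpy as np
import chainer.functions as F

x = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
w = np.array([1., 2., 3.], dtype=np.float32)

y = F.average(x, axis=1)                        # mean over axis 1 -> shape (2, 4)
y_w = F.average(x, axis=1, weights=w)           # weighted mean over axis 1
y_k = F.average(x, axis=(1, 2), keepdims=True)  # reduced axes kept -> shape (2, 1, 1)

# Forward values agree with numpy.average.
assert np.allclose(y_w.array, np.average(x, axis=1, weights=w))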
Example #1
Source File: subword.py From vecto with Mozilla Public License 2.0 | 6 votes |
def __init__(self, vocab, vocab_ngram_tokens, n_units, n_units_char,
             dropout, subword):  # dropout ratio, zero indicates no dropout
    super(SUMAVG, self).__init__()
    with self.init_scope():
        if subword.startswith('sum'):
            self.f_sumavg = F.sum
        if subword.startswith('avg'):
            self.f_sumavg = F.average
        self.embed = L.EmbedID(
            len(vocab_ngram_tokens.lst_words) + 2, n_units_char,
            initialW=I.Uniform(1. / n_units_char))
        # ngram tokens embedding plus 2 for OOV and end symbol.
        self.n_ngram = vocab_ngram_tokens.metadata["max_gram"] - vocab_ngram_tokens.metadata["min_gram"] + 1
        self.dropout = dropout
        self.vocab = vocab
        self.vocab_ngram_tokens = vocab_ngram_tokens
Example #2
Source File: test_average.py From chainer with MIT License | 6 votes |
def forward_expected(self, inputs):
    x, w = inputs
    if not self.use_weights:
        w = None
    y_expect = numpy.average(x, axis=self.axis, weights=w)
    if self.keepdims:
        # numpy.average does not support keepdims
        axis = self.axis
        if axis is None:
            axis = list(six.moves.range(x.ndim))
        elif isinstance(axis, int):
            axis = axis,
        shape = list(x.shape)
        for i in six.moves.range(len(shape)):
            if i in axis or i - len(shape) in axis:
                shape[i] = 1
        y_expect = y_expect.reshape(shape)
    y_expect = utils.force_array(y_expect, dtype=self.dtype)
    return y_expect,
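As an aside, the manual reshape above can be written more compactly; a small sketch of an equivalent (ours, assuming a NumPy recent enough that expand_dims accepts a tuple of axes):

import numpy as np

x = np.random.rand(2, 3, 4)
axis = (0, 2)
y = np.average(x, axis=axis)          # shape (3,)
y_keepdims = np.expand_dims(y, axis)  # shape (1, 3, 1), like keepdims=True
assert y_keepdims.shape == (1, 3, 1)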
Example #3
Source File: srcnn.py From waifu2x-chainer with MIT License | 6 votes |
def __call__(self, x):
    h = F.leaky_relu(self.conv1(x), self.slope)
    h = F.leaky_relu(self.conv2(h), self.slope)
    if hasattr(self, 'conv_bridge'):
        x = self.conv_bridge(x[:, :, 2:-2, 2:-2])
    else:
        x = x[:, :, 2:-2, 2:-2]
    if hasattr(self, 'fc1') and hasattr(self, 'fc2'):
        se = F.relu(self.fc1(F.average(h, axis=(2, 3))))
        se = F.sigmoid(self.fc2(se))[:, :, None, None]
        se = F.broadcast_to(se, h.shape)
        h = h * se
    return h + x
Example #4
Source File: model.py From graph-nvp with MIT License | 6 votes |
def log_prob(self, z, logdet):
    logdet[0] = logdet[0] - self.x_size
    logdet[1] = logdet[1] - self.adj_size

    ln_var_adj = self.ln_var * self.xp.ones([self.adj_size])
    ln_var_x = self.ln_var * self.xp.ones([self.x_size])
    nll_adj = F.average(
        F.sum(F.gaussian_nll(z[1],
                             self.xp.zeros([self.adj_size], dtype=self.xp.float32),
                             ln_var_adj, reduce='no'), axis=1)
        - logdet[1])
    nll_adj /= self.adj_size
    nll_x = F.average(
        F.sum(F.gaussian_nll(z[0],
                             self.xp.zeros([self.x_size], dtype=self.xp.float32),
                             ln_var_x, reduce='no'), axis=1)
        - logdet[0])
    nll_x /= self.x_size
    if nll_x.array < 0:
        print('nll_x:{}'.format(nll_x))
    return [nll_x, nll_adj]
Example #5
Source File: visual_backprop.py From see with GNU General Public License v3.0 | 6 votes |
def scale_layer(self, feature_map, node):
    input_data = node.inputs[0].data
    _, _, in_height, in_width = input_data.shape
    _, _, feature_height, feature_width = feature_map.shape
    kernel_height = in_height + 2 * node.ph - node.sy * (feature_height - 1)
    kernel_width = in_width + 2 * node.pw - node.sx * (feature_width - 1)
    scaled_feature = F.deconvolution_2d(
        feature_map,
        self.xp.ones((1, 1, kernel_height, kernel_width)),
        stride=(node.sy, node.sx),
        pad=(node.ph, node.pw),
        outsize=(in_height, in_width),
    )
    averaged_feature_map = F.average(input_data, axis=1, keepdims=True)
    feature_map = scaled_feature * averaged_feature_map
    return feature_map
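A short derivation of the kernel-size arithmetic (our own note, using the standard convolution shape formula): a convolution with kernel k, stride s, and padding p maps height H_in to H_out = floor((H_in + 2p - k) / s) + 1. Solving for k when the division is exact gives

    k = H_in + 2p - s * (H_out - 1)

which is exactly how kernel_height and kernel_width are computed, so the all-ones deconvolution upsamples the feature map back to the input resolution.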
Example #6
Source File: visual_backprop.py From kiss with GNU General Public License v3.0 | 6 votes |
def scale_layer(self, feature_map, node):
    input_data = node.inputs[0].data
    _, _, in_height, in_width = input_data.shape
    _, _, feature_height, feature_width = feature_map.shape
    kernel_height = in_height + 2 * node.ph - node.sy * (feature_height - 1)
    kernel_width = in_width + 2 * node.pw - node.sx * (feature_width - 1)
    scaled_feature = F.deconvolution_2d(
        feature_map,
        self.xp.ones((1, 1, kernel_height, kernel_width)),
        stride=(node.sy, node.sx),
        pad=(node.ph, node.pw),
        outsize=(in_height, in_width),
    )
    averaged_feature_map = F.average(input_data, axis=1, keepdims=True)
    feature_map = scaled_feature * averaged_feature_map
    return feature_map
Example #7
Source File: se_resnet.py From chainercv with MIT License | 5 votes |
def __init__(self, n_layer, n_class=None, pretrained_model=None,
             mean=None, initialW=None, fc_kwargs={}):
    blocks = self._blocks[n_layer]

    param, path = utils.prepare_pretrained_model(
        {'n_class': n_class, 'mean': mean},
        pretrained_model, self._models[n_layer],
        {'n_class': 1000, 'mean': _imagenet_mean})
    self.mean = param['mean']

    if initialW is None:
        initialW = initializers.HeNormal(scale=1., fan_option='fan_out')
    if 'initialW' not in fc_kwargs:
        fc_kwargs['initialW'] = initializers.Normal(scale=0.01)
    if pretrained_model:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        initialW = initializers.constant.Zero()
        fc_kwargs['initialW'] = initializers.constant.Zero()
    kwargs = {
        'initialW': initialW, 'stride_first': True, 'add_seblock': True}

    super(SEResNet, self).__init__()
    with self.init_scope():
        self.conv1 = Conv2DBNActiv(None, 64, 7, 2, 3, nobias=True,
                                   initialW=initialW)
        self.pool1 = lambda x: F.max_pooling_2d(x, ksize=3, stride=2)
        self.res2 = ResBlock(blocks[0], None, 64, 256, 1, **kwargs)
        self.res3 = ResBlock(blocks[1], None, 128, 512, 2, **kwargs)
        self.res4 = ResBlock(blocks[2], None, 256, 1024, 2, **kwargs)
        self.res5 = ResBlock(blocks[3], None, 512, 2048, 2, **kwargs)
        self.pool5 = lambda x: F.average(x, axis=(2, 3))
        self.fc6 = L.Linear(None, param['n_class'], **fc_kwargs)
        self.prob = F.softmax

    if path:
        chainer.serializers.load_npz(path, self)
Example #8
Source File: angular_loss.py From deep_metric_learning with MIT License | 5 votes |
def angular_mc_loss(f, f_p, alpha=45, in_degree=True):
    '''
    Args:
        f (chainer.Variable or xp.ndarray): Anchor vectors. Each vector in f
            must be l2 normalized.
        f_p (chainer.Variable or xp.ndarray): Positive vectors. Each vector
            in f_p must be l2 normalized.
    '''
    xp = cuda.get_array_module(f)

    if in_degree:
        alpha = np.deg2rad(alpha)
    sq_tan_alpha = np.tan(alpha) ** 2
    n_pairs = len(f)

    # first and second term of f_{a,p,n}
    term1 = 4 * sq_tan_alpha * matmul(f + f_p, transpose(f_p))
    term2 = 2 * (1 + sq_tan_alpha) * F.sum(f * f_p, axis=1, keepdims=True)
    # term2 = 2 * (1 + sq_tan_alpha) * F.batch_matmul(f, f_p, transa=True).reshape(n_pairs, 1)

    f_apn = term1 - F.broadcast_to(term2, (n_pairs, n_pairs))
    # multiply zero to diagonal components of f_apn
    mask = xp.ones_like(f_apn.data) - xp.eye(n_pairs, dtype=f.dtype)
    f_apn = f_apn * mask

    return F.average(F.logsumexp(f_apn, axis=1))
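For reference, the quantity built here appears to be the angular loss term from the N-pair angular loss formulation (our reconstruction from the code, so treat it as a sketch):

    f_{a,p,n} = 4 tan^2(alpha) * (f_a + f_p)^T f_n - 2 (1 + tan^2(alpha)) * f_a^T f_p

Note that because the diagonal of f_apn is zeroed rather than removed, each row's logsumexp still includes exp(0) = 1, so the per-anchor result equals log(1 + sum_n exp(f_{a,p,n})).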
Example #9
Source File: proxy_nca_loss.py From deep_metric_learning with MIT License | 5 votes |
def proxy_nca_loss(x, proxy, labels):
    """Proxy-NCA loss function.

    Args:
        x (:class:`~chainer.Variable`): L2 normalized anchor points whose
            shape is (B, D), where B is the batch size and D is the number
            of dimensions of the feature vector.
        proxy (:class:`~chainer.Variable` or :class:`~chainer.Parameter`):
            Proxies whose shape is (K, D), where K is the number of classes
            in the dataset.
        labels (:class:`numpy.ndarray`): Class labels associated with x.
            The shape is (B,) and dtype is int. Note that the class IDs must
            be 0, 1, ..., K-1.

    Returns:
        :class:`~chainer.Variable`: Loss value.

    See: `No Fuss Distance Metric Learning using Proxies \
        <http://openaccess.thecvf.com/content_ICCV_2017/papers/\
        Movshovitz-Attias_No_Fuss_Distance_ICCV_2017_paper.pdf>`_
    """
    proxy = F.normalize(proxy)
    distance = squared_distance_matrix(x, proxy)
    d_posi = distance[np.arange(len(x)), labels]

    # For each row, remove one element corresponding to the positive distance
    B, K = distance.shape  # batch size and the number of classes
    mask = np.tile(np.arange(K), (B, 1)) != labels[:, None]
    d_nega = distance[mask].reshape(B, K - 1)

    log_denominator = F.logsumexp(-d_nega, axis=1)
    loss = d_posi + log_denominator
    return F.average(loss)
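The last three lines implement the Proxy-NCA objective; written out as an equation (our reading of the code): for an anchor x with positive proxy p_y,

    L(x) = -log( exp(-d(x, p_y)) / sum_{z != y} exp(-d(x, p_z)) )
         = d(x, p_y) + log sum_{z != y} exp(-d(x, p_z))

which is exactly d_posi + F.logsumexp(-d_nega, axis=1), averaged over the batch.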
Example #10
Source File: updater.py From 3dpose_gan with MIT License | 5 votes |
def calculate_heuristic_loss(xy_real, z_pred):
    return F.average(F.relu(
        -H36M_Updater.calculate_rotation(xy_real, z_pred)))
Example #11
Source File: loss_functions.py From chainer-gan-experiments with MIT License | 5 votes |
def loss_sigmoid_cross_entropy_with_logits(x, t):
    return F.average(x - x * t + F.softplus(-x))  # / x.data.shape[0]
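A quick numerical check of the identity used above (our own sketch, not part of the original repo): for logits x and targets t, the sigmoid cross-entropy -t*log(sigmoid(x)) - (1-t)*log(1-sigmoid(x)) equals x - x*t + softplus(-x).

import numpy as np

def softplus(v):
    # numerically stable log(1 + exp(v))
    return np.log1p(np.exp(-np.abs(v))) + np.maximum(v, 0)

x = np.random.randn(5).astype(np.float32)
t = np.random.rand(5).astype(np.float32)

s = 1 / (1 + np.exp(-x))
reference = -(t * np.log(s) + (1 - t) * np.log(1 - s))
rewritten = x - x * t + softplus(-x)
assert np.allclose(reference, rewritten, atol=1e-5)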
Example #12
Source File: gail.py From baselines with MIT License | 5 votes |
def _get_entropy(self, values):
    return F.average(
        (-values * F.log2(F.sigmoid(values) + self.discriminator_value_offset)
         - (1 - values) * F.log2(
             1 - F.sigmoid(values) + self.discriminator_value_offset)))  # NOQA
Example #13
Source File: behavioral_cloning.py From baselines with MIT License | 5 votes |
def _loss(self, batch_obs, batch_acs):
    out = self.model(batch_obs)
    entropy = F.average(out.entropy)
    if self.action_wrapper == 'discrete':
        loss = F.softmax_cross_entropy(out.params[0], batch_acs.reshape(-1))
    elif self.action_wrapper == 'continuous':
        loss = F.mean_squared_error(out.params[0], batch_acs)
    elif self.action_wrapper == 'multi-dimensional-softmax':
        loss = 0
        for idx, logit in enumerate(out.params):
            expected = batch_acs[:, idx]
            loss += F.softmax_cross_entropy(logit, expected)
    loss -= entropy * self.entropy_coef
    return loss
Example #14
Source File: visual_backprop.py From kiss with GNU General Public License v3.0 | 5 votes |
def perform_visual_backprop(self, variable):
    with chainer.no_backprop_mode(), chainer.cuda.get_device_from_array(variable.data):
        self.xp = cuda.get_array_module(variable)
        averaged_feature = F.average(variable, axis=1, keepdims=True)

        visualization = self.traverse_computational_graph(variable.creator, averaged_feature)
        visualization = visualization.data
        for i in range(len(visualization)):
            min_val = visualization[i].min()
            max_val = visualization[i].max()
            visualization[i] -= min_val
            visualization[i] *= 1.0 / (max_val - min_val)

    return visualization
Example #15
Source File: bbox_plotter.py From kiss with GNU General Public License v3.0 | 5 votes |
def show_feature_map(self, feature_map):
    with chainer.no_backprop_mode():
        averaged_feature_map = F.average(feature_map, axis=1, keepdims=True)[0]
        averaged_feature_map -= averaged_feature_map.data.min()
        max_value = averaged_feature_map.data.max()
        if max_value > 0:
            averaged_feature_map /= max_value
        return averaged_feature_map[None, ...].data
Example #16
Source File: aspp.py From chainercv with MIT License | 5 votes |
def image_pooling(self, x):
    _, _, H, W = x.shape
    x = F.average(x, axis=(2, 3), keepdims=True)
    x = self.image_pooling_conv(x)
    B, C, _, _ = x.shape
    x = F.broadcast_to(x, (B, C, H, W))
    return x
Example #17
Source File: se_resnext.py From chainercv with MIT License | 5 votes |
def __init__(self, n_layer, n_class=None, pretrained_model=None,
             mean=None, initialW=None, fc_kwargs={}):
    blocks = self._blocks[n_layer]

    param, path = utils.prepare_pretrained_model(
        {'n_class': n_class, 'mean': mean},
        pretrained_model, self._models[n_layer],
        {'n_class': 1000, 'mean': _imagenet_mean})
    self.mean = param['mean']

    if initialW is None:
        initialW = initializers.HeNormal(scale=1., fan_option='fan_out')
    if 'initialW' not in fc_kwargs:
        fc_kwargs['initialW'] = initializers.Normal(scale=0.01)
    if pretrained_model:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        initialW = initializers.constant.Zero()
        fc_kwargs['initialW'] = initializers.constant.Zero()
    kwargs = {
        'groups': 32, 'initialW': initialW,
        'stride_first': False, 'add_seblock': True}

    super(SEResNeXt, self).__init__()
    with self.init_scope():
        self.conv1 = Conv2DBNActiv(None, 64, 7, 2, 3, nobias=True,
                                   initialW=initialW)
        self.pool1 = lambda x: F.max_pooling_2d(x, ksize=3, stride=2)
        self.res2 = ResBlock(blocks[0], None, 128, 256, 1, **kwargs)
        self.res3 = ResBlock(blocks[1], None, 256, 512, 2, **kwargs)
        self.res4 = ResBlock(blocks[2], None, 512, 1024, 2, **kwargs)
        self.res5 = ResBlock(blocks[3], None, 1024, 2048, 2, **kwargs)
        self.pool5 = lambda x: F.average(x, axis=(2, 3))
        self.fc6 = L.Linear(None, param['n_class'], **fc_kwargs)
        self.prob = F.softmax

    if path:
        chainer.serializers.load_npz(path, self)
Example #18
Source File: seblock.py From chainer-compiler with MIT License | 5 votes |
def forward(self, u):
    B, C, H, W = u.shape
    z = F.average(u, axis=(2, 3))
    x = F.relu(self.down(z))
    x = F.sigmoid(self.up(x))
    x = F.reshape(x, x.shape[:2] + (1, 1))
    # Spatial axes of `x` will be broadcasted.
    return u * x
Example #19
Source File: fcis_resnet101.py From chainercv with MIT License | 5 votes |
def _pool(
        self, h_cls_seg, h_ag_loc, rois, roi_indices, gt_roi_labels):
    # PSROI Pooling
    # shape: (n_roi, n_class, 2, roi_size, roi_size)
    roi_cls_ag_seg_scores = ps_roi_average_pooling_2d(
        h_cls_seg, rois, roi_indices,
        (self.n_class * 2, self.roi_size, self.roi_size),
        self.spatial_scale, self.group_size)
    roi_cls_ag_seg_scores = F.reshape(
        roi_cls_ag_seg_scores,
        (-1, self.n_class, 2, self.roi_size, self.roi_size))

    # shape: (n_roi, 2*4, roi_size, roi_size)
    roi_ag_loc_scores = ps_roi_average_pooling_2d(
        h_ag_loc, rois, roi_indices,
        (2 * 4, self.roi_size, self.roi_size),
        self.spatial_scale, self.group_size)

    # shape: (n_roi, n_class)
    roi_cls_scores = F.average(
        F.max(roi_cls_ag_seg_scores, axis=2), axis=(2, 3))

    # Bbox Regression
    # shape: (n_roi, 2, 4)
    roi_ag_locs = F.average(roi_ag_loc_scores, axis=(2, 3))
    roi_ag_locs = F.reshape(roi_ag_locs, (-1, 2, 4))

    # Mask Regression
    # shape: (n_roi, n_class, 2, roi_size, roi_size)
    if gt_roi_labels is None:
        max_cls_indices = roi_cls_scores.array.argmax(axis=1)
    else:
        max_cls_indices = gt_roi_labels
    # shape: (n_roi, 2, roi_size, roi_size)
    roi_ag_seg_scores = roi_cls_ag_seg_scores[
        self.xp.arange(len(max_cls_indices)), max_cls_indices]

    return roi_ag_seg_scores, roi_ag_locs, roi_cls_scores
Example #20
Source File: visual_backprop.py From see with GNU General Public License v3.0 | 5 votes |
def perform_visual_backprop(self, variable):
    with chainer.no_backprop_mode(), chainer.cuda.get_device_from_array(variable.data):
        self.xp = cuda.get_array_module(variable)
        averaged_feature = F.average(variable, axis=1, keepdims=True)

        visualization = self.traverse_computational_graph(variable.creator, averaged_feature)
        visualization = visualization.data
        for i in range(len(visualization)):
            min_val = visualization[i].min()
            max_val = visualization[i].max()
            visualization[i] -= min_val
            visualization[i] *= 1.0 / (max_val - min_val)

    return visualization
Example #21
Source File: loss_metrics.py From see with GNU General Public License v3.0 | 5 votes |
def calc_height_loss(self, height):
    # penalize bboxes that are not high enough to contain text (10 pixels)
    shifted_height = height - 10
    thresholded_height = F.minimum(shifted_height, self.xp.zeros_like(shifted_height))
    thresholded_height *= -1
    return F.average(thresholded_height)
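Algebraically (our observation, not a comment from the repo): -min(h - 10, 0) = max(10 - h, 0), so the body is equivalent to F.average(F.relu(10.0 - height)); each box shorter than 10 pixels is penalized linearly by its shortfall.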
Example #22
Source File: loss_metrics.py From see with GNU General Public License v3.0 | 5 votes |
def calc_direction_loss(self, grids):
    top_left_x, top_right_x, _, top_left_y, _, bottom_left_y = self.get_corners(grids)

    # penalize upside down images
    distance = top_left_y - bottom_left_y
    loss_values = F.maximum(distance, self.xp.zeros_like(distance))
    up_down_loss = F.average(loss_values)

    # penalize images that are vertically mirrored
    distance = top_left_x - top_right_x
    loss_values = F.maximum(distance, self.xp.zeros_like(distance))
    left_right_loss = F.average(loss_values)

    return up_down_loss + left_right_loss
Example #23
Source File: test_average.py From chainer with MIT License | 5 votes |
def test_duplicate_value_negative(self):
    x = numpy.random.uniform(-1, 1, 24).reshape(2, 3, 4).astype(self.dtype)
    with self.assertRaises(ValueError):
        functions.average(x, axis=(1, -2))
Example #24
Source File: test_average.py From chainer with MIT License | 5 votes |
def test_duplicate_value(self):
    x = numpy.random.uniform(-1, 1, 24).reshape(2, 3, 4).astype(self.dtype)
    with self.assertRaises(ValueError):
        functions.average(x, axis=(0, 0))
Example #25
Source File: test_average.py From chainer with MIT License | 5 votes |
def forward(self, inputs, device):
    x, = inputs
    y = functions.average(
        x, self.axis, keepdims=self.keepdims)
    return y,
Example #26
Source File: test_average.py From chainer with MIT License | 5 votes |
def forward(self, inputs, device):
    x, w = inputs
    if not self.use_weights:
        w = None
    if self.use_variable_method:
        y = x.mean(axis=self.axis, weights=w, keepdims=self.keepdims)
    else:
        y = functions.average(
            x, axis=self.axis, weights=w, keepdims=self.keepdims)
    return y,
Example #27
Source File: resnet.py From chainercv with MIT License | 4 votes |
def __init__(self, n_layer, n_class=None, pretrained_model=None,
             mean=None, initialW=None, fc_kwargs={}, arch='fb'):
    if arch == 'fb':
        stride_first = False
        conv1_no_bias = True
    elif arch == 'he':
        stride_first = True
        # Kaiming He uses bias only for ResNet50
        conv1_no_bias = n_layer != 50
    else:
        raise ValueError('arch is expected to be one of [\'he\', \'fb\']')
    blocks = self._blocks[n_layer]

    param, path = utils.prepare_pretrained_model(
        {'n_class': n_class, 'mean': mean},
        pretrained_model, self._models[arch][n_layer],
        {'n_class': 1000, 'mean': _imagenet_mean})
    self.mean = param['mean']

    if initialW is None:
        initialW = initializers.HeNormal(scale=1., fan_option='fan_out')
    if 'initialW' not in fc_kwargs:
        fc_kwargs['initialW'] = initializers.Normal(scale=0.01)
    if pretrained_model:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        initialW = initializers.constant.Zero()
        fc_kwargs['initialW'] = initializers.constant.Zero()
    kwargs = {'initialW': initialW, 'stride_first': stride_first}

    super(ResNet, self).__init__()
    with self.init_scope():
        self.conv1 = Conv2DBNActiv(None, 64, 7, 2, 3,
                                   nobias=conv1_no_bias, initialW=initialW)
        self.pool1 = lambda x: F.max_pooling_2d(x, ksize=3, stride=2)
        self.res2 = ResBlock(blocks[0], None, 64, 256, 1, **kwargs)
        self.res3 = ResBlock(blocks[1], None, 128, 512, 2, **kwargs)
        self.res4 = ResBlock(blocks[2], None, 256, 1024, 2, **kwargs)
        self.res5 = ResBlock(blocks[3], None, 512, 2048, 2, **kwargs)
        self.pool5 = lambda x: F.average(x, axis=(2, 3))
        self.fc6 = L.Linear(None, param['n_class'], **fc_kwargs)
        self.prob = F.softmax

    if path:
        chainer.serializers.load_npz(path, self)
Example #28
Source File: gail.py From baselines with MIT License | 4 votes |
def _loss(self, fake_batch_obs, fake_batch_action,
          true_batch_obs, true_batch_action):
    if self.obs_normalizer is not None:
        normalized_obs = self.obs_normalizer(fake_batch_obs, update=False)
        infer_fake = self.model(normalized_obs, fake_batch_action)
    else:
        infer_fake = self.model(fake_batch_obs, fake_batch_action)
    if self.noisy_label:
        n = fake_batch_obs.shape[0]
        fake_loss = -F.average(F.log(F.absolute(
            1 - (self.xp.random.rand(n) * self.noisy_label_range)
            - F.sigmoid(infer_fake)) + self.discriminator_value_offset))
    else:
        fake_loss = -F.average(F.log(
            1 - F.sigmoid(infer_fake) + self.discriminator_value_offset))

    if self.obs_normalizer is not None:
        normalized_obs = self.obs_normalizer(true_batch_obs, update=True)
        infer_true = self.model(normalized_obs, true_batch_action)
    else:
        infer_true = self.model(true_batch_obs, true_batch_action)
    if self.noisy_label:
        n = true_batch_obs.shape[0]
        true_loss = -F.average(F.log(F.absolute(
            1 - (self.xp.random.rand(n) * self.noisy_label_range)
            - F.sigmoid(infer_true)) + self.discriminator_value_offset))
    else:
        true_loss = -F.average(F.log(
            F.sigmoid(infer_true) + self.discriminator_value_offset))

    entropy = (self._get_entropy(infer_fake) / 2
               + self._get_entropy(infer_true) / 2)
    loss = (fake_loss + true_loss - entropy * self.entropy_coef)

    # Update stats
    self.accuracy_gen = np.average(
        chainer.cuda.to_cpu(infer_fake.array) < 0)
    self.accuracy_exp = np.average(
        chainer.cuda.to_cpu(infer_true.array) > 0)
    self.average_entropy *= self.entropy_decay
    self.average_entropy += (1.0 - self.entropy_decay) * chainer.cuda.to_cpu(entropy.array)  # noqa
    self.average_loss *= self.loss_decay
    self.average_loss += (1.0 - self.loss_decay) * \
        chainer.cuda.to_cpu(loss.array)
    return loss
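Ignoring the noisy-label branch and the small offset, the two main terms form the standard GAN discriminator cross-entropy (a sketch of our reading, not text from the repo):

    L_D = -E_fake[ log(1 - sigmoid(D(s, a))) ] - E_true[ log(sigmoid(D(s, a))) ]

so the discriminator pushes expert state-action pairs toward positive logits and generated ones toward negative logits, which also explains the accuracy bookkeeping on the signs of infer_fake and infer_true.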
Example #29
Source File: updater.py From chainer-gan-experiments with MIT License | 4 votes |
def update_core(self):
    xp = self.gen.xp
    self._iter += 1

    opt_d = self.get_optimizer('dis')
    for i in range(self._dis_iter):
        d_fake = self.get_fake_image_batch()
        d_real = self.get_real_image_batch()

        y_fake = self.dis(Variable(d_fake), test=False)
        y_real = self.dis(Variable(d_real), test=False)

        w1 = F.average(y_fake - y_real)
        loss_dis = w1

        if self._mode == 'gp':
            eta = np.random.rand()
            c = (d_real * eta + (1.0 - eta) * d_fake).astype('f')
            y = self.dis(Variable(c), test=False, retain_forward=True)
            g = xp.ones_like(y.data)
            grad_c = self.dis.differentiable_backward(Variable(g))
            grad_c_l2 = F.sqrt(F.sum(grad_c ** 2, axis=(1, 2, 3)))
            loss_gp = loss_l2(grad_c_l2, 1.0)
            loss_dis += self._lambda_gp * loss_gp

        opt_d.zero_grads()
        loss_dis.backward()
        opt_d.update()

        if self._mode == 'clip':
            self.dis.clip()

    chainer.report({'loss': loss_dis, 'loss_w1': w1}, self.dis)

    z_in = self.get_latent_code_batch()
    x_out = self.gen(Variable(z_in), test=False)

    opt_g = self.get_optimizer('gen')
    y_fake = self.dis(x_out, test=False)
    loss_gen = -F.average(y_fake)
    chainer.report({'loss': loss_gen}, self.gen)

    opt_g.zero_grads()
    loss_gen.backward()
    opt_g.update()
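For orientation (our summary, not text from the repo): w1 estimates E[D(fake)] - E[D(real)], the negated Wasserstein critic objective, and the 'gp' branch adds the WGAN-GP penalty lambda * (||grad D(c)||_2 - 1)^2 evaluated at random interpolates c between real and fake batches; the 'clip' branch is the original WGAN weight-clipping alternative.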
Example #30
Source File: updater.py From chainer-PGGAN with MIT License | 4 votes |
def update_core(self):
    opt_g = self.get_optimizer('gen')
    opt_d = self.get_optimizer('dis')
    xp = self.gen.xp

    # update discriminator
    x = self.get_iterator('main').next()
    x = xp.array(x)
    m = len(x)

    z = self.gen.z(m)
    x_tilde = self.gen(z, self.alpha).data

    epsilon = xp.random.rand(m, 1, 1, 1).astype('f')
    x_hat = Variable(epsilon * x + (1 - epsilon) * x_tilde)

    dis_x = self.dis(x, self.alpha)

    loss_d = self.dis(x_tilde, self.alpha) - dis_x

    g_d, = chainer.grad([self.dis(x_hat, self.alpha)], [x_hat],
                        enable_double_backprop=True)
    g_d_norm = F.sqrt(F.batch_l2_norm_squared(g_d) + 1e-6)
    g_d_norm_delta = g_d_norm - 1
    loss_l = self.lam * g_d_norm_delta * g_d_norm_delta

    loss_dr = self.epsilon_drift * dis_x * dis_x

    dis_loss = F.mean(loss_d + loss_l + loss_dr)

    self.dis.cleargrads()
    dis_loss.backward()
    opt_d.update()

    # update generator
    z = self.gen.z(m)
    x = self.gen(z, self.alpha)
    gen_loss = F.average(-self.dis(x, self.alpha))

    self.gen.cleargrads()
    gen_loss.backward()
    opt_g.update()

    reporter.report({'loss_d': F.mean(loss_d), 'loss_l': F.mean(loss_l),
                     'loss_dr': F.mean(loss_dr), 'dis_loss': dis_loss,
                     'gen_loss': gen_loss, 'alpha': self.alpha})

    self.alpha = self.alpha + self.delta
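Compared with the previous updater, this one adds the drift term used by progressive-growing GANs (our reading of loss_dr): epsilon_drift * E[D(x)^2], which keeps the critic's output on real images from drifting far from zero while training with the same WGAN-GP objective.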