Python chainer.functions.average() Examples
The following are 30 code examples of chainer.functions.average().
You can go to the original project or source file by following the link above each example.
You may also want to check out all available functions and classes of the chainer.functions module.
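Before diving into the examples, here is a minimal, self-contained usage sketch of the function (our own illustration, not taken from any of the projects below); it assumes only that numpy and chainer are installed:

import numpy as np
import chainer.functions as F

x = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
w = np.array([1., 2., 3.], dtype=np.float32)

y = F.average(x, axis=1)                        # mean over axis 1 -> shape (2, 4)
y_w = F.average(x, axis=1, weights=w)           # weighted mean over axis 1
y_k = F.average(x, axis=(1, 2), keepdims=True)  # reduced axes kept -> shape (2, 1, 1)

# Forward values agree with numpy.average.
assert np.allclose(y_w.array, np.average(x, axis=1, weights=w))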
Example #1
Source File: subword.py From vecto with Mozilla Public License 2.0 | 6 votes |
def __init__(self, vocab, vocab_ngram_tokens, n_units, n_units_char,
             dropout, subword):  # dropout ratio, zero indicates no dropout
    super(SUMAVG, self).__init__()
    with self.init_scope():
        if subword.startswith('sum'):
            self.f_sumavg = F.sum
        if subword.startswith('avg'):
            self.f_sumavg = F.average
        self.embed = L.EmbedID(
            len(vocab_ngram_tokens.lst_words) + 2, n_units_char,
            initialW=I.Uniform(1. / n_units_char))
        # ngram tokens embedding plus 2 for OOV and end symbol.
        self.n_ngram = vocab_ngram_tokens.metadata["max_gram"] - vocab_ngram_tokens.metadata["min_gram"] + 1
        self.dropout = dropout
        self.vocab = vocab
        self.vocab_ngram_tokens = vocab_ngram_tokens
Example #2
Source File: test_average.py From chainer with MIT License | 6 votes |
def forward_expected(self, inputs):
    x, w = inputs
    if not self.use_weights:
        w = None
    y_expect = numpy.average(x, axis=self.axis, weights=w)
    if self.keepdims:
        # numpy.average does not support keepdims
        axis = self.axis
        if axis is None:
            axis = list(six.moves.range(x.ndim))
        elif isinstance(axis, int):
            axis = axis,
        shape = list(x.shape)
        for i in six.moves.range(len(shape)):
            if i in axis or i - len(shape) in axis:
                shape[i] = 1
        y_expect = y_expect.reshape(shape)
    y_expect = utils.force_array(y_expect, dtype=self.dtype)
    return y_expect,
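As an aside, the manual reshape above can be written more compactly; a small sketch of an equivalent (ours, assuming a NumPy recent enough that expand_dims accepts a tuple of axes):

import numpy as np

x = np.random.rand(2, 3, 4)
axis = (0, 2)
y = np.average(x, axis=axis)          # shape (3,)
y_keepdims = np.expand_dims(y, axis)  # shape (1, 3, 1), like keepdims=True
assert y_keepdims.shape == (1, 3, 1)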
Example #3
Source File: srcnn.py From waifu2x-chainer with MIT License | 6 votes |
def __call__(self, x):
    h = F.leaky_relu(self.conv1(x), self.slope)
    h = F.leaky_relu(self.conv2(h), self.slope)
    if hasattr(self, 'conv_bridge'):
        x = self.conv_bridge(x[:, :, 2:-2, 2:-2])
    else:
        x = x[:, :, 2:-2, 2:-2]
    if hasattr(self, 'fc1') and hasattr(self, 'fc2'):
        se = F.relu(self.fc1(F.average(h, axis=(2, 3))))
        se = F.sigmoid(self.fc2(se))[:, :, None, None]
        se = F.broadcast_to(se, h.shape)
        h = h * se
    return h + x
Example #4
Source File: model.py From graph-nvp with MIT License | 6 votes |
def log_prob(self, z, logdet):
    logdet[0] = logdet[0] - self.x_size
    logdet[1] = logdet[1] - self.adj_size

    ln_var_adj = self.ln_var * self.xp.ones([self.adj_size])
    ln_var_x = self.ln_var * self.xp.ones([self.x_size])
    nll_adj = F.average(
        F.sum(F.gaussian_nll(z[1],
                             self.xp.zeros([self.adj_size], dtype=self.xp.float32),
                             ln_var_adj, reduce='no'), axis=1)
        - logdet[1])
    nll_adj /= self.adj_size
    nll_x = F.average(
        F.sum(F.gaussian_nll(z[0],
                             self.xp.zeros([self.x_size], dtype=self.xp.float32),
                             ln_var_x, reduce='no'), axis=1)
        - logdet[0])
    nll_x /= self.x_size
    if nll_x.array < 0:
        print('nll_x:{}'.format(nll_x))
    return [nll_x, nll_adj]
Example #5
Source File: visual_backprop.py From see with GNU General Public License v3.0 | 6 votes |
def scale_layer(self, feature_map, node):
    input_data = node.inputs[0].data
    _, _, in_height, in_width = input_data.shape
    _, _, feature_height, feature_width = feature_map.shape
    kernel_height = in_height + 2 * node.ph - node.sy * (feature_height - 1)
    kernel_width = in_width + 2 * node.pw - node.sx * (feature_width - 1)
    scaled_feature = F.deconvolution_2d(
        feature_map,
        self.xp.ones((1, 1, kernel_height, kernel_width)),
        stride=(node.sy, node.sx),
        pad=(node.ph, node.pw),
        outsize=(in_height, in_width),
    )
    averaged_feature_map = F.average(input_data, axis=1, keepdims=True)
    feature_map = scaled_feature * averaged_feature_map
    return feature_map
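A short derivation of the kernel-size arithmetic (our own note, using the standard convolution shape formula): a convolution with kernel k, stride s, and padding p maps height H_in to H_out = floor((H_in + 2p - k) / s) + 1. Solving for k when the division is exact gives

    k = H_in + 2p - s * (H_out - 1)

which is exactly how kernel_height and kernel_width are computed, so the all-ones deconvolution upsamples the feature map back to the input resolution.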
Example #6
Source File: visual_backprop.py From kiss with GNU General Public License v3.0 | 6 votes |
def scale_layer(self, feature_map, node):
    input_data = node.inputs[0].data
    _, _, in_height, in_width = input_data.shape
    _, _, feature_height, feature_width = feature_map.shape
    kernel_height = in_height + 2 * node.ph - node.sy * (feature_height - 1)
    kernel_width = in_width + 2 * node.pw - node.sx * (feature_width - 1)
    scaled_feature = F.deconvolution_2d(
        feature_map,
        self.xp.ones((1, 1, kernel_height, kernel_width)),
        stride=(node.sy, node.sx),
        pad=(node.ph, node.pw),
        outsize=(in_height, in_width),
    )
    averaged_feature_map = F.average(input_data, axis=1, keepdims=True)
    feature_map = scaled_feature * averaged_feature_map
    return feature_map
Example #7
Source File: se_resnet.py From chainercv with MIT License | 5 votes |
def __init__(self, n_layer, n_class=None, pretrained_model=None,
             mean=None, initialW=None, fc_kwargs={}):
    blocks = self._blocks[n_layer]

    param, path = utils.prepare_pretrained_model(
        {'n_class': n_class, 'mean': mean},
        pretrained_model, self._models[n_layer],
        {'n_class': 1000, 'mean': _imagenet_mean})
    self.mean = param['mean']

    if initialW is None:
        initialW = initializers.HeNormal(scale=1., fan_option='fan_out')
    if 'initialW' not in fc_kwargs:
        fc_kwargs['initialW'] = initializers.Normal(scale=0.01)
    if pretrained_model:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        initialW = initializers.constant.Zero()
        fc_kwargs['initialW'] = initializers.constant.Zero()
    kwargs = {
        'initialW': initialW, 'stride_first': True, 'add_seblock': True}

    super(SEResNet, self).__init__()
    with self.init_scope():
        self.conv1 = Conv2DBNActiv(None, 64, 7, 2, 3, nobias=True,
                                   initialW=initialW)
        self.pool1 = lambda x: F.max_pooling_2d(x, ksize=3, stride=2)
        self.res2 = ResBlock(blocks[0], None, 64, 256, 1, **kwargs)
        self.res3 = ResBlock(blocks[1], None, 128, 512, 2, **kwargs)
        self.res4 = ResBlock(blocks[2], None, 256, 1024, 2, **kwargs)
        self.res5 = ResBlock(blocks[3], None, 512, 2048, 2, **kwargs)
        self.pool5 = lambda x: F.average(x, axis=(2, 3))
        self.fc6 = L.Linear(None, param['n_class'], **fc_kwargs)
        self.prob = F.softmax

    if path:
        chainer.serializers.load_npz(path, self)
Example #8
Source File: angular_loss.py From deep_metric_learning with MIT License | 5 votes |
def angular_mc_loss(f, f_p, alpha=45, in_degree=True):
    '''
    Args:
        f (chainer.Variable or xp.ndarray): Anchor vectors. Each vector in f
            must be l2 normalized.
        f_p (chainer.Variable or xp.ndarray): Positive vectors. Each vector
            in f_p must be l2 normalized.
    '''
    xp = cuda.get_array_module(f)

    if in_degree:
        alpha = np.deg2rad(alpha)
    sq_tan_alpha = np.tan(alpha) ** 2
    n_pairs = len(f)

    # first and second term of f_{a,p,n}
    term1 = 4 * sq_tan_alpha * matmul(f + f_p, transpose(f_p))
    term2 = 2 * (1 + sq_tan_alpha) * F.sum(f * f_p, axis=1, keepdims=True)
    # term2 = 2 * (1 + sq_tan_alpha) * F.batch_matmul(f, f_p, transa=True).reshape(n_pairs, 1)

    f_apn = term1 - F.broadcast_to(term2, (n_pairs, n_pairs))
    # multiply zero to diagonal components of f_apn
    mask = xp.ones_like(f_apn.data) - xp.eye(n_pairs, dtype=f.dtype)
    f_apn = f_apn * mask

    return F.average(F.logsumexp(f_apn, axis=1))
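For reference, the quantity built here appears to be the angular loss term from the N-pair angular loss formulation (our reconstruction from the code, so treat it as a sketch):

    f_{a,p,n} = 4 tan^2(alpha) * (f_a + f_p)^T f_n - 2 (1 + tan^2(alpha)) * f_a^T f_p

Note that because the diagonal of f_apn is zeroed rather than removed, each row's logsumexp still includes exp(0) = 1, so the per-anchor result equals log(1 + sum_n exp(f_{a,p,n})).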
Example #9
Source File: proxy_nca_loss.py From deep_metric_learning with MIT License | 5 votes |
def proxy_nca_loss(x, proxy, labels):
    """Proxy-NCA loss function.

    Args:
        x (:class:`~chainer.Variable`): L2 normalized anchor points whose
            shape is (B, D), where B is the batch size and D is the number
            of dimensions of the feature vector.
        proxy (:class:`~chainer.Variable` or :class:`~chainer.Parameter`):
            Proxies whose shape is (K, D), where K is the number of classes
            in the dataset.
        labels (:class:`numpy.ndarray`): Class labels associated with x.
            The shape is (B,) and dtype is int. Note that the class IDs must
            be 0, 1, ..., K-1.

    Returns:
        :class:`~chainer.Variable`: Loss value.

    See: `No Fuss Distance Metric Learning using Proxies \
        <http://openaccess.thecvf.com/content_ICCV_2017/papers/\
        Movshovitz-Attias_No_Fuss_Distance_ICCV_2017_paper.pdf>`_
    """
    proxy = F.normalize(proxy)
    distance = squared_distance_matrix(x, proxy)
    d_posi = distance[np.arange(len(x)), labels]

    # For each row, remove one element corresponding to the positive distance
    B, K = distance.shape  # batch size and the number of classes
    mask = np.tile(np.arange(K), (B, 1)) != labels[:, None]
    d_nega = distance[mask].reshape(B, K - 1)

    log_denominator = F.logsumexp(-d_nega, axis=1)
    loss = d_posi + log_denominator
    return F.average(loss)
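The last three lines implement the Proxy-NCA objective; written out as an equation (our reading of the code): for an anchor x with positive proxy p_y,

    L(x) = -log( exp(-d(x, p_y)) / sum_{z != y} exp(-d(x, p_z)) )
         = d(x, p_y) + log sum_{z != y} exp(-d(x, p_z))

which is exactly d_posi + F.logsumexp(-d_nega, axis=1), averaged over the batch.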
Example #10
Source File: updater.py From 3dpose_gan with MIT License | 5 votes |
def calculate_heuristic_loss(xy_real, z_pred):
    return F.average(F.relu(
        -H36M_Updater.calculate_rotation(xy_real, z_pred)))
Example #11
Source File: loss_functions.py From chainer-gan-experiments with MIT License | 5 votes |
def loss_sigmoid_cross_entropy_with_logits(x, t):
    return F.average(x - x * t + F.softplus(-x))  # / x.data.shape[0]
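A quick numerical check of the identity used above (our own sketch, not part of the original repo): for logits x and targets t, the sigmoid cross-entropy -t*log(sigmoid(x)) - (1-t)*log(1-sigmoid(x)) equals x - x*t + softplus(-x).

import numpy as np

def softplus(v):
    # numerically stable log(1 + exp(v))
    return np.log1p(np.exp(-np.abs(v))) + np.maximum(v, 0)

x = np.random.randn(5).astype(np.float32)
t = np.random.rand(5).astype(np.float32)

s = 1 / (1 + np.exp(-x))
reference = -(t * np.log(s) + (1 - t) * np.log(1 - s))
rewritten = x - x * t + softplus(-x)
assert np.allclose(reference, rewritten, atol=1e-5)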
Example #12
Source File: gail.py From baselines with MIT License | 5 votes |
def _get_entropy(self, values):
    return F.average(
        (-values * F.log2(F.sigmoid(values) + self.discriminator_value_offset)
         - (1 - values) * F.log2(
             1 - F.sigmoid(values) + self.discriminator_value_offset)))  # NOQA
Example #13
Source File: behavioral_cloning.py From baselines with MIT License | 5 votes |
def _loss(self, batch_obs, batch_acs):
    out = self.model(batch_obs)
    entropy = F.average(out.entropy)
    if self.action_wrapper == 'discrete':
        loss = F.softmax_cross_entropy(out.params[0], batch_acs.reshape(-1))
    elif self.action_wrapper == 'continuous':
        loss = F.mean_squared_error(out.params[0], batch_acs)
    elif self.action_wrapper == 'multi-dimensional-softmax':
        loss = 0
        for idx, logit in enumerate(out.params):
            expected = batch_acs[:, idx]
            loss += F.softmax_cross_entropy(logit, expected)
    loss -= entropy * self.entropy_coef
    return loss
Example #14
Source File: visual_backprop.py From kiss with GNU General Public License v3.0 | 5 votes |
def perform_visual_backprop(self, variable):
    with chainer.no_backprop_mode(), chainer.cuda.get_device_from_array(variable.data):
        self.xp = cuda.get_array_module(variable)
        averaged_feature = F.average(variable, axis=1, keepdims=True)

        visualization = self.traverse_computational_graph(variable.creator, averaged_feature)
        visualization = visualization.data
        for i in range(len(visualization)):
            min_val = visualization[i].min()
            max_val = visualization[i].max()
            visualization[i] -= min_val
            visualization[i] *= 1.0 / (max_val - min_val)

    return visualization
Example #15
Source File: bbox_plotter.py From kiss with GNU General Public License v3.0 | 5 votes |
def show_feature_map(self, feature_map):
    with chainer.no_backprop_mode():
        averaged_feature_map = F.average(feature_map, axis=1, keepdims=True)[0]
        averaged_feature_map -= averaged_feature_map.data.min()
        max_value = averaged_feature_map.data.max()
        if max_value > 0:
            averaged_feature_map /= max_value
        return averaged_feature_map[None, ...].data
Example #16
Source File: aspp.py From chainercv with MIT License | 5 votes |
def image_pooling(self, x):
    _, _, H, W = x.shape
    x = F.average(x, axis=(2, 3), keepdims=True)
    x = self.image_pooling_conv(x)
    B, C, _, _ = x.shape
    x = F.broadcast_to(x, (B, C, H, W))
    return x
Example #17
Source File: se_resnext.py From chainercv with MIT License | 5 votes |
def __init__(self, n_layer, n_class=None, pretrained_model=None,
             mean=None, initialW=None, fc_kwargs={}):
    blocks = self._blocks[n_layer]

    param, path = utils.prepare_pretrained_model(
        {'n_class': n_class, 'mean': mean},
        pretrained_model, self._models[n_layer],
        {'n_class': 1000, 'mean': _imagenet_mean})
    self.mean = param['mean']

    if initialW is None:
        initialW = initializers.HeNormal(scale=1., fan_option='fan_out')
    if 'initialW' not in fc_kwargs:
        fc_kwargs['initialW'] = initializers.Normal(scale=0.01)
    if pretrained_model:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        initialW = initializers.constant.Zero()
        fc_kwargs['initialW'] = initializers.constant.Zero()
    kwargs = {
        'groups': 32, 'initialW': initialW,
        'stride_first': False, 'add_seblock': True}

    super(SEResNeXt, self).__init__()
    with self.init_scope():
        self.conv1 = Conv2DBNActiv(None, 64, 7, 2, 3, nobias=True,
                                   initialW=initialW)
        self.pool1 = lambda x: F.max_pooling_2d(x, ksize=3, stride=2)
        self.res2 = ResBlock(blocks[0], None, 128, 256, 1, **kwargs)
        self.res3 = ResBlock(blocks[1], None, 256, 512, 2, **kwargs)
        self.res4 = ResBlock(blocks[2], None, 512, 1024, 2, **kwargs)
        self.res5 = ResBlock(blocks[3], None, 1024, 2048, 2, **kwargs)
        self.pool5 = lambda x: F.average(x, axis=(2, 3))
        self.fc6 = L.Linear(None, param['n_class'], **fc_kwargs)
        self.prob = F.softmax

    if path:
        chainer.serializers.load_npz(path, self)
Example #18
Source File: seblock.py From chainer-compiler with MIT License | 5 votes |
def forward(self, u):
    B, C, H, W = u.shape
    z = F.average(u, axis=(2, 3))
    x = F.relu(self.down(z))
    x = F.sigmoid(self.up(x))
    x = F.reshape(x, x.shape[:2] + (1, 1))
    # Spatial axes of `x` will be broadcasted.
    return u * x
Example #19
Source File: fcis_resnet101.py From chainercv with MIT License | 5 votes |
def _pool(
        self, h_cls_seg, h_ag_loc, rois, roi_indices, gt_roi_labels):
    # PSROI Pooling
    # shape: (n_roi, n_class, 2, roi_size, roi_size)
    roi_cls_ag_seg_scores = ps_roi_average_pooling_2d(
        h_cls_seg, rois, roi_indices,
        (self.n_class * 2, self.roi_size, self.roi_size),
        self.spatial_scale, self.group_size)
    roi_cls_ag_seg_scores = F.reshape(
        roi_cls_ag_seg_scores,
        (-1, self.n_class, 2, self.roi_size, self.roi_size))

    # shape: (n_roi, 2*4, roi_size, roi_size)
    roi_ag_loc_scores = ps_roi_average_pooling_2d(
        h_ag_loc, rois, roi_indices,
        (2 * 4, self.roi_size, self.roi_size),
        self.spatial_scale, self.group_size)

    # shape: (n_roi, n_class)
    roi_cls_scores = F.average(
        F.max(roi_cls_ag_seg_scores, axis=2), axis=(2, 3))

    # Bbox Regression
    # shape: (n_roi, 2, 4)
    roi_ag_locs = F.average(roi_ag_loc_scores, axis=(2, 3))
    roi_ag_locs = F.reshape(roi_ag_locs, (-1, 2, 4))

    # Mask Regression
    # shape: (n_roi, n_class, 2, roi_size, roi_size)
    if gt_roi_labels is None:
        max_cls_indices = roi_cls_scores.array.argmax(axis=1)
    else:
        max_cls_indices = gt_roi_labels
    # shape: (n_roi, 2, roi_size, roi_size)
    roi_ag_seg_scores = roi_cls_ag_seg_scores[
        self.xp.arange(len(max_cls_indices)), max_cls_indices]

    return roi_ag_seg_scores, roi_ag_locs, roi_cls_scores
Example #20
Source File: visual_backprop.py From see with GNU General Public License v3.0 | 5 votes |
def perform_visual_backprop(self, variable):
    with chainer.no_backprop_mode(), chainer.cuda.get_device_from_array(variable.data):
        self.xp = cuda.get_array_module(variable)
        averaged_feature = F.average(variable, axis=1, keepdims=True)

        visualization = self.traverse_computational_graph(variable.creator, averaged_feature)
        visualization = visualization.data
        for i in range(len(visualization)):
            min_val = visualization[i].min()
            max_val = visualization[i].max()
            visualization[i] -= min_val
            visualization[i] *= 1.0 / (max_val - min_val)

    return visualization
Example #21
Source File: loss_metrics.py From see with GNU General Public License v3.0 | 5 votes |
def calc_height_loss(self, height):
    # penalize bboxes that are not high enough to contain text (10 pixels)
    shifted_height = height - 10
    thresholded_height = F.minimum(shifted_height, self.xp.zeros_like(shifted_height))
    thresholded_height *= -1
    return F.average(thresholded_height)
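Algebraically (our observation, not a comment from the repo): -min(h - 10, 0) = max(10 - h, 0), so the body is equivalent to F.average(F.relu(10.0 - height)); each box shorter than 10 pixels is penalized linearly by its shortfall.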
Example #22
Source File: loss_metrics.py From see with GNU General Public License v3.0 | 5 votes |
def calc_direction_loss(self, grids):
    top_left_x, top_right_x, _, top_left_y, _, bottom_left_y = self.get_corners(grids)

    # penalize upside down images
    distance = top_left_y - bottom_left_y
    loss_values = F.maximum(distance, self.xp.zeros_like(distance))
    up_down_loss = F.average(loss_values)

    # penalize images that are vertically mirrored
    distance = top_left_x - top_right_x
    loss_values = F.maximum(distance, self.xp.zeros_like(distance))
    left_right_loss = F.average(loss_values)

    return up_down_loss + left_right_loss
Example #23
Source File: test_average.py From chainer with MIT License | 5 votes |
def test_duplicate_value_negative(self):
    x = numpy.random.uniform(-1, 1, 24).reshape(2, 3, 4).astype(self.dtype)
    with self.assertRaises(ValueError):
        functions.average(x, axis=(1, -2))
Example #24
Source File: test_average.py From chainer with MIT License | 5 votes |
def test_duplicate_value(self):
    x = numpy.random.uniform(-1, 1, 24).reshape(2, 3, 4).astype(self.dtype)
    with self.assertRaises(ValueError):
        functions.average(x, axis=(0, 0))
Example #25
Source File: test_average.py From chainer with MIT License | 5 votes |
def forward(self, inputs, device):
    x, = inputs
    y = functions.average(
        x, self.axis, keepdims=self.keepdims)
    return y,
Example #26
Source File: test_average.py From chainer with MIT License | 5 votes |
def forward(self, inputs, device):
    x, w = inputs
    if not self.use_weights:
        w = None
    if self.use_variable_method:
        y = x.mean(axis=self.axis, weights=w, keepdims=self.keepdims)
    else:
        y = functions.average(
            x, axis=self.axis, weights=w, keepdims=self.keepdims)
    return y,
Example #27
Source File: resnet.py From chainercv with MIT License | 4 votes |
def __init__(self, n_layer, n_class=None, pretrained_model=None,
             mean=None, initialW=None, fc_kwargs={}, arch='fb'):
    if arch == 'fb':
        stride_first = False
        conv1_no_bias = True
    elif arch == 'he':
        stride_first = True
        # Kaiming He uses bias only for ResNet50
        conv1_no_bias = n_layer != 50
    else:
        raise ValueError('arch is expected to be one of [\'he\', \'fb\']')
    blocks = self._blocks[n_layer]

    param, path = utils.prepare_pretrained_model(
        {'n_class': n_class, 'mean': mean},
        pretrained_model, self._models[arch][n_layer],
        {'n_class': 1000, 'mean': _imagenet_mean})
    self.mean = param['mean']

    if initialW is None:
        initialW = initializers.HeNormal(scale=1., fan_option='fan_out')
    if 'initialW' not in fc_kwargs:
        fc_kwargs['initialW'] = initializers.Normal(scale=0.01)
    if pretrained_model:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        initialW = initializers.constant.Zero()
        fc_kwargs['initialW'] = initializers.constant.Zero()
    kwargs = {'initialW': initialW, 'stride_first': stride_first}

    super(ResNet, self).__init__()
    with self.init_scope():
        self.conv1 = Conv2DBNActiv(None, 64, 7, 2, 3,
                                   nobias=conv1_no_bias, initialW=initialW)
        self.pool1 = lambda x: F.max_pooling_2d(x, ksize=3, stride=2)
        self.res2 = ResBlock(blocks[0], None, 64, 256, 1, **kwargs)
        self.res3 = ResBlock(blocks[1], None, 128, 512, 2, **kwargs)
        self.res4 = ResBlock(blocks[2], None, 256, 1024, 2, **kwargs)
        self.res5 = ResBlock(blocks[3], None, 512, 2048, 2, **kwargs)
        self.pool5 = lambda x: F.average(x, axis=(2, 3))
        self.fc6 = L.Linear(None, param['n_class'], **fc_kwargs)
        self.prob = F.softmax

    if path:
        chainer.serializers.load_npz(path, self)
Example #28
Source File: gail.py From baselines with MIT License | 4 votes |
def _loss(self, fake_batch_obs, fake_batch_action,
          true_batch_obs, true_batch_action):
    if self.obs_normalizer is not None:
        normalized_obs = self.obs_normalizer(fake_batch_obs, update=False)
        infer_fake = self.model(normalized_obs, fake_batch_action)
    else:
        infer_fake = self.model(fake_batch_obs, fake_batch_action)
    if self.noisy_label:
        n = fake_batch_obs.shape[0]
        fake_loss = -F.average(F.log(F.absolute(
            1 - (self.xp.random.rand(n) * self.noisy_label_range)
            - F.sigmoid(infer_fake)) + self.discriminator_value_offset))
    else:
        fake_loss = -F.average(F.log(
            1 - F.sigmoid(infer_fake) + self.discriminator_value_offset))

    if self.obs_normalizer is not None:
        normalized_obs = self.obs_normalizer(true_batch_obs, update=True)
        infer_true = self.model(normalized_obs, true_batch_action)
    else:
        infer_true = self.model(true_batch_obs, true_batch_action)
    if self.noisy_label:
        n = true_batch_obs.shape[0]
        true_loss = -F.average(F.log(F.absolute(
            1 - (self.xp.random.rand(n) * self.noisy_label_range)
            - F.sigmoid(infer_true)) + self.discriminator_value_offset))
    else:
        true_loss = -F.average(F.log(
            F.sigmoid(infer_true) + self.discriminator_value_offset))

    entropy = (self._get_entropy(infer_fake) / 2
               + self._get_entropy(infer_true) / 2)
    loss = (fake_loss + true_loss - entropy * self.entropy_coef)

    # Update stats
    self.accuracy_gen = np.average(
        chainer.cuda.to_cpu(infer_fake.array) < 0)
    self.accuracy_exp = np.average(
        chainer.cuda.to_cpu(infer_true.array) > 0)
    self.average_entropy *= self.entropy_decay
    self.average_entropy += (1.0 - self.entropy_decay) * chainer.cuda.to_cpu(entropy.array)  # noqa
    self.average_loss *= self.loss_decay
    self.average_loss += (1.0 - self.loss_decay) * \
        chainer.cuda.to_cpu(loss.array)
    return loss
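Ignoring the noisy-label branch and the small offset, the two main terms form the standard GAN discriminator cross-entropy (a sketch of our reading, not text from the repo):

    L_D = -E_fake[ log(1 - sigmoid(D(s, a))) ] - E_true[ log(sigmoid(D(s, a))) ]

so the discriminator pushes expert state-action pairs toward positive logits and generated ones toward negative logits, which also explains the accuracy bookkeeping on the signs of infer_fake and infer_true.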
Example #29
Source File: updater.py From chainer-gan-experiments with MIT License | 4 votes |
def update_core(self):
    xp = self.gen.xp
    self._iter += 1

    opt_d = self.get_optimizer('dis')
    for i in range(self._dis_iter):
        d_fake = self.get_fake_image_batch()
        d_real = self.get_real_image_batch()

        y_fake = self.dis(Variable(d_fake), test=False)
        y_real = self.dis(Variable(d_real), test=False)

        w1 = F.average(y_fake - y_real)
        loss_dis = w1

        if self._mode == 'gp':
            eta = np.random.rand()
            c = (d_real * eta + (1.0 - eta) * d_fake).astype('f')
            y = self.dis(Variable(c), test=False, retain_forward=True)
            g = xp.ones_like(y.data)
            grad_c = self.dis.differentiable_backward(Variable(g))
            grad_c_l2 = F.sqrt(F.sum(grad_c ** 2, axis=(1, 2, 3)))
            loss_gp = loss_l2(grad_c_l2, 1.0)
            loss_dis += self._lambda_gp * loss_gp

        opt_d.zero_grads()
        loss_dis.backward()
        opt_d.update()

        if self._mode == 'clip':
            self.dis.clip()

    chainer.report({'loss': loss_dis, 'loss_w1': w1}, self.dis)

    z_in = self.get_latent_code_batch()
    x_out = self.gen(Variable(z_in), test=False)

    opt_g = self.get_optimizer('gen')
    y_fake = self.dis(x_out, test=False)
    loss_gen = -F.average(y_fake)
    chainer.report({'loss': loss_gen}, self.gen)

    opt_g.zero_grads()
    loss_gen.backward()
    opt_g.update()
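For orientation (our summary, not text from the repo): w1 estimates E[D(fake)] - E[D(real)], the negated Wasserstein critic objective, and the 'gp' branch adds the WGAN-GP penalty lambda * (||grad D(c)||_2 - 1)^2 evaluated at random interpolates c between real and fake batches; the 'clip' branch is the original WGAN weight-clipping alternative.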
Example #30
Source File: updater.py From chainer-PGGAN with MIT License | 4 votes |
def update_core(self):
    opt_g = self.get_optimizer('gen')
    opt_d = self.get_optimizer('dis')
    xp = self.gen.xp

    # update discriminator
    x = self.get_iterator('main').next()
    x = xp.array(x)
    m = len(x)

    z = self.gen.z(m)
    x_tilde = self.gen(z, self.alpha).data

    epsilon = xp.random.rand(m, 1, 1, 1).astype('f')
    x_hat = Variable(epsilon * x + (1 - epsilon) * x_tilde)

    dis_x = self.dis(x, self.alpha)

    loss_d = self.dis(x_tilde, self.alpha) - dis_x

    g_d, = chainer.grad([self.dis(x_hat, self.alpha)], [x_hat],
                        enable_double_backprop=True)
    g_d_norm = F.sqrt(F.batch_l2_norm_squared(g_d) + 1e-6)
    g_d_norm_delta = g_d_norm - 1
    loss_l = self.lam * g_d_norm_delta * g_d_norm_delta

    loss_dr = self.epsilon_drift * dis_x * dis_x

    dis_loss = F.mean(loss_d + loss_l + loss_dr)

    self.dis.cleargrads()
    dis_loss.backward()
    opt_d.update()

    # update generator
    z = self.gen.z(m)
    x = self.gen(z, self.alpha)
    gen_loss = F.average(-self.dis(x, self.alpha))

    self.gen.cleargrads()
    gen_loss.backward()
    opt_g.update()

    reporter.report({'loss_d': F.mean(loss_d), 'loss_l': F.mean(loss_l),
                     'loss_dr': F.mean(loss_dr), 'dis_loss': dis_loss,
                     'gen_loss': gen_loss, 'alpha': self.alpha})

    self.alpha = self.alpha + self.delta
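Compared with the previous updater, this one adds the drift term used by progressive-growing GANs (our reading of loss_dr): epsilon_drift * E[D(x)^2], which keeps the critic's output on real images from drifting far from zero while training with the same WGAN-GP objective.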