Python chainer.cuda.elementwise() Examples

The following are 30 code examples of chainer.cuda.elementwise(), collected from open-source projects. Each example notes the project and source file it comes from, so you can trace it back to the original code. You may also want to check out the other available functions and classes of the chainer.cuda module. A minimal usage sketch follows before the project examples.
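A minimal sketch of the basic call pattern (not taken from any of the projects below; the arrays and the kernel name 'scaled_add' are illustrative): cuda.elementwise(in_params, out_params, operation, name) compiles and caches an element-wise kernel, and the returned object is then called with the actual arrays.

import numpy
from chainer import cuda

# Assumes CuPy and a CUDA-capable GPU are available.
x = cuda.cupy.arange(6, dtype=numpy.float32)
y = cuda.cupy.ones(6, dtype=numpy.float32)

scaled_add = cuda.elementwise(
    'T x, T y, T alpha',   # input parameters; T is a type placeholder
    'T z',                 # output parameter
    'z = x + alpha * y',   # per-element CUDA C snippet
    'scaled_add')          # kernel name, used for caching

z = scaled_add(x, y, numpy.float32(2.0))  # z[i] == x[i] + 2 * y[i]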
Example #1
Source File: weighted_sum_arrays.py    From chainerrl with MIT License
def forward_gpu(self, inputs):
        n = len(inputs)
        # Pack the device pointer of every input array into an int64 array so a
        # single kernel launch can read from all of them.
        ptrs = cuda.cupy.asarray([x.data.ptr for x in inputs],
                                 dtype=cuda.cupy.int64)
        ws = cuda.cupy.asarray(self.weights, dtype=cuda.cupy.float32)
        y = cuda.elementwise(
            'T x0, int64 xs, raw W ws, int32 n_xs',
            'T y',
            # xs holds the device address of the pointer array; cast it back to
            # float** so element i can sum the i-th entry of every input array.
            'float** xs_ = (float**) xs;'
            'y = 0;'
            'for (size_t j = 0; j < n_xs; ++j) {'
            '  y += xs_[j][i] * ws[j];'
            '}',
            'weighted_sum_arrays'.format(n))(inputs[0],
                                             ptrs.data.ptr,
                                             ws,
                                             len(ptrs))
        return y, 
Example #2
Source File: opt.py    From models with MIT License
def update_core_gpu(self, param):
        grad = param.grad
        if grad is None:
            return

        hp = self.hyperparam
        eps = grad.dtype.type(hp.eps)
        if hp.eps != 0 and eps == 0:
            raise ValueError(
                'eps of Adam optimizer is too small for {} ({})'.format(
                    grad.dtype.name, hp.eps))

        cuda.elementwise(
            'T grad, T lr, T one_minus_beta1, T one_minus_beta2, T eps, \
             T eta, T weight_decay_rate',
            'T param, T m, T v',
            '''m += one_minus_beta1 * (grad - m);
               v += one_minus_beta2 * (grad * grad - v);
               param -= eta * lr * (m / (sqrt(v) + eps) +
                               weight_decay_rate * param);''',
            'adam')(grad, self.lr, 1 - hp.beta1,
                    1 - hp.beta2, hp.eps,
                    hp.eta, hp.weight_decay_rate,
                    param.data, self.state['m'], self.state['v']) 
Example #3
Source File: affine_channel_2d.py    From chainer-mask-rcnn with MIT License
def backward(self, inputs, gy):
        xp = cuda.get_array_module(*inputs)  # unpack so CuPy arrays are detected
        x, W, b = inputs
        gy, = gy

        if xp is numpy:
            gx = W * gy
            gW = x * gy
        else:
            gx, gW = cuda.elementwise(
                'T x, T W, T b, T gy', 'T gx, T gW',
                'gx = W * gy; gW = x * gy;', 'affine_bwd'
            )(x, W, b, gy)
        gb = gy

        gW = xp.sum(gW, axis=(0, 2, 3), keepdims=True)
        gb = xp.sum(gb, axis=(0, 2, 3), keepdims=True)
        return gx, gW, gb 
Example #4
Source File: spatial_dropout.py    From voxelnet_chainer with MIT License
def forward(self, x):
        if hasattr(self, 'mask'):
            y = x[0] * self.mask
        else:
            scale = x[0].dtype.type(1. / (1 - self.dropout_ratio))
            xp = cuda.get_array_module(*x)
            mask = xp.ones(x[0].shape, dtype=numpy.float32)
            rand = xp.random.rand(*x[0].shape[:2])
            mask[rand <= self.dropout_ratio] = 0

            if xp == numpy:
                self.mask = mask * scale
                y = x[0] * self.mask
            else:
                self.mask, y = cuda.elementwise(
                    'T x, T mask1, T scale', 'T mask, T y',
                    '''
                    mask = mask1 * scale;
                    y = x * mask;
                    ''',
                    'spatial_dropout_fwd',
                )(x[0], mask, scale)
        return y, 
Example #5
Source File: bst.py    From GUINNESS with GNU General Public License v2.0
def forward_gpu(self, x):
        y = cuda.elementwise(
            'T x', 'T y',
            'y = x >= 0 ? 1 : -1', 'bst_fwd')(
                x[0])
        return y, 
Example #6
Source File: subfuncs.py    From convolutional_seq2seq with BSD 3-Clause "New" or "Revised" License
def backward_gpu(self, x, gy):
        gx = cuda.elementwise(
            'T c, T gy', 'T gx',
            'gx = gy * c',
            'gradmul_bwd')(self.coefficient, gy[0])
        return gx, 
Example #7
Source File: bst.py    From GUINNESS with GNU General Public License v2.0
def backward_gpu(self, x, gy):
        gx = cuda.elementwise(
            'T x, T gy', 'T gx',
            'gx = abs(x) > 1 ? 0 : gy', 'bst_bwd')(
                x[0], gy[0])
        return gx, 
Example #8
Source File: function_integer_conv2d.py    From GUINNESS with GNU General Public License v2.0
def _kern():
    return cuda.elementwise(
        'T x', 'T y',
        'y = x >= 0 ? 1 : -1',
        'binarize') 
Example #9
Source File: function_binary_conv2d.py    From GUINNESS with GNU General Public License v2.0
def _kern():
    return cuda.elementwise(
        'T x', 'T y',
        'y = x >= 0 ? 1 : -1',
        'binarize') 
Example #10
Source File: weight_clip.py    From GUINNESS with GNU General Public License v2.0
def __call__(self, opt):
        if cuda.available:
            kernel = cuda.elementwise(
                'T low, T high', 
                'T p', 
                'p = (p < low) ? low : (p > high) ? high : p',
                'weight_clip')

        for param in opt.target.params():
            p = param.data
            with cuda.get_device(p) as dev:
                if int(dev) == -1:
                    # numpy.clip returns a copy unless out= is given; clip in place
                    numpy.clip(p, self.low, self.high, out=p)
                else:
                    kernel(self.low, self.high, p) 
Example #11
Source File: adaptive_softmax.py    From models with MIT License
def forward_gpu(self, inputs):
        cupy = cuda.cupy
        x, t = inputs[:2]
        log_y = super(AdaptiveSoftmaxCrossEntropy, self).forward(inputs)[0]
        self.y = cupy.exp(log_y)

        if self.normalize:
            coeff = cupy.maximum(1, (t != self.ignore_label).sum())
        else:
            coeff = max(1, len(t))
        self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

        log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
        if self.reduce == 'mean':
            ret = cuda.reduce(
                'S t, raw T log_y, int32 n_channel, raw T coeff, '
                'S ignore_label',
                'T out',
                't == ignore_label ? T(0) : log_y[_j * n_channel + t]',
                'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
            )(t, log_y.reduced_view(), log_y.shape[-1],
              self._coeff, self.ignore_label)
        else:
            ret = cuda.elementwise(
                'S t, raw T log_y, int32 n_channel, T ignore', 'T out',
                '''
                if (t == ignore) {
                  out = 0;
                } else {
                  out = -log_y[i * n_channel + t];
                }
                ''',
                'softmax_crossent_no_reduce_fwd'
            )(t, log_y.reduced_view(), log_y.shape[-1], self.ignore_label)
            ret = ret.reshape(t.shape)
        return ret, 
Example #12
Source File: nesterov_ag.py    From ram with MIT License
def update_one_gpu(self, param, state):
        cuda.elementwise(
            'T grad, T lr, T momentum',
            'T param, T v',
            '''param += momentum * momentum * v - (1 + momentum) * lr * grad;
               v = v * momentum - lr * grad;
               ''',
            'nesterov_ag')(param.grad, self.lr, self.momentum,
                           param.data, state['v']) 
Example #13
Source File: active_batchnorm.py    From voxelnet_chainer with MIT License
def backward(self, indexes, grad_outputs):
        x, gamma = self.get_retained_inputs()
        # x, gamma, index1, index2 = self.get_retained_inputs()
        gy, = grad_outputs

        # self.retain_inputs((0, 1, 2))
        # x, gamma, gy = inputs
        # x, gamma, index1, index2 = inputs

        expander = self.expander
        inv_m = gamma.dtype.type(1. / (x.size // gamma.size))
        xp = cuda.get_array_module(x)

        gbeta = gy.sum(axis=self.axis)
        x_hat = _x_hat(x, self.mean[expander], self.inv_std[expander])
        ggamma = (gy * x_hat).sum(axis=self.axis)
        if xp is numpy:
            gx = (gamma * self.inv_std)[expander] * (
                gy - (x_hat * ggamma[expander] + gbeta[expander]) * inv_m)
        else:
            gx = cuda.elementwise(
                '''
                T gy, T x_hat, T gamma, T inv_std, T ggamma, T gbeta,
                T inv_m
                ''',
                'T gx',
                '''
                gx = (gamma * inv_std) * (
                    gy - (x_hat * ggamma + gbeta) * inv_m)
                ''', 'bn_bwd')(gy, x_hat, gamma[expander],
                               self.inv_std[expander], ggamma[expander],
                               gbeta[expander], inv_m)
        self.retain_outputs((0, 1))
        return gx, ggamma, gbeta

        # f = BatchNormalizationGrad(
        #     self.eps, self.use_cudnn, self.mode, self.expander, self.axis,
        #     self.mean, self.inv_std)
        # return f(x, gamma, gy) 
Example #14
Source File: active_batchnorm.py    From voxelnet_chainer with MIT License
def forward(self, inputs):
        self.retain_inputs((0, 1, 2))
        x, gamma, gy = inputs
        # x, gamma, index1, index2 = inputs

        expander = self.expander
        inv_m = gamma.dtype.type(1. / (x.size // gamma.size))
        xp = cuda.get_array_module(x)

        gbeta = gy.sum(axis=self.axis)
        x_hat = _x_hat(x, self.mean[expander], self.inv_std[expander])
        ggamma = (gy * x_hat).sum(axis=self.axis)
        if xp is numpy:
            gx = (gamma * self.inv_std)[expander] * (
                gy - (x_hat * ggamma[expander] + gbeta[expander]) * inv_m)
        else:
            gx = cuda.elementwise(
                '''
                T gy, T x_hat, T gamma, T inv_std, T ggamma, T gbeta,
                T inv_m
                ''',
                'T gx',
                '''
                gx = (gamma * inv_std) * (
                    gy - (x_hat * ggamma + gbeta) * inv_m)
                ''', 'bn_bwd')(gy, x_hat, gamma[expander],
                               self.inv_std[expander], ggamma[expander],
                               gbeta[expander], inv_m)
        self.retain_outputs((0, 1))
        return gx, ggamma, gbeta 
Example #15
Source File: active_batchnorm.py    From voxelnet_chainer with MIT License
def _apply_bn_fwd(xp, x, mean, inv_std, gamma, beta):
    # NOTE: all arguments should be broadcasted to x.shape
    # (mean, inv_std, gamma, and beta have to already be expanded)
    if xp is numpy:
        x_hat = _x_hat(x, mean, inv_std)
        y = gamma * x_hat
        y += beta
    else:
        y = cuda.elementwise(
            'T x, T mean, T inv_std, T gamma, T beta', 'T y',
            'y = gamma * (x - mean) * inv_std + beta', 'bn_fwd'
        )(x, mean, inv_std, gamma, beta)
    return y 
Example #16
Source File: func_active_bn.py    From voxelnet_chainer with MIT License
def forward(self, inputs):
        self.retain_inputs((0, 1, 2))
        x, gamma, gy = inputs
        xp = cuda.get_array_module(x)
        # ret = xp.zeros_like(x, dtype="f")
        active_gy = gy.transpose(0, 2, 1)
        active_gy = active_gy[self.mask].reshape(-1, active_gy.shape[2])
        expander = self.expander
        inv_m = gamma.dtype.type(1. / (active_gy.size // gamma.size))
        gbeta = active_gy.sum(axis=0)
        x_hat = _x_hat(x, self.mean[expander], self.inv_std[expander])
        active_x_hat = x_hat.transpose(0, 2, 1)[self.mask].reshape(-1, active_gy.shape[1])
        ggamma = (active_gy * active_x_hat).sum(axis=0)
        if xp is numpy:
            gx = (gamma * self.inv_std)[expander] * (
                gy - (x_hat * ggamma[expander] + gbeta[expander]) * inv_m)
        else:
            gx = cuda.elementwise(
                '''
                T gy, T x_hat, T gamma, T inv_std, T ggamma, T gbeta,
                T inv_m
                ''',
                'T gx',
                '''
                gx = (gamma * inv_std) * (
                    gy - (x_hat * ggamma + gbeta) * inv_m)
                ''', 'bn_bwd')(gy, x_hat, gamma[expander],
                               self.inv_std[expander], ggamma[expander],
                               gbeta[expander], inv_m)
        # ret[:self.active_len] = gx
        gx *= self.orig_mask
        self.retain_outputs((0, 1))
        return gx, ggamma, gbeta 
Example #17
Source File: affine_channel_2d.py    From chainer-mask-rcnn with MIT License
def forward(self, inputs):
        self.retain_inputs((0, 1, 2))
        xp = cuda.get_array_module(*inputs)  # unpack so CuPy arrays are detected
        x, W, b = inputs
        if xp is numpy:
            y = W * x + b
        else:
            y = cuda.elementwise(
                'T x, T W, T b', 'T y',
                'y = W * x + b', 'affine_fwd'
            )(x, W, b)
        return y, 
Example #18
Source File: softmax_cross_entropy.py    From chainer-segnet with MIT License
def backward_gpu(self, inputs, grad_outputs):
        cupy = cuda.cupy
        x, t = inputs
        if hasattr(self, 'y'):
            y = self.y
        else:
            y = log_softmax._log_softmax(x, self.use_cudnn)
            cupy.exp(y, out=y)
        gloss = grad_outputs[0]
        n_unit = t.size // len(t)
        coeff = gloss * self._coeff
        if self.class_weight is None:
            gx = cuda.elementwise(
                'T y, S t, raw T coeff, S n_channel, S n_unit',
                'T gx',
                '''
                    const int c = (i / n_unit % n_channel);
                    gx = (t == -1) ? 0 : (coeff[0] * (y - (c == t)));
                ''',
                'softmax_crossent_bwd')(
                    y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
        else:
            gx = cuda.elementwise(
                'T y, raw T w, S t, raw T coeff, S n_channel, S n_unit',
                'T gx',
                '''
                    const int c = (i / n_unit % n_channel);
                    gx = t == -1 ? 0 : coeff[0] * (y - (c == t)) * w[t];
                ''',
                'softmax_crossent_bwd')(
                    y, self.class_weight, cupy.expand_dims(t, 1), coeff,
                    x.shape[1], n_unit)
        return gx, None 
Example #19
Source File: bst.py    From binary_net with Apache License 2.0
def forward_gpu(self, x):
        y = cuda.elementwise(
            'T x', 'T y',
            'y = x >= 0 ? 1 : -1', 'bst_fwd')(
                x[0])
        return y, 
Example #20
Source File: bst.py    From binary_net with Apache License 2.0
def backward_gpu(self, x, gy):
        gx = cuda.elementwise(
            'T x, T gy', 'T gx',
            'gx = abs(x) > 1 ? 0 : gy', 'bst_bwd')(
                x[0], gy[0])
        return gx, 
Example #21
Source File: weight_clip.py    From binary_net with Apache License 2.0
def __call__(self, opt):
        if cuda.available:
            kernel = cuda.elementwise(
                'T low, T high', 
                'T p', 
                'p = (p < low) ? low : (p > high) ? high : p',
                'weight_clip')

        for param in opt.target.params():
            p = param.data
            with cuda.get_device(p) as dev:
                if int(dev) == -1:
                    # numpy.clip returns a copy unless out= is given; clip in place
                    numpy.clip(p, self.low, self.high, out=p)
                else:
                    kernel(self.low, self.high, p) 
Example #22
Source File: function_binary_linear.py    From binary_net with Apache License 2.0
def _kern():
    return cuda.elementwise(
        'T x', 'T y',
        'y = x >= 0 ? 1 : -1',
        'binarize') 
Example #23
Source File: chainer-gogh.py    From chainer-gogh with MIT License
def forward(self, x):
        x = x[0]
        ret = cuda.elementwise(
            'T x','T ret',
            '''
                ret = x<-120?-120:(x>136?136:x);
            ''','clip')(x)
        return ret 
Example #24
Source File: chainer-gogh-multi.py    From chainer-gogh with MIT License
def forward(self, x):
        ret = cuda.elementwise(
            'T x','T ret',
            '''
                ret = x<-120?-120:(x>136?136:x);
            ''','clip')(x)
        return ret 
Example #25
Source File: faster_gru.py    From knmt with GNU General Public License v3.0
def backward_gpu(self, x, gy):
        z_x, z_h, h_x, h, hh = x
        g_z_x, g_z_h, g_h_x, g_h, g_hh = cuda.elementwise(
            'T z, T h_bar, T h, T gy', 'T g_z_x, T g_z_h, T g_h_x, T g_h, T g_hh', '''
            g_h = (1 - z) * gy;
            g_hh = z * (1 - h_bar * h_bar) * gy;
            g_h_x = g_hh;
            g_z_x = g_h * z * (h_bar - h);
            g_z_h = g_z_x;
            ''', 'compute_output_gru_bwd')(
            self.z, self.h_bar, h, gy[0])
        return g_z_x, g_z_h, g_h_x, g_h, g_hh, 
Example #26
Source File: nonbias_weight_decay.py    From chainerrl with MIT License
def __call__(self, rule, param):
        if param.name == 'b':
            return
        p, g = param.array, param.grad
        if p is None or g is None:
            return
        with cuda.get_device_from_array(p) as dev:
            if int(dev) == -1:
                g += self.rate * p
            else:
                kernel = cuda.elementwise(
                    'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')
                kernel(p, self.rate, g) 
Example #27
Source File: noisy_linear.py    From chainerrl with MIT License
def _noise_function(self, r):
        if self._kernel is None:
            self._kernel = cuda.elementwise(
                '', 'T r',
                '''r = copysignf(sqrtf(fabsf(r)), r);''',
                'noise_func')
        self._kernel(r) 
Example #28
Source File: sum_arrays.py    From chainerrl with MIT License
def forward_gpu(self, inputs):
        n = len(inputs)
        ptrs = cuda.cupy.asarray([x.data.ptr for x in inputs],
                                 dtype=cuda.cupy.int64)
        y = cuda.elementwise(
            'T x0, int64 xs, int32 n_xs',
            'T y',
            'float** xs_ = (float**) xs;'
            'y = 0;'
            'for (size_t j = 0; j < n_xs; ++j) {'
            '  y += xs_[j][i];'
            '}',
            'sum_arrays'.format(n))(inputs[0], ptrs.data.ptr, len(ptrs))
        return y, 
Example #29
Source File: mul_add.py    From chainerrl with MIT License
def backward_gpu(self, inputs, grad_outputs):
        x, y, z = inputs
        gw, = grad_outputs

        gx, gy = cuda.elementwise(
            'T x, T y, T gw',
            'T gx, T gy',
            '''
               gx = y * gw;
               gy = x * gw;
            ''',
            'muladd_bwd')(x, y, gw)

        gz = gw
        return gx, gy, gz 
Example #30
Source File: rmsprop_async.py    From async-rl with MIT License
def update_one_gpu(self, param, state):
        cuda.elementwise(
            'T grad, T lr, T alpha, T eps',
            'T param, T ms',
            '''ms = alpha * ms + (1 - alpha) * grad * grad;
               param -= lr * grad / sqrt(ms + eps);''',
            'rmsprop')(param.grad, self.lr, self.alpha, self.eps,
                       param.data, state['ms'])