Python cupy.ElementwiseKernel() Examples

The following are 30 code examples of cupy.ElementwiseKernel(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module cupy, or try the search function.
Example #1
Source File: measurements.py    From cupy with MIT License 6 votes vote down vote up
def _kernel_finalize():
    """Build the kernel that rewrites the entries of ``y`` as label numbers.

    For each index ``i`` the kernel sets ``y[i]`` to ``0`` when it is
    negative; otherwise it binary-searches ``labels`` (the first ``maxlabel``
    entries) for the value ``y[i]`` and stores the 1-based position found.
    The search presumably relies on ``labels`` being sorted in ascending
    order -- confirm against the caller.

    Returns:
        cupy.ElementwiseKernel: kernel taking ``maxlabel`` as input and the
        raw arrays ``labels`` and ``y`` as outputs.
    """
    in_params = 'int32 maxlabel'
    out_params = 'raw int32 labels, raw Y y'
    operation = '''
        if (y[i] < 0) {
            y[i] = 0;
            continue;
        }
        int yi = y[i];
        int j_min = 0;
        int j_max = maxlabel - 1;
        int j = (j_min + j_max) / 2;
        while (j_min < j_max) {
            if (yi == labels[j]) break;
            if (yi < labels[j]) j_max = j - 1;
            else j_min = j + 1;
            j = (j_min + j_max) / 2;
        }
        y[i] = j + 1;
        '''
    kernel_name = 'cupyx_nd_label_finalize'
    return cupy.ElementwiseKernel(
        in_params, out_params, operation, kernel_name)
Example #2
Source File: _utility.py    From pytorch-sso with MIT License 6 votes vote down vote up
def __init__(self):
        """Create ``self.unpack_kernel``.

        The kernel fills a ``matrix_size`` x ``matrix_size`` matrix ``mat``
        (addressed by the flat index ``i``) from the packed vector ``vec``.
        The row/column pair is ordered so that the smaller index selects the
        packed row, which makes the result symmetric.
        """
        in_params = 'raw T vec, int32 matrix_size'
        out_params = 'raw T mat'
        operation = """
            int x = i % matrix_size;
            int y = i / matrix_size;
            if( x < y ) {
                int tmp = y;
                y = x;
                x = tmp;
            }
            mat[i] = vec[matrix_size * y - y * (y + 1) / 2 + x];
            """
        self.unpack_kernel = cupy.ElementwiseKernel(
            in_params, out_params, operation, 'unpack')
Example #3
Source File: test_userkernel.py    From cupy with MIT License 6 votes vote down vote up
def test_python_scalar(self):
        """Check that a plain Python scalar can be passed as a kernel operand.

        Runs once for each of ``int``, ``float`` and ``bool``: an addition
        kernel is applied to a device array and the Python value ``typ(2)``,
        and the result is compared with the same sum computed by NumPy.
        """
        for py_type in (int, float, bool):
            np_type = numpy.dtype(py_type).type
            host_in = numpy.random.randint(0, 1, (4, 5)).astype(np_type)
            device_in = cupy.array(host_in)
            add_kernel = cupy.ElementwiseKernel(
                'T x, T y', 'T z',
                '''
                    z = x + y;
                ''',
                'uesr_kernel_1')
            result = add_kernel(device_in, py_type(2))

            testing.assert_array_equal(result, host_in + np_type(2))
Example #4
Source File: test_userkernel.py    From cupy with MIT License 6 votes vote down vote up
def test_manual_indexing(self, n=100):
        """Check that ``raw`` arguments indexed by ``i`` match automatic indexing.

        Two addition kernels are built: one with ordinary parameters and one
        with ``raw`` parameters indexed manually (launched with ``size=n``).
        Both are applied to the same pair of float32 arrays and must agree.
        """
        lhs = cupy.random.uniform(-1, 1, n).astype(cupy.float32)
        rhs = cupy.random.uniform(-1, 1, n).astype(cupy.float32)

        auto_indexed = cupy.ElementwiseKernel(
            'T x, T y', 'T z',
            '''
                z = x + y;
            ''',
            'uesr_kernel_1')
        raw_indexed = cupy.ElementwiseKernel(
            'raw T x, raw T y', 'raw T z',
            '''
                z[i] = x[i] + y[i];
            ''',
            'uesr_kernel_2')

        expected = auto_indexed(lhs, rhs)
        actual = raw_indexed(lhs, rhs, size=n)
        testing.assert_array_equal(expected, actual)
Example #5
Source File: layer_normalization.py    From knmt with GNU General Public License v3.0 6 votes vote down vote up
def backward_gpu(self, inputs, gys):
        """Compute the gradients with a fused elementwise kernel.

        Falls back to ``self.backward_cpu`` when ``self.gpu_optim`` is falsy.

        Args:
            inputs: sequence ``(x, gamma, beta)``.
            gys: one-element sequence holding the upstream gradient ``gy``.

        Returns:
            tuple: ``(ga, g_gamma, g_beta)``, where ``ga`` is the output of
            the ``backprop_scale`` kernel.
        """
        if not self.gpu_optim:
            return self.backward_cpu(inputs,  gys)

        xp = cuda.get_array_module(*inputs)
        x, gamma, beta = inputs
        (gy,) = gys

        g_beta = xp.sum(gy, axis=0, keepdims=True)
        g_gamma = xp.sum(gy*self.normalized, axis=0, keepdims=True)

        scaled_gy = gy*gamma
        gy_centered = scaled_gy - xp.mean(scaled_gy, axis=1, keepdims=True)
        sc_prod = xp.sum(gy_centered * self.normalized, axis=1, keepdims=True)

        # The size of axis 1 is baked into the kernel source as a literal.
        n_features = x.shape[1]
        kernel_source = '''
              z = inv_norm *(gy_centered - normalized * (sc_prod/%f));
         '''%n_features
        backprop_scale = cp.ElementwiseKernel(
            'T inv_norm, T gy_centered, T normalized, T sc_prod',
            'T z',
            kernel_source,
            'backprop_scale')
        ga = backprop_scale(
            self.inv_norm, gy_centered, self.normalized, sc_prod)

        return ga, g_gamma, g_beta
Example #6
Source File: _interp_kernels.py    From cupy with MIT License 6 votes vote down vote up
def _get_zoom_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                     integer_output=False):
    """Return the elementwise kernel used for the ``zoom`` operation.

    The kernel body and name are produced by ``_generate_interp_custom``
    using ``_get_coord_zoom`` as the coordinate function; every argument of
    this function is forwarded to it unchanged.

    Returns:
        cupy.ElementwiseKernel: kernel with raw inputs ``x`` and ``zoom`` and
        output ``y``.
    """
    operation, name = _generate_interp_custom(
        coord_func=_get_coord_zoom, ndim=ndim, large_int=large_int,
        yshape=yshape, mode=mode, cval=cval, order=order, name='zoom',
        integer_output=integer_output)
    return cupy.ElementwiseKernel(
        'raw X x, raw W zoom', 'Y y', operation, name)
Example #7
Source File: _interp_kernels.py    From cupy with MIT License 6 votes vote down vote up
def _get_zoom_shift_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                           integer_output=False):
    """Return the elementwise kernel for a combined zoom and shift.

    The kernel body and name are produced by ``_generate_interp_custom``
    using ``_get_coord_zoom_and_shift`` as the coordinate function; every
    argument of this function is forwarded to it unchanged.

    Returns:
        cupy.ElementwiseKernel: kernel with raw inputs ``x``, ``shift`` and
        ``zoom`` and output ``y``.
    """
    operation, name = _generate_interp_custom(
        coord_func=_get_coord_zoom_and_shift, ndim=ndim, large_int=large_int,
        yshape=yshape, mode=mode, cval=cval, order=order, name='zoom_shift',
        integer_output=integer_output)
    return cupy.ElementwiseKernel(
        'raw X x, raw W shift, raw W zoom', 'Y y', operation, name)
Example #8
Source File: _interp_kernels.py    From cupy with MIT License 6 votes vote down vote up
def _get_shift_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                      integer_output=False):
    """Return the elementwise kernel used for the ``shift`` operation.

    The kernel body and name are produced by ``_generate_interp_custom``
    using ``_get_coord_shift`` as the coordinate function; every argument of
    this function is forwarded to it unchanged.

    Returns:
        cupy.ElementwiseKernel: kernel with raw inputs ``x`` and ``shift`` and
        output ``y``.
    """
    operation, name = _generate_interp_custom(
        coord_func=_get_coord_shift, ndim=ndim, large_int=large_int,
        yshape=yshape, mode=mode, cval=cval, order=order, name='shift',
        integer_output=integer_output)
    return cupy.ElementwiseKernel(
        'raw X x, raw W shift', 'Y y', operation, name)
Example #9
Source File: _interp_kernels.py    From cupy with MIT License 6 votes vote down vote up
def _get_map_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                    integer_output=False):
    """Return the elementwise kernel that samples ``x`` at given coordinates.

    The kernel body and name are produced by ``_generate_interp_custom``
    using ``_get_coord_map`` as the coordinate function; every argument of
    this function is forwarded to it unchanged.

    Returns:
        cupy.ElementwiseKernel: kernel with raw inputs ``x`` and ``coords``
        and output ``y``.
    """
    in_params = 'raw X x, raw W coords'
    out_params = 'Y y'
    operation, name = _generate_interp_custom(
        coord_func=_get_coord_map,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        # Was 'shift' (copied from _get_shift_kernel); the sibling factories
        # each pass their own operation name, so this one is labelled 'map'.
        name='map',
        integer_output=integer_output,
    )
    return cupy.ElementwiseKernel(in_params, out_params, operation, name)
Example #10
Source File: test_carray.py    From cupy with MIT License 5 votes vote down vote up
def test_getitem_int(self):
        """Check that a raw array indexed with the flat index ``i`` copies it.

        A 2x3x4 int32 array is read through ``x[i]`` inside the kernel and
        written to a preallocated output, which must equal the input.
        """
        src = cupy.arange(24).reshape((2, 3, 4)).astype('i')
        copy_kernel = cupy.ElementwiseKernel(
            'raw T x', 'int32 y', 'y = x[i]', 'test_carray_getitem_int')
        dst = copy_kernel(src, cupy.empty_like(src))
        testing.assert_array_equal(dst, src)
Example #11
Source File: cp.py    From mars with Apache License 2.0 5 votes vote down vote up
def execute(cls, ctx, op):
        """Run the fused elementwise kernel for ``op`` and store its result.

        The kernel arguments come from ``_evaluate`` applied to the first
        output chunk. The operands are looked up in ``ctx`` by the key of
        each input, and the result is stored in ``ctx`` under the output
        chunk's key.
        """
        import cupy as cp

        out_chunk = op.outputs[0]
        kernel = cp.ElementwiseKernel(*_evaluate(out_chunk))
        operands = [ctx[inp.key] for inp in op.inputs]
        ctx[out_chunk.key] = kernel(*operands)
Example #12
Source File: cuda_tools.py    From pyCFTrackers with MIT License 5 votes vote down vote up
def col2im_gpu(col, sy, sx, ph, pw, h, w, dy=1, dx=1):
    """Fold a 6-D column array back into an image array on the GPU.

    ``col`` has shape ``(n, c, kh, kw, out_h, out_w)``. Each element of the
    ``(n, c, h, w)`` result is the sum of the ``col`` entries that map onto
    it, as computed by the kernel below.

    Args:
        col: column array; must provide ``shape``, ``dtype`` and
            ``reduced_view()``.
        sy, sx: presumably the vertical/horizontal strides -- confirm
            against the caller.
        ph, pw: presumably the vertical/horizontal padding -- confirm
            against the caller.
        h, w: height and width of the output image.
        dy, dx: presumably the vertical/horizontal dilation -- confirm
            against the caller.

    Returns:
        Array of shape ``(n, c, h, w)`` with the dtype of ``col``.
    """
    n, c, kh, kw, out_h, out_w = col.shape

    in_params = (
        'raw T col, int32 h, int32 w, int32 out_h, int32 out_w,'
        'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw,'
        'int32 dx, int32 dy')
    operation = '''
           int c0 = i / (h * w);
           int y  = i / w % h;
           int x  = i % w;
           T val = 0;
           for (int ky = 0; ky < kh; ++ky) {
             int out_y = (y + ph - ky * dy);
             if (0 > out_y || out_y >= out_h * sy) continue;
             if (out_y % sy != 0) continue;
             out_y /= sy;
             for (int kx = 0; kx < kw; ++kx) {
               int out_x = (x + pw - kx * dx);
               if (0 > out_x || out_x >= out_w * sx) continue;
               if (out_x % sx != 0) continue;
               out_x /= sx;
               int k = out_y + out_h * (kx + kw * (ky + kh * c0));
               val = val + col[out_x + out_w * k];
             }
           }
           img = val;
        '''
    fold = cp.ElementwiseKernel(in_params, 'T img', operation, 'col2im')

    # The kernel writes into this preallocated output, passed as last arg.
    img = cp.empty((n, c, h, w), dtype=col.dtype)
    fold(col.reduced_view(),
         h, w, out_h, out_w, kh, kw, sy, sx, ph, pw, dx, dy, img)
    return img
Example #13
Source File: test_userkernel.py    From cupy with MIT License 5 votes vote down vote up
def test_block_size(self):
        """Check that a kernel launched with ``block_size=1`` adds correctly."""
        add = cupy.ElementwiseKernel('T x, T y', 'T z', 'z = x + y')
        data = testing.shaped_arange((2, 3, 4), cupy, cupy.float32)
        result = add(data, 1, block_size=1)
        testing.assert_array_equal(result, data + 1)
Example #14
Source File: test_userkernel.py    From cupy with MIT License 5 votes vote down vote up
def test_invalid_block_size(self):
        """Check that launching with ``block_size=0`` raises ``ValueError``."""
        add = cupy.ElementwiseKernel('T x, T y', 'T z', 'z = x + y')
        data = testing.shaped_arange((2, 3, 4), cupy, cupy.float32)
        with pytest.raises(ValueError):
            add(data, 1, block_size=0)
Example #15
Source File: test_userkernel.py    From cupy with MIT License 5 votes vote down vote up
def test_scalar(self, xp, dtype):
        """Return ``x + self.value`` computed by NumPy or by a CuPy kernel.

        With ``xp`` being NumPy the sum uses ``self.value`` cast to ``dtype``;
        otherwise an elementwise addition kernel receives ``self.value``
        directly as its scalar operand.
        """
        x = testing.shaped_arange((2, 3, 4), xp, dtype)
        if xp is not numpy:
            add = cupy.ElementwiseKernel('T x, T y', 'T z', 'z = x + y')
            return add(x, self.value)
        return x + numpy.array(self.value).astype(dtype)
Example #16
Source File: test_userkernel.py    From cupy with MIT License 5 votes vote down vote up
def test_numpy_scalar(self, dtype):
        """Check that a NumPy scalar can be passed as a kernel operand.

        An addition kernel is applied to a device array and ``dtype(2)``, and
        the result is compared with the same sum computed by NumPy.
        """
        host_in = numpy.random.randint(0, 1, (4, 5)).astype(dtype)
        device_in = cupy.array(host_in)
        add_kernel = cupy.ElementwiseKernel(
            'T x, T y', 'T z',
            '''
                z = x + y;
            ''',
            'uesr_kernel_1')
        result = add_kernel(device_in, dtype(2))

        testing.assert_array_equal(result, host_in + dtype(2))
Example #17
Source File: test_cupy.py    From docker-python with Apache License 2.0 5 votes vote down vote up
def test_kernel(self):
        """Check that a branching float32 kernel broadcasts (2, 3) with (3,).

        Only the shape of the result is asserted.
        """
        import cupy as cp

        operation = '''if (x - 2 > y) {
                z = x * y;
            } else {
                z = x + y;
            }'''
        branch_kernel = cp.ElementwiseKernel(
            'float32 x, float32 y', 'float32 z', operation, 'my_kernel')

        lhs = cp.arange(6, dtype='f').reshape(2, 3)
        rhs = cp.arange(3, dtype='f')
        result = branch_kernel(lhs, rhs)

        self.assertEqual((2, 3), result.shape)
Example #18
Source File: test_ndarray_cuda_array_interface.py    From cupy with MIT License 5 votes vote down vote up
def check_array_scalar_op(self, op, xp, dtyes, trans=False):
        """Return a 2x3 array plus the scalar ``dtyes(3)``.

        For CuPy the array is wrapped in ``DummyObjectWithCudaArrayInterface``
        and added with an elementwise kernel; for any other ``xp`` the plain
        ``+`` operator is used. ``op`` is not used by this body.

        Args:
            op: unused here.
            xp: array module (``cupy`` or another NumPy-compatible module).
            dtyes: scalar type used for both the array and the scalar.
            trans: when true, the array is transposed before the addition.
        """
        a = xp.array([[1, 2, 3], [4, 5, 6]], dtyes)
        if trans:
            a = a.T

        if xp is not cupy:
            return a + dtyes(3)

        wrapped = DummyObjectWithCudaArrayInterface(a)
        add = cupy.ElementwiseKernel('T x, T y', 'T z', 'z = x + y')
        return add(wrapped, dtyes(3))
Example #19
Source File: test_elementwise.py    From cupy with MIT License 5 votes vote down vote up
def test_invalid_kernel_name(self):
        """Check that the kernel name ``'1'`` is rejected with ``ValueError``."""
        in_params, out_params, operation, bad_name = 'T x', '', '', '1'
        with self.assertRaisesRegex(ValueError, 'Invalid kernel name'):
            cupy.ElementwiseKernel(
                in_params, out_params, operation, bad_name)
Example #20
Source File: test_carray.py    From cupy with MIT License 5 votes vote down vote up
def test_getitem_idx(self):
        """Check that a raw array can be indexed with a multi-index array.

        The kernel decomposes the flat index ``i`` into a three-element index
        for the 2x3x4 input and reads ``x[idx]``; the output must equal the
        input.
        """
        src = cupy.arange(24).reshape((2, 3, 4)).astype('i')
        operation = 'ptrdiff_t idx[] = {i / 12, i / 4 % 3, i % 4}; y = x[idx]'
        copy_kernel = cupy.ElementwiseKernel(
            'raw T x', 'int32 y', operation, 'test_carray_getitem_idx')
        dst = copy_kernel(src, cupy.empty_like(src))
        testing.assert_array_equal(dst, src)
Example #21
Source File: test_carray.py    From cupy with MIT License 5 votes vote down vote up
def test_strides(self):
        """Check that ``x.strides()`` inside a kernel gives (12, 4) for 2x3 int32."""
        src = cupy.arange(6).reshape((2, 3)).astype('i')
        strides_kernel = cupy.ElementwiseKernel(
            'raw int32 x', 'int32 y', 'y = x.strides()[i]',
            'test_carray_strides')
        strides = strides_kernel(src, size=2)
        testing.assert_array_equal(strides, (12, 4))
Example #22
Source File: test_carray.py    From cupy with MIT License 5 votes vote down vote up
def test_shape(self):
        """Check that ``x.shape()`` inside a kernel gives (2, 3) for a 2x3 array."""
        src = cupy.arange(6).reshape((2, 3)).astype('i')
        shape_kernel = cupy.ElementwiseKernel(
            'raw int32 x', 'int32 y', 'y = x.shape()[i]',
            'test_carray_shape')
        shape = shape_kernel(src, size=2)
        testing.assert_array_equal(shape, (2, 3))
Example #23
Source File: test_carray.py    From cupy with MIT License 5 votes vote down vote up
def test_size(self):
        """Check that ``x.size()`` inside a kernel gives 3 for a 3-element array."""
        src = cupy.arange(3).astype('i')
        size_kernel = cupy.ElementwiseKernel(
            'raw int32 x', 'int32 y', 'y = x.size()', 'test_carray_size')
        sizes = size_kernel(src, size=1)
        self.assertEqual(int(sizes[0]), 3)
Example #24
Source File: cuda_tools.py    From pyECO with MIT License 5 votes vote down vote up
def col2im_gpu(col, sy, sx, ph, pw, h, w, dy=1, dx=1):
    """Accumulate a 6-D column array into an ``(n, c, h, w)`` image on the GPU.

    ``col`` is unpacked as ``(n, c, kh, kw, out_h, out_w)``. For every output
    element the kernel loops over the ``kh`` x ``kw`` window offsets and adds
    the ``col`` entries whose position maps onto that element.

    Args:
        col: column array; must provide ``shape``, ``dtype`` and
            ``reduced_view()``.
        sy, sx: presumably strides along height/width -- confirm with caller.
        ph, pw: presumably padding along height/width -- confirm with caller.
        h, w: height and width of the output image.
        dy, dx: presumably dilation along height/width -- confirm with caller.

    Returns:
        Array of shape ``(n, c, h, w)`` with the dtype of ``col``.
    """
    n, c, kh, kw, out_h, out_w = col.shape
    out_shape = (n, c, h, w)

    scalar_params = (
        'int32 h, int32 w, int32 out_h, int32 out_w,'
        'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw,'
        'int32 dx, int32 dy')
    kernel_body = '''
           int c0 = i / (h * w);
           int y  = i / w % h;
           int x  = i % w;
           T val = 0;
           for (int ky = 0; ky < kh; ++ky) {
             int out_y = (y + ph - ky * dy);
             if (0 > out_y || out_y >= out_h * sy) continue;
             if (out_y % sy != 0) continue;
             out_y /= sy;
             for (int kx = 0; kx < kw; ++kx) {
               int out_x = (x + pw - kx * dx);
               if (0 > out_x || out_x >= out_w * sx) continue;
               if (out_x % sx != 0) continue;
               out_x /= sx;
               int k = out_y + out_h * (kx + kw * (ky + kh * c0));
               val = val + col[out_x + out_w * k];
             }
           }
           img = val;
        '''
    col2im = cp.ElementwiseKernel(
        'raw T col, ' + scalar_params, 'T img', kernel_body, 'col2im')

    # Output buffer is allocated up front and handed to the kernel last.
    img = cp.empty(out_shape, dtype=col.dtype)
    scalars = (h, w, out_h, out_w, kh, kw, sy, sx, ph, pw, dx, dy)
    col2im(col.reduced_view(), *scalars, img)
    return img
Example #25
Source File: measurements.py    From cupy with MIT License 5 votes vote down vote up
def _kernel_labels():
    """Build the kernel that collects the indices ``i`` where ``y[i] == i``.

    Each matching index is appended to ``labels`` at a slot reserved by
    atomically incrementing ``count[1]``.

    Returns:
        cupy.ElementwiseKernel: kernel with no inputs and the raw arrays
        ``y``, ``count`` and ``labels`` as outputs.
    """
    out_params = 'raw Y y, raw int32 count, raw int32 labels'
    operation = '''
        if (y[i] != i) continue;
        int j = atomicAdd(&count[1], 1);
        labels[j] = i;
        '''
    return cupy.ElementwiseKernel(
        '', out_params, operation, 'cupyx_nd_label_labels')
Example #26
Source File: measurements.py    From cupy with MIT License 5 votes vote down vote up
def _kernel_count():
    """Build the kernel that resolves each entry of ``y`` to its root.

    Negative entries are skipped. For the others the chain ``j -> y[j]`` is
    followed until it reaches a fixed point; ``y[i]`` is overwritten with
    that root, or, when ``i`` is itself the root, ``count[0]`` is atomically
    incremented.

    Returns:
        cupy.ElementwiseKernel: kernel with no inputs and the raw arrays
        ``y`` and ``count`` as outputs.
    """
    out_params = 'raw Y y, raw int32 count'
    operation = '''
        if (y[i] < 0) continue;
        int j = i;
        while (j != y[j]) { j = y[j]; }
        if (j != i) y[i] = j;
        else atomicAdd(&count[0], 1);
        '''
    return cupy.ElementwiseKernel(
        '', out_params, operation, 'cupyx_nd_label_count')
Example #27
Source File: measurements.py    From cupy with MIT License 5 votes vote down vote up
def _kernel_connect():
    """Build the kernel that merges each element with its neighbours in ``y``.

    For every non-negative ``y[i]`` and each of the ``ndirs`` offsets in
    ``dirs``, the kernel computes the flat index ``k`` of the neighbour
    (skipping offsets that leave the bounds given by ``shape`` and
    neighbours whose entry is negative). It then follows both chains to
    their roots and links the larger root to the smaller one with
    ``atomicCAS``, retrying until the two roots coincide.

    Returns:
        cupy.ElementwiseKernel: kernel with inputs ``shape``, ``dirs``,
        ``ndirs`` and ``ndim`` and the raw output ``y``.
    """
    in_params = 'raw int32 shape, raw int32 dirs, int32 ndirs, int32 ndim'
    out_params = 'raw Y y'
    operation = '''
        if (y[i] < 0) continue;
        for (int dr = 0; dr < ndirs; dr++) {
            int j = i;
            int rest = j;
            int stride = 1;
            int k = 0;
            for (int dm = ndim-1; dm >= 0; dm--) {
                int pos = rest % shape[dm] + dirs[dm + dr * ndim];
                if (pos < 0 || pos >= shape[dm]) {
                    k = -1;
                    break;
                }
                k += pos * stride;
                rest /= shape[dm];
                stride *= shape[dm];
            }
            if (k < 0) continue;
            if (y[k] < 0) continue;
            while (1) {
                while (j != y[j]) { j = y[j]; }
                while (k != y[k]) { k = y[k]; }
                if (j == k) break;
                if (j < k) {
                    int old = atomicCAS( &y[k], k, j );
                    if (old == k) break;
                    k = old;
                }
                else {
                    int old = atomicCAS( &y[j], j, k );
                    if (old == j) break;
                    j = old;
                }
            }
        }
        '''
    return cupy.ElementwiseKernel(
        in_params, out_params, operation, 'cupyx_nd_label_connect')
Example #28
Source File: measurements.py    From cupy with MIT License 5 votes vote down vote up
def _kernel_init():
    """Build the kernel that seeds ``y``: ``-1`` where ``x == 0``, else ``i``.

    Returns:
        cupy.ElementwiseKernel: kernel with input ``x`` and output ``y``.
    """
    operation = 'if (x == 0) { y = -1; } else { y = i; }'
    kernel_name = 'cupyx_nd_label_init'
    return cupy.ElementwiseKernel('X x', 'Y y', operation, kernel_name)
Example #29
Source File: cuda.py    From chainer with MIT License 5 votes vote down vote up
def elementwise(in_params, out_params, operation, name, **kwargs):
    """Creates an elementwise kernel function.

    CUDA availability is verified first, then all arguments are handed to
    :class:`cupy.ElementwiseKernel` as they are. Unlike that class, this
    function requires the ``name`` argument.

    The resulting kernel object is meant to be cached for each argument
    combination and CUDA device through
    :func:`~chainer.backends.cuda.memoize`; the caching itself is not done in
    this body and is presumably applied by a decorator.

    """
    check_cuda_available()
    kernel = cupy.ElementwiseKernel(
        in_params, out_params, operation, name, **kwargs)
    return kernel
Example #30
Source File: loss_metrics.py    From see with GNU General Public License v3.0 5 votes vote down vote up
def get_label_lengths(self, labels):
        """Return, for each row of ``labels``, the index of the first blank.

        On NumPy the rows are scanned in Python; otherwise a CuPy elementwise
        kernel performs the same scan, one row per thread. In both branches
        a row that contains no ``self.blank_symbol`` gets length ``0``.

        Args:
            labels: 2-D label container exposing ``data``, ``shape`` and
                ``len()`` (presumably a chainer ``Variable`` -- confirm with
                the caller).

        Returns:
            1-D array with one entry per row of ``labels``.
        """
        if self.xp == numpy:
            label_lengths = self.xp.zeros(len(labels))

            for i in range(len(labels)):
                for j in range(len(labels[i])):
                    if labels.data[i][j] == self.blank_symbol:
                        label_lengths[i] = j
                        break
        else:
            import cupy
            label_length_kernel = cupy.ElementwiseKernel(
                'raw T labels, int32 blank_symbol, int32 num_labels',
                'T length',
                # ``length`` is initialised first: the output buffer is not
                # written otherwise when a row has no blank symbol, and the
                # NumPy branch yields 0 for such rows.
                '''
                    length = 0;
                    for (int j = 0; j < num_labels; ++j) {
                        T label_value = labels[i * num_labels + j];
                        if (label_value == blank_symbol) {
                            length = j;
                            break;
                        }
                    }
                ''',
                'get_label_lengths'
            )
            label_lengths = label_length_kernel(labels.data, self.blank_symbol, labels.shape[1], size=len(labels))
        return label_lengths