Python cupy.ElementwiseKernel() Examples
The following are 30 code examples of cupy.ElementwiseKernel().
Example #1
Source File: From cupy with MIT License | 6 votes |
def _kernel_finalize():
    """Build the ``cupyx_nd_label_finalize`` elementwise kernel.

    The CUDA body sets negative entries of ``y`` to 0.  For every other
    entry it bisects ``labels`` between indices 0 and ``maxlabel - 1``
    looking for the stored value and writes the one-based index at which
    the search stops back into ``y``.
    """
    inputs = 'int32 maxlabel'
    outputs = 'raw int32 labels, raw Y y'
    # Adjacent literals concatenate to exactly the original kernel source.
    body = (
        ' if (y[i] < 0) { y[i] = 0; continue; } '
        'int yi = y[i]; '
        'int j_min = 0; '
        'int j_max = maxlabel - 1; '
        'int j = (j_min + j_max) / 2; '
        'while (j_min < j_max) { '
        'if (yi == labels[j]) break; '
        'if (yi < labels[j]) j_max = j - 1; '
        'else j_min = j + 1; '
        'j = (j_min + j_max) / 2; '
        '} '
        'y[i] = j + 1; '
    )
    return cupy.ElementwiseKernel(
        inputs, outputs, body, 'cupyx_nd_label_finalize')
Example #2
Source File: From pytorch-sso with MIT License | 6 votes |
def __init__(self):
    """Create the ``unpack`` kernel and store it as ``self.unpack_kernel``.

    For output index ``i`` the kernel computes ``x = i % matrix_size`` and
    ``y = i / matrix_size``, swaps them when ``x < y``, and copies
    ``vec[matrix_size * y - y * (y + 1) / 2 + x]`` into ``mat[i]``.
    """
    inputs = 'raw T vec, int32 matrix_size'
    outputs = 'raw T mat'
    # Adjacent literals concatenate to exactly the original kernel source.
    body = (
        ' int x = i % matrix_size; '
        'int y = i / matrix_size; '
        'if( x < y ) { '
        'int tmp = y; y = x; x = tmp; '
        '} '
        'mat[i] = vec[matrix_size * y - y * (y + 1) / 2 + x]; '
    )
    self.unpack_kernel = cupy.ElementwiseKernel(
        inputs, outputs, body, 'unpack')
Example #3
Source File: From cupy with MIT License | 6 votes |
def test_python_scalar(self):
    """Check a user kernel that adds a Python scalar to an array.

    Runs once each for ``int``, ``float`` and ``bool`` and compares the
    kernel output with the same addition done by NumPy on the host copy.
    """
    for py_type in (int, float, bool):
        np_type = numpy.dtype(py_type).type
        host_input = numpy.random.randint(0, 1, (4, 5)).astype(np_type)
        device_input = cupy.array(host_input)

        add_kernel = cupy.ElementwiseKernel(
            'T x, T y', 'T z', ' z = x + y; ', 'uesr_kernel_1')
        actual = add_kernel(device_input, py_type(2))

        testing.assert_array_equal(actual, host_input + np_type(2))
Example #4
Source File: From cupy with MIT License | 6 votes |
def test_manual_indexing(self, n=100):
    """Check that plain and ``raw``-indexed kernels produce the same sum.

    Two random float32 vectors of length ``n`` are added by a kernel with
    ordinary arguments and by a kernel that indexes ``raw`` arguments with
    ``i`` (launched with ``size=n``); both results must be equal.
    """
    lhs = cupy.random.uniform(-1, 1, n).astype(cupy.float32)
    rhs = cupy.random.uniform(-1, 1, n).astype(cupy.float32)

    auto_indexed = cupy.ElementwiseKernel(
        'T x, T y', 'T z', ' z = x + y; ', 'uesr_kernel_1')
    via_auto = auto_indexed(lhs, rhs)

    raw_indexed = cupy.ElementwiseKernel(
        'raw T x, raw T y', 'raw T z',
        ' z[i] = x[i] + y[i]; ', 'uesr_kernel_2')
    via_raw = raw_indexed(lhs, rhs, size=n)

    testing.assert_array_equal(via_auto, via_raw)
Example #5
Source File: From knmt with GNU General Public License v3.0 | 6 votes |
def backward_gpu(self, inputs, gys):
    """Compute the input, gamma and beta gradients with a fused GPU kernel.

    Delegates to ``self.backward_cpu`` when ``self.gpu_optim`` is falsy.

    Args:
        inputs: Sequence of three arrays ``(x, gamma, beta)``.
        gys: One-element sequence holding the output gradient.

    Returns:
        Tuple ``(ga, g_gamma, g_beta)``.
    """
    if not self.gpu_optim:
        return self.backward_cpu(inputs, gys)

    xp = cuda.get_array_module(*inputs)
    x, gamma, _ = inputs  # beta itself is not needed for the gradients
    (gy,) = gys

    g_beta = xp.sum(gy, axis=0, keepdims=True)
    g_gamma = xp.sum(gy * self.normalized, axis=0, keepdims=True)

    scaled_gy = gy * gamma
    gy_centered = scaled_gy - xp.mean(scaled_gy, axis=1, keepdims=True)
    sc_prod = xp.sum(gy_centered * self.normalized, axis=1, keepdims=True)

    # The divisor x.shape[1] is baked into the kernel source as a literal.
    width = x.shape[1]
    kernel_source = (
        ' z = inv_norm *(gy_centered - normalized * (sc_prod/%f)); ' % width
    )
    backprop_scale = cp.ElementwiseKernel(
        'T inv_norm, T gy_centered, T normalized, T sc_prod',
        'T z',
        kernel_source,
        'backprop_scale')
    ga = backprop_scale(self.inv_norm, gy_centered, self.normalized, sc_prod)

    return ga, g_gamma, g_beta
Example #6
Source File: From cupy with MIT License | 6 votes |
def _get_zoom_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                     integer_output=False):
    """Return the elementwise interpolation kernel for ``zoom``.

    The kernel body and name come from ``_generate_interp_custom`` using
    ``_get_coord_zoom`` as the coordinate function; all other arguments are
    forwarded to it unchanged.
    """
    interp_options = dict(
        coord_func=_get_coord_zoom,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='zoom',
        integer_output=integer_output,
    )
    kernel_body, kernel_name = _generate_interp_custom(**interp_options)
    return cupy.ElementwiseKernel(
        'raw X x, raw W zoom', 'Y y', kernel_body, kernel_name)
Example #7
Source File: From cupy with MIT License | 6 votes |
def _get_zoom_shift_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                           integer_output=False):
    """Return the elementwise interpolation kernel for zoom plus shift.

    The kernel body and name come from ``_generate_interp_custom`` using
    ``_get_coord_zoom_and_shift`` as the coordinate function; all other
    arguments are forwarded to it unchanged.
    """
    interp_options = dict(
        coord_func=_get_coord_zoom_and_shift,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='zoom_shift',
        integer_output=integer_output,
    )
    kernel_body, kernel_name = _generate_interp_custom(**interp_options)
    return cupy.ElementwiseKernel(
        'raw X x, raw W shift, raw W zoom', 'Y y', kernel_body, kernel_name)
Example #8
Source File: From cupy with MIT License | 6 votes |
def _get_shift_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                      integer_output=False):
    """Return the elementwise interpolation kernel for ``shift``.

    The kernel body and name come from ``_generate_interp_custom`` using
    ``_get_coord_shift`` as the coordinate function; all other arguments
    are forwarded to it unchanged.
    """
    interp_options = dict(
        coord_func=_get_coord_shift,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='shift',
        integer_output=integer_output,
    )
    kernel_body, kernel_name = _generate_interp_custom(**interp_options)
    return cupy.ElementwiseKernel(
        'raw X x, raw W shift', 'Y y', kernel_body, kernel_name)
Example #9
Source File: From cupy with MIT License | 6 votes |
def _get_map_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                    integer_output=False):
    """Return the elementwise interpolation kernel for coordinate mapping.

    The kernel body and name come from ``_generate_interp_custom`` using
    ``_get_coord_map`` as the coordinate function; all other arguments are
    forwarded to it unchanged.

    The kernel is labelled ``'map'``.  It was previously labelled
    ``'shift'``, the same label the shift kernel uses, which looks like a
    copy-paste slip: the zoom, zoom-shift and shift builders each pass
    their own name.
    """
    in_params = 'raw X x, raw W coords'
    out_params = 'Y y'
    operation, name = _generate_interp_custom(
        coord_func=_get_coord_map,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='map',
        integer_output=integer_output,
    )
    return cupy.ElementwiseKernel(in_params, out_params, operation, name)
Example #10
Source File: From cupy with MIT License | 5 votes |
def test_getitem_int(self):
    """Check that a ``raw`` array indexed with the integer ``i`` is copied.

    The kernel writes ``x[i]`` into every output element, so the result
    must equal the int32 input array.
    """
    source = cupy.arange(24).reshape((2, 3, 4)).astype('i')
    destination = cupy.empty_like(source)

    copy_kernel = cupy.ElementwiseKernel(
        'raw T x', 'int32 y', 'y = x[i]', 'test_carray_getitem_int')
    result = copy_kernel(source, destination)

    testing.assert_array_equal(result, source)
Example #11
Source File: From mars with Apache License 2.0 | 5 votes |
def execute(cls, ctx, op):
    """Run ``op`` as one ``cupy.ElementwiseKernel``.

    The kernel is built from the arguments returned by ``_evaluate`` for
    the first output chunk.  It is applied to the values stored in ``ctx``
    under each input's key, and the result is stored under the output
    chunk's key.
    """
    import cupy as cp

    out_chunk = op.outputs[0]
    kernel_args = _evaluate(out_chunk)
    kernel = cp.ElementwiseKernel(*kernel_args)
    operands = [ctx[inp.key] for inp in op.inputs]
    ctx[out_chunk.key] = kernel(*operands)
Example #12
Source File: From pyCFTrackers with MIT License | 5 votes |
def col2im_gpu(col, sy, sx, ph, pw, h, w, dy=1, dx=1):
    """Accumulate a 6-D column array into an ``(n, c, h, w)`` image on GPU.

    ``col`` is unpacked as ``(n, c, kh, kw, out_h, out_w)``.  For every
    image element the kernel loops over the ``kh * kw`` kernel offsets and
    sums the matching ``col`` entries, using strides ``sy``/``sx``,
    padding ``ph``/``pw`` and dilation ``dy``/``dx``.

    Returns:
        A newly allocated array of shape ``(n, c, h, w)`` with the dtype
        of ``col``.
    """
    n, c, kh, kw, out_h, out_w = col.shape
    img = cp.empty((n, c, h, w), dtype=col.dtype)

    # Adjacent literals concatenate to exactly the original strings,
    # including the commas that have no following space.
    in_params = (
        'raw T col, int32 h, int32 w, '
        'int32 out_h, int32 out_w,'
        'int32 kh, int32 kw, '
        'int32 sy, int32 sx, '
        'int32 ph, int32 pw,'
        'int32 dx, int32 dy'
    )
    body = (
        ' int c0 = i / (h * w); '
        'int y = i / w % h; '
        'int x = i % w; '
        'T val = 0; '
        'for (int ky = 0; ky < kh; ++ky) { '
        'int out_y = (y + ph - ky * dy); '
        'if (0 > out_y || out_y >= out_h * sy) continue; '
        'if (out_y % sy != 0) continue; '
        'out_y /= sy; '
        'for (int kx = 0; kx < kw; ++kx) { '
        'int out_x = (x + pw - kx * dx); '
        'if (0 > out_x || out_x >= out_w * sx) continue; '
        'if (out_x % sx != 0) continue; '
        'out_x /= sx; '
        'int k = out_y + out_h * (kx + kw * (ky + kh * c0)); '
        'val = val + col[out_x + out_w * k]; '
        '} '
        '} '
        'img = val; '
    )
    kernel = cp.ElementwiseKernel(in_params, 'T img', body, 'col2im')

    # img is passed last so the kernel writes into the preallocated buffer.
    kernel(col.reduced_view(),
           h, w, out_h, out_w, kh, kw, sy, sx, ph, pw, dx, dy,
           img)
    return img
Example #13
Source File: From cupy with MIT License | 5 votes |
def test_block_size(self):
    """Check that a kernel launched with ``block_size=1`` adds correctly."""
    data = testing.shaped_arange((2, 3, 4), cupy, cupy.float32)
    add_kernel = cupy.ElementwiseKernel('T x, T y', 'T z', 'z = x + y')

    result = add_kernel(data, 1, block_size=1)

    testing.assert_array_equal(result, data + 1)
Example #14
Source File: From cupy with MIT License | 5 votes |
def test_invalid_block_size(self):
    """Check that launching with ``block_size=0`` raises ``ValueError``."""
    data = testing.shaped_arange((2, 3, 4), cupy, cupy.float32)
    add_kernel = cupy.ElementwiseKernel('T x, T y', 'T z', 'z = x + y')

    with pytest.raises(ValueError):
        add_kernel(data, 1, block_size=0)
Example #15
Source File: From cupy with MIT License | 5 votes |
def test_scalar(self, xp, dtype):
    """Return ``arange + self.value`` computed by NumPy or a user kernel.

    With ``xp is numpy`` the value is converted to ``dtype`` and added on
    the host.  Otherwise the addition is done by a
    ``cupy.ElementwiseKernel`` that receives ``self.value`` directly.
    """
    x = testing.shaped_arange((2, 3, 4), xp, dtype)

    if xp is not numpy:
        add_kernel = cupy.ElementwiseKernel('T x, T y', 'T z', 'z = x + y')
        return add_kernel(x, self.value)

    addend = numpy.array(self.value).astype(dtype)
    return x + addend
Example #16
Source File: From cupy with MIT License | 5 votes |
def test_numpy_scalar(self, dtype):
    """Check a user kernel that adds a NumPy scalar of type ``dtype``.

    The kernel output is compared with the same addition done by NumPy on
    the host copy of the input.
    """
    host_input = numpy.random.randint(0, 1, (4, 5)).astype(dtype)
    device_input = cupy.array(host_input)

    add_kernel = cupy.ElementwiseKernel(
        'T x, T y', 'T z', ' z = x + y; ', 'uesr_kernel_1')
    actual = add_kernel(device_input, dtype(2))

    testing.assert_array_equal(actual, host_input + dtype(2))
Example #17
Source File: From docker-python with Apache License 2.0 | 5 votes |
def test_kernel(self):
    """Check that a float32 kernel combines ``(2, 3)`` and ``(3,)`` inputs.

    Only the shape of the result is asserted: it must be ``(2, 3)``.
    """
    import cupy as cp

    matrix = cp.arange(6, dtype='f').reshape(2, 3)
    vector = cp.arange(3, dtype='f')

    # Adjacent literals concatenate to exactly the original kernel source.
    body = (
        'if (x - 2 > y) { z = x * y; } '
        'else { z = x + y; }'
    )
    kernel = cp.ElementwiseKernel(
        'float32 x, float32 y', 'float32 z', body, 'my_kernel')

    result = kernel(matrix, vector)
    self.assertEqual((2, 3), result.shape)
Example #18
Source File: From cupy with MIT License | 5 votes |
def check_array_scalar_op(self, op, xp, dtyes, trans=False):
    """Return a 2x3 array plus the scalar ``dtyes(3)``.

    With ``xp is cupy`` the array is wrapped in
    ``DummyObjectWithCudaArrayInterface`` and added by a user kernel;
    otherwise ``+`` is used.  ``trans`` transposes the array first.
    ``op`` is accepted but not used by this helper.
    """
    a = xp.array([[1, 2, 3], [4, 5, 6]], dtyes)
    if trans:
        a = a.T

    if xp is not cupy:
        return a + dtyes(3)

    wrapped = DummyObjectWithCudaArrayInterface(a)
    add_kernel = cupy.ElementwiseKernel('T x, T y', 'T z', 'z = x + y')
    return add_kernel(wrapped, dtyes(3))
Example #19
Source File: From cupy with MIT License | 5 votes |
def test_invalid_kernel_name(self):
    """Check that the kernel name ``'1'`` is rejected with ``ValueError``."""
    bad_name = '1'
    with self.assertRaisesRegex(ValueError, 'Invalid kernel name'):
        cupy.ElementwiseKernel('T x', '', '', bad_name)
Example #20
Source File: From cupy with MIT License | 5 votes |
def test_getitem_idx(self):
    """Check that a ``raw`` array indexed with an index array is copied.

    The kernel splits ``i`` into three indices for the ``(2, 3, 4)`` input
    and writes ``x[idx]`` to the output, which must equal the input.
    """
    source = cupy.arange(24).reshape((2, 3, 4)).astype('i')
    destination = cupy.empty_like(source)

    body = 'ptrdiff_t idx[] = {i / 12, i / 4 % 3, i % 4}; y = x[idx]'
    copy_kernel = cupy.ElementwiseKernel(
        'raw T x', 'int32 y', body, 'test_carray_getitem_idx')
    result = copy_kernel(source, destination)

    testing.assert_array_equal(result, source)
Example #21
Source File: From cupy with MIT License | 5 votes |
def test_strides(self):
    """Check that ``x.strides()`` in a kernel gives ``(12, 4)``.

    The input is a ``(2, 3)`` int32 array; the kernel is launched with
    ``size=2`` so that element ``i`` receives ``x.strides()[i]``.
    """
    source = cupy.arange(6).reshape((2, 3)).astype('i')

    strides_kernel = cupy.ElementwiseKernel(
        'raw int32 x', 'int32 y', 'y = x.strides()[i]',
        'test_carray_strides')
    observed = strides_kernel(source, size=2)

    testing.assert_array_equal(observed, (12, 4))
Example #22
Source File: From cupy with MIT License | 5 votes |
def test_shape(self):
    """Check that ``x.shape()`` in a kernel gives ``(2, 3)``.

    The input is a ``(2, 3)`` int32 array; the kernel is launched with
    ``size=2`` so that element ``i`` receives ``x.shape()[i]``.
    """
    source = cupy.arange(6).reshape((2, 3)).astype('i')

    shape_kernel = cupy.ElementwiseKernel(
        'raw int32 x', 'int32 y', 'y = x.shape()[i]', 'test_carray_shape')
    observed = shape_kernel(source, size=2)

    testing.assert_array_equal(observed, (2, 3))
Example #23
Source File: From cupy with MIT License | 5 votes |
def test_size(self):
    """Check that ``x.size()`` in a kernel gives 3 for a 3-element array."""
    source = cupy.arange(3).astype('i')

    size_kernel = cupy.ElementwiseKernel(
        'raw int32 x', 'int32 y', 'y = x.size()', 'test_carray_size')
    observed = size_kernel(source, size=1)

    self.assertEqual(int(observed[0]), 3)
Example #24
Source File: From pyECO with MIT License | 5 votes |
def col2im_gpu(col, sy, sx, ph, pw, h, w, dy=1, dx=1):
    """Accumulate a 6-D column array into an ``(n, c, h, w)`` image on GPU.

    ``col`` is unpacked as ``(n, c, kh, kw, out_h, out_w)``.  For every
    image element the kernel loops over the ``kh * kw`` kernel offsets and
    sums the matching ``col`` entries, using strides ``sy``/``sx``,
    padding ``ph``/``pw`` and dilation ``dy``/``dx``.

    Returns:
        A newly allocated array of shape ``(n, c, h, w)`` with the dtype
        of ``col``.
    """
    n, c, kh, kw, out_h, out_w = col.shape
    img = cp.empty((n, c, h, w), dtype=col.dtype)

    # Adjacent literals concatenate to exactly the original strings,
    # including the commas that have no following space.
    in_params = (
        'raw T col, int32 h, int32 w, '
        'int32 out_h, int32 out_w,'
        'int32 kh, int32 kw, '
        'int32 sy, int32 sx, '
        'int32 ph, int32 pw,'
        'int32 dx, int32 dy'
    )
    body = (
        ' int c0 = i / (h * w); '
        'int y = i / w % h; '
        'int x = i % w; '
        'T val = 0; '
        'for (int ky = 0; ky < kh; ++ky) { '
        'int out_y = (y + ph - ky * dy); '
        'if (0 > out_y || out_y >= out_h * sy) continue; '
        'if (out_y % sy != 0) continue; '
        'out_y /= sy; '
        'for (int kx = 0; kx < kw; ++kx) { '
        'int out_x = (x + pw - kx * dx); '
        'if (0 > out_x || out_x >= out_w * sx) continue; '
        'if (out_x % sx != 0) continue; '
        'out_x /= sx; '
        'int k = out_y + out_h * (kx + kw * (ky + kh * c0)); '
        'val = val + col[out_x + out_w * k]; '
        '} '
        '} '
        'img = val; '
    )
    kernel = cp.ElementwiseKernel(in_params, 'T img', body, 'col2im')

    # img is passed last so the kernel writes into the preallocated buffer.
    kernel(col.reduced_view(),
           h, w, out_h, out_w, kh, kw, sy, sx, ph, pw, dx, dy,
           img)
    return img
Example #25
Source File: From cupy with MIT License | 5 votes |
def _kernel_labels():
    """Build the ``cupyx_nd_label_labels`` elementwise kernel.

    For every index ``i`` with ``y[i] == i`` the kernel atomically
    increments ``count[1]`` and stores ``i`` in ``labels`` at the slot
    returned by that increment.
    """
    outputs = 'raw Y y, raw int32 count, raw int32 labels'
    # Adjacent literals concatenate to exactly the original kernel source.
    body = (
        ' if (y[i] != i) continue; '
        'int j = atomicAdd(&count[1], 1); '
        'labels[j] = i; '
    )
    return cupy.ElementwiseKernel('', outputs, body, 'cupyx_nd_label_labels')
Example #26
Source File: From cupy with MIT License | 5 votes |
def _kernel_count():
    """Build the ``cupyx_nd_label_count`` elementwise kernel.

    Entries with ``y[i] < 0`` are skipped.  For the rest the kernel follows
    ``j = y[j]`` until ``j == y[j]``.  If the end point differs from ``i``
    it is written to ``y[i]``; otherwise ``count[0]`` is atomically
    incremented.
    """
    outputs = 'raw Y y, raw int32 count'
    # Adjacent literals concatenate to exactly the original kernel source.
    body = (
        ' if (y[i] < 0) continue; '
        'int j = i; '
        'while (j != y[j]) { j = y[j]; } '
        'if (j != i) y[i] = j; '
        'else atomicAdd(&count[0], 1); '
    )
    return cupy.ElementwiseKernel('', outputs, body, 'cupyx_nd_label_count')
Example #27
Source File: From cupy with MIT License | 5 votes |
def _kernel_connect():
    """Build the ``cupyx_nd_label_connect`` elementwise kernel.

    Entries with ``y[i] < 0`` are skipped.  For each of the ``ndirs``
    offsets in ``dirs`` the kernel computes the flat index ``k`` of the
    neighbour of ``i``, skipping neighbours that fall outside ``shape`` or
    have ``y[k] < 0``.  It then follows ``y`` from both ``i`` and ``k``
    until each reaches an index with ``y[j] == j``, and uses ``atomicCAS``
    to make the larger end point refer to the smaller one, retrying until
    both end points agree.
    """
    inputs = 'raw int32 shape, raw int32 dirs, int32 ndirs, int32 ndim'
    outputs = 'raw Y y'
    # Adjacent literals concatenate to exactly the original kernel source.
    body = (
        ' if (y[i] < 0) continue; '
        'for (int dr = 0; dr < ndirs; dr++) { '
        'int j = i; '
        'int rest = j; '
        'int stride = 1; '
        'int k = 0; '
        'for (int dm = ndim-1; dm >= 0; dm--) { '
        'int pos = rest % shape[dm] + dirs[dm + dr * ndim]; '
        'if (pos < 0 || pos >= shape[dm]) { k = -1; break; } '
        'k += pos * stride; '
        'rest /= shape[dm]; '
        'stride *= shape[dm]; '
        '} '
        'if (k < 0) continue; '
        'if (y[k] < 0) continue; '
        'while (1) { '
        'while (j != y[j]) { j = y[j]; } '
        'while (k != y[k]) { k = y[k]; } '
        'if (j == k) break; '
        'if (j < k) { '
        'int old = atomicCAS( &y[k], k, j ); '
        'if (old == k) break; '
        'k = old; '
        '} else { '
        'int old = atomicCAS( &y[j], j, k ); '
        'if (old == j) break; '
        'j = old; '
        '} '
        '} '
        '} '
    )
    return cupy.ElementwiseKernel(
        inputs, outputs, body, 'cupyx_nd_label_connect')
Example #28
Source File: From cupy with MIT License | 5 votes |
def _kernel_init():
    """Build the ``cupyx_nd_label_init`` elementwise kernel.

    The kernel writes -1 to ``y`` where ``x`` is zero and the element's own
    index ``i`` everywhere else.
    """
    body = 'if (x == 0) { y = -1; } else { y = i; }'
    return cupy.ElementwiseKernel('X x', 'Y y', body, 'cupyx_nd_label_init')
Example #29
Source File: From chainer with MIT License | 5 votes |
def elementwise(in_params, out_params, operation, name, **kwargs):
    """Create an elementwise kernel function.

    The kernel object is cached with
    :func:`~chainer.backends.cuda.memoize`, so one object is kept per
    argument combination and CUDA device.

    Arguments match those of :class:`cupy.ElementwiseKernel`, except that
    ``name`` is required here.

    """
    check_cuda_available()
    kernel = cupy.ElementwiseKernel(
        in_params, out_params, operation, name, **kwargs)
    return kernel
Example #30
Source File: From see with GNU General Public License v3.0 | 5 votes |
def get_label_lengths(self, labels):
    """Return, for each row of ``labels``, the index of its first blank.

    Args:
        labels: 2-D array of label ids, one sequence per row.

    Returns:
        Array with one entry per row holding the position of the first
        element equal to ``self.blank_symbol``.  Rows that contain no
        blank symbol yield 0.
    """
    if self.xp is numpy:
        label_lengths = self.xp.zeros(len(labels))
        for row_index, row in enumerate(labels):
            for position, label in enumerate(row):
                if label == self.blank_symbol:
                    label_lengths[row_index] = position
                    break
        return label_lengths

    import cupy

    # ``length`` is set to 0 before the search so that rows without a
    # blank symbol give the same result as the NumPy branch instead of
    # whatever the freshly allocated output buffer happened to contain.
    label_length_kernel = cupy.ElementwiseKernel(
        'raw T labels, int32 blank_symbol, int32 num_labels',
        'T length',
        '''
        length = 0;
        for (int j = 0; j < num_labels; ++j) {
            T label_value = labels[i * num_labels + j];
            if (label_value == blank_symbol) {
                length = j;
                break;
            }
        }
        ''',
        'get_label_lengths'
    )
    # ``labels`` is a raw argument, so the launch size (one thread per
    # row) has to be given explicitly.
    return label_length_kernel(
        labels, self.blank_symbol, labels.shape[1], size=len(labels))