Python cupy.zeros_like() Examples
The following are 12 code examples of cupy.zeros_like().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module cupy, or try the search function.
Example #1
Source File: filters.py From cupy with MIT License | 6 votes |
def _correlate_or_convolve(input, weights, output, mode, cval, origin,
                           convolution=False):
    """Run an n-D correlation, or a convolution when ``convolution`` is set.

    An empty ``weights`` array short-circuits to a zero array shaped like
    ``input``.  For a convolution the weights are reversed along every axis
    and every origin is negated (with one extra step back on even-sized
    axes) before the correlate kernel is built and invoked.
    """
    origins, int_type = _check_nd_args(input, weights, mode, origin)
    if weights.size == 0:
        return cupy.zeros_like(input)

    if convolution:
        # Reverse the weights along all of their axes.
        reverse_all = (slice(None, None, -1),) * weights.ndim
        weights = weights[reverse_all]

        mirrored = list(origins)
        for axis, extent in enumerate(weights.shape):
            negated = -mirrored[axis]
            # Even-sized axes need one extra step after negation.
            mirrored[axis] = negated - 1 if extent % 2 == 0 else negated
        origins = tuple(mirrored)

    kernel = _get_correlate_kernel(
        mode, weights.shape, int_type, origins, cval)
    return _call_kernel(kernel, input, weights, output)
Example #2
Source File: filters.py From cupy with MIT License | 6 votes |
def _rank_filter(input, get_rank, size=None, footprint=None, output=None,
                 mode="reflect", cval=0.0, origin=0):
    """Apply a rank filter whose rank is chosen by ``get_rank``.

    ``get_rank`` is called with the number of set elements in the footprint
    and must return a rank in ``[0, filter_size)``; otherwise a
    ``RuntimeError`` is raised.  The lowest and highest ranks are delegated
    to ``_min_or_max_filter``; every other rank uses the rank kernel.  An
    empty footprint yields a zero array shaped like ``input``.
    """
    _, footprint, _ = _check_size_footprint_structure(
        input.ndim, size, footprint, None, force_footprint=True)
    origins, int_type = _check_nd_args(
        input, footprint, mode, origin, 'footprint')
    if footprint.size == 0:
        return cupy.zeros_like(input)

    filter_size = int(footprint.sum())
    rank = get_rank(filter_size)
    out_of_range = rank < 0 or rank >= filter_size
    if out_of_range:
        raise RuntimeError('rank not within filter footprint size')

    # The extreme ranks are plain minimum / maximum filters.
    extreme = None
    if rank == 0:
        extreme = 'min'
    elif rank == filter_size - 1:
        extreme = 'max'
    if extreme is not None:
        return _min_or_max_filter(input, None, footprint, None, output, mode,
                                  cval, origins, extreme)

    kernel = _get_rank_kernel(filter_size, rank, mode, footprint.shape,
                              origins, float(cval), int_type)
    return _call_kernel(kernel, input, footprint, output, None, bool)
Example #3
Source File: cross.py From neural_renderer with MIT License | 6 votes |
def forward_gpu(self, inputs):
    """Compute per-vector cross products of ``a`` and ``b`` on the GPU.

    Args:
        inputs: A pair ``(a, b)`` of device arrays.  The kernel reinterprets
            both buffers as ``float*`` and reads three consecutive values
            per work item.
            NOTE(review): this presumably expects float32 arrays whose size
            is a multiple of 3 -- confirm against callers.

    Returns:
        A 1-tuple ``(c,)`` where ``c`` is a float32 array shaped like ``a``
        holding the cross product of each consecutive triple.
    """
    a, b = inputs
    c = cp.zeros_like(a, 'float32')
    kernel = chainer.cuda.elementwise(
        'int32 j, raw T a, raw T b',
        'raw T c',
        (
            ' float* ap = (float*)&a[j * 3];'
            ' float* bp = (float*)&b[j * 3];'
            ' float* cp = (float*)&c[j * 3];'
            ' cp[0] = ap[1] * bp[2] - ap[2] * bp[1];'
            ' cp[1] = ap[2] * bp[0] - ap[0] * bp[2];'
            ' cp[2] = ap[0] * bp[1] - ap[1] * bp[0];'
            ' '
        ),
        'function',
    )
    # One work item per 3-vector.  Floor division keeps the count an integer
    # and never rounds up, so no work item can index past the end of the
    # buffers (true division would produce a float and round up).
    n_vectors = a.size // 3
    kernel(cp.arange(n_vectors, dtype='int32'), a, b, c)
    return c,
Example #4
Source File: measurements.py From cupy with MIT License | 5 votes |
def _mean_driver(input, labels, index, return_count=False, use_kern=False):
    """Compute per-label means, optionally returning the counts as well.

    With ``use_kern`` set the work is handed entirely to
    ``_ndimage_mean_kernel_2``.  Otherwise float64 sum and uint64 count
    buffers shaped like ``index`` are allocated, filled by
    ``_ndimage_mean_kernel``, and divided to obtain the mean.

    Returns:
        ``mean`` or, when ``return_count`` is true, ``(mean, count)``.
    """
    if use_kern:
        return _ndimage_mean_kernel_2(
            input, labels, index, return_count=return_count)

    sum_buffer = cupy.zeros_like(index, cupy.float64)
    count_buffer = cupy.zeros_like(index, dtype=cupy.uint64)
    total, count = _ndimage_mean_kernel(
        input, labels, index, index.size, sum_buffer, count_buffer)

    mean = total / count
    return (mean, count) if return_count else mean
Example #5
Source File: test_nccl.py From cupy with MIT License | 5 votes |
def test_single_proc_single_dev(self):
    # All-reduce (sum) over the communicators returned by initAll(1); the
    # received buffer must match what was sent.
    communicators = cuda.nccl.NcclCommunicator.initAll(1)

    cuda.nccl.groupStart()
    for communicator in communicators:
        device = cuda.Device(communicator.device_id())
        device.use()
        sendbuf = cupy.arange(10)
        recvbuf = cupy.zeros_like(sendbuf)
        communicator.allReduce(
            sendbuf.data.ptr,
            recvbuf.data.ptr,
            10,
            cuda.nccl.NCCL_INT64,
            cuda.nccl.NCCL_SUM,
            cuda.Stream.null.ptr,
        )
    cuda.nccl.groupEnd()

    assert cupy.allclose(sendbuf, recvbuf)
Example #6
Source File: test_basic.py From cupy with MIT License | 5 votes |
def test_zeros_like(self, xp, dtype, order):
    # Build an uninitialised (2, 3, 4) template and return its zeros_like
    # result for the given memory order.
    template = xp.ndarray((2, 3, 4), dtype=dtype)
    zeros = xp.zeros_like(template, order=order)
    return zeros
Example #7
Source File: test_basic.py From cupy with MIT License | 5 votes |
def test_zeros_like_subok(self):
    # cupy.zeros_like is expected to raise TypeError for subok=True.
    template = cupy.ndarray((2, 3, 4))
    with pytest.raises(TypeError):
        cupy.zeros_like(template, subok=True)
Example #8
Source File: test_basic.py From cupy with MIT License | 5 votes |
def test_zeros_like_reshape(self, xp, dtype, order):
    # Same as the plain zeros_like check, but overriding the result shape
    # with the shape stored on the test instance.
    template = xp.ndarray((2, 3, 4), dtype=dtype)
    zeros = xp.zeros_like(template, order=order, shape=self.shape)
    return zeros
Example #9
Source File: BootQAgent.py From DeepRL with MIT License | 5 votes |
def grad(self, _cur_output, _next_output, _next_action,
         _batch_tuples, _err_list, _err_count, _k):
    """Write the gradient for head ``_k`` into ``_cur_output.grad``.

    ``_cur_output.grad`` is replaced by a zero array shaped like
    ``_cur_output.data`` (allocated with cupy when ``self.config.gpu`` is
    true, numpy otherwise).  For every sample whose ``mask[_k]`` is truthy,
    the entry for the taken action is set to ``2 * loss`` where
    ``loss = current_value - target`` and
    ``target = reward [+ gamma * next_value if next_state.in_game]``.

    Args:
        _cur_output: Object with ``data`` (values indexed ``[sample][action]``)
            and a writable ``grad`` attribute.
        _next_output: Object with ``data`` indexed the same way; only read
            for samples whose next state is still in game.
        _next_action: Per-sample action index used to look up the next value.
        _batch_tuples: Sequence of samples exposing ``mask``, ``action``,
            ``reward`` and ``next_state.in_game``.
        _err_list: Per-sample accumulator; ``abs(loss)`` is added in place.
        _err_count: Per-sample counter; incremented in place.
        _k: Index into each sample's ``mask``.

    Returns:
        None.  All results are written into the arguments.
    """
    # alloc
    if self.config.gpu:
        _cur_output.grad = cupy.zeros_like(_cur_output.data)
    else:
        _cur_output.grad = np.zeros_like(_cur_output.data)

    # compute grad from each tuples
    for i, sample in enumerate(_batch_tuples):
        # if use bootstrap and masked
        if not sample.mask[_k]:
            continue

        action = sample.action
        cur_action_value = _cur_output.data[i][action].tolist()
        target_value = sample.reward
        # if not empty position, not terminal state
        if sample.next_state.in_game:
            next_action_value = \
                _next_output.data[i][_next_action[i]].tolist()
            target_value += self.config.gamma * next_action_value

        loss = cur_action_value - target_value
        _cur_output.grad[i][action] = 2 * loss
        _err_list[i] += abs(loss)
        _err_count[i] += 1
Example #10
Source File: filters.py From cupy with MIT License | 4 votes |
def _min_or_max_filter(input, size, footprint, structure, output, mode, cval,
                       origin, func):
    """Shared implementation of the n-D minimum / maximum filters.

    Args:
        input: Array to filter.
        size, footprint, structure: Neighbourhood description, normalised by
            ``_check_size_footprint_structure``.
        output: Output array / dtype specification, or None.
        mode, cval, origin: Boundary handling and filter placement, given
            either once or per axis.
        func: ``'min'`` selects the minimum filter; anything else selects
            the maximum filter in the separable path.

    Returns:
        The filtered array.  When ``_check_size_footprint_structure`` reports
        plain ``sizes`` the filter is applied as a sequence of 1-D passes;
        otherwise a single n-D kernel is used.  An empty footprint yields a
        zero array shaped like ``input``.

    Raises:
        RuntimeError: If ``structure`` is given with a different number of
            dimensions than ``input``.
    """
    # structure is used by morphology.grey_erosion() and grey_dilation()
    # and not by the regular min/max filters

    sizes, footprint, structure = _check_size_footprint_structure(
        input.ndim, size, footprint, structure)

    if sizes is not None:
        # Separable filter, run as a series of 1D filters
        fltr = minimum_filter1d if func == 'min' else maximum_filter1d
        output_orig = output
        output = _get_output(output, input)
        sizes = _fix_sequence_arg(sizes, input.ndim, 'size', int)
        modes = _fix_sequence_arg(mode, input.ndim, 'mode', _check_mode)
        origins = _fix_sequence_arg(origin, input.ndim, 'origin', int)
        n_filters = sum(size > 1 for size in sizes)
        if n_filters == 0:
            output[...] = input[...]
            return output
        # We can't operate in-place efficiently, so use a 2-buffer system
        temp = _get_output(output.dtype, input) if n_filters > 1 else None
        first = True
        iterator = zip(sizes, modes, origins)
        for axis, (size, mode, origin) in enumerate(iterator):
            if size <= 1:
                continue
            fltr(input, size, axis, output, mode, cval, origin)
            # After the first pass the caller's input is out of the picture:
            # ping-pong between the output buffer and the temp buffer.
            input, output = output, temp if first else input
            # Without clearing the flag every later pass would target
            # ``temp`` again, making the third pass read and write the same
            # buffer.
            first = False
        if isinstance(output_orig, cupy.ndarray) and input is not output_orig:
            # The final result landed in the temp buffer; copy it into the
            # array the caller supplied.
            output_orig[...] = input
            input = output_orig
        return input

    origins, int_type = _check_nd_args(input, footprint, mode, origin,
                                       'footprint')
    if structure is not None and structure.ndim != input.ndim:
        raise RuntimeError('structure array has incorrect shape')

    if footprint.size == 0:
        return cupy.zeros_like(input)

    center = tuple(x//2 + origin
                   for x, origin in zip(footprint.shape, origins))
    kernel = _get_min_or_max_kernel(
        mode, footprint.shape, func, origins, float(cval), int_type,
        has_structure=structure is not None,
        has_central_value=bool(footprint[center]))
    return _call_kernel(kernel, input, footprint, output, structure,
                        weights_dtype=bool)
Example #11
Source File: measurements.py From cupy with MIT License | 4 votes |
def sum(input, labels=None, index=None):
    """Calculates the sum of the values of an n-D image array, optionally
       at specified sub-regions.

    Args:
        input (cupy.ndarray): Nd-image data to process.
        labels (cupy.ndarray or None): Labels defining sub-regions in `input`.
            If not None, must be same shape as `input`.
        index (cupy.ndarray or None): `labels` to include in output. If None
            (default), all values where `labels` is non-zero are used.

    Returns:
        sum (cupy.ndarray): sum of values, for each sub-region if
        `labels` and `index` are specified.

    Raises:
        TypeError: If `input` is not a cupy.ndarray or has a complex dtype,
            if a non-empty `labels` is not a cupy.ndarray, or if `index` is
            neither a cupy.ndarray nor an int.

    .. seealso:: :func:`scipy.ndimage.sum`
    """
    if not isinstance(input, cupy.ndarray):
        raise TypeError('input must be cupy.ndarray')

    if input.dtype in (cupy.complex64, cupy.complex128):
        # The placeholder previously carried a stray '%' that leaked into
        # the rendered message.
        raise TypeError("cupyx.scipy.ndimage.sum doesnt support {}".format(
            input.dtype.type))

    use_kern = False
    # There are constraints on types because of atomicAdd() in CUDA.
    if input.dtype not in [cupy.int32, cupy.float16, cupy.float32,
                           cupy.float64, cupy.uint32, cupy.uint64,
                           cupy.ulonglong]:
        warnings.warn(
            'Using the slower implmentation as '
            'cupyx.scipy.ndimage.sum supports int32, float16, '
            'float32, float64, uint32, uint64 as data types '
            'for the fast implmentation', util.PerformanceWarning)
        use_kern = True

    if labels is None:
        return input.sum()

    if len(labels) == 0:
        return cupy.array([], dtype=cupy.int64)

    if not isinstance(labels, cupy.ndarray):
        raise TypeError('label must be cupy.ndarray')

    if index is None:
        return input[labels != 0].sum()

    input, labels = cupy.broadcast_arrays(input, labels)

    if not isinstance(index, cupy.ndarray):
        if not isinstance(index, int):
            raise TypeError('index must be cupy.ndarray or a scalar int')
        # Scalar label: a masked reduction is enough.
        return (input[labels == index]).sum()

    out = cupy.zeros_like(index, dtype=cupy.float64)

    # The following parameters for sum where determined using a Tesla P100.
    if (input.size >= 262144 and index.size <= 4) or use_kern:
        return _ndimage_sum_kernel_2(input, labels, index, out)
    return _ndimage_sum_kernel(input, labels, index, index.size, out)
Example #12
Source File: test_texture.py From cupy with MIT License | 4 votes |
def test_array_gen_cpy(self):
    # Round-trip random data through a CUDA array and check that what comes
    # back equals what went in.
    xp = numpy if self.xp == 'numpy' else cupy
    stream = cupy.cuda.Stream() if self.stream else None
    width, height, depth = self.dimensions
    n_channel = self.n_channels

    # A zero depth / height drops that axis from the host-side shape.
    if depth != 0:
        shape = (depth, height, n_channel*width)
    elif height != 0:
        shape = (height, n_channel*width)
    else:
        shape = (n_channel*width,)

    # generate input data and allocate output buffer
    if self.dtype in (numpy.float16, numpy.float32):
        arr = xp.random.random(shape).astype(self.dtype)
        kind = runtime.cudaChannelFormatKindFloat
    else:  # int
        # randint() in NumPy <= 1.10 does not have the dtype argument...
        arr = xp.random.randint(100, size=shape).astype(self.dtype)
        is_signed = self.dtype in (numpy.int8, numpy.int16, numpy.int32)
        if is_signed:
            kind = runtime.cudaChannelFormatKindSigned
        else:
            kind = runtime.cudaChannelFormatKindUnsigned
    arr2 = xp.zeros_like(arr)

    assert arr.flags['C_CONTIGUOUS']
    assert arr2.flags['C_CONTIGUOUS']

    # create a CUDA array
    bits_per_channel = arr.dtype.itemsize*8
    ch_bits = [0, 0, 0, 0]
    for channel in range(n_channel):
        ch_bits[channel] = bits_per_channel
    # unpacking arguments using *ch_bits is not supported before PY35...
    bits_x, bits_y, bits_z, bits_w = ch_bits
    ch = ChannelFormatDescriptor(bits_x, bits_y, bits_z, bits_w, kind)
    cu_arr = CUDAarray(ch, width, height, depth)

    # copy from input to CUDA array, and back to output
    cu_arr.copy_from(arr, stream)
    cu_arr.copy_to(arr2, stream)

    # check input and output are identical
    if stream is not None:
        dev.synchronize()
    assert (arr == arr2).all()