Python cupy.ascontiguousarray() Examples
The following are 12 code examples of cupy.ascontiguousarray().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module cupy, or try the search function.
Example #1
Source File: non_maximum_suppression.py From chainer-compiler with MIT License | 6 votes |
def _call_nms_kernel(bbox, thresh):
    """Launch the ``nms_kernel`` CUDA kernel on ``bbox`` (currently disabled).

    The function starts with ``assert False``, so with assertions enabled
    it always fails with "Not supported." and the code after it is never
    reached.

    Args:
        bbox: Array of boxes; ``bbox.shape[0]`` is taken as the box count.
        thresh: Threshold forwarded to the kernel as a ``float32``.

    Returns:
        The ``(selection, n_selec)`` pair produced by ``_nms_gpu_post``.
    """
    assert False, "Not supported."

    box_count = bbox.shape[0]
    block_width = 64
    n_col_blocks = np.ceil(box_count / block_width).astype(np.int32)

    grid_dim = (n_col_blocks, n_col_blocks, 1)
    block_dim = (block_width, 1, 1)

    # One uint64 word per (box, column block) pair.
    device_mask = cp.zeros((box_count * n_col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)

    nms_kernel = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kernel_args = (cp.int32(box_count), cp.float32(thresh), bbox, device_mask)
    nms_kernel(grid_dim, block_dim, args=kernel_args)

    # Copy the mask back to the host for post-processing.
    host_mask = device_mask.get()
    kept, kept_count = _nms_gpu_post(
        host_mask, box_count, block_width, n_col_blocks)
    return kept, kept_count
Example #2
Source File: non_maximum_suppression.py From FATE with Apache License 2.0 | 6 votes |
def _call_nms_kernel(bbox, thresh):
    """Launch the ``nms_kernel`` CUDA kernel on ``bbox`` and post-process it.

    Args:
        bbox: Array of boxes; ``bbox.shape[0]`` is taken as the box count.
        thresh: Threshold forwarded to the kernel as a ``float32``.

    Returns:
        The ``(selection, n_selec)`` pair produced by ``_nms_gpu_post``.
    """
    # NOTE: PyTorch has no unsigned-long tensor type. That is irrelevant
    # here because the result ends up as an ndarray anyway, so this code
    # is left unmodified.
    box_count = bbox.shape[0]
    block_width = 64
    n_col_blocks = np.ceil(box_count / block_width).astype(np.int32)

    grid_dim = (n_col_blocks, n_col_blocks, 1)
    block_dim = (block_width, 1, 1)

    # One uint64 word per (box, column block) pair.
    device_mask = cp.zeros((box_count * n_col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)

    nms_kernel = _load_kernel('nms_kernel', _nms_gpu_code)
    kernel_args = (cp.int32(box_count), cp.float32(thresh), bbox, device_mask)
    nms_kernel(grid_dim, block_dim, args=kernel_args)

    # Copy the mask back to the host for post-processing.
    host_mask = device_mask.get()
    kept, kept_count = _nms_gpu_post(
        host_mask, box_count, block_width, n_col_blocks)
    return kept, kept_count
Example #3
Source File: non_maximum_suppression.py From chainercv with MIT License | 6 votes |
def _call_nms_kernel(bbox, thresh):
    """Launch the ``nms_kernel`` CUDA kernel on ``bbox`` and post-process it.

    Args:
        bbox: Array of boxes; ``bbox.shape[0]`` is taken as the box count.
        thresh: Threshold forwarded to the kernel as a ``float32``.

    Returns:
        The ``(selection, n_selec)`` pair produced by ``_nms_gpu_post``.
    """
    box_count = bbox.shape[0]
    block_width = 64
    n_col_blocks = np.ceil(box_count / block_width).astype(np.int32)

    # Launch geometry: a square grid of column blocks, one row of threads.
    grid_dim = (n_col_blocks, n_col_blocks, 1)
    block_dim = (block_width, 1, 1)

    # One uint64 word per (box, column block) pair.
    device_mask = cp.zeros((box_count * n_col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)

    nms_kernel = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kernel_args = (cp.int32(box_count), cp.float32(thresh), bbox, device_mask)
    nms_kernel(grid_dim, block_dim, args=kernel_args)

    # Copy the mask back to the host for post-processing.
    host_mask = device_mask.get()
    kept, kept_count = _nms_gpu_post(
        host_mask, box_count, block_width, n_col_blocks)
    return kept, kept_count
Example #4
Source File: voxelization.py From mesh_reconstruction with MIT License | 6 votes |
def _voxelize_sub2(faces, size):
    """Mark the voxels that contain face vertices.

    For every (batch, face) pair the kernel reads nine consecutive values
    from ``faces`` (three triples, taken as ``yi, xi, zi`` after truncation
    to ``int``) and sets the matching voxel to 1 when all three indices lie
    in ``[0, size)``.

    Args:
        faces: Face array whose first two dimensions are (batch, face).
            The kernel addresses it through a ``float*``, so it presumably
            must be float32 with nine values per face (TODO confirm).
        size: Edge length of the cubic voxel grid.

    Returns:
        An ``int32`` array of shape ``(batch, size, size, size)``.
    """
    batch_size, face_count = faces.shape[:2]
    faces = cp.ascontiguousarray(faces)
    grid = cp.zeros((faces.shape[0], size, size, size), 'int32')

    # The kernel source is kept on one line so its text is unchanged.
    mark_vertices = chainer.cuda.elementwise(
        'int32 j, raw T faces, raw int32 bs, raw int32 nf, raw int32 vs',
        'raw int32 voxels',
        ''' int fn = j % nf; int bn = j / nf; float* face = &faces[(bn * nf + fn) * 9]; for (int k = 0; k < 3; k++) { int yi = face[3 * k + 0]; int xi = face[3 * k + 1]; int zi = face[3 * k + 2]; if ((0 <= yi) && (yi < vs) && (0 <= xi) && (xi < vs) && (0 <= zi) && (zi < vs)) voxels[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] = 1; } ''',  # noqa: E501
        'function',
    )

    # One work item per (batch, face) pair.
    work_ids = cp.arange(batch_size * face_count).astype('int32')
    mark_vertices(work_ids, faces, batch_size, face_count, size, grid)
    return grid
Example #5
Source File: cp.py From MobulaOP with MIT License | 5 votes |
def get_pointer(v):
    """Return a ``ctypes.c_void_p`` pointing at the data of ``v``.

    Args:
        v: Array-like exposing ``flags.c_contiguous`` and ``data.ptr``.

    Returns:
        ``c_void_p`` alone when ``v`` is already C-contiguous. Otherwise a
        ``(c_void_p, copy)`` tuple, where ``copy`` is the C-contiguous copy
        the pointer refers to (presumably returned so the caller can keep
        it alive).
    """
    if v.flags.c_contiguous:
        return ctypes.c_void_p(v.data.ptr)

    # The pointer must refer to the contiguous copy, not to `v` itself.
    packed = cp.ascontiguousarray(v)
    return ctypes.c_void_p(packed.data.ptr), packed
Example #6
Source File: filters.py From cupy with MIT License | 5 votes |
def _check_size_footprint_structure(ndim, size, footprint, structure,
                                    stacklevel=3, force_footprint=False):
    """Normalise the ``size`` / ``footprint`` / ``structure`` filter arguments.

    Args:
        ndim: Number of dimensions used to expand ``size``.
        size: Filter size, used only when neither ``footprint`` nor
            ``structure`` is given.
        footprint: Optional footprint, converted to a C-ordered bool array.
        structure: Optional structure, made C-contiguous.
        stacklevel: Base stack level for the "ignoring size" warning.
        force_footprint: When true, always return a footprint array rather
            than a plain size.

    Returns:
        A ``(sizes, footprint, structure)`` triple in which the unused
        entries are ``None``.

    Raises:
        RuntimeError: If none of the three arguments is provided.
        ValueError: If the footprint contains no true element.
    """
    has_footprint = footprint is not None
    has_structure = structure is not None

    if not has_structure and not has_footprint:
        # Only a size can describe the filter in this case.
        if size is None:
            raise RuntimeError("no footprint or filter size provided")
        sizes = _fix_sequence_arg(size, ndim, 'size', int)
        if not force_footprint:
            return sizes, None, None
        return None, cupy.ones(sizes, bool), None

    if size is not None:
        overriding = 'footprint' if has_footprint else 'structure'
        warnings.warn("ignoring size because {} is set".format(overriding),
                      UserWarning, stacklevel=stacklevel+1)

    if has_footprint:
        footprint = cupy.array(footprint, bool, True, 'C')
        if not footprint.any():
            raise ValueError("all-zero footprint is not supported")

    if has_structure:
        structure = cupy.ascontiguousarray(structure)
        if not has_footprint:
            footprint = cupy.ones(structure.shape, bool)
        return None, footprint, structure

    # Footprint only: an all-true footprint reduces to its shape unless
    # the caller insists on getting a footprint array.
    if force_footprint or not footprint.all():
        return None, footprint, None
    return footprint.shape, None, None
Example #7
Source File: filters.py From cupy with MIT License | 5 votes |
def _call_kernel(kernel, input, weights, output, structure=None,
                 weights_dtype=cupy.float64, structure_dtype=cupy.float64):
    """
    Invoke a constructed ElementwiseKernel on an input image.

    The kernel receives, in order: the input, the weights (if not None),
    the structure (if not None) and the output array. Weights and
    structure are made contiguous and cast to ``weights_dtype`` and
    ``structure_dtype`` respectively; the input and output are never
    converted. Passing ``weights.dtype`` as ``weights_dtype`` therefore
    avoids any dtype conversion of the weights.

    The output is obtained through ``_get_output``. When it may overlap
    the input in memory, the kernel writes into a temporary array that is
    then copied into the real output.

    Returns the output array.
    """
    kernel_args = [input]
    optional_operands = (
        (weights, weights_dtype),
        (structure, structure_dtype),
    )
    for operand, dtype in optional_operands:
        if operand is not None:
            kernel_args.append(cupy.ascontiguousarray(operand, dtype))

    result = _get_output(output, input)
    if not cupy.shares_memory(result, input, 'MAY_SHARE_BOUNDS'):
        kernel_args.append(result)
        kernel(*kernel_args)
        return result

    # Input and output may overlap: compute into a scratch array first,
    # then copy it into the requested output.
    scratch = _get_output(result.dtype, input)
    kernel_args.append(scratch)
    kernel(*kernel_args)
    result[...] = scratch[...]
    return result
Example #8
Source File: test_from_data.py From cupy with MIT License | 5 votes |
def test_ascontiguousarray_on_noncontiguous_array(self):
    """A transposed array is made C-contiguous with its values unchanged."""
    transposed = testing.shaped_arange((2, 3, 4)).transpose(2, 0, 1)
    packed = cupy.ascontiguousarray(transposed)
    assert packed.flags.c_contiguous
    testing.assert_array_equal(transposed, packed)
Example #9
Source File: test_from_data.py From cupy with MIT License | 5 votes |
def test_ascontiguousarray_on_contiguous_array(self):
    """An already contiguous array is returned as the very same object."""
    source = testing.shaped_arange((2, 3, 4))
    result = cupy.ascontiguousarray(source)
    assert source is result
Example #10
Source File: test_from_data.py From cupy with MIT License | 5 votes |
def test_asarray_cuda_array_zero_dim(self, xp): a = xp.ones(()) return xp.ascontiguousarray(a)
Example #11
Source File: fft.py From cupy with MIT License | 5 votes |
def _fftn(a, s, axes, norm, direction, value_type='C2C', order='A',
          plan=None, overwrite_x=False, out=None):
    """Prepare ``a`` and run an N-dimensional FFT through ``_exec_fftn``.

    Args:
        a: Input array.
        s: Requested shape, forwarded to the shape/axes helpers.
        axes: Axes to transform.
        norm: ``None`` or ``'ortho'``.
        direction: Transform direction, forwarded to ``_exec_fftn``.
        value_type: Transform kind, ``'C2C'`` by default.
        order: ``'C'``, ``'F'``, or ``'A'`` to follow the layout of ``a``
            (F if ``a`` is F-contiguous, otherwise C).
        plan, overwrite_x, out: Forwarded to ``_exec_fftn``.

    Returns:
        The transformed array, or ``a`` itself when there are no axes to
        transform and ``value_type`` is ``'C2C'``.

    Raises:
        ValueError: For an invalid ``norm`` or unsupported ``order``.
        IndexError: When there are no axes to transform and
            ``value_type`` is not ``'C2C'``.
    """
    if norm not in (None, 'ortho'):
        raise ValueError('Invalid norm value %s, should be None or "ortho".'
                         % norm)

    axes, axes_sorted = _prep_fftn_axes(a.ndim, s, axes, value_type)
    if not axes_sorted:
        # Nothing to transform.
        if value_type != 'C2C':
            raise IndexError('list index out of range')
        return a

    a = _convert_dtype(a, value_type)

    if order == 'A':
        # Follow the existing layout, preferring F when `a` is F-contiguous;
        # an array that is neither is copied into C order.
        if a.flags.f_contiguous:
            order = 'F'
        else:
            if not a.flags.c_contiguous:
                a = cupy.ascontiguousarray(a)
            order = 'C'
    elif order not in ['C', 'F']:
        raise ValueError('Unsupported order: {}'.format(order))

    # Note: need to call _cook_shape prior to sorting the axes
    a = _cook_shape(a, s, axes, value_type, order=order)

    # Make sure the array still has the selected layout after _cook_shape.
    if order == 'C' and not a.flags.c_contiguous:
        a = cupy.ascontiguousarray(a)
    elif order == 'F' and not a.flags.f_contiguous:
        a = cupy.asfortranarray(a)

    # _cook_shape tells us input shape only, and not output shape
    last_axis = axes_sorted[-1]
    out_size = _get_fftn_out_size(a.shape, s, last_axis, value_type)

    return _exec_fftn(a, direction, value_type, norm=norm, axes=axes_sorted,
                      overwrite_x=overwrite_x, plan=plan, out=out,
                      out_size=out_size)
Example #12
Source File: einsum.py From cupy with MIT License | 4 votes |
def reduced_binary_einsum(arr0, sub0, arr1, sub1, sub_others):
    """Combine two already-reduced einsum operands into one.

    Args:
        arr0: First operand array.
        sub0: Subscript labels of ``arr0``; must not contain repeats.
        arr1: Second operand array.
        sub1: Subscript labels of ``arr1``; must not contain repeats.
        sub_others: Subscript labels that must survive in the result.

    Returns:
        A ``(array, subscripts)`` pair: the combined array and the labels
        describing its axes.

    The result is computed by one of three paths: an element-wise product
    when no label is contracted, a cuTENSOR contraction when
    ``_use_cutensor`` allows it, and a ``cupy.matmul`` otherwise.
    """
    set0 = set(sub0)
    set1 = set(sub1)
    # A repeated label within one operand means it was not reduced beforehand.
    assert len(set0) == len(sub0), 'operand 0 should be reduced: diagonal'
    assert len(set1) == len(sub1), 'operand 1 should be reduced: diagonal'

    # An operand without subscripts: a plain product is enough.
    if len(sub0) == 0 or len(sub1) == 0:
        return arr0 * arr1, sub0 + sub1

    set_others = set(sub_others)
    shared = set0 & set1
    # Shared labels that must survive are batch dims; the rest are contracted.
    batch_dims = shared & set_others
    contract_dims = shared - batch_dims

    bs0, cs0, ts0 = _make_transpose_axes(sub0, batch_dims, contract_dims)
    bs1, cs1, ts1 = _make_transpose_axes(sub1, batch_dims, contract_dims)

    sub_b = [sub0[axis] for axis in bs0]
    # Both operands must list their batch labels in the same order.
    assert sub_b == [sub1[axis] for axis in bs1]
    sub_l = [sub0[axis] for axis in ts0]
    sub_r = [sub1[axis] for axis in ts1]

    sub_out = sub_b + sub_l + sub_r
    assert set(sub_out) <= set_others, 'operands should be reduced: unary sum'

    if len(contract_dims) == 0:
        # Use element-wise multiply when no contraction is needed
        if len(sub_out) == len(sub_others):
            # to assure final output of einsum is C-contiguous
            sub_out = sub_others
        arr0 = _expand_dims_transpose(arr0, sub0, sub_out)
        arr1 = _expand_dims_transpose(arr1, sub1, sub_out)
        return arr0 * arr1, sub_out

    if _use_cutensor(arr0.dtype, sub0, arr1.dtype, sub1,
                     batch_dims, contract_dims):
        if len(sub_out) == len(sub_others):
            # to assure final output of einsum is C-contiguous
            sub_out = sub_others
        out_shape = _get_out_shape(
            arr0.shape, sub0, arr1.shape, sub1, sub_out)
        arr_out = cupy.empty(out_shape, arr0.dtype)
        arr0 = cupy.ascontiguousarray(arr0)
        arr1 = cupy.ascontiguousarray(arr1)
        desc_0 = cutensor.create_tensor_descriptor(arr0)
        desc_1 = cutensor.create_tensor_descriptor(arr1)
        desc_out = cutensor.create_tensor_descriptor(arr_out)
        arr_out = cutensor.contraction(1.0,
                                       arr0, desc_0, sub0,
                                       arr1, desc_1, sub1,
                                       0.0,
                                       arr_out, desc_out, sub_out)
        return arr_out, sub_out

    # Fallback: arrange both operands for a matmul, using axis order
    # [bs, ts, cs] for operand 0 and [bs, cs, ts] for operand 1.
    tmp0, shapes0 = _flatten_transpose(arr0, [bs0, ts0, cs0])
    tmp1, shapes1 = _flatten_transpose(arr1, [bs1, cs1, ts1])
    shapes_out = shapes0[0] + shapes0[1] + shapes1[2]
    # The first shape group (built from the bs axes) must match.
    assert shapes0[0] == shapes1[0]
    arr_out = cupy.matmul(tmp0, tmp1).reshape(shapes_out)
    return arr_out, sub_out