Python Examples of cupy.ascontiguousarray

Source File: non_maximum_suppression.py From chainer-compiler with MIT License

6 votes

def _call_nms_kernel(bbox, thresh):
    assert False, "Not supported."
    n_bbox = bbox.shape[0]
    threads_per_block = 64
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
    blocks = (col_blocks, col_blocks, 1)
    threads = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    kern = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kern(blocks, threads, args=(cp.int32(n_bbox), cp.float32(thresh),
                                bbox, mask_dev))

    mask_host = mask_dev.get()
    selection, n_selec = _nms_gpu_post(
        mask_host, n_bbox, threads_per_block, col_blocks)
    return selection, n_selec

Source File: non_maximum_suppression.py From FATE with Apache License 2.0

6 votes

def _call_nms_kernel(bbox, thresh):
    # PyTorch does not support unsigned long Tensor.
    # Doesn't matter,since it returns ndarray finally.
    # So I'll keep it unmodified.
    n_bbox = bbox.shape[0]
    threads_per_block = 64
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
    blocks = (col_blocks, col_blocks, 1)
    threads = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    kern = _load_kernel('nms_kernel', _nms_gpu_code)
    kern(blocks, threads, args=(cp.int32(n_bbox), cp.float32(thresh),
                                bbox, mask_dev))

    mask_host = mask_dev.get()
    selection, n_selec = _nms_gpu_post(
        mask_host, n_bbox, threads_per_block, col_blocks)
    return selection, n_selec

Source File: non_maximum_suppression.py From chainercv with MIT License

6 votes

def _call_nms_kernel(bbox, thresh):
    n_bbox = bbox.shape[0]
    threads_per_block = 64
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
    blocks = (col_blocks, col_blocks, 1)
    threads = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    kern = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kern(blocks, threads, args=(cp.int32(n_bbox), cp.float32(thresh),
                                bbox, mask_dev))

    mask_host = mask_dev.get()
    selection, n_selec = _nms_gpu_post(
        mask_host, n_bbox, threads_per_block, col_blocks)
    return selection, n_selec

Source File: voxelization.py From mesh_reconstruction with MIT License

6 votes

def _voxelize_sub2(faces, size):
    bs, nf = faces.shape[:2]
    faces = cp.ascontiguousarray(faces)
    voxels = cp.zeros((faces.shape[0], size, size, size), 'int32')
    chainer.cuda.elementwise(
        'int32 j, raw T faces, raw int32 bs, raw int32 nf, raw int32 vs',
        'raw int32 voxels',
        '''
            int fn = j % nf;
            int bn = j / nf;
            float* face = &faces[(bn * nf + fn) * 9];
            for (int k = 0; k < 3; k++) {
                int yi = face[3 * k + 0];
                int xi = face[3 * k + 1];
                int zi = face[3 * k + 2];
                if ((0 <= yi) && (yi < vs) && (0 <= xi) && (xi < vs) && (0 <= zi) && (zi < vs))
                    voxels[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] = 1;
            }
        ''',
        'function',
    )(cp.arange(bs * nf).astype('int32'), faces, bs, nf, size, voxels)
    return voxels

Source File: cp.py From MobulaOP with MIT License

5 votes

def get_pointer(v):
    def p(e):
        return ctypes.c_void_p(e.data.ptr)
    if not v.flags.c_contiguous:
        c = cp.ascontiguousarray(v)
        return p(c), c
    return p(v)

Source File: filters.py From cupy with MIT License

5 votes

def _check_size_footprint_structure(ndim, size, footprint, structure,
                                    stacklevel=3, force_footprint=False):
    if structure is None and footprint is None:
        if size is None:
            raise RuntimeError("no footprint or filter size provided")
        sizes = _fix_sequence_arg(size, ndim, 'size', int)
        if force_footprint:
            return None, cupy.ones(sizes, bool), None
        return sizes, None, None
    if size is not None:
        warnings.warn("ignoring size because {} is set".format(
            'structure' if footprint is None else 'footprint'),
            UserWarning, stacklevel=stacklevel+1)

    if footprint is not None:
        footprint = cupy.array(footprint, bool, True, 'C')
        if not footprint.any():
            raise ValueError("all-zero footprint is not supported")

    if structure is None:
        if not force_footprint and footprint.all():
            return footprint.shape, None, None
        return None, footprint, None

    structure = cupy.ascontiguousarray(structure)
    if footprint is None:
        footprint = cupy.ones(structure.shape, bool)
    return None, footprint, structure

Source File: filters.py From cupy with MIT License

5 votes

def _call_kernel(kernel, input, weights, output, structure=None,
                 weights_dtype=cupy.float64, structure_dtype=cupy.float64):
    """
    Calls a constructed ElementwiseKernel. The kernel must take an input image,
    an optional array of weights, an optional array for the structure, and an
    output array.

    weights and structure can be given as None (structure defaults to None) in
    which case they are not passed to the kernel at all. If the output is given
    as None then it will be allocated in this function.

    This function deals with making sure that the weights and structure are
    contiguous and float64 (or bool for weights that are footprints)*, that the
    output is allocated and appriopately shaped. This also deals with the
    situation that the input and output arrays overlap in memory.

    * weights is always cast to float64 or bool in order to get an output
    compatible with SciPy, though float32 might be sufficient when input dtype
    is low precision. If weights_dtype is passed as weights.dtype then no
    dtype conversion will occur. The input and output are never converted.
    """
    args = [input]
    if weights is not None:
        weights = cupy.ascontiguousarray(weights, weights_dtype)
        args.append(weights)
    if structure is not None:
        structure = cupy.ascontiguousarray(structure, structure_dtype)
        args.append(structure)
    output = _get_output(output, input)
    needs_temp = cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS')
    if needs_temp:
        output, temp = _get_output(output.dtype, input), output
    args.append(output)
    kernel(*args)
    if needs_temp:
        temp[...] = output[...]
        output = temp
    return output

Source File: test_from_data.py From cupy with MIT License

5 votes

def test_ascontiguousarray_on_noncontiguous_array(self):
        a = testing.shaped_arange((2, 3, 4))
        b = a.transpose(2, 0, 1)
        c = cupy.ascontiguousarray(b)
        assert c.flags.c_contiguous
        testing.assert_array_equal(b, c)

Source File: test_from_data.py From cupy with MIT License

5 votes

def test_ascontiguousarray_on_contiguous_array(self):
        a = testing.shaped_arange((2, 3, 4))
        b = cupy.ascontiguousarray(a)
        assert a is b

Source File: test_from_data.py From cupy with MIT License

5 votes

def test_asarray_cuda_array_zero_dim(self, xp):
        a = xp.ones(())
        return xp.ascontiguousarray(a)

Source File: fft.py From cupy with MIT License

5 votes

def _fftn(a, s, axes, norm, direction, value_type='C2C', order='A', plan=None,
          overwrite_x=False, out=None):
    if norm not in (None, 'ortho'):
        raise ValueError('Invalid norm value %s, should be None or "ortho".'
                         % norm)

    axes, axes_sorted = _prep_fftn_axes(a.ndim, s, axes, value_type)
    if not axes_sorted:
        if value_type == 'C2C':
            return a
        else:
            raise IndexError('list index out of range')
    a = _convert_dtype(a, value_type)

    if order == 'A':
        if a.flags.f_contiguous:
            order = 'F'
        elif a.flags.c_contiguous:
            order = 'C'
        else:
            a = cupy.ascontiguousarray(a)
            order = 'C'
    elif order not in ['C', 'F']:
        raise ValueError('Unsupported order: {}'.format(order))

    # Note: need to call _cook_shape prior to sorting the axes
    a = _cook_shape(a, s, axes, value_type, order=order)

    if order == 'C' and not a.flags.c_contiguous:
        a = cupy.ascontiguousarray(a)
    elif order == 'F' and not a.flags.f_contiguous:
        a = cupy.asfortranarray(a)

    # _cook_shape tells us input shape only, and not output shape
    out_size = _get_fftn_out_size(a.shape, s, axes_sorted[-1], value_type)

    a = _exec_fftn(a, direction, value_type, norm=norm, axes=axes_sorted,
                   overwrite_x=overwrite_x, plan=plan, out=out,
                   out_size=out_size)
    return a

Source File: einsum.py From cupy with MIT License

4 votes

def reduced_binary_einsum(arr0, sub0, arr1, sub1, sub_others):
    set0 = set(sub0)
    set1 = set(sub1)
    assert len(set0) == len(sub0), 'operand 0 should be reduced: diagonal'
    assert len(set1) == len(sub1), 'operand 1 should be reduced: diagonal'

    if len(sub0) == 0 or len(sub1) == 0:
        return arr0 * arr1, sub0 + sub1

    set_others = set(sub_others)
    shared = set0 & set1
    batch_dims = shared & set_others
    contract_dims = shared - batch_dims

    bs0, cs0, ts0 = _make_transpose_axes(sub0, batch_dims, contract_dims)
    bs1, cs1, ts1 = _make_transpose_axes(sub1, batch_dims, contract_dims)

    sub_b = [sub0[axis] for axis in bs0]
    assert sub_b == [sub1[axis] for axis in bs1]
    sub_l = [sub0[axis] for axis in ts0]
    sub_r = [sub1[axis] for axis in ts1]

    sub_out = sub_b + sub_l + sub_r
    assert set(sub_out) <= set_others, 'operands should be reduced: unary sum'

    if len(contract_dims) == 0:
        # Use element-wise multiply when no contraction is needed
        if len(sub_out) == len(sub_others):
            # to assure final output of einsum is C-contiguous
            sub_out = sub_others
        arr0 = _expand_dims_transpose(arr0, sub0, sub_out)
        arr1 = _expand_dims_transpose(arr1, sub1, sub_out)
        return arr0 * arr1, sub_out

    if _use_cutensor(arr0.dtype, sub0, arr1.dtype, sub1,
                     batch_dims, contract_dims):
        if len(sub_out) == len(sub_others):
            # to assure final output of einsum is C-contiguous
            sub_out = sub_others
        out_shape = _get_out_shape(arr0.shape, sub0, arr1.shape, sub1, sub_out)
        arr_out = cupy.empty(out_shape, arr0.dtype)
        arr0 = cupy.ascontiguousarray(arr0)
        arr1 = cupy.ascontiguousarray(arr1)
        desc_0 = cutensor.create_tensor_descriptor(arr0)
        desc_1 = cutensor.create_tensor_descriptor(arr1)
        desc_out = cutensor.create_tensor_descriptor(arr_out)
        arr_out = cutensor.contraction(1.0,
                                       arr0, desc_0, sub0,
                                       arr1, desc_1, sub1,
                                       0.0,
                                       arr_out, desc_out, sub_out)
        return arr_out, sub_out

    tmp0, shapes0 = _flatten_transpose(arr0, [bs0, ts0, cs0])
    tmp1, shapes1 = _flatten_transpose(arr1, [bs1, cs1, ts1])
    shapes_out = shapes0[0] + shapes0[1] + shapes1[2]
    assert shapes0[0] == shapes1[0]
    arr_out = cupy.matmul(tmp0, tmp1).reshape(shapes_out)
    return arr_out, sub_out

Python cupy.ascontiguousarray() Examples