Python cupy.float32() Examples

The following are 30 code examples of cupy.float32().
Example #1
Source File:    From cupy with MIT License 6 votes vote down vote up
def test_rfft(self, xp, dtype):
        """Compute a 1-D real FFT with ``xp``, using a cuFFT plan on CuPy.

        On the CuPy path a plan is obtained from ``get_fft_plan`` and the
        transform is executed inside the plan's context; on any other
        backend the transform is called directly.

        Args:
            xp: Array module under test (``cupy`` or ``np``).
            dtype: Element type of the random input array.

        Returns:
            The transform output; for NumPy with float16/float32/complex64
            inputs it is cast to ``complex64``.
        """
        a = testing.shaped_random(self.shape, xp, dtype)
        if xp is cupy:
            from cupyx.scipy.fftpack import get_fft_plan
            shape = (self.n,) if self.n is not None else None
            plan = get_fft_plan(a, shape=shape, value_type='R2C')
            assert isinstance(plan, cupy.cuda.cufft.Plan1d)
            with plan:
                out = xp.fft.rfft(a, n=self.n, norm=self.norm)
        else:
            # Only the non-CuPy backend takes the plan-free call; running it
            # unconditionally would throw away the result computed under
            # the plan and leave the planned path unchecked.
            out = xp.fft.rfft(a, n=self.n, norm=self.norm)

        # presumably aligns NumPy's output dtype with CuPy's for
        # low-precision inputs -- confirm against the comparison decorator
        if xp is np and dtype in [np.float16, np.float32, np.complex64]:
            out = out.astype(np.complex64)

        return out
Example #2
Source File:    From sporco with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_21(self):
        """Check that ConvMinL1InL2Ball honours a float32 'DataType' option.

        Builds the solver from random CuPy arrays and asserts that its
        ``X``, ``Y`` and ``U`` attributes all report the requested dtype.
        """
        dtype = cp.float32
        sig_size, flt_size = 16, 5
        n_sig, n_flt = 2, 4
        # D is drawn before s so the random stream is consumed in a fixed
        # order.
        D = cp.random.randn(flt_size, flt_size, n_flt)
        s = cp.random.randn(sig_size, sig_size, n_sig)
        options = cbpdn.ConvMinL1InL2Ball.Options({
            'Verbose': False,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dtype,
        })
        eps = 1e0
        solver = cbpdn.ConvMinL1InL2Ball(D, s, eps, opt=options)
        for attr in ('X', 'Y', 'U'):
            assert getattr(solver, attr).dtype == dtype
Example #3
Source File:    From sporco with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_06(self):
        """Check ConvBPDN dtypes when 'BackTrack' is enabled with float32.

        Asserts that ``X`` has the requested real dtype and that ``Xf`` and
        ``Yf`` have the dtype returned by ``complex_dtype`` for it.
        """
        dtype = cp.float32
        sig_size, flt_size = 16, 5
        n_sig, n_flt = 2, 4
        D = cp.random.randn(flt_size, flt_size, n_flt)
        s = cp.random.randn(sig_size, sig_size, n_sig)
        options = cbpdn.ConvBPDN.Options({
            'Verbose': False,
            'MaxMainIter': 20,
            'BackTrack': {'Enabled': True},
            'DataType': dtype,
        })
        solver = cbpdn.ConvBPDN(D, s, 1e-1, opt=options)
        assert solver.X.dtype == dtype
        for attr in ('Xf', 'Yf'):
            assert getattr(solver, attr).dtype == complex_dtype(dtype)
Example #4
Source File:    From FATE with Apache License 2.0 6 votes vote down vote up
def _call_nms_kernel(bbox, thresh):
    """Launch the ``nms_kernel`` GPU kernel and post-process its mask.

    Args:
        bbox: Array of boxes; its first axis length is taken as the number
            of boxes. It is made contiguous as float32 on the device.
        thresh: Threshold forwarded to the kernel as a ``float32`` scalar.

    Returns:
        The ``(selection, n_selec)`` pair produced by ``_nms_gpu_post``.
    """
    # Upstream note: PyTorch has no unsigned-long tensor, but the mask is
    # returned as an ndarray anyway, so the CuPy path is kept unmodified.
    threads_per_block = 64
    n_bbox = bbox.shape[0]
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
    grid = (col_blocks, col_blocks, 1)
    block = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    bbox_dev = cp.ascontiguousarray(bbox, dtype=np.float32)
    nms = _load_kernel('nms_kernel', _nms_gpu_code)
    kernel_args = (cp.int32(n_bbox), cp.float32(thresh), bbox_dev, mask_dev)
    nms(grid, block, args=kernel_args)

    # Copy the mask back to the host before handing it to the post-processor.
    mask_host = mask_dev.get()
    return _nms_gpu_post(mask_host, n_bbox, threads_per_block, col_blocks)
Example #5
Source File:    From chainer-compiler with MIT License 6 votes vote down vote up
def _call_nms_kernel(bbox, thresh):
    """Launch the ``nms_kernel`` raw kernel (currently disabled).

    The leading assertion fails unconditionally, so the kernel launch
    below it only runs when assertions are stripped (``python -O``).

    Args:
        bbox: Array of boxes; its first axis length is the box count.
        thresh: Threshold forwarded to the kernel as a ``float32`` scalar.

    Returns:
        The ``(selection, n_selec)`` pair produced by ``_nms_gpu_post``.
    """
    assert False, "Not supported."
    threads_per_block = 64
    n_bbox = bbox.shape[0]
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
    grid = (col_blocks, col_blocks, 1)
    block = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    bbox_dev = cp.ascontiguousarray(bbox, dtype=np.float32)
    nms = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kernel_args = (cp.int32(n_bbox), cp.float32(thresh), bbox_dev, mask_dev)
    nms(grid, block, args=kernel_args)

    # Copy the mask back to the host before handing it to the post-processor.
    mask_host = mask_dev.get()
    return _nms_gpu_post(mask_host, n_bbox, threads_per_block, col_blocks)
Example #6
Source File:    From EEND with MIT License 6 votes vote down vote up
def use_single_gpu():
    """Use single GPU device.

    If CUDA_VISIBLE_DEVICES is set, select a device from the variable.
    Otherwise, get a free GPU device and use it.

    Returns:
        assigned GPU id.
    """
    cvd = os.environ.get('CUDA_VISIBLE_DEVICES')
    if cvd is None:
        # no GPUs are reserved: take the first free one
        cvd = get_free_gpus()[0]
    elif ',' in cvd:
        # multiple GPUs are reserved: take the first one listed
        cvd = cvd.split(',')[0]
    # a single reserved GPU, or the id chosen above, normalised to int
    cvd = int(cvd)
    # Use the GPU immediately
    # NOTE(review): nothing in this block selects device ``cvd`` before the
    # allocation, so it lands on CuPy's current device -- confirm whether a
    # device-selection call was lost upstream.
    cupy.empty((1,), dtype=cupy.float32)
    return cvd
Example #7
Source File:    From sporco with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_30(self):
        """Check that AddMaskSim around ConvBPDN honours float32 'DataType'.

        Wraps ``cbpdn.ConvBPDN`` with ``cbpdn.AddMaskSim`` using an all-ones
        array shaped like the signal, then asserts that the wrapped solver's
        ``X``, ``Y`` and ``U`` attributes report the requested dtype.
        """
        dtype = cp.float32
        sig_size, flt_size, n_flt = 16, 5, 4
        D = cp.random.randn(flt_size, flt_size, n_flt)
        s = cp.random.randn(sig_size, sig_size)
        w = cp.ones(s.shape)
        options = cbpdn.ConvBPDN.Options({
            'Verbose': False,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dtype,
        })
        wrapper = cbpdn.AddMaskSim(cbpdn.ConvBPDN, D, s, w, 1e-1, opt=options)
        assert wrapper.cbpdn.X.dtype == dtype
        assert wrapper.cbpdn.Y.dtype == dtype
        assert wrapper.cbpdn.U.dtype == dtype
Example #8
Source File:    From cupy with MIT License 6 votes vote down vote up
def test_template_specialization(self):
        """Run each specialised kernel from a templated RawModule.

        Skipped for the ``nvcc`` backend. For every name expression the
        matching kernel is fetched, launched on a random 10-element array,
        and the array (which is passed to the kernel) is compared with the
        square of its values taken before the launch.
        """
        if self.backend == 'nvcc':
            self.skipTest('nvcc does not support template specialization')

        # compile code
        name_expressions = ['my_sqrt<int>', 'my_sqrt<float>',
                            'my_sqrt<complex<double>>', 'my_func']
        # The name expressions must be given at construction so the
        # templated kernels can be looked up with get_function() below.
        mod = cupy.RawModule(code=test_cxx_template, options=('--std=c++11',),
                             name_expressions=name_expressions)

        dtypes = (cupy.int32, cupy.float32, cupy.complex128, cupy.float64)
        for ker_T, dtype in zip(name_expressions, dtypes):
            # get specialized kernels
            ker = mod.get_function(ker_T)

            # prepare inputs & expected outputs
            in_arr = cupy.testing.shaped_random((10,), dtype=dtype)
            out_arr = in_arr**2

            # run
            ker((1,), (10,), (in_arr, 10))

            # check results
            assert cupy.allclose(in_arr, out_arr)
Example #9
Source File:    From sporco with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_19(self):
        """Check that ConvBPDNGradReg honours a float32 'DataType' option.

        Builds the solver from random CuPy arrays with 'LinSolveCheck' and
        'AutoRho' enabled and asserts that ``X``, ``Y`` and ``U`` all report
        the requested dtype.
        """
        dtype = cp.float32
        sig_size, flt_size = 16, 5
        n_sig, n_flt = 2, 4
        D = cp.random.randn(flt_size, flt_size, n_flt)
        s = cp.random.randn(sig_size, sig_size, n_sig)
        options = cbpdn.ConvBPDNGradReg.Options({
            'Verbose': False,
            'LinSolveCheck': True,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dtype,
        })
        lmbda, mu = 1e-1, 1e-2
        solver = cbpdn.ConvBPDNGradReg(D, s, lmbda, mu, opt=options)
        for attr in ('X', 'Y', 'U'):
            assert getattr(solver, attr).dtype == dtype
Example #10
Source File:    From sporco with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_17(self):
        """Check that ConvElasticNet honours a float32 'DataType' option.

        Builds the solver from random CuPy arrays with 'LinSolveCheck' and
        'AutoRho' enabled and asserts that ``X``, ``Y`` and ``U`` all report
        the requested dtype.
        """
        dtype = cp.float32
        sig_size, flt_size = 16, 5
        n_sig, n_flt = 2, 4
        D = cp.random.randn(flt_size, flt_size, n_flt)
        s = cp.random.randn(sig_size, sig_size, n_sig)
        options = cbpdn.ConvElasticNet.Options({
            'Verbose': False,
            'LinSolveCheck': True,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dtype,
        })
        lmbda, mu = 1e-1, 1e-2
        solver = cbpdn.ConvElasticNet(D, s, lmbda, mu, opt=options)
        for attr in ('X', 'Y', 'U'):
            assert getattr(solver, attr).dtype == dtype
Example #11
Source File:    From cupy with MIT License 6 votes vote down vote up
def test_context_switch_RawModule4(self):
        """Load a cubin on device 0 and launch its kernel on device 1.

        The test is skipped when the two devices report different compute
        capabilities.
        """
        dev0, dev1 = cupy.cuda.Device(0), cupy.cuda.Device(1)
        if dev0.compute_capability != dev1.compute_capability:
            raise pytest.skip()

        # Generate the cubin in the temp dir and load it while device 0 is
        # the current device.
        with dev0:
            cubin_path = self._generate_file('cubin')
            module = cupy.RawModule(path=cubin_path, backend=self.backend)
            kernel = module.get_function('test_div')

        # In this test, reloading happens at kernel launch.
        with dev1:
            x1, x2, y = self._helper(kernel, cupy.float32)
            assert cupy.allclose(y, x1 / (x2 + 1.0))
Example #12
Source File:    From sporco with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_06(self):
        """Check that ConvBPDN honours a float32 'DataType' option.

        Builds the solver from random CuPy arrays with 'AutoRho' enabled and
        asserts that ``X``, ``Y`` and ``U`` all report the requested dtype.
        """
        dtype = cp.float32
        sig_size, flt_size = 16, 5
        n_sig, n_flt = 2, 4
        D = cp.random.randn(flt_size, flt_size, n_flt)
        s = cp.random.randn(sig_size, sig_size, n_sig)
        options = cbpdn.ConvBPDN.Options({
            'Verbose': False,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dtype,
        })
        solver = cbpdn.ConvBPDN(D, s, 1e-1, opt=options)
        for attr in ('X', 'Y', 'U'):
            assert getattr(solver, attr).dtype == dtype
Example #13
Source File:    From cupy with MIT License 6 votes vote down vote up
def test_irfft(self, xp, dtype):
        """Compute a 1-D inverse real FFT, using a cuFFT plan on CuPy.

        On the CuPy path a 'C2R' plan is obtained from ``get_fft_plan`` and
        the transform is executed inside the plan's context; on any other
        backend the transform is called directly.

        Args:
            xp: Array module under test (``cupy`` or ``np``).
            dtype: Element type of the random input array.

        Returns:
            The transform output; for NumPy with float16/float32/complex64
            inputs it is cast to ``float32``.
        """
        a = testing.shaped_random(self.shape, xp, dtype)
        if xp is cupy:
            from cupyx.scipy.fftpack import get_fft_plan
            shape = (self.n,) if self.n is not None else None
            plan = get_fft_plan(a, shape=shape, value_type='C2R')
            assert isinstance(plan, cupy.cuda.cufft.Plan1d)
            with plan:
                out = xp.fft.irfft(a, n=self.n, norm=self.norm)
        else:
            # Only the non-CuPy backend takes the plan-free call; running it
            # unconditionally would throw away the result computed under
            # the plan and leave the planned path unchecked.
            out = xp.fft.irfft(a, n=self.n, norm=self.norm)

        # presumably aligns NumPy's output dtype with CuPy's for
        # low-precision inputs -- confirm against the comparison decorator
        if xp is np and dtype in [np.float16, np.float32, np.complex64]:
            out = out.astype(np.float32)

        return out
Example #14
Source File:    From cupy with MIT License 6 votes vote down vote up
def test_irfft2(self, xp, dtype, order, enable_nd):
        """Compute ``irfft2`` on random input, optionally in Fortran order.

        Raises ``unittest.SkipTest`` when the CUDA runtime version lies in
        [10010, 10020], the compute capability is below 70 and the last
        transformed axis has size 2.

        Returns:
            The transform output; for NumPy with float16/float32/complex64
            inputs it is cast to ``float32``.
        """
        assert config.enable_nd_planning == enable_nd

        # The three skip conditions are checked lazily, in this order.
        runtime_version = cupy.cuda.runtime.runtimeGetVersion()
        if 10010 <= runtime_version <= 10020:
            capability = int(cupy.cuda.device.get_compute_capability())
            if capability < 70 and _size_last_transform_axis(
                    self.shape, self.s, self.axes) == 2:
                raise unittest.SkipTest('work-around for cuFFT issue')

        data = testing.shaped_random(self.shape, xp, dtype)
        if order == 'F':
            data = xp.asfortranarray(data)
        result = xp.fft.irfft2(
            data, s=self.s, axes=self.axes, norm=self.norm)

        if xp is np and dtype in (np.float16, np.float32, np.complex64):
            return result.astype(np.float32)
        return result
Example #15
Source File:    From cupy with MIT License 6 votes vote down vote up
def test_irfftn(self, xp, dtype, order, enable_nd):
        """Compute ``irfftn`` on random input, optionally in Fortran order.

        Raises ``unittest.SkipTest`` when the CUDA runtime version lies in
        [10010, 10020], the compute capability is below 70 and the last
        transformed axis has size 2.

        Returns:
            The transform output; for NumPy with float16/float32/complex64
            inputs it is cast to ``float32``.
        """
        assert config.enable_nd_planning == enable_nd

        # The three skip conditions are checked lazily, in this order.
        runtime_version = cupy.cuda.runtime.runtimeGetVersion()
        if 10010 <= runtime_version <= 10020:
            capability = int(cupy.cuda.device.get_compute_capability())
            if capability < 70 and _size_last_transform_axis(
                    self.shape, self.s, self.axes) == 2:
                raise unittest.SkipTest('work-around for cuFFT issue')

        data = testing.shaped_random(self.shape, xp, dtype)
        if order == 'F':
            data = xp.asfortranarray(data)
        result = xp.fft.irfftn(
            data, s=self.s, axes=self.axes, norm=self.norm)

        if xp is np and dtype in (np.float16, np.float32, np.complex64):
            return result.astype(np.float32)
        return result

# Only those tests in which a legit plan can be obtained are kept 
Example #16
Source File:    From cupy with MIT License 6 votes vote down vote up
def test_irfftn(self, xp, dtype, enable_nd):
        """Compute an N-D inverse real FFT, using a cuFFT plan on CuPy.

        On the CuPy path a 'C2R' plan is obtained from ``get_fft_plan`` and
        the transform is executed inside the plan's context; on any other
        backend the transform is called directly.

        Args:
            xp: Array module under test (``cupy`` or ``np``).
            dtype: Element type of the random input array.
            enable_nd: Expected value of ``config.enable_nd_planning``.

        Returns:
            The transform output; for NumPy with float16/float32/complex64
            inputs it is cast to ``float32``.
        """
        assert config.enable_nd_planning == enable_nd
        a = testing.shaped_random(self.shape, xp, dtype)
        if xp is cupy:
            from cupyx.scipy.fftpack import get_fft_plan
            plan = get_fft_plan(a, self.s, self.axes, value_type='C2R')
            with plan:
                out = xp.fft.irfftn(
                    a, s=self.s, axes=self.axes, norm=self.norm)
        else:
            # Only the non-CuPy backend takes the plan-free call; running it
            # unconditionally would throw away the result computed under
            # the plan and leave the planned path unchecked.
            out = xp.fft.irfftn(a, s=self.s, axes=self.axes, norm=self.norm)

        # presumably aligns NumPy's output dtype with CuPy's for
        # low-precision inputs -- confirm against the comparison decorator
        if xp is np and dtype in [np.float16, np.float32, np.complex64]:
            out = out.astype(np.float32)

        return out

    # TODO(leofang): write test_rfftn_error_on_wrong_plan()? 
Example #17
Source File:    From chainercv with MIT License 6 votes vote down vote up
def _call_nms_kernel(bbox, thresh):
    """Launch the ``nms_kernel`` raw kernel and post-process its mask.

    Args:
        bbox: Array of boxes; its first axis length is taken as the number
            of boxes. It is made contiguous as float32 on the device.
        thresh: Threshold forwarded to the kernel as a ``float32`` scalar.

    Returns:
        The ``(selection, n_selec)`` pair produced by ``_nms_gpu_post``.
    """
    threads_per_block = 64
    n_bbox = bbox.shape[0]
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
    grid = (col_blocks, col_blocks, 1)
    block = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    bbox_dev = cp.ascontiguousarray(bbox, dtype=np.float32)
    nms = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kernel_args = (cp.int32(n_bbox), cp.float32(thresh), bbox_dev, mask_dev)
    nms(grid, block, args=kernel_args)

    # Copy the mask back to the host before handing it to the post-processor.
    mask_host = mask_dev.get()
    return _nms_gpu_post(mask_host, n_bbox, threads_per_block, col_blocks)
Example #18
Source File:    From cupy with MIT License 6 votes vote down vote up
def test_load_pickle(self):
        """Round-trip a pickled array through ``cupy.load``.

        The array is pickled into an in-memory buffer, loaded back with
        ``allow_pickle=True`` and compared with the source array; loading
        the same bytes with ``allow_pickle=False`` must raise ``ValueError``.
        """
        a = testing.shaped_arange((2, 3, 4), dtype=cupy.float32)

        # Pickle the array into the buffer first; without this write the
        # captured bytes are empty and there is nothing for cupy.load to
        # read back.
        with io.BytesIO() as sio:
            a.dump(sio)
            s = sio.getvalue()

        with io.BytesIO(s) as sio:
            b = cupy.load(sio, allow_pickle=True)
        testing.assert_array_equal(a, b)

        with io.BytesIO(s) as sio:
            with self.assertRaises(ValueError):
                cupy.load(sio, allow_pickle=False)
Example #19
Source File:    From cupy with MIT License 6 votes vote down vote up
def test_manual_indexing(self, n=100):
        """Compare automatic and manual (``raw``) indexing in elementwise kernels.

        Two kernels add the same pair of float32 arrays: one with ordinary
        parameters, one with ``raw`` parameters indexed explicitly by ``i``
        and launched with an explicit ``size``. Their outputs must be equal.

        Args:
            n: Number of elements in each input array.
        """
        in1 = cupy.random.uniform(-1, 1, n).astype(cupy.float32)
        in2 = cupy.random.uniform(-1, 1, n).astype(cupy.float32)
        # The kernel body has to be passed as a string of C code.
        user_kernel_1 = cupy.ElementwiseKernel(
            'T x, T y',
            'T z',
            '''
                z = x + y;
            ''',
            'user_kernel_1')
        out1 = user_kernel_1(in1, in2)

        user_kernel_2 = cupy.ElementwiseKernel(
            'raw T x, raw T y',
            'raw T z',
            '''
                z[i] = x[i] + y[i];
            ''',
            'user_kernel_2')
        # All parameters are raw here, so the loop size is given explicitly.
        out2 = user_kernel_2(in1, in2, size=n)

        testing.assert_array_equal(out1, out2)
Example #20
Source File:    From sporco with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_15(self):
        """Check that ConvBPDNJoint honours a float32 'DataType' option.

        Builds the solver from random CuPy arrays with 'AutoRho' enabled and
        asserts that ``X``, ``Y`` and ``U`` all report the requested dtype.
        """
        dtype = cp.float32
        sig_size, flt_size = 16, 5
        n_sig, n_flt = 2, 4
        D = cp.random.randn(flt_size, flt_size, n_flt)
        s = cp.random.randn(sig_size, sig_size, n_sig)
        options = cbpdn.ConvBPDNJoint.Options({
            'Verbose': False,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dtype,
        })
        lmbda, mu = 1e-1, 1e-2
        solver = cbpdn.ConvBPDNJoint(D, s, lmbda, mu, opt=options)
        for attr in ('X', 'Y', 'U'):
            assert getattr(solver, attr).dtype == dtype
Example #21
Source File:    From cupy with MIT License 5 votes vote down vote up
def _use_cutensor(dtype0, sub0, dtype1, sub1, batch_dims, contract_dims):
    """Return whether the cuTENSOR path may be used for these operands.

    The answer is ``False`` when cuTENSOR is not enabled, when the two
    dtypes differ, when the dtype is not one of float32/float64/
    complex64/complex128, or when there is at least one contracted
    dimension and the last subscript of either operand is a batch
    dimension. Otherwise it is ``True``.

    Args:
        dtype0, dtype1: Dtypes of the two operands.
        sub0, sub1: Subscript sequences of the two operands; only their
            last elements are inspected.
        batch_dims: Container of batch subscripts.
        contract_dims: Sized collection of contracted subscripts.
    """
    if not cupy.cuda.cutensor_enabled:
        return False

    supported = (cupy.float32, cupy.float64,
                 cupy.complex64, cupy.complex128)
    if dtype0 != dtype1 or dtype0 not in supported:
        return False

    if len(contract_dims) < 1:
        return True
    last_is_batch = sub0[-1] in batch_dims or sub1[-1] in batch_dims
    return not last_is_batch
Example #22
Source File:    From sporco with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_04(self):
        """Check that RobustPCA honours a float32 'DataType' option.

        Builds the solver from a random 8x8 CuPy array with 'AutoRho'
        enabled and asserts that ``X``, ``Y`` and ``U`` all report the
        requested dtype.
        """
        dtype = cp.float32
        size = 8
        D = cp.random.randn(size, size)
        options = rpca.RobustPCA.Options({
            'Verbose': False,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dtype,
        })
        solver = rpca.RobustPCA(D, opt=options)
        for attr in ('X', 'Y', 'U'):
            assert getattr(solver, attr).dtype == dtype
Example #23
Source File:    From cupy with MIT License 5 votes vote down vote up
def test_ihfft(self, xp, dtype):
        """Compute ``ihfft`` of a random array with the given array module.

        Returns:
            The transform output; for NumPy with float16/float32/complex64
            inputs it is cast to ``complex64``.
        """
        data = testing.shaped_random(self.shape, xp, dtype)
        result = xp.fft.ihfft(data, n=self.n, norm=self.norm)

        if xp is np and dtype in (np.float16, np.float32, np.complex64):
            return result.astype(np.complex64)
        return result
Example #24
Source File:    From cupy with MIT License 5 votes vote down vote up
def test_hfft(self, xp, dtype):
        """Compute ``hfft`` of a random array with the given array module.

        Returns:
            The transform output; for NumPy with float16/float32/complex64
            inputs it is cast to ``float32``.
        """
        data = testing.shaped_random(self.shape, xp, dtype)
        result = xp.fft.hfft(data, n=self.n, norm=self.norm)

        if xp is np and dtype in (np.float16, np.float32, np.complex64):
            return result.astype(np.float32)
        return result
Example #25
Source File:    From pytorch-sso with MIT License 5 votes vote down vote up
def _get_max_kernel():
    # Builds and returns a CuPy ReductionKernel with a float32 input ``x``
    # and a float32 output ``y`` whose expressions include ``fmaxf(a, b)``
    # and ``y = a``.
    # NOTE(review): this call is truncated in the visible source -- the
    # closing parenthesis is missing, and the visible expressions look like
    # a reduce step and a post-map step, so the map expression, identity
    # and kernel name were probably dropped. Restore them from upstream
    # before using this function.
    return chainer.cuda.cupy.ReductionKernel(
        'float32 x',
        'float32 y',
        'fmaxf(a, b)',
        'y = a',
Example #26
Source File:    From sporco with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_06(self):
        """Check that TVL1Deconv honours a float32 'DataType' option.

        Builds the solver from a one-element array of ones and ``self.D``
        with 'AutoRho' enabled, then asserts that ``X``, ``Y`` and ``U`` all
        report the requested dtype.
        """
        dtype = cp.float32
        options = tvl1.TVL1Deconv.Options({
            'Verbose': False,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dtype,
        })
        solver = tvl1.TVL1Deconv(cp.ones((1, )), self.D, 3, opt=options)
        for attr in ('X', 'Y', 'U'):
            assert getattr(solver, attr).dtype == dtype
Example #27
Source File:    From sporco with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_04(self):
        """Check that TVL1Denoise honours a float32 'DataType' option.

        Builds the solver from ``self.D`` with 'AutoRho' enabled and asserts
        that ``X``, ``Y`` and ``U`` all report the requested dtype.
        """
        dtype = cp.float32
        options = tvl1.TVL1Denoise.Options({
            'Verbose': False,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dtype,
        })
        solver = tvl1.TVL1Denoise(self.D, 3, opt=options)
        for attr in ('X', 'Y', 'U'):
            assert getattr(solver, attr).dtype == dtype
Example #28
Source File:    From cupy with MIT License 5 votes vote down vote up
def test_rfft2(self, xp, dtype, order, enable_nd):
        """Compute ``rfft2`` on random input, optionally in Fortran order.

        Returns:
            The transform output; for NumPy with float16/float32/complex64
            inputs it is cast to ``complex64``.
        """
        assert config.enable_nd_planning == enable_nd

        data = testing.shaped_random(self.shape, xp, dtype)
        if order == 'F':
            data = xp.asfortranarray(data)
        result = xp.fft.rfft2(
            data, s=self.s, axes=self.axes, norm=self.norm)

        if xp is np and dtype in (np.float16, np.float32, np.complex64):
            return result.astype(np.complex64)
        return result
Example #29
Source File:    From cupy with MIT License 5 votes vote down vote up
def test_rfftn(self, xp, dtype, enable_nd):
        """Compute an N-D real FFT with ``xp``, using a cuFFT plan on CuPy.

        On the CuPy path an 'R2C' plan is obtained from ``get_fft_plan`` and
        the transform is executed inside the plan's context; on any other
        backend the transform is called directly.

        Args:
            xp: Array module under test (``cupy`` or ``np``).
            dtype: Element type of the random input array.
            enable_nd: Expected value of ``config.enable_nd_planning``.

        Returns:
            The transform output; for NumPy with float16/float32/complex64
            inputs it is cast to ``complex64``.
        """
        assert config.enable_nd_planning == enable_nd
        a = testing.shaped_random(self.shape, xp, dtype)
        if xp is cupy:
            from cupyx.scipy.fftpack import get_fft_plan
            plan = get_fft_plan(a, self.s, self.axes, value_type='R2C')
            with plan:
                out = xp.fft.rfftn(a, s=self.s, axes=self.axes, norm=self.norm)
        else:
            # Only the non-CuPy backend takes the plan-free call; running it
            # unconditionally would throw away the result computed under
            # the plan and leave the planned path unchecked.
            out = xp.fft.rfftn(a, s=self.s, axes=self.axes, norm=self.norm)

        # presumably aligns NumPy's output dtype with CuPy's for
        # low-precision inputs -- confirm against the comparison decorator
        if xp is np and dtype in [np.float16, np.float32, np.complex64]:
            out = out.astype(np.complex64)

        return out
Example #30
Source File:    From pytorch-sso with MIT License 5 votes vote down vote up
def _get_nccl_dtype(dtype):
    """Map a NumPy floating dtype to the matching ``nccl`` constant.

    Args:
        dtype: One of ``numpy.float16``, ``numpy.float32`` or
            ``numpy.float64`` (or anything comparing equal to them).

    Returns:
        ``nccl.NCCL_FLOAT16``, ``nccl.NCCL_FLOAT32`` or
        ``nccl.NCCL_FLOAT64`` respectively.

    Raises:
        ValueError: If ``dtype`` is none of the three supported dtypes.
    """
    if dtype == np.float16:
        return nccl.NCCL_FLOAT16
    elif dtype == np.float32:
        return nccl.NCCL_FLOAT32
    elif dtype == np.float64:
        return nccl.NCCL_FLOAT64
    else:
        # Reject unsupported dtypes explicitly instead of falling through
        # and returning None.
        raise ValueError(
            'dtype must be numpy.float16, numpy.float32 or numpy.float64, '
            'not {}'.format(dtype))