Python Examples of cupy.float32

Source File: test_fft.py From cupy with MIT License

6 votes

def test_rfft(self, xp, dtype):
        """Compute ``rfft`` of a random array, through a cuFFT plan on CuPy.

        For CuPy a plan is requested via ``get_fft_plan`` (value type
        ``'R2C'``), checked to be a ``Plan1d`` and used as a context manager
        around the transform.  Any other ``xp`` calls ``rfft`` directly.
        """
        x = testing.shaped_random(self.shape, xp, dtype)
        if xp is not cupy:
            out = xp.fft.rfft(x, n=self.n, norm=self.norm)
        else:
            from cupyx.scipy.fftpack import get_fft_plan
            plan_shape = None if self.n is None else (self.n,)
            plan = get_fft_plan(x, shape=plan_shape, value_type='R2C')
            assert isinstance(plan, cupy.cuda.cufft.Plan1d)
            with plan:
                out = xp.fft.rfft(x, n=self.n, norm=self.norm)

        # NumPy results for these input dtypes are cast down to complex64
        # (presumably because NumPy itself returns double precision).
        low_precision = (np.float16, np.float32, np.complex64)
        if xp is np and dtype in low_precision:
            out = out.astype(np.complex64)

        return out

Source File: test_cbpdn.py From sporco with BSD 3-Clause "New" or "Revised" License

6 votes

def test_21(self):
        """ConvMinL1InL2Ball must keep X, Y and U in the requested float32."""
        D = cp.random.randn(5, 5, 4)
        s = cp.random.randn(16, 16, 2)
        dt = cp.float32
        settings = {
            'Verbose': False,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dt,
        }
        opt = cbpdn.ConvMinL1InL2Ball.Options(settings)
        solver = cbpdn.ConvMinL1InL2Ball(D, s, 1e0, opt=opt)
        solver.solve()
        # Every solver variable checked here has to carry the 'DataType'
        # that was passed in through the options.
        for var_name in ('X', 'Y', 'U'):
            assert getattr(solver, var_name).dtype == dt

Source File: test_cbpdn.py From sporco with BSD 3-Clause "New" or "Revised" License

6 votes

def test_06(self):
        """ConvBPDN with the 'BackTrack' option enabled honours float32.

        After solving, ``X`` must have the requested dtype and ``Xf``/``Yf``
        the dtype returned by ``complex_dtype`` for it.
        """
        D = cp.random.randn(5, 5, 4)
        s = cp.random.randn(16, 16, 2)
        dt = cp.float32
        opt = cbpdn.ConvBPDN.Options({
            'Verbose': False,
            'MaxMainIter': 20,
            'BackTrack': {'Enabled': True},
            'DataType': dt,
        })
        solver = cbpdn.ConvBPDN(D, s, 1e-1, opt=opt)
        solver.solve()
        assert solver.X.dtype == dt
        assert solver.Xf.dtype == complex_dtype(dt)
        assert solver.Yf.dtype == complex_dtype(dt)

Source File: non_maximum_suppression.py From FATE with Apache License 2.0

6 votes

def _call_nms_kernel(bbox, thresh):
    """Run the ``nms_kernel`` CUDA kernel over ``bbox`` and decode its mask.

    Args:
        bbox: Array whose first axis length is the number of boxes.  It is
            converted to a contiguous float32 CuPy array before the launch.
        thresh: Threshold handed to the kernel as a float32 scalar
            (presumably the IoU threshold -- confirm against the kernel).

    Returns:
        The ``(selection, n_selec)`` pair produced by ``_nms_gpu_post``.
    """
    # Note carried over from the port: PyTorch has no unsigned-long tensor,
    # but that does not matter because an ndarray is returned in the end,
    # so the uint64 mask handling is left as it was.
    block_size = 64
    n_bbox = bbox.shape[0]
    n_blocks = np.ceil(n_bbox / block_size).astype(np.int32)
    grid = (n_blocks, n_blocks, 1)
    block = (block_size, 1, 1)

    # n_blocks uint64 mask entries are reserved per box.
    mask_dev = cp.zeros((n_bbox * n_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    nms = _load_kernel('nms_kernel', _nms_gpu_code)
    kernel_args = (cp.int32(n_bbox), cp.float32(thresh), bbox, mask_dev)
    nms(grid, block, args=kernel_args)

    # Copy the mask back to the host for post-processing.
    keep, n_keep = _nms_gpu_post(mask_dev.get(), n_bbox, block_size, n_blocks)
    return keep, n_keep

Source File: non_maximum_suppression.py From chainer-compiler with MIT License

6 votes

def _call_nms_kernel(bbox, thresh):
    """GPU NMS kernel launcher; disabled by an unconditional assertion.

    The leading ``assert False`` makes everything below it unreachable
    unless assertions are stripped (``python -O``).  The remaining body is
    the launch sequence: build the grid, allocate the uint64 mask, run
    ``nms_kernel`` and hand the host copy of the mask to ``_nms_gpu_post``.
    """
    assert False, "Not supported."
    n_bbox = bbox.shape[0]
    tpb = 64
    n_col = np.ceil(n_bbox / tpb).astype(np.int32)

    mask_dev = cp.zeros((n_bbox * n_col,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    kernel = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kernel(
        (n_col, n_col, 1),
        (tpb, 1, 1),
        args=(cp.int32(n_bbox), cp.float32(thresh), bbox, mask_dev),
    )

    host_mask = mask_dev.get()
    picked, n_picked = _nms_gpu_post(host_mask, n_bbox, tpb, n_col)
    return picked, n_picked

Source File: utils.py From EEND with MIT License

6 votes

def use_single_gpu():
    """Select one GPU and make it the current device.

    The first entry of ``CUDA_VISIBLE_DEVICES`` is used when the variable
    is set; otherwise the first device returned by ``get_free_gpus()`` is
    taken.

    Returns:
        The GPU id that was selected.
    """
    visible = os.environ.get('CUDA_VISIBLE_DEVICES')
    if visible is not None:
        # One or several GPUs are reserved.  A value without a comma splits
        # into a single element, so both cases reduce to the first entry.
        # NOTE(review): the id is taken verbatim from the variable; confirm
        # this matches the device numbering seen inside the process.
        gpu_id = int(visible.split(',')[0])
    else:
        # No GPU is reserved: fall back to a free one.
        gpu_id = get_free_gpus()[0]
    # Use the GPU immediately: switch to it and allocate a tiny array
    # (presumably to force the device to be initialised right away).
    chainer.cuda.get_device_from_id(gpu_id).use()
    cupy.empty((1,), dtype=cupy.float32)
    return gpu_id

Source File: test_cbpdn.py From sporco with BSD 3-Clause "New" or "Revised" License

6 votes

def test_30(self):
        """AddMaskSim around ConvBPDN keeps the inner solver in float32."""
        D = cp.random.randn(5, 5, 4)
        s = cp.random.randn(16, 16)
        all_ones = cp.ones(s.shape)
        dt = cp.float32
        opt = cbpdn.ConvBPDN.Options({
            'Verbose': False,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dt,
        })
        wrapped = cbpdn.AddMaskSim(
            cbpdn.ConvBPDN, D, s, all_ones, 1e-1, opt=opt)
        wrapped.solve()
        # The dtype checks go through the ``cbpdn`` attribute of the wrapper.
        inner = wrapped.cbpdn
        assert inner.X.dtype == dt
        assert wrapped.cbpdn.Y.dtype == dt
        assert wrapped.cbpdn.U.dtype == dt

Source File: test_raw.py From cupy with MIT License

6 votes

def test_template_specialization(self):
        """Fetch each specialised kernel from a RawModule and run it.

        Every name expression is paired with the dtype of the input array
        that is fed to the corresponding kernel.  Skipped for the ``nvcc``
        backend.
        """
        if self.backend == 'nvcc':
            self.skipTest('nvcc does not support template specialization')

        cases = [
            ('my_sqrt<int>', cupy.int32),
            ('my_sqrt<float>', cupy.float32),
            ('my_sqrt<complex<double>>', cupy.complex128),
            ('my_func', cupy.float64),
        ]

        # Compile once, requesting all specialisations up front.
        name_expressions = [expr for expr, _ in cases]
        mod = cupy.RawModule(code=test_cxx_template, options=('--std=c++11',),
                             name_expressions=name_expressions)

        for expr, dtype in cases:
            kernel = mod.get_function(expr)

            # The expected values are derived before the launch; the check
            # below compares them with ``data`` itself, so the kernel
            # presumably overwrites its input in place.
            data = cupy.testing.shaped_random((10,), dtype=dtype)
            expected = data**2

            kernel((1,), (10,), (data, 10))

            assert cupy.allclose(data, expected)

Source File: test_cbpdn.py From sporco with BSD 3-Clause "New" or "Revised" License

6 votes

def test_19(self):
        """ConvBPDNGradReg with 'LinSolveCheck' on keeps X, Y, U in float32."""
        D = cp.random.randn(5, 5, 4)
        s = cp.random.randn(16, 16, 2)
        dt = cp.float32
        opt = cbpdn.ConvBPDNGradReg.Options({
            'Verbose': False,
            'LinSolveCheck': True,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dt,
        })
        lmbda, mu = 1e-1, 1e-2
        solver = cbpdn.ConvBPDNGradReg(D, s, lmbda, mu, opt=opt)
        solver.solve()
        for var_name in ('X', 'Y', 'U'):
            assert getattr(solver, var_name).dtype == dt

Source File: test_cbpdn.py From sporco with BSD 3-Clause "New" or "Revised" License

6 votes

def test_17(self):
        """ConvElasticNet with 'LinSolveCheck' on keeps X, Y, U in float32."""
        dict_shape = (5, 5, 4)
        sig_shape = (16, 16, 2)
        D = cp.random.randn(*dict_shape)
        s = cp.random.randn(*sig_shape)
        dt = cp.float32
        settings = {'Verbose': False, 'LinSolveCheck': True}
        settings['MaxMainIter'] = 20
        settings['AutoRho'] = {'Enabled': True}
        settings['DataType'] = dt
        opt = cbpdn.ConvElasticNet.Options(settings)
        problem = cbpdn.ConvElasticNet(D, s, 1e-1, 1e-2, opt=opt)
        problem.solve()
        assert problem.X.dtype == dt
        assert problem.Y.dtype == dt
        assert problem.U.dtype == dt

Source File: test_raw.py From cupy with MIT License

6 votes

def test_context_switch_RawModule4(self):
        """Load a cubin RawModule on device 0 and launch its kernel on device 1.

        The test is skipped when the two devices report different compute
        capabilities.
        """
        # run test_load_cubin() on another device:
        # generate cubin in the temp dir and load it on device 0,
        # then launch the kernel on device 1

        device0 = cupy.cuda.Device(0)
        device1 = cupy.cuda.Device(1)
        if device0.compute_capability != device1.compute_capability:
            # pytest.skip() raises on its own, so no ``raise`` is needed; the
            # reason makes the skip understandable in the test report.
            pytest.skip('devices 0 and 1 have different compute capabilities')

        with device0:
            file_path = self._generate_file('cubin')
            mod = cupy.RawModule(path=file_path, backend=self.backend)
            ker = mod.get_function('test_div')

        # in this test, reloading happens at kernel launch
        with device1:
            x1, x2, y = self._helper(ker, cupy.float32)
            assert cupy.allclose(y, x1 / (x2 + 1.0))

Source File: test_cbpdn.py From sporco with BSD 3-Clause "New" or "Revised" License

6 votes

def test_06(self):
        """ConvBPDN with 'AutoRho' enabled keeps X, Y and U in float32."""
        D = cp.random.randn(5, 5, 4)
        s = cp.random.randn(16, 16, 2)
        dt = cp.float32
        option_values = dict([
            ('Verbose', False),
            ('MaxMainIter', 20),
            ('AutoRho', {'Enabled': True}),
            ('DataType', dt),
        ])
        opt = cbpdn.ConvBPDN.Options(option_values)
        solver = cbpdn.ConvBPDN(D, s, 1e-1, opt=opt)
        solver.solve()
        assert solver.X.dtype == dt
        assert solver.Y.dtype == dt
        assert solver.U.dtype == dt

Source File: test_fft.py From cupy with MIT License

6 votes

def test_irfft(self, xp, dtype):
        """Compute ``irfft`` of a random array, through a cuFFT plan on CuPy.

        For CuPy a ``'C2R'`` plan is obtained from ``get_fft_plan``, checked
        to be a ``Plan1d`` and entered as a context manager around the
        transform.  Any other ``xp`` calls ``irfft`` directly.
        """
        x = testing.shaped_random(self.shape, xp, dtype)
        fft_kwargs = {'n': self.n, 'norm': self.norm}
        if xp is cupy:
            from cupyx.scipy.fftpack import get_fft_plan
            if self.n is not None:
                plan_shape = (self.n,)
            else:
                plan_shape = None
            plan = get_fft_plan(x, shape=plan_shape, value_type='C2R')
            assert isinstance(plan, cupy.cuda.cufft.Plan1d)
            with plan:
                out = xp.fft.irfft(x, **fft_kwargs)
        else:
            out = xp.fft.irfft(x, **fft_kwargs)

        # NumPy results for these input dtypes are cast down to float32.
        if xp is np and dtype in (np.float16, np.float32, np.complex64):
            out = out.astype(np.float32)

        return out

Source File: test_fft.py From cupy with MIT License

6 votes

def test_irfft2(self, xp, dtype, order, enable_nd):
        """Compute ``irfft2`` of a random array in the requested memory order.

        Raises ``unittest.SkipTest`` for the runtime-version /
        compute-capability / axis-size combination handled below.
        """
        assert config.enable_nd_planning == enable_nd

        # Work-around: skip when the runtime version is within
        # [10010, 10020], the compute capability is below 70 and the last
        # transformed axis has size 2.  The checks stay lazily ordered.
        runtime_version = cupy.cuda.runtime.runtimeGetVersion()
        if 10010 <= runtime_version <= 10020:
            if (int(cupy.cuda.device.get_compute_capability()) < 70
                    and _size_last_transform_axis(
                        self.shape, self.s, self.axes) == 2):
                raise unittest.SkipTest('work-around for cuFFT issue')

        x = testing.shaped_random(self.shape, xp, dtype)
        if order == 'F':
            x = xp.asfortranarray(x)
        result = xp.fft.irfft2(x, s=self.s, axes=self.axes, norm=self.norm)

        # Only NumPy results for these input dtypes are cast to float32.
        if xp is not np:
            return result
        if dtype not in (np.float16, np.float32, np.complex64):
            return result
        return result.astype(np.float32)

Source File: test_fft.py From cupy with MIT License

6 votes

def test_irfftn(self, xp, dtype, order, enable_nd):
        """Compute ``irfftn`` of a random array in the requested memory order.

        Raises ``unittest.SkipTest`` when the runtime version lies in
        [10010, 10020], the compute capability is below 70 and the last
        transformed axis has size 2.
        """
        assert config.enable_nd_planning == enable_nd

        version = cupy.cuda.runtime.runtimeGetVersion()
        affected = (
            10010 <= version <= 10020
            and int(cupy.cuda.device.get_compute_capability()) < 70
            and _size_last_transform_axis(
                self.shape, self.s, self.axes) == 2
        )
        if affected:
            raise unittest.SkipTest('work-around for cuFFT issue')

        data = testing.shaped_random(self.shape, xp, dtype)
        if order == 'F':
            data = xp.asfortranarray(data)
        out = xp.fft.irfftn(data, s=self.s, axes=self.axes, norm=self.norm)

        # NumPy results for these input dtypes are cast down to float32.
        cast_dtypes = (np.float16, np.float32, np.complex64)
        if xp is np and dtype in cast_dtypes:
            out = out.astype(np.float32)

        return out


# Only those tests in which a legit plan can be obtained are kept

Source File: test_fft.py From cupy with MIT License

6 votes

def test_irfftn(self, xp, dtype, enable_nd):
        """Compute ``irfftn`` of a random array, inside a cuFFT plan on CuPy.

        For CuPy a ``'C2R'`` plan built from ``self.s`` and ``self.axes`` is
        entered as a context manager around the transform; any other ``xp``
        calls ``irfftn`` directly.
        """
        assert config.enable_nd_planning == enable_nd
        x = testing.shaped_random(self.shape, xp, dtype)
        if xp is not cupy:
            out = xp.fft.irfftn(x, s=self.s, axes=self.axes, norm=self.norm)
        else:
            from cupyx.scipy.fftpack import get_fft_plan
            plan = get_fft_plan(x, self.s, self.axes, value_type='C2R')
            with plan:
                out = xp.fft.irfftn(
                    x, s=self.s, axes=self.axes, norm=self.norm)

        # NumPy results for these input dtypes are cast down to float32.
        if xp is np and dtype in (np.float16, np.float32, np.complex64):
            return out.astype(np.float32)

        return out

    # TODO(leofang): write test_rfftn_error_on_wrong_plan()?

Source File: non_maximum_suppression.py From chainercv with MIT License

6 votes

def _call_nms_kernel(bbox, thresh):
    """Launch the ``nms_kernel`` raw kernel and post-process its bit mask.

    Args:
        bbox: Array whose first axis length is the number of boxes; it is
            made contiguous float32 on the device before the launch.
        thresh: Threshold passed to the kernel as a float32 scalar.

    Returns:
        The ``(selection, n_selec)`` pair computed by ``_nms_gpu_post``.
    """
    threads_per_block = 64
    n_bbox = bbox.shape[0]
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)

    # Launch geometry: a col_blocks x col_blocks grid of 64-thread blocks.
    grid_dim = (col_blocks, col_blocks, 1)
    block_dim = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    nms = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    nms(grid_dim, block_dim,
        args=(cp.int32(n_bbox), cp.float32(thresh), bbox, mask_dev))

    # Fetch the mask to host memory and let the helper decode it.
    result = _nms_gpu_post(
        mask_dev.get(), n_bbox, threads_per_block, col_blocks)
    selection, n_selec = result
    return selection, n_selec

Source File: test_npz.py From cupy with MIT License

6 votes

def test_load_pickle(self):
        """``cupy.load`` reads a dumped array only when pickling is allowed.

        An array is dumped into an in-memory buffer; loading it back with
        ``allow_pickle=True`` must reproduce the array, while
        ``allow_pickle=False`` must raise ``ValueError``.
        """
        a = testing.shaped_arange((2, 3, 4), dtype=cupy.float32)

        # ``with`` closes each buffer even when an assertion fails midway.
        with io.BytesIO() as sio:
            a.dump(sio)
            s = sio.getvalue()

        with io.BytesIO(s) as sio:
            b = cupy.load(sio, allow_pickle=True)
            testing.assert_array_equal(a, b)

        with io.BytesIO(s) as sio:
            with self.assertRaises(ValueError):
                cupy.load(sio, allow_pickle=False)

Source File: test_userkernel.py From cupy with MIT License

6 votes

def test_manual_indexing(self, n=100):
        """An element-wise kernel and its ``raw``-indexed twin must agree.

        Both kernels add two float32 arrays of length ``n``; the second one
        declares its arguments ``raw``, indexes them with ``i`` and is given
        the size explicitly.
        """
        lhs = cupy.random.uniform(-1, 1, n).astype(cupy.float32)
        rhs = cupy.random.uniform(-1, 1, n).astype(cupy.float32)

        # Variant 1: plain per-element arguments.
        add_elementwise = cupy.ElementwiseKernel(
            'T x, T y',
            'T z',
            '''
                z = x + y;
            ''',
            'uesr_kernel_1')
        expected = add_elementwise(lhs, rhs)

        # Variant 2: raw arguments with manual indexing.
        add_raw_indexed = cupy.ElementwiseKernel(
            'raw T x, raw T y',
            'raw T z',
            '''
                z[i] = x[i] + y[i];
            ''',
            'uesr_kernel_2')
        actual = add_raw_indexed(lhs, rhs, size=n)

        testing.assert_array_equal(expected, actual)

Source File: test_cbpdn.py From sporco with BSD 3-Clause "New" or "Revised" License

6 votes

def test_15(self):
        """ConvBPDNJoint must keep X, Y and U in the requested float32."""
        D = cp.random.randn(5, 5, 4)
        s = cp.random.randn(16, 16, 2)
        dt = cp.float32
        auto_rho = {'Enabled': True}
        opt = cbpdn.ConvBPDNJoint.Options(
            {'Verbose': False, 'MaxMainIter': 20, 'AutoRho': auto_rho,
             'DataType': dt})
        lmbda, mu = 1e-1, 1e-2
        joint = cbpdn.ConvBPDNJoint(D, s, lmbda, mu, opt=opt)
        joint.solve()
        assert joint.X.dtype == dt
        assert joint.Y.dtype == dt
        assert joint.U.dtype == dt

Source File: einsum.py From cupy with MIT License

5 votes

def _use_cutensor(dtype0, sub0, dtype1, sub1, batch_dims, contract_dims):
    """Tell whether the cuTENSOR path may be used for the two operands.

    Returns ``False`` when cuTENSOR is not enabled, when the operand dtypes
    differ or are not float32/float64/complex64/complex128, or when there
    is at least one contraction dimension and the last subscript of either
    operand is a batch dimension.  Returns ``True`` otherwise.
    """
    supported_dtypes = (cupy.float32, cupy.float64,
                        cupy.complex64, cupy.complex128)
    if (not cupy.cuda.cutensor_enabled
            or dtype0 != dtype1
            or dtype0 not in supported_dtypes):
        return False
    # The length check comes first so that the last subscripts are only
    # looked at when there is something to contract.
    return not (len(contract_dims) >= 1
                and (sub0[-1] in batch_dims or sub1[-1] in batch_dims))

Source File: test_rpca.py From sporco with BSD 3-Clause "New" or "Revised" License

5 votes

def test_04(self):
        """RobustPCA must keep X, Y and U in the requested float32."""
        D = cp.random.randn(8, 8)
        dt = cp.float32
        opt = rpca.RobustPCA.Options({
            'Verbose': False,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dt,
        })
        solver = rpca.RobustPCA(D, opt=opt)
        solver.solve()
        for var_name in ('X', 'Y', 'U'):
            assert getattr(solver, var_name).dtype == dt

Source File: test_fft.py From cupy with MIT License

5 votes

def test_ihfft(self, xp, dtype):
        """Return ``ihfft`` of a random array of shape ``self.shape``.

        NumPy results for float16/float32/complex64 inputs are cast to
        complex64 before being returned.
        """
        data = testing.shaped_random(self.shape, xp, dtype)
        result = xp.fft.ihfft(data, n=self.n, norm=self.norm)

        needs_cast = (
            xp is np and dtype in (np.float16, np.float32, np.complex64))
        return result.astype(np.complex64) if needs_cast else result

Source File: test_fft.py From cupy with MIT License

5 votes

def test_hfft(self, xp, dtype):
        """Return ``hfft`` of a random array of shape ``self.shape``.

        NumPy results for float16/float32/complex64 inputs are cast to
        float32 before being returned.
        """
        data = testing.shaped_random(self.shape, xp, dtype)
        result = xp.fft.hfft(data, n=self.n, norm=self.norm)

        if xp is not np:
            return result
        if dtype not in (np.float16, np.float32, np.complex64):
            return result
        return result.astype(np.float32)

Source File: pure_nccl_communicator.py From pytorch-sso with MIT License

5 votes

def _get_max_kernel():
    """Build the ``my_max`` float32 reduction kernel.

    The kernel maps each element with ``fabsf`` and reduces with ``fmaxf``
    starting from ``0``, i.e. it yields the largest absolute value.
    """
    in_params = 'float32 x'
    out_params = 'float32 y'
    map_expr = 'fabsf(x)'
    reduce_expr = 'fmaxf(a, b)'
    post_map_expr = 'y = a'
    identity = '0'
    kernel_cls = chainer.cuda.cupy.ReductionKernel
    return kernel_cls(in_params, out_params, map_expr, reduce_expr,
                      post_map_expr, identity, 'my_max')

Source File: test_tvl1.py From sporco with BSD 3-Clause "New" or "Revised" License

5 votes

def test_06(self):
        """TVL1Deconv must keep X, Y and U in the requested float32."""
        dt = cp.float32
        opt = tvl1.TVL1Deconv.Options({
            'Verbose': False,
            'MaxMainIter': 20,
            'AutoRho': {'Enabled': True},
            'DataType': dt,
        })
        # The first argument is a one-element array of ones.
        solver = tvl1.TVL1Deconv(cp.ones((1, )), self.D, 3, opt=opt)
        solver.solve()
        for var_name in ('X', 'Y', 'U'):
            assert getattr(solver, var_name).dtype == dt

Source File: test_tvl1.py From sporco with BSD 3-Clause "New" or "Revised" License

5 votes

def test_04(self):
        """TVL1Denoise must keep X, Y and U in the requested float32."""
        dt = cp.float32
        settings = {'Verbose': False, 'MaxMainIter': 20}
        settings['AutoRho'] = {'Enabled': True}
        settings['DataType'] = dt
        opt = tvl1.TVL1Denoise.Options(settings)
        denoiser = tvl1.TVL1Denoise(self.D, 3, opt=opt)
        denoiser.solve()
        assert denoiser.X.dtype == dt
        assert denoiser.Y.dtype == dt
        assert denoiser.U.dtype == dt

Source File: test_fft.py From cupy with MIT License

5 votes

def test_rfft2(self, xp, dtype, order, enable_nd):
        """Return ``rfft2`` of a random array in the requested memory order.

        NumPy results for float16/float32/complex64 inputs are cast to
        complex64 before being returned.
        """
        assert config.enable_nd_planning == enable_nd
        data = testing.shaped_random(self.shape, xp, dtype)
        if order == 'F':
            data = xp.asfortranarray(data)
        result = xp.fft.rfft2(
            data, s=self.s, axes=self.axes, norm=self.norm)

        cast_dtypes = (np.float16, np.float32, np.complex64)
        if xp is np and dtype in cast_dtypes:
            return result.astype(np.complex64)
        return result

Source File: test_fft.py From cupy with MIT License

5 votes

def test_rfftn(self, xp, dtype, enable_nd):
        """Compute ``rfftn`` of a random array, inside a cuFFT plan on CuPy.

        For CuPy an ``'R2C'`` plan built from ``self.s`` and ``self.axes`` is
        entered as a context manager around the transform; any other ``xp``
        calls ``rfftn`` directly.
        """
        assert config.enable_nd_planning == enable_nd
        x = testing.shaped_random(self.shape, xp, dtype)
        fft_kwargs = {'s': self.s, 'axes': self.axes, 'norm': self.norm}
        if xp is not cupy:
            out = xp.fft.rfftn(x, **fft_kwargs)
        else:
            from cupyx.scipy.fftpack import get_fft_plan
            plan = get_fft_plan(x, self.s, self.axes, value_type='R2C')
            with plan:
                out = xp.fft.rfftn(x, **fft_kwargs)

        # NumPy results for these input dtypes are cast down to complex64.
        if xp is np and dtype in (np.float16, np.float32, np.complex64):
            out = out.astype(np.complex64)

        return out

Source File: pure_nccl_communicator.py From pytorch-sso with MIT License

5 votes

def _get_nccl_dtype(dtype):
    """Map a NumPy floating-point dtype to the matching NCCL type constant.

    Args:
        dtype: Value compared against ``np.float16``, ``np.float32`` and
            ``np.float64``.

    Returns:
        ``nccl.NCCL_FLOAT16``, ``nccl.NCCL_FLOAT32`` or ``nccl.NCCL_FLOAT64``.

    Raises:
        ValueError: If ``dtype`` matches none of the three supported types.
    """
    if dtype == np.float16:
        return nccl.NCCL_FLOAT16
    elif dtype == np.float32:
        return nccl.NCCL_FLOAT32
    elif dtype == np.float64:
        return nccl.NCCL_FLOAT64
    else:
        # Adjacent literals are concatenated verbatim, so the separating
        # space has to be part of the first one.
        raise ValueError(
            'dtype must be numpy.float16, numpy.float32 or numpy.float64, '
            'not {}'.format(dtype))

Python cupy.float32() Examples