Python theano.sandbox.cuda.CudaNdarray() Examples
The following are 30 code examples of theano.sandbox.cuda.CudaNdarray(), drawn from open-source projects. The project and source file each example comes from are noted above it. You may also want to check out all available functions/classes of the module theano.sandbox.cuda.
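Before the examples, here is a minimal sketch of the numpy round-trip that most of them rely on. This snippet is illustrative rather than taken from any of the projects below, and it assumes Theano's old CUDA backend (theano.sandbox.cuda, removed from later Theano releases in favour of the gpuarray backend) is installed with a working GPU:

import numpy
import theano.sandbox.cuda as cuda

# Host -> device: CudaNdarray accepts only float32 numpy arrays.
x = numpy.random.rand(3, 4).astype('float32')
gx = cuda.CudaNdarray(x)
assert gx.shape == x.shape

# Device -> host: numpy.asarray copies the data back to a numpy array.
assert numpy.allclose(x, numpy.asarray(gx))

# Arrays can also be allocated directly on the GPU.
gz = cuda.CudaNdarray.zeros((2, 2))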
Example #1
Source File: test_tensor_op.py From D-VAE with MIT License

def test_deepcopy():
    a = cuda.fmatrix()
    a_v = cuda.CudaNdarray(numpy.zeros((3, 4), dtype='float32'))

    # We force the c code to check that we generate c code
    mode = theano.Mode("c", mode_with_gpu.optimizer)
    f = theano.function([a], a, mode=mode)
    theano.printing.debugprint(f)
    out = f(a_v)
    assert out is not a_v
    assert numpy.allclose(numpy.asarray(a_v), numpy.asarray(out))

    # We force the python linker as the default code should work for this op
    mode = theano.Mode("py", mode_with_gpu.optimizer)
    f = theano.function([a], a, mode=mode)
    theano.printing.debugprint(f)
    out = f(a_v)
    assert out is not a_v
    assert numpy.allclose(numpy.asarray(a_v), numpy.asarray(out))
Example #2
Source File: test_pycuda_utils.py From D-VAE with MIT License

def test_to_cudandarray():
    px = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
    cx = to_cudandarray(px)
    assert isinstance(cx, cuda.CudaNdarray)
    assert numpy.allclose(px.get(), numpy.asarray(cx))
    assert px.dtype == cx.dtype
    assert px.shape == cx.shape
    assert all(numpy.asarray(cx._strides) * 4 == px.strides)

    try:
        px = pycuda.gpuarray.zeros((3, 4, 5), 'float64')
        to_cudandarray(px)
        assert False
    except ValueError:
        pass

    try:
        to_cudandarray(numpy.zeros(4))
        assert False
    except ValueError:
        pass
Example #3
Source File: test_pycuda_theano_simple.py From D-VAE with MIT License

def test_pycuda_theano():
    """Simple example with pycuda function and Theano CudaNdarray object."""
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
    __global__ void multiply_them(float *dest, float *a, float *b)
    {
        const int i = threadIdx.x;
        dest[i] = a[i] * b[i];
    }
    """)
    multiply_them = mod.get_function("multiply_them")

    a = numpy.random.randn(100).astype(numpy.float32)
    b = numpy.random.randn(100).astype(numpy.float32)

    # Test with Theano object
    ga = cuda_ndarray.CudaNdarray(a)
    gb = cuda_ndarray.CudaNdarray(b)
    dest = cuda_ndarray.CudaNdarray.zeros(a.shape)
    # Launch one thread per element: the original block size of (400, 1, 1)
    # would run threads past the end of these 100-element arrays.
    multiply_them(dest, ga, gb,
                  block=(a.shape[0], 1, 1), grid=(1, 1))
    assert (numpy.asarray(dest) == a * b).all()
Example #4
Source File: test_cuda_ndarray.py From D-VAE with MIT License

def test_getshape():
    shapelist = [
        ((1, 2, 3), (1, 2, 3)),
        ((1,), (1,)),
        ((1, 2, 3), (3, 2, 1)),
        ((1, 2, 3), (6,)),
        ((1, 2, 3, 2), (6, 2)),
        ((2, 3, 2), (6, 2))
    ]

    def subtest(shape):
        # Use the `shape` argument; the original referred to the loop
        # variable `shape_1`, so `subtest(shape_2)` retested shape_1.
        a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert b.shape == a.shape

    for shape_1, shape_2 in shapelist:
        subtest(shape_1)
        subtest(shape_2)
Example #5
Source File: test_cuda_ndarray.py From D-VAE with MIT License

def test_setitem_matrix_bad_type():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7, 8], dtype='float64')

    # test direct transfer from numpy
    try:
        # attempt to assign the ndarray b with setitem
        _a[1, :, :] = b
        assert False
    except TypeError as e:
        # print e
        assert True
Example #6
Source File: test_cuda_ndarray.py From D-VAE with MIT License

def test_setitem_assign_to_slice():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7, 8, 9], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # first get a slice of a
    _c = _a[:, :, 1]

    # set middle row through cube to 7, 8, 9
    # (this corresponds to middle row of matrix _c)
    _c[:, 1] = _b
    a[:, :, 1][:, 1] = b
    assert numpy.allclose(a, numpy.asarray(_a))

    # test direct transfer from numpy
    _d = _a[1, :, :]
    _d[1, :] = b * 10
    a[1, :, :][1, :] = b * 10
    assert numpy.allclose(a, numpy.asarray(_a))
Example #7
Source File: test_cuda_ndarray.py From D-VAE with MIT License

def test_base():
    # Test that the 'base' attribute of a CudaNdarray is the one
    # built initially, not an intermediate one.
    a = cuda_ndarray.CudaNdarray.zeros((3, 4, 5))
    for i in xrange(5):
        b = a[:]
        assert b.base is a

    c = a[0]
    d = c[:, 0]
    # print d.shape
    assert c.base is a
    assert d.base is a

    e = b.reshape((5, 2, 2, 3))
    assert e.base is a
Example #8
Source File: test_cuda_ndarray.py From D-VAE with MIT License

def test_set_strides():
    a = cuda_ndarray.CudaNdarray.zeros((5, 5))

    # Test with tuple
    new_strides = (a.strides[1], a.strides[0])
    a.strides = new_strides
    assert a.strides == new_strides

    # Test with list
    new_strides = (a.strides[1], a.strides[0])
    a.strides = [a.strides[1], a.strides[0]]
    assert a.strides == new_strides

    try:
        a.strides = (a.strides[1],)
        assert False
    except ValueError:
        pass

    try:
        a.strides = (1, 1, 1)
        assert False
    except ValueError:
        pass
Example #9
Source File: test_basic_ops.py From D-VAE with MIT License

def speed_elemwise_collapse():
    """Used to time whether the collapse of c-contiguous dims is useful."""
    shape = (30, 40, 50, 600)
    a = cuda_ndarray.CudaNdarray(
        theano._asarray(numpy.random.rand(*shape), dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2[:, ::2, :, :]
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b * tensor.exp(1 + b ** a3)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    v = v[:, ::2, :, :]
    v = cuda_ndarray.CudaNdarray(v)
    t1 = time.time()
    for i in range(100):
        # let debugmode catch errors
        f(v)
    t2 = time.time()
Example #10
Source File: test_basic_ops.py From D-VAE with MIT License

def speed_elemwise_collapse2():
    """Used to test the speed-up of the generalised collapse of
    c-contiguous dims."""
    shape = (30, 40, 50, 600)
    a = cuda_ndarray.CudaNdarray(
        theano._asarray(numpy.random.rand(*shape), dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2[:, :, :, ::2]
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b * tensor.exp(1 + b ** a3)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    v = v[:, :, :, ::2]
    v = cuda_ndarray.CudaNdarray(v)
    t1 = time.time()
    for i in range(100):
        # let debugmode catch errors
        f(v)
    t2 = time.time()
Example #11
Source File: test_basic_ops.py From D-VAE with MIT License

def test_elemwise_collapse():
    """Test when all inputs have one (and the same) broadcastable
    dimension."""
    shape = (4, 5, 60)
    a = cuda_ndarray.CudaNdarray(
        theano._asarray(numpy.random.rand(*shape), dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    b = tcn.CudaNdarrayType((False, True, False, False))()
    c = a3 + b
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(shape[0], 1, *shape[1:]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(shape[0], 1, *shape[1:]) + v)
    # print "Expected collapse of all dimensions"
Example #12
Source File: test_basic_ops.py From D-VAE with MIT License

def test_elemwise_collapse2():
    """Test when only one input has one broadcastable dimension."""
    shape = (4, 5, 9)
    a = cuda_ndarray.CudaNdarray(
        theano._asarray(numpy.random.rand(*shape), dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(shape[0], 5, *shape[1:]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(shape[0], 1, *shape[1:]) + v)
    # print "Expected collapse to 3 dimensions"
Example #13
Source File: test_basic_ops.py From D-VAE with MIT License

def test_elemwise_collapse5():
    """Test when only one input has two broadcastable dimensions at the
    beginning and we add a scalar."""
    shape = (4, 5)
    a = cuda_ndarray.CudaNdarray(
        theano._asarray(numpy.random.rand(*shape), dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle('x', 'x', 0, 1)
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = (a3 + b + 2)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(5, 4, shape[0], shape[1]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, 1, shape[0], shape[1]) + v + 2)
    # print "Expected collapse to 2 dimensions"
Example #14
Source File: test_basic_ops.py From D-VAE with MIT License

def test_elemwise_collapse6():
    """Test when all inputs have two broadcastable dimensions at the
    beginning."""
    shape = (4, 5)
    a = cuda_ndarray.CudaNdarray(
        theano._asarray(numpy.random.rand(*shape), dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle('x', 'x', 0, 1)
    b = tcn.CudaNdarrayType((True, True, False, False))()
    f = pfunc([b], [a3 + b], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(1, 1, shape[0], shape[1]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, 1, shape[0], shape[1]) + v)
    # print "Expected collapse to c contiguous"
Example #15
Source File: test_basic_ops.py From D-VAE with MIT License

def test_elemwise_collapse7(atol=1e-6):
    """Test when one input has one broadcastable dimension and the other
    is a scalar."""
    shape = (5, 4, 1)
    a = cuda_ndarray.CudaNdarray(
        theano._asarray(numpy.random.rand(*shape), dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a.copy(), 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    f = pfunc([], [a3 + 2], mode=mode_with_gpu)

    # let debugmode catch errors
    out = f()[0]
    ans = (a + 2).reshape(shape[0], 1, shape[1], shape[2])
    assert numpy.allclose(out, ans, atol=atol)
    # print "Expected collapse to c contiguous"
Example #16
Source File: test_basic_ops.py From D-VAE with MIT License

def test_elemwise_collapse4():
    """Test when only one input has two broadcastable dimensions, one at
    each end, and we add a scalar."""
    shape = (4, 5)
    a = cuda_ndarray.CudaNdarray(
        theano._asarray(numpy.random.rand(*shape), dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle('x', 0, 1, 'x')
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = (a3 + b + 2)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(5, shape[0], shape[1], 4),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, shape[0], shape[1], 1) + v + 2)
    # print "Expected collapse to 3 dimensions"
Example #17
Source File: test_debugmode.py From attention-lvcsr with MIT License

def test_output_broadcast_cuda(self):
    from theano.sandbox import cuda
    if not cuda.cuda_available:
        raise SkipTest("Optional package Cuda disabled")
    if cuda.use.device_number is None:
        # We should normally set VecAsRowAndCol as a GPUOp. But we
        # don't want to do this here as it would disable other tests
        # in this file. So we manually init the GPU if needed to
        # remove the warning.
        cuda.use("gpu",
                 force=True,
                 default_to_move_computation_to_gpu=False,
                 move_shared_float32_to_gpu=False,
                 enable_cuda=False)
    v = cuda.fvector('v')
    c, r = VecAsRowAndCol()(v)
    f = theano.function([v], [c, r])

    v_val = cuda.CudaNdarray(self.rng.randn(5).astype('float32'))
    f(v_val)
Example #18
Source File: gnumpy_utils.py From attention-lvcsr with MIT License

def garray_to_cudandarray(x):
    """Take a gnumpy.garray and make a CudaNdarray that points to its
    memory."""
    if not isinstance(x, gnumpy.garray):
        raise ValueError("We can transfer only gnumpy.garray to CudaNdarray")
    # elif x.dtype != "float32":
    #     raise ValueError("CudaNdarray support only float32")
    # We don't need this, because cudamat is always float32.
    else:
        strides = [1]
        for i in x.shape[::-1][:-1]:
            strides.append(strides[-1] * i)
        strides = strides[::-1]
        for i in range(len(strides)):
            if x.shape[i] == 1:
                strides[i] = 0
        strides = tuple(strides)

        import ctypes
        ptr_long = long(ctypes.cast(x._base.mat.data_device,
                                    ctypes.c_void_p).value)

        # seems legit.
        z = cuda.from_gpu_pointer(ptr_long, x.shape, strides, x._base)
        return z
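A hypothetical usage sketch of garray_to_cudandarray follows; it is illustrative rather than taken from the source file above, and assumes the gnumpy package (which provides gnumpy.garray and as_numpy_array) is installed alongside a working theano.sandbox.cuda:

import numpy
import gnumpy

g = gnumpy.garray(numpy.random.rand(3, 4))  # gnumpy/cudamat storage is float32
c = garray_to_cudandarray(g)
assert c.shape == g.shape
# No copy is made: the CudaNdarray returned by cuda.from_gpu_pointer
# aliases g's device memory, so g must stay alive while c is in use.
assert numpy.allclose(g.as_numpy_array(), numpy.asarray(c))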
Example #19
Source File: test_pycuda_utils.py From attention-lvcsr with MIT License

def test_to_cudandarray():
    px = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
    cx = to_cudandarray(px)
    assert isinstance(cx, cuda.CudaNdarray)
    assert numpy.allclose(px.get(), numpy.asarray(cx))
    assert px.dtype == cx.dtype
    assert px.shape == cx.shape
    assert all(numpy.asarray(cx._strides) * 4 == px.strides)

    try:
        px = pycuda.gpuarray.zeros((3, 4, 5), 'float64')
        to_cudandarray(px)
        assert False
    except ValueError:
        pass

    try:
        to_cudandarray(numpy.zeros(4))
        assert False
    except ValueError:
        pass
Example #20
Source File: test_pycuda_theano_simple.py From attention-lvcsr with MIT License

def test_pycuda_theano():
    """Simple example with pycuda function and Theano CudaNdarray object."""
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
    __global__ void multiply_them(float *dest, float *a, float *b)
    {
        const int i = threadIdx.x;
        dest[i] = a[i] * b[i];
    }
    """)
    multiply_them = mod.get_function("multiply_them")

    a = numpy.random.randn(100).astype(numpy.float32)
    b = numpy.random.randn(100).astype(numpy.float32)

    # Test with Theano object
    ga = cuda_ndarray.CudaNdarray(a)
    gb = cuda_ndarray.CudaNdarray(b)
    dest = cuda_ndarray.CudaNdarray.zeros(a.shape)
    # Launch one thread per element: the original block size of (400, 1, 1)
    # would run threads past the end of these 100-element arrays.
    multiply_them(dest, ga, gb,
                  block=(a.shape[0], 1, 1), grid=(1, 1))
    assert (numpy.asarray(dest) == a * b).all()
Example #21
Source File: pycuda_utils.py From attention-lvcsr with MIT License

def to_cudandarray(x):
    """Take a pycuda.gpuarray.GPUArray and make a CudaNdarray that points
    to its memory.

    :note: CudaNdarray supports only float32, so only float32 GPUArrays
        are accepted.
    """
    if not isinstance(x, pycuda.gpuarray.GPUArray):
        raise ValueError("We can transfer only pycuda.gpuarray.GPUArray "
                         "to CudaNdarray")
    elif x.dtype != "float32":
        raise ValueError("CudaNdarray support only float32")
    else:
        strides = [1]
        for i in x.shape[::-1][:-1]:
            strides.append(strides[-1] * i)
        strides = tuple(strides[::-1])
        # in pycuda trunk, x.ptr also works, which is a little cleaner
        ptr = int(x.gpudata)
        z = cuda.from_gpu_pointer(ptr, x.shape, strides, x)
        return z
Example #22
Source File: test_cuda_ndarray.py From attention-lvcsr with MIT License

def test_host_to_device():
    # print >>sys.stdout, 'starting test_host_to_dev'
    for shape in ((), (3,), (2, 3), (3, 4, 5, 6)):
        a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        c = numpy.asarray(b)
        assert numpy.all(a == c)

        # test with float32 dtype
        d = numpy.asarray(b, dtype='float32')
        assert numpy.all(a == d)

        # test with not float32 dtype
        try:
            numpy.asarray(b, dtype='int8')
            assert False
        except TypeError:
            pass
Example #23
Source File: test_cuda_ndarray.py From attention-lvcsr with MIT License

def test_exp():
    # print >>sys.stdout, 'starting test_exp'
    for shape in ((), (3,), (2, 3),
                  (1, 10000000), (10, 1000000),
                  (100, 100000), (1000, 10000), (10000, 1000)):
        a0 = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        a1 = a0.copy()
        b0 = cuda_ndarray.CudaNdarray(a0)
        b1 = cuda_ndarray.CudaNdarray(a1)
        t0 = time.time()
        bsum = b0.exp()
        t1 = time.time()
        gpu_dt = t1 - t0
        t0 = time.time()
        asum = numpy.exp(a1)
        t1 = time.time()
        cpu_dt = t1 - t0
        # print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
        # c = numpy.asarray(b0 + b1)
        if asum.shape:
            assert numpy.allclose(asum, numpy.asarray(bsum))
Example #24
Source File: test_cuda_ndarray.py From attention-lvcsr with MIT License

def test_copy():
    # print >>sys.stdout, 'starting test_copy'
    shape = (500, 499)
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')

    # print >>sys.stdout, '.. creating device object'
    b = cuda_ndarray.CudaNdarray(a)

    # print >>sys.stdout, '.. copy'
    c = copy.copy(b)
    # print >>sys.stdout, '.. deepcopy'
    d = copy.deepcopy(b)

    # print >>sys.stdout, '.. comparisons'
    assert numpy.allclose(a, numpy.asarray(b))
    assert numpy.allclose(a, numpy.asarray(c))
    assert numpy.allclose(a, numpy.asarray(d))
    b += b
    assert numpy.allclose(a + a, numpy.asarray(b))
    assert numpy.allclose(a + a, numpy.asarray(c))
    assert numpy.allclose(a, numpy.asarray(d))
Example #25
Source File: test_cuda_ndarray.py From attention-lvcsr with MIT License

def test_getshape():
    shapelist = [
        ((1, 2, 3), (1, 2, 3)),
        ((1,), (1,)),
        ((1, 2, 3), (3, 2, 1)),
        ((1, 2, 3), (6,)),
        ((1, 2, 3, 2), (6, 2)),
        ((2, 3, 2), (6, 2))
    ]

    def subtest(shape):
        # Use the `shape` argument; the original referred to the loop
        # variable `shape_1`, so `subtest(shape_2)` retested shape_1.
        a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert b.shape == a.shape

    for shape_1, shape_2 in shapelist:
        subtest(shape_1)
        subtest(shape_2)
Example #26
Source File: test_cuda_ndarray.py From attention-lvcsr with MIT License

def test_stride_manipulation():
    a = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    v = b.view()
    v._dev_data += 0
    c = numpy.asarray(v)
    assert numpy.all(a == c)

    sizeof_float = 4
    offset = 0

    b_strides = b._strides
    for i in xrange(len(b.shape)):
        offset += (b.shape[i] - 1) * b_strides[i]
        v._set_stride(i, -b_strides[i])

    v._dev_data += offset * sizeof_float
    c = numpy.asarray(v)

    assert numpy.all(c == [[5, 4, 3], [2, 1, 0]])
Example #27
Source File: test_cuda_ndarray.py From attention-lvcsr with MIT License

def test_setitem_matrixscalar0():
    a = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)
    b = theano._asarray(8, dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # set an element to 8
    _a[1, 1] = _b
    a[1, 1] = b
    assert numpy.allclose(a, numpy.asarray(_a))

    # test direct transfer from numpy
    _a[1, 1] = theano._asarray(888, dtype='float32')
    a[1, 1] = theano._asarray(888, dtype='float32')
    assert numpy.allclose(a, numpy.asarray(_a))

    # broadcast a 0
    _a[1, 1] = 0
    _a[0:2] = 0
    _a[1:] = 0
Example #28
Source File: test_cuda_ndarray.py From attention-lvcsr with MIT License

def test_setitem_matrixvector1():
    a = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)
    b = theano._asarray([8, 9], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # set second column to 8, 9
    _a[:, 1] = _b
    a[:, 1] = b
    assert numpy.allclose(a, numpy.asarray(_a))

    # test direct transfer from numpy
    _a[:, 1] = b * 100
    a[:, 1] = b * 100
    assert numpy.allclose(a, numpy.asarray(_a))

    row = theano._asarray([777, 888, 999], dtype='float32')
    _a[1, :] = row
    a[1, :] = row
    assert numpy.allclose(a, numpy.asarray(_a))
Example #29
Source File: test_cuda_ndarray.py From attention-lvcsr with MIT License

def test_setitem_matrix_bad_shape():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7, 8], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    try:
        # attempt to assign the ndarray b with setitem
        _a[:, 1, 1] = _b
        assert False
    except ValueError as e:
        # print e
        assert True

    # test direct transfer from numpy
    try:
        # attempt to assign the ndarray b with setitem
        _a[1, 1, :] = b
        assert False
    except ValueError as e:
        # print e
        assert True
Example #30
Source File: test_cuda_ndarray.py From attention-lvcsr with MIT License

def test_setitem_matrix_bad_ndim():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7, 8], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    try:
        # attempt to assign the ndarray b with setitem
        _a[:, :, 1] = _b
        assert False
    except ValueError as e:
        # print e
        assert True

    # test direct transfer from numpy
    try:
        # attempt to assign the ndarray b with setitem
        _a[1, :, :] = b
        assert False
    except ValueError as e:
        # print e
        assert True