jcuda.driver.CUdevice Java Examples

Source File: ExecutionConfig.java From systemds with Apache License 2.0

6 votes

/**
 * Get the CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X of the given device.
 * The value is looked up in maxBlockDimForDevice first; the driver is
 * only queried (and the result stored there) when no entry exists yet.
 *
 * @param deviceNumber device number of the given device
 * @return The maximum block dimension, in x-direction
 */
private static int getMaxBlockDim(int deviceNumber) {
	// TODO: Use JCudaDriver.cuOccupancyMaxPotentialBlockSize to chose the block size that maximizes occupancy
	Integer ret = maxBlockDimForDevice.get(deviceNumber);
	if (ret == null) {
		CUdevice device = new CUdevice();
		JCudaKernels.checkResult(jcuda.driver.JCudaDriver.cuDeviceGet(device, deviceNumber));
		int maxBlockDimX[] = { 0 };
		// The status of the attribute query is checked in the same way as the one of
		// cuDeviceGet, so that a failed query does not silently store the initial 0
		JCudaKernels.checkResult(jcuda.driver.JCudaDriver
				.cuDeviceGetAttribute(maxBlockDimX, CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, device));
		maxBlockDimForDevice.put(deviceNumber, maxBlockDimX[0]);
		return maxBlockDimX[0];
	}
	return ret;
}

Source File: JCudaDriverSimpleLWJGL.java From jcuda-samples with MIT License

6 votes

/**
 * Sets up the JCudaDriver: enables exceptions, creates a context for 
 * device 0, loads the kernel module and looks up the kernel function.
 * Note that this has to be done from the same thread that will later 
 * use the JCudaDriver API
 */
private void initJCuda()
{
    JCudaDriver.setExceptionsEnabled(true);

    // Driver initialization, followed by a context for device 0
    cuInit(0);
    CUdevice cudaDevice = new CUdevice();
    cuDeviceGet(cudaDevice, 0);
    CUcontext cudaContext = new CUcontext();
    cuCtxCreate(cudaContext, 0, cudaDevice);

    // Obtain the PTX file for the kernel source file
    String kernelSourcePath = 
        "src/main/resources/kernels/JCudaDriverSimpleGLKernel.cu";
    String ptxPath = JCudaSamplesUtils.preparePtxFile(kernelSourcePath);

    // Load the module from the PTX file
    CUmodule kernelModule = new CUmodule();
    cuModuleLoad(kernelModule, ptxPath);

    // Look up the kernel function. It is stored in 'function', because
    // it will later be called during the animation, in the display
    // method of this GLEventListener.
    function = new CUfunction();
    cuModuleGetFunction(function, kernelModule, "simple_vbo_kernel");
}

Source File: JCudaDriverSimpleJOGL.java From jcuda-samples with MIT License

6 votes

/**
 * Prepares the JCudaDriver for rendering: exceptions are enabled, a 
 * context is created for device 0, and the kernel function is obtained 
 * from the PTX module. Note that this has to be done from the same 
 * thread that will later use the JCudaDriver API
 */
private void initJCuda()
{
    JCudaDriver.setExceptionsEnabled(true);

    // Initialize the driver API, then set up device 0 and its context
    cuInit(0);
    CUdevice dev = new CUdevice();
    cuDeviceGet(dev, 0);
    CUcontext ctx = new CUcontext();
    cuCtxCreate(ctx, 0, dev);

    // Prepare the PTX file for the kernel source file
    String cuFile = 
        "src/main/resources/kernels/JCudaDriverSimpleGLKernel.cu";
    String ptxFile = JCudaSamplesUtils.preparePtxFile(cuFile);

    // Load the PTX file as a module
    CUmodule ptxModule = new CUmodule();
    cuModuleLoad(ptxModule, ptxFile);

    // The kernel function is kept in 'function': It will later be 
    // called during the animation, in the display method of this 
    // GLEventListener.
    function = new CUfunction();
    cuModuleGetFunction(function, ptxModule, "simple_vbo_kernel");
}

Source File: JCudaAbstractKernelTest.java From jcuda with MIT License

6 votes

/**
 * Prepares a PTX file for the given .CU file, loads that PTX file as 
 * a module, and looks up the requested kernel function in this module.
 * 
 * @param cuFileName The .CU file name
 * @param functionName The kernel function name
 * @return The function
 * @throws CudaException If an error occurs
 */
protected final CUfunction initialize(
    String cuFileName, String functionName)
{
    // With exceptions enabled, the subsequent calls are not 
    // followed by explicit error checks
    JCudaDriver.setExceptionsEnabled(true);

    // Driver initialization and a context for the first device
    cuInit(0);
    CUdevice firstDevice = new CUdevice();
    cuDeviceGet(firstDevice, 0);
    CUcontext deviceContext = new CUcontext();
    cuCtxCreate(deviceContext, 0, firstDevice);

    // Obtain the PTX file for the source file, and load it as a module
    String ptxPath = JCudaTestUtils.preparePtxFile(cuFileName);
    CUmodule kernelModule = new CUmodule();
    cuModuleLoad(kernelModule, ptxPath);

    // Look up the kernel function by its name
    CUfunction kernel = new CUfunction();
    cuModuleGetFunction(kernel, kernelModule, functionName);
    return kernel;
}

Source File: JCudaDriverPrimaryContextTest.java From jcuda with MIT License

6 votes

/**
 * Retains the primary context of device 0 and checks that the 
 * resulting context is not equal to a newly constructed CUcontext.
 */
@Test
public void testPrimaryContextCreation()
{
    JCudaDriver.setExceptionsEnabled(true);
    cuInit(0);

    // Obtain device 0
    CUdevice firstDevice = new CUdevice();
    cuDeviceGet(firstDevice, 0);

    // Retain the primary context of this device
    CUcontext primaryContext = new CUcontext();
    cuDevicePrimaryCtxRetain(primaryContext, firstDevice);

    // The retained context must differ from an untouched instance
    CUcontext untouchedContext = new CUcontext();
    boolean sameAsUntouched = primaryContext.equals(untouchedContext);
    assertFalse(sameAsUntouched);
}

Source File: ExecutionConfig.java From systemds with Apache License 2.0

6 votes

/**
 * Get the CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X of the given device.
 * The value is looked up in maxBlockDimForDevice first; the driver is
 * only queried (and the result stored there) when no entry exists yet.
 *
 * @param deviceNumber device number of the given device
 * @return The maximum block dimension, in x-direction
 */
private static int getMaxBlockDim(int deviceNumber) {
	// TODO: Use JCudaDriver.cuOccupancyMaxPotentialBlockSize to chose the block size that maximizes occupancy
	Integer ret = maxBlockDimForDevice.get(deviceNumber);
	if (ret == null) {
		CUdevice device = new CUdevice();
		JCudaKernels.checkResult(jcuda.driver.JCudaDriver.cuDeviceGet(device, deviceNumber));
		int maxBlockDimX[] = { 0 };
		// The status of the attribute query is checked in the same way as the one of
		// cuDeviceGet, so that a failed query does not silently store the initial 0
		JCudaKernels.checkResult(jcuda.driver.JCudaDriver
				.cuDeviceGetAttribute(maxBlockDimX, CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, device));
		maxBlockDimForDevice.put(deviceNumber, maxBlockDimX[0]);
		return maxBlockDimX[0];
	}
	return ret;
}

Source File: JCudaSamplesUtils.java From jcuda-samples with MIT License

5 votes

/**
 * Compute the compute capability of the device of the current
 * context. The result is encoded as the int value 
 * <code>major * 10 + minor</code>, so a device with compute 
 * capability 5.2 yields <code>52</code>.
 * 
 * @return The compute capability of the current device
 * @throws CudaException If there is no current context
 */
private static int computeComputeCapability()
{
    CUdevice currentDevice = new CUdevice();
    int result = cuCtxGetDevice(currentDevice);
    if (result == CUresult.CUDA_SUCCESS)
    {
        return computeComputeCapability(currentDevice);
    }
    // The device of the current context could not be obtained
    throw new CudaException(CUresult.stringFor(result));
}

Source File: Gpus.java From OSPREY3 with GNU General Public License v2.0

5 votes

/**
 * Discovers the CUDA devices and collects those whose Gpu reports
 * support for doubles. An UnsatisfiedLinkError during discovery is
 * printed instead of being propagated; the number of collected GPUs
 * is printed in any case.
 */
private Gpus() {

	print("Discovering CUDA GPUs...");
	gpus = new ArrayList<>();

	try {
		JCudaDriver.setExceptionsEnabled(true);
		// according to docs, init flags must always be zero
		JCudaDriver.cuInit(0);

		// ask the driver for the number of devices
		int[] countOut = new int[1];
		JCudaDriver.cuDeviceGetCount(countOut);
		int numDevices = countOut[0];

		// keep only the devices that have double support
		for (int ordinal = 0; ordinal < numDevices; ordinal++) {
			CUdevice cuDevice = new CUdevice();
			JCudaDriver.cuDeviceGet(cuDevice, ordinal);
			Gpu candidate = new Gpu(cuDevice);
			if (!candidate.supportsDoubles()) {
				continue;
			}
			gpus.add(candidate);
		}
	} catch (UnsatisfiedLinkError ex) {
		// print the stack trace via print(), then carry on
		StringWriter trace = new StringWriter();
		ex.printStackTrace(new PrintWriter(trace));
		print(trace.toString());
	} finally {
		if (!gpus.isEmpty()) {
			print(" found " + gpus.size() + "\n");
		} else {
			print(" none found\n");
		}
	}
}

Source File: Gpu.java From OSPREY3 with GNU General Public License v2.0

5 votes

/**
 * Creates the description of the given device: its name, the free and
 * total memory (both 0 if they cannot be queried), the compute
 * capability version, the warp size and the maximum block dimension
 * in x-direction.
 *
 * @param device the device to describe
 */
public Gpu(CUdevice device) {
	
	this.device = device;
	
	// get name: the bytes up to (excluding) the first zero byte. The scan is
	// bounded by the buffer length, and the length is applied to the bytes
	// (not to the decoded chars), so a missing terminator cannot cause an
	// out-of-bounds access
	byte[] bytes = new byte[1024];
	JCudaDriver.cuDeviceGetName(bytes, bytes.length, device);
	int len = 0;
	while (len < bytes.length && bytes[len] != 0) {
		len++;
	}
	name = new String(bytes, 0, len);
	
	// get total and free memory
	// (if it's even possible... if a GPU is out of memory, we can't even query it)
	try {
		CUcontext cuCtx = new CUcontext();
		JCudaDriver.cuCtxCreate(cuCtx, 0, device);
		try {
			long[][] longs = new long[2][1];
			JCudaDriver.cuMemGetInfo(longs[0], longs[1]);
			freeMemory = longs[0][0];
			totalMemory = longs[1][0];
		} finally {
			// destroy the context even when the memory query failed,
			// so that the context does not leak
			JCudaDriver.cuCtxDestroy(cuCtx);
		}
	} catch (Throwable t) {
		// assume out of memory
		freeMemory = 0;
		totalMemory = 0;
	}
	
	// get attributes
	computeVersion = new int[] {
		getAttribute(CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR),
		getAttribute(CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR)
	};
	warpThreads = getAttribute(CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE);
	maxBlockThreads = getAttribute(CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X);
}

Source File: JCudaDriverTextureTest.java From jcuda with MIT License

5 votes

/**
 * Loads the texture test kernels into a context for the first device,
 * initializes the host input data, and asserts that each of the 
 * float/float4 tests for 1D, 2D and 3D returns true.
 */
@Test
public void testTextures()
{
    JCudaDriver.setExceptionsEnabled(true);

    // Create the PTX file by calling the NVCC
    String kernelSourcePath = 
        "src/test/resources/kernels/JCudaDriverTextureTestKernels.cu";
    String ptxPath = JCudaTestUtils.preparePtxFile(kernelSourcePath);

    // Driver initialization and a context for the first device
    cuInit(0);
    CUcontext context = new CUcontext();
    CUdevice device = new CUdevice();
    cuDeviceGet(device, 0);
    cuCtxCreate(context, 0, device);

    // The module with the kernels is kept in 'module'
    module = new CUmodule();
    cuModuleLoad(module, ptxPath);

    // Set up the host input data
    initInputHost();

    // Run the float tests, then the float4 tests
    assertTrue(test_float_1D());
    assertTrue(test_float_2D());
    assertTrue(test_float_3D());
    assertTrue(test_float4_1D());
    assertTrue(test_float4_2D());
    assertTrue(test_float4_3D());
}

Source File: CudaUtil.java From murphy with Apache License 2.0

5 votes

/**
 * Initializes the CUDA driver API with exceptions enabled, and stores 
 * the device with the given id and a context created for it in 
 * 'device' and 'context'.
 *
 * @param deviceId The id of the device to create the context for
 */
public static void startup(int deviceId) {
    JCudaDriver.setExceptionsEnabled(true);
    JCudaDriver.cuInit(0);

    // Obtain the requested device
    device = new CUdevice();
    cuDeviceGet(device, deviceId);

    // Create the context for this device
    context = new CUcontext();
    cuCtxCreate(context, 0, device);
}

Source File: JCudaSamplesUtils.java From jcuda-samples with MIT License

5 votes

/**
 * Compute the compute capability of the given device. The result is 
 * encoded as the int value <code>major * 10 + minor</code>, so a 
 * device with compute capability 5.2 yields <code>52</code>.
 * 
 * @param device The device
 * @return The compute capability
 */
private static int computeComputeCapability(CUdevice device)
{
    // Query the major version number
    int[] major = { 0 };
    cuDeviceGetAttribute(major,
        CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device);

    // Query the minor version number
    int[] minor = { 0 };
    cuDeviceGetAttribute(minor,
        CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device);

    return major[0] * 10 + minor[0];
}

Source File: JCudaDriverHostFunction.java From jcuda-samples with MIT License

5 votes

/**
 * Entry point: Launches a host function in a stream, waits for the 
 * stream to finish, and destroys the context.
 * 
 * @param args Not used
 */
public static void main(String[] args)
{
    // Default initialization: A context for device 0
    JCudaDriver.setExceptionsEnabled(true);
    cuInit(0);
    CUcontext ctx = new CUcontext();
    CUdevice dev = new CUdevice();
    cuDeviceGet(dev, 0);
    cuCtxCreate(ctx, 0, dev);

    // The stream in which the host function will be launched
    CUstream hostFnStream = new CUstream();
    cuStreamCreate(hostFnStream, 0);

    // The host function only prints the user data that it receives
    CUhostFn printUserData = new CUhostFn()
    {
        @Override
        public void call(Object userData)
        {
            System.out.println("Called with " + userData);
        }
    };
    cuLaunchHostFunc(hostFnStream, printUserData, "Example user object");

    // Block until the stream has finished
    cuStreamSynchronize(hostFnStream);

    // Clean up
    cuCtxDestroy(ctx);

    System.out.println("Done");
}

Source File: JCudaDriverStreamCallbacks.java From jcuda-samples with MIT License

5 votes

/**
 * Set up the driver API, create the {@link #context}, compile the 
 * program source code with NVRTC and obtain the kernel 
 * {@link #function} from the resulting module 
 */
private static void initialize()
{
    System.out.println("Initializing...");

    JCudaDriver.setExceptionsEnabled(true);
    JNvrtc.setExceptionsEnabled(true);

    // Driver initialization and a context for device 0
    cuInit(0);
    CUdevice dev = new CUdevice();
    cuDeviceGet(dev, 0);
    context = new CUcontext();
    cuCtxCreate(context, 0, dev);

    // Compile the source code with NVRTC
    nvrtcProgram nvrtcProg = new nvrtcProgram();
    nvrtcCreateProgram(
        nvrtcProg, programSourceCode, null, 0, null, null);
    nvrtcCompileProgram(nvrtcProg, 0, null);

    // Fetch the PTX, and destroy the program afterwards
    String[] ptxOut = new String[1];
    nvrtcGetPTX(nvrtcProg, ptxOut);
    nvrtcDestroyProgram(nvrtcProg);

    // Load the PTX data as a module and look up the kernel function
    CUmodule ptxModule = new CUmodule();
    cuModuleLoadData(ptxModule, ptxOut[0]);
    function = new CUfunction();
    cuModuleGetFunction(function, ptxModule, "example");

    System.out.println("Initializing DONE");
}

Source File: JCudaReduction.java From jcuda-samples with MIT License

5 votes

/**
 * Set up the elements used in this sample: The context, the module, 
 * the "reduce" function and the temporary device buffer
 */
private static void init()
{
    // Driver API initialization and a context for the first device
    cuInit(0);
    CUdevice firstDevice = new CUdevice();
    cuDeviceGet(firstDevice, 0);
    context = new CUcontext();
    cuCtxCreate(context, 0, firstDevice);

    // Create the PTX file by calling the NVCC
    String kernelSourcePath = 
        "src/main/resources/kernels/JCudaReductionKernel.cu";
    String ptxPath = JCudaSamplesUtils.preparePtxFile(kernelSourcePath);

    // The module is loaded from the PTX file
    module = new CUmodule();
    cuModuleLoad(module, ptxPath);

    // Look up the "reduce" function in the module
    function = new CUfunction();
    cuModuleGetFunction(function, module, "reduce");

    // A chunk of temporary memory (must be at least
    // numberOfBlocks * Sizeof.FLOAT)
    deviceBuffer = new CUdeviceptr();
    cuMemAlloc(deviceBuffer, 1024 * Sizeof.FLOAT);
}

Source File: JCudaDriverBasicGraphExample.java From jcuda-samples with MIT License

5 votes

/**
 * Default CUDA initialization: Exceptions are enabled for the driver
 * API and for NVRTC, and a context is created for the first device
 */
private static void initialize()
{
    JCudaDriver.setExceptionsEnabled(true);
    JNvrtc.setExceptionsEnabled(true);

    // Initialize the driver API
    cuInit(0);

    // Obtain the first device, and create a context for it
    CUdevice firstDevice = new CUdevice();
    cuDeviceGet(firstDevice, 0);
    CUcontext firstDeviceContext = new CUcontext();
    cuCtxCreate(firstDeviceContext, 0, firstDevice);
}

Source File: JCudaConstantMemoryExample.java From jcuda-samples with MIT License

4 votes

/**
 * Entry point: Copies host data into the constant memory of the 
 * module, launches a kernel that uses this constant memory, and checks 
 * that the data copied back from the device equals the host data.
 * 
 * @param args Not used
 * @throws IOException Declared by this sample
 */
public static void main(String[] args) throws IOException 
{
    // With exceptions enabled, the subsequent calls are not 
    // followed by explicit error checks
    JCudaDriver.setExceptionsEnabled(true);

    // Driver initialization and a context for the first device
    cuInit(0);
    CUdevice dev = new CUdevice();
    cuDeviceGet(dev, 0);
    CUcontext ctx = new CUcontext();
    cuCtxCreate(ctx, 0, dev);

    // Create the PTX file by calling the NVCC
    String kernelSourcePath = 
        "src/main/resources/kernels/JCudaConstantMemoryKernel.cu";
    String ptxPath = JCudaSamplesUtils.preparePtxFile(kernelSourcePath);

    // Load the PTX file as a module
    CUmodule ptxModule = new CUmodule();
    cuModuleLoad(ptxModule, ptxPath);

    // Look up the constant memory: Its pointer and its size
    CUdeviceptr constantPtr = new CUdeviceptr();
    long sizeOut[] = { 0 };
    cuModuleGetGlobal(constantPtr, sizeOut, 
        ptxModule, "constantMemoryData");
    int numBytes = (int)sizeOut[0];

    System.out.println("constantMemoryPointer: " + constantPtr);
    System.out.println("constantMemorySize: " + numBytes);

    // Fill a host array with 0,1,2... and copy it to the constant memory
    int count = numBytes / Sizeof.FLOAT;
    float input[] = new float[count];
    for (int k = 0; k < count; k++)
    {
        input[k] = k;
    }
    cuMemcpyHtoD(constantPtr, Pointer.to(input), numBytes);

    // The constant memory is now used in the kernel call:

    // Look up the "constantMemoryKernel" function
    CUfunction constantMemoryKernel = new CUfunction();
    cuModuleGetFunction(
        constantMemoryKernel, ptxModule, "constantMemoryKernel");

    // Device memory that is passed to the kernel
    CUdeviceptr devicePtr = new CUdeviceptr();
    cuMemAlloc(devicePtr, numBytes);

    // The kernel parameters: The device pointer and the element count
    Pointer devicePtrParam = Pointer.to(devicePtr);
    Pointer countParam = Pointer.to(new int[]{count});
    Pointer params = Pointer.to(devicePtrParam, countParam);

    // Launch the kernel with one block that has one thread per element
    int threadsPerBlock = count;
    int numBlocks = 1;
    cuLaunchKernel(constantMemoryKernel,
        numBlocks,       1, 1, 
        threadsPerBlock, 1, 1,
        0, null,         
        params, null 
    );
    cuCtxSynchronize();

    // Copy the device data back to the host. It must be the same
    // as the data that was copied to the constant memory
    float output[] = new float[count];
    cuMemcpyDtoH(Pointer.to(output), devicePtr, numBytes);

    boolean passed = Arrays.equals(input,  output);
    System.out.println("Test " + (passed ? "PASSED" : "FAILED"));
}

Source File: JCudaDriverUnifiedMemory.java From jcuda-samples with MIT License

4 votes

/**
 * Entry point: Allocates managed memory, fills it on the host via a 
 * byte buffer, makes it accessible to all devices, and uses it in a
 * JCublas dot product.
 * 
 * @param args Not used
 */
public static void main(String[] args)
{
    JCudaDriver.setExceptionsEnabled(true);
    JCublas.setExceptionsEnabled(true);

    // Driver initialization and a context for the first device
    cuInit(0);
    CUdevice dev = new CUdevice();
    cuDeviceGet(dev, 0);
    CUcontext ctx = new CUcontext();
    cuCtxCreate(ctx, 0, dev);

    // Bail out if the device does not support managed memory
    int managedMemoryFlag[] = { 0 };
    cuDeviceGetAttribute(managedMemoryFlag, 
        CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, dev);
    boolean managedMemorySupported = managedMemoryFlag[0] != 0;
    if (!managedMemorySupported)
    {
        System.err.println("Device does not support managed memory");
        return;
    }

    // Managed memory that is accessible to the host
    int n = 10;
    long size = n * Sizeof.FLOAT;
    CUdeviceptr managedPtr = new CUdeviceptr();
    cuMemAllocManaged(managedPtr, size, CU_MEM_ATTACH_HOST);

    // The byte buffer can be obtained from the pointer. This is 
    // supported only for memory that was allocated to be accessible 
    // on the host:
    ByteBuffer hostBytes = managedPtr.getByteBuffer(0, size);

    System.out.println("Buffer on host side: " + hostBytes);

    // Write the sample data 0,1,2... through a float view
    FloatBuffer hostFloats = 
        hostBytes.order(ByteOrder.nativeOrder()).asFloatBuffer();
    for (int k = 0; k < n; k++)
    {
        hostFloats.put(k, k);
    }

    // Make the buffer accessible to all devices
    cuStreamAttachMemAsync(null, managedPtr, 0,  CU_MEM_ATTACH_GLOBAL);
    cuStreamSynchronize(null);

    // The pointer is now used in a device operation (here, a dot 
    // product with JCublas, for example). The data that was filled in 
    // by the host will now be used by the device.
    cublasHandle cublas = new cublasHandle();
    cublasCreate(cublas);
    float dot[] = { -1.0f };
    cublasSdot(cublas, n, managedPtr, 1, managedPtr, 1, Pointer.to(dot));
    System.out.println("Result: " + dot[0]);
}

Source File: JCudaDynamicParallelism.java From jcuda-samples with MIT License

4 votes

/**
 * Entry point: Launches the "parentKernel" function from a CUBIN file,
 * copies the data that it wrote back to the host, and compares it to 
 * reference values computed on the host.
 * 
 * @param args Not used
 */
public static void main(String[] args)
{
    JCudaDriver.setExceptionsEnabled(true);

    // A context for the first device
    cuInit(0);
    CUcontext ctx = new CUcontext();
    CUdevice dev = new CUdevice();
    cuDeviceGet(dev, 0);
    cuCtxCreate(ctx, 0, dev);

    // Create the CUBIN file by calling the NVCC. 
    // The NVCC parameters that are used here are described in the 
    // prepareDefaultCubinFile method. 
    String kernelSourcePath = 
        "src/main/resources/kernels/JCudaDynamicParallelismKernel.cu";
    String cubinPath = 
        JCudaSamplesUtils.prepareDefaultCubinFile(kernelSourcePath);

    // Load the CUBIN file as a module
    CUmodule cubinModule = new CUmodule();
    cuModuleLoad(cubinModule, cubinPath);

    // Look up the "parentKernel" function
    CUfunction parentKernel = new CUfunction();
    cuModuleGetFunction(parentKernel, cubinModule, "parentKernel");

    // The nesting structure. 
    // 
    // NOTE: The number of child threads MUST match the value that 
    // is used in the kernel, for the childKernel<<<1, 8>>> call!
    // 
    int numParentThreads = 8;
    int numChildThreads = 8;

    // The device data that will be filled by the kernel
    int total = numParentThreads * numChildThreads;
    int totalBytes = total * Sizeof.FLOAT;
    CUdeviceptr devicePtr = new CUdeviceptr();
    cuMemAlloc(devicePtr, totalBytes);

    // The kernel parameters: A pointer to an array
    // of pointers which point to the actual values.
    Pointer totalParam = Pointer.to(new int[] { total });
    Pointer devicePtrParam = Pointer.to(devicePtr);
    Pointer params = Pointer.to(totalParam, devicePtrParam);

    // Launch the kernel.
    // NOTE(review): the grid size adds the element count twice; this 
    // may have been intended as (total + threadsPerBlock - 1) - confirm
    // against the kernel before changing it.
    int threadsPerBlock = numParentThreads;
    int numBlocks = (total + total - 1) / threadsPerBlock;
    cuLaunchKernel(parentKernel,
        numBlocks,       1, 1, // Grid dimension
        threadsPerBlock, 1, 1, // Block dimension
        0, null,               // Shared memory size and stream
        params, null           // Kernel- and extra parameters
    );
    cuCtxSynchronize();

    // The host array is filled with 0,1,2... and then overwritten
    // with the device data
    float actual[] = new float[total];
    for(int k = 0; k < total; k++)
    {
        actual[k] = k;
    }
    cuMemcpyDtoH(Pointer.to(actual), devicePtr, totalBytes);

    // Compute the expected values on the host, and compare
    float expected[] = new float[total];
    for(int parent = 0; parent < numParentThreads; parent++)
    {
        int offset = parent * numChildThreads;
        for (int child = 0; child < numChildThreads; child++)
        {
            expected[offset + child] = parent + 0.1f * child;
        }
    }
    System.out.println("Result: "+Arrays.toString(actual));
    boolean passed = Arrays.equals(actual, expected);
    System.out.println(passed ? "PASSED" : "FAILED");

    // Clean up.
    cuMemFree(devicePtr);
}

Source File: JCudaDriverMemRangeTest.java From jcuda with MIT License

4 votes

/**
 * Queries the READ_MOSTLY, LAST_PREFETCH_LOCATION, PREFERRED_LOCATION 
 * and ACCESSED_BY attributes of a managed memory range, each with its 
 * own call to cuMemRangeGetAttribute. The results are only printed 
 * when printResults is enabled; nothing is asserted beyond the calls 
 * completing (exceptions are enabled).
 */
@Test
public void testMemRangeAttribute()
{
    JCudaDriver.setExceptionsEnabled(true);
    
    cuInit(0);
    CUcontext context = new CUcontext();
    CUdevice device = new CUdevice();
    cuDeviceGet(device, 0);
    cuCtxCreate(context, 0, device);
    
    int size = 64;
    CUdeviceptr deviceData = new CUdeviceptr();
    cuMemAllocManaged(deviceData, size, CU_MEM_ATTACH_HOST);
    
    // The output arrays are pre-filled with a marker value 
    int readMostly[] = { 12345 };
    int lastPrefetchLocation[] = { 12345 };
    int preferredLocation[] = { 12345 };
    int accessedBy[] = { 12345, 12345, 12345 };
    
    cuMemRangeGetAttribute(Pointer.to(readMostly), Sizeof.INT, 
        CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY, deviceData, size);

    cuMemRangeGetAttribute(Pointer.to(lastPrefetchLocation), Sizeof.INT, 
        CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION, deviceData, size);

    cuMemRangeGetAttribute(Pointer.to(preferredLocation), Sizeof.INT, 
        CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION, deviceData, size);

    cuMemRangeGetAttribute(
        Pointer.to(accessedBy), Sizeof.INT * accessedBy.length, 
        CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY, deviceData, size);

    boolean printResults = false;
    //printResults = true;
    if (printResults)
    {
        // Each label prints the array that was queried for it (the 
        // "readMostly" line previously printed lastPrefetchLocation)
        System.out.println("readMostly          : " + 
            Arrays.toString(readMostly));
        System.out.println("lastPrefetchLocation: " + 
            Arrays.toString(lastPrefetchLocation));
        System.out.println("preferredLocation   : " + 
            Arrays.toString(preferredLocation));
        System.out.println("accessedBy          : " + 
            Arrays.toString(accessedBy));
    }
}

Source File: JCudaDriverMemRangeTest.java From jcuda with MIT License

4 votes

/**
 * Queries the READ_MOSTLY, LAST_PREFETCH_LOCATION, PREFERRED_LOCATION 
 * and ACCESSED_BY attributes of a managed memory range with a single 
 * call to cuMemRangeGetAttributes. The results are only printed when 
 * printResults is enabled; nothing is asserted beyond the call 
 * completing (exceptions are enabled).
 */
@Test
public void testMemRangeAttributes()
{
    JCudaDriver.setExceptionsEnabled(true);
    
    cuInit(0);
    CUcontext context = new CUcontext();
    CUdevice device = new CUdevice();
    cuDeviceGet(device, 0);
    cuCtxCreate(context, 0, device);
    
    int size = 64;
    CUdeviceptr deviceData = new CUdeviceptr();
    cuMemAllocManaged(deviceData, size, CU_MEM_ATTACH_HOST);
    
    // The output arrays are pre-filled with a marker value 
    int readMostly[] = { 12345 };
    int lastPrefetchLocation[] = { 12345 };
    int preferredLocation[] = { 12345 };
    int accessedBy[] = { 12345, 12345, 12345 };
    
    // The three arrays below are parallel: Entry i of each array 
    // belongs to the same attribute
    Pointer data[] =  
    {
        Pointer.to(readMostly),
        Pointer.to(lastPrefetchLocation),
        Pointer.to(preferredLocation),
        Pointer.to(accessedBy) 
    };
    long dataSizes[] = 
    {
        Sizeof.INT, 
        Sizeof.INT, 
        Sizeof.INT, 
        Sizeof.INT * accessedBy.length
    };
    int attributes[] =  
    {
        CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,
        CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,
        CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION,
        CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY,
    };
    cuMemRangeGetAttributes(data, dataSizes, 
        attributes, attributes.length, deviceData, size);
    
    boolean printResults = false;
    //printResults = true;
    if (printResults)
    {
        // Each label prints the array that was queried for it (the 
        // "readMostly" line previously printed lastPrefetchLocation)
        System.out.println("readMostly          : " + 
            Arrays.toString(readMostly));
        System.out.println("lastPrefetchLocation: " + 
            Arrays.toString(lastPrefetchLocation));
        System.out.println("preferredLocation   : " + 
            Arrays.toString(preferredLocation));
        System.out.println("accessedBy          : " + 
            Arrays.toString(accessedBy));
    }
}

Source File: Gpu.java From OSPREY3 with GNU General Public License v2.0

4 votes

/**
 * Returns the device that is stored in this object.
 *
 * @return the device
 */
public CUdevice getDevice() {
	return this.device;
}

Source File: VecDoubleSample.java From jcuda-samples with MIT License

4 votes

/**
 * Entry point: Computes cos(x)*cos(x) + sin(y)*sin(y) for two vectors 
 * on the device with the VecDouble library, and verifies the result 
 * against values computed on the host.
 * 
 * @param args Not used
 */
public static void main(String[] args)
{
    // With exceptions enabled, the subsequent calls are not 
    // followed by explicit error checks
    JCudaDriver.setExceptionsEnabled(true);

    // Driver initialization and a context for the first device
    cuInit(0);
    CUdevice dev = new CUdevice();
    cuDeviceGet(dev, 0);
    CUcontext ctx = new CUcontext();
    cuCtxCreate(ctx, 0, dev);

    // The vector library is initialized afterwards, because it 
    // will attach to the current context
    VecDouble.init();

    // The host input data: Both vectors contain 0,1,2...
    int n = 50000;
    double inputX[] = new double[n];
    double inputY[] = new double[n];
    for(int k = 0; k < n; k++)
    {
        inputX[k] = (double)k;
        inputY[k] = (double)k;
    }

    // Allocate the device vectors, and copy the
    // host input data to the device
    int numBytes = n * Sizeof.DOUBLE;

    CUdeviceptr x = new CUdeviceptr();
    cuMemAlloc(x, numBytes);
    cuMemcpyHtoD(x, Pointer.to(inputX), numBytes);

    CUdeviceptr y = new CUdeviceptr();
    cuMemAlloc(y, numBytes); 
    cuMemcpyHtoD(y, Pointer.to(inputY), numBytes);

    CUdeviceptr sum = new CUdeviceptr();
    cuMemAlloc(sum, numBytes);

    // The vector operations
    VecDouble.cos(n, x, x);      // x = cos(x)  
    VecDouble.mul(n, x, x, x);   // x = x*x
    VecDouble.sin(n, y, y);      // y = sin(y)
    VecDouble.mul(n, y, y, y);   // y = y*y
    VecDouble.add(n, sum, x, y); // result = x+y

    // Copy the device output into newly allocated host memory
    double hostResult[] = new double[n];
    cuMemcpyDtoH(Pointer.to(hostResult), sum, numBytes);

    // Compare with the host computation, stopping at the first mismatch
    boolean passed = true;
    for(int i = 0; i < n; i++)
    {
        double cosX = Math.cos(inputX[i]);
        double sinY = Math.sin(inputY[i]);
        double expected = cosX*cosX+sinY*sinY;
        if (Math.abs(hostResult[i] - expected) > 1e-14)
        {
            System.out.println(
                "At index "+i+ " found "+hostResult[i]+
                " but expected "+expected);
            passed = false;
            break;
        }
    }
    System.out.println("Test "+(passed?"PASSED":"FAILED"));

    // Clean up.
    cuMemFree(x);
    cuMemFree(y);
    cuMemFree(sum);
    VecDouble.shutdown();
}

Source File: VecFloatSample.java From jcuda-samples with MIT License

4 votes

/**
 * Entry point: Computes cos(x)*cos(x) + sin(y)*sin(y) for two vectors 
 * on the device with the VecFloat library, and verifies the result 
 * against values computed on the host.
 * 
 * @param args Not used
 */
public static void main(String[] args)
{
    // With exceptions enabled, the subsequent calls are not 
    // followed by explicit error checks
    JCudaDriver.setExceptionsEnabled(true);

    // Driver initialization and a context for the first device
    cuInit(0);
    CUdevice dev = new CUdevice();
    cuDeviceGet(dev, 0);
    CUcontext ctx = new CUcontext();
    cuCtxCreate(ctx, 0, dev);

    // The vector library is initialized afterwards, because it 
    // will attach to the current context
    VecFloat.init();

    // The host input data: Both vectors contain 0,1,2...
    int n = 50000;
    float inputX[] = new float[n];
    float inputY[] = new float[n];
    for(int k = 0; k < n; k++)
    {
        inputX[k] = (float)k;
        inputY[k] = (float)k;
    }

    // Allocate the device vectors, and copy the
    // host input data to the device
    int numBytes = n * Sizeof.FLOAT;

    CUdeviceptr x = new CUdeviceptr();
    cuMemAlloc(x, numBytes);
    cuMemcpyHtoD(x, Pointer.to(inputX), numBytes);

    CUdeviceptr y = new CUdeviceptr();
    cuMemAlloc(y, numBytes); 
    cuMemcpyHtoD(y, Pointer.to(inputY), numBytes);

    CUdeviceptr sum = new CUdeviceptr();
    cuMemAlloc(sum, numBytes);

    // The vector operations
    VecFloat.cos(n, x, x);      // x = cos(x)  
    VecFloat.mul(n, x, x, x);   // x = x*x
    VecFloat.sin(n, y, y);      // y = sin(y)
    VecFloat.mul(n, y, y, y);   // y = y*y
    VecFloat.add(n, sum, x, y); // result = x+y

    // Copy the device output into newly allocated host memory
    float hostResult[] = new float[n];
    cuMemcpyDtoH(Pointer.to(hostResult), sum, numBytes);

    // Compare with the host computation, stopping at the first mismatch
    boolean passed = true;
    for(int i = 0; i < n; i++)
    {
        double cosX = Math.cos(inputX[i]);
        double sinY = Math.sin(inputY[i]);
        float expected = (float)(cosX*cosX+sinY*sinY);
        if (Math.abs(hostResult[i] - expected) > 1e-5)
        {
            System.out.println(
                "At index "+i+ " found "+hostResult[i]+
                " but expected "+expected);
            passed = false;
            break;
        }
    }
    System.out.println("Test "+(passed?"PASSED":"FAILED"));

    // Clean up.
    cuMemFree(x);
    cuMemFree(y);
    cuMemFree(sum);
    VecFloat.shutdown();
}

jcuda.driver.CUdevice Java Examples