jcuda.driver.JCudaDriver Java Exaples

Source File: JCudaDriverSimpleLWJGL.java From jcuda-samples with MIT License

6 votes

/**
 * Initialize the JCudaDriver. Note that this has to be done from the
 * same thread that will later use the JCudaDriver API
 */
private void initJCuda()
{
    JCudaDriver.setExceptionsEnabled(true);

    // Create a device and a context
    cuInit(0);
    CUdevice device = new CUdevice();
    cuDeviceGet(device, 0);
    CUcontext context = new CUcontext();
    cuCtxCreate(context, 0, device);

    // Prepare the PTX file containing the kernel
    String ptxFileName = JCudaSamplesUtils.preparePtxFile(
        "src/main/resources/kernels/JCudaDriverSimpleGLKernel.cu");
    
    // Load the PTX file containing the kernel
    CUmodule module = new CUmodule();
    cuModuleLoad(module, ptxFileName);

    // Obtain a function pointer to the kernel function. This function
    // will later be called during the animation, in the display 
    // method of this GLEventListener.
    function = new CUfunction();
    cuModuleGetFunction(function, module, "simple_vbo_kernel");
}

Source File: JCudaDriverSimpleJOGL.java From jcuda-samples with MIT License

6 votes

/**
 * Initialize the JCudaDriver. Note that this has to be done from the
 * same thread that will later use the JCudaDriver API
 */
private void initJCuda()
{
    JCudaDriver.setExceptionsEnabled(true);

    // Create a device and a context
    cuInit(0);
    CUdevice device = new CUdevice();
    cuDeviceGet(device, 0);
    CUcontext context = new CUcontext();
    cuCtxCreate(context, 0, device);

    // Prepare the PTX file containing the kernel
    String ptxFileName = JCudaSamplesUtils.preparePtxFile(
        "src/main/resources/kernels/JCudaDriverSimpleGLKernel.cu");
    
    // Load the PTX file containing the kernel
    CUmodule module = new CUmodule();
    cuModuleLoad(module, ptxFileName);

    // Obtain a function pointer to the kernel function. This function
    // will later be called during the animation, in the display 
    // method of this GLEventListener.
    function = new CUfunction();
    cuModuleGetFunction(function, module, "simple_vbo_kernel");
}

Source File: CUDAInnerLoop.java From ocular with GNU General Public License v3.0

6 votes

public void compute(final float[] scores, final float[] whiteObservations, final float[] blackObservations, final int sequenceLength) {
	int gridSizeX = (int) Math.ceil(((double) sequenceLength) / (BLOCK_SIZE_X*ROLL_X));
	int extendedSeqLength = gridSizeX * (BLOCK_SIZE_X*ROLL_X);
	cuMemcpyHtoD(d_Ow, Pointer.to(CudaUtil.extendWithZeros(whiteObservations, (extendedSeqLength+maxTemplateWidth-1)*CharacterTemplate.LINE_HEIGHT)), (extendedSeqLength+maxTemplateWidth-1)*CharacterTemplate.LINE_HEIGHT * Sizeof.FLOAT);
	cuMemcpyHtoD(d_Ob, Pointer.to(CudaUtil.extendWithZeros(blackObservations, (extendedSeqLength+maxTemplateWidth-1)*CharacterTemplate.LINE_HEIGHT)), (extendedSeqLength+maxTemplateWidth-1)*CharacterTemplate.LINE_HEIGHT * Sizeof.FLOAT);
	for (int tw=minTemplateWidth; tw<=maxTemplateWidth; ++tw) {
		if (templateNumIndices[tw-minTemplateWidth] > 0) {
			CUfunction function = new CUfunction();
			cuModuleGetFunction(function, cudaModule, "compute_emissions_"+tw);
			JCudaDriver.cuFuncSetCacheConfig(function, CUfunc_cache.CU_FUNC_CACHE_PREFER_SHARED);
			JCudaDriver.cuFuncSetSharedMemConfig(function, CUsharedconfig.CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE);
			Pointer kernelParameters = Pointer.to(Pointer.to(new int[] {templateIndicesOffsets[tw-minTemplateWidth]*sequenceLength}), Pointer.to(new int[] {sequenceLength}), Pointer.to(new int[] {templateNumIndices[tw-minTemplateWidth]}), Pointer.to(d_Tw[tw-minTemplateWidth]), Pointer.to(d_Tb[tw-minTemplateWidth]), Pointer.to(d_Ow), Pointer.to(d_Ob), Pointer.to(d_scores));
			int gridSizeY = (int) Math.ceil(((double) templateNumIndices[tw-minTemplateWidth]) / BLOCK_SIZE_Y);
			cuLaunchKernel(function, 
					gridSizeX, gridSizeY, 1,      // Grid dimension
					BLOCK_SIZE_X, BLOCK_SIZE_Y, 1,      // Block dimension
					0, null,               // Shared memory size and stream
					kernelParameters, null // Kernel- and extra parameters
					);
		}
	}
	cuMemcpyDtoH(Pointer.to(scores), d_scores, sequenceLength*totalTemplateNumIndices * Sizeof.FLOAT);
}

Source File: JCudaDriverPrimaryContextTest.java From jcuda with MIT License

6 votes

@Test
public void testPrimaryContextCreation()
{
    JCudaDriver.setExceptionsEnabled(true);
    
    cuInit(0);

    CUdevice device = new CUdevice();
    cuDeviceGet(device, 0);
    
    CUcontext context = new CUcontext();
    cuDevicePrimaryCtxRetain(context, device);
    
    CUcontext nullContext = new CUcontext();
    assertFalse(context.equals(nullContext));
}

Source File: JCudaAbstractKernelTest.java From jcuda with MIT License

6 votes

/**
 * Tries to compile the specified .CU file into a PTX file, loads this
 * PTX file as a module, obtains the specified function from this module
 * and returns it.
 * 
 * @param cuFileName The .CU file name
 * @param functionName The kernel function name
 * @return The function
 * @throws CudaException If an error occurs
 */
protected final CUfunction initialize(
    String cuFileName, String functionName)
{
    // Enable exceptions and omit all subsequent error checks
    JCudaDriver.setExceptionsEnabled(true);
   
    // Initialize the driver and create a context for the first device.
    cuInit(0);
    CUdevice device = new CUdevice();
    cuDeviceGet(device, 0);
    CUcontext context = new CUcontext();
    cuCtxCreate(context, 0, device);

    String ptxFileName = JCudaTestUtils.preparePtxFile(cuFileName);
    
    // Load the ptx file.
    CUmodule module = new CUmodule();
    cuModuleLoad(module, ptxFileName);

    // Obtain a function pointer to the kernel function.
    CUfunction function = new CUfunction();
    cuModuleGetFunction(function, module, functionName);
    
    return function;
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

private static void scalarSet(Matrix A, float alpha) {
	int n = A.rows*A.cols;
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorScalarSet");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(new float[] {alpha}), Pointer.to(new int[] {n}));
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: Kernel.java From OSPREY3 with GNU General Public License v2.0

5 votes

public Function(String name) {
	func = new CUfunction();
	JCudaDriver.cuModuleGetFunction(func, module, name);
	pArgs = null;
	numBlocks = 1;
	blockThreads = 1;
	sharedMemCalc = new SharedMemCalculator.None();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

private static void mul(Matrix A, Matrix B, Matrix C) {
	int n = A.rows*A.cols;
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorMul");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(C.data_d), Pointer.to(new int[] {n}));
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: JCudaDriverBasicGraphExample.java From jcuda-samples with MIT License

5 votes

/**
 * Perform a default initialization of CUDA, creating a context
 * for the first device
 */
private static void initialize()
{
    JCudaDriver.setExceptionsEnabled(true);
    JNvrtc.setExceptionsEnabled(true);
    cuInit(0);
    CUdevice device = new CUdevice();
    cuDeviceGet(device, 0);
    CUcontext context = new CUcontext();
    cuCtxCreate(context, 0, device);
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

private static void gemmBatched(float alpha, List<Matrix> A, List<Matrix> B, float beta, List<Matrix> C) {
	Pointer[] Apointers = new Pointer[A.size()];
	Pointer[] Bpointers = new Pointer[B.size()];
	Pointer[] Cpointers = new Pointer[C.size()];
	for (int i=0; i<A.size(); ++i) {
		Apointers[i] = A.get(i).data_d;
		Bpointers[i] = B.get(i).data_d;
		Cpointers[i] = C.get(i).data_d;
	}
	Pointer Apointers_d = new Pointer();
	JCuda.cudaMalloc(Apointers_d, A.size() * Sizeof.POINTER);
	JCuda.cudaMemcpy(Apointers_d, Pointer.to(Apointers), A.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
	Pointer Bpointers_d = new Pointer();
	JCuda.cudaMalloc(Bpointers_d, B.size() * Sizeof.POINTER);
	JCuda.cudaMemcpy(Bpointers_d, Pointer.to(Bpointers), B.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
	Pointer Cpointers_d = new Pointer();
	JCuda.cudaMalloc(Cpointers_d, C.size() * Sizeof.POINTER);
	JCuda.cudaMemcpy(Cpointers_d, Pointer.to(Cpointers), C.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	
	JCublas2.cublasSgemmBatched(cublasHandle, cublasOperation.CUBLAS_OP_N, cublasOperation.CUBLAS_OP_N, C.get(0).rows, C.get(0).cols, B.get(0).rows, Pointer.to(new float[] {alpha}), Apointers_d, A.get(0).rows, Bpointers_d, B.get(0).rows, Pointer.to(new float[] {beta}), Cpointers_d, C.get(0).rows, A.size());
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	
	JCuda.cudaFree(Apointers_d);
	JCuda.cudaFree(Bpointers_d);
	JCuda.cudaFree(Cpointers_d);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: GpuStream.java From OSPREY3 with GNU General Public License v2.0

5 votes

public void cleanup() {
	if (stream != null) {
		byteBuffers.cleanup();
		intBuffers.cleanup();
		longBuffers.cleanup();
		doubleBuffers.cleanup();
		try {
			JCudaDriver.cuStreamDestroy(stream);
		} catch (Throwable t) {
			t.printStackTrace(System.err);
		}
		stream = null;
	}
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

private static void getrfGetriBatched(List<Matrix> A, List<Matrix> B) {
	Pointer[] Apointers = new Pointer[A.size()];
	Pointer[] Bpointers = new Pointer[B.size()];
	for (int i=0; i<A.size(); ++i) {
		Apointers[i] = A.get(i).data_d;
		Bpointers[i] = B.get(i).data_d;
	}
	Pointer Apointers_d = new Pointer();
	JCuda.cudaMalloc(Apointers_d, A.size() * Sizeof.POINTER);
	JCuda.cudaMemcpy(Apointers_d, Pointer.to(Apointers), A.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
	Pointer Bpointers_d = new Pointer();
	JCuda.cudaMalloc(Bpointers_d, B.size() * Sizeof.POINTER);
	JCuda.cudaMemcpy(Bpointers_d, Pointer.to(Bpointers), B.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
	Pointer info_d = new Pointer();
	JCuda.cudaMalloc(info_d, A.size() * Sizeof.INT);
	Pointer pivots_d = new Pointer();
	JCuda.cudaMalloc(pivots_d, A.get(0).rows * A.size() * Sizeof.INT);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	
	JCublas2.cublasSgetrfBatched(cublasHandle, A.get(0).rows, Apointers_d, A.get(0).rows, pivots_d, info_d, A.size());
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	
	JCublas2.cublasSgetriBatched(cublasHandle, A.get(0).rows, Apointers_d, A.get(0).rows, pivots_d, Bpointers_d, B.get(0).rows, info_d, A.size());
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	
	JCuda.cudaFree(Apointers_d);
	JCuda.cudaFree(Bpointers_d);
	JCuda.cudaFree(info_d);
	JCuda.cudaFree(pivots_d);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: JCudaDriverHostFunction.java From jcuda-samples with MIT License

5 votes

/**
 * Entry point
 * 
 * @param args Not used
 */
public static void main(String[] args)
{
    // Default initialization
    JCudaDriver.setExceptionsEnabled(true);
    cuInit(0);
    CUcontext context = new CUcontext();
    CUdevice device = new CUdevice();
    cuDeviceGet(device, 0);
    cuCtxCreate(context, 0, device);

    // Create a stream
    CUstream stream = new CUstream();
    cuStreamCreate(stream, 0);
    
    // Define a host function and launch it
    CUhostFn fn = new CUhostFn()
    {
        @Override
        public void call(Object userData)
        {
            System.out.println("Called with " + userData);
        }
    };
    cuLaunchHostFunc(stream, fn, "Example user object");
    
    // Wait for the stream to finish
    cuStreamSynchronize(stream);

    // Clean up
    cuCtxDestroy(context);
    
    System.out.println("Done");
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

private static void max(Matrix A, Matrix B, float val) {
	int n = A.rows*A.cols;
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorMax");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new float[] {val}), Pointer.to(new int[] {n}));
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CudaUtil.java From murphy with Apache License 2.0

5 votes

public static void startup(int deviceId) {
       JCudaDriver.setExceptionsEnabled(true);
       JCudaDriver.cuInit(0);
       device = new CUdevice();
       cuDeviceGet(device, deviceId);
       context = new CUcontext();
       cuCtxCreate(context, 0, device);
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

public static Matrix build(float[][] mat) {
	Matrix result = new Matrix(mat.length, mat[0].length);
	float[] data_h = toColMajor(mat);
	JCublas2.cublasSetMatrix(result.rows, result.cols, Sizeof.FLOAT, Pointer.to(data_h), result.rows, result.data_d, result.rows);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	return result;
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

public static void freeAllBut(Collection<Matrix> keep) {
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	LinkedList<Matrix> remainingAllocated = new LinkedList<Matrix>();
	while (!allocated.isEmpty()) {
		Matrix mat = allocated.poll();
		if (!keep.contains(mat) && !mat.dontFree) {
			mat.free();
		} else {
			remainingAllocated.add(mat);
		}
	}
	allocated = remainingAllocated;
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

private static void scalarAdd(Matrix A, float alpha, Matrix B) {
	int n = A.rows*A.cols;
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorScalarAdd");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new float[] {alpha}), Pointer.to(new int[] {n}));
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

private static void min(Matrix A, Matrix B, float val) {
	int n = A.rows*A.cols;
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorMin");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new float[] {val}), Pointer.to(new int[] {n}));
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: Gpu.java From OSPREY3 with GNU General Public License v2.0

5 votes

public Gpu(CUdevice device) {
	
	this.device = device;
	
	// get name
	byte[] bytes = new byte[1024];
	JCudaDriver.cuDeviceGetName(bytes, bytes.length, device);
	int len = 0;
	while (bytes[len++] != 0);
	name = new String(bytes).substring(0, len - 1);
	
	// get total and free memory
	// (if it's even possible... if a GPU is out of memory, we can't even query it)
	try {
		CUcontext cuCtx = new CUcontext();
		JCudaDriver.cuCtxCreate(cuCtx, 0, device);
		long[][] longs = new long[2][1];
		JCudaDriver.cuMemGetInfo(longs[0], longs[1]);
		freeMemory = longs[0][0];
		totalMemory = longs[1][0];
		JCudaDriver.cuCtxDestroy(cuCtx);
	} catch (Throwable t) {
		// assume out of memory
		freeMemory = 0;
		totalMemory = 0;
	}
	
	// get attributes
	computeVersion = new int[] {
		getAttribute(CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR),
		getAttribute(CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR)
	};
	warpThreads = getAttribute(CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE);
	maxBlockThreads = getAttribute(CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X);
}

Source File: Context.java From OSPREY3 with GNU General Public License v2.0

5 votes

public Context(Gpu gpu) {
	
	this.gpu = gpu;
	
	// create the cuda context
	context = new CUcontext();
	//int flags = CUctx_flags.CU_CTX_SCHED_YIELD;
	//int flags = CUctx_flags.CU_CTX_SCHED_SPIN;
	int flags = CUctx_flags.CU_CTX_SCHED_BLOCKING_SYNC;
	JCudaDriver.cuCtxCreate(context, flags, gpu.getDevice());
	
	kernels = new HashMap<>();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

private static void exp(Matrix A, Matrix B) {
	int n = A.rows*A.cols;
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorExp");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new int[] {n}));
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

public float[] toArray() {
			float[] data_h = new float[rows*cols];
//			JCublas2.cublasGetVector(data_h.length, Sizeof.FLOAT, data_d, 1, Pointer.to(data_h), 1);
			JCublas2.cublasGetMatrix(rows, cols, Sizeof.FLOAT, data_d, rows, Pointer.to(data_h), rows);
			if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
			return data_h;
		}

Source File: Gpus.java From OSPREY3 with GNU General Public License v2.0

5 votes

private Gpus() {
	
	print("Discovering CUDA GPUs...");
	gpus = new ArrayList<>();

	try {
		// according to docs, init flags must always be zero
		JCudaDriver.setExceptionsEnabled(true);
		JCudaDriver.cuInit(0);
		
		// how many gpus are there?
		int[] ints = new int[1];
		JCudaDriver.cuDeviceGetCount(ints);
		int count = ints[0];
		
		// get the ones that have double support
		for (int i=0; i<count; i++) {
			
			CUdevice device = new CUdevice();
			JCudaDriver.cuDeviceGet(device, i);
			Gpu gpu = new Gpu(device);
			
			if (gpu.supportsDoubles()) {
				gpus.add(gpu);
			}
		}
	} catch (UnsatisfiedLinkError ex) {
		StringWriter buf = new StringWriter();
		ex.printStackTrace(new PrintWriter(buf));
		print(buf.toString());
	} finally {
		if (gpus.isEmpty()) {
			print(" none found\n");
		} else {
			print(" found " + gpus.size() + "\n");
		}
	}
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

private static void sign(Matrix A, Matrix B) {
	int n = A.rows*A.cols;
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorSign");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new int[] {n}));
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: Context.java From OSPREY3 with GNU General Public License v2.0

5 votes

public synchronized void cleanup() {
	try {
		for (CUmodule kernel : kernels.values()) {
			JCudaDriver.cuModuleUnload(kernel);
		}
		kernels.clear();
		
		JCudaDriver.cuCtxDestroy(context);
	} catch (Throwable t) {
		t.printStackTrace(System.err);
	}
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

private static void div(Matrix A, Matrix B, Matrix C) {
	int n = A.rows*A.cols;
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorDiv");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(C.data_d), Pointer.to(new int[] {n}));
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

private static void sqrt(Matrix A, Matrix B) {
	int n = A.rows*A.cols;
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorSqrt");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new int[] {n}));
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

private static void sqr(Matrix A, Matrix B) {
	int n = A.rows*A.cols;
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorSqr");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new int[] {n}));
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

private static void pow(Matrix A, Matrix B, float val) {
	int n = A.rows*A.cols;
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorPow");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new float[] {val}), Pointer.to(new int[] {n}));
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

jcuda.driver.JCudaDriver Java Examples