Java Code Examples for jcuda.driver.JCudaDriver#cuCtxSynchronize()

The following examples show how to use jcuda.driver.JCudaDriver#cuCtxSynchronize(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Frees the matrices currently tracked in {@code allocated}.
 *
 * <p>Each tracked matrix is removed from {@code allocated}; it is freed unless its
 * {@code dontFree} flag is set and {@code freeDontFree} is {@code false}. Matrices
 * that are kept are collected into a fresh list which then replaces {@code allocated}.
 *
 * @param freeDontFree when {@code true}, matrices flagged {@code dontFree} are freed as well
 */
public static void freeAll(boolean freeDontFree) {
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
	LinkedList<Matrix> survivors = new LinkedList<Matrix>();
	// Drain via poll() rather than iterating, so the tracking list is consumed as we go.
	while (!allocated.isEmpty()) {
		Matrix current = allocated.poll();
		boolean keep = !freeDontFree && current.dontFree;
		if (keep) {
			survivors.add(current);
			continue;
		}
		current.free();
	}
	allocated = survivors;
}
 
Example 2
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the {@code "vectorSqrt"} kernel of {@code helperModule} on the device buffers of two matrices.
 *
 * <p>The kernel receives {@code A.data_d}, {@code B.data_d} and the element count
 * {@code n = A.rows * A.cols}, in that order. The kernel source is not visible here;
 * presumably it writes the element-wise square root of A into B (confirm against the module).
 * The launch is one-dimensional: {@code blockSize = min(n, BLOCK_SIZE)} threads per block and
 * {@code ceil(n / blockSize)} blocks, no dynamic shared memory, null stream.
 *
 * <p>When A has no elements nothing is launched: the computed grid and block sizes would
 * both be zero, which the driver rejects as an invalid launch configuration.
 *
 * @param A matrix whose device buffer is the first kernel argument and whose size determines n
 * @param B matrix whose device buffer is the second kernel argument
 */
private static void sqrt(Matrix A, Matrix B) {
	int n = A.rows*A.cols;
	// Skip the launch for empty input; a 0-sized grid/block is not a valid configuration.
	if (n > 0) {
		CUfunction function = new CUfunction();
		cuModuleGetFunction(function, helperModule, "vectorSqrt");
		Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new int[] {n}));
		int blockSize = Math.min(n, BLOCK_SIZE);
		int gridSizeX = (int) Math.ceil((double) n / blockSize);
		cuLaunchKernel(function,
				gridSizeX, 1, 1,      // Grid dimension
				blockSize, 1, 1,      // Block dimension
				0, null,               // Shared memory size and stream
				kernelParameters, null // Kernel- and extra parameters
				);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
 
Example 3
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the {@code "vectorAbs"} kernel of {@code helperModule} on the device buffers of two matrices.
 *
 * <p>The kernel receives {@code A.data_d}, {@code B.data_d} and the element count
 * {@code n = A.rows * A.cols}, in that order. The kernel source is not visible here;
 * presumably it writes the element-wise absolute value of A into B (confirm against the module).
 * The launch is one-dimensional: {@code blockSize = min(n, BLOCK_SIZE)} threads per block and
 * {@code ceil(n / blockSize)} blocks, no dynamic shared memory, null stream.
 *
 * <p>When A has no elements nothing is launched: the computed grid and block sizes would
 * both be zero, which the driver rejects as an invalid launch configuration.
 *
 * @param A matrix whose device buffer is the first kernel argument and whose size determines n
 * @param B matrix whose device buffer is the second kernel argument
 */
private static void abs(Matrix A, Matrix B) {
	int n = A.rows*A.cols;
	// Skip the launch for empty input; a 0-sized grid/block is not a valid configuration.
	if (n > 0) {
		CUfunction function = new CUfunction();
		cuModuleGetFunction(function, helperModule, "vectorAbs");
		Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new int[] {n}));
		int blockSize = Math.min(n, BLOCK_SIZE);
		int gridSizeX = (int) Math.ceil((double) n / blockSize);
		cuLaunchKernel(function,
				gridSizeX, 1, 1,      // Grid dimension
				blockSize, 1, 1,      // Block dimension
				0, null,               // Shared memory size and stream
				kernelParameters, null // Kernel- and extra parameters
				);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
 
Example 4
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code cublasSgemmBatched} over three parallel lists of matrices.
 *
 * <p>The device pointers of every matrix are gathered into host arrays, copied into
 * temporary device arrays, and handed to cuBLAS with both operands untransposed
 * ({@code CUBLAS_OP_N}). Dimensions and leading dimensions are taken from the first
 * element of each list ({@code m = C[0].rows}, {@code n = C[0].cols}, {@code k = B[0].rows}),
 * so all matrices of a list are presumably expected to share one shape. The batch count
 * is {@code A.size()}, and B and C are indexed with A's indices, so they must hold at
 * least that many matrices.
 *
 * <p>The temporary device arrays are released in a {@code finally} block so they do not
 * leak when any call in between throws (for example an index error on an empty list, or
 * a CUDA error when JCuda exceptions are enabled). Freeing a pointer that was never
 * allocated is harmless because a fresh {@code Pointer} is a null device pointer.
 *
 * @param alpha scalar passed to cuBLAS as the product scale factor
 * @param A left operands
 * @param B right operands
 * @param beta scalar passed to cuBLAS as the scale factor applied to C
 * @param C output matrices (also read when beta is non-zero, per the gemm contract)
 */
private static void gemmBatched(float alpha, List<Matrix> A, List<Matrix> B, float beta, List<Matrix> C) {
	Pointer[] Apointers = new Pointer[A.size()];
	Pointer[] Bpointers = new Pointer[B.size()];
	Pointer[] Cpointers = new Pointer[C.size()];
	for (int i=0; i<A.size(); ++i) {
		Apointers[i] = A.get(i).data_d;
		Bpointers[i] = B.get(i).data_d;
		Cpointers[i] = C.get(i).data_d;
	}
	// Created up front so the finally block can free them unconditionally.
	Pointer Apointers_d = new Pointer();
	Pointer Bpointers_d = new Pointer();
	Pointer Cpointers_d = new Pointer();
	try {
		JCuda.cudaMalloc(Apointers_d, A.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Apointers_d, Pointer.to(Apointers), A.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		JCuda.cudaMalloc(Bpointers_d, B.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Bpointers_d, Pointer.to(Bpointers), B.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		JCuda.cudaMalloc(Cpointers_d, C.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Cpointers_d, Pointer.to(Cpointers), C.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();

		JCublas2.cublasSgemmBatched(cublasHandle, cublasOperation.CUBLAS_OP_N, cublasOperation.CUBLAS_OP_N, C.get(0).rows, C.get(0).cols, B.get(0).rows, Pointer.to(new float[] {alpha}), Apointers_d, A.get(0).rows, Bpointers_d, B.get(0).rows, Pointer.to(new float[] {beta}), Cpointers_d, C.get(0).rows, A.size());
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	} finally {
		JCuda.cudaFree(Apointers_d);
		JCuda.cudaFree(Bpointers_d);
		JCuda.cudaFree(Cpointers_d);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
 
Example 5
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code cublasSgetrfBatched} followed by {@code cublasSgetriBatched} on a batch of matrices.
 *
 * <p>Per the cuBLAS contract, the first call LU-factorizes every matrix of A in place and
 * the second call uses those factors and pivots to write the inverse into the matching
 * matrix of B. The matrix order and A's leading dimension are both {@code A.get(0).rows};
 * B's leading dimension is {@code B.get(0).rows}; the batch count is {@code A.size()}.
 * B is indexed with A's indices, so it must hold at least as many matrices as A.
 *
 * <p>The per-matrix status values written to the device {@code info} buffer are never
 * copied back to the host here, so a failed factorization is not reported by this method.
 *
 * <p>All temporary device buffers are released in a {@code finally} block so they do not
 * leak when a call in between throws. Freeing a pointer that was never allocated is
 * harmless because a fresh {@code Pointer} is a null device pointer.
 *
 * @param A input matrices; overwritten with their LU factors
 * @param B output matrices receiving the inverses
 */
private static void getrfGetriBatched(List<Matrix> A, List<Matrix> B) {
	Pointer[] Apointers = new Pointer[A.size()];
	Pointer[] Bpointers = new Pointer[B.size()];
	for (int i=0; i<A.size(); ++i) {
		Apointers[i] = A.get(i).data_d;
		Bpointers[i] = B.get(i).data_d;
	}
	// Created up front so the finally block can free them unconditionally.
	Pointer Apointers_d = new Pointer();
	Pointer Bpointers_d = new Pointer();
	Pointer info_d = new Pointer();
	Pointer pivots_d = new Pointer();
	try {
		JCuda.cudaMalloc(Apointers_d, A.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Apointers_d, Pointer.to(Apointers), A.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		JCuda.cudaMalloc(Bpointers_d, B.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Bpointers_d, Pointer.to(Bpointers), B.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		// One status int per matrix, and one pivot index per row per matrix.
		JCuda.cudaMalloc(info_d, A.size() * Sizeof.INT);
		JCuda.cudaMalloc(pivots_d, A.get(0).rows * A.size() * Sizeof.INT);
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();

		JCublas2.cublasSgetrfBatched(cublasHandle, A.get(0).rows, Apointers_d, A.get(0).rows, pivots_d, info_d, A.size());
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();

		JCublas2.cublasSgetriBatched(cublasHandle, A.get(0).rows, Apointers_d, A.get(0).rows, pivots_d, Bpointers_d, B.get(0).rows, info_d, A.size());
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	} finally {
		JCuda.cudaFree(Apointers_d);
		JCuda.cudaFree(Bpointers_d);
		JCuda.cudaFree(info_d);
		JCuda.cudaFree(pivots_d);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
 
Example 6
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the {@code "vectorScalarSet"} kernel of {@code helperModule} on a matrix's device buffer.
 *
 * <p>The kernel receives {@code A.data_d}, the scalar {@code alpha} and the element count
 * {@code n = A.rows * A.cols}, in that order. The kernel source is not visible here;
 * presumably it sets every element of A to alpha (confirm against the module).
 * The launch is one-dimensional: {@code blockSize = min(n, BLOCK_SIZE)} threads per block and
 * {@code ceil(n / blockSize)} blocks, no dynamic shared memory, null stream.
 *
 * <p>When A has no elements nothing is launched: the computed grid and block sizes would
 * both be zero, which the driver rejects as an invalid launch configuration.
 *
 * @param A matrix whose device buffer is the first kernel argument and whose size determines n
 * @param alpha scalar passed as the second kernel argument
 */
private static void scalarSet(Matrix A, float alpha) {
	int n = A.rows*A.cols;
	// Skip the launch for empty input; a 0-sized grid/block is not a valid configuration.
	if (n > 0) {
		CUfunction function = new CUfunction();
		cuModuleGetFunction(function, helperModule, "vectorScalarSet");
		Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(new float[] {alpha}), Pointer.to(new int[] {n}));
		int blockSize = Math.min(n, BLOCK_SIZE);
		int gridSizeX = (int) Math.ceil((double) n / blockSize);
		cuLaunchKernel(function,
				gridSizeX, 1, 1,      // Grid dimension
				blockSize, 1, 1,      // Block dimension
				0, null,               // Shared memory size and stream
				kernelParameters, null // Kernel- and extra parameters
				);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
 
Example 7
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the {@code "vectorExp"} kernel of {@code helperModule} on the device buffers of two matrices.
 *
 * <p>The kernel receives {@code A.data_d}, {@code B.data_d} and the element count
 * {@code n = A.rows * A.cols}, in that order. The kernel source is not visible here;
 * presumably it writes the element-wise exponential of A into B (confirm against the module).
 * The launch is one-dimensional: {@code blockSize = min(n, BLOCK_SIZE)} threads per block and
 * {@code ceil(n / blockSize)} blocks, no dynamic shared memory, null stream.
 *
 * <p>When A has no elements nothing is launched: the computed grid and block sizes would
 * both be zero, which the driver rejects as an invalid launch configuration.
 *
 * @param A matrix whose device buffer is the first kernel argument and whose size determines n
 * @param B matrix whose device buffer is the second kernel argument
 */
private static void exp(Matrix A, Matrix B) {
	int n = A.rows*A.cols;
	// Skip the launch for empty input; a 0-sized grid/block is not a valid configuration.
	if (n > 0) {
		CUfunction function = new CUfunction();
		cuModuleGetFunction(function, helperModule, "vectorExp");
		Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new int[] {n}));
		int blockSize = Math.min(n, BLOCK_SIZE);
		int gridSizeX = (int) Math.ceil((double) n / blockSize);
		cuLaunchKernel(function,
				gridSizeX, 1, 1,      // Grid dimension
				blockSize, 1, 1,      // Block dimension
				0, null,               // Shared memory size and stream
				kernelParameters, null // Kernel- and extra parameters
				);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
 
Example 8
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the {@code "vectorScalarAdd"} kernel of {@code helperModule} on the device buffers of two matrices.
 *
 * <p>The kernel receives {@code A.data_d}, {@code B.data_d}, the scalar {@code alpha} and the
 * element count {@code n = A.rows * A.cols}, in that order. The kernel source is not visible
 * here; presumably it writes A plus alpha, element-wise, into B (confirm against the module).
 * The launch is one-dimensional: {@code blockSize = min(n, BLOCK_SIZE)} threads per block and
 * {@code ceil(n / blockSize)} blocks, no dynamic shared memory, null stream.
 *
 * <p>When A has no elements nothing is launched: the computed grid and block sizes would
 * both be zero, which the driver rejects as an invalid launch configuration.
 *
 * @param A matrix whose device buffer is the first kernel argument and whose size determines n
 * @param alpha scalar passed as the third kernel argument
 * @param B matrix whose device buffer is the second kernel argument
 */
private static void scalarAdd(Matrix A, float alpha, Matrix B) {
	int n = A.rows*A.cols;
	// Skip the launch for empty input; a 0-sized grid/block is not a valid configuration.
	if (n > 0) {
		CUfunction function = new CUfunction();
		cuModuleGetFunction(function, helperModule, "vectorScalarAdd");
		Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new float[] {alpha}), Pointer.to(new int[] {n}));
		int blockSize = Math.min(n, BLOCK_SIZE);
		int gridSizeX = (int) Math.ceil((double) n / blockSize);
		cuLaunchKernel(function,
				gridSizeX, 1, 1,      // Grid dimension
				blockSize, 1, 1,      // Block dimension
				0, null,               // Shared memory size and stream
				kernelParameters, null // Kernel- and extra parameters
				);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
 
Example 9
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the {@code "vectorMul"} kernel of {@code helperModule} on the device buffers of three matrices.
 *
 * <p>The kernel receives {@code A.data_d}, {@code B.data_d}, {@code C.data_d} and the element
 * count {@code n = A.rows * A.cols}, in that order. The kernel source is not visible here;
 * presumably it writes the element-wise product of A and B into C (confirm against the module).
 * The launch is one-dimensional: {@code blockSize = min(n, BLOCK_SIZE)} threads per block and
 * {@code ceil(n / blockSize)} blocks, no dynamic shared memory, null stream.
 *
 * <p>When A has no elements nothing is launched: the computed grid and block sizes would
 * both be zero, which the driver rejects as an invalid launch configuration.
 *
 * @param A matrix whose device buffer is the first kernel argument and whose size determines n
 * @param B matrix whose device buffer is the second kernel argument
 * @param C matrix whose device buffer is the third kernel argument
 */
private static void mul(Matrix A, Matrix B, Matrix C) {
	int n = A.rows*A.cols;
	// Skip the launch for empty input; a 0-sized grid/block is not a valid configuration.
	if (n > 0) {
		CUfunction function = new CUfunction();
		cuModuleGetFunction(function, helperModule, "vectorMul");
		Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(C.data_d), Pointer.to(new int[] {n}));
		int blockSize = Math.min(n, BLOCK_SIZE);
		int gridSizeX = (int) Math.ceil((double) n / blockSize);
		cuLaunchKernel(function,
				gridSizeX, 1, 1,      // Grid dimension
				blockSize, 1, 1,      // Block dimension
				0, null,               // Shared memory size and stream
				kernelParameters, null // Kernel- and extra parameters
				);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
 
Example 10
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the {@code "vectorMin"} kernel of {@code helperModule} on the device buffers of two matrices.
 *
 * <p>The kernel receives {@code A.data_d}, {@code B.data_d}, the scalar {@code val} and the
 * element count {@code n = A.rows * A.cols}, in that order. The kernel source is not visible
 * here; presumably it writes the element-wise minimum of A and val into B (confirm against
 * the module). The launch is one-dimensional: {@code blockSize = min(n, BLOCK_SIZE)} threads
 * per block and {@code ceil(n / blockSize)} blocks, no dynamic shared memory, null stream.
 *
 * <p>When A has no elements nothing is launched: the computed grid and block sizes would
 * both be zero, which the driver rejects as an invalid launch configuration.
 *
 * @param A matrix whose device buffer is the first kernel argument and whose size determines n
 * @param B matrix whose device buffer is the second kernel argument
 * @param val scalar passed as the third kernel argument
 */
private static void min(Matrix A, Matrix B, float val) {
	int n = A.rows*A.cols;
	// Skip the launch for empty input; a 0-sized grid/block is not a valid configuration.
	if (n > 0) {
		CUfunction function = new CUfunction();
		cuModuleGetFunction(function, helperModule, "vectorMin");
		Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new float[] {val}), Pointer.to(new int[] {n}));
		int blockSize = Math.min(n, BLOCK_SIZE);
		int gridSizeX = (int) Math.ceil((double) n / blockSize);
		cuLaunchKernel(function,
				gridSizeX, 1, 1,      // Grid dimension
				blockSize, 1, 1,      // Block dimension
				0, null,               // Shared memory size and stream
				kernelParameters, null // Kernel- and extra parameters
				);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
 
Example 11
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the {@code "vectorMax"} kernel of {@code helperModule} on the device buffers of two matrices.
 *
 * <p>The kernel receives {@code A.data_d}, {@code B.data_d}, the scalar {@code val} and the
 * element count {@code n = A.rows * A.cols}, in that order. The kernel source is not visible
 * here; presumably it writes the element-wise maximum of A and val into B (confirm against
 * the module). The launch is one-dimensional: {@code blockSize = min(n, BLOCK_SIZE)} threads
 * per block and {@code ceil(n / blockSize)} blocks, no dynamic shared memory, null stream.
 *
 * <p>When A has no elements nothing is launched: the computed grid and block sizes would
 * both be zero, which the driver rejects as an invalid launch configuration.
 *
 * @param A matrix whose device buffer is the first kernel argument and whose size determines n
 * @param B matrix whose device buffer is the second kernel argument
 * @param val scalar passed as the third kernel argument
 */
private static void max(Matrix A, Matrix B, float val) {
	int n = A.rows*A.cols;
	// Skip the launch for empty input; a 0-sized grid/block is not a valid configuration.
	if (n > 0) {
		CUfunction function = new CUfunction();
		cuModuleGetFunction(function, helperModule, "vectorMax");
		Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new float[] {val}), Pointer.to(new int[] {n}));
		int blockSize = Math.min(n, BLOCK_SIZE);
		int gridSizeX = (int) Math.ceil((double) n / blockSize);
		cuLaunchKernel(function,
				gridSizeX, 1, 1,      // Grid dimension
				blockSize, 1, 1,      // Block dimension
				0, null,               // Shared memory size and stream
				kernelParameters, null // Kernel- and extra parameters
				);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
 
Example 12
Source File: CublasUtil.java    From murphy with Apache License 2.0 4 votes vote down vote up
/**
 * Calls {@code cublasSdgmm} with A as the matrix operand, x as the vector operand
 * (stride 1) and B as the output.
 *
 * <p>Per the cuBLAS contract this multiplies A by the diagonal matrix built from x,
 * on the side selected by {@code left}. Leading dimensions are {@code A.rows} and {@code B.rows}.
 *
 * @param A input matrix
 * @param x vector supplying the diagonal entries
 * @param B output matrix
 * @param left {@code true} selects {@code CUBLAS_SIDE_LEFT}, {@code false} selects {@code CUBLAS_SIDE_RIGHT}
 */
private static void dgmm(Matrix A, Matrix x, Matrix B, boolean left) {
	int sideMode;
	if (left) {
		sideMode = cublasSideMode.CUBLAS_SIDE_LEFT;
	} else {
		sideMode = cublasSideMode.CUBLAS_SIDE_RIGHT;
	}
	JCublas2.cublasSdgmm(cublasHandle, sideMode,
			A.rows, A.cols,
			A.data_d, A.rows,
			x.data_d, 1,
			B.data_d, B.rows);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}
 
Example 13
Source File: CublasUtil.java    From murphy with Apache License 2.0 4 votes vote down vote up
/**
 * Calls {@code cublasSgemm} with both operands untransposed ({@code CUBLAS_OP_N}).
 *
 * <p>Per the cuBLAS gemm contract this computes {@code C = alpha * A * B + beta * C}.
 * The problem size is {@code m = C.rows}, {@code n = C.cols}, {@code k = B.rows}, and each
 * matrix's leading dimension is its own row count.
 *
 * @param alpha scale factor for the product, passed via a host pointer
 * @param A left operand
 * @param B right operand
 * @param beta scale factor for the existing contents of C, passed via a host pointer
 * @param C output matrix
 */
private static void gemm(float alpha, Matrix A, Matrix B, float beta, Matrix C) {
	Pointer alphaPtr = Pointer.to(new float[] {alpha});
	Pointer betaPtr = Pointer.to(new float[] {beta});
	JCublas2.cublasSgemm(cublasHandle,
			cublasOperation.CUBLAS_OP_N, cublasOperation.CUBLAS_OP_N,
			C.rows, C.cols, B.rows,
			alphaPtr,
			A.data_d, A.rows,
			B.data_d, B.rows,
			betaPtr,
			C.data_d, C.rows);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}
 
Example 14
Source File: CublasUtil.java    From murphy with Apache License 2.0 4 votes vote down vote up
/**
 * Allocates a new matrix with this matrix's dimensions and fills it by calling the
 * three-argument {@code min} helper with this matrix, the new matrix and {@code alpha}.
 *
 * @param alpha scalar forwarded to the helper
 * @return the newly allocated result matrix; this matrix is only passed as the helper's input
 */
public Matrix min(float alpha) {
	Matrix clamped = new Matrix(this.rows, this.cols);
	min(this, clamped, alpha);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
	return clamped;
}
 
Example 15
Source File: CublasUtil.java    From murphy with Apache License 2.0 4 votes vote down vote up
/**
 * Calls {@code cublasSger} with x and y as stride-1 vectors and A as the matrix that is updated.
 *
 * <p>Per the cuBLAS contract this performs the rank-1 update {@code A = alpha * x * y^T + A}.
 * The problem size is {@code A.rows} by {@code A.cols} with leading dimension {@code A.rows}.
 *
 * @param alpha scale factor, passed via a host pointer
 * @param x first vector operand
 * @param y second vector operand
 * @param A matrix updated in place
 */
private static void ger(float alpha, Matrix x, Matrix y, Matrix A) {
	Pointer alphaPtr = Pointer.to(new float[] {alpha});
	JCublas2.cublasSger(cublasHandle,
			A.rows, A.cols,
			alphaPtr,
			x.data_d, 1,
			y.data_d, 1,
			A.data_d, A.rows);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}
 
Example 16
Source File: CublasUtil.java    From murphy with Apache License 2.0 4 votes vote down vote up
/**
 * Adds the elements of {@code diag} into this matrix in place using {@code cublasSaxpy}.
 *
 * <p>The source is read with stride 1 over {@code diag.rows * diag.cols} elements and the
 * destination is written with stride {@code this.rows + 1}, which steps along the main
 * diagonal when the buffer uses {@code rows} as its leading dimension (as the other cuBLAS
 * calls in this file assume).
 *
 * @param diag values to add, one per visited destination element
 * @return this matrix, after the in-place update
 */
public Matrix diagAddi(Matrix diag) {
	int count = diag.rows * diag.cols;
	int diagonalStride = this.rows + 1;
	Pointer one = Pointer.to(new float[] {1.0f});
	JCublas2.cublasSaxpy(cublasHandle, count, one, diag.data_d, 1, this.data_d, diagonalStride);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
	return this;
}
Example 17
Source File: CublasUtil.java    From murphy with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a new matrix from this matrix and {@code that} via {@code cublasSgeam}, with both
 * operands untransposed ({@code CUBLAS_OP_N}).
 *
 * <p>Per the cuBLAS contract the result is {@code alpha * this + beta * that}. The problem
 * size is this matrix's {@code rows} by {@code cols}; each matrix's leading dimension is
 * its own row count.
 *
 * @param alpha scale factor for this matrix, passed via a host pointer
 * @param beta scale factor for {@code that}, passed via a host pointer
 * @param that second operand
 * @return a newly allocated matrix holding the combination
 */
public Matrix comb(float alpha, float beta, Matrix that) {
	Matrix combined = new Matrix(this.rows, this.cols);
	Pointer alphaPtr = Pointer.to(new float[] {alpha});
	Pointer betaPtr = Pointer.to(new float[] {beta});
	JCublas2.cublasSgeam(cublasHandle,
			cublasOperation.CUBLAS_OP_N, cublasOperation.CUBLAS_OP_N,
			this.rows, this.cols,
			alphaPtr, this.data_d, this.rows,
			betaPtr, that.data_d, that.rows,
			combined.data_d, combined.rows);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
	return combined;
}
 
Example 18
Source File: CublasUtil.java    From murphy with Apache License 2.0 4 votes vote down vote up
/**
 * Allocates a {@code rows} x {@code cols} matrix and uploads host data into it with
 * {@code cublasSetMatrix}.
 *
 * <p>The copy transfers {@code rows * cols} floats using {@code rows} as the leading
 * dimension on both the host and device side. The host array length is checked first:
 * the native copy does not know the Java array's size, so a short array would otherwise
 * be read past its end. The check runs before the matrix is allocated so that a rejected
 * call does not leave an allocation behind.
 *
 * @param rows number of rows of the new matrix
 * @param cols number of columns of the new matrix
 * @param data_h host values to upload; must contain at least {@code rows * cols} elements
 * @return the newly allocated matrix holding the uploaded values
 * @throws IllegalArgumentException if {@code data_h} has fewer than {@code rows * cols} elements
 */
public static Matrix build(int rows, int cols, float[] data_h) {
	// Widen to long so the size comparison cannot be defeated by int overflow.
	long required = (long) rows * cols;
	if (data_h.length < required) {
		throw new IllegalArgumentException("data_h has " + data_h.length + " elements but a " + rows + "x" + cols + " matrix needs " + required);
	}
	Matrix result = new Matrix(rows, cols);
	JCublas2.cublasSetMatrix(result.rows, result.cols, Sizeof.FLOAT, Pointer.to(data_h), result.rows, result.data_d, result.rows);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	return result;
}
 
Example 19
Source File: CublasUtil.java    From murphy with Apache License 2.0 4 votes vote down vote up
/**
 * Allocates a new matrix with this matrix's dimensions and fills it by calling the
 * two-argument {@code log} helper with this matrix as input and the new matrix as output.
 *
 * @return the newly allocated result matrix
 */
public Matrix log() {
	Matrix logged = new Matrix(this.rows, this.cols);
	log(this, logged);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
	return logged;
}
 
Example 20
Source File: CublasUtil.java    From murphy with Apache License 2.0 4 votes vote down vote up
/**
 * Computes the sum of absolute values of all {@code rows * cols} elements of this matrix
 * with {@code cublasSasum} (stride 1), receiving the result in a one-element host array.
 *
 * @return the value cuBLAS wrote into the host result slot
 */
public float norm1() {
	float[] absSum = new float[1];
	int elementCount = this.rows * this.cols;
	Pointer absSumPtr = Pointer.to(absSum);
	JCublas2.cublasSasum(cublasHandle, elementCount, this.data_d, 1, absSumPtr);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
	return absSum[0];
}