jcuda.driver.JCudaDriver#cuCtxSynchronize

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

/**
 * Drains the {@code allocated} list, calling {@code free()} on each matrix
 * that is eligible for release and keeping the rest.
 *
 * A matrix is released when {@code freeDontFree} is {@code true} or when
 * its {@code dontFree} flag is not set. Matrices that are kept are moved to
 * a fresh list, which then replaces {@code allocated}.
 *
 * @param freeDontFree when {@code true}, matrices flagged {@code dontFree}
 *                     are released as well
 */
public static void freeAll(boolean freeDontFree) {
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
	LinkedList<Matrix> survivors = new LinkedList<Matrix>();
	while (!allocated.isEmpty()) {
		Matrix candidate = allocated.poll();
		// Keep only protected matrices, and only when the caller did not
		// ask for them to be released too.
		boolean keep = !freeDontFree && candidate.dontFree;
		if (keep) {
			survivors.add(candidate);
			continue;
		}
		candidate.free();
	}
	allocated = survivors;
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

/**
 * Launches the {@code vectorSqrt} kernel from {@code helperModule} with the
 * arguments {@code (A.data_d, B.data_d, n)}, where {@code n = A.rows * A.cols}.
 *
 * NOTE(review): the kernel source is not visible here; presumably it writes
 * the element-wise square root of A into B, which must hold at least n
 * elements. Confirm against the kernel.
 *
 * An empty matrix ({@code n == 0}) is a no-op. Without that guard the block
 * size would be 0 and the grid size would come from {@code ceil(0.0 / 0)}
 * (NaN, cast to 0), which is an invalid launch configuration.
 *
 * @param A input matrix; its element count determines the launch size
 * @param B matrix whose device buffer is passed as the second kernel argument
 */
private static void sqrt(Matrix A, Matrix B) {
	int n = A.rows*A.cols;
	if (n == 0) {
		// Nothing to process; avoid a zero-sized kernel launch.
		return;
	}
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorSqrt");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new int[] {n}));
	// Use at most BLOCK_SIZE threads per block and enough blocks to cover n.
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	// In debug mode, wait for the kernel to finish before returning.
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

/**
 * Launches the {@code vectorAbs} kernel from {@code helperModule} with the
 * arguments {@code (A.data_d, B.data_d, n)}, where {@code n = A.rows * A.cols}.
 *
 * NOTE(review): the kernel source is not visible here; presumably it writes
 * the element-wise absolute value of A into B, which must hold at least n
 * elements. Confirm against the kernel.
 *
 * An empty matrix ({@code n == 0}) is a no-op. Without that guard the block
 * size would be 0 and the grid size would come from {@code ceil(0.0 / 0)}
 * (NaN, cast to 0), which is an invalid launch configuration.
 *
 * @param A input matrix; its element count determines the launch size
 * @param B matrix whose device buffer is passed as the second kernel argument
 */
private static void abs(Matrix A, Matrix B) {
	int n = A.rows*A.cols;
	if (n == 0) {
		// Nothing to process; avoid a zero-sized kernel launch.
		return;
	}
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorAbs");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new int[] {n}));
	// Use at most BLOCK_SIZE threads per block and enough blocks to cover n.
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	// In debug mode, wait for the kernel to finish before returning.
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

/**
 * Runs {@code cublasSgemmBatched} (no transposition on either operand) over
 * the matrices in {@code A}, {@code B} and {@code C}; per the cuBLAS
 * documentation this computes {@code C[i] = alpha * A[i] * B[i] + beta * C[i]}.
 *
 * The batch count is {@code A.size()}. Dimensions and leading dimensions are
 * taken from the first element of each list, so every matrix in a list is
 * assumed to share that shape. {@code B} and {@code C} are indexed up to
 * {@code A.size()} and must therefore be at least that long.
 *
 * The three temporary device arrays holding the per-matrix pointers are
 * released in a {@code finally} block so they do not leak if a JCuda call
 * throws. An empty batch is a no-op.
 *
 * @param alpha scalar applied to each product A[i] * B[i]
 * @param A     left operands
 * @param B     right operands
 * @param beta  scalar applied to each existing C[i]
 * @param C     accumulators/outputs, updated on the device
 */
private static void gemmBatched(float alpha, List<Matrix> A, List<Matrix> B, float beta, List<Matrix> C) {
	int batchSize = A.size();
	if (batchSize == 0) {
		// No matrices to multiply; also avoids C.get(0) on an empty list.
		return;
	}
	Pointer[] Apointers = new Pointer[batchSize];
	Pointer[] Bpointers = new Pointer[B.size()];
	Pointer[] Cpointers = new Pointer[C.size()];
	for (int i=0; i<batchSize; ++i) {
		Apointers[i] = A.get(i).data_d;
		Bpointers[i] = B.get(i).data_d;
		Cpointers[i] = C.get(i).data_d;
	}
	// Created up front so the finally block can free whatever was allocated;
	// freeing a pointer that was never allocated (null) is a no-op in CUDA.
	Pointer Apointers_d = new Pointer();
	Pointer Bpointers_d = new Pointer();
	Pointer Cpointers_d = new Pointer();
	try {
		// cuBLAS needs the arrays of matrix pointers to live in device memory.
		JCuda.cudaMalloc(Apointers_d, A.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Apointers_d, Pointer.to(Apointers), A.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		JCuda.cudaMalloc(Bpointers_d, B.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Bpointers_d, Pointer.to(Bpointers), B.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		JCuda.cudaMalloc(Cpointers_d, C.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Cpointers_d, Pointer.to(Cpointers), C.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();

		// m = C.rows, n = C.cols, k = B.rows (the shared inner dimension).
		JCublas2.cublasSgemmBatched(cublasHandle, cublasOperation.CUBLAS_OP_N, cublasOperation.CUBLAS_OP_N, C.get(0).rows, C.get(0).cols, B.get(0).rows, Pointer.to(new float[] {alpha}), Apointers_d, A.get(0).rows, Bpointers_d, B.get(0).rows, Pointer.to(new float[] {beta}), Cpointers_d, C.get(0).rows, batchSize);
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	} finally {
		JCuda.cudaFree(Apointers_d);
		JCuda.cudaFree(Bpointers_d);
		JCuda.cudaFree(Cpointers_d);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

/**
 * Runs {@code cublasSgetrfBatched} followed by {@code cublasSgetriBatched}
 * on a batch of square matrices: per the cuBLAS documentation, the first
 * call LU-factorizes each {@code A[i]} in place (overwriting it) and the
 * second writes the inverse computed from those factors into {@code B[i]}.
 *
 * The batch count is {@code A.size()}; the matrix order and leading
 * dimensions come from the first element of each list, so all matrices are
 * assumed to share that shape. {@code B} is indexed up to {@code A.size()}
 * and must be at least that long.
 *
 * NOTE(review): the per-matrix status array ({@code info_d}) is never copied
 * back to the host, so singular inputs are not reported here.
 *
 * All temporary device buffers are released in a {@code finally} block so
 * they do not leak if a JCuda call throws. An empty batch is a no-op.
 *
 * @param A matrices to invert; overwritten with their LU factors
 * @param B destination matrices for the inverses
 */
private static void getrfGetriBatched(List<Matrix> A, List<Matrix> B) {
	int batchSize = A.size();
	if (batchSize == 0) {
		// Nothing to invert; also avoids A.get(0) on an empty list.
		return;
	}
	Pointer[] Apointers = new Pointer[batchSize];
	Pointer[] Bpointers = new Pointer[B.size()];
	for (int i=0; i<batchSize; ++i) {
		Apointers[i] = A.get(i).data_d;
		Bpointers[i] = B.get(i).data_d;
	}
	// Created up front so the finally block can free whatever was allocated;
	// freeing a pointer that was never allocated (null) is a no-op in CUDA.
	Pointer Apointers_d = new Pointer();
	Pointer Bpointers_d = new Pointer();
	Pointer info_d = new Pointer();
	Pointer pivots_d = new Pointer();
	try {
		// cuBLAS needs the arrays of matrix pointers to live in device memory.
		JCuda.cudaMalloc(Apointers_d, A.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Apointers_d, Pointer.to(Apointers), A.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		JCuda.cudaMalloc(Bpointers_d, B.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Bpointers_d, Pointer.to(Bpointers), B.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		// One status int per matrix and one pivot index per row per matrix.
		JCuda.cudaMalloc(info_d, A.size() * Sizeof.INT);
		JCuda.cudaMalloc(pivots_d, A.get(0).rows * A.size() * Sizeof.INT);
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();

		JCublas2.cublasSgetrfBatched(cublasHandle, A.get(0).rows, Apointers_d, A.get(0).rows, pivots_d, info_d, batchSize);
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();

		JCublas2.cublasSgetriBatched(cublasHandle, A.get(0).rows, Apointers_d, A.get(0).rows, pivots_d, Bpointers_d, B.get(0).rows, info_d, batchSize);
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	} finally {
		JCuda.cudaFree(Apointers_d);
		JCuda.cudaFree(Bpointers_d);
		JCuda.cudaFree(info_d);
		JCuda.cudaFree(pivots_d);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

/**
 * Launches the {@code vectorScalarSet} kernel from {@code helperModule} with
 * the arguments {@code (A.data_d, alpha, n)}, where {@code n = A.rows * A.cols}.
 *
 * NOTE(review): the kernel source is not visible here; presumably it stores
 * alpha into every element of A. Confirm against the kernel.
 *
 * An empty matrix ({@code n == 0}) is a no-op. Without that guard the block
 * size would be 0 and the grid size would come from {@code ceil(0.0 / 0)}
 * (NaN, cast to 0), which is an invalid launch configuration.
 *
 * @param A     matrix whose device buffer is passed to the kernel
 * @param alpha scalar passed as the second kernel argument
 */
private static void scalarSet(Matrix A, float alpha) {
	int n = A.rows*A.cols;
	if (n == 0) {
		// Nothing to process; avoid a zero-sized kernel launch.
		return;
	}
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorScalarSet");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(new float[] {alpha}), Pointer.to(new int[] {n}));
	// Use at most BLOCK_SIZE threads per block and enough blocks to cover n.
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	// In debug mode, wait for the kernel to finish before returning.
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

/**
 * Launches the {@code vectorExp} kernel from {@code helperModule} with the
 * arguments {@code (A.data_d, B.data_d, n)}, where {@code n = A.rows * A.cols}.
 *
 * NOTE(review): the kernel source is not visible here; presumably it writes
 * the element-wise exponential of A into B, which must hold at least n
 * elements. Confirm against the kernel.
 *
 * An empty matrix ({@code n == 0}) is a no-op. Without that guard the block
 * size would be 0 and the grid size would come from {@code ceil(0.0 / 0)}
 * (NaN, cast to 0), which is an invalid launch configuration.
 *
 * @param A input matrix; its element count determines the launch size
 * @param B matrix whose device buffer is passed as the second kernel argument
 */
private static void exp(Matrix A, Matrix B) {
	int n = A.rows*A.cols;
	if (n == 0) {
		// Nothing to process; avoid a zero-sized kernel launch.
		return;
	}
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorExp");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new int[] {n}));
	// Use at most BLOCK_SIZE threads per block and enough blocks to cover n.
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	// In debug mode, wait for the kernel to finish before returning.
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

/**
 * Launches the {@code vectorScalarAdd} kernel from {@code helperModule} with
 * the arguments {@code (A.data_d, B.data_d, alpha, n)}, where
 * {@code n = A.rows * A.cols}.
 *
 * NOTE(review): the kernel source is not visible here; presumably it writes
 * A[i] + alpha into B[i], and B must hold at least n elements. Confirm
 * against the kernel.
 *
 * An empty matrix ({@code n == 0}) is a no-op. Without that guard the block
 * size would be 0 and the grid size would come from {@code ceil(0.0 / 0)}
 * (NaN, cast to 0), which is an invalid launch configuration.
 *
 * @param A     input matrix; its element count determines the launch size
 * @param alpha scalar passed as the third kernel argument
 * @param B     matrix whose device buffer is passed as the second kernel argument
 */
private static void scalarAdd(Matrix A, float alpha, Matrix B) {
	int n = A.rows*A.cols;
	if (n == 0) {
		// Nothing to process; avoid a zero-sized kernel launch.
		return;
	}
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorScalarAdd");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new float[] {alpha}), Pointer.to(new int[] {n}));
	// Use at most BLOCK_SIZE threads per block and enough blocks to cover n.
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	// In debug mode, wait for the kernel to finish before returning.
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

/**
 * Launches the {@code vectorMul} kernel from {@code helperModule} with the
 * arguments {@code (A.data_d, B.data_d, C.data_d, n)}, where
 * {@code n = A.rows * A.cols}.
 *
 * NOTE(review): the kernel source is not visible here; presumably it writes
 * the element-wise product A[i] * B[i] into C[i], and B and C must each hold
 * at least n elements. Confirm against the kernel.
 *
 * An empty matrix ({@code n == 0}) is a no-op. Without that guard the block
 * size would be 0 and the grid size would come from {@code ceil(0.0 / 0)}
 * (NaN, cast to 0), which is an invalid launch configuration.
 *
 * @param A first matrix; its element count determines the launch size
 * @param B matrix whose device buffer is passed as the second kernel argument
 * @param C matrix whose device buffer is passed as the third kernel argument
 */
private static void mul(Matrix A, Matrix B, Matrix C) {
	int n = A.rows*A.cols;
	if (n == 0) {
		// Nothing to process; avoid a zero-sized kernel launch.
		return;
	}
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorMul");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(C.data_d), Pointer.to(new int[] {n}));
	// Use at most BLOCK_SIZE threads per block and enough blocks to cover n.
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	// In debug mode, wait for the kernel to finish before returning.
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

/**
 * Launches the {@code vectorMin} kernel from {@code helperModule} with the
 * arguments {@code (A.data_d, B.data_d, val, n)}, where
 * {@code n = A.rows * A.cols}.
 *
 * NOTE(review): the kernel source is not visible here; presumably it writes
 * min(A[i], val) into B[i], and B must hold at least n elements. Confirm
 * against the kernel.
 *
 * An empty matrix ({@code n == 0}) is a no-op. Without that guard the block
 * size would be 0 and the grid size would come from {@code ceil(0.0 / 0)}
 * (NaN, cast to 0), which is an invalid launch configuration.
 *
 * @param A   input matrix; its element count determines the launch size
 * @param B   matrix whose device buffer is passed as the second kernel argument
 * @param val scalar passed as the third kernel argument
 */
private static void min(Matrix A, Matrix B, float val) {
	int n = A.rows*A.cols;
	if (n == 0) {
		// Nothing to process; avoid a zero-sized kernel launch.
		return;
	}
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorMin");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new float[] {val}), Pointer.to(new int[] {n}));
	// Use at most BLOCK_SIZE threads per block and enough blocks to cover n.
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	// In debug mode, wait for the kernel to finish before returning.
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

5 votes

/**
 * Launches the {@code vectorMax} kernel from {@code helperModule} with the
 * arguments {@code (A.data_d, B.data_d, val, n)}, where
 * {@code n = A.rows * A.cols}.
 *
 * NOTE(review): the kernel source is not visible here; presumably it writes
 * max(A[i], val) into B[i], and B must hold at least n elements. Confirm
 * against the kernel.
 *
 * An empty matrix ({@code n == 0}) is a no-op. Without that guard the block
 * size would be 0 and the grid size would come from {@code ceil(0.0 / 0)}
 * (NaN, cast to 0), which is an invalid launch configuration.
 *
 * @param A   input matrix; its element count determines the launch size
 * @param B   matrix whose device buffer is passed as the second kernel argument
 * @param val scalar passed as the third kernel argument
 */
private static void max(Matrix A, Matrix B, float val) {
	int n = A.rows*A.cols;
	if (n == 0) {
		// Nothing to process; avoid a zero-sized kernel launch.
		return;
	}
	CUfunction function = new CUfunction();
	cuModuleGetFunction(function, helperModule, "vectorMax");
	Pointer kernelParameters = Pointer.to(Pointer.to(A.data_d), Pointer.to(B.data_d), Pointer.to(new float[] {val}), Pointer.to(new int[] {n}));
	// Use at most BLOCK_SIZE threads per block and enough blocks to cover n.
	int blockSize = Math.min(n, BLOCK_SIZE);
	int gridSizeX = (int) Math.ceil((double) n / blockSize);
	cuLaunchKernel(function,
			gridSizeX, 1, 1,      // Grid dimension
			blockSize, 1, 1,      // Block dimension
			0, null,               // Shared memory size and stream
			kernelParameters, null // Kernel- and extra parameters
			);
	// In debug mode, wait for the kernel to finish before returning.
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}

Source File: CublasUtil.java From murphy with Apache License 2.0

4 votes

/**
 * Calls {@code cublasSdgmm} with {@code A} as the matrix operand, {@code x}
 * (stride 1) as the diagonal vector and {@code B} as the output; per the
 * cuBLAS documentation this scales A by diag(x) from the chosen side.
 *
 * @param A    input matrix; supplies the row/column counts and its leading dimension
 * @param x    vector holding the diagonal entries
 * @param B    output matrix
 * @param left {@code true} to use CUBLAS_SIDE_LEFT, {@code false} for CUBLAS_SIDE_RIGHT
 */
private static void dgmm(Matrix A, Matrix x, Matrix B, boolean left) {
	int side;
	if (left) {
		side = cublasSideMode.CUBLAS_SIDE_LEFT;
	} else {
		side = cublasSideMode.CUBLAS_SIDE_RIGHT;
	}
	JCublas2.cublasSdgmm(cublasHandle, side,
			A.rows, A.cols,
			A.data_d, A.rows,
			x.data_d, 1,
			B.data_d, B.rows);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}

Source File: CublasUtil.java From murphy with Apache License 2.0

4 votes

/**
 * Calls {@code cublasSgemm} with no transposition on either operand; per
 * the cuBLAS documentation this computes
 * {@code C = alpha * A * B + beta * C}.
 *
 * The result shape comes from {@code C} and the inner dimension from
 * {@code B.rows}; each matrix's own row count is used as its leading
 * dimension.
 *
 * @param alpha scalar applied to the product A * B
 * @param A     left operand
 * @param B     right operand
 * @param beta  scalar applied to the existing contents of C
 * @param C     accumulator/output matrix
 */
private static void gemm(float alpha, Matrix A, Matrix B, float beta, Matrix C) {
	Pointer alphaHost = Pointer.to(new float[] {alpha});
	Pointer betaHost = Pointer.to(new float[] {beta});
	int m = C.rows;
	int n = C.cols;
	int k = B.rows;
	JCublas2.cublasSgemm(cublasHandle,
			cublasOperation.CUBLAS_OP_N, cublasOperation.CUBLAS_OP_N,
			m, n, k,
			alphaHost,
			A.data_d, A.rows,
			B.data_d, B.rows,
			betaHost,
			C.data_d, C.rows);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}

Source File: CublasUtil.java From murphy with Apache License 2.0

4 votes

/**
 * Allocates a new matrix with this matrix's dimensions and fills it by
 * calling the static {@code min(Matrix, Matrix, float)} helper with this
 * matrix as input and {@code alpha} as the scalar.
 *
 * @param alpha scalar forwarded to the helper
 * @return the newly allocated result matrix
 */
public Matrix min(float alpha) {
	Matrix clamped = new Matrix(rows, cols);
	min(this, clamped, alpha);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
	return clamped;
}

Source File: CublasUtil.java From murphy with Apache License 2.0

4 votes

/**
 * Calls {@code cublasSger} with unit strides for {@code x} and {@code y};
 * per the cuBLAS documentation this performs the rank-1 update
 * {@code A = alpha * x * y^T + A}.
 *
 * @param alpha scalar applied to the outer product
 * @param x     vector operand (stride 1)
 * @param y     vector operand (stride 1)
 * @param A     matrix updated on the device; supplies the dimensions and leading dimension
 */
private static void ger(float alpha, Matrix x, Matrix y, Matrix A) {
	int m = A.rows;
	int n = A.cols;
	Pointer alphaHost = Pointer.to(new float[] {alpha});
	JCublas2.cublasSger(cublasHandle, m, n,
			alphaHost,
			x.data_d, 1,
			y.data_d, 1,
			A.data_d, A.rows);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}

Source File: CublasUtil.java From murphy with Apache License 2.0

4 votes

/**
 * Adds the elements of {@code diag} into this matrix in place, using
 * {@code cublasSaxpy} with a scale of 1 and a destination stride of
 * {@code rows + 1}. With a leading dimension of {@code rows}, that stride
 * steps along the main diagonal of this matrix's buffer.
 *
 * @param diag matrix whose {@code rows * cols} elements are read with stride 1
 * @return this matrix
 */
public Matrix diagAddi(Matrix diag) {
	int count = diag.rows*diag.cols;
	Pointer one = Pointer.to(new float[] {1.0f});
	// Stepping by rows+1 moves one column right and one row down each time.
	int diagonalStride = this.rows+1;
	JCublas2.cublasSaxpy(cublasHandle, count, one, diag.data_d, 1, this.data_d, diagonalStride);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
	return this;
}

Source File: CublasUtil.java From murphy with Apache License 2.0

4 votes

/**
 * Computes a linear combination of this matrix and {@code that} into a
 * newly allocated matrix using {@code cublasSgeam} with no transposition;
 * per the cuBLAS documentation the result is
 * {@code alpha * this + beta * that}.
 *
 * @param alpha scalar applied to this matrix
 * @param beta  scalar applied to {@code that}
 * @param that  second operand
 * @return the newly allocated result matrix with this matrix's dimensions
 */
public Matrix comb(float alpha, float beta, Matrix that) {
	Matrix combined = new Matrix(rows, cols);
	Pointer alphaHost = Pointer.to(new float[] {alpha});
	Pointer betaHost = Pointer.to(new float[] {beta});
	JCublas2.cublasSgeam(cublasHandle,
			cublasOperation.CUBLAS_OP_N, cublasOperation.CUBLAS_OP_N,
			rows, cols,
			alphaHost, data_d, rows,
			betaHost, that.data_d, that.rows,
			combined.data_d, combined.rows);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
	return combined;
}

Source File: CublasUtil.java From murphy with Apache License 2.0

4 votes

/**
 * Allocates a {@code rows x cols} matrix and copies {@code data_h} into its
 * device buffer with {@code cublasSetMatrix}, using {@code rows} as the
 * leading dimension on both the host and device side.
 *
 * The host array length is validated before anything is allocated, so a
 * too-short array is rejected up front instead of letting the native copy
 * read past its end.
 *
 * @param rows   number of rows
 * @param cols   number of columns
 * @param data_h host data; must contain at least {@code rows * cols} floats
 * @return the newly allocated matrix holding the copied data
 * @throws IllegalArgumentException if {@code data_h} has fewer than
 *         {@code rows * cols} elements
 * @throws NullPointerException if {@code data_h} is {@code null}
 */
public static Matrix build(int rows, int cols, float[] data_h) {
	// Widen to long so the size check itself cannot overflow.
	long required = (long) rows * cols;
	if (data_h.length < required) {
		throw new IllegalArgumentException("data_h has " + data_h.length + " elements but a " + rows + "x" + cols + " matrix needs " + required);
	}
	Matrix result = new Matrix(rows, cols);
	JCublas2.cublasSetMatrix(result.rows, result.cols, Sizeof.FLOAT, Pointer.to(data_h), result.rows, result.data_d, result.rows);
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	return result;
}

Source File: CublasUtil.java From murphy with Apache License 2.0

4 votes

/**
 * Allocates a new matrix with this matrix's dimensions and fills it by
 * calling the static {@code log(Matrix, Matrix)} helper with this matrix as
 * input.
 *
 * @return the newly allocated result matrix
 */
public Matrix log() {
	Matrix logged = new Matrix(rows, cols);
	log(this, logged);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
	return logged;
}

Source File: CublasUtil.java From murphy with Apache License 2.0

4 votes

/**
 * Returns the value computed by {@code cublasSasum} over all
 * {@code rows * cols} elements of this matrix (stride 1); per the cuBLAS
 * documentation that is the sum of their absolute values.
 *
 * @return the value written by cuBLAS into the one-element host buffer
 */
public float norm1() {
	float[] sumHost = new float[1];
	int elementCount = rows*cols;
	Pointer sumTarget = Pointer.to(sumHost);
	JCublas2.cublasSasum(cublasHandle, elementCount, data_d, 1, sumTarget);
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
	return sumHost[0];
}

Java Code Examples for jcuda.driver.JCudaDriver#cuCtxSynchronize()