Python numba.float32() Examples
The following are 30 code examples of numba.float32().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module numba, or try the search function.
Example #1
Source File: rotate_iou.py From DSGN with MIT License | 6 votes |
def rbbox_to_corners(corners, rbbox):
    """Expand a rotated box into its four corner points.

    Args:
        corners: output buffer; receives eight values laid out as
            ``x0, y0, x1, y1, x2, y2, x3, y3``.
        rbbox: indexable box read as
            ``[center_x, center_y, x_d, y_d, angle]``.

    Per the original comment, the corners are generated clockwise and
    then rotated clockwise by ``angle``.
    """
    theta = rbbox[4]
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    cx = rbbox[0]
    cy = rbbox[1]
    size_x = rbbox[2]
    size_y = rbbox[3]

    # Corner offsets from the centre before rotation, kept in
    # thread-local float32 scratch arrays.
    local_x = cuda.local.array((4, ), dtype=numba.float32)
    local_y = cuda.local.array((4, ), dtype=numba.float32)
    local_x[0] = -size_x / 2
    local_y[0] = -size_y / 2
    local_x[1] = -size_x / 2
    local_y[1] = size_y / 2
    local_x[2] = size_x / 2
    local_y[2] = size_y / 2
    local_x[3] = size_x / 2
    local_y[3] = -size_y / 2

    # Rotate every offset and translate it to the box centre.
    for k in range(4):
        off_x = local_x[k]
        off_y = local_y[k]
        corners[2 * k] = cos_t * off_x + sin_t * off_y + cx
        corners[2 * k + 1] = -sin_t * off_x + cos_t * off_y + cy
Example #2
Source File: iou.py From pvcnn with MIT License | 6 votes |
def quadrilateral_intersection(pts1, pts2, int_pts):
    """Collect candidate vertices of the overlap of two quadrilaterals.

    Args:
        pts1: flat corner buffer read as ``x0, y0, ..., x3, y3``.
        pts2: second corner buffer with the same layout.
        int_pts: output buffer; found points are written as
            interleaved ``x, y`` pairs.

    Returns:
        The number of points written to ``int_pts``.
    """
    found = 0

    # Corners of either quadrilateral accepted by
    # ``point_in_quadrilateral`` against the other one.
    for k in range(4):
        x_at = 2 * k
        y_at = 2 * k + 1
        if point_in_quadrilateral(pts1[x_at], pts1[y_at], pts2):
            int_pts[found * 2] = pts1[x_at]
            int_pts[found * 2 + 1] = pts1[y_at]
            found += 1
        if point_in_quadrilateral(pts2[x_at], pts2[y_at], pts1):
            int_pts[found * 2] = pts2[x_at]
            int_pts[found * 2 + 1] = pts2[y_at]
            found += 1

    # Crossing points reported for each pair of edges (4 x 4 pairs).
    crossing = cuda.local.array((2,), dtype=numba.float32)
    for edge1 in range(4):
        for edge2 in range(4):
            if line_segment_intersection(pts1, pts2, edge1, edge2, crossing):
                int_pts[found * 2] = crossing[0]
                int_pts[found * 2 + 1] = crossing[1]
                found += 1

    return found
Example #3
Source File: shortlist.py From pyxclib with MIT License | 6 votes |
def map_neighbors(indices, similarity, labels, top_k, pad_ind, pad_val):
    """Map each point's neighbours to at most ``top_k`` labels.

    For every row ``i`` the helper ``map_one`` is called with
    ``labels[indices[i]]``, ``similarity[i]`` and ``pad_ind``; it returns
    a label array and a matching similarity array.

    Args:
        indices: array whose first axis enumerates the points.
        similarity: per-point similarities, indexed like ``indices``.
        labels: array indexed by the entries of ``indices``.
        top_k: number of output columns per point.
        pad_ind: fill value for unused label slots.
        pad_val: fill value for unused similarity slots.

    Returns:
        ``(point_labels, point_label_sims)`` with shapes ``(m, top_k)``
        and dtypes int64 / float32.
    """
    num_points = indices.shape[0]
    point_labels = np.full((num_points, top_k), pad_ind, dtype=np.int64)
    point_label_sims = np.full((num_points, top_k), pad_val, dtype=np.float32)

    for row in nb.prange(num_points):
        row_labels, row_sims = map_one(
            labels[indices[row]], similarity[row], pad_ind)
        n_found = len(row_labels)
        if n_found <= top_k:
            # Everything fits: fill the leading slots, the rest stays padded.
            point_labels[row, :n_found] = row_labels
            point_label_sims[row, :n_found] = row_sims
        else:
            # Keep the top_k highest similarities, largest first.
            best = np.argsort(row_sims)[-top_k:][::-1]
            point_labels[row] = row_labels[best]
            point_label_sims[row] = row_sims[best]

    return point_labels, point_label_sims
Example #4
Source File: nonrigid.py From suite2p with GNU General Public License v3.0 | 6 votes |
def linear_interp(iy, ix, yb, xb, f):
    """Interpolate ``f`` from the grid ``(yb, xb)`` onto ``(iy, ix)``.

    ``f`` is assumed to be 3D with its last two dimensions matching
    ``yb`` and ``xb``. The y axis is processed first, then the x axis.
    On each axis every target coordinate blends its two nearest grid
    nodes, each weighted by one minus its normalised distance.

    Returns:
        Array with the leading axis of ``f`` preserved and the last two
        axes resampled to ``iy.size`` and ``ix.size``.
    """
    fup = f.copy().astype(np.float32)

    for targets, nodes in ((iy, yb), (ix, xb)):
        # Rotate the axis being interpolated to the front.
        fup = np.transpose(fup, (1, 2, 0)).copy()
        dist = np.abs(targets[:, np.newaxis] - nodes[:, np.newaxis].T)
        rows = np.arange(0, targets.size, 1, int)

        nearest = np.argmin(dist, axis=1)
        w_near = dist[rows, nearest]
        # Mask the nearest node so the next argmin finds the runner-up.
        dist[rows, nearest] = np.inf
        second = np.argmin(dist, axis=1)
        w_second = dist[rows, second]

        total = w_near + w_second
        w_near /= total
        w_second /= total

        fup = ((1 - w_near[:, np.newaxis, np.newaxis]) * fup[nearest]
               + (1 - w_second[:, np.newaxis, np.newaxis]) * fup[second])

    # One more rotation restores the original axis order.
    return np.transpose(fup, (1, 2, 0))
Example #5
Source File: utils.py From mnnpy with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_bio_span(exprs, ndim, svd_mode, var_subset=None, **kwargs):
    """Run ``svd_internal`` on column-centred ``exprs`` and return a basis.

    Args:
        exprs: 2D array; column means are subtracted before the SVD.
        ndim: requested number of components, capped by the shape of
            the (possibly subsetted) centred matrix.
        svd_mode: forwarded to ``svd_internal``.
        var_subset: optional iterable of column indices; when given,
            only those columns enter the SVD and the remaining columns
            are projected afterwards.
        **kwargs: forwarded to ``svd_internal``.

    Returns:
        ``singular[0]`` when ``var_subset`` is None, otherwise a float32
        array of shape ``(exprs.shape[1], ndim)``.
    """
    centred = exprs - np.mean(exprs, axis=0)
    use_subset = var_subset is not None

    if use_subset:
        n_cols = centred.shape[1]
        keeper = [False] * n_cols
        for col in var_subset:
            keeper[col] = True
        subsetter = [not kept for kept in keeper]
        leftovers = centred[:, subsetter].T
        centred = centred[:, keeper]

    ndim = min(ndim, *centred.shape)
    singular = svd_internal(centred.T, ndim, svd_mode, **kwargs)

    if not use_subset:
        return singular[0]

    output = np.zeros((exprs.shape[1], ndim), dtype=np.float32)
    output[keeper,] = singular[0]
    # Left-out columns: project onto singular[2], scale by singular[1].
    projected = np.dot(leftovers, singular[2])
    output[subsetter,] = np.divide(projected, singular[1][range(ndim)])
    return output
Example #6
Source File: iou.py From pvcnn with MIT License | 6 votes |
def rbbox_to_corners(corners, rbbox):
    """Expand a rotated box into its four corner points.

    Args:
        corners: output buffer; receives eight values laid out as
            ``x0, y0, x1, y1, x2, y2, x3, y3``.
        rbbox: indexable box read as
            ``[center_x, center_y, x_d, y_d, angle]``.

    Per the original comment, the corners are generated clockwise and
    then rotated clockwise by ``angle``.
    """
    theta = rbbox[4]
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    cx = rbbox[0]
    cy = rbbox[1]
    size_x = rbbox[2]
    size_y = rbbox[3]

    # Corner offsets from the centre before rotation, kept in
    # thread-local float32 scratch arrays.
    local_x = cuda.local.array((4,), dtype=numba.float32)
    local_y = cuda.local.array((4,), dtype=numba.float32)
    local_x[0] = -size_x / 2
    local_y[0] = -size_y / 2
    local_x[1] = -size_x / 2
    local_y[1] = size_y / 2
    local_x[2] = size_x / 2
    local_y[2] = size_y / 2
    local_x[3] = size_x / 2
    local_y[3] = -size_y / 2

    # Rotate every offset and translate it to the box centre.
    for k in range(4):
        off_x = local_x[k]
        off_y = local_y[k]
        corners[2 * k] = cos_t * off_x + sin_t * off_y + cx
        corners[2 * k + 1] = -sin_t * off_x + cos_t * off_y + cy
Example #7
Source File: rotate_iou.py From 3d-vehicle-tracking with BSD 3-Clause "New" or "Revised" License | 6 votes |
def rbbox_to_corners(corners, rbbox):
    """Expand a rotated box into its four corner points.

    Args:
        corners: output buffer; receives eight values laid out as
            ``x0, y0, x1, y1, x2, y2, x3, y3``.
        rbbox: indexable box read as
            ``[center_x, center_y, x_d, y_d, angle]``.

    Per the original comment, the corners are generated clockwise and
    then rotated clockwise by ``angle``.
    """
    theta = rbbox[4]
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    cx = rbbox[0]
    cy = rbbox[1]
    size_x = rbbox[2]
    size_y = rbbox[3]

    # Corner offsets from the centre before rotation, kept in
    # thread-local float32 scratch arrays.
    local_x = cuda.local.array((4,), dtype=numba.float32)
    local_y = cuda.local.array((4,), dtype=numba.float32)
    local_x[0] = -size_x / 2
    local_y[0] = -size_y / 2
    local_x[1] = -size_x / 2
    local_y[1] = size_y / 2
    local_x[2] = size_x / 2
    local_y[2] = size_y / 2
    local_x[3] = size_x / 2
    local_y[3] = -size_y / 2

    # Rotate every offset and translate it to the box centre.
    for k in range(4):
        off_x = local_x[k]
        off_y = local_y[k]
        corners[2 * k] = cos_t * off_x + sin_t * off_y + cx
        corners[2 * k + 1] = -sin_t * off_x + cos_t * off_y + cy
Example #8
Source File: rotate_iou.py From 3d-vehicle-tracking with BSD 3-Clause "New" or "Revised" License | 6 votes |
def quadrilateral_intersection(pts1, pts2, int_pts):
    """Collect candidate vertices of the overlap of two quadrilaterals.

    Args:
        pts1: flat corner buffer read as ``x0, y0, ..., x3, y3``.
        pts2: second corner buffer with the same layout.
        int_pts: output buffer; found points are written as
            interleaved ``x, y`` pairs.

    Returns:
        The number of points written to ``int_pts``.
    """
    found = 0

    # Corners of either quadrilateral accepted by
    # ``point_in_quadrilateral`` against the other one.
    for k in range(4):
        x_at = 2 * k
        y_at = 2 * k + 1
        if point_in_quadrilateral(pts1[x_at], pts1[y_at], pts2):
            int_pts[found * 2] = pts1[x_at]
            int_pts[found * 2 + 1] = pts1[y_at]
            found += 1
        if point_in_quadrilateral(pts2[x_at], pts2[y_at], pts1):
            int_pts[found * 2] = pts2[x_at]
            int_pts[found * 2 + 1] = pts2[y_at]
            found += 1

    # Crossing points reported for each pair of edges (4 x 4 pairs).
    crossing = cuda.local.array((2,), dtype=numba.float32)
    for edge1 in range(4):
        for edge2 in range(4):
            if line_segment_intersection(pts1, pts2, edge1, edge2, crossing):
                int_pts[found * 2] = crossing[0]
                int_pts[found * 2 + 1] = crossing[1]
                found += 1

    return found
Example #9
Source File: rotate_iou.py From PointRCNN with MIT License | 6 votes |
def quadrilateral_intersection(pts1, pts2, int_pts):
    """Collect candidate vertices of the overlap of two quadrilaterals.

    Args:
        pts1: flat corner buffer read as ``x0, y0, ..., x3, y3``.
        pts2: second corner buffer with the same layout.
        int_pts: output buffer; found points are written as
            interleaved ``x, y`` pairs.

    Returns:
        The number of points written to ``int_pts``.
    """
    found = 0

    # Corners of either quadrilateral accepted by
    # ``point_in_quadrilateral`` against the other one.
    for k in range(4):
        x_at = 2 * k
        y_at = 2 * k + 1
        if point_in_quadrilateral(pts1[x_at], pts1[y_at], pts2):
            int_pts[found * 2] = pts1[x_at]
            int_pts[found * 2 + 1] = pts1[y_at]
            found += 1
        if point_in_quadrilateral(pts2[x_at], pts2[y_at], pts1):
            int_pts[found * 2] = pts2[x_at]
            int_pts[found * 2 + 1] = pts2[y_at]
            found += 1

    # Crossing points reported for each pair of edges (4 x 4 pairs).
    crossing = cuda.local.array((2, ), dtype=numba.float32)
    for edge1 in range(4):
        for edge2 in range(4):
            if line_segment_intersection(pts1, pts2, edge1, edge2, crossing):
                int_pts[found * 2] = crossing[0]
                int_pts[found * 2 + 1] = crossing[1]
                found += 1

    return found
Example #10
Source File: rotate_iou.py From DSGN with MIT License | 6 votes |
def quadrilateral_intersection(pts1, pts2, int_pts):
    """Collect candidate vertices of the overlap of two quadrilaterals.

    Args:
        pts1: flat corner buffer read as ``x0, y0, ..., x3, y3``.
        pts2: second corner buffer with the same layout.
        int_pts: output buffer; found points are written as
            interleaved ``x, y`` pairs.

    Returns:
        The number of points written to ``int_pts``.
    """
    found = 0

    # Corners of either quadrilateral accepted by
    # ``point_in_quadrilateral`` against the other one.
    for k in range(4):
        x_at = 2 * k
        y_at = 2 * k + 1
        if point_in_quadrilateral(pts1[x_at], pts1[y_at], pts2):
            int_pts[found * 2] = pts1[x_at]
            int_pts[found * 2 + 1] = pts1[y_at]
            found += 1
        if point_in_quadrilateral(pts2[x_at], pts2[y_at], pts1):
            int_pts[found * 2] = pts2[x_at]
            int_pts[found * 2 + 1] = pts2[y_at]
            found += 1

    # Crossing points reported for each pair of edges (4 x 4 pairs).
    crossing = cuda.local.array((2, ), dtype=numba.float32)
    for edge1 in range(4):
        for edge2 in range(4):
            if line_segment_intersection(pts1, pts2, edge1, edge2, crossing):
                int_pts[found * 2] = crossing[0]
                int_pts[found * 2 + 1] = crossing[1]
                found += 1

    return found
Example #11
Source File: nms_gpu.py From second.pytorch with MIT License | 6 votes |
def rbbox_to_corners(corners, rbbox):
    """Expand a rotated box into its four corner points.

    Args:
        corners: output buffer; receives eight values laid out as
            ``x0, y0, x1, y1, x2, y2, x3, y3``.
        rbbox: indexable box read as
            ``[center_x, center_y, x_d, y_d, angle]``.

    Per the original comment, the corners are generated clockwise and
    then rotated clockwise by ``angle``.
    """
    theta = rbbox[4]
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    cx = rbbox[0]
    cy = rbbox[1]
    size_x = rbbox[2]
    size_y = rbbox[3]

    # Corner offsets from the centre before rotation, kept in
    # thread-local float32 scratch arrays.
    local_x = cuda.local.array((4, ), dtype=numba.float32)
    local_y = cuda.local.array((4, ), dtype=numba.float32)
    local_x[0] = -size_x / 2
    local_y[0] = -size_y / 2
    local_x[1] = -size_x / 2
    local_y[1] = size_y / 2
    local_x[2] = size_x / 2
    local_y[2] = size_y / 2
    local_x[3] = size_x / 2
    local_y[3] = -size_y / 2

    # Rotate every offset and translate it to the box centre.
    for k in range(4):
        off_x = local_x[k]
        off_y = local_y[k]
        corners[2 * k] = cos_t * off_x + sin_t * off_y + cx
        corners[2 * k + 1] = -sin_t * off_x + cos_t * off_y + cy
Example #12
Source File: spectrum.py From spectrum_utils with Apache License 2.0 | 6 votes |
def _get_scaled_intensity_root(intensity: np.ndarray, degree: int)\ -> np.ndarray: """ JIT helper function for `MsmsSpectrum.scale_intensity`. Parameters ---------- intensity : np.ndarray The intensities of the spectrum fragment peaks. degree : int The degree of the root scaling. Returns ------- np.ndarray The root-scaled intensities. """ return np.power(intensity, 1 / degree).astype(np.float32)
Example #13
Source File: spectrum.py From spectrum_utils with Apache License 2.0 | 6 votes |
def _get_scaled_intensity_log(intensity: np.ndarray, base: int) -> np.ndarray: """ JIT helper function for `MsmsSpectrum.scale_intensity`. Parameters ---------- intensity : np.ndarray The intensities of the spectrum fragment peaks. base : int The base of the log scaling. Returns ------- np.ndarray The log-scaled intensities. """ return (np.log1p(intensity) / np.log(base)).astype(np.float32)
Example #14
Source File: spectrum.py From spectrum_utils with Apache License 2.0 | 6 votes |
def _get_scaled_intensity_rank(intensity: np.ndarray, max_rank: int)\ -> np.ndarray: """ JIT helper function for `MsmsSpectrum.scale_intensity`. Parameters ---------- intensity : np.ndarray The intensities of the spectrum fragment peaks. max_rank : int The maximum rank of the rank scaling. Returns ------- np.ndarray The rank-scaled intensities. """ return ((max_rank - np.argsort(np.argsort(intensity)[::-1])) .astype(np.float32))
Example #15
Source File: rotate_iou.py From PointRCNN with MIT License | 6 votes |
def rbbox_to_corners(corners, rbbox):
    """Expand a rotated box into its four corner points.

    Args:
        corners: output buffer; receives eight values laid out as
            ``x0, y0, x1, y1, x2, y2, x3, y3``.
        rbbox: indexable box read as
            ``[center_x, center_y, x_d, y_d, angle]``.

    Per the original comment, the corners are generated clockwise and
    then rotated clockwise by ``angle``.
    """
    theta = rbbox[4]
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    cx = rbbox[0]
    cy = rbbox[1]
    size_x = rbbox[2]
    size_y = rbbox[3]

    # Corner offsets from the centre before rotation, kept in
    # thread-local float32 scratch arrays.
    local_x = cuda.local.array((4, ), dtype=numba.float32)
    local_y = cuda.local.array((4, ), dtype=numba.float32)
    local_x[0] = -size_x / 2
    local_y[0] = -size_y / 2
    local_x[1] = -size_x / 2
    local_y[1] = size_y / 2
    local_x[2] = size_x / 2
    local_y[2] = size_y / 2
    local_x[3] = size_x / 2
    local_y[3] = -size_y / 2

    # Rotate every offset and translate it to the box centre.
    for k in range(4):
        off_x = local_x[k]
        off_y = local_y[k]
        corners[2 * k] = cos_t * off_x + sin_t * off_y + cx
        corners[2 * k + 1] = -sin_t * off_x + cos_t * off_y + cy
Example #16
Source File: rotate_iou.py From kitti-object-eval-python with MIT License | 6 votes |
def rbbox_to_corners(corners, rbbox):
    """Expand a rotated box into its four corner points.

    Args:
        corners: output buffer; receives eight values laid out as
            ``x0, y0, x1, y1, x2, y2, x3, y3``.
        rbbox: indexable box read as
            ``[center_x, center_y, x_d, y_d, angle]``.

    Per the original comment, the corners are generated clockwise and
    then rotated clockwise by ``angle``.
    """
    theta = rbbox[4]
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    cx = rbbox[0]
    cy = rbbox[1]
    size_x = rbbox[2]
    size_y = rbbox[3]

    # Corner offsets from the centre before rotation, kept in
    # thread-local float32 scratch arrays.
    local_x = cuda.local.array((4, ), dtype=numba.float32)
    local_y = cuda.local.array((4, ), dtype=numba.float32)
    local_x[0] = -size_x / 2
    local_y[0] = -size_y / 2
    local_x[1] = -size_x / 2
    local_y[1] = size_y / 2
    local_x[2] = size_x / 2
    local_y[2] = size_y / 2
    local_x[3] = size_x / 2
    local_y[3] = -size_y / 2

    # Rotate every offset and translate it to the box centre.
    for k in range(4):
        off_x = local_x[k]
        off_y = local_y[k]
        corners[2 * k] = cos_t * off_x + sin_t * off_y + cx
        corners[2 * k + 1] = -sin_t * off_x + cos_t * off_y + cy
Example #17
Source File: rotate_iou.py From kitti-object-eval-python with MIT License | 6 votes |
def quadrilateral_intersection(pts1, pts2, int_pts):
    """Collect candidate vertices of the overlap of two quadrilaterals.

    Args:
        pts1: flat corner buffer read as ``x0, y0, ..., x3, y3``.
        pts2: second corner buffer with the same layout.
        int_pts: output buffer; found points are written as
            interleaved ``x, y`` pairs.

    Returns:
        The number of points written to ``int_pts``.
    """
    found = 0

    # Corners of either quadrilateral accepted by
    # ``point_in_quadrilateral`` against the other one.
    for k in range(4):
        x_at = 2 * k
        y_at = 2 * k + 1
        if point_in_quadrilateral(pts1[x_at], pts1[y_at], pts2):
            int_pts[found * 2] = pts1[x_at]
            int_pts[found * 2 + 1] = pts1[y_at]
            found += 1
        if point_in_quadrilateral(pts2[x_at], pts2[y_at], pts1):
            int_pts[found * 2] = pts2[x_at]
            int_pts[found * 2 + 1] = pts2[y_at]
            found += 1

    # Crossing points reported for each pair of edges (4 x 4 pairs).
    crossing = cuda.local.array((2, ), dtype=numba.float32)
    for edge1 in range(4):
        for edge2 in range(4):
            if line_segment_intersection(pts1, pts2, edge1, edge2, crossing):
                int_pts[found * 2] = crossing[0]
                int_pts[found * 2 + 1] = crossing[1]
                found += 1

    return found
Example #18
Source File: preprocessing.py From stytra with GNU General Public License v3.0 | 6 votes |
def _process(
    self,
    im,
    learning_rate: Param(0.04, (0.0, 1.0)),
    learn_every: Param(400, (1, 10000)),
    only_darker: Param(True),
):
    """Maintain a running background image and return its difference to ``im``.

    The first frame becomes the background. Afterwards, whenever the
    internal counter ``self.i`` is zero, the background is blended with
    the new frame using ``learning_rate``. The counter advances modulo
    ``learn_every`` on every call.

    Returns a ``NodeOutput`` carrying the collected messages and either
    ``negdif`` (when ``only_darker`` is true) or ``absdif`` of the
    background and ``im``.
    """
    messages = []

    if self.background_image is None:
        self.background_image = im.astype(np.float32)
        # NOTE(review): the message text is reproduced verbatim (including
        # its spelling) because it is emitted at runtime.
        messages.append("I:New backgorund image set")
    elif self.i == 0:
        # Exponential moving average, updated in place.
        blended = im.astype(np.float32) * np.float32(
            learning_rate
        ) + self.background_image * np.float32(1 - learning_rate)
        self.background_image[:, :] = blended

    self.i = (self.i + 1) % learn_every

    difference = negdif if only_darker else absdif
    return NodeOutput(messages, difference(self.background_image, im))
Example #19
Source File: rotate_iou.py From DSGN with MIT License | 5 votes |
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
    """Compute the pairwise IoU of rotated boxes on the GPU.

    Converted from [this project](
        https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).
    NOTE(review): the original docstring quoted "500x faster than cpu
    version (take 5ms in one example with numba.cuda code)"; that figure
    is not verified here.

    Args:
        boxes (float tensor: [N, 5]): rbboxes. format: centers, dims,
            angles(clockwise when positive)
        query_boxes (float tensor: [K, 5]): rbboxes in the same format.
        criterion (int, optional): Defaults to -1. Forwarded unchanged to
            ``rotate_iou_kernel_eval``.
        device_id (int, optional): Defaults to 0. CUDA device selected
            before the kernel launch.

    Returns:
        np.ndarray: float32 array of shape [N, K]. All zeros (no GPU work)
        when either input is empty.
    """
    # Both inputs are converted to float32 before upload. The result is
    # float32 as well: the original captured the caller's dtype in an
    # unused local and then cast to the already-converted dtype, which
    # was a no-op. The float32 result is kept so callers see no change.
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        return iou

    threadsPerBlock = 8 * 8
    cuda.select_device(device_id)
    blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock))

    stream = cuda.stream()
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel_eval[blockspergrid, threadsPerBlock, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev, criterion)
        # reshape gives a flat view of ``iou``, so the copy fills it.
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    return iou
Example #20
Source File: nms_gpu.py From second.pytorch with MIT License | 5 votes |
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
    """Compute the pairwise IoU of rotated boxes on the GPU.

    Converted from [this project](
        https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).
    NOTE(review): the original docstring quoted "8x faster than cpu
    version (take 5ms in one example with numba.cuda code)"; that figure
    is not verified here.

    Args:
        boxes (float tensor: [N, 5]): rbboxes. format: centers, dims,
            angles(clockwise when positive)
        query_boxes (float tensor: [K, 5]): rbboxes in the same format.
        criterion (int, optional): Defaults to -1. Forwarded unchanged to
            ``rotate_iou_kernel_eval``.
        device_id (int, optional): Defaults to 0. CUDA device selected
            before the kernel launch.

    Returns:
        np.ndarray: float32 array of shape [N, K]. All zeros (no GPU work)
        when either input is empty.
    """
    # Both inputs are converted to float32 before upload. The result is
    # float32 as well: the original captured the caller's dtype in an
    # unused local and then cast to the already-converted dtype, which
    # was a no-op. The float32 result is kept so callers see no change.
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        return iou

    threadsPerBlock = 8 * 8
    cuda.select_device(device_id)
    blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock))

    stream = cuda.stream()
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel_eval[blockspergrid, threadsPerBlock, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev, criterion)
        # reshape gives a flat view of ``iou``, so the copy fills it.
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    return iou
Example #21
Source File: utils.py From mnnpy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def kdist(m, n):
    """Return the matrix of dot products between rows of ``m`` and ``n``.

    Args:
        m: 2D array, one vector per row.
        n: 2D array, one vector per row.

    Returns:
        float32 array of shape ``(m.shape[0], n.shape[0])`` where entry
        ``[i, j]`` is ``np.dot(m[i], n[j])``.
    """
    n_rows = m.shape[0]
    n_cols = n.shape[0]
    out = np.zeros((n_rows, n_cols), dtype=np.float32)
    for r in range(n_rows):
        left = m[r]
        for c in range(n_cols):
            out[r, c] = np.dot(left, n[c])
    return out
Example #22
Source File: nonrigid.py From suite2p with GNU General Public License v3.0 | 5 votes |
def map_coordinates(I, yc, xc, Y):
    """ bilinear transform of image with ycoordinates yc and xcoordinates xc to Y

    The result is written into ``Y``; nothing is returned. Note that
    ``yc`` and ``xc`` are modified in place: after the call they hold the
    coordinates minus their int32-converted parts.

    Parameters
    -------------
    I : int16 or float32, 2D array size [Ly x Lx]
    yc : 2D array size [Ly x Lx], new y coordinates
    xc : 2D array size [Ly x Lx], new x coordinates

    Returns
    -----------
    Y : float32, 2D array size [Ly x Lx], shifted I (filled in place)
    """
    Ly, Lx = I.shape
    y_base = yc.copy().astype(np.int32)
    x_base = xc.copy().astype(np.int32)
    # Keep only the remainder of every coordinate (in-place).
    yc -= y_base
    xc -= x_base

    n_rows = y_base.shape[0]
    n_cols = y_base.shape[1]
    for i in range(n_rows):
        for j in range(n_cols):
            # Clamp the four sampled pixels to the image bounds.
            yf = min(Ly-1, max(0, y_base[i, j]))
            xf = min(Lx-1, max(0, x_base[i, j]))
            yf1 = min(Ly-1, yf+1)
            xf1 = min(Lx-1, xf+1)

            y = yc[i, j]
            x = xc[i, j]
            inv_y = 1 - y
            inv_x = 1 - x

            top_left = np.float32(I[yf, xf])
            top_right = np.float32(I[yf, xf1])
            bottom_left = np.float32(I[yf1, xf])
            bottom_right = np.float32(I[yf1, xf1])

            Y[i, j] = (top_left * inv_y * inv_x +
                       top_right * inv_y * x +
                       bottom_left * y * inv_x +
                       bottom_right * y * x)
Example #23
Source File: nms_gpu.py From second.pytorch with MIT License | 5 votes |
def rotate_iou_kernel_eval(N, K, dev_boxes, dev_query_boxes, dev_iou, criterion=-1):
    """CUDA kernel: fill one 64 x 64 tile of the N x K rotated-IoU matrix.

    Args:
        N: number of boxes (rows of the result).
        K: number of query boxes (columns of the result).
        dev_boxes: flat box buffer, five values per box.
        dev_query_boxes: flat query-box buffer, five values per box.
        dev_iou: flat output buffer indexed as ``row * K + col``.
        criterion: forwarded to ``devRotateIoUEval``.

    ``blockIdx.x`` selects the row tile and ``blockIdx.y`` the column
    tile; each thread handles one row of the tile.
    """
    threadsPerBlock = 8 * 8
    tile_row = cuda.blockIdx.x
    tile_col = cuda.blockIdx.y
    tx = cuda.threadIdx.x
    # The last tile in each direction may be only partially filled.
    rows_here = min(N - tile_row * threadsPerBlock, threadsPerBlock)
    cols_here = min(K - tile_col * threadsPerBlock, threadsPerBlock)

    block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32)
    block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32)

    query_idx = threadsPerBlock * tile_col + tx
    box_idx = threadsPerBlock * tile_row + tx

    # Stage this tile's boxes in shared memory, one box per thread.
    if tx < cols_here:
        for k in range(5):
            block_qboxes[tx * 5 + k] = dev_query_boxes[query_idx * 5 + k]
    if tx < rows_here:
        for k in range(5):
            block_boxes[tx * 5 + k] = dev_boxes[box_idx * 5 + k]
    cuda.syncthreads()

    if tx < rows_here:
        for i in range(cols_here):
            offset = (tile_row * threadsPerBlock * K
                      + tile_col * threadsPerBlock + tx * K + i)
            dev_iou[offset] = devRotateIoUEval(
                block_qboxes[i * 5:i * 5 + 5],
                block_boxes[tx * 5:tx * 5 + 5],
                criterion)
Example #24
Source File: nms_gpu.py From second.pytorch with MIT License | 5 votes |
def rotate_iou_kernel(N, K, dev_boxes, dev_query_boxes, dev_iou):
    """CUDA kernel: fill one 64 x 64 tile of the N x K rotated-IoU matrix.

    Args:
        N: number of boxes (rows of the result).
        K: number of query boxes (columns of the result).
        dev_boxes: flat box buffer, five values per box.
        dev_query_boxes: flat query-box buffer, five values per box.
        dev_iou: flat output buffer indexed as ``row * K + col``.

    ``blockIdx.x`` selects the row tile and ``blockIdx.y`` the column
    tile; each thread handles one row of the tile and calls
    ``devRotateIoU`` for every column in it.
    """
    threadsPerBlock = 8 * 8
    tile_row = cuda.blockIdx.x
    tile_col = cuda.blockIdx.y
    tx = cuda.threadIdx.x
    # The last tile in each direction may be only partially filled.
    rows_here = min(N - tile_row * threadsPerBlock, threadsPerBlock)
    cols_here = min(K - tile_col * threadsPerBlock, threadsPerBlock)

    block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32)
    block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32)

    query_idx = threadsPerBlock * tile_col + tx
    box_idx = threadsPerBlock * tile_row + tx

    # Stage this tile's boxes in shared memory, one box per thread.
    if tx < cols_here:
        for k in range(5):
            block_qboxes[tx * 5 + k] = dev_query_boxes[query_idx * 5 + k]
    if tx < rows_here:
        for k in range(5):
            block_boxes[tx * 5 + k] = dev_boxes[box_idx * 5 + k]
    cuda.syncthreads()

    if tx < rows_here:
        for i in range(cols_here):
            offset = (tile_row * threadsPerBlock * K
                      + tile_col * threadsPerBlock + tx * K + i)
            dev_iou[offset] = devRotateIoU(
                block_qboxes[i * 5:i * 5 + 5],
                block_boxes[tx * 5:tx * 5 + 5])
Example #25
Source File: nms_gpu.py From second.pytorch with MIT License | 5 votes |
def rotate_nms_gpu(dets, nms_overlap_thresh, device_id=0):
    """Rotated non-maximum suppression on the GPU.

    WARNING (from the original author): this function can provide the
    right result but its performance has not been tested.

    Args:
        dets: 2D array of detections; column 5 is used as the score and
            the whole row is uploaded as a box (six values per row).
        nms_overlap_thresh: overlap threshold passed to
            ``rotate_nms_kernel``.
        device_id (optional): Defaults to 0. CUDA device to select.

    Returns:
        list: indices into ``dets`` of the kept detections, taken from
        the descending-score ordering.
    """
    dets = dets.astype(np.float32)
    boxes_num = dets.shape[0]

    # Process boxes from highest to lowest score.
    order = dets[:, 5].argsort()[::-1].astype(np.int32)
    boxes_host = dets[order, :]
    keep_out = np.zeros([boxes_num], dtype=np.int32)

    threadsPerBlock = 8 * 8
    col_blocks = div_up(boxes_num, threadsPerBlock)
    cuda.select_device(device_id)
    # mask_host shape: boxes_num * col_blocks * sizeof(np.uint64)
    mask_host = np.zeros((boxes_num * col_blocks, ), dtype=np.uint64)
    blockspergrid = (div_up(boxes_num, threadsPerBlock),
                     div_up(boxes_num, threadsPerBlock))

    stream = cuda.stream()
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes_host.reshape([-1]), stream)
        mask_dev = cuda.to_device(mask_host, stream)
        rotate_nms_kernel[blockspergrid, threadsPerBlock, stream](
            boxes_num, nms_overlap_thresh, boxes_dev, mask_dev)
        mask_dev.copy_to_host(mask_host, stream=stream)

    num_out = nms_postprocess(keep_out, mask_host, boxes_num)
    kept = keep_out[:num_out]
    return list(order[kept])
Example #26
Source File: nms_gpu.py From second.pytorch with MIT License | 5 votes |
def rotate_nms_kernel(n_boxes, nms_overlap_thresh, dev_boxes, dev_mask):
    """CUDA kernel: build the pairwise suppression bit mask for rotated NMS.

    Args:
        n_boxes: number of boxes.
        nms_overlap_thresh: IoU above which a pair is flagged.
        dev_boxes: flat box buffer, six values per box; the first five
            are passed to ``devRotateIoU``.
        dev_mask: flat output buffer with one entry per
            (box, column block); bit ``i`` of an entry is set when the
            box overlaps box ``i`` of that column block.
    """
    threadsPerBlock = 8 * 8
    row_start = cuda.blockIdx.y
    col_start = cuda.blockIdx.x
    tx = cuda.threadIdx.x
    # The last block in each direction may be only partially filled.
    row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock)
    col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock)

    block_boxes = cuda.shared.array(shape=(64 * 6, ), dtype=numba.float32)
    dev_box_idx = threadsPerBlock * col_start + tx
    # Stage the column block's boxes in shared memory, one box per thread.
    if tx < col_size:
        for k in range(6):
            block_boxes[tx * 6 + k] = dev_boxes[dev_box_idx * 6 + k]
    cuda.syncthreads()

    if tx < row_size:
        cur_box_idx = threadsPerBlock * row_start + tx
        t = 0
        # On the diagonal block only compare against later boxes.
        start = tx + 1 if row_start == col_start else 0
        for i in range(start, col_size):
            iou = devRotateIoU(dev_boxes[cur_box_idx * 6:cur_box_idx * 6 + 5],
                               block_boxes[i * 6:i * 6 + 5])
            if iou > nms_overlap_thresh:
                t |= 1 << i
        col_blocks = ((n_boxes) // (threadsPerBlock) + (
            (n_boxes) % (threadsPerBlock) > 0))
        dev_mask[cur_box_idx * col_blocks + col_start] = t
Example #27
Source File: nms_gpu.py From second.pytorch with MIT License | 5 votes |
def inter(rbbox1, rbbox2):
    """Return the value of ``area`` for the overlap of two rotated boxes.

    Both boxes are expanded to corners with ``rbbox_to_corners``, the
    overlap points are collected by ``quadrilateral_intersection``,
    ordered by ``sort_vertex_in_convex_polygon`` and handed to ``area``.
    """
    # Eight floats per box: four (x, y) corners.
    box1_corners = cuda.local.array((8, ), dtype=numba.float32)
    box2_corners = cuda.local.array((8, ), dtype=numba.float32)
    # Room for eight (x, y) overlap points.
    overlap_pts = cuda.local.array((16, ), dtype=numba.float32)

    rbbox_to_corners(box1_corners, rbbox1)
    rbbox_to_corners(box2_corners, rbbox2)

    n_overlap = quadrilateral_intersection(box1_corners, box2_corners,
                                           overlap_pts)
    sort_vertex_in_convex_polygon(overlap_pts, n_overlap)

    return area(overlap_pts, n_overlap)
Example #28
Source File: nms_gpu.py From second.pytorch with MIT License | 5 votes |
def line_segment_intersection_v1(pts1, pts2, i, j, temp_pts):
    """Intersect edge ``i`` of ``pts1`` with edge ``j`` of ``pts2``.

    Args:
        pts1: flat corner buffer ``x0, y0, ..., x3, y3``.
        pts2: second corner buffer with the same layout.
        i: edge index in ``pts1``; the edge runs from corner ``i`` to
            corner ``(i + 1) % 4``.
        j: edge index in ``pts2``, same convention.
        temp_pts: two-element output; receives the crossing point when
            one is found.

    Returns:
        True when a crossing was written to ``temp_pts``, else False.
    """
    p = cuda.local.array((2, ), dtype=numba.float32)
    q = cuda.local.array((2, ), dtype=numba.float32)
    r = cuda.local.array((2, ), dtype=numba.float32)
    s = cuda.local.array((2, ), dtype=numba.float32)

    i_next = (i + 1) % 4
    j_next = (j + 1) % 4

    # Edge p -> q comes from pts1, edge r -> s from pts2.
    p[0] = pts1[2 * i]
    p[1] = pts1[2 * i + 1]
    q[0] = pts1[2 * i_next]
    q[1] = pts1[2 * i_next + 1]
    r[0] = pts2[2 * j]
    r[1] = pts2[2 * j + 1]
    s[0] = pts2[2 * j_next]
    s[1] = pts2[2 * j_next + 1]

    # Reject when both areas involving edge p-q have the same sign
    # (or one of them is zero).
    area_pqr = trangle_area(p, q, r)
    area_pqs = trangle_area(p, q, s)
    if area_pqr * area_pqs >= 0:
        return False

    # Same test for the areas involving edge r-s.
    area_rsp = trangle_area(r, s, p)
    area_rsq = area_rsp + area_pqr - area_pqs
    if area_rsp * area_rsq >= 0:
        return False

    # Step from p toward q by the fraction ``t`` to reach the crossing.
    t = area_rsp / (area_pqs - area_pqr)
    step_x = t * (q[0] - p[0])
    step_y = t * (q[1] - p[1])
    temp_pts[0] = p[0] + step_x
    temp_pts[1] = p[1] + step_y
    return True
Example #29
Source File: nms_gpu.py From second.pytorch with MIT License | 5 votes |
def line_segment_intersection(pts1, pts2, i, j, temp_pts):
    """Intersect edge ``i`` of ``pts1`` with edge ``j`` of ``pts2``.

    Args:
        pts1: flat corner buffer ``x0, y0, ..., x3, y3``.
        pts2: second corner buffer with the same layout.
        i: edge index in ``pts1``; the edge runs from corner ``i`` to
            corner ``(i + 1) % 4``.
        j: edge index in ``pts2``, same convention.
        temp_pts: two-element output; receives the crossing point when
            one is found.

    Returns:
        True when a crossing was written to ``temp_pts``, else False.
    """
    A = cuda.local.array((2, ), dtype=numba.float32)
    B = cuda.local.array((2, ), dtype=numba.float32)
    C = cuda.local.array((2, ), dtype=numba.float32)
    D = cuda.local.array((2, ), dtype=numba.float32)

    i_next = (i + 1) % 4
    j_next = (j + 1) % 4

    # Edge A -> B comes from pts1, edge C -> D from pts2.
    A[0] = pts1[2 * i]
    A[1] = pts1[2 * i + 1]
    B[0] = pts1[2 * i_next]
    B[1] = pts1[2 * i_next + 1]
    C[0] = pts2[2 * j]
    C[1] = pts2[2 * j + 1]
    D[0] = pts2[2 * j_next]
    D[1] = pts2[2 * j_next + 1]

    BA0 = B[0] - A[0]
    BA1 = B[1] - A[1]
    DA0 = D[0] - A[0]
    CA0 = C[0] - A[0]
    DA1 = D[1] - A[1]
    CA1 = C[1] - A[1]

    # A and B must give different results against the pair C, D ...
    acd = DA1 * CA0 > CA1 * DA0
    bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * (D[0] - B[0])
    if acd == bcd:
        return False

    # ... and C and D must give different results against the pair A, B.
    abc = CA1 * BA0 > BA1 * CA0
    abd = DA1 * BA0 > BA1 * DA0
    if abc == abd:
        return False

    # Solve for the crossing point of the two lines.
    DC0 = D[0] - C[0]
    DC1 = D[1] - C[1]
    ABBA = A[0] * B[1] - B[0] * A[1]
    CDDC = C[0] * D[1] - D[0] * C[1]
    DH = BA1 * DC0 - BA0 * DC1
    Dx = ABBA * DC0 - BA0 * CDDC
    Dy = ABBA * DC1 - BA1 * CDDC
    temp_pts[0] = Dx / DH
    temp_pts[1] = Dy / DH
    return True
Example #30
Source File: nms_gpu.py From second.pytorch with MIT License | 5 votes |
def sort_vertex_in_convex_polygon(int_pts, num_of_inter):
    """Reorder polygon vertices in place around their centroid.

    Args:
        int_pts: flat buffer of interleaved ``x, y`` vertices; the first
            ``num_of_inter`` points are reordered in place.
        num_of_inter: number of valid points in ``int_pts``.

    Each vertex gets a scalar key derived from its unit direction from
    the centroid (the x component, mapped to ``-2 - x`` when the y
    component is negative). The points are then insertion-sorted by
    ascending key.
    """
    if num_of_inter <= 0:
        return

    # Centroid of the valid points.
    center = cuda.local.array((2, ), dtype=numba.float32)
    center[:] = 0.0
    for i in range(num_of_inter):
        center[0] += int_pts[2 * i]
        center[1] += int_pts[2 * i + 1]
    center[0] /= num_of_inter
    center[1] /= num_of_inter

    # One sort key per vertex.
    direction = cuda.local.array((2, ), dtype=numba.float32)
    keys = cuda.local.array((16, ), dtype=numba.float32)
    for i in range(num_of_inter):
        direction[0] = int_pts[2 * i] - center[0]
        direction[1] = int_pts[2 * i + 1] - center[1]
        length = math.sqrt(direction[0] * direction[0]
                           + direction[1] * direction[1])
        direction[0] = direction[0] / length
        direction[1] = direction[1] / length
        if direction[1] < 0:
            direction[0] = -2 - direction[0]
        keys[i] = direction[0]

    # Insertion sort of keys, moving the matching points alongside.
    # ``slot`` and ``held_key`` are pre-declared as in the original
    # (presumably for the JIT's type inference -- TODO confirm).
    slot = 0
    held_key = 0
    for i in range(1, num_of_inter):
        if keys[i - 1] > keys[i]:
            held_key = keys[i]
            held_x = int_pts[2 * i]
            held_y = int_pts[2 * i + 1]
            slot = i
            while slot > 0 and keys[slot - 1] > held_key:
                keys[slot] = keys[slot - 1]
                int_pts[slot * 2] = int_pts[slot * 2 - 2]
                int_pts[slot * 2 + 1] = int_pts[slot * 2 - 1]
                slot -= 1
            keys[slot] = held_key
            int_pts[slot * 2] = held_x
            int_pts[slot * 2 + 1] = held_y