Python sklearn.neighbors.KDTree() Examples
The following are 30 code examples of sklearn.neighbors.KDTree(), drawn from open-source projects.
The originating project, source file, and license are noted above each example.
You may also want to check out all available functions/classes of the module sklearn.neighbors.
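All of the examples below follow the same basic pattern: build a tree once from an array of points, then query it for nearest neighbors. Here is a minimal sketch of that pattern on made-up data:

import numpy as np
from sklearn.neighbors import KDTree

rng = np.random.RandomState(0)
X = rng.random_sample((100, 3))     # 100 made-up points in 3 dimensions

tree = KDTree(X, leaf_size=30)      # build the tree once
dist, ind = tree.query(X[:5], k=2)  # 2 nearest neighbors of the first 5 points
print(dist.shape, ind.shape)        # (5, 2) (5, 2); column 0 is each point itself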
Example #1
Source File: gard.py From scikit-downscale with Apache License 2.0 | 7 votes |
def fit(self, X, y):
    """Fit Analog model using a KDTree.

    Parameters
    ----------
    X : pd.Series or pd.DataFrame, shape (n_samples, 1)
        Training data.
    y : pd.Series or pd.DataFrame, shape (n_samples, 1)
        Target values.

    Returns
    -------
    self : returns an instance of self.
    """
    if len(X) < self.n_analogs:
        warnings.warn('length of X is less than n_analogs, setting n_analogs = len(X)')
        self.n_analogs = len(X)

    self.kdtree_ = KDTree(X, **self.kdtree_kwargs)
    self.y_ = y

    return self
Example #2
Source File: vtkPointSetOutlierRemoval.py From pcloudpy with BSD 3-Clause "New" or "Revised" License | 6 votes |
def update(self):
    array_full = numpy_from_polydata(self.input_)

    array = array_full[:, 0:3]
    color = array_full[:, 3:]

    # KDTree object (sklearn)
    kDTree = KDTree(array)
    dx, _ = kDTree.query(array[:, :], k=2)
    dx = dx[:, 1:].ravel()

    Indices = np.argsort(dx, axis=0)
    Npts = np.shape(Indices)[0]
    numberToKeep = int((1 - self.percent_to_remove) * Npts)

    idx = Indices[0:numberToKeep]
    new_array = np.copy(array[idx])
    new_color = np.copy(color[idx])

    array = np.c_[new_array, new_color]
    output = polydata_from_numpy(array)
    self.output_ = output
Example #3
Source File: test_pynndescent_.py From pynndescent with BSD 2-Clause "Simplified" License | 6 votes |
def test_nn_descent_neighbor_accuracy():
    knn_indices, _ = NNDescent(
        nn_data, "euclidean", {}, 10, random_state=np.random
    )._neighbor_graph

    tree = KDTree(nn_data)
    true_indices = tree.query(nn_data, 10, return_distance=False)

    num_correct = 0.0
    for i in range(nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.98,
        "NN-descent did not get 98% accuracy on nearest neighbors",
    )
Example #4
Source File: test_pynndescent_.py From pynndescent with BSD 2-Clause "Simplified" License | 6 votes |
def test_angular_nn_descent_neighbor_accuracy():
    knn_indices, _ = NNDescent(
        nn_data, "cosine", {}, 10, random_state=np.random
    )._neighbor_graph

    angular_data = normalize(nn_data, norm="l2")
    tree = KDTree(angular_data)
    true_indices = tree.query(angular_data, 10, return_distance=False)

    num_correct = 0.0
    for i in range(nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.98,
        "NN-descent did not get 98% accuracy on nearest neighbors",
    )
Example #5
Source File: test_pynndescent_.py From pynndescent with BSD 2-Clause "Simplified" License | 6 votes |
def test_sparse_nn_descent_neighbor_accuracy():
    knn_indices, _ = NNDescent(
        sparse_nn_data, "euclidean", n_neighbors=20, random_state=None
    )._neighbor_graph

    tree = KDTree(sparse_nn_data.toarray())
    true_indices = tree.query(sparse_nn_data.toarray(), 10, return_distance=False)

    num_correct = 0.0
    for i in range(sparse_nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (sparse_nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.85,
        "Sparse NN-descent did not get 85% accuracy on nearest neighbors",
    )
Example #6
Source File: test_pynndescent_.py From pynndescent with BSD 2-Clause "Simplified" License | 6 votes |
def test_nn_descent_query_accuracy():
    nnd = NNDescent(nn_data[200:], "euclidean", n_neighbors=10, random_state=None)
    knn_indices, _ = nnd.query(nn_data[:200], k=10, epsilon=0.2)

    tree = KDTree(nn_data[200:])
    true_indices = tree.query(nn_data[:200], 10, return_distance=False)

    num_correct = 0.0
    for i in range(true_indices.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (true_indices.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.95,
        "NN-descent query did not get 95% accuracy on nearest neighbors",
    )

# @SkipTest
Example #7
Source File: eval_functions.py From hmd with MIT License | 6 votes |
def knnsearch(target, source, metrics='euclidean', k_size=1, leaf_sizes=30):
    """Build a KDTree on target, then query it with source.

    Both inputs must be [N (size), D (dimension)] arrays of the same dimension.
    """
    # make sure they have the same dimension
    if not (target.shape[1] == source.shape[1]):
        raise ValueError('Two inputs are not the same size; they need to be [N(size), D(dimension)]')

    kdt_build = KDTree(target, leaf_size=leaf_sizes, metric=metrics)
    distances, indices = kdt_build.query(source, k=k_size)
    averagedist = np.sum(distances) / (source.shape[0])  # assume they have [N, D]

    return (averagedist, distances, indices)

# get high frequency vert list
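A quick illustration of how this helper might be called — the point arrays below are made up, and the call assumes knnsearch and its KDTree import are already in scope:

import numpy as np

rng = np.random.RandomState(0)
target_pts = rng.random_sample((500, 3))   # made-up target point cloud
source_pts = rng.random_sample((400, 3))   # made-up source point cloud

avg_dist, dists, idx = knnsearch(target_pts, source_pts, k_size=1)
print(avg_dist)  # mean distance from each source point to its nearest target point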
Example #8
Source File: test_ingest.py From scanpy with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_neighbors(adatas):
    adata_ref = adatas[0].copy()
    adata_new = adatas[1].copy()

    ing = sc.tl.Ingest(adata_ref)
    ing.fit(adata_new)
    ing.neighbors(k=10)
    indices = ing._indices

    tree = KDTree(adata_ref.obsm['X_pca'])
    true_indices = tree.query(ing._obsm['rep'], 10, return_distance=False)

    num_correct = 0.0
    for i in range(adata_new.n_obs):
        num_correct += np.sum(np.in1d(true_indices[i], indices[i]))
    percent_correct = num_correct / (adata_new.n_obs * 10)

    assert percent_correct > 0.99
Example #9
Source File: line_graph.py From PPGNet with MIT License | 6 votes |
def freeze_junction(self, status=True):
    self._freeze_junction = status
    if status:
        clusters = fclusterdata(self._junctions, self._eps_junc, criterion="distance")
        junc_groups = {}
        for ind_junc, ind_group in enumerate(clusters):
            if ind_group not in junc_groups.keys():
                junc_groups[ind_group] = []
            junc_groups[ind_group].append(self._junctions[ind_junc])
        if self.verbose:
            print(f"{len(self._junctions) - len(junc_groups)} junctions merged.")
        self._junctions = [np.mean(junc_group, axis=0) for junc_group in junc_groups.values()]
        self._kdtree = KDTree(self._junctions, leaf_size=30)
        dists, inds = self._kdtree.query(self._junctions, k=2)
        repl_inds = np.nonzero(dists.sum(axis=1) < self._eps_junc)[0].tolist()
        # assert len(repl_inds) == 0
    else:
        self._kdtree = None
Example #10
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_unsupervised_inputs():
    # test the types of valid input into NearestNeighbors
    X = rng.random_sample((10, 3))

    nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
    nbrs_fid.fit(X)

    dist1, ind1 = nbrs_fid.kneighbors(X)

    nbrs = neighbors.NearestNeighbors(n_neighbors=1)

    for input in (nbrs_fid, neighbors.BallTree(X), neighbors.KDTree(X)):
        nbrs.fit(input)
        dist2, ind2 = nbrs.kneighbors(X)

        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2)
Example #11
Source File: alignments.py From REGAL with MIT License | 6 votes |
def kd_align(emb1, emb2, normalize=False, distance_metric="euclidean", num_top=50):
    kd_tree = KDTree(emb2, metric=distance_metric)

    dist, ind = kd_tree.query(emb1, k=num_top)
    print("queried alignments")

    row = np.array([])
    for i in range(emb1.shape[0]):
        row = np.concatenate((row, np.ones(num_top) * i))
    col = ind.flatten()
    data = np.exp(-dist).flatten()

    sparse_align_matrix = coo_matrix((data, (row, col)),
                                     shape=(emb1.shape[0], emb2.shape[0]))
    return sparse_align_matrix.tocsr()
Example #12
Source File: util.py From RelativePose with BSD 3-Clause "New" or "Revised" License | 6 votes |
def point_cloud_overlap(pc_src, pc_tgt, R_gt_44):
    pc_src_trans = np.matmul(R_gt_44[:3, :3], pc_src.T) + R_gt_44[:3, 3:4]
    tree = KDTree(pc_tgt)
    nearest_dist, nearest_ind = tree.query(pc_src_trans.T, k=1)
    nns2t = np.min(nearest_dist)
    hasCorres = (nearest_dist < 0.08)
    overlap_val_s2t = hasCorres.sum() / pc_src.shape[0]

    pc_tgt_trans = np.matmul(
        np.linalg.inv(R_gt_44),
        np.concatenate((pc_tgt.T, np.ones([1, pc_tgt.shape[0]])))
    )[:3, :]
    tree = KDTree(pc_src)
    nearest_dist, nearest_ind = tree.query(pc_tgt_trans.T, k=1)
    nnt2s = np.min(nearest_dist)
    hasCorres = (nearest_dist < 0.08)
    overlap_val_t2s = hasCorres.sum() / pc_tgt.shape[0]

    overlap_val = max(overlap_val_s2t, overlap_val_t2s)
    cam_dist_this = np.linalg.norm(R_gt_44[:3, 3])
    pc_dist_this = np.linalg.norm(pc_src_trans.mean(1) - pc_tgt.T.mean(1))
    pc_nn = (nns2t + nnt2s) / 2
    return overlap_val, cam_dist_this, pc_dist_this, pc_nn
Example #13
Source File: find_squad_nearby_words.py From adversarial-squad with MIT License | 6 votes |
def get_nearby_words(main_words):
    main_inds = {}
    all_words = []
    all_vecs = []
    with open(OPTS.wordvec_file, encoding='ISO-8859-1') as f:
        for i, line in tqdm(enumerate(f)):
            toks = line.rstrip().split(' ')
            word = toks[0]
            vec = np.array([float(x) for x in toks[1:]])
            all_words.append(word)
            all_vecs.append(vec)
            if word in main_words:
                main_inds[word] = i
    print('Found vectors for %d/%d words = %.2f%%' % (
        len(main_inds), len(main_words), 100.0 * len(main_inds) / len(main_words)),
        file=sys.stderr)
    tree = KDTree(all_vecs)
    nearby_words = {}
    for word in tqdm(main_inds):
        dists, inds = tree.query([all_vecs[main_inds[word]]],
                                 k=OPTS.num_neighbors + 1)
        nearby_words[word] = [
            {'word': all_words[i], 'dist': d} for d, i in zip(dists[0], inds[0])]
    return nearby_words
Example #14
Source File: test_pynndescent_.py From pynndescent with BSD 2-Clause "Simplified" License | 6 votes |
def test_sparse_angular_nn_descent_neighbor_accuracy():
    knn_indices, _ = NNDescent(
        sparse_nn_data, "cosine", {}, 20, random_state=None
    )._neighbor_graph

    angular_data = normalize(sparse_nn_data, norm="l2").toarray()
    tree = KDTree(angular_data)
    true_indices = tree.query(angular_data, 10, return_distance=False)

    num_correct = 0.0
    for i in range(sparse_nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (sparse_nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.85,
        "Sparse angular NN-descent did not get 85% accuracy on nearest neighbors",
    )
Example #15
Source File: test_pynndescent_.py From pynndescent with BSD 2-Clause "Simplified" License | 6 votes |
def test_random_state_none():
    knn_indices, _ = NNDescent(
        nn_data, "euclidean", {}, 10, random_state=None
    )._neighbor_graph

    tree = KDTree(nn_data)
    true_indices = tree.query(nn_data, 10, return_distance=False)

    num_correct = 0.0
    for i in range(nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.99,
        "NN-descent did not get 99% accuracy on nearest neighbors",
    )
Example #16
Source File: test_neighbors.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.neighbors.NearestNeighbors, neighbors.NearestNeighbors)
    self.assertIs(df.neighbors.KNeighborsClassifier, neighbors.KNeighborsClassifier)
    self.assertIs(df.neighbors.RadiusNeighborsClassifier, neighbors.RadiusNeighborsClassifier)
    self.assertIs(df.neighbors.KNeighborsRegressor, neighbors.KNeighborsRegressor)
    self.assertIs(df.neighbors.RadiusNeighborsRegressor, neighbors.RadiusNeighborsRegressor)
    self.assertIs(df.neighbors.NearestCentroid, neighbors.NearestCentroid)
    self.assertIs(df.neighbors.BallTree, neighbors.BallTree)
    self.assertIs(df.neighbors.KDTree, neighbors.KDTree)
    self.assertIs(df.neighbors.DistanceMetric, neighbors.DistanceMetric)
    self.assertIs(df.neighbors.KernelDensity, neighbors.KernelDensity)
Example #17
Source File: trustscore.py From alibi with Apache License 2.0 | 6 votes |
def filter_by_distance_knn(self, X: np.ndarray) -> np.ndarray:
    """
    Filter out instances with low kNN density. Calculate the distance to the
    k-nearest point in the data for each instance and remove instances above
    a cutoff distance.

    Parameters
    ----------
    X
        Data.

    Returns
    -------
    Filtered data.
    """
    kdtree = KDTree(X, leaf_size=self.leaf_size, metric=self.metric)
    knn_r = kdtree.query(X, k=self.k_filter + 1)[0]  # distances from 0 to k-nearest points
    if self.dist_filter_type == 'point':
        knn_r = knn_r[:, -1]
    elif self.dist_filter_type == 'mean':
        knn_r = np.mean(knn_r[:, 1:], axis=1)  # exclude distance of instance to itself
    cutoff_r = np.percentile(knn_r, (1 - self.alpha) * 100)  # cutoff distance
    X_keep = X[np.where(knn_r <= cutoff_r)[0], :]  # define instances to keep
    return X_keep
Example #18
Source File: generate_training_tuples_baseline.py From pointnetvlad with MIT License | 6 votes |
def construct_query_dict(df_centroids, filename):
    tree = KDTree(df_centroids[['northing', 'easting']])
    ind_nn = tree.query_radius(df_centroids[['northing', 'easting']], r=10)
    ind_r = tree.query_radius(df_centroids[['northing', 'easting']], r=50)
    queries = {}
    for i in range(len(ind_nn)):
        query = df_centroids.iloc[i]["file"]
        positives = np.setdiff1d(ind_nn[i], [i]).tolist()
        negatives = np.setdiff1d(df_centroids.index.values.tolist(), ind_r[i]).tolist()
        random.shuffle(negatives)
        queries[i] = {"query": query, "positives": positives, "negatives": negatives}

    with open(filename, 'wb') as handle:
        pickle.dump(queries, handle, protocol=pickle.HIGHEST_PROTOCOL)

    print("Done ", filename)

#### Initialize pandas DataFrame
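This example (like Example #19 below) uses query_radius rather than query: instead of a fixed number of neighbors, it returns, for each query point, the indices of all points within a given radius. A minimal sketch of the difference on made-up coordinates:

import numpy as np
from sklearn.neighbors import KDTree

coords = np.random.RandomState(0).uniform(0, 100, size=(50, 2))
tree = KDTree(coords)

ind = tree.query_radius(coords[:1], r=10)  # one index array per query point; size varies
dist, knn = tree.query(coords[:1], k=5)    # always exactly k neighbors, sorted by distance
print(len(ind[0]), knn[0])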
Example #19
Source File: generate_training_tuples_refine.py From pointnetvlad with MIT License | 6 votes |
def construct_query_dict(df_centroids, filename):
    tree = KDTree(df_centroids[['northing', 'easting']])
    ind_nn = tree.query_radius(df_centroids[['northing', 'easting']], r=12.5)
    ind_r = tree.query_radius(df_centroids[['northing', 'easting']], r=50)
    queries = {}
    print(len(ind_nn))
    for i in range(len(ind_nn)):
        query = df_centroids.iloc[i]["file"]
        positives = np.setdiff1d(ind_nn[i], [i]).tolist()
        negatives = np.setdiff1d(df_centroids.index.values.tolist(), ind_r[i]).tolist()
        random.shuffle(negatives)
        queries[i] = {"query": query, "positives": positives, "negatives": negatives}

    with open(filename, 'wb') as handle:
        pickle.dump(queries, handle, protocol=pickle.HIGHEST_PROTOCOL)

    print("Done ", filename)
Example #20
Source File: utils.py From slicesim with MIT License | 6 votes |
def run(clients, base_stations, run_at, assign=True):
    print(f'KDTREE CALL [{run_at}] - limit: {KDTree.limit}')
    if run_at == KDTree.last_run_time:
        return
    KDTree.last_run_time = run_at

    c_coor = [(c.x, c.y) for c in clients]
    bs_coor = [p.coverage.center for p in base_stations]

    tree = kdt(bs_coor, leaf_size=2)
    res = tree.query(c_coor, k=min(KDTree.limit, len(base_stations)))
    # print(res[0])

    for c, d, p in zip(clients, res[0], res[1]):
        if assign and d[0] <= base_stations[p[0]].coverage.radius:
            c.base_station = base_stations[p[0]]
        c.closest_base_stations = [(a, base_stations[b]) for a, b in zip(d, p)]
Example #21
Source File: provider.py From scanobjectnn with MIT License | 6 votes |
def occlude_point_cloud(batch_data, occlusion_ratio):
    """ Randomly remove k points (the number k is defined by the ratio).
        Input:
          BxNx3 array, original batch of point clouds
        Return:
          Bx(N-k)x3 array, occluded batch of point clouds
    """
    B, N, C = batch_data.shape
    k = int(np.round(N * occlusion_ratio))
    occluded_batch_point_cloud = []
    for i in range(B):
        point_cloud = batch_data[i, :, :]
        kdt = KDTree(point_cloud, leaf_size=30, metric='euclidean')
        center_of_occlusion = random.choice(point_cloud)
        # occluded_points_idx = kdt.query_radius(center_of_occlusion.reshape(1, -1), r=occlusion_radius)
        _, occluded_points_idx = kdt.query(center_of_occlusion.reshape(1, -1), k=k)
        point_cloud = np.delete(point_cloud, occluded_points_idx, axis=0)
        occluded_batch_point_cloud.append(point_cloud)
    return np.array(occluded_batch_point_cloud)
Example #22
Source File: test_neighbors.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_unsupervised_inputs():
    # test the types of valid input into NearestNeighbors
    X = rng.random_sample((10, 3))

    nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
    nbrs_fid.fit(X)

    dist1, ind1 = nbrs_fid.kneighbors(X)

    nbrs = neighbors.NearestNeighbors(n_neighbors=1)

    for input in (nbrs_fid, neighbors.BallTree(X), neighbors.KDTree(X)):
        nbrs.fit(input)
        dist2, ind2 = nbrs.kneighbors(X)

        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2)
Example #23
Source File: preprocessing.py From DPLink with MIT License | 6 votes |
def load_vids(data_path, data_name="baseLoc"):
    vid_list = {}
    vid_lookup = {}
    vid_array = []
    poi_info = json.load(open(data_path + "poi_info.json"))
    with open(data_path + data_name) as fid:
        for line in fid:
            bid, lat, lon = line.strip("\r\n").split("_")
            lat, lon = float(lat), float(lon)
            if bid not in vid_list:
                cid = len(vid_list) + 1
                vid_list[bid] = [cid, (lat, lon), poi_info[bid][3:]]
                vid_lookup[cid] = [bid, (lat, lon)]
                vid_array.append((lat, lon))
    vid_array = np.array(vid_array)
    kdtree = KDTree(vid_array)
    return vid_list, vid_lookup, kdtree
Example #24
Source File: lru_knn.py From emdqn with MIT License | 6 votes |
def load(self, action):
    try:
        assert os.path.exists(self.bufpath)
        lru = np.load(os.path.join(self.bufpath, 'lru_%d.npy' % action))
        cap = lru.shape[0]
        self.curr_capacity = cap
        self.tm = np.max(lru) + 0.01
        self.buildnum = self.buildnum_max

        self.states[:cap] = np.load(os.path.join(self.bufpath, 'states_%d.npy' % action))
        self.q_values_decay[:cap] = np.load(os.path.join(self.bufpath, 'q_values_decay_%d.npy' % action))
        self.lru[:cap] = lru
        self.tree = KDTree(self.states[:self.curr_capacity])
        print("load %d-th buffer success, cap=%d" % (action, cap))
    except:
        print("load %d-th buffer failed" % action)
Example #25
Source File: pc_util.py From path_invariance_map_network with BSD 3-Clause "New" or "Revised" License | 6 votes |
def surface_variant_para(stored, pcndex, pc):
    num_neighbour = 10
    pca = PCA()
    kdt = KDTree(pc, leaf_size=100, metric='euclidean')
    # For each point we get the surface variant
    hm = np.zeros(pc.shape[0])
    idx = kdt.query(pc, k=num_neighbour)[1]
    for i in range(len(idx)):
        data = pc[idx[i], :]
        pca.fit(data)
        lambdas = pca.singular_values_
        hm[i] = lambdas[2] / float(sum(lambdas))
        if np.isnan(hm[i]):
            hm[i] = 0
    # Normalize the surface variant here
    minv = np.min(hm)
    maxv = np.max(hm)
    if float(maxv - minv) == 0:
        stored[pcndex] = np.ones(hm.shape)
    else:
        stored[pcndex] = (hm - minv) / float(maxv - minv) * 0.9 + 0.1
Example #26
Source File: word2vec.py From QAbot_by_base_KG with MIT License | 6 votes |
def neighbours(self, word, size=10):
    """
    Get nearest words with KDTree, ranking by cosine distance
    """
    word = word.strip()
    v = self.word_vec(word)
    [distances], [points] = self.kdt.query(array([v]), k=size, return_distance=True)
    assert len(distances) == len(points), "distances and points should be in same shape."
    words, scores = [], {}
    for (x, y) in zip(points, distances):
        w = self.index2word[x]
        if w == word:
            s = 1.0
        else:
            s = utils.cosine(v, self.syn0[x])
        if s < 0:
            s = abs(s)
        words.append(w)
        scores[w] = min(s, 1.0)
    for x in sorted(words, key=scores.get, reverse=True):
        yield x, scores[x]
Example #27
Source File: word2vec.py From Synonyms with MIT License | 6 votes |
def neighbours(self, word, size=10):
    """
    Get nearest words with KDTree, ranking by cosine distance
    """
    word = word.strip()
    v = self.word_vec(word)
    [distances], [points] = self.kdt.query(array([v]), k=size, return_distance=True)
    assert len(distances) == len(points), "distances and points should be in same shape."
    words, scores = [], {}
    for (x, y) in zip(points, distances):
        w = self.index2word[x]
        if w == word:
            s = 1.0
        else:
            s = cosine(v, self.syn0[x])
        if s < 0:
            s = abs(s)
        words.append(w)
        scores[w] = min(s, 1.0)
    for x in sorted(words, key=scores.get, reverse=True):
        yield x, scores[x]
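A side note on these two word2vec examples: sklearn's KDTree does not support the cosine metric, so both snippets query the tree with its default Euclidean metric and then re-rank by cosine. When vectors are L2-normalized first (as in Examples #4 and #14), the two rankings coincide, since for unit vectors ||u - v||^2 = 2 - 2 cos(u, v). A minimal sketch of that equivalence on made-up vectors:

import numpy as np
from sklearn.preprocessing import normalize
from sklearn.neighbors import KDTree

vecs = np.random.RandomState(0).randn(1000, 50)  # made-up embedding matrix
unit = normalize(vecs, norm="l2")                # unit-length rows

tree = KDTree(unit)
dist, ind = tree.query(unit[:1], k=5)  # Euclidean neighbors of the first vector
cos = unit[:1] @ unit[ind[0]].T        # their cosine similarities, in decreasing order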
Example #28
Source File: subset.py From PVGeo with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _query(topo_points, data_points):
    """Queries the data points for their closest point on the topography surface"""
    try:
        # sklearn's KDTree is faster: use it if available
        from sklearn.neighbors import KDTree as Tree
    except ImportError:
        from scipy.spatial import cKDTree as Tree
    tree = Tree(topo_points)
    i = tree.query(data_points)[1].ravel()
    return topo_points[i]
Example #29
Source File: knn.py From pyFTS with GNU General Public License v3.0 | 5 votes |
def train(self, data, **kwargs):
    X, Y = self._prepare_xy(data)

    self.kdtree = KDTree(np.array(X))
    self.values = Y

    self.shortname = "kNN({})-{}".format(self.order, self.alpha)
Example #30
Source File: slicing.py From PVGeo with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _get_planes(self, pts):
    """Internal helper to generate planes for the slices"""
    try:
        # sklearn's KDTree is faster: use it if available
        from sklearn.neighbors import KDTree as Tree
    except ImportError:
        from scipy.spatial import cKDTree as Tree
    if self.get_number_of_slices() == 0:
        return []
    # Get the points over the NumPy interface
    wpdi = dsa.WrapDataObject(pts)  # NumPy wrapped points
    points = np.array(wpdi.Points)  # New NumPy array of points so we don't destroy input
    numPoints = pts.GetNumberOfPoints()
    if self.__useNearestNbr:
        tree = Tree(points)
        ptsi = tree.query([points[0]], k=numPoints)[1].ravel()
    else:
        ptsi = [i for i in range(numPoints)]
    # Iterate over points in order (skips last point):
    planes = []
    for i in range(0, numPoints - 1, numPoints // self.get_number_of_slices()):
        # get normal
        pts1 = points[ptsi[i]]
        pts2 = points[ptsi[i + 1]]
        x1, y1, z1 = pts1[0], pts1[1], pts1[2]
        x2, y2, z2 = pts2[0], pts2[1], pts2[2]
        normal = [x2 - x1, y2 - y1, z2 - z1]
        # create plane
        plane = self._generate_plane([x1, y1, z1], normal)
        planes.append(plane)
    return planes