Python sklearn.neighbors.BallTree() Examples
The following are 21 code examples of sklearn.neighbors.BallTree().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.neighbors, or try the search function.
Example #1
Source File: test_neighbors.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_unsupervised_inputs():
    """Check that NearestNeighbors can be fit from the accepted input kinds.

    A reference estimator is fit on the raw array; a second estimator is then
    fit in turn on that fitted estimator, on a BallTree and on a KDTree built
    from the same array, and must return the same neighbors each time.
    """
    X = rng.random_sample((10, 3))

    # Reference result: estimator fit directly on the array.
    reference = neighbors.NearestNeighbors(n_neighbors=1)
    reference.fit(X)
    expected_dist, expected_ind = reference.kneighbors(X)

    candidate = neighbors.NearestNeighbors(n_neighbors=1)
    fit_sources = (reference, neighbors.BallTree(X), neighbors.KDTree(X))
    for source in fit_sources:
        candidate.fit(source)
        actual_dist, actual_ind = candidate.kneighbors(X)

        assert_array_almost_equal(expected_dist, actual_dist)
        assert_array_almost_equal(expected_ind, actual_ind)
Example #2
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_unsupervised_inputs():
    """Check that NearestNeighbors can be fit from the accepted input kinds.

    The baseline is an estimator fit on the raw array. Fitting a second
    estimator on the fitted baseline, on a BallTree, or on a KDTree built from
    the same array must give the same distances and indices.
    """
    X = rng.random_sample((10, 3))

    baseline = neighbors.NearestNeighbors(n_neighbors=1)
    baseline.fit(X)
    base_dist, base_ind = baseline.kneighbors(X)

    nbrs = neighbors.NearestNeighbors(n_neighbors=1)
    for fitted_source in (baseline, neighbors.BallTree(X), neighbors.KDTree(X)):
        nbrs.fit(fitted_source)
        got_dist, got_ind = nbrs.kneighbors(X)

        assert_array_almost_equal(base_dist, got_dist)
        assert_array_almost_equal(base_ind, got_ind)
Example #3
Source File: fMRI.py From mmvt with GNU General Public License v3.0 | 6 votes |
def calc_vert_vals(verts, pts, vals, method='max', k_points=100):
    """Assign a value to every vertex from the values of its nearest points.

    A BallTree is built on ``pts`` and queried for the ``k_points`` nearest
    points of each row of ``verts`` (capped at ``len(pts)``). The fraction of
    distinct points that were selected at least once is printed.

    Parameters
    ----------
    verts : array-like
        Query coordinates, one row per vertex.
    pts : array-like
        Coordinates of the points carrying values.
    vals : numpy.ndarray
        Values indexed by point index (``vals[pts_inds]`` is taken).
        NOTE(review): presumably 1-D with one entry per row of ``pts``.
    method : {'max', 'dist'}
        ``'max'`` picks, per vertex, the neighbor value with the largest
        absolute value. ``'dist'`` returns the inverse-squared-distance
        weighted sum of the neighbor values (weights normalised per vertex).
    k_points : int
        Maximum number of neighbors to use per vertex.

    Returns
    -------
    numpy.ndarray
        One value per vertex.

    Raises
    ------
    ValueError
        If ``method`` is not ``'max'`` or ``'dist'``.
    """
    # Reject an unknown method before doing any work; previously this fell
    # through to the return statement with an unbound local.
    if method not in ('dist', 'max'):
        raise ValueError(
            "method must be 'dist' or 'max', got {!r}".format(method))

    from sklearn.neighbors import BallTree

    ball_tree = BallTree(pts)
    k_points = min(k_points, len(pts))
    dists, pts_inds = ball_tree.query(verts, k=k_points, return_distance=True)
    near_vals = vals[pts_inds]

    cover = len(np.unique(pts_inds.ravel())) / float(len(pts))
    print('{}% of the points are covered'.format(cover * 100))

    if method == 'dist':
        # A vertex that coincides with a point has distance 0, which used to
        # produce inf weights and NaN after normalisation. Clamping to machine
        # epsilon keeps the weights finite while letting that point dominate.
        safe_dists = np.maximum(dists, np.finfo(float).eps)
        weights = 1 / (safe_dists ** 2)
        weights = weights / np.sum(weights, 1, keepdims=True)
        return np.sum(near_vals * weights, 1)

    # method == 'max': keep the signed value whose magnitude is largest.
    rows = np.arange(near_vals.shape[0])
    return near_vals[rows, np.argmax(abs(near_vals), 1)]
Example #4
Source File: test_neighbors.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_objectmapper(self):
    """Check that ``ModelFrame.neighbors`` exposes the sklearn.neighbors objects.

    Each listed attribute reached through ``df.neighbors`` must be the very
    same object as the attribute of the ``neighbors`` module.
    """
    df = pdml.ModelFrame([])

    exposed_names = (
        'NearestNeighbors',
        'KNeighborsClassifier',
        'RadiusNeighborsClassifier',
        'KNeighborsRegressor',
        'RadiusNeighborsRegressor',
        'NearestCentroid',
        'BallTree',
        'KDTree',
        'DistanceMetric',
        'KernelDensity',
    )
    for attr_name in exposed_names:
        self.assertIs(getattr(df.neighbors, attr_name),
                      getattr(neighbors, attr_name))
Example #5
Source File: test_distances.py From pynndescent with BSD 2-Clause "Simplified" License | 6 votes |
def test_haversine():
    """Compare ``dist.haversine`` against BallTree's haversine metric.

    All pairwise distances over the first two columns of ``spatial_data`` are
    computed with ``dist.haversine``, sorted per row, and compared with the
    distances returned by a full k-nearest-neighbor query on the BallTree.
    """
    coords = spatial_data[:, :2]
    n_points = spatial_data.shape[0]

    tree = BallTree(coords, metric="haversine")
    # Querying with k == n_points returns every distance, sorted per row.
    dist_matrix, _ = tree.query(coords, k=n_points)

    rows = []
    for i in range(n_points):
        rows.append(
            [dist.haversine(spatial_data[i, :2], spatial_data[j, :2])
             for j in range(n_points)]
        )
    test_matrix = np.array(rows)
    test_matrix.sort(axis=1)

    assert_array_almost_equal(
        test_matrix,
        dist_matrix,
        err_msg="Distances don't match for metric haversine",
    )
Example #6
Source File: mahalanobis_batch.py From perfect_match with MIT License | 6 votes |
def make_propensity_lists(self, train_ids, benchmark):
    """Build per-treatment data splits, BallTrees and id lists.

    Rows for ``train_ids`` are fetched from the benchmark's data access
    object, each row is mapped to its assignment via
    ``benchmark.get_assignment``, and the pair data and ids are then split by
    treatment index. Results are stored on ``self.original_data``,
    ``self.ball_trees`` and ``self.treatment_ids``; ``self.pca`` is set too.
    """
    input_data, ids, pair_data = benchmark.get_data_access().get_rows(train_ids)

    assignments = [benchmark.get_assignment(row_id, row)
                   for row_id, row in zip(ids, input_data)]
    treatment_data, _ = zip(*assignments)
    treatment_data = np.array(treatment_data)

    # The trailing "and False" keeps the PCA reduction switched off; the
    # branch is retained exactly as the original had it.
    use_pca = pair_data.shape[-1] > 200 and False
    if use_pca:
        self.pca = PCA(50, svd_solver="randomized")
        pair_data = self.pca.fit_transform(pair_data)
    else:
        self.pca = None

    def rows_of(values, treatment):
        # Boolean-mask selection of the entries belonging to one treatment.
        return values[treatment_data == treatment]

    # NOTE: a Mahalanobis-metric BallTree (with V set to the covariance of
    # pair_data) was previously tried here and is disabled; the default
    # metric is used instead.
    self.original_data = [rows_of(pair_data, t)
                          for t in range(benchmark.get_num_treatments())]
    self.ball_trees = [BallTree(rows_of(pair_data, t))
                       for t in range(benchmark.get_num_treatments())]
    self.treatment_ids = [rows_of(ids, t)
                          for t in range(benchmark.get_num_treatments())]
Example #7
Source File: test_umap_metrics.py From umap with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_haversine(spatial_data):
    """Compare ``dist.haversine`` against BallTree's haversine metric.

    Uses the first two columns of ``spatial_data``: every pairwise distance is
    computed with ``dist.haversine``, each row is sorted, and the result is
    compared with a BallTree query that returns all neighbors of every point.
    """
    n_points = spatial_data.shape[0]
    coords = spatial_data[:, :2]

    tree = BallTree(coords, metric="haversine")
    # k == n_points: the query returns the full sorted distance row per point.
    dist_matrix, _ = tree.query(coords, k=n_points)

    pairwise = []
    for i in range(n_points):
        pairwise.append(
            [dist.haversine(spatial_data[i, :2], spatial_data[j, :2])
             for j in range(n_points)]
        )
    test_matrix = np.array(pairwise)
    test_matrix.sort(axis=1)

    assert_array_almost_equal(
        test_matrix,
        dist_matrix,
        err_msg="Distances don't match for metric haversine",
    )
Example #8
Source File: utils.py From mdentropy with MIT License | 6 votes |
def avgdigamma(data, dvec, leaf_size=16):
    """Return the mean of digamma over per-point neighbor counts.

    A BallTree is built on ``data`` with ``p=inf`` and, for every point, the
    number of points within radius ``dvec - EPS`` is counted; the function
    returns the mean of ``digamma`` applied to those counts, i.e. the
    expectation value of <psi(nx)>.

    Parameters
    ----------
    data : numpy.ndarray
        Points used both to build the tree and as query points.
    dvec : array_like (n_points,)
        Search radius for each point (``EPS`` is subtracted before querying).
    leaf_size : int
        Leaf size passed to the BallTree.

    Returns
    -------
    avgdigamma : float
        expectation value of <psi(nx)>
    """
    ball_tree = BallTree(data, leaf_size=leaf_size, p=float('inf'))
    radii = dvec - EPS
    neighbor_counts = ball_tree.query_radius(data, radii, count_only=True)
    return digamma(neighbor_counts).mean()
Example #9
Source File: recommender.py From atap with Apache License 2.0 | 5 votes |
def transform(self, documents):
    """Return a one-element list holding a BallTree built from ``documents``."""
    tree = BallTree(documents)
    return [tree]
Example #10
Source File: test_t_sne.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_barnes_hut_angle():
    """Check that Barnes-Hut with angle=0 agrees with the exact method.

    For 2 and 3 output components, the joint probabilities and the KL
    divergence from the exact code path are compared with those from the
    nearest-neighbor / Barnes-Hut code path when every other sample is used
    as a neighbor.
    """
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    n_features = 5
    k = n_samples - 1  # every other sample is a neighbor

    for n_components in [2, 3]:
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)

        # Exact path.
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, _ = _kl_divergence(
            params, P, degrees_of_freedom, n_samples, n_components)

        # Nearest-neighbor path: query k + 1 and drop the first column.
        bt = BallTree(distances)
        _, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        distances_nn = np.array(
            [distances[row, neighbors_nn[row]] for row in range(n_samples)])
        first_row = distances[0, neighbors_nn[0]]
        assert np.all(first_row == distances_nn[0]), \
            abs(first_row - distances_nn[0])

        P_bh = _joint_probabilities_nn(
            distances_nn, neighbors_nn, perplexity, verbose=0)
        kl_bh, _ = _kl_divergence_bh(
            params, P_bh, degrees_of_freedom, n_samples, n_components,
            angle=angle, skip_num_points=0, verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3)
Example #11
Source File: knn.py From pyodds with MIT License | 5 votes |
def fit(self, X):
    """Fit detector.

    Builds a BallTree on the samples, fits ``self.neigh_``, and derives the
    decision scores from the k-nearest-neighbor distances.

    Parameters
    ----------
    X : dataframe of shape (n_samples, n_features)
        The input samples; converted with ``X.to_numpy()``.

    Returns
    -------
    self : object
    """
    X = X.to_numpy()

    # Extra metric arguments are forwarded only when they were supplied.
    extra_metric_args = {} if self.metric_params is None else self.metric_params
    self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                          metric=self.metric,
                          **extra_metric_args)

    self.neigh_.fit(X)
    dist_arr, _ = self.neigh_.kneighbors(n_neighbors=self.n_neighbors,
                                         return_distance=True)

    self.decision_scores_ = self._get_dist_by_method(dist_arr).ravel()
    self._process_decision_scores()
    return self
Example #12
Source File: Advanced_server.py From Autocomplete-System with MIT License | 5 votes |
def __init__(self, num_corrections=10, num_basic_results=10,
             home_dir=".", embedding_json=None, vocab_int_json=None,
             *args, **kwargs):
    """Set up the server, optionally loading embeddings for neighbor search.

    ``num_basic_results`` is forwarded to the parent initialiser as
    ``num_res_return``. When both ``embedding_json`` and ``vocab_int_json``
    are given, they are resolved relative to ``home_dir``, loaded, and a
    BallTree is built over the embeddings; otherwise ``use_embedding`` stays
    False and no searcher is created.
    """
    super().__init__(num_res_return=num_basic_results, *args, **kwargs)

    self.use_embedding = bool(embedding_json and vocab_int_json)
    if self.use_embedding:
        embedding_path = path.join(home_dir, embedding_json)
        vocab_path = path.join(home_dir, vocab_int_json)

        # load json files
        print("Loading JSON files, may take a while.")
        with open(embedding_path, 'r') as embedding_file:
            self.embeddings = np.array(json.load(embedding_file))
        with open(vocab_path, 'r') as vocab_file:
            self.vocab_int = json.load(vocab_file)
        # Reverse lookup of vocab_int: value -> key.
        self.int_vocab = {index: word
                          for word, index in self.vocab_int.items()}

        # train k nearest neighbor model
        print("Training BallTree k-nearest neighbor searcher...")
        self.searcher = BallTree(self.embeddings, leaf_size=10)

    self.checker = Spell.Spell()
    self.num_corrections = num_corrections
    self.num_basic_search_results = num_basic_results
    # Total is capped at 10.
    self.max_total_res = min(10, num_basic_results + num_corrections)
    print("Ready to use.")
Example #13
Source File: center_batch_generator.py From 3d-semantic-segmentation with MIT License | 5 votes |
def _calc_ball_trees(self, metric='euclidean'):
    """Build one BallTree per entry of ``self.dataset.data``.

    Only the first two columns of each entry are indexed. Progress is
    reported through ``tqdm``.

    :param metric: distance metric passed to every BallTree
    :return: list of BallTrees, in the order of ``self.dataset.data``
    """
    progress = tqdm(self.dataset.data,
                    desc='Ball trees have to be calculated from scratch')
    return [BallTree(cloud[:, :2], metric=metric) for cloud in progress]
Example #14
Source File: evaluation.py From 3d-semantic-segmentation with MIT License | 5 votes |
def knn_interpolation(cumulated_pc: np.ndarray, full_sized_data: np.ndarray, k=5):
    """
    Using k-nn interpolation to find labels of points of the full sized pointcloud

    Rows of ``cumulated_pc`` whose last column is -1 are discarded; each point
    of the full sized cloud then receives the most frequent last-column value
    among its k nearest remaining rows (first three columns, euclidean).

    :param cumulated_pc: cumulated pointcloud results after running the network
    :param full_sized_data: full sized point cloud
    :param k: k for k nearest neighbor interpolation
    :return: pointcloud with predicted labels in last column and ground truth labels in last but one column
    """
    has_label = cumulated_pc[:, -1] != -1
    labeled = cumulated_pc[has_label]
    to_be_predicted = full_sized_data.copy()

    ball_tree = BallTree(labeled[:, :3], metric='euclidean')
    _, neighbor_idx = ball_tree.query(to_be_predicted[:, :3], k=k)
    knn_classes = labeled[neighbor_idx][:, :, -1].astype(int)

    # Majority vote per point; argmax on bincount picks the lowest class
    # among ties.
    interpolated = np.zeros(knn_classes.shape[0])
    for row, votes in enumerate(knn_classes):
        interpolated[row] = np.bincount(votes).argmax()

    n_rows, n_cols = to_be_predicted.shape[0], to_be_predicted.shape[1]
    output = np.zeros((n_rows, n_cols + 1))
    output[:, :-1] = to_be_predicted
    output[:, -1] = interpolated
    return output
Example #15
Source File: recommender.py From atap with Apache License 2.0 | 5 votes |
def fit_transform(self, documents):
    """Fit the text pipeline on ``documents`` once and index the result.

    Runs only while ``self.transformer`` still compares equal to False. It
    then builds the pipeline, stores the transformed documents as
    ``self.lexicon``, builds ``self.tree`` over them and calls ``self.save()``.
    """
    # Transformer will be False if pipeline hasn't been fit yet,
    # Trigger fit_transform and save the transformer and lexicon.
    if self.transformer == False:  # noqa: E712 - equality check kept as is
        normalizer = TextNormalizer(minimum=50, maximum=200)
        vectorizer = Pipeline([
            ('tfidf', TfidfVectorizer()),
            ('svd', TruncatedSVD(n_components=200)),
        ])
        self.transformer = Pipeline([
            ('norm', normalizer),
            ('transform', vectorizer),
        ])

        self.lexicon = self.transformer.fit_transform(documents)
        self.tree = BallTree(self.lexicon)
        self.save()
Example #16
Source File: EC_functions.py From Model-Free-Episodic-Control with MIT License | 5 votes |
def update_tree(self, time):
    """Rebuild the BallTree over the stored states and record when.

    The tree is built from the first ``self.items`` rows of ``self.state``
    with ``self.size`` as leaf size, and ``time`` is stored in
    ``self.last_tree_built_time``. A message is printed before and after.
    """
    # print(...) with a single string argument prints the same text on
    # Python 2 and Python 3; the former print statements were a SyntaxError
    # on Python 3.
    print('rebuild tree')
    self.tree = BallTree(self.state[:self.items, :], leaf_size=self.size)
    self.last_tree_built_time = time
    print('rebuild done')
Example #17
Source File: entropy_estimators.py From NPEET with MIT License | 5 votes |
def build_tree(points):
    """Return a Chebyshev-metric neighbor tree over ``points``.

    A BallTree is used when ``points`` has 20 or more columns, a KDTree
    otherwise.
    """
    tree_cls = BallTree if points.shape[1] >= 20 else KDTree
    return tree_cls(points, metric='chebyshev')


# TESTS
Example #18
Source File: hamming_ann.py From klsh with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit(self, X):
    """Validate ``X`` and index it in a Hamming-metric BallTree.

    The tree is stored on ``self._tree``; returns ``self``.
    """
    validated = self._validate_input(X, return_compact=False)
    self._tree = BallTree(validated,
                          metric='hamming',
                          leaf_size=self.leaf_size)
    return self
Example #19
Source File: test_t_sne.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_barnes_hut_angle():
    """Check that Barnes-Hut with angle=0 agrees with the exact method.

    For 2 and 3 output components, the exact joint probabilities and KL
    divergence are compared with those from the nearest-neighbor / Barnes-Hut
    code path when all other samples are used as neighbors.
    """
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    n_features = 5
    k = n_samples - 1  # all other samples count as neighbors

    for n_components in [2, 3]:
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)

        # Exact computation.
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, _ = _kl_divergence(
            params, P, degrees_of_freedom, n_samples, n_components)

        # Neighbor-based computation: ask for k + 1, drop the first column.
        bt = BallTree(distances)
        _, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        distances_nn = np.array(
            [distances[idx, neighbors_nn[idx]] for idx in range(n_samples)])
        row0 = distances[0, neighbors_nn[0]]
        assert np.all(row0 == distances_nn[0]), \
            abs(row0 - distances_nn[0])

        P_bh = _joint_probabilities_nn(
            distances_nn, neighbors_nn, perplexity, verbose=0)
        kl_bh, _ = _kl_divergence_bh(
            params, P_bh, degrees_of_freedom, n_samples, n_components,
            angle=angle, skip_num_points=0, verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3)
Example #20
Source File: test_utils.py From numpy-ml with GNU General Public License v3.0 | 4 votes |
def test_ball_tree(N=1):
    """Compare the local BallTree against scikit-learn's on random data.

    Runs ``N`` trials. Each trial draws a random data matrix and query point,
    asks both implementations for the ``k`` nearest neighbors, sorts both
    results by distance and checks that neighbors and distances agree.

    Parameters
    ----------
    N : int
        Number of random trials to run.
    """
    np.random.seed(12345)

    # The sample count uses its own name. It used to overwrite ``N`` inside
    # the loop, so the number of trials depended on random draws instead of
    # on the argument.
    for _ in range(N):
        n_samples = np.random.randint(2, 100)
        n_features = np.random.randint(2, 100)
        k = np.random.randint(1, n_samples)
        ls = np.min([np.random.randint(1, 10), n_samples - 1])

        X = np.random.rand(n_samples, n_features)
        BT = BallTree(leaf_size=ls, metric=euclidean)
        BT.fit(X)

        x = np.random.rand(n_features)
        mine = BT.nearest_neighbors(k, x)
        assert len(mine) == k

        mine_neighb = np.array([n.key for n in mine])
        mine_dist = np.array([n.distance for n in mine])

        # Order our results by distance.
        sort_ix = np.argsort(mine_dist)
        mine_dist = mine_dist[sort_ix]
        mine_neighb = mine_neighb[sort_ix]

        sk = sk_BallTree(X, leaf_size=ls)
        theirs_dist, ind = sk.query(x.reshape(1, -1), k=k)
        sort_ix = np.argsort(theirs_dist.flatten())

        theirs_dist = theirs_dist.flatten()[sort_ix]
        theirs_neighb = X[ind.flatten()[sort_ix]]

        for j in range(len(theirs_dist)):
            np.testing.assert_almost_equal(mine_neighb[j], theirs_neighb[j])
            np.testing.assert_almost_equal(mine_dist[j], theirs_dist[j])

        print("PASSED")


#######################################################################
#                               Graphs                                #
#######################################################################
Example #21
Source File: knn.py From pyod with BSD 2-Clause "Simplified" License | 4 votes |
def fit(self, X, y=None):
    """Fit detector. y is ignored in unsupervised methods.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : Ignored
        Not used for fitting, present for API consistency by convention
        (it is only handed to ``self._set_n_classes``).

    Returns
    -------
    self : object
        Fitted estimator.
    """
    # validate inputs X and y (optional)
    X = check_array(X)
    self._set_n_classes(y)

    self.neigh_.fit(X)

    # In certain cases, _tree does not exist for NearestNeighbors
    # See Issue #158 (https://github.com/yzhao062/pyod/issues/158)
    # n_neighbors = 100
    fitted_tree = self.neigh_._tree
    if fitted_tree is not None:
        self.tree_ = fitted_tree
    else:
        # Build our own tree, forwarding metric arguments only if supplied.
        extra_metric_args = ({} if self.metric_params is None
                             else self.metric_params)
        self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                              metric=self.metric,
                              **extra_metric_args)

    dist_arr, _ = self.neigh_.kneighbors(n_neighbors=self.n_neighbors,
                                         return_distance=True)

    self.decision_scores_ = self._get_dist_by_method(dist_arr).ravel()
    self._process_decision_scores()
    return self