Python sklearn.neighbors.BallTree() Examples

The following are 21 code examples of sklearn.neighbors.BallTree(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.neighbors , or try the search function .
Example #1
Source File: test_neighbors.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_unsupervised_inputs():
    # test the types of valid input into NearestNeighbors
    X = rng.random_sample((10, 3))

    nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
    nbrs_fid.fit(X)

    dist1, ind1 = nbrs_fid.kneighbors(X)

    nbrs = neighbors.NearestNeighbors(n_neighbors=1)

    for input in (nbrs_fid, neighbors.BallTree(X), neighbors.KDTree(X)):
        nbrs.fit(input)
        dist2, ind2 = nbrs.kneighbors(X)

        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2) 
Example #2
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_unsupervised_inputs():
    # test the types of valid input into NearestNeighbors
    X = rng.random_sample((10, 3))

    nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
    nbrs_fid.fit(X)

    dist1, ind1 = nbrs_fid.kneighbors(X)

    nbrs = neighbors.NearestNeighbors(n_neighbors=1)

    for input in (nbrs_fid, neighbors.BallTree(X), neighbors.KDTree(X)):
        nbrs.fit(input)
        dist2, ind2 = nbrs.kneighbors(X)

        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2) 
Example #3
Source File: fMRI.py    From mmvt with GNU General Public License v3.0 6 votes vote down vote up
def calc_vert_vals(verts, pts, vals, method='max', k_points=100):
    from sklearn.neighbors import BallTree
    ball_tree = BallTree(pts)
    k_points = min([k_points, len(pts)])
    dists, pts_inds = ball_tree.query(verts, k=k_points, return_distance=True)
    near_vals = vals[pts_inds]
    # sig_dists = dists[np.where(abs(near_vals)>2)]
    cover = len(np.unique(pts_inds.ravel()))/float(len(pts))
    print('{}% of the points are covered'.format(cover*100))
    if method == 'dist':
        n_dists = 1/(dists**2)
        norm = 1/np.sum(n_dists, 1)
        norm = np.reshape(norm, (len(norm), 1))
        n_dists = norm * n_dists
        verts_vals = np.sum(near_vals * n_dists, 1)
    elif method == 'max':
        verts_vals = near_vals[range(near_vals.shape[0]), np.argmax(abs(near_vals), 1)]
    return verts_vals 
Example #4
Source File: test_neighbors.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.neighbors.NearestNeighbors,
                      neighbors.NearestNeighbors)
        self.assertIs(df.neighbors.KNeighborsClassifier,
                      neighbors.KNeighborsClassifier)
        self.assertIs(df.neighbors.RadiusNeighborsClassifier,
                      neighbors.RadiusNeighborsClassifier)
        self.assertIs(df.neighbors.KNeighborsRegressor,
                      neighbors.KNeighborsRegressor)
        self.assertIs(df.neighbors.RadiusNeighborsRegressor,
                      neighbors.RadiusNeighborsRegressor)
        self.assertIs(df.neighbors.NearestCentroid, neighbors.NearestCentroid)
        self.assertIs(df.neighbors.BallTree, neighbors.BallTree)
        self.assertIs(df.neighbors.KDTree, neighbors.KDTree)
        self.assertIs(df.neighbors.DistanceMetric, neighbors.DistanceMetric)
        self.assertIs(df.neighbors.KernelDensity, neighbors.KernelDensity) 
Example #5
Source File: test_distances.py    From pynndescent with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def test_haversine():
    tree = BallTree(spatial_data[:, :2], metric="haversine")
    dist_matrix, _ = tree.query(spatial_data[:, :2], k=spatial_data.shape[0])
    test_matrix = np.array(
        [
            [
                dist.haversine(spatial_data[i, :2], spatial_data[j, :2])
                for j in range(spatial_data.shape[0])
            ]
            for i in range(spatial_data.shape[0])
        ]
    )
    test_matrix.sort(axis=1)
    assert_array_almost_equal(
        test_matrix,
        dist_matrix,
        err_msg="Distances don't match " "for metric haversine",
    ) 
Example #6
Source File: mahalanobis_batch.py    From perfect_match with MIT License 6 votes vote down vote up
def make_propensity_lists(self, train_ids, benchmark):
        input_data, ids, pair_data = benchmark.get_data_access().get_rows(train_ids)
        assignments = map(benchmark.get_assignment, ids, input_data)
        treatment_data, batch_y = zip(*assignments)
        treatment_data = np.array(treatment_data)

        if pair_data.shape[-1] > 200 and False:
            self.pca = PCA(50, svd_solver="randomized")
            pair_data = self.pca.fit_transform(pair_data)
        else:
            self.pca = None

        # covariance_matrix = np.cov(pair_data, rowvar=False)
        self.original_data = [pair_data[treatment_data == t]
                              for t in range(benchmark.get_num_treatments())]
        # self.ball_trees = [BallTree(pair_data[treatment_data == t], metric="mahalanobis",
        #                             V=covariance_matrix)
        #                    for t in range(benchmark.get_num_treatments())]
        self.ball_trees = [BallTree(pair_data[treatment_data == t])
                           for t in range(benchmark.get_num_treatments())]
        self.treatment_ids = [ids[treatment_data == t]
                              for t in range(benchmark.get_num_treatments())] 
Example #7
Source File: test_umap_metrics.py    From umap with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_haversine(spatial_data):
    tree = BallTree(spatial_data[:, :2], metric="haversine")
    dist_matrix, _ = tree.query(spatial_data[:, :2], k=spatial_data.shape[0])
    test_matrix = np.array(
        [
            [
                dist.haversine(spatial_data[i, :2], spatial_data[j, :2])
                for j in range(spatial_data.shape[0])
            ]
            for i in range(spatial_data.shape[0])
        ]
    )
    test_matrix.sort(axis=1)
    assert_array_almost_equal(
        test_matrix,
        dist_matrix,
        err_msg="Distances don't match " "for metric haversine",
    ) 
Example #8
Source File: utils.py    From mdentropy with MIT License 6 votes vote down vote up
def avgdigamma(data, dvec, leaf_size=16):
    """Convenience function for finding expectation value of <psi(nx)> given
    some number of neighbors in some radius in a marginal space.

    Parameters
    ----------
    points : numpy.ndarray
    dvec : array_like (n_points,)
    Returns
    -------
    avgdigamma : float
        expectation value of <psi(nx)>
    """
    tree = BallTree(data, leaf_size=leaf_size, p=float('inf'))

    n_points = tree.query_radius(data, dvec - EPS, count_only=True)

    return digamma(n_points).mean() 
Example #9
Source File: recommender.py    From atap with Apache License 2.0 5 votes vote down vote up
def transform(self, documents):
        return [
            BallTree(documents)
        ] 
Example #10
Source File: test_t_sne.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                              n_samples, n_components)

        k = n_samples - 1
        bt = BallTree(distances)
        distances_nn, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        distances_nn = np.array([distances[i, neighbors_nn[i]]
                                 for i in range(n_samples)])
        assert np.all(distances[0, neighbors_nn[0]] == distances_nn[0]),\
            abs(distances[0, neighbors_nn[0]] - distances_nn[0])
        P_bh = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                       perplexity, verbose=0)
        kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
                                           n_samples, n_components,
                                           angle=angle, skip_num_points=0,
                                           verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3) 
Example #11
Source File: knn.py    From pyodds with MIT License 5 votes vote down vote up
def fit(self, X):
        """Fit detector. y is optional for unsupervised methods.

        Parameters
        ----------
        X : dataframe of shape (n_samples, n_features)
            The input samples.
        """

        # validate inputs X and y (optional)
        X = X.to_numpy()

        if self.metric_params is not None:
            self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                                  metric=self.metric,
                                  **self.metric_params)
        else:
            self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                                  metric=self.metric)
        self.neigh_.fit(X)

        dist_arr, _ = self.neigh_.kneighbors(n_neighbors=self.n_neighbors,
                                             return_distance=True)
        dist = self._get_dist_by_method(dist_arr)

        self.decision_scores_ = dist.ravel()
        self._process_decision_scores()

        return self 
Example #12
Source File: Advanced_server.py    From Autocomplete-System with MIT License 5 votes vote down vote up
def __init__(self, num_corrections=10, num_basic_results=10,
                 home_dir=".",
                 embedding_json=None,
                 vocab_int_json=None, *args, **kwargs):
        super().__init__(num_res_return=num_basic_results, *args, **kwargs)

        self.use_embedding = False

        if embedding_json and vocab_int_json:
            self.use_embedding = True
            embedding_json = path.join(home_dir, embedding_json)
            vocab_int_json = path.join(home_dir, vocab_int_json)
            # load json files
            print("Loading JSON files, may take a while.")
            with open(embedding_json, 'r') as read_file:
                self.embeddings = np.array(json.load(read_file))
            with open(vocab_int_json, 'r') as read_file:
                self.vocab_int = json.load(read_file)
            self.int_vocab = {i: word for word, i in self.vocab_int.items()}

            # train k nearest neighbor model
            print("Training BallTree k-nearest neighbor searcher...")
            self.searcher = BallTree(self.embeddings, leaf_size=10)

        self.checker = Spell.Spell()
        self.num_corrections = num_corrections
        self.num_basic_search_results = num_basic_results
        self.max_total_res = min(10, num_basic_results+num_corrections)

        print("Ready to use.") 
Example #13
Source File: center_batch_generator.py    From 3d-semantic-segmentation with MIT License 5 votes vote down vote up
def _calc_ball_trees(self, metric='euclidean'):
        ball_trees = []
        for pointcloud_data in tqdm(self.dataset.data, desc='Ball trees have to be calculated from scratch'):
            ball_trees.append(BallTree(pointcloud_data[:, :2], metric=metric))
        return ball_trees 
Example #14
Source File: evaluation.py    From 3d-semantic-segmentation with MIT License 5 votes vote down vote up
def knn_interpolation(cumulated_pc: np.ndarray, full_sized_data: np.ndarray, k=5):
    """
    Using k-nn interpolation to find labels of points of the full sized pointcloud
    :param cumulated_pc: cumulated pointcloud results after running the network
    :param full_sized_data: full sized point cloud
    :param k: k for k nearest neighbor interpolation
    :return: pointcloud with predicted labels in last column and ground truth labels in last but one column
    """

    labeled = cumulated_pc[cumulated_pc[:, -1] != -1]
    to_be_predicted = full_sized_data.copy()

    ball_tree = BallTree(labeled[:, :3], metric='euclidean')

    knn_classes = labeled[ball_tree.query(to_be_predicted[:, :3], k=k)[1]][:, :, -1].astype(int)

    interpolated = np.zeros(knn_classes.shape[0])

    for i in range(knn_classes.shape[0]):
        interpolated[i] = np.bincount(knn_classes[i]).argmax()

    output = np.zeros((to_be_predicted.shape[0], to_be_predicted.shape[1]+1))
    output[:, :-1] = to_be_predicted

    output[:, -1] = interpolated

    return output 
Example #15
Source File: recommender.py    From atap with Apache License 2.0 5 votes vote down vote up
def fit_transform(self, documents):
        # Transformer will be False if pipeline hasn't been fit yet,
        # Trigger fit_transform and save the transformer and lexicon.
        if self.transformer == False:
            self.transformer = Pipeline([
                ('norm', TextNormalizer(minimum=50, maximum=200)),
                ('transform', Pipeline([
                    ('tfidf', TfidfVectorizer()),
                    ('svd', TruncatedSVD(n_components=200))
                ])
                 )
            ])
            self.lexicon = self.transformer.fit_transform(documents)
            self.tree = BallTree(self.lexicon)
            self.save() 
Example #16
Source File: EC_functions.py    From Model-Free-Episodic-Control with MIT License 5 votes vote down vote up
def update_tree(self, time):
        print 'rebuild tree'
        self.tree = BallTree(self.state[:self.items, :], leaf_size=self.size)
        self.last_tree_built_time = time
        print 'rebuild done' 
Example #17
Source File: entropy_estimators.py    From NPEET with MIT License 5 votes vote down vote up
def build_tree(points):
    if points.shape[1] >= 20:
        return BallTree(points, metric='chebyshev')
    return KDTree(points, metric='chebyshev')

# TESTS 
Example #18
Source File: hamming_ann.py    From klsh with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def fit(self, X):
        X = self._validate_input(X, return_compact=False)
        self._tree = BallTree(X, metric='hamming', leaf_size=self.leaf_size)
        return self 
Example #19
Source File: test_t_sne.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                              n_samples, n_components)

        k = n_samples - 1
        bt = BallTree(distances)
        distances_nn, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        distances_nn = np.array([distances[i, neighbors_nn[i]]
                                 for i in range(n_samples)])
        assert np.all(distances[0, neighbors_nn[0]] == distances_nn[0]),\
            abs(distances[0, neighbors_nn[0]] - distances_nn[0])
        P_bh = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                       perplexity, verbose=0)
        kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
                                           n_samples, n_components,
                                           angle=angle, skip_num_points=0,
                                           verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3) 
Example #20
Source File: test_utils.py    From numpy-ml with GNU General Public License v3.0 4 votes vote down vote up
def test_ball_tree(N=1):
    np.random.seed(12345)
    i = 0
    while i < N:
        N = np.random.randint(2, 100)
        M = np.random.randint(2, 100)
        k = np.random.randint(1, N)
        ls = np.min([np.random.randint(1, 10), N - 1])

        X = np.random.rand(N, M)
        BT = BallTree(leaf_size=ls, metric=euclidean)
        BT.fit(X)

        x = np.random.rand(M)
        mine = BT.nearest_neighbors(k, x)
        assert len(mine) == k

        mine_neighb = np.array([n.key for n in mine])
        mine_dist = np.array([n.distance for n in mine])

        sort_ix = np.argsort(mine_dist)
        mine_dist = mine_dist[sort_ix]
        mine_neighb = mine_neighb[sort_ix]

        sk = sk_BallTree(X, leaf_size=ls)
        theirs_dist, ind = sk.query(x.reshape(1, -1), k=k)
        sort_ix = np.argsort(theirs_dist.flatten())

        theirs_dist = theirs_dist.flatten()[sort_ix]
        theirs_neighb = X[ind.flatten()[sort_ix]]

        for j in range(len(theirs_dist)):
            np.testing.assert_almost_equal(mine_neighb[j], theirs_neighb[j])
            np.testing.assert_almost_equal(mine_dist[j], theirs_dist[j])

        print("PASSED")
        i += 1


#######################################################################
#                               Graphs                                #
####################################################################### 
Example #21
Source File: knn.py    From pyod with BSD 2-Clause "Simplified" License 4 votes vote down vote up
def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """

        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        self.neigh_.fit(X)

        # In certain cases, _tree does not exist for NearestNeighbors
        # See Issue #158 (https://github.com/yzhao062/pyod/issues/158)
        # n_neighbors = 100
        if self.neigh_._tree is not None:
            self.tree_ = self.neigh_._tree

        else:
            if self.metric_params is not None:
                self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                                      metric=self.metric,
                                      **self.metric_params)
            else:
                self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                                      metric=self.metric)

        dist_arr, _ = self.neigh_.kneighbors(n_neighbors=self.n_neighbors,
                                             return_distance=True)
        dist = self._get_dist_by_method(dist_arr)

        self.decision_scores_ = dist.ravel()
        self._process_decision_scores()

        return self