Python faiss.IndexFlatL2() Examples
The following are 28
code examples of faiss.IndexFlatL2().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module faiss, or try the search function.
Example #1
Source File: faissext.py From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0 | 7 votes |
def test_knn_search(size=10000, gpu_id=None):
    """Smoke-test a brute-force L2 nearest-neighbour search on random data.

    Indexes ``size`` random 512-dimensional float32 vectors in a flat L2
    index, queries the index with the same vectors for 21 neighbours each,
    and prints the build time, the search time, the result shapes and the
    first five result rows.

    Args:
        size (int): number of random vectors to index and to query with.
        gpu_id (int or None): when None a CPU ``IndexFlatL2`` is used,
            otherwise a ``GpuIndexFlatL2`` is created on this device id.
    """
    x = np.random.rand(size, 512).astype('float32')
    d = x.shape[1]

    tic = time.time()
    if gpu_id is None:
        index = faiss.IndexFlatL2(d)
    else:
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = gpu_id
        # Bound to a local so the resources object outlives the index usage.
        resources = faiss.StandardGpuResources()
        index = faiss.GpuIndexFlatL2(resources, d, cfg)
    index.add(x)
    print('Index built in {} sec'.format(time.time() - tic))

    # Restart the clock: the search figure must not include the build time.
    tic = time.time()
    distances, I = index.search(x, 21)
    print('Searched in {} sec'.format(time.time() - tic))

    print(distances.shape)
    print(I.shape)
    print(distances[:5])
    print(I[:5])
Example #2
Source File: faiss.py From gntp with MIT License | 7 votes |
def _build_approximate_index(self, data: np.ndarray):
    """Train and fill an IVF-flat index on GPU 0 and store it in ``self.index``.

    The coarse quantizer and the IVF metric follow ``self.kernel_name``:
    L2 for the ``'rbf'`` kernel, inner product for every other kernel.

    Args:
        data: 2-D array; ``data.shape[1]`` is used as the index
            dimensionality and ``data.shape[0]`` decides the number of
            inverted lists (100 when there are more than 100 rows, else 2).
    """
    dimensionality = data.shape[1]
    nlist = 100 if data.shape[0] > 100 else 2

    if self.kernel_name in {'rbf'}:
        quantizer = faiss.IndexFlatL2(dimensionality)
        metric = faiss.METRIC_L2
    else:
        quantizer = faiss.IndexFlatIP(dimensionality)
        # IndexIVFFlat defaults to METRIC_L2; pass the inner-product metric
        # explicitly so the IVF lists rank by the same measure as the
        # quantizer instead of silently mixing IP and L2.
        metric = faiss.METRIC_INNER_PRODUCT
    cpu_index_flat = faiss.IndexIVFFlat(quantizer, dimensionality, nlist, metric)

    gpu_index_ivf = faiss.index_cpu_to_gpu(self.resource, 0, cpu_index_flat)
    gpu_index_ivf.train(data)
    gpu_index_ivf.add(data)
    self.index = gpu_index_ivf
Example #3
Source File: __init__.py From bbknn with MIT License | 6 votes |
def create_tree(data,approx,metric,use_faiss,n_trees):
    '''
    Build the nearest neighbour lookup structure for one batch.

    The result is an annoy index when ``approx`` is set; otherwise a faiss
    flat L2 index or a cKDTree for the euclidean metric, and a KDTree for
    any other metric. All undescribed input as in ``bbknn.bbknn()``.
    Returns the resulting index.

    Input
    -----
    data : ``numpy.array``
        PCA coordinates of a batch's cells to index.
    '''
    if approx:
        annoy_index = AnnoyIndex(data.shape[1],metric=metric)
        for row in np.arange(data.shape[0]):
            annoy_index.add_item(row,data[row,:])
        annoy_index.build(n_trees)
        return annoy_index

    if metric != 'euclidean':
        return KDTree(data,metric=metric)

    # faiss is only used when it has actually been imported and requested
    if 'faiss' in sys.modules and use_faiss:
        flat_index = faiss.IndexFlatL2(data.shape[1])
        flat_index.add(data)
        return flat_index
    return cKDTree(data)
Example #4
Source File: stat_utils.py From pytorch-metric-learning with MIT License | 6 votes |
def run_kmeans(x, nmb_clusters):
    """
    Cluster ``x`` with faiss k-means and report the cluster of every row.

    Args:
        x: data
        nmb_clusters (int): number of clusters
    Returns:
        list: for each row of x, the id (int) of its nearest centroid
    """
    _, dim = x.shape
    logging.info("running k-means clustering with k=%d"%nmb_clusters)
    logging.info("embedding dimensionality is %d"%dim)

    # faiss implementation of k-means
    clustering = faiss.Clustering(dim, nmb_clusters)
    clustering.niter = 20
    clustering.max_points_per_centroid = 10000000

    centroid_index = faiss.IndexFlatL2(dim)
    if faiss.get_num_gpus() > 0:
        centroid_index = faiss.index_cpu_to_all_gpus(centroid_index)

    # perform the training, then look up the single closest entry of the
    # trained index for every sample
    clustering.train(x, centroid_index)
    _, assignments = centroid_index.search(x, 1)

    return [int(hit[0]) for hit in assignments]

# modified from https://github.com/facebookresearch/faiss/wiki/Faiss-building-blocks:-clustering,-PCA,-quantization
Example #5
Source File: losses.py From SO-Net with MIT License | 6 votes |
def build_nn_index(self, database):
    '''
    Index ``database`` in a flat L2 Faiss index placed on the GPU.

    :param database: numpy array of Nx3
    :return: Faiss index, created on the CPU and then transferred to the
        GPU given by ``self.opt.gpu_id``
    '''
    # Alternative kept for reference: build the GPU index directly with
    # faiss.GpuIndexFlatL2(self.res, self.dimension, self.flat_config)
    cpu_side = faiss.IndexFlatL2(self.dimension)  # dimension is 3
    gpu_side = faiss.index_cpu_to_gpu(self.res, self.opt.gpu_id, cpu_side)
    gpu_side.add(database)
    return gpu_side
Example #6
Source File: data.py From dynamicslearn with MIT License | 6 votes |
def cluster(vectorized, ncentroids):
    """Reduce a data set to one representative row per k-means centroid.

    Runs faiss k-means (50 iterations, verbose) on ``vectorized`` and, for
    every centroid, picks the row of ``vectorized`` that is closest to it
    under L2 distance.

    Args:
        vectorized: 2-D array of samples; ``shape[1]`` is the feature
            dimensionality (faiss presumably expects float32 - confirm
            against the caller).
        ncentroids (int): number of k-means clusters.

    Returns:
        Array of shape (ncentroids, d) holding the selected rows.
    """
    import faiss

    x = vectorized
    d = x.shape[1]

    kmeans = faiss.Kmeans(d, ncentroids, niter=50, verbose=True)
    kmeans.train(x)

    index = faiss.IndexFlatL2(d)
    index.add(x)
    _, nearest = index.search(kmeans.centroids, 1)

    # ``nearest`` has one column (k=1). Select that column explicitly rather
    # than calling squeeze() on x[nearest, :], which also dropped the
    # centroid axis for ncentroids == 1 and the feature axis for d == 1.
    return x[nearest[:, 0], :]
Example #7
Source File: RecognitionThread.py From TUT-live-age-estimator with MIT License | 6 votes |
def initialize_celeb(self):
    """Load the celebrity feature network and build the feature index.

    Sets ``self.siameseNet`` (Keras model loaded from ``feature_model.h5``
    under ``self.siamesepath``), ``self.path_ends`` and ``self.celeb_files``
    (read from the feature HDF5 file) and ``self.celeb_index`` (flat L2
    Faiss index over the stored features).
    """
    print("Initializing celebrity network...")

    custom_objects = {'relu6': keras.layers.ReLU(6.),
                      'DepthwiseConv2D': keras.layers.DepthwiseConv2D,
                      'lifted_struct_loss': lifted_struct_loss,
                      'triplet_loss': triplet_loss}
    with CustomObjectScope(custom_objects):
        model_file = os.path.join(self.siamesepath, "feature_model.h5")
        self.siameseNet = keras.models.load_model(model_file)
        self.siameseNet._make_predict_function()

    ##### Read celebrity features
    celebrity_features = os.sep.join(
        [self.siamesepath, "features_" + self.celeb_dataset + ".h5"])
    print("Reading celebrity data from {}...".format(celebrity_features))

    with h5py.File(celebrity_features, "r") as h5:
        celeb_features = np.array(h5["features"]).astype(np.float32)
        self.path_ends = list(h5["path_ends"])
        # stored path ends are bytes; decode before joining
        self.celeb_files = [
            os.path.join(self.visualization_path, ending.decode("utf-8"))
            for ending in self.path_ends
        ]

    print("Building index...")
    self.celeb_index = faiss.IndexFlatL2(celeb_features.shape[1])
    self.celeb_index.add(celeb_features)
Example #8
Source File: run_index.py From denspi with Apache License 2.0 | 6 votes |
def train_coarse_quantizer(data, quantizer_path, num_clusters, hnsw=False, niter=10, cuda=False):
    """Cluster ``data`` and save an index over the centroids as a quantizer.

    Args:
        data: 2-D array of training vectors; ``shape[1]`` is the dimension.
        quantizer_path: file the resulting quantizer index is written to.
        num_clusters: number of k-means centroids.
        hnsw: store the centroids in an ``IndexHNSWFlat`` (32 links,
            efSearch 128) instead of an ``IndexFlatL2``.
        niter: number of clustering iterations.
        cuda: run the clustering assignment index on GPU 0.
    """
    dim = data.shape[1]

    assign_index = faiss.IndexFlatL2(dim)
    if cuda:
        # make it into a gpu index
        gpu_res = faiss.StandardGpuResources()
        assign_index = faiss.index_cpu_to_gpu(gpu_res, 0, assign_index)

    clustering = faiss.Clustering(dim, num_clusters)
    clustering.verbose = True
    clustering.niter = niter
    clustering.train(data, assign_index)

    flat_centroids = faiss.vector_float_to_array(clustering.centroids)
    centroids = flat_centroids.reshape(num_clusters, dim)

    if not hnsw:
        quantizer = faiss.IndexFlatL2(dim)
    else:
        quantizer = faiss.IndexHNSWFlat(dim, 32)
        quantizer.hnsw.efSearch = 128
        quantizer.train(centroids)
    quantizer.add(centroids)

    faiss.write_index(quantizer, quantizer_path)
Example #9
Source File: reranking.py From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0 | 5 votes |
def __init__(self, database, method):
    """Create a flat Faiss index for ``method`` and add the database to it.

    ``method`` must be ``'cosine'`` (inner-product index) or ``'euclidean'``
    (L2 index). When the CUDA_VISIBLE_DEVICES environment variable is set
    and non-empty, the index is moved to all GPUs before ``self.add()``.
    """
    super().__init__(database, method)

    index_types = {'cosine': faiss.IndexFlatIP,
                   'euclidean': faiss.IndexFlatL2}
    self.index = index_types[method](self.D)

    visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
    if visible_devices:
        print('CUDA', visible_devices)
        self.index = faiss.index_cpu_to_all_gpus(self.index)

    self.add()
Example #10
Source File: stat_utils.py From pytorch-metric-learning with MIT License | 5 votes |
def get_knn(
    reference_embeddings, test_embeddings, k, embeddings_come_from_same_source=False
):
    """
    Look up, for every test embedding, its k closest reference embeddings.

    The search always asks the index for k + 1 neighbors so that the first
    hit can be discarded when queries and references are the same set.

    Args:
        reference_embeddings: numpy array of size (num_samples, dimensionality).
        test_embeddings: numpy array of size (num_samples2, dimensionality).
        k: int, number of nearest neighbors to find
        embeddings_come_from_same_source: if True, then the nearest neighbor of
            each element (which is actually itself) will be ignored.
    Returns:
        numpy array: indices of nearest k neighbors
        numpy array: corresponding distances
    """
    dim = reference_embeddings.shape[1]
    logging.info("running k-nn with k=%d"%k)
    logging.info("embedding dimensionality is %d"%dim)

    knn_index = faiss.IndexFlatL2(dim)
    if faiss.get_num_gpus() > 0:
        knn_index = faiss.index_cpu_to_all_gpus(knn_index)
    knn_index.add(reference_embeddings)

    distances, indices = knn_index.search(test_embeddings, k + 1)

    # same source: skip column 0; otherwise keep the first k columns
    if embeddings_come_from_same_source:
        keep = slice(1, None)
    else:
        keep = slice(None, k)
    return indices[:, keep], distances[:, keep]

# modified from https://raw.githubusercontent.com/facebookresearch/deepcluster/
Example #11
Source File: knn.py From diffusion with MIT License | 5 votes |
def __init__(self, database, method, M=128, nbits=8, nlist=316, nprobe=64):
    """Build, train and fill an IVF-PQ index over ``database``.

    Args:
        database: array of vectors; a random fifth of its rows is used for
            training the index.
        method: ``'cosine'`` (inner-product quantizer) or ``'euclidean'``
            (L2 quantizer).
        M, nbits, nlist: forwarded to ``faiss.IndexIVFPQ``.
        nprobe: value assigned to ``self.index.nprobe`` after adding.
    """
    super().__init__(database, method)

    quantizer_types = {'cosine': faiss.IndexFlatIP,
                       'euclidean': faiss.IndexFlatL2}
    self.quantizer = quantizer_types[method](self.D)
    self.index = faiss.IndexIVFPQ(self.quantizer, self.D, nlist, M, nbits)

    # train on the first N // 5 rows of a random permutation
    shuffled = np.random.permutation(np.arange(self.N))
    samples = database[shuffled[:self.N // 5]]
    print("[ANN] train")
    self.index.train(samples)

    self.add()
    self.index.nprobe = nprobe
Example #12
Source File: knn.py From diffusion with MIT License | 5 votes |
def __init__(self, database, method):
    """Create a flat Faiss index for ``method`` and add the database to it.

    ``method`` selects the index type: ``'cosine'`` gives an inner-product
    index, ``'euclidean'`` an L2 index. The index is moved to all GPUs when
    the CUDA_VISIBLE_DEVICES environment variable is set and non-empty.
    """
    super().__init__(database, method)

    flat_index_for = {'cosine': faiss.IndexFlatIP,
                      'euclidean': faiss.IndexFlatL2}
    make_index = flat_index_for[method]
    self.index = make_index(self.D)

    if os.environ.get('CUDA_VISIBLE_DEVICES'):
        self.index = faiss.index_cpu_to_all_gpus(self.index)

    self.add()
Example #13
Source File: streamlit_demo.py From RecNN with Apache License 2.0 | 5 votes |
def get_index():
    """Build L2, inner-product and cosine Faiss indexes over the embeddings.

    The embedding matrix is the first value returned by ``get_embeddings()``;
    it is converted with ``.numpy()`` and cast to float32 (presumably a torch
    tensor - confirm against ``get_embeddings``). The cosine index is an
    inner-product index filled with L2-normalised rows.

    Returns:
        dict: ``{'L2': ..., 'IP': ..., 'COS': ...}`` mapping to the indexes.
    """
    import faiss
    from sklearn.preprocessing import normalize

    mov_mat, _, _ = get_embeddings()
    mov_mat = mov_mat.numpy().astype('float32')

    # Size the indexes from the data instead of assuming 128 dimensions, so
    # a differently sized embedding does not fail inside ``add``.
    dim = mov_mat.shape[1]

    # test indexes
    indexL2 = faiss.IndexFlatL2(dim)
    indexIP = faiss.IndexFlatIP(dim)
    indexCOS = faiss.IndexFlatIP(dim)

    indexL2.add(mov_mat)
    indexIP.add(mov_mat)
    indexCOS.add(normalize(mov_mat, axis=1, norm='l2'))

    return {'L2': indexL2, 'IP': indexIP, 'COS': indexCOS}
Example #14
Source File: faiss.py From ann-benchmarks with MIT License | 5 votes |
def fit(self, X):
    """Train an IVF-flat L2 index on ``X`` and store it in ``self.index``.

    Rows are L2-normalised first when ``self._metric`` is ``'angular'``, and
    the data is cast to float32 when it has another dtype. The coarse
    quantizer is kept in ``self.quantizer``.
    """
    if self._metric == 'angular':
        X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
    if X.dtype != numpy.float32:
        X = X.astype(numpy.float32)

    dim = X.shape[1]
    self.quantizer = faiss.IndexFlatL2(dim)
    ivf_index = faiss.IndexIVFFlat(
        self.quantizer, dim, self._n_list, faiss.METRIC_L2)
    ivf_index.train(X)
    ivf_index.add(X)

    self.index = ivf_index
Example #15
Source File: lossess.py From RL-GAN-Net with MIT License | 5 votes |
def build_nn_index(self, database):
    '''
    Create a flat L2 Faiss index on the CPU, transfer it to the GPU and
    add ``database`` to it.

    :param database: numpy array of Nx3
    :return: Faiss index on the GPU selected by ``self.opt.gpu_id``
    '''
    # Direct GPU construction, kept for reference:
    # faiss.GpuIndexFlatL2(self.res, self.dimension, self.flat_config)
    host_index = faiss.IndexFlatL2(self.dimension)  # dimension is 3
    device_index = faiss.index_cpu_to_gpu(self.res, self.opt.gpu_id, host_index)
    device_index.add(database)
    return device_index
Example #16
Source File: lossess.py From RL-GAN-Net with MIT License | 5 votes |
def build_nn_index(self, database):
    '''
    Return a GPU-resident flat L2 Faiss index containing ``database``.

    :param database: numpy array of Nx3
    :return: Faiss index, built on the CPU and copied to GPU
        ``self.opt.gpu_id`` before the data is added
    '''
    # Reference alternative that skips the CPU index:
    # faiss.GpuIndexFlatL2(self.res, self.dimension, self.flat_config)
    flat_cpu = faiss.IndexFlatL2(self.dimension)  # dimension is 3
    flat_gpu = faiss.index_cpu_to_gpu(self.res, self.opt.gpu_id, flat_cpu)
    flat_gpu.add(database)
    return flat_gpu
Example #17
Source File: faiss.py From gntp with MIT License | 5 votes |
def _build_exact_index(self, data: np.ndarray):
    """Build an exact (flat) index over ``data`` and store it in ``self.index``.

    An L2 index is used for the ``'rbf'`` kernel and an inner-product index
    for every other kernel. The CPU index is always kept in
    ``self.cpu_index_flat``; ``self.index`` is that same object when
    ``self.cpu`` is set, otherwise a copy on GPU 0.
    """
    dimensionality = data.shape[1]

    wants_l2 = self.kernel_name in {'rbf'}
    index_type = faiss.IndexFlatL2 if wants_l2 else faiss.IndexFlatIP
    self.cpu_index_flat = index_type(dimensionality)

    if self.cpu:
        self.index = self.cpu_index_flat
    else:
        self.index = faiss.index_cpu_to_gpu(self.resource, 0, self.cpu_index_flat)

    self.index.add(data)
Example #18
Source File: auxiliaries.py From Deep-Metric-Learning-Baselines with Apache License 2.0 | 5 votes |
def recover_closest_one_dataset(feature_matrix_all, image_paths, save_path, n_image_samples=10, n_closest=3):
    """
    Plot randomly chosen samples next to their nearest neighbours.

    Every plotted row shows n_closest+1 images: the first search hit (marked
    red) followed by the remaining hits (marked green). The search is run
    on the same set it was built from.

    Args:
        feature_matrix_all: np.ndarray [n_samples x embed_dim], full data embedding of test samples.
        image_paths:        list [n_samples], list of datapaths corresponding to <feature_matrix_all>
        save_path:          str, where to store sample image.
        n_image_samples:    Number of sample recoveries.
        n_closest:          Number of closest recoveries to show.
    Returns:
        Nothing!
    """
    image_paths = np.array([entry[0] for entry in image_paths])
    sample_idxs = np.random.choice(np.arange(len(feature_matrix_all)), n_image_samples)

    per_row = n_closest+1
    search_index = faiss.IndexFlatL2(feature_matrix_all.shape[-1])
    search_index.add(feature_matrix_all)
    _, neighbour_idxs = search_index.search(feature_matrix_all, per_row)

    sample_paths = image_paths[neighbour_idxs][sample_idxs]

    f,axes = plt.subplots(n_image_samples, per_row)
    for pos,(ax,plot_path) in enumerate(zip(axes.reshape(-1), sample_paths.reshape(-1))):
        ax.imshow(np.array(Image.open(plot_path)))
        ax.set_xticks([])
        ax.set_yticks([])
        # first column of every row is red, all others green
        marker_color = 'g' if pos%per_row else 'r'
        ax.axvline(x=0, color=marker_color, linewidth=13)

    f.set_size_inches(10,20)
    f.tight_layout()
    f.savefig(save_path)
    plt.close()

####### RECOVER CLOSEST EXAMPLE IMAGES #######
Example #19
Source File: matching_localfeatures.py From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0 | 5 votes |
def euclidean_search_test(fn_npy):
    """Save the top-100 L2 neighbours of every test feature to ``fn_npy``.

    The train features of the ensemble dataset are indexed in a flat L2
    index, the test features are used as queries, and the resulting index
    matrix is stored with ``np.save``. Missing parent directories of
    ``fn_npy`` are created.
    """
    ds = load_train_ensemble_dataset()
    train_feats = ds['feats_train']

    flat_index = faiss.IndexFlatL2(train_feats.shape[1])
    flat_index.add(train_feats)
    _, all_ranks = flat_index.search(x=ds['feats_test'], k=100)

    out_dir = Path(fn_npy).parent
    out_dir.mkdir(parents=True, exist_ok=True)
    np.save(fn_npy, all_ranks)
Example #20
Source File: submit_recognition.py From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0 | 5 votes |
def get_df_and_dists(topk=100):
    """Search the ensemble train features for every test feature.

    Loads weighted, normalised features for six experiment directories,
    indexes the train features in a flat L2 index and retrieves the ``topk``
    nearest train entries for each test entry.

    Returns:
        (df, dists): ``df`` has an ``id`` column with the test ids and an
        ``images`` column with the space-joined train ids of the hits;
        ``dists`` holds the corresponding distances from the search.
    """
    test_subdirs = (
        'exp/v19c/feats_test19_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        'exp/v20c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'exp/v21c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'exp/v22c/feats_test19_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        'exp/v23c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        'exp/v24c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    )
    train_subdirs = (
        'exp/v19c/feats_train_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        'exp/v20c/feats_train_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'exp/v21c/feats_train_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'exp/v22c/feats_train_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        'exp/v23c/feats_train_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        'exp/v24c/feats_train_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    )
    test_dirs = [ROOT + subdir for subdir in test_subdirs]
    train_dirs = [ROOT + subdir for subdir in train_subdirs]

    # one weight per experiment directory, in the order listed above
    weights = [0.5, 1.0, 1.0, 0.5, 1.0, 1.0]

    logger.info('load ids and features.')
    ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs, weights, normalize=True)
    ids_train, feats_train = utils.prepare_ids_and_feats(train_dirs, weights, normalize=True)
    logger.info('done.')

    logger.info('build index...')
    flat_index = faiss.IndexFlatL2(feats_train.shape[1])
    flat_index.add(feats_train)
    dists, topk_idx = flat_index.search(x=feats_test, k=topk)
    logger.info('query search done.')

    df = pd.DataFrame(ids_test, columns=['id'])
    matched_ids = ids_train[topk_idx]
    df['images'] = np.apply_along_axis(' '.join, axis=1, arr=matched_ids)

    return df, dists
Example #21
Source File: submit_retrieval.py From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0 | 5 votes |
def main():
    """Create the retrieval submission file and submit it with the kaggle CLI.

    Steps: load index/test/train features, retrieve the 100 nearest index
    images for every test image with a flat L2 index on all GPUs, rerank
    with ``reranking_submission``, write the gzip-compressed CSV under
    ``ROOT + 'output/'`` and run ``kaggle c submit`` on it.
    """
    index_dirs = [
        '../exp/v2clean/feats_index19_ms_L2_ep4_freqthresh-3_loss-arcface_verifythresh-30/']
    test_dirs = [
        '../exp/v2clean/feats_test19_ms_L2_ep4_freqthresh-3_loss-arcface_verifythresh-30/']
    train_dirs = [
        '../exp/v2clean/feats_train_ms_L2_ep4_freqthresh-3_loss-arcface_verifythresh-30/']

    ids_index, feats_index = utils.prepare_ids_and_feats(index_dirs, normalize=True)
    ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs, normalize=True)
    ids_train, feats_train = utils.prepare_ids_and_feats(train_dirs, normalize=True)

    print('build index...')
    cpu_index = faiss.IndexFlatL2(feats_index.shape[1])
    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
    gpu_index.add(feats_index)
    dists, topk_idx = gpu_index.search(x=feats_test, k=100)
    print('query search done.')

    subm = pd.DataFrame(ids_test, columns=['id'])
    subm['images'] = np.apply_along_axis(' '.join, axis=1, arr=ids_index[topk_idx])

    subm = reranking_submission(ids_index, feats_index,
                                ids_test, feats_test,
                                ids_train, feats_train,
                                subm, topk=100)

    output_name = ROOT + 'output/submit_retrieval.csv.gz'
    subm[['id', 'images']].to_csv(output_name, compression='gzip', index=False)
    print('saved to ' + output_name)

    # The string form is only printed. The command itself is run from an
    # argument list without a shell, so a ROOT containing spaces or shell
    # metacharacters cannot break or alter the command.
    cmd = f'kaggle c submit -c landmark-retrieval-2019 -f {output_name} -m "" '
    print(cmd)
    subprocess.run(['kaggle', 'c', 'submit',
                    '-c', 'landmark-retrieval-2019',
                    '-f', output_name,
                    '-m', ''])
Example #22
Source File: modelnet_shrec_loader.py From SO-Net with MIT License | 5 votes |
def build_nn_index(self, database):
    '''
    Build a flat L2 Faiss index over ``database``.

    :param database: numpy array of Nx3
    :return: Faiss index, in CPU
    '''
    # dimension is 3
    nn_index = faiss.IndexFlatL2(self.dimension)
    nn_index.add(database)
    return nn_index
Example #23
Source File: faissext.py From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0 | 5 votes |
def compute_cluster_assignment(centroids, x):
    """Assign every row of ``x`` to its nearest centroid (L2 distance).

    Args:
        centroids: 2-D array of cluster centres; must not be None.
        x: 2-D array of samples to assign.

    Returns:
        1-D array with, for each row of ``x``, the index of the closest
        centroid.
    """
    assert centroids is not None, "should train before assigning"

    centroid_index = faiss.IndexFlatL2(centroids.shape[1])
    centroid_index.add(centroids)
    _, labels = centroid_index.search(x, 1)
    return labels.ravel()
Example #24
Source File: auxiliaries.py From Deep-Metric-Learning-Baselines with Apache License 2.0 | 5 votes |
def recover_closest_inshop(query_feature_matrix_all, gallery_feature_matrix_all, query_image_paths, gallery_image_paths, save_path, n_image_samples=10, n_closest=3):
    """
    Plot randomly chosen query images next to their closest gallery images.

    Every plotted row shows the query image (marked red) followed by its
    n_closest nearest gallery images under L2 distance (marked green).

    Args:
        query_feature_matrix_all:   np.ndarray [n_query_samples x embed_dim], full data embedding of query samples.
        gallery_feature_matrix_all: np.ndarray [n_gallery_samples x embed_dim], full data embedding of gallery samples.
        query_image_paths:          list [n_samples], list of datapaths corresponding to <query_feature_matrix_all>
        gallery_image_paths:        list [n_samples], list of datapaths corresponding to <gallery_feature_matrix_all>
        save_path:          str, where to store sample image.
        n_image_samples:    Number of sample recoveries.
        n_closest:          Number of closest recoveries to show.
    Returns:
        Nothing!
    """
    query_image_paths, gallery_image_paths = np.array(query_image_paths), np.array(gallery_image_paths)
    sample_idxs = np.random.choice(np.arange(len(query_feature_matrix_all)), n_image_samples)

    faiss_search_index = faiss.IndexFlatL2(gallery_feature_matrix_all.shape[-1])
    faiss_search_index.add(gallery_feature_matrix_all)
    _, closest_feature_idxs = faiss_search_index.search(query_feature_matrix_all, n_closest)

    # One row per query: [query path, closest gallery paths...]
    image_paths = gallery_image_paths[closest_feature_idxs]
    image_paths = np.concatenate([query_image_paths.reshape(-1,1), image_paths],axis=-1)

    # image_paths is already aligned with the queries, so only the sampled
    # rows are selected. Indexing it again with the gallery indices (as the
    # code did before) picked unrelated rows and could run out of bounds.
    sample_paths = image_paths[sample_idxs]

    f,axes = plt.subplots(n_image_samples, n_closest+1)
    for i,(ax,plot_path) in enumerate(zip(axes.reshape(-1), sample_paths.reshape(-1))):
        ax.imshow(np.array(Image.open(plot_path)))
        ax.set_xticks([])
        ax.set_yticks([])
        # first column of every row (the query) is red, gallery hits green
        if i%(n_closest+1):
            ax.axvline(x=0, color='g', linewidth=13)
        else:
            ax.axvline(x=0, color='r', linewidth=13)

    f.set_size_inches(10,20)
    f.tight_layout()
    f.savefig(save_path)
    plt.close()
Example #25
Source File: matching_localfeatures.py From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0 | 4 votes |
def faiss_search_results_train_train(block_id=1, n_splits=1):
    """Find, for a block of train images, neighbours with the same landmark.

    The train set is split into ``n_splits`` consecutive parts and block
    ``block_id`` (1-based) is used as queries against the full train index
    (flat L2 on all GPUs, 1000 neighbours per query). For each query, up to
    100 neighbours sharing its landmark id are kept, formatted as
    ``rank:distance:image_id``, and the table is written as a gzip CSV
    under ``ROOT + 'input/'``.
    """
    dataset = load_train_dataset()
    print('Loading train19 landmark dict')
    landmark_dict = load_train19_landmark_dict()

    size_train = dataset.feats_train.shape[0]
    part_size = int(size_train / n_splits)
    idx_train_start = (block_id - 1) * part_size
    # the last block absorbs the remainder of the split
    idx_train_end = size_train if block_id == n_splits else block_id * part_size
    block = slice(idx_train_start, idx_train_end)

    cpu_index = faiss.IndexFlatL2(dataset.feats_train.shape[1])
    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
    gpu_index.add(dataset.feats_train)
    dists, topk_idx = gpu_index.search(x=dataset.feats_train[block], k=1000)

    df = pd.DataFrame(dataset.ids_train[block], columns=['id'])
    df['images'] = np.apply_along_axis(
        ' '.join, axis=1, arr=dataset.ids_train[topk_idx])

    print('generate sub')
    rows = []
    for imidx, (_, r) in tqdm.tqdm(enumerate(df.iterrows()), total=len(df)):
        query_id = r['id']
        landmark_id = landmark_dict[query_id]

        same_landmark_images = []
        for rank, imid in enumerate(r.images.split(' ')):
            if landmark_id != landmark_dict[imid]:
                continue
            same_landmark_images.append(
                f'{rank}:{dists[imidx, rank]:.8f}:{imid}')
            if len(same_landmark_images) >= 100:
                break

        rows.append({
            'id': query_id,
            'landmark_id': landmark_id,
            'matched': ' '.join(same_landmark_images),
        })

    fn = (ROOT + 'input/' +
          f'train19_train19_faiss_search_same_landmarks_blk{block_id}.csv.gz')
    Path(fn).parent.mkdir(parents=True, exist_ok=True)

    print('to_csv')
    pd.DataFrame(rows).to_csv(fn, index=False, compression='gzip')
Example #26
Source File: utils.py From DeMa-BWE with BSD 3-Clause "New" or "Revised" License | 4 votes |
def get_nn_avg_dist_mog(emb, query, knn):
    """
    Compute the average distance of the nearest neighbors
    for a given set of embeddings and queries.
    Use Faiss if available.
    emb has divided sqrt(2) * var

    NOTE: the Faiss path is an ad-hoc implementation that averages the L2
    search distances of a fixed 10 neighbors and does not use `knn`; the
    fallback path averages the `knn` largest `query.mm(emb^T)` scores.
    """
    if not FAISS_AVAILABLE:
        bs = 1024
        emb = emb.transpose(0, 1).contiguous()
        all_distances = []
        for start in range(0, query.shape[0], bs):
            scores = query[start:start + bs].mm(emb)
            best_scores, _ = scores.topk(knn, dim=1, largest=True, sorted=True)
            all_distances.append(best_scores.mean(1).cpu())
        return torch.cat(all_distances).numpy()

    emb = emb.cpu().numpy()
    query = query.cpu().numpy()
    if hasattr(faiss, 'StandardGpuResources'):
        # gpu mode
        res = faiss.StandardGpuResources()
        config = faiss.GpuIndexFlatConfig()
        config.device = 0
        index = faiss.GpuIndexFlatL2(res, emb.shape[1], config)
    else:
        # cpu mode
        index = faiss.IndexFlatL2(emb.shape[1])
    index.add(emb)

    # Ad-hoc implementation
    topK = 10
    distances, _ = index.search(query, topK)
    return distances.mean(1)
    # Rank-aware variant that was left disabled (temp = 2., topK = 1000):
    #query_idx = np.tile(np.arange(query.shape[0]) + 1, (topK, 1)).transpose()
    #rank_diff = abs(np.log(idxes + 1) - np.log(query_idx)) / temp
    #mog_distances_sorted = np.sort(distances + rank_diff)[:, :knn]
    # return: qN, knn
    #return mog_distances_sorted.mean(1)
Example #27
Source File: faissext.py From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0 | 4 votes |
def find_nearest_neighbors(x, queries=None, k=5, gpu_id=None):
    """
    Find k nearest neighbors for each of the n examples.
    Distances are computed using Squared Euclidean distance metric.

    Arguments:
    ----------
    x (ndarray): N examples to search within. [N x d].
    queries (ndarray): find nearest neighbor for each query example. [M x d] matrix
        If None then find k nearest neighbors for each row of x
        (excluding self examples).
    k (int): number of nearest neighbors to find.
    gpu_id (int): use CPU if None else use GPU with the specified id.

    Return
    nns (ndarray): Indices of the nearest neighbors. [M x k]
    distances (ndarray): Distances to the nearest neighbors. [M x k]
    """
    if not (gpu_id is None or isinstance(gpu_id, int)):
        raise ValueError('gpu_id must be None or int')

    x = np.asarray(x.reshape(x.shape[0], -1), dtype=np.float32)

    # will we have queries in the search results?
    remove_self = queries is None
    if remove_self:
        queries = x
        k += 1  # one extra hit so the query itself can be dropped

    d = x.shape[1]
    n_requested = k - int(remove_self)

    tic = time.time()
    if gpu_id is None:
        logging.debug('FAISS: cpu::find {} nearest neighbors'\
                     .format(n_requested))
        index = faiss.IndexFlatL2(d)
    else:
        logging.debug('FAISS: gpu[{}]::find {} nearest neighbors'\
                     .format(gpu_id, n_requested))
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = gpu_id
        resources = faiss.StandardGpuResources()
        index = faiss.GpuIndexFlatL2(resources, d, cfg)

    index.add(x)
    distances, nns = index.search(queries, k)

    if remove_self:
        for row in range(len(nns)):
            # positions of all hits other than the query itself; np.nonzero
            # returns them in ascending order already
            keep = np.nonzero(nns[row, :] != row)[0]
            if len(keep) > k - 1:
                # the query was not among its own hits: drop the last hit
                keep = keep[:-1]
            nns[row, :-1] = nns[row, keep]
            distances[row, :-1] = distances[row, keep]
        nns = nns[:, :-1]
        distances = distances[:, :-1]

    logging.debug('FAISS: Neighbors search total elapsed time: {:.2f} sec'.format(time.time() - tic))
    return nns, distances
Example #28
Source File: faissext.py From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0 | 4 votes |
def train_kmeans(x, num_clusters=1000, gpu_ids=None, niter=100, nredo=1, verbose=0):
    """
    Runs k-means clustering on one or several GPUs

    Args:
        x: 2-D array of training vectors; must not contain NaN or Inf.
        num_clusters: number of centroids.
        gpu_ids: None for the CPU, a single int, or a non-empty list of
            GPU device ids.
        niter, nredo, verbose: assigned to the ``faiss.Clustering`` object.

    Returns:
        Array of shape (num_clusters, d) with the centroids.
    """
    assert np.all(~np.isnan(x)), 'x contains NaN'
    assert np.all(np.isfinite(x)), 'x contains Inf'
    if isinstance(gpu_ids, int):
        gpu_ids = [gpu_ids]
    assert gpu_ids is None or len(gpu_ids)

    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = bool(verbose)
    kmeans.niter = niter
    kmeans.nredo = nredo

    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    if gpu_ids is None:
        index = faiss.IndexFlatL2(d)
    else:
        res = [faiss.StandardGpuResources() for _ in gpu_ids]

        flat_config = []
        for device in gpu_ids:
            cfg = faiss.GpuIndexFlatConfig()
            cfg.useFloat16 = False
            cfg.device = device
            flat_config.append(cfg)

        if len(gpu_ids) == 1:
            index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
        else:
            # one flat index per GPU, combined behind a single proxy index
            indexes = [faiss.GpuIndexFlatL2(gpu_res, d, gpu_cfg)
                       for gpu_res, gpu_cfg in zip(res, flat_config)]
            index = faiss.IndexProxy()
            for sub_index in indexes:
                index.addIndex(sub_index)

    # perform the training
    kmeans.train(x, index)
    centroids = faiss.vector_float_to_array(kmeans.centroids)

    # objective values are fetched but only used by the disabled log line
    objective = faiss.vector_float_to_array(kmeans.obj)
    #logging.debug("Final objective: %.4g" % objective[-1])

    return centroids.reshape(num_clusters, d)