Python faiss.IndexFlatL2() Examples

The following are 28 code examples of faiss.IndexFlatL2(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module faiss , or try the search function .
Example #1
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0 7 votes vote down vote up
def test_knn_search(size=10000, gpu_id=None):
    x = np.random.rand(size, 512)
    x = x.reshape(x.shape[0], -1).astype('float32')
    d = x.shape[1]

    tic = time.time()
    if gpu_id is None:
        index = faiss.IndexFlatL2(d)
    else:
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = gpu_id

        flat_config = [cfg]
        resources = [faiss.StandardGpuResources()]
        index = faiss.GpuIndexFlatL2(resources[0], d, flat_config[0])
    index.add(x)
    print('Index built in {} sec'.format(time.time() - tic))
    distances, I = index.search(x, 21)
    print('Searched in {} sec'.format(time.time() - tic))
    print(distances.shape)
    print(I.shape)
    print(distances[:5])
    print(I[:5]) 
Example #2
Source File: faiss.py    From gntp with MIT License 7 votes vote down vote up
def _build_approximate_index(self,
                                     data: np.ndarray):
            dimensionality = data.shape[1]
            nlist = 100 if data.shape[0] > 100 else 2

            if self.kernel_name in {'rbf'}:
                quantizer = faiss.IndexFlatL2(dimensionality)
                cpu_index_flat = faiss.IndexIVFFlat(quantizer, dimensionality, nlist, faiss.METRIC_L2)
            else:
                quantizer = faiss.IndexFlatIP(dimensionality)
                cpu_index_flat = faiss.IndexIVFFlat(quantizer, dimensionality, nlist)

            gpu_index_ivf = faiss.index_cpu_to_gpu(self.resource, 0, cpu_index_flat)
            gpu_index_ivf.train(data)
            gpu_index_ivf.add(data)
            self.index = gpu_index_ivf 
Example #3
Source File: __init__.py    From bbknn with MIT License 6 votes vote down vote up
def create_tree(data,approx,metric,use_faiss,n_trees):
	'''
	Create a faiss/cKDTree/KDTree/annoy index for nearest neighbour lookup. All undescribed input
	as in ``bbknn.bbknn()``. Returns the resulting index.

	Input
	-----
	data : ``numppy.array``
		PCA coordinates of a batch's cells to index.
	'''
	if approx:
		ckd = AnnoyIndex(data.shape[1],metric=metric)
		for i in np.arange(data.shape[0]):
			ckd.add_item(i,data[i,:])
		ckd.build(n_trees)
	elif metric == 'euclidean':
		if 'faiss' in sys.modules and use_faiss:
			ckd = faiss.IndexFlatL2(data.shape[1])
			ckd.add(data)
		else:
			ckd = cKDTree(data)
	else:
		ckd = KDTree(data,metric=metric)
	return ckd 
Example #4
Source File: stat_utils.py    From pytorch-metric-learning with MIT License 6 votes vote down vote up
def run_kmeans(x, nmb_clusters):
    """
    Args:
        x: data
        nmb_clusters (int): number of clusters
    Returns:
        list: ids of data in each cluster
    """
    n_data, d = x.shape
    logging.info("running k-means clustering with k=%d"%nmb_clusters)
    logging.info("embedding dimensionality is %d"%d)

    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)
    clus.niter = 20
    clus.max_points_per_centroid = 10000000
    index = faiss.IndexFlatL2(d)
    if faiss.get_num_gpus() > 0:
        index = faiss.index_cpu_to_all_gpus(index)
    # perform the training
    clus.train(x, index)
    _, idxs = index.search(x, 1)

    return [int(n[0]) for n in idxs]


# modified from https://github.com/facebookresearch/faiss/wiki/Faiss-building-blocks:-clustering,-PCA,-quantization 
Example #5
Source File: losses.py    From SO-Net with MIT License 6 votes vote down vote up
def build_nn_index(self, database):
        '''
        :param database: numpy array of Nx3
        :return: Faiss index, in CPU
        '''
        # index = faiss.GpuIndexFlatL2(self.res, self.dimension, self.flat_config)  # dimension is 3
        index_cpu = faiss.IndexFlatL2(self.dimension)
        index = faiss.index_cpu_to_gpu(self.res, self.opt.gpu_id, index_cpu)
        index.add(database)
        return index 
Example #6
Source File: data.py    From dynamicslearn with MIT License 6 votes vote down vote up
def cluster(vectorized, ncentroids):
    import faiss
    x = vectorized
    niter = 50
    verbose = True
    d = x.shape[1]
    kmeans = faiss.Kmeans(d, ncentroids, niter=niter, verbose=verbose)
    kmeans.train(x)

    # for i, v in enumerate(kmeans.centroids):
    #     print(i)

    index = faiss.IndexFlatL2(d)
    index.add(x)
    D, I = index.search(kmeans.centroids, 1)
    x_reduced = x[I, :].squeeze()
    return x_reduced 
Example #7
Source File: RecognitionThread.py    From TUT-live-age-estimator with MIT License 6 votes vote down vote up
def initialize_celeb(self):
        print("Initializing celebrity network...")

        with CustomObjectScope({'relu6': keras.layers.ReLU(6.),
                                'DepthwiseConv2D': keras.layers.DepthwiseConv2D,
                                'lifted_struct_loss': lifted_struct_loss,
                                'triplet_loss': triplet_loss}):
            self.siameseNet = keras.models.load_model(os.path.join(self.siamesepath, "feature_model.h5"))

        self.siameseNet._make_predict_function()

        ##### Read celebrity features
        celebrity_features = self.siamesepath + os.sep + "features_" + self.celeb_dataset + ".h5"
        print("Reading celebrity data from {}...".format(celebrity_features))

        with h5py.File(celebrity_features, "r") as h5:
            celeb_features = np.array(h5["features"]).astype(np.float32)
            self.path_ends = list(h5["path_ends"])
            self.celeb_files = [os.path.join(self.visualization_path, s.decode("utf-8")) for s in self.path_ends]

        print("Building index...")
        self.celeb_index = faiss.IndexFlatL2(celeb_features.shape[1])
        self.celeb_index.add(celeb_features) 
Example #8
Source File: run_index.py    From denspi with Apache License 2.0 6 votes vote down vote up
def train_coarse_quantizer(data, quantizer_path, num_clusters, hnsw=False, niter=10, cuda=False):
    d = data.shape[1]

    index_flat = faiss.IndexFlatL2(d)
    # make it into a gpu index
    if cuda:
        res = faiss.StandardGpuResources()
        index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)
    clus = faiss.Clustering(d, num_clusters)
    clus.verbose = True
    clus.niter = niter
    clus.train(data, index_flat)
    centroids = faiss.vector_float_to_array(clus.centroids)
    centroids = centroids.reshape(num_clusters, d)

    if hnsw:
        quantizer = faiss.IndexHNSWFlat(d, 32)
        quantizer.hnsw.efSearch = 128
        quantizer.train(centroids)
        quantizer.add(centroids)
    else:
        quantizer = faiss.IndexFlatL2(d)
        quantizer.add(centroids)

    faiss.write_index(quantizer, quantizer_path) 
Example #9
Source File: reranking.py    From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0 5 votes vote down vote up
def __init__(self, database, method):
        super().__init__(database, method)
        self.index = {'cosine': faiss.IndexFlatIP,
                      'euclidean': faiss.IndexFlatL2}[method](self.D)
        if os.environ.get('CUDA_VISIBLE_DEVICES'):
            print('CUDA', os.environ.get('CUDA_VISIBLE_DEVICES'))
            self.index = faiss.index_cpu_to_all_gpus(self.index)
        self.add() 
Example #10
Source File: stat_utils.py    From pytorch-metric-learning with MIT License 5 votes vote down vote up
def get_knn(
    reference_embeddings, test_embeddings, k, embeddings_come_from_same_source=False
):
    """
    Finds the k elements in reference_embeddings that are closest to each
    element of test_embeddings.
    Args:
        reference_embeddings: numpy array of size (num_samples, dimensionality).
        test_embeddings: numpy array of size (num_samples2, dimensionality).
        k: int, number of nearest neighbors to find
        embeddings_come_from_same_source: if True, then the nearest neighbor of
                                         each element (which is actually itself)
                                         will be ignored.
    Returns:
        numpy array: indices of nearest k neighbors
        numpy array: corresponding distances
    """
    d = reference_embeddings.shape[1]
    logging.info("running k-nn with k=%d"%k)
    logging.info("embedding dimensionality is %d"%d)
    index = faiss.IndexFlatL2(d)
    if faiss.get_num_gpus() > 0:
        index = faiss.index_cpu_to_all_gpus(index)
    index.add(reference_embeddings)
    distances, indices = index.search(test_embeddings, k + 1)
    if embeddings_come_from_same_source:
        return indices[:, 1:], distances[:, 1:]
    return indices[:, :k], distances[:, :k]


# modified from https://raw.githubusercontent.com/facebookresearch/deepcluster/ 
Example #11
Source File: knn.py    From diffusion with MIT License 5 votes vote down vote up
def __init__(self, database, method, M=128, nbits=8, nlist=316, nprobe=64):
        super().__init__(database, method)
        self.quantizer = {'cosine': faiss.IndexFlatIP,
                          'euclidean': faiss.IndexFlatL2}[method](self.D)
        self.index = faiss.IndexIVFPQ(self.quantizer, self.D, nlist, M, nbits)
        samples = database[np.random.permutation(np.arange(self.N))[:self.N // 5]]
        print("[ANN] train")
        self.index.train(samples)
        self.add()
        self.index.nprobe = nprobe 
Example #12
Source File: knn.py    From diffusion with MIT License 5 votes vote down vote up
def __init__(self, database, method):
        super().__init__(database, method)
        self.index = {'cosine': faiss.IndexFlatIP,
                      'euclidean': faiss.IndexFlatL2}[method](self.D)
        if os.environ.get('CUDA_VISIBLE_DEVICES'):
            self.index = faiss.index_cpu_to_all_gpus(self.index)
        self.add() 
Example #13
Source File: streamlit_demo.py    From RecNN with Apache License 2.0 5 votes vote down vote up
def get_index():
    import faiss
    from sklearn.preprocessing import normalize
    # test indexes
    indexL2 = faiss.IndexFlatL2(128)
    indexIP = faiss.IndexFlatIP(128)
    indexCOS = faiss.IndexFlatIP(128)

    mov_mat, _, _ = get_embeddings()
    mov_mat = mov_mat.numpy().astype('float32')
    indexL2.add(mov_mat)
    indexIP.add(mov_mat)
    indexCOS.add(normalize(mov_mat, axis=1, norm='l2'))
    return {'L2': indexL2, 'IP': indexIP, 'COS': indexCOS} 
Example #14
Source File: faiss.py    From ann-benchmarks with MIT License 5 votes vote down vote up
def fit(self, X):
        if self._metric == 'angular':
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')

        if X.dtype != numpy.float32:
            X = X.astype(numpy.float32)

        self.quantizer = faiss.IndexFlatL2(X.shape[1])
        index = faiss.IndexIVFFlat(
            self.quantizer, X.shape[1], self._n_list, faiss.METRIC_L2)
        index.train(X)
        index.add(X)
        self.index = index 
Example #15
Source File: lossess.py    From RL-GAN-Net with MIT License 5 votes vote down vote up
def build_nn_index(self, database):
        '''
        :param database: numpy array of Nx3
        :return: Faiss index, in CPU
        '''
        # index = faiss.GpuIndexFlatL2(self.res, self.dimension, self.flat_config)  # dimension is 3
        index_cpu = faiss.IndexFlatL2(self.dimension)
        index = faiss.index_cpu_to_gpu(self.res, self.opt.gpu_id, index_cpu)
        index.add(database)
        return index 
Example #16
Source File: lossess.py    From RL-GAN-Net with MIT License 5 votes vote down vote up
def build_nn_index(self, database):
        '''
        :param database: numpy array of Nx3
        :return: Faiss index, in CPU
        '''
        # index = faiss.GpuIndexFlatL2(self.res, self.dimension, self.flat_config)  # dimension is 3
        index_cpu = faiss.IndexFlatL2(self.dimension)
        index = faiss.index_cpu_to_gpu(self.res, self.opt.gpu_id, index_cpu)
        index.add(database)
        return index 
Example #17
Source File: faiss.py    From gntp with MIT License 5 votes vote down vote up
def _build_exact_index(self,
                               data: np.ndarray):
            dimensionality = data.shape[1]

            if self.kernel_name in {'rbf'}:
                self.cpu_index_flat = faiss.IndexFlatL2(dimensionality)
            else:
                self.cpu_index_flat = faiss.IndexFlatIP(dimensionality)

            if not self.cpu:
                self.index = faiss.index_cpu_to_gpu(self.resource, 0, self.cpu_index_flat)
            else:
                self.index = self.cpu_index_flat
            self.index.add(data) 
Example #18
Source File: auxiliaries.py    From Deep-Metric-Learning-Baselines with Apache License 2.0 5 votes vote down vote up
def recover_closest_one_dataset(feature_matrix_all, image_paths, save_path, n_image_samples=10, n_closest=3):
    """
    Provide sample recoveries.

    Args:
        feature_matrix_all: np.ndarray [n_samples x embed_dim], full data embedding of test samples.
        image_paths:        list [n_samples], list of datapaths corresponding to <feature_matrix_all>
        save_path:          str, where to store sample image.
        n_image_samples:    Number of sample recoveries.
        n_closest:          Number of closest recoveries to show.
    Returns:
        Nothing!
    """
    image_paths = np.array([x[0] for x in image_paths])
    sample_idxs = np.random.choice(np.arange(len(feature_matrix_all)), n_image_samples)

    faiss_search_index = faiss.IndexFlatL2(feature_matrix_all.shape[-1])
    faiss_search_index.add(feature_matrix_all)
    _, closest_feature_idxs = faiss_search_index.search(feature_matrix_all, n_closest+1)

    sample_paths = image_paths[closest_feature_idxs][sample_idxs]

    f,axes = plt.subplots(n_image_samples, n_closest+1)
    for i,(ax,plot_path) in enumerate(zip(axes.reshape(-1), sample_paths.reshape(-1))):
        ax.imshow(np.array(Image.open(plot_path)))
        ax.set_xticks([])
        ax.set_yticks([])
        if i%(n_closest+1):
            ax.axvline(x=0, color='g', linewidth=13)
        else:
            ax.axvline(x=0, color='r', linewidth=13)
    f.set_size_inches(10,20)
    f.tight_layout()
    f.savefig(save_path)
    plt.close()


####### RECOVER CLOSEST EXAMPLE IMAGES ####### 
Example #19
Source File: matching_localfeatures.py    From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0 5 votes vote down vote up
def euclidean_search_test(fn_npy):
    ds = load_train_ensemble_dataset()

    cpu_index = faiss.IndexFlatL2(ds[f'feats_train'].shape[1])
    cpu_index.add(ds[f'feats_train'])

    _, all_ranks = cpu_index.search(x=ds[f'feats_test'], k=100)
    Path(fn_npy).parent.mkdir(parents=True, exist_ok=True)
    np.save(fn_npy, all_ranks) 
Example #20
Source File: submit_recognition.py    From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0 5 votes vote down vote up
def get_df_and_dists(topk=100):
    test_dirs = [
        ROOT + 'exp/v19c/feats_test19_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        ROOT + 'exp/v20c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT + 'exp/v21c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT + 'exp/v22c/feats_test19_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        ROOT + 'exp/v23c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        ROOT + 'exp/v24c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    ]
    train_dirs = [
        ROOT + 'exp/v19c/feats_train_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        ROOT + 'exp/v20c/feats_train_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT + 'exp/v21c/feats_train_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT + 'exp/v22c/feats_train_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        ROOT + 'exp/v23c/feats_train_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        ROOT + 'exp/v24c/feats_train_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    ]
    weights = [
        0.5,
        1.0,
        1.0,
        0.5,
        1.0,
        1.0,
    ]

    logger.info('load ids and features.')
    ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs, weights, normalize=True)
    ids_train, feats_train = utils.prepare_ids_and_feats(train_dirs, weights, normalize=True)
    logger.info('done.')

    logger.info('build index...')
    cpu_index = faiss.IndexFlatL2(feats_train.shape[1])
    cpu_index.add(feats_train)
    dists, topk_idx = cpu_index.search(x=feats_test, k=topk)
    logger.info('query search done.')

    df = pd.DataFrame(ids_test, columns=['id'])
    df['images'] = np.apply_along_axis(' '.join, axis=1, arr=ids_train[topk_idx])

    return df, dists 
Example #21
Source File: submit_retrieval.py    From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0 5 votes vote down vote up
def main():
    index_dirs = [
        '../exp/v2clean/feats_index19_ms_L2_ep4_freqthresh-3_loss-arcface_verifythresh-30/']
    test_dirs = [
        '../exp/v2clean/feats_test19_ms_L2_ep4_freqthresh-3_loss-arcface_verifythresh-30/']
    train_dirs = [
        '../exp/v2clean/feats_train_ms_L2_ep4_freqthresh-3_loss-arcface_verifythresh-30/']

    ids_index, feats_index = utils.prepare_ids_and_feats(index_dirs, normalize=True)
    ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs, normalize=True)
    ids_train, feats_train = utils.prepare_ids_and_feats(train_dirs, normalize=True)

    print('build index...')
    cpu_index = faiss.IndexFlatL2(feats_index.shape[1])
    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
    gpu_index.add(feats_index)
    dists, topk_idx = gpu_index.search(x=feats_test, k=100)
    print('query search done.')

    subm = pd.DataFrame(ids_test, columns=['id'])
    subm['images'] = np.apply_along_axis(' '.join, axis=1, arr=ids_index[topk_idx])

    subm = reranking_submission(ids_index, feats_index,
                                ids_test, feats_test,
                                ids_train, feats_train,
                                subm, topk=100)

    output_name = ROOT + f'output/submit_retrieval.csv.gz'
    subm[['id', 'images']].to_csv(output_name, compression='gzip', index=False)
    print('saved to ' + output_name)

    cmd = f'kaggle c submit -c landmark-retrieval-2019 -f {output_name} -m "" '
    print(cmd)
    subprocess.run(cmd, shell=True) 
Example #22
Source File: modelnet_shrec_loader.py    From SO-Net with MIT License 5 votes vote down vote up
def build_nn_index(self, database):
        '''
        :param database: numpy array of Nx3
        :return: Faiss index, in CPU
        '''
        index = faiss.IndexFlatL2(self.dimension)  # dimension is 3
        index.add(database)
        return index 
Example #23
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0 5 votes vote down vote up
def compute_cluster_assignment(centroids, x):
    assert centroids is not None, "should train before assigning"
    d = centroids.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(centroids)
    distances, labels = index.search(x, 1)
    return labels.ravel() 
Example #24
Source File: auxiliaries.py    From Deep-Metric-Learning-Baselines with Apache License 2.0 5 votes vote down vote up
def recover_closest_inshop(query_feature_matrix_all, gallery_feature_matrix_all, query_image_paths, gallery_image_paths, save_path, n_image_samples=10, n_closest=3):
    """
    Provide sample recoveries.

    Args:
        query_feature_matrix_all:   np.ndarray [n_query_samples x embed_dim], full data embedding of query samples.
        gallery_feature_matrix_all: np.ndarray [n_gallery_samples x embed_dim], full data embedding of gallery samples.
        query_image_paths:          list [n_samples], list of datapaths corresponding to <query_feature_matrix_all>
        gallery_image_paths:        list [n_samples], list of datapaths corresponding to <gallery_feature_matrix_all>
        save_path:          str, where to store sample image.
        n_image_samples:    Number of sample recoveries.
        n_closest:          Number of closest recoveries to show.
    Returns:
        Nothing!
    """
    query_image_paths, gallery_image_paths   = np.array(query_image_paths), np.array(gallery_image_paths)
    sample_idxs = np.random.choice(np.arange(len(query_feature_matrix_all)), n_image_samples)

    faiss_search_index = faiss.IndexFlatL2(gallery_feature_matrix_all.shape[-1])
    faiss_search_index.add(gallery_feature_matrix_all)
    _, closest_feature_idxs = faiss_search_index.search(query_feature_matrix_all, n_closest)

    image_paths  = gallery_image_paths[closest_feature_idxs]
    image_paths  = np.concatenate([query_image_paths.reshape(-1,1), image_paths],axis=-1)

    sample_paths = image_paths[closest_feature_idxs][sample_idxs]

    f,axes = plt.subplots(n_image_samples, n_closest+1)
    for i,(ax,plot_path) in enumerate(zip(axes.reshape(-1), sample_paths.reshape(-1))):
        ax.imshow(np.array(Image.open(plot_path)))
        ax.set_xticks([])
        ax.set_yticks([])
        if i%(n_closest+1):
            ax.axvline(x=0, color='g', linewidth=13)
        else:
            ax.axvline(x=0, color='r', linewidth=13)
    f.set_size_inches(10,20)
    f.tight_layout()
    f.savefig(save_path)
    plt.close() 
Example #25
Source File: matching_localfeatures.py    From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0 4 votes vote down vote up
def faiss_search_results_train_train(block_id=1, n_splits=1):
    dataset = load_train_dataset()
    print('Loading train19 landmark dict')
    landmark_dict = load_train19_landmark_dict()

    size_train = dataset.feats_train.shape[0]
    part_size = int(size_train / n_splits)
    idx_train_start = (block_id - 1) * part_size
    idx_train_end = (block_id) * part_size
    if block_id == n_splits:
        idx_train_end = size_train

    cpu_index = faiss.IndexFlatL2(dataset.feats_train.shape[1])
    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
    gpu_index.add(dataset.feats_train)
    dists, topk_idx = gpu_index.search(
        x=dataset.feats_train[idx_train_start:idx_train_end],
        k=1000)

    df = pd.DataFrame(
        dataset.ids_train[idx_train_start:idx_train_end],
        columns=['id'])
    df['images'] = np.apply_along_axis(
        ' '.join, axis=1, arr=dataset.ids_train[topk_idx])

    print('generate sub')
    rows = []
    for imidx, (_, r) in tqdm.tqdm(enumerate(df.iterrows()),
                                   total=len(df)):
        landmark_id = landmark_dict[r['id']]
        same_landmark_images = []
        for rank, imid in enumerate(r.images.split(' ')):
            if landmark_id == landmark_dict[imid]:
                same_landmark_images.append(
                    f'{rank}:{dists[imidx, rank]:.8f}:{imid}')
                if len(same_landmark_images) >= 100:
                    break

        rows.append({
            'id': r['id'],
            'landmark_id': landmark_id,
            'matched': ' '.join(same_landmark_images),
        })

    fn = (ROOT + 'input/' +
          f'train19_train19_faiss_search_same_landmarks_blk{block_id}.csv.gz')
    Path(fn).parent.mkdir(parents=True, exist_ok=True)

    print('to_csv')
    df = pd.DataFrame(rows).to_csv(fn, index=False,
                                   compression='gzip') 
Example #26
Source File: utils.py    From DeMa-BWE with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def get_nn_avg_dist_mog(emb, query, knn):
    """
    Compute the average distance of the `knn` nearest neighbors
    for a given set of embeddings and queries.
    Use Faiss if available.

    emb has divided sqrt(2) * var
    """
    if FAISS_AVAILABLE:
        emb = emb.cpu().numpy()
        query = query.cpu().numpy()
        if hasattr(faiss, 'StandardGpuResources'):
            # gpu mode
            res = faiss.StandardGpuResources()
            config = faiss.GpuIndexFlatConfig()
            config.device = 0
            index = faiss.GpuIndexFlatL2(res, emb.shape[1], config)
        else:
            # cpu mode
            index = faiss.IndexFlatL2(emb.shape[1])
        index.add(emb)
        # Ad-hoc implementation
        topK = 1000
        temp = 2.
        topK = 10
        distances, idxes = index.search(query, topK)
        return distances.mean(1)
        #query_idx = np.tile(np.arange(query.shape[0]) + 1, (topK, 1)).transpose()
        #rank_diff = abs(np.log(idxes + 1) - np.log(query_idx)) / temp
        #mog_distances_sorted = np.sort(distances + rank_diff)[:, :knn]
        # return: qN, knn
        #return mog_distances_sorted.mean(1)
    else:
        bs = 1024
        all_distances = []
        emb = emb.transpose(0, 1).contiguous()
        for i in range(0, query.shape[0], bs):
            distances = query[i:i + bs].mm(emb)
            best_distances, _ = distances.topk(knn, dim=1, largest=True, sorted=True)
            all_distances.append(best_distances.mean(1).cpu())
        all_distances = torch.cat(all_distances)
        return all_distances.numpy() 
Example #27
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0 4 votes vote down vote up
def find_nearest_neighbors(x, queries=None, k=5, gpu_id=None):
    """
    Find k nearest neighbors for each of the n examples.
    Distances are computed using Squared Euclidean distance metric.

    Arguments:
    ----------
    queries
    x (ndarray): N examples to search within. [N x d].
    gpu_id (int): use CPU if None else use GPU with the specified id.
    queries (ndarray): find nearest neigbor for each query example. [M x d] matrix
        If None than find k nearest neighbors for each row of x
        (excluding self exampels).
    k (int): number of nearest neighbors to find.

    Return
    I (ndarray): Indices of the nearest neighnpors. [M x k]
    distances (ndarray): Distances to the nearest neighbors. [M x k]

    """
    if gpu_id is not None and not isinstance(gpu_id, int):
        raise ValueError('gpu_id must be None or int')
    x = np.asarray(x.reshape(x.shape[0], -1), dtype=np.float32)
    remove_self = False # will we have queries in the search results?
    if queries is None:
        remove_self = True
        queries = x
        k += 1

    d = x.shape[1]

    tic = time.time()
    if gpu_id is None:
        logging.debug('FAISS: cpu::find {} nearest neighbors'\
                     .format(k - int(remove_self)))
        index = faiss.IndexFlatL2(d)
    else:
        logging.debug('FAISS: gpu[{}]::find {} nearest neighbors'\
                     .format(gpu_id, k - int(remove_self)))
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = gpu_id

        flat_config = [cfg]
        resources = [faiss.StandardGpuResources()]
        index = faiss.GpuIndexFlatL2(resources[0], d, flat_config[0])
    index.add(x)
    distances, nns = index.search(queries, k)
    if remove_self:
        for i in range(len(nns)):
            indices = np.nonzero(nns[i, :] != i)[0]
            indices.sort()
            if len(indices) > k - 1:
                indices = indices[:-1]
            nns[i, :-1] = nns[i, indices]
            distances[i, :-1] = distances[i, indices]
        nns = nns[:, :-1]
        distances = distances[:, :-1]
    logging.debug('FAISS: Neighbors search total elapsed time: {:.2f} sec'.format(time.time() - tic))
    return nns, distances 
Example #28
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0 4 votes vote down vote up
def train_kmeans(x, num_clusters=1000, gpu_ids=None, niter=100, nredo=1, verbose=0):
    """
    Runs k-means clustering on one or several GPUs
    """
    assert np.all(~np.isnan(x)), 'x contains NaN'
    assert np.all(np.isfinite(x)), 'x contains Inf'
    if isinstance(gpu_ids, int):
        gpu_ids = [gpu_ids]
    assert gpu_ids is None or len(gpu_ids)

    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = bool(verbose)
    kmeans.niter = niter
    kmeans.nredo = nredo

    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    if gpu_ids is not None:
        res = [faiss.StandardGpuResources() for i in gpu_ids]

        flat_config = []
        for i in gpu_ids:
            cfg = faiss.GpuIndexFlatConfig()
            cfg.useFloat16 = False
            cfg.device = i
            flat_config.append(cfg)

        if len(gpu_ids) == 1:
            index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
        else:
            indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
                       for i in range(len(gpu_ids))]
            index = faiss.IndexProxy()
            for sub_index in indexes:
                index.addIndex(sub_index)
    else:
        index = faiss.IndexFlatL2(d)

    # perform the training
    kmeans.train(x, index)
    centroids = faiss.vector_float_to_array(kmeans.centroids)

    objective = faiss.vector_float_to_array(kmeans.obj)
    #logging.debug("Final objective: %.4g" % objective[-1])

    return centroids.reshape(num_clusters, d)