Python faiss.IndexFlatIP() Examples
The following are 15 code examples of faiss.IndexFlatIP().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module faiss, or try the search function.
Example #1
Source File: faiss.py From gntp with MIT License | 7 votes |
def _build_approximate_index(self, data: np.ndarray):
    """Build an IVF-flat index over ``data`` and store it on ``self.index``.

    The coarse quantizer and the IVF metric are chosen from
    ``self.kernel_name``: L2 for ``'rbf'``, inner product otherwise.
    The CPU index is moved to GPU device 0 through ``self.resource``
    before it is trained on, and filled with, ``data``.

    Args:
        data: 2-D array; ``data.shape[1]`` is used as the vector
            dimensionality and ``data.shape[0]`` decides the number of
            inverted lists.
    """
    dimensionality = data.shape[1]
    # Use few inverted lists when there are few vectors to cluster.
    nlist = 100 if data.shape[0] > 100 else 2

    if self.kernel_name in {'rbf'}:
        quantizer = faiss.IndexFlatL2(dimensionality)
        metric = faiss.METRIC_L2
    else:
        quantizer = faiss.IndexFlatIP(dimensionality)
        # IndexIVFFlat defaults to METRIC_L2 when no metric is given; pass
        # the inner-product metric explicitly so the IVF index agrees with
        # its inner-product quantizer.
        metric = faiss.METRIC_INNER_PRODUCT

    cpu_index_flat = faiss.IndexIVFFlat(quantizer, dimensionality, nlist, metric)

    gpu_index_ivf = faiss.index_cpu_to_gpu(self.resource, 0, cpu_index_flat)
    gpu_index_ivf.train(data)
    gpu_index_ivf.add(data)
    self.index = gpu_index_ivf
Example #2
Source File: h5_to_faiss.py From Seq2Seq-Vis with Apache License 2.0 | 6 votes |
def main():
    """Index every hidden state of an HDF5 dataset into a flat IP index.

    Reads the dataset ``opt.data`` from the file ``opt.states``, unpacks
    its shape as ``(seqs, slens, hid)``, adds the states to a
    ``faiss.IndexFlatIP`` in chunks of ``opt.stepsize`` sequences, and
    writes the index to ``opt.output``.
    """
    # The context manager releases the HDF5 handle even if indexing fails.
    with h5py.File(opt.states, "r") as f:
        data = f[opt.data]
        seqs, slens, hid = data.shape
        print("Processing {} Sequences".format(seqs))
        print("with {} tokens each".format(slens))
        print("and {} states".format(hid))

        # Initialize a new index
        index = faiss.IndexFlatIP(hid)

        # Fill it. Iterate up to `seqs` (not `seqs - stepsize`) so the final,
        # possibly shorter, chunk is indexed as well; slicing clamps the
        # upper bound automatically.
        for ix in tqdm(range(0, seqs, opt.stepsize)):
            chunk = data[ix:ix + opt.stepsize]
            cdata = np.array(chunk.reshape(-1, hid), dtype="float32")
            index.add(cdata)

    faiss.write_index(index, opt.output)
Example #3
Source File: indexing.py From faiss-server with MIT License | 5 votes |
def do_indexing(word2vec_model=None):
    """Return the faiss index stored at ``INDEX_FILE_PATH``, building it if absent.

    If the index file already exists it is loaded and returned, and
    ``word2vec_model`` is not used. Otherwise a flat inner-product index
    is built from ``word2vec_model.wv.syn0norm``, written to
    ``INDEX_FILE_PATH`` and returned.

    Args:
        word2vec_model: model exposing ``vector_size`` and
            ``wv.syn0norm``; required only when the index file is missing.

    Raises:
        ValueError: if the index file is missing and no model was given.
    """
    if os.path.isfile(INDEX_FILE_PATH):
        return faiss.read_index(INDEX_FILE_PATH)

    if word2vec_model is None:
        # Fail with an actionable message instead of an AttributeError on None.
        raise ValueError(
            'word2vec_model is required to build the index because '
            '{} does not exist'.format(INDEX_FILE_PATH)
        )

    index = faiss.IndexFlatIP(word2vec_model.vector_size)
    index.add(word2vec_model.wv.syn0norm)
    faiss.write_index(index, INDEX_FILE_PATH)
    return index
Example #4
Source File: phrase_embed.py From diora with Apache License 2.0 | 5 votes |
def __init__(self, dim=None):
    """Create the wrapper around a flat inner-product faiss index.

    Args:
        dim: dimensionality handed to ``faiss.IndexFlatIP``.
    """
    super(Index, self).__init__()
    # Placeholders; this constructor leaves both unset.
    self.D = None
    self.I = None
    self.index = faiss.IndexFlatIP(dim)
Example #5
Source File: faiss_indexer.py From BLINK with MIT License | 5 votes |
def __init__(self, vector_sz: int = 1, buffer_size: int = 50000):
    """Set up the indexer with a flat inner-product faiss index.

    Args:
        vector_sz: dimensionality handed to ``faiss.IndexFlatIP``.
        buffer_size: forwarded unchanged to the parent constructor.
    """
    super(DenseFlatIndexer, self).__init__(buffer_size=buffer_size)
    flat_ip_index = faiss.IndexFlatIP(vector_sz)
    self.index = flat_ip_index
Example #6
Source File: utils.py From DeMa-BWE with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_nn_avg_dist(emb, query, knn):
    """Return, per query row, the mean score of its ``knn`` best matches in ``emb``.

    Scores are inner products. When ``FAISS_AVAILABLE`` is true the search
    runs through a flat inner-product faiss index (GPU-backed if this
    faiss build exposes ``StandardGpuResources``); otherwise it is done
    with batched torch matrix products.

    Args:
        emb: 2-D torch tensor of embeddings to search in.
        query: 2-D torch tensor of query vectors.
        knn: number of neighbours averaged for each query.

    Returns:
        A 1-D numpy array with one averaged score per query row.
    """
    if not FAISS_AVAILABLE:
        # Pure-torch fallback: process the queries in fixed-size batches.
        batch_size = 1024
        emb_t = emb.transpose(0, 1).contiguous()
        per_batch_means = []
        for start in range(0, query.shape[0], batch_size):
            scores = query[start:start + batch_size].mm(emb_t)
            top_scores, _ = scores.topk(knn, dim=1, largest=True, sorted=True)
            per_batch_means.append(top_scores.mean(1).cpu())
        return torch.cat(per_batch_means).numpy()

    emb = emb.cpu().numpy()
    query = query.cpu().numpy()
    if hasattr(faiss, 'StandardGpuResources'):
        # gpu mode
        res = faiss.StandardGpuResources()
        config = faiss.GpuIndexFlatConfig()
        config.device = 0
        index = faiss.GpuIndexFlatIP(res, emb.shape[1], config)
    else:
        # cpu mode
        index = faiss.IndexFlatIP(emb.shape[1])
    index.add(emb)
    distances, _ = index.search(query, knn)
    return distances.mean(1)
Example #7
Source File: create_faiss.py From exbert with Apache License 2.0 | 5 votes |
def train_indexes(ce: CorpusDataWrapper, stepsize=100, drop_null=True):
    """Build one embedding index and one context index per layer.

    Parameters:
    ===========
    - ce: Wrapper around HDF5 file for easy access to data
    - stepsize: How many sentences to add at once
    - drop_null: If true, read the ``zero_special_*`` arrays of each item
      instead of the raw ``embeddings`` / ``contexts`` (per the original
      note: don't index special tokens such as [CLS] and [SEP] whose spacy
      POS are null)

    Returns:
        ``(embedding_indexes, context_indexes)``: two lists holding
        ``ce.n_layers`` ``faiss.IndexFlatIP`` objects each.
    """
    # want to account for the input layer, which for attentions + contexts is all value 0
    n_layers = ce.n_layers

    embedding_indexes = [faiss.IndexFlatIP(ce.embedding_dim) for _ in range(n_layers)]
    context_indexes = [faiss.IndexFlatIP(ce.embedding_dim) for _ in range(n_layers)]

    # Pick which per-item arrays to read once, up front.
    if drop_null:
        emb_attr, ctx_attr = 'zero_special_embeddings', 'zero_special_contexts'
    else:
        emb_attr, ctx_attr = 'embeddings', 'contexts'

    for start in range(0, len(ce), stepsize):
        batch = ce[start:start + stepsize]
        # Per-item arrays are joined along axis 1; axis 0 is indexed by layer below.
        embeddings = np.concatenate([getattr(item, emb_attr) for item in batch], axis=1)
        contexts = np.concatenate([getattr(item, ctx_attr) for item in batch], axis=1)

        layer_pairs = zip(embedding_indexes, context_indexes)
        for layer, (emb_index, ctx_index) in enumerate(layer_pairs):
            emb_index.add(embeddings[layer])
            ctx_index.add(contexts[layer])

    return embedding_indexes, context_indexes
Example #8
Source File: submit_retrieval.py From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0 | 5 votes |
def predict_landmark_id(ids_query, feats_query, ids_train, feats_train, landmark_dict, voting_k=3):
    """Predict a landmark for each query by similarity-weighted k-NN voting.

    A flat inner-product index is built over ``feats_train``. For each
    query the ``voting_k`` nearest training images vote for their landmark
    (looked up in ``landmark_dict`` by the last ``/``-separated component
    of the training id), each vote weighted by its similarity. The
    landmark with the largest summed similarity wins.

    Args:
        ids_query: ids of the query images, one per row of ``feats_query``.
        feats_query: 2-D array of query features.
        ids_train: numpy array of training ids, indexable by the neighbour
            index matrix returned by faiss.
        feats_train: 2-D array of training features.
        landmark_dict: mapping from training image id to landmark id.
        voting_k: number of neighbours that vote.

    Returns:
        A DataFrame indexed by ``id`` with columns ``landmarks``
        (``"<landmark_id> <score>"``), ``landmark_id`` (string) and
        ``score`` (the winning summed similarity, rounded to 9 decimals,
        divided by ``voting_k``).
    """
    print('build index...')
    cpu_index = faiss.IndexFlatIP(feats_train.shape[1])
    cpu_index.add(feats_train)
    sims, topk_idx = cpu_index.search(x=feats_query, k=voting_k)
    print('query search done.')

    df = pd.DataFrame(ids_query, columns=['id'])
    # Iterate the neighbour ids directly. Joining them into one string per
    # row with np.apply_along_axis and splitting again truncates rows longer
    # than the first one (output dtype is fixed by the first result) and
    # breaks on ids that contain spaces.
    neighbour_ids = ids_train[topk_idx]

    rows = []
    triples = zip(df['id'], neighbour_ids, sims)
    for query_id, names, name_sims in tqdm.tqdm(triples, total=len(df)):
        counter = Counter()
        for name, sim in zip(names[:voting_k], name_sims):
            image_id = name.split('/')[-1]
            counter[landmark_dict[image_id]] += sim
        landmark_id, score = counter.most_common(1)[0]
        rows.append({
            'id': query_id,
            'landmarks': f'{landmark_id} {score:.9f}',
        })

    pred = pd.DataFrame(rows).set_index('id')
    pred['landmark_id'], pred['score'] = list(
        zip(*pred['landmarks'].apply(lambda x: str(x).split(' '))))
    pred['score'] = pred['score'].astype(np.float32) / voting_k
    return pred
Example #9
Source File: reranking.py From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0 | 5 votes |
def __init__(self, database, method):
    """Create a flat faiss index for ``method`` and populate it.

    Args:
        database: forwarded to the parent constructor.
        method: ``'cosine'`` (inner-product index) or ``'euclidean'``
            (L2 index); any other value raises ``KeyError``.
    """
    super().__init__(database, method)

    index_classes = {'cosine': faiss.IndexFlatIP,
                     'euclidean': faiss.IndexFlatL2}
    self.index = index_classes[method](self.D)

    # Replicate the index on every GPU only when CUDA devices are advertised.
    visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
    if visible_devices:
        print('CUDA', visible_devices)
        self.index = faiss.index_cpu_to_all_gpus(self.index)

    self.add()
Example #10
Source File: reranking.py From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0 | 5 votes |
def __init__(self, database, method, M=128, nbits=8, nlist=316, nprobe=32):
    """Create, train and populate an IVF-PQ faiss index.

    Args:
        database: array of vectors; forwarded to the parent constructor
            and used as training data.
        method: ``'cosine'`` or ``'euclidean'``; selects the flat
            quantizer class (any other value raises ``KeyError``).
        M, nbits, nlist: forwarded to ``faiss.IndexIVFPQ``.
        nprobe: assigned to ``index.nprobe`` after the index is filled.
    """
    super().__init__(database, method)

    quantizer_cls = {'cosine': faiss.IndexFlatIP,
                     'euclidean': faiss.IndexFlatL2}[method]
    self.quantizer = quantizer_cls(self.D)
    self.index = faiss.IndexIVFPQ(self.quantizer, self.D, nlist, M, nbits)

    # The slice keeps all self.N entries, so training sees every database
    # row, in shuffled order.
    shuffled = np.random.permutation(np.arange(self.N))
    samples = database[shuffled[:self.N]]

    print("[ANN] train")
    self.index.train(samples)
    self.add()
    self.index.nprobe = nprobe
Example #11
Source File: faiss.py From gntp with MIT License | 5 votes |
def _build_exact_index(self, data: np.ndarray):
    """Build a flat faiss index over ``data`` and store it on ``self.index``.

    The flat index is L2 when ``self.kernel_name`` is ``'rbf'`` and
    inner-product otherwise; it is always kept on
    ``self.cpu_index_flat``. Unless ``self.cpu`` is truthy, the index
    used for search is a copy on GPU device 0 made through
    ``self.resource``.

    Args:
        data: 2-D array; ``data.shape[1]`` is the vector dimensionality.
    """
    dimensionality = data.shape[1]

    uses_l2 = self.kernel_name in {'rbf'}
    flat_cls = faiss.IndexFlatL2 if uses_l2 else faiss.IndexFlatIP
    self.cpu_index_flat = flat_cls(dimensionality)

    if self.cpu:
        self.index = self.cpu_index_flat
    else:
        self.index = faiss.index_cpu_to_gpu(self.resource, 0, self.cpu_index_flat)

    self.index.add(data)
Example #12
Source File: streamlit_demo.py From RecNN with Apache License 2.0 | 5 votes |
def get_index():
    """Build three flat faiss indexes over the movie embedding matrix.

    The matrix is the first value returned by ``get_embeddings()``,
    converted to a float32 numpy array.

    Returns:
        A dict with keys ``'L2'`` (L2 index over the raw vectors),
        ``'IP'`` (inner-product index over the raw vectors) and ``'COS'``
        (inner-product index over the L2-normalised vectors).
    """
    import faiss
    from sklearn.preprocessing import normalize

    mov_mat, _, _ = get_embeddings()
    mov_mat = mov_mat.numpy().astype('float32')

    # Derive the dimensionality from the data instead of hard-coding 128, so
    # the indexes keep working if the embedding size changes.
    dim = mov_mat.shape[1]

    # test indexes
    indexL2 = faiss.IndexFlatL2(dim)
    indexIP = faiss.IndexFlatIP(dim)
    indexCOS = faiss.IndexFlatIP(dim)

    indexL2.add(mov_mat)
    indexIP.add(mov_mat)
    indexCOS.add(normalize(mov_mat, axis=1, norm='l2'))
    return {'L2': indexL2, 'IP': indexIP, 'COS': indexCOS}
Example #13
Source File: knn.py From diffusion with MIT License | 5 votes |
def __init__(self, database, method):
    """Create a flat faiss index for ``method`` and populate it.

    Args:
        database: forwarded to the parent constructor.
        method: ``'cosine'`` (inner-product index) or ``'euclidean'``
            (L2 index); any other value raises ``KeyError``.
    """
    super().__init__(database, method)

    index_classes = {'cosine': faiss.IndexFlatIP,
                     'euclidean': faiss.IndexFlatL2}
    flat_index = index_classes[method](self.D)
    self.index = flat_index

    # Replicate the index on every GPU only when CUDA devices are advertised.
    if os.environ.get('CUDA_VISIBLE_DEVICES'):
        self.index = faiss.index_cpu_to_all_gpus(self.index)

    self.add()
Example #14
Source File: knn.py From diffusion with MIT License | 5 votes |
def __init__(self, database, method, M=128, nbits=8, nlist=316, nprobe=64):
    """Create, train and populate an IVF-PQ faiss index.

    Args:
        database: array of vectors; forwarded to the parent constructor
            and sampled for training.
        method: ``'cosine'`` or ``'euclidean'``; selects the flat
            quantizer class (any other value raises ``KeyError``).
        M, nbits, nlist: forwarded to ``faiss.IndexIVFPQ``.
        nprobe: assigned to ``index.nprobe`` after the index is filled.
    """
    super().__init__(database, method)

    quantizer_cls = {'cosine': faiss.IndexFlatIP,
                     'euclidean': faiss.IndexFlatL2}[method]
    self.quantizer = quantizer_cls(self.D)
    self.index = faiss.IndexIVFPQ(self.quantizer, self.D, nlist, M, nbits)

    # Train on a random fifth of the database rows.
    shuffled = np.random.permutation(np.arange(self.N))
    samples = database[shuffled[:self.N // 5]]

    print("[ANN] train")
    self.index.train(samples)
    self.add()
    self.index.nprobe = nprobe
Example #15
Source File: knn.py From learn-to-cluster with MIT License | 4 votes |
def __init__(self,
             feats,
             k,
             index_path='',
             index_key='',
             nprobe=128,
             omp_num_threads=None,
             rebuild_index=True,
             verbose=True,
             **kwargs):
    """Build (or load) a faiss index over ``feats`` and store k-NN results.

    Args:
        feats: 2-D feature array; searched against itself.
        k: number of neighbours requested per row.
        index_path: where the index is read from / written to; ``''``
            disables both.
        index_key: ``''`` for a plain flat inner-product index, ``'IVF'``
            for ``IndexIVFFlat``, otherwise a ``faiss.index_factory``
            string (HNSW keys are rejected).
        nprobe: upper bound for ``index.nprobe``.
        omp_num_threads: if given, passed to ``faiss.omp_set_num_threads``.
        rebuild_index: when false and ``index_path`` exists, the stored
            index is loaded instead of rebuilt.
        verbose: stored on ``self.verbose`` and passed to ``Timer``.
        **kwargs: accepted but not used in this method.

    The result is stored on ``self.knns``.
    """
    # NOTE(review): the nesting below was reconstructed from a
    # whitespace-collapsed listing -- confirm it against the upstream file.
    import faiss
    if omp_num_threads is not None:
        faiss.omp_set_num_threads(omp_num_threads)
    self.verbose = verbose
    with Timer('[faiss] build index', verbose):
        if index_path != '' and not rebuild_index and os.path.exists(
                index_path):
            print('[faiss] read index from {}'.format(index_path))
            index = faiss.read_index(index_path)
            # NOTE(review): feats is only cast to float32 in the rebuild
            # branch below, not on this path -- confirm callers pass float32.
        else:
            feats = feats.astype('float32')
            size, dim = feats.shape
            # Default: exact inner-product index; may be replaced (or reused
            # as the IVF quantizer) below.
            index = faiss.IndexFlatIP(dim)
            if index_key != '':
                assert index_key.find(
                    'HNSW') < 0, 'HNSW returns distances insted of sims'
                metric = faiss.METRIC_INNER_PRODUCT
                # Number of inverted lists grows with sqrt(size), capped at 4096.
                nlist = min(4096, 8 * round(math.sqrt(size)))
                if index_key == 'IVF':
                    quantizer = index
                    index = faiss.IndexIVFFlat(quantizer, dim, nlist,
                                               metric)
                else:
                    index = faiss.index_factory(dim, index_key, metric)
                if index_key.find('Flat') < 0:
                    assert not index.is_trained
                index.train(feats)
                index.nprobe = min(nprobe, nlist)
                assert index.is_trained
                # Prints the requested nprobe, not the clamped value set above.
                print('nlist: {}, nprobe: {}'.format(nlist, nprobe))
            index.add(feats)
            if index_path != '':
                print('[faiss] save index to {}'.format(index_path))
                mkdir_if_no_exists(index_path)
                faiss.write_index(index, index_path)
    with Timer('[faiss] query topk {}'.format(k), verbose):
        # NOTE(review): with index_path == '' this resolves to '.npz' in the
        # current directory -- confirm that is intended.
        knn_ofn = index_path + '.npz'
        if os.path.exists(knn_ofn):
            print('[faiss] read knns from {}'.format(knn_ofn))
            self.knns = np.load(knn_ofn)['data']
        else:
            sims, nbrs = index.search(feats, k=k)
            # One (neighbour ids, 1 - similarity) pair per query row.
            self.knns = [(np.array(nbr, dtype=np.int32),
                          1 - np.array(sim, dtype=np.float32))
                         for nbr, sim in zip(nbrs, sims)]