Python faiss.Clustering() Examples
The following are 6 code examples of faiss.Clustering().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module faiss, or try the search function.
Example #1
Source File: faiss_kmeans.py From cdp with MIT License | 10 votes |
def run_kmeans(x, nmb_clusters, verbose=False):
    """Cluster ``x`` with faiss k-means, using a flat L2 index on GPU 0.

    Args:
        x: 2-D data array, one row per point (presumably float32, as faiss
            expects -- TODO confirm against callers).
        nmb_clusters (int): number of clusters.
        verbose (bool): when true, print the recorded loss values.

    Returns:
        tuple: ``(assignments, final_loss)``. ``assignments`` is a list with
        the id of the nearest centroid for every row of ``x``;
        ``final_loss`` is the last entry of the objective values recorded by
        the clustering object.
    """
    _, dim = x.shape

    # faiss k-means object.
    clustering = faiss.Clustering(dim, nmb_clusters)

    # Draw a new faiss seed on every call so the randomly chosen initial
    # centroids are not tied to the same feature ids from one epoch to the
    # next.
    clustering.seed = np.random.randint(1234)

    clustering.niter = 20
    clustering.max_points_per_centroid = 10000000

    # Flat L2 index on GPU device 0, float16 storage disabled.
    gpu_resources = faiss.StandardGpuResources()
    index_config = faiss.GpuIndexFlatConfig()
    index_config.useFloat16 = False
    index_config.device = 0
    gpu_index = faiss.GpuIndexFlatL2(gpu_resources, dim, index_config)

    # Train, then look up the single nearest entry of the index for each row.
    clustering.train(x, gpu_index)
    _, nearest = gpu_index.search(x, 1)

    loss_history = faiss.vector_to_array(clustering.obj)
    if verbose:
        print('k-means loss evolution: {0}'.format(loss_history))

    assignments = [int(row[0]) for row in nearest]
    return assignments, loss_history[-1]
Example #2
Source File: faissext.py From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0 | 6 votes |
def do_clustering(features, num_clusters, gpu_ids=None,
                  num_pca_components=None, niter=100, nredo=1, verbose=0):
    """Flatten ``features``, optionally preprocess them, and k-means them.

    Args:
        features: array whose first axis indexes samples; all remaining axes
            are flattened into one and the result is cast to float32.
        num_clusters: number of clusters passed to ``train_kmeans``.
        gpu_ids: GPU id(s) forwarded to ``train_kmeans`` (None is forwarded
            unchanged).
        num_pca_components: when not None, ``preprocess_features`` is applied
            first with ``d=num_pca_components``.
        niter, nredo, verbose: forwarded to ``preprocess_features`` only.

    Returns:
        The result of ``compute_cluster_assignment(centroids, features)``.
    """
    logging.debug('FAISS: using GPUs {}'.format(gpu_ids))

    # One float32 row per sample.
    n_samples = features.shape[0]
    features = np.asarray(features.reshape(n_samples, -1), dtype=np.float32)

    if num_pca_components is not None:
        features = preprocess_features(
            features,
            d=num_pca_components,
            niter=niter,
            nredo=nredo,
            verbose=verbose,
        )

    logging.debug('FAISS: clustering...')
    started = time.time()
    # NOTE(review): niter/nredo are not forwarded to train_kmeans and verbose
    # is fixed to 1 here -- confirm this is intended.
    centroids = train_kmeans(features, num_clusters, gpu_ids=gpu_ids, verbose=1)
    labels = compute_cluster_assignment(centroids, features)
    elapsed_minutes = (time.time() - started) / 60.0
    logging.debug("FAISS: Clustering total elapsed time: %.3f m" % elapsed_minutes)

    return labels
Example #3
Source File: nmi.py From classification_metric_learning with Apache License 2.0 | 6 votes |
def test_nmi_faiss(embeddings, labels):
    """Cluster ``embeddings`` with faiss on GPU 0 and score against ``labels``.

    k-means is run with one cluster per distinct value in ``labels``
    (300 iterations, 10 redos, seed 0). Every embedding is then assigned to
    its nearest entry in the trained index, and the assignment is compared
    with ``labels`` via ``normalized_mutual_info_score``.

    Args:
        embeddings: 2-D array, one row per sample (presumably float32 as
            faiss expects -- TODO confirm).
        labels: ground-truth label for each row of ``embeddings``.

    Returns:
        The NMI score, which is also printed.
    """
    gpu_resources = faiss.StandardGpuResources()
    index_config = faiss.GpuIndexFlatConfig()
    index_config.device = 0

    n_classes = np.unique(labels).size
    dim = embeddings.shape[1]

    clustering = faiss.Clustering(dim, n_classes)
    clustering.verbose = True
    clustering.niter = 300
    clustering.nredo = 10
    clustering.seed = 0

    gpu_index = faiss.GpuIndexFlatL2(gpu_resources, dim, index_config)
    clustering.train(embeddings, gpu_index)

    # Nearest index entry per embedding; distances are not needed.
    _, nearest = gpu_index.search(embeddings, 1)
    predicted = nearest.squeeze()

    nmi = normalized_mutual_info_score(labels, predicted)
    print("NMI: {}".format(nmi))
    return nmi
Example #4
Source File: run_index.py From denspi with Apache License 2.0 | 6 votes |
def train_coarse_quantizer(data, quantizer_path, num_clusters, hnsw=False, niter=10, cuda=False):
    """Train k-means centroids on ``data`` and save them as a quantizer index.

    Args:
        data: 2-D training array, one row per vector.
        quantizer_path: destination passed to ``faiss.write_index``.
        num_clusters: number of centroids to train.
        hnsw: when true, store the centroids in an ``IndexHNSWFlat(d, 32)``
            with ``efSearch = 128``; otherwise in an ``IndexFlatL2``.
        niter: number of k-means iterations.
        cuda: when true, the flat index used during training is moved to
            GPU 0.

    Returns:
        None. The quantizer is written to ``quantizer_path``.
    """
    dim = data.shape[1]

    # Flat L2 index used for assignment during training, optionally on GPU 0.
    train_index = faiss.IndexFlatL2(dim)
    if cuda:
        gpu_resources = faiss.StandardGpuResources()
        train_index = faiss.index_cpu_to_gpu(gpu_resources, 0, train_index)

    clustering = faiss.Clustering(dim, num_clusters)
    clustering.verbose = True
    clustering.niter = niter
    clustering.train(data, train_index)

    # Centroids come back as a flat vector; reshape to one row per centroid.
    flat_centroids = faiss.vector_float_to_array(clustering.centroids)
    centroids = flat_centroids.reshape(num_clusters, dim)

    if hnsw:
        quantizer = faiss.IndexHNSWFlat(dim, 32)
        quantizer.hnsw.efSearch = 128
        quantizer.train(centroids)
    else:
        quantizer = faiss.IndexFlatL2(dim)
    quantizer.add(centroids)

    faiss.write_index(quantizer, quantizer_path)
Example #5
Source File: stat_utils.py From pytorch-metric-learning with MIT License | 6 votes |
def run_kmeans(x, nmb_clusters):
    """Cluster ``x`` with faiss k-means and return one cluster id per row.

    A flat L2 index is used for assignment; it is replicated onto all GPUs
    when ``faiss.get_num_gpus()`` reports at least one.

    Args:
        x: 2-D data array, one row per point.
        nmb_clusters (int): number of clusters.

    Returns:
        list: for each row of ``x``, the integer id of its nearest entry in
        the trained index.
    """
    _, dim = x.shape
    logging.info("running k-means clustering with k=%d" % nmb_clusters)
    logging.info("embedding dimensionality is %d" % dim)

    # faiss k-means object.
    clustering = faiss.Clustering(dim, nmb_clusters)
    clustering.niter = 20
    clustering.max_points_per_centroid = 10000000

    assign_index = faiss.IndexFlatL2(dim)
    if faiss.get_num_gpus() > 0:
        assign_index = faiss.index_cpu_to_all_gpus(assign_index)

    # Train, then fetch the single nearest index entry for every row.
    clustering.train(x, assign_index)
    _, nearest = assign_index.search(x, 1)

    return [int(row[0]) for row in nearest]


# modified from https://github.com/facebookresearch/faiss/wiki/Faiss-building-blocks:-clustering,-PCA,-quantization
Example #6
Source File: faissext.py From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0 | 4 votes |
def train_kmeans(x, num_clusters=1000, gpu_ids=None, niter=100, nredo=1, verbose=0):
    """Runs k-means clustering on the CPU or on one or several GPUs.

    Args:
        x: 2-D training array, one row per point; must contain no NaN or
            infinite values.
        num_clusters: number of centroids to train.
        gpu_ids: None to use a CPU ``IndexFlatL2``, a single int, or a
            non-empty sequence of GPU device ids.
        niter: number of k-means iterations.
        nredo: value assigned to the clustering object's ``nredo`` field.
        verbose: truthy to enable faiss' own progress output.

    Returns:
        The trained centroids reshaped to ``(num_clusters, d)``, where ``d``
        is ``x.shape[1]``.

    Raises:
        AssertionError: if ``x`` contains NaN/Inf or ``gpu_ids`` is an empty
            sequence. These are plain asserts, so they are skipped when
            Python runs with ``-O``.
    """
    assert np.all(~np.isnan(x)), 'x contains NaN'
    assert np.all(np.isfinite(x)), 'x contains Inf'
    if isinstance(gpu_ids, int):
        gpu_ids = [gpu_ids]
    assert gpu_ids is None or len(gpu_ids)

    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = bool(verbose)
    kmeans.niter = niter
    kmeans.nredo = nredo

    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    if gpu_ids is None:
        index = faiss.IndexFlatL2(d)
    else:
        # One resources object per device. The list is kept in a local so
        # the objects stay referenced for the duration of training.
        resources = [faiss.StandardGpuResources() for _ in gpu_ids]

        configs = []
        for device in gpu_ids:
            cfg = faiss.GpuIndexFlatConfig()
            cfg.useFloat16 = False
            cfg.device = device
            configs.append(cfg)

        gpu_indexes = [
            faiss.GpuIndexFlatL2(res, d, cfg)
            for res, cfg in zip(resources, configs)
        ]

        if len(gpu_indexes) == 1:
            index = gpu_indexes[0]
        else:
            # NOTE(review): newer faiss releases may expose this class under
            # a different name -- confirm against the installed version.
            index = faiss.IndexProxy()
            for sub_index in gpu_indexes:
                index.addIndex(sub_index)

    # perform the training
    kmeans.train(x, index)
    centroids = faiss.vector_float_to_array(kmeans.centroids)

    return centroids.reshape(num_clusters, d)