Python annoy.AnnoyIndex() Examples

The following are 28 code examples of annoy.AnnoyIndex(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module annoy, or try the search function.
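Before the project examples, here is a minimal sketch of the AnnoyIndex lifecycle that the examples below all follow: construct the index with the vector dimensionality and a metric, add items, build a forest of trees, and either query directly or save and memory-map the index from disk. The dimensionality, file name, and tree count here are arbitrary choices for illustration.

from annoy import AnnoyIndex
import random

f = 40  # dimensionality of the vectors to be indexed
index = AnnoyIndex(f, metric='angular')
for i in range(1000):
    index.add_item(i, [random.gauss(0, 1) for _ in range(f)])
index.build(10)  # 10 trees; more trees give higher accuracy at query time
index.save('example.ann')

loaded = AnnoyIndex(f, metric='angular')
loaded.load('example.ann')  # memory-maps the saved file
print(loaded.get_nns_by_item(0, 10))  # the 10 nearest neighbours of item 0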
Example #1
Source File: knn.py    From ivis with GNU General Public License v2.0
def run(self):
        try:
            index = AnnoyIndex(self.n_dims, metric='angular')
            index.load(self.index_filepath)
            for i in range(self.data_indices[0], self.data_indices[1]):
                neighbour_indexes = index.get_nns_by_item(
                    i, self.k, search_k=self.search_k, include_distances=False)
                neighbour_indexes = np.array(neighbour_indexes,
                                             dtype=np.uint32)
                self.results_queue.put(
                    IndexNeighbours(row_index=i,
                                    neighbour_list=neighbour_indexes))
        except Exception as e:
            self.exception = e
        finally:
            self.results_queue.close() 
Example #2
Source File: __init__.py    From bbknn with MIT License
def create_tree(data,approx,metric,use_faiss,n_trees):
	'''
	Create a faiss/cKDTree/KDTree/annoy index for nearest neighbour lookup. All undescribed
	input is as in ``bbknn.bbknn()``. Returns the resulting index.

	Input
	-----
	data : ``numpy.array``
		PCA coordinates of a batch's cells to index.
	'''
	if approx:
		ckd = AnnoyIndex(data.shape[1],metric=metric)
		for i in np.arange(data.shape[0]):
			ckd.add_item(i,data[i,:])
		ckd.build(n_trees)
	elif metric == 'euclidean':
		if 'faiss' in sys.modules and use_faiss:
			ckd = faiss.IndexFlatL2(data.shape[1])
			ckd.add(data)
		else:
			ckd = cKDTree(data)
	else:
		ckd = KDTree(data,metric=metric)
	return ckd 
Example #3
Source File: test_knn.py    From ivis with GNU General Public License v2.0
def test_build_sparse_annoy_index(annoy_index_file):
    data = np.random.choice([0, 1], size=(10, 5))
    sparse_data = csr_matrix(data)

    index = build_annoy_index(sparse_data, annoy_index_file)
    assert os.path.exists(annoy_index_file)

    loaded_index = AnnoyIndex(5, metric='angular')
    loaded_index.load(annoy_index_file)

    assert index.f == loaded_index.f == 5
    assert index.get_n_items() == loaded_index.get_n_items() == 10
    assert index.get_nns_by_item(0, 5) == loaded_index.get_nns_by_item(0, 5)

    index.unload()
    loaded_index.unload() 
Example #4
Source File: scanorama.py    From scanorama with MIT License
def nn_approx(ds1, ds2, knn=KNN, metric='manhattan', n_trees=10):
    # Build index.
    a = AnnoyIndex(ds2.shape[1], metric=metric)
    for i in range(ds2.shape[0]):
        a.add_item(i, ds2[i, :])
    a.build(n_trees)

    # Search index.
    ind = []
    for i in range(ds1.shape[0]):
        ind.append(a.get_nns_by_vector(ds1[i, :], knn, search_k=-1))
    ind = np.array(ind)

    # Match.
    match = set()
    for d1_i, b in zip(range(ds1.shape[0]), ind):
        for b_i in b:
            match.add((d1_i, b_i))

    return match

# Find mutual nearest neighbors. 
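The trailing comment points at the next step in scanorama. As a hedged sketch (the helper name mnn_sketch is illustrative, not the project's actual function), mutual nearest neighbours can be assembled by running nn_approx in both directions and keeping only the pairs found both ways:

def mnn_sketch(ds1, ds2, knn=KNN):
    # Neighbours of ds1 rows among ds2 rows, and vice versa.
    match1 = nn_approx(ds1, ds2, knn=knn)
    match2 = nn_approx(ds2, ds1, knn=knn)
    # A pair (a, b) is mutual if b is a neighbour of a and a is a neighbour of b.
    return match1 & set((b, a) for a, b in match2)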
Example #5
Source File: __init__.py    From magnitude with MIT License
def get_approx_index_chunks(self):
        """Gets decompressed chunks of the AnnoyIndex of the vectors from
        the database."""
        try:
            db = self._db(force_new=True)
            with lz4.frame.LZ4FrameDecompressor() as decompressor:
                chunks = db.execute(
                    """
                        SELECT rowid,index_file
                        FROM `magnitude_approx`
                        WHERE trees = ?
                    """, (self.approx_trees,))
                for chunk in chunks:
                    yield decompressor.decompress(chunk[1])
                    if self.closed:
                        return
        except Exception as e:
            if self.closed:
                pass
            else:
                raise e 
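A hedged sketch of how these chunks might be consumed, assuming a vectors object that exposes this generator and its dimensionality (both names are illustrative): stream the decompressed chunks into a temporary .ann file, then memory-map it with Annoy.

import tempfile
from annoy import AnnoyIndex

with tempfile.NamedTemporaryFile(suffix='.ann', delete=False) as tmp:
    for chunk in vectors.get_approx_index_chunks():
        tmp.write(chunk)  # reassemble the index file chunk by chunk

index = AnnoyIndex(vectors.dim, metric='angular')
index.load(tmp.name)  # memory-maps the reassembled file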
Example #6
Source File: sketch.py    From geosketch with MIT License
def label_approx(X, sites, site_labels, k=1):
    from annoy import AnnoyIndex

    assert(X.shape[1] == sites.shape[1])

    # Build index over site points.
    aindex = AnnoyIndex(sites.shape[1], metric='euclidean')
    for i in range(sites.shape[0]):
        aindex.add_item(i, sites[i, :])
    aindex.build(10)

    labels = []
    for i in range(X.shape[0]):
        # Find nearest site point.
        nearest_sites = aindex.get_nns_by_vector(X[i, :], k)
        if len(nearest_sites) < 1:
            labels.append(None)
            continue
        label = Counter([
            site_labels[ns] for ns in nearest_sites
        ]).most_common(1)[0][0]
        labels.append(label)

    return np.array(labels) 
Example #7
Source File: index.py    From rep0st with MIT License
def load_index(self, index_id):
        if self.annoy_index is None:
            log.info("loading initial index with id {}", self.current_index)
        else:
            log.info("switching index from {} to {}", self.current_index, index_id)

        newindex = AnnoyIndex(108, metric='euclidean')
        newindex.load(config.index_config['index_path'] + 'index_' + str(index_id) + '.ann')
        if self.annoy_index is not None:
            self.annoy_index.unload()
        self.annoy_index = newindex
        self.current_index = index_id
        log.info("finished switching index. now using index {}", self.current_index) 
Example #8
Source File: embedding.py    From recoder with MIT License
def __build_index(self, index_file):
    self.embedding_size = self.embeddings.shape[1]

    self.index = an.AnnoyIndex(self.embedding_size, metric='angular')

    for embedding_ind in range(self.embeddings.shape[0]):
      embedding = self.embeddings[embedding_ind, :]
      self.index.add_item(embedding_ind, embedding)

    self.index.build(self.n_trees)

    if self.id_map is None:
      self.id_map = dict([(i, i) for i in range(self.embeddings.shape[0])])

    self.inverse_id_map = dict([(v,k) for k,v in self.id_map.items()])

    if index_file:
      embeddings_file = index_file + '.embeddings'
      state = {
        'embedding_size': self.embedding_size,
        'id_map': self.id_map,
      }

      self.index.save(embeddings_file)
      with open(index_file, 'wb') as _index_file:
        pickle.dump(state, _index_file) 
Example #9
Source File: matching.py    From realtime-embeddings-matching with Apache License 2.0
def __init__(self, index_file):
    logging.info('Initialising matching utility...')
    self.index = AnnoyIndex(VECTOR_LENGTH)
    self.index.load(index_file, prefault=True)
    logging.info('Annoy index {} is loaded'.format(index_file))
    with open(index_file + '.mapping', 'rb') as handle:
      self.mapping = pickle.load(handle)
    logging.info('Mapping file {} is loaded'.format(index_file + '.mapping'))
    logging.info('Matching utility initialised.') 
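A hedged sketch of how this utility might serve lookups (the method name find_similar_items and the num_matches parameter are illustrative): query the Annoy index with an embedding vector, then translate the internal item ids back through the loaded mapping.

def find_similar_items(self, vector, num_matches=10):
    # Query the Annoy index, then map Annoy's integer item ids back to the
    # original identifiers through the loaded mapping.
    ids = self.index.get_nns_by_vector(
        vector, num_matches, search_k=-1, include_distances=False)
    return [self.mapping[i] for i in ids]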
Example #10
Source File: text_graph.py    From reveal-graph-embedding with Apache License 2.0
def make_text_graph(user_lemma_matrix, dimensionality, metric, number_of_estimators, number_of_neighbors):
    user_lemma_matrix_tfidf = augmented_tf_idf(user_lemma_matrix)
    # print(user_lemma_matrix_tfidf.shape)
    if (user_lemma_matrix_tfidf.shape[0] <= dimensionality) or (user_lemma_matrix_tfidf.shape[1] <= dimensionality):
        X_svd = user_lemma_matrix_tfidf.toarray()
    else:
        X_svd = TruncatedSVD(n_components=dimensionality).fit_transform(user_lemma_matrix_tfidf)

    annoy_index = AnnoyIndex(X_svd.shape[1], metric=metric)

    for q in range(X_svd.shape[0]):
        annoy_index.add_item(q, X_svd[q, :])

    annoy_index.build(number_of_estimators)

    row = list()
    col = list()
    data = list()
    for q in range(X_svd.shape[0]):
        neighbors, distances = annoy_index.get_nns_by_item(q, number_of_neighbors, include_distances=True)

        row.extend([q] * number_of_neighbors)
        col.extend(neighbors)
        data.extend(distances)

    row = np.array(row, dtype=np.int64)
    col = np.array(col, dtype=np.int64)
    data = np.array(data, dtype=np.float64)

    text_graph = spsp.coo_matrix((data,
                                  (row,
                                   col)),
                                 shape=(X_svd.shape[0],
                                        X_svd.shape[0]))
    text_graph = spsp.csr_matrix(text_graph)

    return text_graph 
Example #11
Source File: index_embeddings.py    From jann with MIT License
def index_embeddings(args):
    """Main run function for indexing the embeddings."""
    unique_strings_path = args.infile + '.embedded.pkl_unique_strings.csv'

    # Load the unique lines
    with open(unique_strings_path) as f:
        unique_strings = [line.rstrip() for line in f]

    unique_embeddings_path = (args.infile +
                              '.embedded.pkl_unique_strings_embeddings.txt')
    # Load the unique embeddings
    with open(unique_embeddings_path) as f:
        unique_embeddings = [[float(x) for x in
                              line.strip().split()] for line in f]

    tf.logging.info('Loaded {} unique strings, {} embeddings of dimension {}'.
                    format(len(unique_strings),
                           len(unique_embeddings),
                           len(unique_embeddings[0])))

    # Length of item vector that will be indexed
    nn_forest = AnnoyIndex(512, metric='angular')

    for i in range(len(unique_strings)):
        v = unique_embeddings[i]
        nn_forest.add_item(i, v)

    # Build an approximate nearest neighbor forest with num_trees
    nn_forest.build(int(args.num_trees))
    output_path = args.infile + '.ann'
    nn_forest.save(output_path)

    tf.logging.info('Index forest built {}'.format(output_path))

    return True 
Example #12
Source File: generateArtificialSessions.py    From MSMARCO-Conversational-Search with MIT License
def generateAnnoy(real, artificial, annoyFilename, dimensions):
    idx2vec = np.array(artificial[2])
    t = AnnoyIndex(dimensions)
    for j in range(len(artificial[2])):
        t.add_item(j,idx2vec[j])
    print('Done Adding items to AnnoyIndex')
    t.build(TREESIZE)
    print('Done Building AnnoyIndex')
    t.save(annoyFilename)
    return t 
Example #13
Source File: noveltysearchlive.py    From Novelty-Search-Live with GNU General Public License v3.0
def AddToTrain(individual):
    global annoy_train
    global test_db
    global IND_SIZE
    global config

    max_memory = 5
    if set.get_master_volume() == 1:
        print(set.get_master_volume())
        set.set_master_volume(0.85)

        test_db.append(individual)
        print "SAVING TO TRAINING SET. TestDB Size: " + str(len(test_db))

        annoy_train = AnnoyIndex(IND_SIZE)
        annoy_train.add_item(annoy_train.get_n_items(), individual)
        annoy_train.build(config["annoy_tree"]) # 10 trees

        if len(test_db) > max_memory:
            test_db.pop(0)
            print "delete old memory entry"

    if set.get_master_volume() == 0:
        test_db = []
        # gen_record = []
        annoy_train = AnnoyIndex(IND_SIZE)
        annoy_train.build(config["annoy_tree"]) # 10 trees
        print "clean set"
        set.set_master_volume(0.85)


############ App Main Loop ############ 
Example #14
Source File: dnd.py    From tensorflow-rl with Apache License 2.0
def __init__(self, capacity=100000, key_size=128, cache_size=32, alpha=0.1):
        self.alpha = alpha
        self.capacity = capacity
        self.lru_cache = LRUCache(capacity)
        self.dup_cache = deque(maxlen=cache_size)
        self.index = AnnoyIndex(key_size, metric='euclidean')
        self.keys = np.zeros((capacity, key_size), dtype=np.float32)
        self.values = np.zeros((capacity,), dtype=np.float32)
        self.insert_idx = 0
        self.insertions = 0 
Example #15
Source File: annoy.py    From ann-benchmarks with MIT License
def fit(self, X):
        self._annoy = annoy.AnnoyIndex(X.shape[1], metric=self._metric)
        for i, x in enumerate(X):
            self._annoy.add_item(i, x.tolist())
        self._annoy.build(self._n_trees) 
Example #16
Source File: test_knn.py    From ivis with GNU General Public License v2.0
def test_dense_annoy_index(annoy_index_file):
    data = np.random.choice([0, 1], size=(10, 5))
    index = build_annoy_index(data, annoy_index_file)
    assert os.path.exists(annoy_index_file)

    loaded_index = AnnoyIndex(5, metric='angular')
    loaded_index.load(annoy_index_file)

    assert index.f == loaded_index.f == 5
    assert index.get_n_items() == loaded_index.get_n_items() == 10
    assert index.get_nns_by_item(0, 5) == loaded_index.get_nns_by_item(0, 5)

    index.unload()
    loaded_index.unload() 
Example #17
Source File: background_job.py    From rep0st with MIT License
def build_index(index_id):
    n_trees = config.index_config['tree_count']

    log.info("started index build")
    session = rep.database.DBSession()
    count = session.query(Feature).filter(Feature.type == FeatureType.FEATURE_VECTOR).count()
    index = AnnoyIndex(108, metric='euclidean')
    cnt = 0
    log.info("adding {} features to index", count)
    start = time.time()
    for feature in session.query(Feature).filter(Feature.type == FeatureType.FEATURE_VECTOR).yield_per(1000):
        arr = np.asarray(bytearray(feature.data)).astype(np.float32)
        index.add_item(feature.post_id, arr)
        cnt += 1
        if cnt % 10000 == 0:
            log.debug("added {}/{} features to the index", cnt, count)
    session.close()
    stop = time.time()
    log.info("added all {} features to the index in {}", count, str(datetime.timedelta(seconds=stop - start)))
    log.info("building index with {} trees. this will take a while...", n_trees)
    start = time.time()
    index.build(n_trees)
    index_file = config.index_config['index_path'] + "index_" + str(index_id) + ".ann"
    log.info("saving index to file {}", index_file)
    index.save(index_file)
    stop = time.time()

    log.debug("finished building of index. it took {}", str(datetime.timedelta(seconds=stop - start))) 
Example #18
Source File: hf_sampler.py    From hfsoftmax with MIT License
def __init__(self,
                 rank,
                 fdim,
                 sample_num,
                 num_output,
                 bias=False,
                 ntrees=50,
                 interval=100,
                 start_iter=0,
                 midw='0',
                 midb='1'):
        super(HFSampler, self).__init__()
        self.rank = rank
        self.fdim = fdim
        self.sample_num = sample_num
        self.num_output = num_output
        self.full_cls = np.arange(self.num_output)
        # init param client
        self.client = ParameterClient(rank)
        self.midw = midw
        self.midb = midb
        self.is_bias = bias
        self.client.add_matrix(self.midw, [self.num_output, self.fdim])
        if self.is_bias:
            self.client.add_matrix(self.midb, [self.num_output, 1])
        # init hashing forest
        self.ntrees = ntrees
        self.interval = interval
        self.start_iter = start_iter
        self.iter = start_iter
        self.test_iter = start_iter
        self.anns = AnnoyIndex(self.fdim)
        self.pool = Pool(processes=2) 
Example #19
Source File: hf_sampler.py    From hfsoftmax with MIT License
def _update_hf(self):
        if not self.iter % self.interval == 0 and \
            not self.iter == self.start_iter:
            return
        w = self.client.get_value_by_rows(self.midw, self.full_cls)
        self.anns = AnnoyIndex(self.fdim)
        for i, v in enumerate(w):
            self.anns.add_item(i, v)
        self.anns.build(self.ntrees) 
Example #20
Source File: cluster_corr.py    From altanalyze with Apache License 2.0
def nearest_neighbors(collection, num_neighbors=10, n_trees=100):
    """
    Finds the num_neighbors nearest neighbors to each cell in the sparse matrix

    Return result is a dictionary of lists, where the key is an index into the cells, 
    and the value is the neighbors of that cell
    """
    nn_idx = AnnoyIndex(collection.num_genes())
    # Add the elements in reverse order because Annoy allocates the memory based on
    # the value of the element added - so adding in increasing order will trigger
    # lots of allocations
    for i in range(collection.num_cells()-1, -1, -1):
        nn_idx.add_item(i, collection.get_cell_expression_vector(i))
    nn_idx.build(n_trees)
    return { i: nn_idx.get_nns_by_item(i, num_neighbors) for i in range(collection.num_cells()) } 
Example #21
Source File: similarity_finder.py    From hub with Apache License 2.0
def __init__(
      self,
      module_url,
      index_file_path,
      mapping_file_path,
      dimensions,
      random_projection_matrix_file,
  ):

    # Load the TF-Hub module
    print('Loading the TF-Hub module...')
    self.embed_fn = hub.load(module_url)
    print('TF-hub module is loaded.')

    dimensions = self.embed_fn(['']).shape[1]

    self.random_projection_matrix = None
    if tf.io.gfile.exists(random_projection_matrix_file):
      with open(random_projection_matrix_file, 'rb') as handle:
        self.random_projection_matrix = pickle.load(handle)
      dimensions = self.random_projection_matrix.shape[1]

    self.index = annoy.AnnoyIndex(dimensions, metric=_METRIC)
    self.index.load(index_file_path, prefault=True)
    print('Annoy index is loaded.')
    with open(mapping_file_path, 'rb') as handle:
      self.mapping = pickle.load(handle)
    print('Mapping file is loaded.') 
Example #22
Source File: differentiable_neural_dictionary.py    From coach with Apache License 2.0
def __init__(self, dict_size, key_width, new_value_shift_coefficient=0.1, batch_size=100, key_error_threshold=0.01,
                 num_neighbors=50, override_existing_keys=True, rebuild_on_every_update=False):
        self.rebuild_on_every_update = rebuild_on_every_update
        self.max_size = dict_size
        self.curr_size = 0
        self.new_value_shift_coefficient = new_value_shift_coefficient
        self.num_neighbors = num_neighbors
        self.override_existing_keys = override_existing_keys

        self.index = AnnoyIndex(key_width, metric='euclidean')
        self.index.set_seed(1)

        self.embeddings = np.zeros((dict_size, key_width))
        self.values = np.zeros(dict_size)
        self.additional_data = [None] * dict_size

        self.lru_timestamps = np.zeros(dict_size)
        self.current_timestamp = 0.0

        # keys that are in this distance will be considered as the same key
        self.key_error_threshold = key_error_threshold

        self.initial_update_size = batch_size
        self.min_update_size = self.initial_update_size
        self.key_dimension = key_width
        self.value_dimension = 1
        self._reset_buffer()

        self.built_capacity = 0 
Example #23
Source File: differentiable_neural_dictionary.py    From coach with Apache License 2.0
def load_dnd(model_dir):
    latest_checkpoint_id = -1
    latest_checkpoint = ''
    # get all checkpoint files
    for fname in os.listdir(model_dir):
        path = os.path.join(model_dir, fname)
        if os.path.isdir(path) or fname.split('.')[-1] != 'srs':
            continue
        checkpoint_id = int(fname.split('_')[0])
        if checkpoint_id > latest_checkpoint_id:
            latest_checkpoint = fname
            latest_checkpoint_id = checkpoint_id

    with open(os.path.join(model_dir, str(latest_checkpoint)), 'rb') as f:
        DND = pickle.load(f)

        for a in range(DND.num_actions):
            DND.dicts[a].index = AnnoyIndex(512, metric='euclidean')
            DND.dicts[a].index.set_seed(1)

            for idx, key in zip(range(DND.dicts[a].curr_size), DND.dicts[a].embeddings[:DND.dicts[a].curr_size]):
                DND.dicts[a].index.add_item(idx, key)

            DND.dicts[a].index.build(50)

    return DND 
Example #24
Source File: sketch.py    From geosketch with MIT License
def srs_positive_annoy(X, N, seed=None, replace=False, prenormalized=False):
    from annoy import AnnoyIndex

    n_samples, n_features = X.shape

    if not replace and N > n_samples:
        raise ValueError('Cannot sample {} elements from {} elements '
                         'without replacement'.format(N, n_samples))
    if not replace and N == n_samples:
        return range(N)

    if seed is not None:
        np.random.seed(seed)

    X = X - X.min(0)

    if not prenormalized:
        X = normalize(X).astype('float32')

    srs_idx = set()
    for i in range(N):
        aindex = AnnoyIndex(X.shape[1], metric='euclidean')
        for j in range(X.shape[0]):
            if j not in srs_idx:
                aindex.add_item(j, X[j, :])
        aindex.build(10)

        Phi_i = np.random.normal(size=(n_features))
        Phi_i /= np.linalg.norm(Phi_i)

        nearest_site = aindex.get_nns_by_vector(Phi_i, 1)
        srs_idx.add(nearest_site[0])

    return sorted(srs_idx) 
Example #25
Source File: approximate_als.py    From implicit with MIT License
def fit(self, Ciu, show_progress=True):
        # delay loading the annoy library in case its not installed here
        import annoy

        # train the model
        super(AnnoyAlternatingLeastSquares, self).fit(Ciu, show_progress)

        # build up an Annoy Index with all the item_factors (for calculating
        # similar items)
        if self.approximate_similar_items:
            log.debug("Building annoy similar items index")

            self.similar_items_index = annoy.AnnoyIndex(
                self.item_factors.shape[1], 'angular')
            for i, row in enumerate(self.item_factors):
                self.similar_items_index.add_item(i, row)
            self.similar_items_index.build(self.n_trees)

        # build up a separate index for the inner product (for recommend
        # methods)
        if self.approximate_recommend:
            log.debug("Building annoy recommendation index")
            self.max_norm, extra = augment_inner_product_matrix(self.item_factors)
            self.recommend_index = annoy.AnnoyIndex(extra.shape[1], 'angular')
            for i, row in enumerate(extra):
                self.recommend_index.add_item(i, row)
            self.recommend_index.build(self.n_trees) 
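Since augment_inner_product_matrix pads the item factors with one extra dimension so that angular search ranks items by inner product, a query vector needs a zero in that extra dimension. A hedged sketch, assuming a fitted model with user_factors and the recommend_index built above (the names model and userid are illustrative):

import numpy as np

# Pad the user factor with 0 in the augmented dimension; angular search on
# the augmented index then ranks items by inner product with the user factor.
query = np.append(model.user_factors[userid], 0)
candidate_ids = model.recommend_index.get_nns_by_vector(query, 10, search_k=-1)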
Example #26
Source File: embedding.py    From recoder with MIT License
def __load_index(self, index_file):
    log.info('Loading index file from {}'.format(index_file))
    with open(index_file, 'rb') as _index_file:
      state = pickle.load(_index_file)
    self.embedding_size = state['embedding_size']
    self.id_map = state['id_map']
    embeddings_file = index_file + '.embeddings'
    self.index = an.AnnoyIndex(self.embedding_size, metric='angular')
    self.index.load(embeddings_file)
    self.inverse_id_map = dict([(v,k) for k,v in self.id_map.items()]) 
Example #27
Source File: recall.py    From nlp_research with MIT License
def __init__(self, vecs):
        assert len(vecs)>0, 'no vecs available to init AnnoyIndex'
        size = len(vecs[0])
        self.annoy_model = AnnoyIndex(size)
        for idx,vec in enumerate(vecs):
            self.annoy_model.add_item(idx, vec)
        self.annoy_model.build(50) 
Example #28
Source File: annoyVectorIndex.py    From Seq2Seq-Vis with Apache License 2.0
def __init__(self, file_name, dim_vector=500):
        self.u = AnnoyIndex(dim_vector)
        self.u.load(file_name)