Python whoosh.index.create_in() Examples
The following are 11 code examples of whoosh.index.create_in().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module whoosh.index, or try the search function.
Example #1
Source File: engine.py From txtorg with MIT License | 6 votes |
def _init_index(self):
    """Open the corpus's Whoosh index, creating a new one when none exists.

    Sets ``self.analyzer``, ``self.index``, ``self.searcher`` and
    ``self.reader``. The corpus directory is created first if it is missing.
    """
    corpus_dir = self.corpus.path
    if not os.path.exists(corpus_dir):
        os.mkdir(corpus_dir)

    corpus_analyzer = self.corpus.analyzer
    self.analyzer = corpus_analyzer

    if not exists_in(corpus_dir):
        # may need to remove this? how can we have a schema if we don't
        # know the...uh...schema?
        schema = Schema(
            title=TEXT(stored=True, analyzer=corpus_analyzer),
            content=TEXT(analyzer=corpus_analyzer),
            path=ID(stored=True)
        )
        index_handle = create_in(corpus_dir, schema)
        # A writer is opened and committed without adding any documents.
        fresh_writer = index_handle.writer()
        fresh_writer.commit()
    else:
        index_handle = open_dir(corpus_dir)

    self.index = index_handle
    self.searcher = index_handle.searcher()
    self.reader = index_handle.reader()
Example #2
Source File: indexfiles.py From txtorg with MIT License | 6 votes |
def __init__(self, root, storeDir, analyzer, args_dir=None):
    """Create a Whoosh index in ``storeDir`` and index the documents in ``root``.

    Args:
        root: Document directory passed on to ``self.indexDocs``.
        storeDir: Directory that holds the index; created with ``os.mkdir``
            when it does not exist yet.
        analyzer: Zero-argument callable; its return value is used as the
            analyzer of the ``contents`` field.
        args_dir: Stored unchanged on ``self.args_dir``.

    The progress messages are written with calls that produce the same
    output on Python 2 and Python 3 (the original used Python 2-only
    ``print`` statements).
    """
    # Local import: only needed for the progress message that must not end
    # with a newline.
    import sys

    self.args_dir = args_dir

    if not os.path.exists(storeDir):
        os.mkdir(storeDir)

    schema = Schema(name=TEXT(stored=True),
                    path=ID(stored=True),
                    txtorg_id=ID(stored=True),
                    contents=TEXT(stored=False, vector=True,
                                  analyzer=analyzer()))
    ix = create_in(storeDir, schema)
    writer = ix.writer()

    # The one-element tuple keeps the formatting correct even if ``root``
    # itself happens to be a tuple.
    print('document dir is %s' % (root,))
    self.indexDocs(root, writer)

    # Emit "optimizing index " without a newline so that "done" ends up on
    # the same line once the commit has finished.
    sys.stdout.write('optimizing index ')
    sys.stdout.flush()
    writer.commit(optimize=True)
    print('done')

    self.index = ix
    # NOTE(review): the writer has already been committed at this point; the
    # Whoosh docs say a committed writer cannot be reused -- confirm that no
    # caller tries to write through ``self.writer``.
    self.writer = writer
    self.reader = ix.reader()
Example #3
Source File: whoosh_write.py From Penny-Dreadful-Tools with GNU General Public License v3.0 | 5 votes |
def rewrite_index(self, cards: List[Card]) -> None:
    """Create a new index in ``WhooshConstants.index_dir`` and fill it with ``cards``.

    The directory is ensured to exist first; the populated index is not
    returned or stored on the instance.
    """
    index_dir = WhooshConstants.index_dir
    print('Rewriting index in {d}'.format(d=index_dir))
    ensure_dir_exists(index_dir)
    new_index = create_in(index_dir, self.schema)
    update_index(new_index, cards)

# pylint: disable=no-self-use
Example #4
Source File: search.py From markdown-search with GNU General Public License v2.0 | 5 votes |
def open_index(self, index_folder, create_new=False):
    """Open the Whoosh index in ``index_folder``, creating it when absent.

    Args:
        index_folder: Directory of the index; remembered on
            ``self.index_folder`` and created if it does not exist.
        create_new: When true, an existing ``index_folder`` is removed
            (recursively) first, so a new index is created.

    The resulting index object is stored on ``self.ix``.
    """
    self.index_folder = index_folder

    if create_new and os.path.exists(index_folder):
        shutil.rmtree(index_folder)
        # Single parenthesised argument: prints the same text on Python 2
        # and Python 3 (the original used a Python 2-only print statement).
        print("deleted index folder: " + index_folder)

    if not os.path.exists(index_folder):
        os.mkdir(index_folder)

    # Checked before the schema is built, exactly as in the original order.
    exists = index.exists_in(index_folder)
    stemming_analyzer = StemmingAnalyzer()

    # The field boosts decrease from filename (100) down to emphasised
    # words (20); ``content`` is the only field using the stemming analyzer.
    schema = Schema(
        path=ID(stored=True, unique=True),
        filename=TEXT(stored=True, field_boost=100.0),
        tags=KEYWORD(stored=True, scorable=True, field_boost=80.0),
        headlines=KEYWORD(stored=True, scorable=True, field_boost=60.0),
        doubleemphasiswords=KEYWORD(stored=True, scorable=True, field_boost=40.0),
        emphasiswords=KEYWORD(stored=True, scorable=True, field_boost=20.0),
        content=TEXT(stored=True, analyzer=stemming_analyzer),
        time=STORED
    )

    if not exists:
        self.ix = index.create_in(index_folder, schema)
    else:
        self.ix = index.open_dir(index_folder)
Example #5
Source File: whoosh_backend.py From flask-msearch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def init(self):
    """Return the Whoosh index stored under ``self.path``/``self.name``.

    An existing index is opened; otherwise the directory is created when
    missing and a new index is created there with ``self.schema``.
    """
    index_dir = os.path.join(self.path, self.name)

    if not whoosh_index.exists_in(index_dir):
        if not os.path.exists(index_dir):
            os.makedirs(index_dir)
        return whoosh_index.create_in(index_dir, self.schema)

    return whoosh_index.open_dir(index_dir)
Example #6
Source File: search.py From databrewer with MIT License | 5 votes |
def __init__(self, index_dir, schema=DEFAULT_SCHEMA, force_create=False):
    """Attach to the index in ``index_dir``, creating it when required.

    Args:
        index_dir: Directory of the index.
        schema: Schema stored on ``self.schema`` and passed to the
            open/create call.
        force_create: When true, a new index is created even if one
            already exists in ``index_dir``.
    """
    self.schema = schema

    reuse_existing = exists_in(index_dir) and not force_create
    if not reuse_existing:
        self.index = create_in(index_dir, schema=schema)
        return

    self.index = open_dir(index_dir, schema=schema)
Example #7
Source File: whooshsearch.py From pySINDy with MIT License | 5 votes |
def __init__(self, db_path):
    """Open or create the index in ``db_path`` and prepare a query parser.

    ``self.index`` holds the index; ``self.qparser`` parses queries against
    the ``'text'`` field of ``self.schema``.
    """
    ensuredir(db_path)

    already_indexed = index.exists_in(db_path)
    self.index = (
        index.open_dir(db_path)
        if already_indexed
        else index.create_in(db_path, schema=self.schema)
    )

    self.qparser = QueryParser('text', self.schema)
Example #8
Source File: models.py From realms-wiki with GNU General Public License v2.0 | 5 votes |
def __init__(self, index_path, language):
    """Set up the Whoosh schema, index and query parser for ``index_path``.

    Args:
        index_path: Directory of the index; created when missing.
        language: Language name; a ``LanguageAnalyzer`` is used only when
            Whoosh reports both a stemmer and stopwords for it, otherwise
            a ``SimpleAnalyzer`` is used.

    The process exits via ``sys.exit`` if the directory cannot be created
    or an existing index cannot be opened.
    """
    from whoosh import index as whoosh_index
    from whoosh.fields import Schema, TEXT, ID
    from whoosh import qparser
    from whoosh.highlight import UppercaseFormatter
    from whoosh.analysis import SimpleAnalyzer, LanguageAnalyzer
    from whoosh.lang import has_stemmer, has_stopwords
    import os

    if has_stemmer(language) and has_stopwords(language):
        text_analyzer = LanguageAnalyzer(language)
    else:
        # TODO Display a warning?
        text_analyzer = SimpleAnalyzer()

    self.schema = Schema(path=ID(unique=True, stored=True),
                         body=TEXT(analyzer=text_analyzer))
    self.formatter = UppercaseFormatter()
    self.index_path = index_path

    if not os.path.exists(index_path):
        try:
            os.mkdir(index_path)
        except OSError as mkdir_error:
            sys.exit("Error creating Whoosh index: %s" % mkdir_error)

    if not whoosh_index.exists_in(index_path):
        self.search_index = whoosh_index.create_in(index_path, self.schema)
    else:
        try:
            self.search_index = whoosh_index.open_dir(index_path)
        except whoosh_index.IndexError as open_error:
            sys.exit("Error opening whoosh index: {0}".format(open_error))

    # Queries search both fields and accept fuzzy-term syntax.
    parser = qparser.MultifieldParser(["body", "path"], schema=self.schema)
    self.query_parser = parser
    self.query_parser.add_plugin(qparser.FuzzyTermPlugin())
Example #9
Source File: models.py From realms-wiki with GNU General Public License v2.0 | 5 votes |
def delete_index(self, index):
    """Close the current search index and create a new one in its place.

    The new index is created at ``self.index_path`` with ``self.schema``
    and stored on ``self.search_index``. The ``index`` argument is not used.
    """
    from whoosh import index as whoosh_index

    current_index = self.search_index
    current_index.close()

    replacement = whoosh_index.create_in(self.index_path, schema=self.schema)
    self.search_index = replacement
Example #10
Source File: index_whoosh.py From BREDS with GNU Lesser General Public License v3.0 | 5 votes |
def create_index():
    """Return the Whoosh index stored in the ``index_full`` directory.

    When the directory does not exist it is created and a new index with a
    single stored ``sentence`` field is created in it; otherwise the
    existing index is opened.

    Returns:
        The index object returned by ``create_in`` or ``open_dir``.
    """
    # Tokens are, in order of preference: hyphenated words, whole
    # <TAG>...</TAG> spans, or plain word runs. The pattern is a raw string
    # so that ``\w`` reaches the regex engine without relying on Python's
    # handling of invalid string escapes (a warning on modern Python).
    regex_tokenize = re.compile(r'\w+(?:-\w+)+|<[A-Z]+>[^<]+</[A-Z]+>|\w+', re.U)
    tokenizer = RegexTokenizer(regex_tokenize)
    schema = Schema(sentence=TEXT(stored=True, analyzer=tokenizer))

    if not os.path.exists("index_full"):
        os.mkdir("index_full")
        idx = create_in("index_full", schema)
    else:
        # NOTE(review): assumes an existing directory always contains an
        # index -- confirm, since only the directory's existence is checked.
        idx = open_dir("index_full")
    return idx
Example #11
Source File: get_template_based_result.py From DualRL with MIT License | 4 votes |
def cal_sim(train_data_path, test_data_path, dst_result_path=None, save_n_best_search=1):
    """Index the training pairs, then write the best matches for each test line.

    Every line of ``train_data_path`` is split on tabs into a post and a
    response and added to a new Whoosh index. For every line of
    ``test_data_path`` an OR query over the terms of its first tab-separated
    column is run against the ``post`` field, and up to
    ``save_n_best_search`` hits are written to ``dst_result_path`` as
    ``<test line>\\t<post>\\t<response>``.

    Args:
        train_data_path: Tab-separated training file; the first digit found
            in this path selects the index sub-directory.
        test_data_path: Tab-separated file with one query per line.
        dst_result_path: Output file. Despite the ``None`` default a real
            path is required, because the file is opened unconditionally.
        save_n_best_search: Maximum number of hits written per test line.

    Raises:
        IndexError: If ``train_data_path`` contains no digit.

    NOTE(review): the ``.decode('utf-8')`` calls assume byte strings
    (Python 2 ``str``) -- confirm the target interpreter.
    """
    schema = Schema(context=TEXT(stored=True), response=STORED, post=TEXT(stored=True))
    # Raw string so ``\d`` is not treated as an (invalid) string escape.
    index_i = re.findall(r'\d', train_data_path)[0]

    index_path = "../tmp/ix_index/" + index_i
    if not os.path.exists(index_path):
        os.makedirs(index_path)

    ix = create_in(index_path, schema)
    writer = ix.writer()

    def get_cpr(line):
        """Split a training line into (context, response, post); context is always empty."""
        lines = line.lower().strip().split('\t')
        context = ''
        post = lines[0]
        response = lines[1]
        return context.strip().decode('utf-8'), response.decode('utf-8'), post.decode('utf-8')

    def load_train_data(file_name, writer):
        """Add every line of ``file_name`` to ``writer`` and commit once at the end."""
        # ``with`` closes the training file, which the original left open.
        with open(file_name) as f:
            for line in f:
                context, response, post = get_cpr(line)
                if context != '':
                    writer.add_document(context=context, response=response, post=post)
                else:
                    writer.add_document(response=response, post=post)
        writer.commit()

    def get_query(line, ix):
        """Build an OR query over all terms of the line's first column."""
        lines = line.strip().split('\t')
        post = lines[0].decode('utf-8')
        q2 = QueryParser("post", ix.schema).parse(post)
        terms = list(q2.all_terms())
        query = Or([Term(*x) for x in terms])
        return query

    load_train_data(train_data_path, writer)

    # Both files are closed on every exit path, including errors; the
    # original never closed the test file and closed the output file only
    # when the search loop finished without an uncaught exception.
    with open(test_data_path, 'r') as f, open(dst_result_path, 'w') as fw_search:
        with ix.searcher(weighting=scoring.TF_IDF()) as searcher:
            c = searcher.collector(limit=10)
            tlc = TimeLimitCollector(c, timelimit=10.0)
            for line in f:
                try:
                    query = get_query(line, ix)
                    searcher.search_with_collector(query, tlc)
                    results = tlc.results()
                    for i in range(min(len(results), save_n_best_search)):
                        fw_search.write(
                            line.strip() + '\t' + str(results[i]["post"]) + '\t' +
                            str(results[i]["response"]) + '\n')
                except Exception:
                    # Deliberately best-effort: a failing line is reported
                    # and skipped. This catches every exception, not only a
                    # search timeout, despite what the message says.
                    print('TimeLimit, ignore it!')
                    print(line)