Python whoosh.index.open_dir() Examples
The following are 12 code examples of whoosh.index.open_dir().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
whoosh.index, or try the search function.
Example #1
Source File: engine.py From txtorg with MIT License | 6 votes |
def _init_index(self):
    """Open the corpus's Whoosh index, creating the directory, schema, and
    index if they do not exist yet, then cache an index, searcher, and reader
    on ``self``.
    """
    if not os.path.exists(self.corpus.path):
        os.mkdir(self.corpus.path)
    # FIX: the original assigned self.corpus.analyzer twice (once to a local,
    # once to the attribute); keep one local and reuse it.
    analyzer = self.corpus.analyzer
    self.analyzer = analyzer
    if exists_in(self.corpus.path):
        ix = open_dir(self.corpus.path)
    else:
        # may need to remove this? how can we have a schema if we don't
        # know the...uh...schema?
        schema = Schema(title=TEXT(stored=True, analyzer=analyzer),
                        content=TEXT(analyzer=analyzer),
                        path=ID(stored=True))
        ix = create_in(self.corpus.path, schema)
        # Commit an empty write so the brand-new index has a first
        # generation that readers/searchers can open.
        ix.writer().commit()
    self.index = ix
    self.searcher = ix.searcher()
    self.reader = ix.reader()
Example #2
Source File: whoosh_write.py From Penny-Dreadful-Tools with GNU General Public License v3.0 | 5 votes |
def update_card(self, card: Card) -> None:
    """Re-index a single card in the Whoosh search index."""
    search_index = open_dir(WhooshConstants.index_dir)
    update_index(search_index, [card])
Example #3
Source File: whoosh_search.py From Penny-Dreadful-Tools with GNU General Public License v3.0 | 5 votes |
def __init__(self) -> None:
    """Open the Whoosh index directory and build the lookup trie."""
    whoosh_index = open_dir(WhooshConstants.index_dir)
    self.ix = whoosh_index
    self.initialize_trie()
Example #4
Source File: search.py From markdown-search with GNU General Public License v2.0 | 5 votes |
def open_index(self, index_folder, create_new=False):
    """Open the Whoosh index in *index_folder*, creating it if absent.

    When *create_new* is True, any existing index folder is deleted and
    a fresh index is built from the schema below.
    """
    self.index_folder = index_folder
    if create_new:
        if os.path.exists(index_folder):
            shutil.rmtree(index_folder)
            # FIX: was a Python 2 `print` statement, which is a
            # SyntaxError on Python 3.
            print("deleted index folder: " + index_folder)
    if not os.path.exists(index_folder):
        os.mkdir(index_folder)
    exists = index.exists_in(index_folder)
    stemming_analyzer = StemmingAnalyzer()
    # field_boost weights: filename matches rank highest, then tags,
    # headlines, emphasized words, and finally body content.
    schema = Schema(
        path=ID(stored=True, unique=True),
        filename=TEXT(stored=True, field_boost=100.0),
        tags=KEYWORD(stored=True, scorable=True, field_boost=80.0),
        headlines=KEYWORD(stored=True, scorable=True, field_boost=60.0),
        doubleemphasiswords=KEYWORD(stored=True, scorable=True, field_boost=40.0),
        emphasiswords=KEYWORD(stored=True, scorable=True, field_boost=20.0),
        content=TEXT(stored=True, analyzer=stemming_analyzer),
        time=STORED,
    )
    if not exists:
        self.ix = index.create_in(index_folder, schema)
    else:
        self.ix = index.open_dir(index_folder)
Example #5
Source File: whoosh_backend.py From flask-msearch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def init(self):
    """Return the Whoosh index for this backend, creating the directory
    and a new index from ``self.schema`` when none exists yet."""
    index_dir = os.path.join(self.path, self.name)
    if whoosh_index.exists_in(index_dir):
        return whoosh_index.open_dir(index_dir)
    # No index yet: make sure the directory exists, then build one.
    if not os.path.exists(index_dir):
        os.makedirs(index_dir)
    return whoosh_index.create_in(index_dir, self.schema)
Example #6
Source File: search.py From databrewer with MIT License | 5 votes |
def __init__(self, index_dir, schema=DEFAULT_SCHEMA, force_create=False):
    """Open the index at *index_dir*, or (re)create it when it is missing
    or *force_create* is set."""
    self.schema = schema
    reuse_existing = exists_in(index_dir) and not force_create
    opener = open_dir if reuse_existing else create_in
    self.index = opener(index_dir, schema=schema)
Example #7
Source File: whoosh_tool.py From TorCMS with MIT License | 5 votes |
def __init__(self):
    """Open the site-wide Whoosh index and prepare a parser for queries
    against its ``content`` field."""
    whoosh_db = open_dir("database/whoosh")
    self.whbase = whoosh_db
    self.parser = QueryParser("content", schema=whoosh_db.schema)
Example #8
Source File: whooshsearch.py From pySINDy with MIT License | 5 votes |
def __init__(self, db_path):
    """Open (or create) the Whoosh index under *db_path* and set up a
    query parser over the ``text`` field."""
    ensuredir(db_path)
    already_built = index.exists_in(db_path)
    if already_built:
        self.index = index.open_dir(db_path)
    else:
        self.index = index.create_in(db_path, schema=self.schema)
    self.qparser = QueryParser('text', self.schema)
Example #9
Source File: Flask_search.py From AIL-framework with GNU Affero General Public License v3.0 | 5 votes |
def get_item_count(dirs):
    """Return the total document count of the index stored under *dirs*
    (relative to ``baseindexpath``)."""
    index_location = os.path.join(baseindexpath, dirs)
    opened = index.open_dir(index_location)
    return opened.doc_count_all()
Example #10
Source File: models.py From realms-wiki with GNU General Public License v2.0 | 5 votes |
def __init__(self, index_path, language):
    """Create or open a Whoosh full-text index at *index_path*.

    Uses a language-aware analyzer when whoosh ships both a stemmer and a
    stopword list for *language*; otherwise falls back to SimpleAnalyzer.
    """
    from whoosh import index as whoosh_index
    from whoosh.fields import Schema, TEXT, ID
    from whoosh import qparser
    from whoosh.highlight import UppercaseFormatter
    from whoosh.analysis import SimpleAnalyzer, LanguageAnalyzer
    from whoosh.lang import has_stemmer, has_stopwords
    import os

    # Full language support requires both stemming and stopwords.
    if has_stemmer(language) and has_stopwords(language):
        analyzer = LanguageAnalyzer(language)
    else:
        # TODO Display a warning?
        analyzer = SimpleAnalyzer()

    self.schema = Schema(path=ID(unique=True, stored=True),
                         body=TEXT(analyzer=analyzer))
    self.formatter = UppercaseFormatter()
    self.index_path = index_path

    if not os.path.exists(index_path):
        try:
            os.mkdir(index_path)
        except OSError as e:
            sys.exit("Error creating Whoosh index: %s" % e)

    if whoosh_index.exists_in(index_path):
        try:
            self.search_index = whoosh_index.open_dir(index_path)
        except whoosh_index.IndexError as e:
            sys.exit("Error opening whoosh index: {0}".format(e))
    else:
        self.search_index = whoosh_index.create_in(index_path, self.schema)

    self.query_parser = qparser.MultifieldParser(["body", "path"],
                                                 schema=self.schema)
    self.query_parser.add_plugin(qparser.FuzzyTermPlugin())
Example #11
Source File: index_whoosh.py From BREDS with GNU Lesser General Public License v3.0 | 5 votes |
def create_index():
    """Create the ``index_full`` Whoosh index if missing, else open it.

    Returns the index object. Sentences are tokenized into hyphenated
    words, ``<TAG>...</TAG>`` spans, or plain words.
    """
    # FIX: the pattern was a non-raw string; '\w' etc. are invalid escape
    # sequences (DeprecationWarning, future SyntaxError). Raw string has
    # the identical value.
    regex_tokenize = re.compile(r'\w+(?:-\w+)+|<[A-Z]+>[^<]+</[A-Z]+>|\w+', re.U)
    tokenizer = RegexTokenizer(regex_tokenize)
    schema = Schema(sentence=TEXT(stored=True, analyzer=tokenizer))
    if not os.path.exists("index_full"):
        os.mkdir("index_full")
        idx = create_in("index_full", schema)
    else:
        idx = open_dir("index_full")
    return idx
Example #12
Source File: boolean.py From cltk with MIT License | 4 votes |
def corpus_query(self, query, save_file=None, window_size=300, surround_size=50):
    """Send query to a corpus's index. `save_file` is a filename.

    :type save_file: str
    >>> # cltk_index = CLTKIndex('latin', 'latin_text_latin_library')
    >>> # results = cltk_index.corpus_query('amicitia')
    """
    _index = open_dir(self.index_path)
    output_str = ''
    with _index.searcher() as searcher:
        _query = QueryParser("content", _index.schema).parse(query)
        results = searcher.search(_query, limit=None)
        results.fragmenter.charlimit = None  # Allow larger fragments
        results.fragmenter.maxchars = window_size
        # Show more context before and after
        results.fragmenter.surround = surround_size
        docs_number = searcher.doc_count_all()
        output_str += 'Docs containing hits: {}.'.format(docs_number) + '</br></br>'
        for hit in results:
            author = hit['author']
            filepath = hit['path']
            output_str += author + '</br>'
            output_str += filepath + '</br>'
            with open(filepath) as file_open:
                file_contents = file_open.read()
            highlights = hit.highlights("content", text=file_contents, top=10000000)
            lines = highlights.split('\n')
            lines_br = '</br>'.join(lines)
            lines_number_approx = len(lines)
            output_str += 'Approximate hits: {}.'.format(lines_number_approx) + '</br>'
            output_str += lines_br + '</br></br>'
    if save_file:
        user_dir = os.path.normpath(get_cltk_data_dir() + '/user_data/search')
        output_path = os.path.join(user_dir, save_file + '.html')
        # FIX: the original retried with os.mkdir(user_dir) on
        # FileNotFoundError, which still fails when the intermediate
        # 'user_data' directory is missing; makedirs creates the chain.
        os.makedirs(user_dir, exist_ok=True)
        with open(output_path, 'w') as file_open:
            file_open.write(output_str)
    else:
        return output_str