Python whoosh.qparser.QueryParser() Examples
The following are 16 code examples of whoosh.qparser.QueryParser().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the module whoosh.qparser, or try the search function.
Example #1
Source File: search.py From markdown-search with GNU General Public License v2.0 | 6 votes |
def search(self, query_list, fields=None):
    """Search the index for the joined terms of *query_list*.

    Returns a (parsed_query, search_result, tag_cloud) tuple, where the
    tag cloud is built from the top-scoring "tags" key terms of the hits.
    """
    if fields is None:
        # Guard: the len(fields) checks below raised TypeError when the
        # caller omitted fields and the query had no quotes/colons.
        fields = []
    with self.ix.searcher() as searcher:
        query_string = " ".join(query_list)
        query = None
        if "\"" in query_string or ":" in query_string:
            # Quoted phrase or explicit field prefix: parse against content.
            query = QueryParser("content", self.schema).parse(query_string)
        elif len(fields) == 1 and fields[0] == "filename":
            pass
        elif len(fields) == 1 and fields[0] == "tags":
            pass
        elif len(fields) == 2:
            pass
        else:
            # No usable field restriction: fall back to the default field set.
            fields = ["tags", "headlines", "content", "filename",
                      "doubleemphasiswords", "emphasiswords"]
        if not query:
            query = MultifieldParser(fields, schema=self.ix.schema).parse(query_string)
        parsed_query = "%s" % query
        print("query: %s" % parsed_query)  # was a Py2-only print statement
        results = searcher.search(query, terms=False, scored=True, groupedby="path")
        # Harvest key terms from the "tags" field to build a tag cloud.
        key_terms = results.key_terms("tags", docs=100, numterms=100)
        tag_cloud = [keyword for keyword, score in key_terms]
        search_result = self.create_search_result(results)
        return parsed_query, search_result, tag_cloud
Example #2
Source File: search.py From databrewer with MIT License | 5 votes |
def search(self, query, search_field='content'):
    """Yield decoded records for *query* (a string or a whoosh Query).

    Strings are parsed against *search_field* with Variations term
    expansion; at most 100 hits are returned.
    """
    needs_parsing = not isinstance(query, Query)
    if needs_parsing:
        parser = QueryParser(search_field, self.schema, termclass=Variations)
        query = parser.parse(query)
    with self.index.searcher() as searcher:
        hits = searcher.search(query, limit=100)
        for hit in hits:
            yield self._decode(hit['data'])
Example #3
Source File: whoosh_cn_backend.py From thirtylol with MIT License | 5 votes |
def setup(self):
    """
    Defers loading until needed.

    Chooses file-backed or RAM storage, builds the schema from all
    registered Haystack search fields, creates a QueryParser for the
    content field, and opens (or creates) the Whoosh index.
    """
    # Imported inside the method so the haystack app is only touched lazily.
    from haystack import connections
    new_index = False
    # Make sure the index is there.
    if self.use_file_storage and not os.path.exists(self.path):
        os.makedirs(self.path)
        new_index = True  # freshly created directory -> must build a new index
    if self.use_file_storage and not os.access(self.path, os.W_OK):
        raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)
    if self.use_file_storage:
        self.storage = FileStorage(self.path)
    else:
        # Share one RAM store across instances via the module-level LOCALS.
        global LOCALS
        # Fix: use getattr with a default (as the other backend variants do)
        # so a LOCALS object without a RAM_STORE attribute does not raise
        # AttributeError here.
        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()
        self.storage = LOCALS.RAM_STORE
    self.content_field_name, self.schema = self.build_schema(connections[self.connection_alias].get_unified_index().all_searchfields())
    self.parser = QueryParser(self.content_field_name, schema=self.schema)
    if new_index is True:
        self.index = self.storage.create_index(self.schema)
    else:
        # Open the existing index; recreate it if it turns out to be empty.
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            self.index = self.storage.create_index(self.schema)
    self.setup_complete = True
Example #4
Source File: whoosh_tool.py From TorCMS with MIT License | 5 votes |
def __init__(self):
    """Open the on-disk Whoosh index and prepare a parser for its content field."""
    whoosh_index = open_dir("database/whoosh")
    self.whbase = whoosh_index
    self.parser = QueryParser("content", schema=whoosh_index.schema)
Example #5
Source File: whoosh_cn_backend.py From Django-blog with MIT License | 5 votes |
def setup(self):
    """
    Defers loading until needed.

    Chooses file-backed or RAM storage, builds the schema from all
    registered Haystack search fields, creates a QueryParser for the
    content field, and opens (or creates) the Whoosh index.
    """
    # Imported inside the method so the haystack app is only touched lazily.
    from haystack import connections
    new_index = False
    # Make sure the index is there.
    if self.use_file_storage and not os.path.exists(self.path):
        os.makedirs(self.path)
        new_index = True  # freshly created directory -> must build a new index
    if self.use_file_storage and not os.access(self.path, os.W_OK):
        raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)
    if self.use_file_storage:
        self.storage = FileStorage(self.path)
    else:
        # Share one RAM store across instances via the module-level LOCALS;
        # getattr guards against a LOCALS without the attribute set yet.
        global LOCALS
        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()
        self.storage = LOCALS.RAM_STORE
    self.content_field_name, self.schema = self.build_schema(
        connections[self.connection_alias].get_unified_index().all_searchfields())
    self.parser = QueryParser(self.content_field_name, schema=self.schema)
    if new_index is True:
        self.index = self.storage.create_index(self.schema)
    else:
        # Open the existing index; recreate it if it turns out to be empty.
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            self.index = self.storage.create_index(self.schema)
    self.setup_complete = True
Example #6
Source File: whooshsearch.py From pySINDy with MIT License | 5 votes |
def __init__(self, db_path):
    """Open the Whoosh index stored under *db_path*, creating it if absent."""
    ensuredir(db_path)
    already_exists = index.exists_in(db_path)
    self.index = (index.open_dir(db_path) if already_exists
                  else index.create_in(db_path, schema=self.schema))
    self.qparser = QueryParser('text', self.schema)
Example #7
Source File: whoosh_cn_backend.py From blog with Apache License 2.0 | 5 votes |
def setup(self):
    """
    Defers loading until needed.

    Chooses file-backed or RAM storage, builds the schema from all
    registered Haystack search fields, creates a QueryParser for the
    content field, and opens (or creates) the Whoosh index.
    """
    # Imported inside the method so the haystack app is only touched lazily.
    from haystack import connections
    new_index = False
    # Make sure the index is there.
    if self.use_file_storage and not os.path.exists(self.path):
        os.makedirs(self.path)
        new_index = True  # freshly created directory -> must build a new index
    if self.use_file_storage and not os.access(self.path, os.W_OK):
        raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)
    if self.use_file_storage:
        self.storage = FileStorage(self.path)
    else:
        # Share one RAM store across instances via the module-level LOCALS;
        # getattr guards against a LOCALS without the attribute set yet.
        global LOCALS
        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()
        self.storage = LOCALS.RAM_STORE
    self.content_field_name, self.schema = self.build_schema(connections[self.connection_alias].get_unified_index().all_searchfields())
    self.parser = QueryParser(self.content_field_name, schema=self.schema)
    if new_index is True:
        self.index = self.storage.create_index(self.schema)
    else:
        # Open the existing index; recreate it if it turns out to be empty.
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            self.index = self.storage.create_index(self.schema)
    self.setup_complete = True
Example #8
Source File: __init__.py From pixelated-user-agent with GNU Affero General Public License v3.0 | 5 votes |
def _search_tag_groups(self, is_filtering_tags):
    """Return (seen, total) per-tag message counts.

    *total* always holds counts over all messages; *seen* holds counts of
    messages flagged as seen, or None when tag filtering is active.
    """
    parser = QueryParser('tag', self._index.schema)
    search_opts = {
        'limit': None,
        'groupedby': sorting.FieldFacet('tag', allow_overlap=True),
        'maptype': sorting.Count,
    }
    seen = None
    with self._index.searcher() as searcher:
        total = searcher.search(parser.parse('*'), **search_opts).groups()
        if not is_filtering_tags:
            seen_query = parser.parse("* AND flags:%s" % Status.SEEN)
            seen = searcher.search(seen_query, **search_opts).groups()
    return seen, total
Example #9
Source File: __init__.py From pixelated-user-agent with GNU Affero General Public License v3.0 | 5 votes |
def _search_with_options(self, options, query):
    """Parse *query* against the 'raw' field and run it with *options*."""
    parsed = QueryParser('raw', self._index.schema).parse(query)
    with self._index.searcher() as searcher:
        hits = searcher.search(parsed, **options)
    return hits
Example #10
Source File: contacts.py From pixelated-user-agent with GNU Affero General Public License v3.0 | 5 votes |
def search_addresses(searcher, query):
    """Collect matched address terms from the to/cc/bcc/sender fields,
    excluding messages tagged drafts or trash."""
    excluded = Term("tag", "drafts") | Term("tag", "trash")
    pattern = "*%s* OR *%s*" % (query.title(), query)

    def _matched_terms(field):
        # One wildcard search per address field, faceted on that field.
        parsed = QueryParser(field, searcher.schema).parse(pattern)
        hits = searcher.search(
            parsed,
            limit=None,
            mask=excluded,
            groupedby=sorting.FieldFacet(field, allow_overlap=True),
            terms=True)
        return hits.matched_terms()

    per_field = [_matched_terms(field) for field in ('to', 'cc', 'bcc', 'sender')]
    return [term[1] for term in flatten(per_field)]
Example #11
Source File: whoosh_cn_backend.py From izone with MIT License | 5 votes |
def setup(self):
    """
    Defers loading until needed.

    Chooses file-backed or RAM storage, builds the schema from all
    registered Haystack search fields, creates a QueryParser for the
    content field, and opens (or creates) the Whoosh index.
    """
    # Imported inside the method so the haystack app is only touched lazily.
    from haystack import connections
    new_index = False
    # Make sure the index is there.
    if self.use_file_storage and not os.path.exists(self.path):
        os.makedirs(self.path)
        new_index = True  # freshly created directory -> must build a new index
    if self.use_file_storage and not os.access(self.path, os.W_OK):
        raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)
    if self.use_file_storage:
        self.storage = FileStorage(self.path)
    else:
        # Share one RAM store across instances via the module-level LOCALS;
        # getattr guards against a LOCALS without the attribute set yet.
        global LOCALS
        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()
        self.storage = LOCALS.RAM_STORE
    self.content_field_name, self.schema = self.build_schema(connections[self.connection_alias].get_unified_index().all_searchfields())
    self.parser = QueryParser(self.content_field_name, schema=self.schema)
    if new_index is True:
        self.index = self.storage.create_index(self.schema)
    else:
        # Open the existing index; recreate it if it turns out to be empty.
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            self.index = self.storage.create_index(self.schema)
    self.setup_complete = True
Example #12
Source File: query.py From kerko with GNU General Public License v3.0 | 5 votes |
def run_query_unique(field_name, value, return_fields=None):
    """Perform a search query for a single item using a unique key.

    Returns the requested fields of the first hit, or None when the index
    is unavailable or nothing matches.
    """
    ix = open_index()
    if not ix:
        return None
    with ix.searcher() as searcher:
        parser = QueryParser(
            field_name,
            schema=current_app.config['KERKO_COMPOSER'].schema,
            plugins=[],
        )
        hits = searcher.search(parser.parse(value), limit=1)
        if hits:
            return _get_fields(hits[0], return_fields)
    return None
Example #13
Source File: whoosh_cn_backend.py From website with MIT License | 5 votes |
def setup(self):
    """
    Defers loading until needed.

    Chooses file-backed or RAM storage, builds the schema from all
    registered Haystack search fields, creates a QueryParser for the
    content field, and opens (or creates) the Whoosh index.
    """
    # Imported inside the method so the haystack app is only touched lazily.
    from haystack import connections
    new_index = False
    # Make sure the index is there.
    if self.use_file_storage and not os.path.exists(self.path):
        os.makedirs(self.path)
        new_index = True  # freshly created directory -> must build a new index
    if self.use_file_storage and not os.access(self.path, os.W_OK):
        raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)
    if self.use_file_storage:
        self.storage = FileStorage(self.path)
    else:
        # Share one RAM store across instances via the module-level LOCALS;
        # getattr guards against a LOCALS without the attribute set yet.
        global LOCALS
        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()
        self.storage = LOCALS.RAM_STORE
    self.content_field_name, self.schema = self.build_schema(connections[self.connection_alias].get_unified_index().all_searchfields())
    self.parser = QueryParser(self.content_field_name, schema=self.schema)
    if new_index is True:
        self.index = self.storage.create_index(self.schema)
    else:
        # Open the existing index; recreate it if it turns out to be empty.
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            self.index = self.storage.create_index(self.schema)
    self.setup_complete = True
Example #14
Source File: get_template_based_result.py From DualRL with MIT License | 4 votes |
def cal_sim(train_data_path, test_data_path, dst_result_path=None, save_n_best_search=1):
    """Index the training pairs with Whoosh, then for each test line retrieve
    the most similar training posts (TF-IDF) and write up to
    *save_n_best_search* tab-separated results per line to *dst_result_path*.

    NOTE(review): the .decode('utf-8') calls assume Python 2 byte strings —
    confirm before running under Python 3.
    """
    schema = Schema(context=TEXT(stored=True), response=STORED, post=TEXT(stored=True))
    # Raw string fixes the invalid '\d' escape; first digit in the path keys
    # the per-split index directory, as in the original.
    index_i = re.findall(r'\d', train_data_path)[0]
    index_path = "../tmp/ix_index/" + index_i
    if not os.path.exists(index_path):
        os.makedirs(index_path)
    ix = create_in(index_path, schema)
    writer = ix.writer()

    def get_cpr(line):
        # Split a "post<TAB>response" line; context is always empty here.
        lines = line.lower().strip().split('\t')
        context = ''
        post = lines[0]
        response = lines[1]
        return context.strip().decode('utf-8'), response.decode('utf-8'), post.decode('utf-8')

    def load_train_data(file_name, writer):
        # 'with' closes the training file even on error (it was leaked before).
        with open(file_name) as train_file:
            for line in train_file:
                context, response, post = get_cpr(line)
                if context != '':
                    writer.add_document(context=context, response=response, post=post)
                else:
                    writer.add_document(response=response, post=post)
        writer.commit()

    def get_query(line, ix):
        # Turn the test post into an OR-of-terms query for fuzzy matching.
        lines = line.strip().split('\t')
        post = lines[0].decode('utf-8')
        q2 = QueryParser("post", ix.schema).parse(post)
        terms = list(q2.all_terms())
        query = Or([Term(*x) for x in terms])
        return query

    load_train_data(train_data_path, writer)
    # 'with' closes both files even if a search raises (they were leaked /
    # only fw_search was closed before).
    with open(test_data_path, 'r') as f, open(dst_result_path, 'w') as fw_search:
        with ix.searcher(weighting=scoring.TF_IDF()) as searcher:
            c = searcher.collector(limit=10)
            tlc = TimeLimitCollector(c, timelimit=10.0)
            for line in f:
                try:
                    query = get_query(line, ix)
                    searcher.search_with_collector(query, tlc)
                    results = tlc.results()
                    for i in range(min(len(results), save_n_best_search)):
                        fw_search.write(
                            line.strip() + '\t' + str(results[i]["post"]) + '\t' + str(results[i]["response"]) + '\n')
                except Exception:
                    # Best-effort: a timeout (or parse failure) skips this line.
                    print('TimeLimit, ignore it!')
                    print(line)
Example #15
Source File: boolean.py From cltk with MIT License | 4 votes |
def corpus_query(self, query, save_file=None, window_size=300, surround_size=50):
    """Send query to a corpus's index. `save_file` is a filename.

    When *save_file* is given, writes an HTML report to the user data
    directory and returns None; otherwise returns the HTML string.

    :type save_file: str
    >>> # cltk_index = CLTKIndex('latin', 'latin_text_latin_library')
    >>> # results = cltk_index.corpus_query('amicitia')
    """
    _index = open_dir(self.index_path)
    output_str = ''
    with _index.searcher() as searcher:
        _query = QueryParser("content", _index.schema).parse(query)
        results = searcher.search(_query, limit=None)
        results.fragmenter.charlimit = None
        # Allow larger fragments
        results.fragmenter.maxchars = window_size
        # Show more context before and after
        results.fragmenter.surround = surround_size
        docs_number = searcher.doc_count_all()
        output_str += 'Docs containing hits: {}.'.format(docs_number) + '</br></br>'
        for hit in results:
            author = hit['author']
            filepath = hit['path']
            output_str += author + '</br>'
            output_str += filepath + '</br>'
            with open(filepath) as file_open:
                file_contents = file_open.read()
            highlights = hit.highlights("content", text=file_contents, top=10000000)
            lines = highlights.split('\n')
            lines_br = '</br>'.join(lines)
            lines_number_approx = len(lines)
            output_str += 'Approximate hits: {}.'.format(lines_number_approx) + '</br>'
            output_str += lines_br + '</br></br>'
    if save_file:
        user_dir = os.path.normpath(get_cltk_data_dir() + '/user_data/search')
        output_path = os.path.join(user_dir, save_file + '.html')
        # Fix: the old mkdir-on-FileNotFoundError retry failed when parent
        # directories were missing (os.mkdir creates only one level) and
        # duplicated the write logic; makedirs(..., exist_ok=True) handles
        # both the missing and the already-present directory cases.
        os.makedirs(user_dir, exist_ok=True)
        with open(output_path, 'w') as file_open:
            file_open.write(output_str)
    else:
        return output_str
Example #16
Source File: searchfiles.py From txtorg with MIT License | 4 votes |
def run(index, searcher, analyzer, reader, command, content_field="contents"): print 'content_field is', content_field """check to see whether the user specified a field""" print command if command == 'all': myresults = reader.all_doc_ids() print 'Query Completed' else: query = QueryParser(content_field,schema=index.schema).parse(command) myresults = searcher.docs_for_query(query) print 'Query Completed' allDicts = [] allTerms = set() allMetadata = [] termsDocs = dict() scoreDocs = [] for docnum in myresults: #doc = searcher.doc(scoreDoc.doc) vector = searcher.vector_as("frequency", docnum, content_field) #vector = reader.getTermFreqVector(scoreDoc.doc,content_field) if vector is None: continue d = dict() m = dict() # a vector is a generator of tuples -- convert of list # [(u"apple", 3), (u"bear", 2), (u"cab", 2)] #vector = [elt for elt in vector] #vterms = [elt[0] for elt in vector] #vvalues = [elt[1] for elt in vector] #allTerms = allTerms.union(map(lambda x: x.encode('utf-8'),vterms)) # for (t,num) in zip(vterms,vvalues): for (t,num) in vector: allTerms.add(t.encode('utf-8')) d[t.encode('utf-8')] = num if t in termsDocs: termsDocs[t.encode('utf-8')] += 1 else: termsDocs[t.encode('utf-8')] = 1 d["txtorg_id"] = searcher.stored_fields(docnum)["txtorg_id"].encode('utf-8') # Build the metadata for k in searcher.stored_fields(docnum): if k != 'txtorg_id': m[k] = searcher.stored_fields(docnum)[k].encode('utf-8') allDicts.append(d) allMetadata.append(m) scoreDocs.append(docnum) names = set(allTerms) print allMetadata return scoreDocs, allTerms, allDicts, termsDocs, allMetadata