Python whoosh.qparser.QueryParser() Examples

The following are 16 code examples of whoosh.qparser.QueryParser(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module whoosh.qparser, or try the search function.
Example #1
Source File: search.py    From markdown-search with GNU General Public License v2.0 6 votes vote down vote up
def search(self, query_list, fields=None):
    """Search the index for the words in ``query_list``.

    The words are joined with single spaces into one query string. When
    that string contains a double quote or a colon it is parsed with a
    ``QueryParser`` whose default field is ``"content"``; otherwise it is
    parsed with a ``MultifieldParser`` over ``fields``.

    ``fields`` is honoured only when it is exactly ``["filename"]``,
    exactly ``["tags"]``, or has two entries. Anything else, including the
    default ``None``, selects the built-in list of default fields.

    Returns:
        A tuple ``(parsed_query, search_result, tag_cloud)``:
        ``parsed_query`` is the string form of the parsed query,
        ``search_result`` is ``self.create_search_result(results)`` and
        ``tag_cloud`` is the list of keywords returned by
        ``results.key_terms("tags", docs=100, numterms=100)``.
    """
    default_fields = ["tags", "headlines", "content", "filename",
                      "doubleemphasiswords", "emphasiswords"]

    with self.ix.searcher() as searcher:
        query_string = " ".join(query_list)
        query = None
        if "\"" in query_string or ":" in query_string:
            # NOTE(review): this parser is given self.schema while the
            # multi-field parser below uses self.ix.schema; confirm both
            # refer to the same schema.
            query = QueryParser("content", self.schema).parse(query_string)
        else:
            # fields=None used to reach len(fields) and raise TypeError.
            caller_choice_ok = fields is not None and (
                len(fields) == 2
                or (len(fields) == 1 and fields[0] in ("filename", "tags"))
            )
            if not caller_choice_ok:
                fields = default_fields
        if not query:
            # Also reached when the explicit-syntax parse produced a falsy
            # query; fall back to the defaults if no usable fields exist.
            query = MultifieldParser(fields or default_fields,
                                     schema=self.ix.schema).parse(query_string)
        parsed_query = "%s" % query
        # Single-argument call form prints the same under Python 2 and 3.
        print("query: %s" % parsed_query)
        results = searcher.search(query, terms=False, scored=True, groupedby="path")
        key_terms = results.key_terms("tags", docs=100, numterms=100)
        tag_cloud = [keyword for keyword, score in key_terms]
        # Built inside the ``with`` so the searcher is still open.
        search_result = self.create_search_result(results)

    return parsed_query, search_result, tag_cloud
Example #2
Source File: search.py    From databrewer with MIT License 5 votes vote down vote up
def search(self, query, search_field='content'):
    """Yield decoded payloads for the hits matching ``query``.

    ``query`` may already be a ``Query`` object; anything else is parsed
    with a ``QueryParser`` on ``search_field`` using ``Variations`` as the
    term class. The search is run with ``limit=100`` and each hit's
    ``'data'`` value is passed through ``self._decode`` before being
    yielded.
    """
    if isinstance(query, Query):
        parsed = query
    else:
        parsed = QueryParser(
            search_field, self.schema, termclass=Variations
        ).parse(query)

    with self.index.searcher() as searcher:
        hits = searcher.search(parsed, limit=100)
        for hit in hits:
            yield self._decode(hit['data'])
Example #3
Source File: whoosh_cn_backend.py    From thirtylol with MIT License 5 votes vote down vote up
def setup(self):
    """
    Defers loading until needed.

    Prepares ``self.storage``, ``self.schema``, ``self.parser`` and
    ``self.index`` and finally sets ``self.setup_complete`` to ``True``.

    Raises:
        IOError: if file storage is used and ``self.path`` is not writable.
    """
    from haystack import connections
    new_index = False

    # Make sure the index is there.
    if self.use_file_storage and not os.path.exists(self.path):
        os.makedirs(self.path)
        new_index = True

    if self.use_file_storage and not os.access(self.path, os.W_OK):
        raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)

    if self.use_file_storage:
        self.storage = FileStorage(self.path)
    else:
        global LOCALS

        # getattr() with a default tolerates RAM_STORE not being set on
        # LOCALS yet; plain attribute access would raise AttributeError.
        # NOTE(review): LOCALS is defined outside this block. If it is a
        # per-thread object the attribute can be missing; confirm.
        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()

        self.storage = LOCALS.RAM_STORE

    self.content_field_name, self.schema = self.build_schema(connections[self.connection_alias].get_unified_index().all_searchfields())
    self.parser = QueryParser(self.content_field_name, schema=self.schema)

    if new_index is True:
        # The directory was just created, so there is nothing to open.
        self.index = self.storage.create_index(self.schema)
    else:
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            self.index = self.storage.create_index(self.schema)

    self.setup_complete = True
Example #4
Source File: whoosh_tool.py    From TorCMS with MIT License 5 votes vote down vote up
def __init__(self):
    """Open the index under ``database/whoosh`` and build a parser.

    The parser's default field is ``"content"`` and it uses the schema of
    the opened index.
    """
    opened_index = open_dir("database/whoosh")
    self.whbase = opened_index
    self.parser = QueryParser("content", schema=opened_index.schema)
Example #5
Source File: whoosh_cn_backend.py    From Django-blog with MIT License 5 votes vote down vote up
def setup(self):
    """
    Lazily prepare storage, schema, parser and index for this backend.

    Sets ``self.setup_complete`` to ``True`` when done. Raises ``IOError``
    when file storage is in use and ``self.path`` cannot be written to.
    """
    from haystack import connections

    created_directory = False

    if self.use_file_storage:
        # Create the index directory on first use and remember that we did.
        if not os.path.exists(self.path):
            os.makedirs(self.path)
            created_directory = True

        if not os.access(self.path, os.W_OK):
            raise IOError(
                "The path to your Whoosh index '%s' is not writable for the current user/group."
                % self.path)

        self.storage = FileStorage(self.path)
    else:
        global LOCALS

        # Reuse the shared RAM store, creating it on first access.
        ram_store = getattr(LOCALS, 'RAM_STORE', None)
        if ram_store is None:
            ram_store = RamStorage()
            LOCALS.RAM_STORE = ram_store
        self.storage = ram_store

    unified_index = connections[self.connection_alias].get_unified_index()
    self.content_field_name, self.schema = self.build_schema(
        unified_index.all_searchfields())
    self.parser = QueryParser(self.content_field_name, schema=self.schema)

    if created_directory:
        self.index = self.storage.create_index(self.schema)
    else:
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            # Storage exists but holds no index yet.
            self.index = self.storage.create_index(self.schema)

    self.setup_complete = True
Example #6
Source File: whooshsearch.py    From pySINDy with MIT License 5 votes vote down vote up
def __init__(self, db_path):
    """Open the index stored in ``db_path``, creating it if none exists.

    ``ensuredir(db_path)`` is called first. A ``QueryParser`` with default
    field ``'text'`` and ``self.schema`` is stored as ``self.qparser``.
    """
    ensuredir(db_path)
    if not index.exists_in(db_path):
        self.index = index.create_in(db_path, schema=self.schema)
    else:
        self.index = index.open_dir(db_path)
    self.qparser = QueryParser('text', self.schema)
Example #7
Source File: whoosh_cn_backend.py    From blog with Apache License 2.0 5 votes vote down vote up
def setup(self):
    """
    Set up storage, schema, query parser and index on first use.

    Marks the backend ready through ``self.setup_complete``. An ``IOError``
    is raised if the on-disk index path is not writable.
    """
    from haystack import connections

    file_backed = self.use_file_storage
    needs_create = False

    # A missing directory means a brand-new index has to be created.
    if file_backed and not os.path.exists(self.path):
        os.makedirs(self.path)
        needs_create = True

    if file_backed and not os.access(self.path, os.W_OK):
        message = "The path to your Whoosh index '%s' is not writable for the current user/group." % self.path
        raise IOError(message)

    if file_backed:
        self.storage = FileStorage(self.path)
    else:
        global LOCALS

        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()

        self.storage = LOCALS.RAM_STORE

    search_fields = connections[self.connection_alias].get_unified_index().all_searchfields()
    self.content_field_name, self.schema = self.build_schema(search_fields)
    self.parser = QueryParser(self.content_field_name, schema=self.schema)

    if not needs_create:
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            # Nothing to open: fall through to creation below.
            needs_create = True
    if needs_create:
        self.index = self.storage.create_index(self.schema)

    self.setup_complete = True
Example #8
Source File: __init__.py    From pixelated-user-agent with GNU Affero General Public License v3.0 5 votes vote down vote up
def _search_tag_groups(self, is_filtering_tags):
    """Return ``(seen, total)`` groupings of documents by ``tag``.

    ``total`` is the ``groups()`` result of searching for ``'*'`` faceted
    on ``tag``. ``seen`` is the same grouping restricted by
    ``flags:<Status.SEEN>``; it stays ``None`` when ``is_filtering_tags``
    is true.
    """
    parser = QueryParser('tag', self._index.schema)
    facet_options = {
        'limit': None,
        'groupedby': sorting.FieldFacet('tag', allow_overlap=True),
        'maptype': sorting.Count,
    }

    with self._index.searcher() as searcher:
        total = searcher.search(parser.parse('*'), **facet_options).groups()
        if is_filtering_tags:
            seen = None
        else:
            seen_query = parser.parse("* AND flags:%s" % Status.SEEN)
            seen = searcher.search(seen_query, **facet_options).groups()
    return seen, total
Example #9
Source File: __init__.py    From pixelated-user-agent with GNU Affero General Public License v3.0 5 votes vote down vote up
def _search_with_options(self, options, query):
    """Parse ``query`` against the ``raw`` field and search with ``options``.

    ``options`` is expanded as keyword arguments to ``searcher.search``.
    The results object is returned after the searcher's ``with`` block has
    exited.
    NOTE(review): confirm callers only use data that remains available on
    the results once the searcher is closed.
    """
    with self._index.searcher() as searcher:
        parsed = QueryParser('raw', self._index.schema).parse(query)
        return searcher.search(parsed, **options)
Example #10
Source File: contacts.py    From pixelated-user-agent with GNU Affero General Public License v3.0 5 votes vote down vote up
def search_addresses(searcher, query):
    """Collect matched terms for ``query`` across the address fields.

    Searches ``to``, ``cc``, ``bcc`` and ``sender`` in turn for
    ``*<Title-cased query>* OR *<query>*``, masking documents tagged
    ``drafts`` or ``trash``. Returns element ``[1]`` of every matched-term
    entry after the per-field results have been flattened.
    """
    excluded = Term("tag", "drafts") | Term("tag", "trash")
    pattern = "*%s* OR *%s*" % (query.title(), query)

    per_field_terms = []
    for field in ('to', 'cc', 'bcc', 'sender'):
        parsed = QueryParser(field, searcher.schema).parse(pattern)
        facet = sorting.FieldFacet(field, allow_overlap=True)
        hits = searcher.search(
            parsed, limit=None, mask=excluded, groupedby=facet, terms=True)
        per_field_terms.append(hits.matched_terms())

    return [entry[1] for entry in flatten(per_field_terms)]
Example #11
Source File: whoosh_cn_backend.py    From izone with MIT License 5 votes vote down vote up
def setup(self):
    """
    Perform the deferred initialisation of this backend.

    Chooses RAM or file storage, builds the schema and query parser, then
    opens the index or creates it, and sets ``self.setup_complete``.
    Raises ``IOError`` if the file-storage path is not writable.
    """
    from haystack import connections

    fresh_index = False

    if not self.use_file_storage:
        global LOCALS

        # One shared in-memory store, created the first time it is needed.
        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()

        self.storage = LOCALS.RAM_STORE
    else:
        # Make sure the index directory exists and can be written to.
        if not os.path.exists(self.path):
            os.makedirs(self.path)
            fresh_index = True

        if not os.access(self.path, os.W_OK):
            raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)

        self.storage = FileStorage(self.path)

    connection = connections[self.connection_alias]
    built = self.build_schema(connection.get_unified_index().all_searchfields())
    self.content_field_name, self.schema = built
    self.parser = QueryParser(self.content_field_name, schema=self.schema)

    if fresh_index:
        self.index = self.storage.create_index(self.schema)
    else:
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            self.index = self.storage.create_index(self.schema)

    self.setup_complete = True
Example #12
Source File: query.py    From kerko with GNU General Public License v3.0 5 votes vote down vote up
def run_query_unique(field_name, value, return_fields=None):
    """Look up a single item by a unique key.

    Parses ``value`` against ``field_name`` with a plugin-free
    ``QueryParser`` and searches with ``limit=1``. Returns
    ``_get_fields(first_hit, return_fields)``, or ``None`` when the index
    is unavailable or nothing matches.
    """
    opened = open_index()
    if not opened:
        return None

    with opened.searcher() as searcher:
        parser = QueryParser(
            field_name,
            schema=current_app.config['KERKO_COMPOSER'].schema,
            plugins=[]
        )
        hits = searcher.search(parser.parse(value), limit=1)
        if not hits:
            return None
        return _get_fields(hits[0], return_fields)
Example #13
Source File: whoosh_cn_backend.py    From website with MIT License 5 votes vote down vote up
def setup(self):
    """
    Initialise storage, schema, parser and index the first time they are
    needed, then flag the backend as ready via ``self.setup_complete``.

    Raises ``IOError`` when file storage is used and ``self.path`` is not
    writable.
    """
    from haystack import connections

    # True only when file storage is on and the directory is missing.
    must_create = self.use_file_storage and not os.path.exists(self.path)
    if must_create:
        os.makedirs(self.path)

    if self.use_file_storage and not os.access(self.path, os.W_OK):
        raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)

    if self.use_file_storage:
        self.storage = FileStorage(self.path)
    else:
        global LOCALS

        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()

        self.storage = LOCALS.RAM_STORE

    all_fields = connections[self.connection_alias].get_unified_index().all_searchfields()
    self.content_field_name, self.schema = self.build_schema(all_fields)
    self.parser = QueryParser(self.content_field_name, schema=self.schema)

    if must_create:
        self.index = self.storage.create_index(self.schema)
    else:
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            # Existing storage without an index: create one instead.
            self.index = self.storage.create_index(self.schema)

    self.setup_complete = True
Example #14
Source File: get_template_based_result.py    From DualRL with MIT License 4 votes vote down vote up
def cal_sim(train_data_path, test_data_path, dst_result_path=None, save_n_best_search=1):
    """Index the training pairs and retrieve the best matches for test posts.

    Each line of ``train_data_path`` is lower-cased and split on tabs; the
    first column is indexed as ``post`` and the second stored as
    ``response``. For each line of ``test_data_path`` the first column is
    parsed against the ``post`` field, its terms are OR-ed together, and up
    to ``save_n_best_search`` hits are written to ``dst_result_path`` as
    ``<test line>\\t<post>\\t<response>``.

    The index lives in ``../tmp/ix_index/<d>`` where ``<d>`` is the first
    digit found in ``train_data_path``.

    Args:
        train_data_path: tab-separated training file; must contain a digit
            in its path.
        test_data_path: tab-separated test file.
        dst_result_path: output file path. It is passed to ``open`` as is,
            so the default ``None`` is not usable.
        save_n_best_search: maximum number of hits written per test line.

    Raises:
        IndexError: if ``train_data_path`` contains no digit.
    """
    schema = Schema(context=TEXT(stored=True), response=STORED, post=TEXT(stored=True))
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    index_i = re.findall(r'\d', train_data_path)[0]

    index_path = "../tmp/ix_index/" + index_i
    if not os.path.exists(index_path):
        os.makedirs(index_path)

    ix = create_in(index_path, schema)
    writer = ix.writer()

    def get_cpr(line):
        """Split a training line into (context, response, post)."""
        lines = line.lower().strip().split('\t')
        # Context is always empty here; only post and response are read.
        context = ''
        post = lines[0]
        response = lines[1]
        return context.strip().decode('utf-8'), response.decode('utf-8'), post.decode('utf-8')

    def load_train_data(file_name, writer):
        """Add every training line to the index, then commit."""
        # ``with`` closes the training file, which was previously leaked.
        with open(file_name) as train_file:
            for line in train_file:
                context, response, post = get_cpr(line)
                if context != '':
                    writer.add_document(context=context, response=response, post=post)
                else:
                    writer.add_document(response=response, post=post)
        writer.commit()

    def get_query(line, ix):
        """Build an OR query over all terms of the test line's post."""
        lines = line.strip().split('\t')
        post = lines[0].decode('utf-8')
        q2 = QueryParser("post", ix.schema).parse(post)
        terms = list(q2.all_terms())
        query = Or([Term(*x) for x in terms])
        return query

    load_train_data(train_data_path, writer)

    # Both files are closed on every exit path, including errors.
    with open(test_data_path, 'r') as test_file, open(dst_result_path, 'w') as fw_search:
        with ix.searcher(weighting=scoring.TF_IDF()) as searcher:
            c = searcher.collector(limit=10)
            tlc = TimeLimitCollector(c, timelimit=10.0)
            for line in test_file:
                try:
                    query = get_query(line, ix)
                    searcher.search_with_collector(query, tlc)
                    results = tlc.results()
                    for i in range(min(len(results), save_n_best_search)):
                        fw_search.write(
                            line.strip() + '\t' + str(results[i]["post"]) + '\t' + str(results[i]["response"]) + '\n')
                except Exception:
                    # Best-effort: any failure skips this line. The message
                    # says "TimeLimit" whatever the actual exception was.
                    print('TimeLimit, ignore it!')
                    print(line)
Example #15
Source File: boolean.py    From cltk with MIT License 4 votes vote down vote up
def corpus_query(self, query, save_file=None, window_size=300, surround_size=50):
    """Send query to a corpus's index. `save_file` is a filename.

    Builds an HTML fragment listing, for every hit, the author, the file
    path, an approximate hit count and the highlighted passages. When
    ``save_file`` is given the fragment is written to
    ``<cltk data dir>/user_data/search/<save_file>.html`` and nothing is
    returned; otherwise the fragment is returned as a string.

    :type save_file: str
    :param window_size: assigned to ``results.fragmenter.maxchars``.
    :param surround_size: assigned to ``results.fragmenter.surround``.

    >>> # cltk_index = CLTKIndex('latin', 'latin_text_latin_library')
    >>> # results = cltk_index.corpus_query('amicitia')

    """
    _index = open_dir(self.index_path)

    # Collected pieces are joined once at the end.
    output_parts = []

    with _index.searcher() as searcher:
        _query = QueryParser("content", _index.schema).parse(query)
        results = searcher.search(_query, limit=None)
        results.fragmenter.charlimit = None

        # Allow larger fragments
        results.fragmenter.maxchars = window_size
        # Show more context before and after
        results.fragmenter.surround = surround_size

        # NOTE(review): doc_count_all() looks like the size of the whole
        # index, not the number of documents matching the query; confirm
        # that the label below is what is intended.
        docs_number = searcher.doc_count_all()

        output_parts.append('Docs containing hits: {}.'.format(docs_number) + '</br></br>')

        for hit in results:
            author = hit['author']
            filepath = hit['path']
            output_parts.append(author + '</br>')
            output_parts.append(filepath + '</br>')

            with open(filepath) as file_open:
                file_contents = file_open.read()

            highlights = hit.highlights("content", text=file_contents, top=10000000)
            lines = highlights.split('\n')
            lines_br = '</br>'.join(lines)
            # One highlight line per fragment, hence "approximate".
            lines_number_approx = len(lines)
            output_parts.append('Approximate hits: {}.'.format(lines_number_approx) + '</br>')

            output_parts.append(lines_br + '</br></br>')

    output_str = ''.join(output_parts)

    if save_file:
        user_dir = os.path.normpath(get_cltk_data_dir() + '/user_data/search')
        output_path = os.path.join(user_dir, save_file + '.html')

        # makedirs also creates missing parents, which a bare os.mkdir
        # would fail on; exist_ok makes repeated saves safe.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with open(output_path, 'w') as file_open:
            file_open.write(output_str)
    else:
        return output_str
Example #16
Source File: searchfiles.py    From txtorg with MIT License 4 votes vote down vote up
def run(index, searcher, analyzer, reader, command, content_field="contents"):
    """Collect term frequencies and metadata for the documents of a query.

    Args:
        index: index whose ``schema`` is used to parse ``command``.
        searcher: provides ``docs_for_query``, ``vector_as`` and
            ``stored_fields``.
        analyzer: unused by this function.
        reader: provides ``all_doc_ids`` for the special command ``'all'``.
        command: ``'all'`` to select every document, otherwise a query
            string parsed against ``content_field``.
        content_field: field whose frequency vector is read.

    Returns:
        A tuple ``(scoreDocs, allTerms, allDicts, termsDocs, allMetadata)``:
        the document numbers that had a vector, the set of UTF-8 encoded
        terms, one ``{term: frequency, "txtorg_id": id}`` dict per
        document, a ``{term: number of documents containing it}`` dict,
        and one dict per document of the remaining stored fields (UTF-8
        encoded).
    """
    # Single-argument call form prints the same under Python 2 and 3.
    print('content_field is %s' % (content_field,))
    print(command)
    # check to see whether the user specified a field
    if command == 'all':
        myresults = reader.all_doc_ids()
    else:
        query = QueryParser(content_field, schema=index.schema).parse(command)
        myresults = searcher.docs_for_query(query)
    print('Query Completed')

    all_dicts = []
    all_terms = set()
    all_metadata = []
    terms_docs = dict()
    score_docs = []

    for docnum in myresults:
        vector = searcher.vector_as("frequency", docnum, content_field)
        if vector is None:
            continue

        # The vector yields (term, frequency) pairs,
        # e.g. [(u"apple", 3), (u"bear", 2), (u"cab", 2)].
        frequencies = dict()
        for term, num in vector:
            # Encode once and use the same key everywhere. Testing the
            # unencoded term against encoded keys miscounted documents.
            key = term.encode('utf-8')
            all_terms.add(key)
            frequencies[key] = num
            terms_docs[key] = terms_docs.get(key, 0) + 1

        # Fetch the stored fields once per document.
        stored = searcher.stored_fields(docnum)
        frequencies["txtorg_id"] = stored["txtorg_id"].encode('utf-8')

        # Build the metadata
        metadata = {
            name: stored[name].encode('utf-8')
            for name in stored
            if name != 'txtorg_id'
        }

        all_dicts.append(frequencies)
        all_metadata.append(metadata)
        score_docs.append(docnum)

    print(all_metadata)

    return score_docs, all_terms, all_dicts, terms_docs, all_metadata