Python Examples of whoosh.qparser.QueryParser

Source File: search.py From markdown-search with GNU General Public License v2.0

6 votes

def search(self, query_list, fields=None):
        """Search the index and return the parsed query, the hits and a tag cloud.

        :param query_list: iterable of query words; joined with single spaces.
        :param fields: optional list of field names to search. It is honoured
            only when it is ``["filename"]``, ``["tags"]`` or any two-element
            list; ``None``, an empty list or any other shape falls back to the
            full default field list.
        :returns: tuple ``(parsed_query, search_result, tag_cloud)`` where
            ``parsed_query`` is the string form of the executed query,
            ``search_result`` is whatever ``self.create_search_result`` builds
            from the hits and ``tag_cloud`` is a list of key terms taken from
            the ``tags`` field.
        """
        default_fields = ["tags", "headlines", "content", "filename", "doubleemphasiswords", "emphasiswords"]

        with self.ix.searcher() as searcher:
            query_string = " ".join(query_list)
            query = None
            if "\"" in query_string or ":" in query_string:
                # Phrase or field-qualified syntax: parse against "content" only.
                query = QueryParser("content", self.schema).parse(query_string)
            else:
                # `fields` defaults to None, so test truthiness before len().
                keep_fields = bool(fields) and (
                    len(fields) == 2
                    or (len(fields) == 1 and fields[0] in ("filename", "tags"))
                )
                if not keep_fields:
                    fields = default_fields
            if not query:
                # Also reached when the parser branch above produced a falsy
                # query; never hand None to MultifieldParser in that case.
                if fields is None:
                    fields = default_fields
                query = MultifieldParser(fields, schema=self.ix.schema).parse(query_string)
            parsed_query = "%s" % query
            # Single-argument parenthesised form prints the same on Python 2 and 3.
            print("query: %s" % parsed_query)
            results = searcher.search(query, terms=False, scored=True, groupedby="path")
            key_terms = results.key_terms("tags", docs=100, numterms=100)
            tag_cloud = [keyword for keyword, score in key_terms]
            # Must run while the searcher is still open.
            search_result = self.create_search_result(results)

        return parsed_query, search_result, tag_cloud

Source File: search.py From databrewer with MIT License

5 votes

def search(self, query, search_field='content'):
        """Yield the decoded ``data`` field of up to 100 hits for ``query``.

        ``query`` may already be a ``Query`` object; anything else is parsed
        against ``search_field`` using ``Variations`` as the term class.
        """
        parsed = query
        if not isinstance(parsed, Query):
            parsed = QueryParser(search_field, self.schema, termclass=Variations).parse(parsed)

        with self.index.searcher() as searcher:
            hits = searcher.search(parsed, limit=100)
            for hit in hits:
                yield self._decode(hit['data'])

Source File: whoosh_cn_backend.py From thirtylol with MIT License

5 votes

def setup(self):
        """
        Defers loading until needed.

        Prepares ``self.storage`` (file-backed or the shared RAM store),
        builds the schema and query parser from the unified index's search
        fields, then opens or creates the index and sets
        ``self.setup_complete``.

        :raises IOError: if file storage is used and ``self.path`` is not
            writable.
        """
        from haystack import connections
        new_index = False

        # Make sure the index is there.
        if self.use_file_storage and not os.path.exists(self.path):
            os.makedirs(self.path)
            new_index = True

        if self.use_file_storage and not os.access(self.path, os.W_OK):
            raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)

        if self.use_file_storage:
            self.storage = FileStorage(self.path)
        else:
            global LOCALS

            # getattr tolerates LOCALS having no RAM_STORE attribute yet,
            # where a plain attribute read would raise AttributeError.
            if getattr(LOCALS, 'RAM_STORE', None) is None:
                LOCALS.RAM_STORE = RamStorage()

            self.storage = LOCALS.RAM_STORE

        self.content_field_name, self.schema = self.build_schema(connections[self.connection_alias].get_unified_index().all_searchfields())
        self.parser = QueryParser(self.content_field_name, schema=self.schema)

        if new_index is True:
            self.index = self.storage.create_index(self.schema)
        else:
            try:
                self.index = self.storage.open_index(schema=self.schema)
            except index.EmptyIndexError:
                # Storage exists but holds no index yet: create one.
                self.index = self.storage.create_index(self.schema)

        self.setup_complete = True

Source File: whoosh_tool.py From TorCMS with MIT License

5 votes

def __init__(self):
        """Open the Whoosh index under ``database/whoosh`` and build a parser for its ``content`` field."""
        whoosh_index = open_dir("database/whoosh")
        self.whbase = whoosh_index
        self.parser = QueryParser("content", schema=whoosh_index.schema)

Source File: whoosh_cn_backend.py From Django-blog with MIT License

5 votes

def setup(self):
        """
        Prepare storage, schema, parser and index the first time they are needed.
        """
        from haystack import connections

        created_dir = False

        if self.use_file_storage:
            # On-disk index: create the directory on first use and insist on write access.
            if not os.path.exists(self.path):
                os.makedirs(self.path)
                created_dir = True

            if not os.access(self.path, os.W_OK):
                raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)

            self.storage = FileStorage(self.path)
        else:
            global LOCALS

            # In-memory index: reuse the RAM store kept on LOCALS, creating it once.
            if getattr(LOCALS, 'RAM_STORE', None) is None:
                LOCALS.RAM_STORE = RamStorage()

            self.storage = LOCALS.RAM_STORE

        unified_index = connections[self.connection_alias].get_unified_index()
        self.content_field_name, self.schema = self.build_schema(unified_index.all_searchfields())
        self.parser = QueryParser(self.content_field_name, schema=self.schema)

        if created_dir:
            # The directory was just created, so there is nothing to open.
            self.index = self.storage.create_index(self.schema)
        else:
            try:
                self.index = self.storage.open_index(schema=self.schema)
            except index.EmptyIndexError:
                self.index = self.storage.create_index(self.schema)

        self.setup_complete = True

Source File: whooshsearch.py From pySINDy with MIT License

5 votes

def __init__(self, db_path):
        """Open the index stored in ``db_path``, creating it with ``self.schema`` if none exists there."""
        ensuredir(db_path)
        if not index.exists_in(db_path):
            self.index = index.create_in(db_path, schema=self.schema)
        else:
            self.index = index.open_dir(db_path)
        # Queries default to the 'text' field.
        self.qparser = QueryParser('text', self.schema)

Source File: whoosh_cn_backend.py From blog with Apache License 2.0

5 votes

def setup(self):
        """
        Prepare storage, schema, parser and index the first time they are needed.
        """
        from haystack import connections

        created_dir = False

        if self.use_file_storage:
            # On-disk index: create the directory on first use and insist on write access.
            if not os.path.exists(self.path):
                os.makedirs(self.path)
                created_dir = True

            if not os.access(self.path, os.W_OK):
                raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)

            self.storage = FileStorage(self.path)
        else:
            global LOCALS

            # In-memory index: reuse the RAM store kept on LOCALS, creating it once.
            if getattr(LOCALS, 'RAM_STORE', None) is None:
                LOCALS.RAM_STORE = RamStorage()

            self.storage = LOCALS.RAM_STORE

        unified_index = connections[self.connection_alias].get_unified_index()
        self.content_field_name, self.schema = self.build_schema(unified_index.all_searchfields())
        self.parser = QueryParser(self.content_field_name, schema=self.schema)

        if created_dir:
            # The directory was just created, so there is nothing to open.
            self.index = self.storage.create_index(self.schema)
        else:
            try:
                self.index = self.storage.open_index(schema=self.schema)
            except index.EmptyIndexError:
                self.index = self.storage.create_index(self.schema)

        self.setup_complete = True

Source File: __init__.py From pixelated-user-agent with GNU Affero General Public License v3.0

5 votes

def _search_tag_groups(self, is_filtering_tags):
        """Return ``(seen, total)`` per-tag group counts from the index.

        ``total`` groups every document by ``tag``; ``seen`` does the same for
        documents whose ``flags`` match ``Status.SEEN`` and stays ``None``
        when ``is_filtering_tags`` is true.
        """
        parser = QueryParser('tag', self._index.schema)
        tag_facet = sorting.FieldFacet('tag', allow_overlap=True)
        search_kwargs = {'limit': None, 'groupedby': tag_facet, 'maptype': sorting.Count}

        with self._index.searcher() as searcher:
            total = searcher.search(parser.parse('*'), **search_kwargs).groups()
            seen = None
            if not is_filtering_tags:
                seen_query = parser.parse("* AND flags:%s" % Status.SEEN)
                seen = searcher.search(seen_query, **search_kwargs).groups()
        return seen, total

Source File: __init__.py From pixelated-user-agent with GNU Affero General Public License v3.0

5 votes

def _search_with_options(self, options, query):
        """Parse ``query`` against the ``raw`` field and search with ``options`` as keyword arguments."""
        whoosh_index = self._index
        with whoosh_index.searcher() as searcher:
            parsed = QueryParser('raw', whoosh_index.schema).parse(query)
            hits = searcher.search(parsed, **options)
        # NOTE(review): hits are handed back after the searcher context has
        # exited; confirm callers only rely on data still available then.
        return hits

Source File: contacts.py From pixelated-user-agent with GNU Affero General Public License v3.0

5 votes

def search_addresses(searcher, query):
    """Collect matched address terms for ``query`` across the to/cc/bcc/sender fields.

    Each field is searched for ``query`` and its title-cased form as
    wildcard substrings, masking out documents tagged ``drafts`` or
    ``trash``. Returns the second element of every matched term after
    flattening the per-field results.
    """
    excluded = Term("tag", "drafts") | Term("tag", "trash")
    pattern = "*%s* OR *%s*" % (query.title(), query)

    per_field_terms = []
    for field in ('to', 'cc', 'bcc', 'sender'):
        parsed = QueryParser(field, searcher.schema).parse(pattern)
        facet = sorting.FieldFacet(field, allow_overlap=True)
        hits = searcher.search(parsed, limit=None, mask=excluded, groupedby=facet, terms=True)
        per_field_terms.append(hits.matched_terms())

    return [term[1] for term in flatten(per_field_terms)]

Source File: whoosh_cn_backend.py From izone with MIT License

5 votes

def setup(self):
        """
        Prepare storage, schema, parser and index the first time they are needed.
        """
        from haystack import connections

        created_dir = False

        if self.use_file_storage:
            # On-disk index: create the directory on first use and insist on write access.
            if not os.path.exists(self.path):
                os.makedirs(self.path)
                created_dir = True

            if not os.access(self.path, os.W_OK):
                raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)

            self.storage = FileStorage(self.path)
        else:
            global LOCALS

            # In-memory index: reuse the RAM store kept on LOCALS, creating it once.
            if getattr(LOCALS, 'RAM_STORE', None) is None:
                LOCALS.RAM_STORE = RamStorage()

            self.storage = LOCALS.RAM_STORE

        unified_index = connections[self.connection_alias].get_unified_index()
        self.content_field_name, self.schema = self.build_schema(unified_index.all_searchfields())
        self.parser = QueryParser(self.content_field_name, schema=self.schema)

        if created_dir:
            # The directory was just created, so there is nothing to open.
            self.index = self.storage.create_index(self.schema)
        else:
            try:
                self.index = self.storage.open_index(schema=self.schema)
            except index.EmptyIndexError:
                self.index = self.storage.create_index(self.schema)

        self.setup_complete = True

Source File: query.py From kerko with GNU General Public License v3.0

5 votes

def run_query_unique(field_name, value, return_fields=None):
    """Look up a single item by a unique key and return its fields, or None.

    ``None`` is returned when no index is available or nothing matches.
    """
    index = open_index()
    if not index:
        return None

    with index.searcher() as searcher:
        # No parser plugins: ``value`` is parsed without any query syntax extensions.
        parser = QueryParser(
            field_name,
            schema=current_app.config['KERKO_COMPOSER'].schema,
            plugins=[]
        )
        results = searcher.search(parser.parse(value), limit=1)
        if results:
            return _get_fields(results[0], return_fields)
    return None

Source File: whoosh_cn_backend.py From website with MIT License

5 votes

def setup(self):
        """
        Prepare storage, schema, parser and index the first time they are needed.
        """
        from haystack import connections

        created_dir = False

        if self.use_file_storage:
            # On-disk index: create the directory on first use and insist on write access.
            if not os.path.exists(self.path):
                os.makedirs(self.path)
                created_dir = True

            if not os.access(self.path, os.W_OK):
                raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)

            self.storage = FileStorage(self.path)
        else:
            global LOCALS

            # In-memory index: reuse the RAM store kept on LOCALS, creating it once.
            if getattr(LOCALS, 'RAM_STORE', None) is None:
                LOCALS.RAM_STORE = RamStorage()

            self.storage = LOCALS.RAM_STORE

        unified_index = connections[self.connection_alias].get_unified_index()
        self.content_field_name, self.schema = self.build_schema(unified_index.all_searchfields())
        self.parser = QueryParser(self.content_field_name, schema=self.schema)

        if created_dir:
            # The directory was just created, so there is nothing to open.
            self.index = self.storage.create_index(self.schema)
        else:
            try:
                self.index = self.storage.open_index(schema=self.schema)
            except index.EmptyIndexError:
                self.index = self.storage.create_index(self.schema)

        self.setup_complete = True

Source File: get_template_based_result.py From DualRL with MIT License

4 votes

def cal_sim(train_data_path, test_data_path, dst_result_path=None, save_n_best_search=1):
    """Index the training pairs, then write the best-matching pairs for each test line.

    Every line of ``train_data_path`` is read as ``post<TAB>response`` and
    added to a fresh index under ``../tmp/ix_index/<d>``, where ``<d>`` is the
    first digit found in ``train_data_path``. Each line of ``test_data_path``
    is then searched (TF-IDF, OR of all parsed terms of its first column) and
    up to ``save_n_best_search`` hits are appended to ``dst_result_path`` as
    ``line<TAB>post<TAB>response``.

    :param train_data_path: training file; must contain at least one digit
        (otherwise the index-name lookup raises ``IndexError``).
    :param test_data_path: file with one query per line.
    :param dst_result_path: output file path. The ``None`` default is not
        usable: it is passed straight to ``open``.
    :param save_n_best_search: maximum number of hits written per test line.
    """
    schema = Schema(context=TEXT(stored=True), response=STORED, post=TEXT(stored=True))
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    index_i = re.findall(r'\d', train_data_path)[0]

    index_path = "../tmp/ix_index/" + index_i
    if not os.path.exists(index_path):
        os.makedirs(index_path)

    ix = create_in(index_path, schema)
    writer = ix.writer()

    def get_cpr(line):
        """Split a training line into (context, response, post); context is always empty."""
        lines = line.lower().strip().split('\t')
        context = ''
        post = lines[0]
        response = lines[1]
        return context.strip().decode('utf-8'), response.decode('utf-8'), post.decode('utf-8')

    def load_train_data(file_name, writer):
        """Add every line of ``file_name`` to the index and commit."""
        # `with` closes the training file even if a line fails to parse.
        with open(file_name) as f:
            for line in f:
                context, response, post = get_cpr(line)
                if context != '':
                    writer.add_document(context=context, response=response, post=post)
                else:
                    writer.add_document(response=response, post=post)
        writer.commit()

    def get_query(line, ix):
        """Build an OR query over all terms parsed from the line's first column."""
        lines = line.strip().split('\t')
        post = lines[0].decode('utf-8')
        q2 = QueryParser("post", ix.schema).parse(post)
        terms = list(q2.all_terms())
        query = Or([Term(*x) for x in terms])
        return query

    load_train_data(train_data_path, writer)

    # Both files are closed on every exit path, including errors.
    with open(test_data_path, 'r') as f, open(dst_result_path, 'w') as fw_search:
        with ix.searcher(weighting=scoring.TF_IDF()) as searcher:
            c = searcher.collector(limit=10)
            tlc = TimeLimitCollector(c, timelimit=10.0)
            for line in f:
                try:
                    query = get_query(line, ix)
                    searcher.search_with_collector(query, tlc)
                    results = tlc.results()
                    for i in range(min(len(results), save_n_best_search)):
                        fw_search.write(
                            line.strip() + '\t' + str(results[i]["post"]) + '\t' + str(results[i]["response"]) + '\n')
                except Exception:
                    # Deliberate best-effort: any failure on a line (not only a
                    # time limit) is reported and the line is skipped.
                    print('TimeLimit, ignore it!')
                    print(line)

Source File: boolean.py From cltk with MIT License

4 votes

def corpus_query(self, query, save_file=None, window_size=300, surround_size=50):
        """Send query to a corpus's index. `save_file` is a filename.

        Builds an HTML-ish report with, for each hit, its author, file path,
        an approximate hit count and the highlighted fragments. With
        ``save_file`` the report is written to
        ``<cltk data dir>/user_data/search/<save_file>.html`` and nothing is
        returned; otherwise the report string is returned.

        :param query: query string parsed against the ``content`` field.
        :param save_file: optional output filename, without extension.
        :type save_file: str
        :param window_size: maximum characters per highlighted fragment.
        :param surround_size: characters of context around each match.

        >>> # cltk_index = CLTKIndex('latin', 'latin_text_latin_library')
        >>> # results = cltk_index.corpus_query('amicitia')

        """
        _index = open_dir(self.index_path)

        # Collect pieces and join once instead of repeated string concatenation.
        output_parts = []

        with _index.searcher() as searcher:
            _query = QueryParser("content", _index.schema).parse(query)
            results = searcher.search(_query, limit=None)
            results.fragmenter.charlimit = None

            # Allow larger fragments
            results.fragmenter.maxchars = window_size
            # Show more context before and after
            results.fragmenter.surround = surround_size

            # This is the index's doc_count_all() value, reported under the
            # original label.
            docs_number = searcher.doc_count_all()

            output_parts.append('Docs containing hits: {}.'.format(docs_number) + '</br></br>')

            for hit in results:
                author = hit['author']
                filepath = hit['path']
                output_parts.append(author + '</br>')
                output_parts.append(filepath + '</br>')

                with open(filepath) as file_open:
                    file_contents = file_open.read()

                highlights = hit.highlights("content", text=file_contents, top=10000000)
                lines = highlights.split('\n')
                lines_br = '</br>'.join(lines)
                lines_number_approx = len(lines)
                output_parts.append('Approximate hits: {}.'.format(lines_number_approx) + '</br>')

                output_parts.append(lines_br + '</br></br>')

        output_str = ''.join(output_parts)

        if save_file:
            user_dir = os.path.normpath(get_cltk_data_dir() + '/user_data/search')
            output_path = os.path.join(user_dir, save_file + '.html')

            # makedirs also creates missing parent directories, which a
            # single os.mkdir could not; exist_ok makes it a no-op otherwise.
            os.makedirs(user_dir, exist_ok=True)
            with open(output_path, 'w') as file_open:
                file_open.write(output_str)
            return None
        else:
            return output_str

Source File: searchfiles.py From txtorg with MIT License

4 votes

def run(index, searcher, analyzer, reader, command, content_field="contents"):
    """Run ``command`` against the index and collect per-document term data.

    ``command == 'all'`` selects every document id from ``reader``; any other
    value is parsed as a query on ``content_field``. Documents without a term
    vector for ``content_field`` are skipped.

    :param index: index whose schema is used to parse ``command``.
    :param searcher: provides ``docs_for_query``, ``vector_as`` and
        ``stored_fields``.
    :param analyzer: unused here; kept for interface compatibility.
    :param reader: provides ``all_doc_ids``.
    :param command: ``'all'`` or a query string.
    :param content_field: field whose term vectors are read.
    :returns: tuple ``(scoreDocs, allTerms, allDicts, termsDocs, allMetadata)``:
        the kept document numbers, the set of UTF-8 encoded terms, one
        ``{term: frequency, "txtorg_id": id}`` dict per document, a
        ``{term: number of documents containing it}`` dict, and one dict per
        document with the remaining stored fields (UTF-8 encoded).
    """
    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print('content_field is %s' % content_field)
    print(command)
    # check to see whether the user specified a field
    if command == 'all':
        myresults = reader.all_doc_ids()
    else:
        query = QueryParser(content_field, schema=index.schema).parse(command)
        myresults = searcher.docs_for_query(query)
    print('Query Completed')

    allDicts = []
    allTerms = set()
    allMetadata = []
    termsDocs = dict()

    scoreDocs = []
    for docnum in myresults:
        vector = searcher.vector_as("frequency", docnum, content_field)
        if vector is None:
            continue

        d = dict()
        # a vector yields (term, frequency) tuples, e.g.
        # [(u"apple", 3), (u"bear", 2), (u"cab", 2)]
        for (t, num) in vector:
            # Encode once and use the encoded key everywhere: the counter is
            # keyed by encoded terms, so testing membership with the
            # un-encoded term could miss and reset the count to 1.
            key = t.encode('utf-8')
            allTerms.add(key)
            d[key] = num
            termsDocs[key] = termsDocs.get(key, 0) + 1

        # Fetch the stored fields once per document.
        stored = searcher.stored_fields(docnum)
        d["txtorg_id"] = stored["txtorg_id"].encode('utf-8')

        # Build the metadata
        m = dict()
        for k in stored:
            if k != 'txtorg_id':
                m[k] = stored[k].encode('utf-8')
        allDicts.append(d)
        allMetadata.append(m)
        scoreDocs.append(docnum)
    print(allMetadata)

    return scoreDocs, allTerms, allDicts, termsDocs, allMetadata

Python whoosh.qparser.QueryParser() Examples