Python elasticsearch_dsl.Q Examples

The following are 30 code examples of elasticsearch_dsl.Q(), taken from open source projects. The source file and originating project are listed above each example so you can refer back to the original code. You may also want to check out the other available functions and classes of the elasticsearch_dsl module.
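Before the project examples, here is a minimal sketch of the basics: Q() builds query objects that serialize to the Elasticsearch query DSL and combine with the &, | and ~ operators. The index and field names below are made up purely for illustration.

from elasticsearch_dsl import Q, Search

# Q() builds query objects; to_dict() shows the query DSL they serialize to.
match = Q("match", title="python")      # {'match': {'title': 'python'}}
term = Q("term", published=True)        # {'term': {'published': True}}

print((match & term).to_dict())   # bool query with both clauses under "must"
print((match | term).to_dict())   # bool query with both clauses under "should"
print((~term).to_dict())          # bool query with the term under "must_not"

# Queries attach to a Search object, which builds the full request body.
s = Search(index="my-index").query(match & term)
print(s.to_dict())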
Example #1
Source File: search_permissions.py    From invenio-app-ils with MIT License
def search_filter_record_permissions():
    """Filter list of results by `_access` and `restricted` fields."""
    if not has_request_context() or backoffice_permission().allows(g.identity):
        return Q()

    # A record is public if `restricted` field False or missing
    restricted_field_missing = ~Q("exists", field="restricted")
    is_restricted = restricted_field_missing | Q("term", restricted=False)

    combined_filter = is_restricted

    if current_app.config.get("ILS_RECORDS_EXPLICIT_PERMISSIONS_ENABLED"):
        # if `_access`, check `_access.read` against the user. It takes
        # precedence over `restricted`.
        # if not `_access`, check if open access as before.
        _access_field_exists = Q("exists", field="_access.read")
        provides = _get_user_provides()
        user_can_read = _access_field_exists & Q(
            "terms", **{"_access.read": provides}
        )
        combined_filter = user_can_read | (
            ~_access_field_exists & ~is_restricted
        )

    return Q("bool", filter=[combined_filter]) 
Example #2
Source File: elastic.py    From ivre with GNU General Public License v3.0
def searchport(port, protocol='tcp', state='open', neg=False):
        """Filters (if `neg` == True, filters out) records with
        specified protocol/port at required state. Be aware that when
        a host has a lot of ports filtered or closed, it will not
        report all of them, but only a summary, and thus the filter
        might not work as expected. This filter will always work to
        find open ports.

        """
        if port == "host":
            res = Q("nested", path="ports", query=Q("match", ports__port=-1))
        elif state == "open":
            res = Q("match", **{"openports.%s.ports" % protocol: port})
        else:
            res = Q("nested", path="ports", query=(
                Q("match", ports__port=port) &
                Q("match", ports__protocol=protocol) &
                Q("match", ports__state_state=state)
            ))
        if neg:
            return ~res
        return res 
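Two details of this example are worth calling out: field names built at runtime (such as "openports.tcp.ports") have to be passed via ** unpacking because they are not valid Python keyword arguments, and negating with ~ wraps the query in a bool must_not clause. A small sketch with illustrative values:

from elasticsearch_dsl import Q

protocol, port = "tcp", 443  # illustrative values

# Dotted, dynamically built field names go in via ** unpacking.
q = Q("match", **{"openports.%s.ports" % protocol: port})

# Negation yields a bool query with a must_not clause, e.g.
# {'bool': {'must_not': [{'match': {'openports.tcp.ports': 443}}]}}
print((~q).to_dict())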
Example #3
Source File: util.py    From open-ledger with MIT License
def correct_license_capitalization(self, provider='europeana', end=None):
        """[#186] Correct license capitalization"""
        s = Search()
        q = Q('term', provider=provider)
        s = s.query(q)
        response = s.execute()
        total = response.hits.total
        log.info("Using search engine instance %s", settings.ELASTICSEARCH_URL)
        mod_count = 0
        count = 0
        for r in s.scan():
            if not r.license.islower():
                img = search.Image.get(id=r.identifier)
                log.debug("[%d] Changing license %s to %s", count, img.license, img.license.lower())
                img.update(license=img.license.lower())
                mod_count += 1
            count += 1
        log.info("Modified %d records in search engine", mod_count) 
Example #4
Source File: es_search.py    From seqr with GNU Affero General Public License v3.0
def _pathogenicity_filter(pathogenicity):
    clinvar_filters = pathogenicity.get('clinvar', [])
    hgmd_filters = pathogenicity.get('hgmd', [])

    pathogenicity_filter = None
    if clinvar_filters:
        clinvar_clinical_significance_terms = set()
        for clinvar_filter in clinvar_filters:
            clinvar_clinical_significance_terms.update(CLINVAR_SIGNFICANCE_MAP.get(clinvar_filter, []))
        pathogenicity_filter = Q('terms', clinvar_clinical_significance=sorted(list(clinvar_clinical_significance_terms)))

    if hgmd_filters:
        hgmd_class = set()
        for hgmd_filter in hgmd_filters:
            hgmd_class.update(HGMD_CLASS_MAP.get(hgmd_filter, []))

        hgmd_q = Q('terms', hgmd_class=sorted(list(hgmd_class)))
        pathogenicity_filter = pathogenicity_filter | hgmd_q if pathogenicity_filter else hgmd_q

    return pathogenicity_filter 
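The `pathogenicity_filter | hgmd_q if pathogenicity_filter else hgmd_q` expression avoids OR-ing against None when no clinvar filters were requested. A hedged sketch of what the OR combination itself produces (the terms values are made up):

from elasticsearch_dsl import Q

clinvar_q = Q("terms", clinvar_clinical_significance=["Likely_pathogenic", "Pathogenic"])
hgmd_q = Q("terms", hgmd_class=["DM"])

# OR-ing two terms queries yields a bool query with both as "should" clauses, roughly:
# {'bool': {'should': [{'terms': {'clinvar_clinical_significance': [...]}},
#                      {'terms': {'hgmd_class': ['DM']}}]}}
print((clinvar_q | hgmd_q).to_dict())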
Example #5
Source File: util.py    From open-ledger with MIT License
def correct_orphan_records(self, provider='europeana', end=None):
        """[#185] Delete records from the search engine which aren't found in the database"""
        s = Search()
        q = Q('term', provider=provider)
        s = s.query(q)
        response = s.execute()
        total = response.hits.total
        # A file extracted from the production database listing all of the europeana identifiers
        identifier_file = '/tmp/europeana-identifiers.json'
        db_identifiers = set(json.load(open(identifier_file)))
        total_in_db = len(db_identifiers)
        log.info("Using search engine instance %s", settings.ELASTICSEARCH_URL)
        log.info("Total records: %d (search engine), %d (database) [diff=%d]", total, total_in_db, total - total_in_db)
        deleted_count = 0
        for r in s.scan():
            if r.identifier not in db_identifiers:
                img = search.Image.get(id=r.identifier)
                log.debug("Going to delete image %s", img)
                deleted_count += 1
        log.info("Deleted %d from search engine", deleted_count) 
Example #6
Source File: site_views.py    From open-ledger with MIT License
def about(request):
    """Information about the current site, its goals, and what content is loaded"""
    # Provider counts
    providers = cache.get_or_set(CACHE_STATS_NAME, [], CACHE_STATS_DURATION)
    if not providers:
        for provider in sorted(settings.PROVIDERS.keys()):
            s = Search()
            q = Q('term', provider=provider)
            s = s.query(q)
            response = s.execute()
            if response.hits.total > 0:
                data = settings.PROVIDERS[provider]
                total = intcomma(response.hits.total)
                data.update({'hits': total})
                providers.append(data)
        # All results
        s = Search()
        response = s.execute()
        total = intcomma(response.hits.total)
        providers.append({'display_name': 'Total', 'hits': total})
        cache.set(CACHE_STATS_NAME, providers)
    return render(request, "about.html", {'providers': providers}) 
Example #7
Source File: es_search.py    From seqr with GNU Affero General Public License v3.0
def filter_by_frequency(self, frequencies):
        q = Q()
        for pop, freqs in sorted(frequencies.items()):
            if freqs.get('af') is not None:
                filter_field = next(
                    (field_key for field_key in POPULATIONS[pop]['filter_AF']
                     if any(field_key in index_metadata['fields'] for index_metadata in self.index_metadata.values())),
                    POPULATIONS[pop]['AF'])
                q &= _pop_freq_filter(filter_field, freqs['af'])
            elif freqs.get('ac') is not None:
                q &= _pop_freq_filter(POPULATIONS[pop]['AC'], freqs['ac'])

            if freqs.get('hh') is not None:
                q &= _pop_freq_filter(POPULATIONS[pop]['Hom'], freqs['hh'])
                q &= _pop_freq_filter(POPULATIONS[pop]['Hemi'], freqs['hh'])
        self.filter(q) 
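Here Q() with no arguments gives a match_all query, which serves as a neutral starting value for the &= chain. A minimal sketch of that accumulation pattern, with made-up field names and thresholds in place of the project's _pop_freq_filter helper:

from elasticsearch_dsl import Q

q = Q()   # match_all, i.e. {'match_all': {}}

# AND-ing onto match_all keeps only the real constraints; each &= adds another clause.
q &= Q("range", **{"gnomad_af": {"lte": 0.01}})
q &= Q("range", **{"gnomad_ac": {"lte": 5}})

# Roughly a bool query with the two range queries under "must".
print(q.to_dict())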
Example #8
Source File: update_politician_pictures.py    From politicos with GNU Affero General Public License v3.0
def get_pictures(client):
    es_data = (
        Search(using=client, index='politicians')
        .query(Q('bool', must=[Q('match', ano_eleicao=ANO_ELEICAO)]))
        .source(['sq_candidato', 'sg_ue'])
        .scan()
    )
    data = []
    for hit in es_data:
        url = 'http://divulgacandcontas.tse.jus.br/divulga'
        url = f'{url}/rest/v1/candidatura/buscar'
        url = f'{url}/{ANO_ELEICAO}/{hit.sg_ue}/2022802018'
        url = f'{url}/candidato/{hit.sq_candidato}'
        data.append(
            {
                'doc_id': hit.meta.id,
                'url': url,
                'sg_ue': hit.sg_ue,
                'sq_candidato': hit.sq_candidato,
            }
        )
    return data 
Example #9
Source File: study_ceres_onion.py    From grimoirelab-elk with GNU General Public License v3.0
def process(self, items_block):
        """Process a DataFrame to compute Onion.

        :param items_block: items to be processed. Expects to find a pandas DataFrame.
        """

        logger.debug("{} Authors to process: {}".format(self.__log_prefix, len(items_block)))

        onion_enrich = Onion(items_block)
        df_onion = onion_enrich.enrich(member_column=ESOnionConnector.AUTHOR_UUID,
                                       events_column=ESOnionConnector.CONTRIBUTIONS)

        # Get and store Quarter as String
        df_onion['quarter'] = df_onion[ESOnionConnector.TIMEFRAME].map(lambda x: str(pandas.Period(x, 'Q')))

        # Add metadata: enriched on timestamp
        df_onion['metadata__enriched_on'] = datetime.utcnow().isoformat()
        df_onion['data_source'] = self.data_source
        df_onion['grimoire_creation_date'] = df_onion[ESOnionConnector.TIMEFRAME]

        logger.debug("{} Final new events: {}".format(self.__log_prefix, len(df_onion)))

        return self.ProcessResults(processed=len(df_onion), out_items=df_onion) 
Example #10
Source File: fields.py    From udata with GNU Affero General Public License v3.0
def get_value_filter(self, filter_value):
        '''
        Fix here until upstream PR is merged
        https://github.com/elastic/elasticsearch-dsl-py/pull/473
        '''
        self.validate_parameter(filter_value)
        f, t = self._ranges[filter_value]
        limits = {}
        # lt and gte to ensure non-overlapping ranges
        if f is not None:
            limits['gte'] = f
        if t is not None:
            limits['lt'] = t

        return Q('range', **{
            self._params['field']: limits
        }) 
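Since the field name comes out of self._params['field'] at runtime, the range body is passed with ** unpacking rather than as a keyword argument. An illustration with a made-up field:

from elasticsearch_dsl import Q

field, limits = "metrics.views", {"gte": 100, "lt": 1000}  # illustrative

q = Q("range", **{field: limits})
print(q.to_dict())   # {'range': {'metrics.views': {'gte': 100, 'lt': 1000}}}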
Example #11
Source File: test_querysets.py    From django-zombodb with MIT License
def test_dsl_search_no_limit(self):
        # duplicate tj and soleil
        self.tj.pk = None
        self.tj.save()
        self.soleil.pk = None
        self.soleil.save()

        results = Restaurant.objects.dsl_search(
            ElasticsearchQ('match', street='skillman'),
            sort=True,
            limit=None)

        self.assertEqual(len(results), 4)
        self.assertEqual(
            [r.name for r in results],
            [self.soleil.name, self.soleil.name, self.tj.name, self.tj.name]) 
Example #12
Source File: WASEQuery.py    From WASE with GNU General Public License v3.0
def query_missing(s, field, name, methods=None, responsecodes=None, invert=False):
    # main query
    q = Q("match", ** { field: name })
    if not invert:
        q = ~q
    s.query = q

    # add filters
    ## method
    if methods:
        s = s.filter("terms", ** { 'request.method': methods })
    ## response codes
    if responsecodes:
        for rc in responsecodes:
            rcrange = rc.split("-")
            if len(rcrange) == 2:
                s = s.filter("range", ** { 'response.status': { "gte": int(rcrange[0]), "lte": int(rcrange[1]) } })
            else:
                s = s.filter("term", ** { 'response.status': rc })

    print_debug(s.to_dict())
    return s 
Example #13
Source File: WASEQuery.py    From WASE with GNU General Public License v3.0
def query_vals(s, field, name, values, invert):
    # match documents where given field value name is present, if required
    if values:
        q = Q("nested", path=field, query=Q("wildcard", ** { field + ".value.keyword": values }))
        if invert:
            s.query = ~q
        else:
            s.query = q
    else:
        s.query = Q()

    # 1. descent into response.headers/request.parameters
    # 2. filter given header
    # 3. aggregate values
    # 4. jump back into main document
    # 5. aggregate URLs
    s.aggs.bucket("field", "nested", path=field)\
            .bucket("valuefilter", "filter", Q("match", ** { field + ".name": name }))\
            .bucket("values", "terms", field=field + ".value.keyword", size=args.size)\
            .bucket("main", "reverse_nested")\
            .bucket("urls", "terms", field="request.url.keyword", size=args.size)
    return s 
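Note that each .bucket() call returns the bucket it just created, so the chained calls above nest aggregations inside one another rather than adding siblings. A stripped-down sketch with illustrative names and a fixed size:

from elasticsearch_dsl import Search

s = Search()

# Each .bucket() returns the newly created bucket, so this nests a terms
# aggregation inside a nested aggregation (siblings would be added on s.aggs instead).
s.aggs.bucket("headers", "nested", path="response.headers") \
      .bucket("values", "terms", field="response.headers.value.keyword", size=10)

print(s.to_dict())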
Example #14
Source File: test_querysets.py    From django-zombodb with MIT License
def test_dsl_search(self):
        results = Restaurant.objects.dsl_search(ElasticsearchQ(
            'bool',
            must=[
                ElasticsearchQ('match', street='Skillman Ave'),
                ElasticsearchQ('match', categories='French')
            ]
        ))
        self.assertCountEqual(results, [self.soleil])

        results = Restaurant.objects.dsl_search(ElasticsearchQ(
            'bool',
            must=[
                ElasticsearchQ('match', street='Skillman Ave'),
                ElasticsearchQ('match', zip_code='11377')
            ]
        ))
        self.assertCountEqual(results, [self.tj, self.soleil])

        results = Restaurant.objects.dsl_search(Term(email='alcove@example.org'))
        self.assertCountEqual(results, [self.alcove]) 
Example #15
Source File: es_wrapper.py    From bitshares-explorer-api with MIT License
def get_trade_history(size=10, from_date='2015-10-10', to_date='now', sort_by='-operation_id_num',
                      search_after=None, base="1.3.0", quote="1.3.121"):

    s = Search(using=es, index="bitshares-*")

    s = s.extra(size=size)
    if search_after and search_after != '':
        s = s.extra(search_after=search_after.split(','))

    q = Q()
    q = q & Q("match", operation_type=4)
    q = q & Q("match", operation_history__op_object__is_maker=True)

    q = q & Q("match", operation_history__op_object__fill_price__base__asset_id=base)
    q = q & Q("match", operation_history__op_object__fill_price__quote__asset_id=quote)

    range_query = Q("range", block_data__block_time={'gte': from_date, 'lte': to_date})
    s.query = q & range_query

    s = s.sort(*sort_by.split(','))
    response = s.execute()
    verify_es_response(response)

    return [hit.to_dict() for hit in response] 
Example #16
Source File: test_search.py    From elasticsearch-dsl-py with Apache License 2.0
def test_query_can_be_assigned_to():
    s = search.Search()

    q = Q('match', title='python')
    s.query = q

    assert s.query._proxied is q 
Example #17
Source File: utils.py    From userline with BSD 3-Clause "New" or "Revised" License
def get_dsl_logoff_query(screen):
	q = None
	for evtid in config.EVENTS_LOGOFF:
		tmp = Q("match",event_identifier=evtid)
		if q is None:
			q = tmp
		else:
			q = q | tmp

	if screen is True:
		for evtid in config.EVENTS_LOGOFF_SCREEN:
			q = q | Q("match",event_identifier=evtid)

	return q 
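The loop builds the OR chain one event ID at a time, using None as the initial sentinel. An equivalent, slightly more compact pattern (not from the original project) folds the generated queries together with functools.reduce; the event IDs shown are illustrative:

from functools import reduce
from operator import or_

from elasticsearch_dsl import Q

event_ids = [4634, 4647]  # illustrative logoff event identifiers

# OR-combine one match query per event ID; reduce requires a non-empty sequence.
q = reduce(or_, (Q("match", event_identifier=evtid) for evtid in event_ids))
print(q.to_dict())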
Example #18
Source File: search.py    From nyaa with GNU General Public License v3.0
def _es_name_exact_phrase(literal):
    ''' Returns a Query for a phrase match on the display_name for a given literal '''
    return Q({
        'match_phrase': {
            'display_name.exact': {
                'query': literal,
                'analyzer': 'exact_analyzer'
            }
        }
    }) 
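The raw-dict form is used here because the field needs extra options (query plus analyzer). The same query should also be expressible with ** unpacking, though the dict form above is arguably clearer for deeply nested bodies:

from elasticsearch_dsl import Q

literal = "example phrase"  # illustrative

# Presumably equivalent to the dict form used in _es_name_exact_phrase().
q = Q("match_phrase", **{
    "display_name.exact": {"query": literal, "analyzer": "exact_analyzer"}
})
print(q.to_dict())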
Example #19
Source File: test_querysets.py    From django-zombodb with MIT License
def test_dsl_search_validate(self):
        query = ElasticsearchQ('bool')
        query.name = 'wrong'
        with self.assertRaises(InvalidElasticsearchQuery) as cm:
            Restaurant.objects.dsl_search(query, validate=True)
        self.assertRegex(
            str(cm.exception),
            "Invalid Elasticsearch query: (.+)") 
Example #20
Source File: test_querysets.py    From django-zombodb with MIT License
def test_dsl_search_score_attr(self):
        results = Restaurant.objects.dsl_search(
            ElasticsearchQ('match', street='skillman'),
            sort=True,
            score_attr='custom_score')

        self.assertEqual(len(results), 2)
        for r in results:
            self.assertTrue(hasattr(r, 'custom_score'))
            self.assertGreater(r.custom_score, 0) 
Example #21
Source File: crawl.py    From weapp-zhihulive with Apache License 2.0
async def parse_zhuanlan_link(self, response):
        posts = await response.json()

        if response.status == 200 and posts:
            for post in posts:
                cover = post['titleImage']
                if not cover:
                    continue
                s = Live.search()
                title = post['title']
                for sep in ('-', '—'):
                    if sep in title:
                        title = title.split(sep)[-1].strip()
                speaker_id = post['author']['hash']
                zid = post['url'].split('/')[-1]
                s = s.query(Q('match_phrase', subject=title))
                lives = await s.execute()
                for live in lives:
                    if live.speaker and live.speaker.speaker_id == speaker_id:
                        await self.update_live(zid, cover, live)
                        break
                else:
                    match = LIVE_REGEX.search(post['content'])
                    if match:
                        live_id = match.group(2)
                        try:
                            live = await Live.get(live_id)
                        except NotFoundError:
                            pass
                        else:
                            await self.update_live(zid, cover, live)

            return get_next_url(response.url) 
Example #22
Source File: more_like_this_validator.py    From texta with GNU General Public License v3.0
def validate_filter(filter: dict):
    try:
        query = Q(filter)
    except Exception as e:
        logging.getLogger(ERROR_LOGGER).exception("Could not parse filter query {}.".format(filter))
        raise serializers.ValidationError("Could not parse the filter, query. Make sure you have not included the top 'query' key!") 
Example #23
Source File: test_querysets.py    From django-zombodb with MIT License
def test_dsl_search_sort(self):
        results = Restaurant.objects.dsl_search(
            ElasticsearchQ(
                'bool',
                should=[
                    ElasticsearchQ('match', categories='sushi'),
                    ElasticsearchQ('match', categories='asian'),
                    ElasticsearchQ('match', categories='japanese'),
                    ElasticsearchQ('match', categories='french'),
                ],
                minimum_should_match=1
            ),
            validate=True,
            sort=True)
        self.assertEqual(list(results), [self.tj, self.soleil])

        results = Restaurant.objects.dsl_search(
            ElasticsearchQ(
                'bool',
                should=[
                    ElasticsearchQ('match', categories='french'),
                    ElasticsearchQ('match', categories='coffee'),
                    ElasticsearchQ('match', categories='european'),
                    ElasticsearchQ('match', categories='sushi'),
                ],
                minimum_should_match=1
            ),
            sort=True)
        self.assertEqual(list(results), [self.soleil, self.tj]) 
Example #24
Source File: tor_elasticsearch.py    From freshonions-torscraper with GNU Affero General Public License v3.0
def elasticsearch_retrieve_page_by_id(page_id):
    query = Search().filter(Q("term", nid=int(page_id)))[:1]
    result = query.execute()
    if result.hits.total == 0:
        return None
    return result.hits[0] 
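Search().filter(...) runs the query in non-scoring filter context by wrapping it in a bool filter clause, and the [:1] slice limits the response to a single hit. A hedged sketch of the resulting request body:

from elasticsearch_dsl import Q, Search

s = Search().filter(Q("term", nid=42))[:1]   # illustrative id

# Roughly: {'query': {'bool': {'filter': [{'term': {'nid': 42}}]}}, 'size': 1}
print(s.to_dict())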
Example #25
Source File: esnotifications.py    From stethoscope with Apache License 2.0
def create_query_for_email(self, search, email):
    return search.query(elasticsearch_dsl.Q({"match": {'email': email}})) 
Example #26
Source File: tor_elasticsearch.py    From freshonions-torscraper with GNU Affero General Public License v3.0
def elasticsearch_delete_old():
    _from = NEVER
    _to   = datetime.now() - timedelta(days=30)
    query = Search().filter(Q("range", visited_at={'from': _from, 'to': _to}))
    result = query.delete() 
Example #27
Source File: es_wrapper.py    From bitshares-explorer-api with MIT License
def get_trx(trx, from_=0, size=10):
    s = Search(using=es, index="bitshares-*", extra={"size": size, "from": from_})
    s.query = Q("match", block_data__trx_id=trx)

    response = s.execute()
    verify_es_response(response)

    return [hit.to_dict() for hit in response] 
Example #28
Source File: es_wrapper.py    From bitshares-explorer-api with MIT License
def is_alive():
    find_string = datetime.utcnow().strftime("%Y-%m")
    from_date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")

    s = Search(using=es, index="bitshares-" + find_string)
    s.query = Q("range", block_data__block_time={'gte': from_date, 'lte': "now"})
    s.aggs.metric("max_block_time", "max", field="block_data.block_time")

    json_response = {
        "server_time": datetime.utcnow(),
        "head_block_timestamp": None,
        "head_block_time": None
    }

    response = s.execute()
    verify_es_response(response)

    if response.aggregations.max_block_time.value is not None:
        json_response["head_block_time"] = str(response.aggregations.max_block_time.value_as_string)
        json_response["head_block_timestamp"] = response.aggregations.max_block_time.value
        json_response["deltatime"] = abs((datetime.utcfromtimestamp(json_response["head_block_timestamp"] / 1000) - json_response["server_time"]).total_seconds())
        if json_response["deltatime"] < 30:
            json_response["status"] = "ok"
        else:
            json_response["status"] = "out_of_sync"
            json_response["error"] = "last_block_too_old"
    else:
        json_response["status"] = "out_of_sync"
        json_response["deltatime"] = "Infinite"
        json_response["query_index"] = find_string
        json_response["query_from_date"] = from_date
        json_response["error"] = "no_blocks_last_24_hours"

    return json_response 
Example #29
Source File: es_wrapper.py    From bitshares-explorer-api with MIT License
def get_single_operation(operation_id):
    s = Search(using=es, index="bitshares-*")
    s.query = Q("match", account_history__operation_id=operation_id)

    response = s.execute()
    verify_es_response(response)

    return [hit.to_dict() for hit in response][0] 
Example #30
Source File: utils.py    From userline with BSD 3-Clause "New" or "Revised" License
def get_last_shutdown(index,maxtstamp,pattern):
	"""
	Look for the last shutdown event
	"""

	conn = connections.get_connection()

	q = [ \
		Q('match',data_type='windows:evtx:record') , \
		Q('match',event_identifier=config.EVENT_SHUTDOWN)
	]

	if pattern:
		q.append(Q('query_string',query=pattern,analyze_wildcard=True))

	s = Search(using=conn, index=index).query(Q('bool',must=q)).filter('range',datetime={'lte':maxtstamp}).sort('-datetime')[0:0]
	s.aggs.bucket('computer','terms',field='computer_name.keyword').bucket('shutdown','top_hits',size=1)

	res = s.execute()
	ret = {}
	for item in res.aggregations['computer']['buckets']:
		ret[item['key']] = item['shutdown']['hits']['hits'][0]

	if len(ret.keys()) == 0:
		ret = None

	return ret