Python elasticsearch_dsl.Q Examples
The following are 30 code examples of elasticsearch_dsl.Q().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module elasticsearch_dsl, or try the search function.
Example #1
Source File: search_permissions.py From invenio-app-ils with MIT License | 7 votes |
def search_filter_record_permissions():
    """Build the query that filters results by `_access` and `restricted`.

    Returns a bare ``Q()`` when there is no request context or when the
    current identity is allowed by the backoffice permission. Otherwise
    returns a ``bool`` query whose ``filter`` clause keeps only the records
    the current user may read.
    """
    if not has_request_context() or backoffice_permission().allows(g.identity):
        return Q()

    # A record is public if the `restricted` field is False or missing.
    restricted_field_missing = ~Q("exists", field="restricted")
    is_public = restricted_field_missing | Q("term", restricted=False)

    combined_filter = is_public

    if current_app.config.get("ILS_RECORDS_EXPLICIT_PERMISSIONS_ENABLED"):
        # If `_access.read` exists, check it against what the user provides.
        # It takes precedence over `restricted`.
        _access_field_exists = Q("exists", field="_access.read")
        provides = _get_user_provides()
        user_can_read = _access_field_exists & Q(
            "terms", **{"_access.read": provides}
        )
        # If `_access.read` is absent, fall back to the same public check as
        # above. The previous code negated that condition here, which exposed
        # restricted records and hid public ones.
        combined_filter = user_can_read | (
            ~_access_field_exists & is_public
        )

    return Q("bool", filter=[combined_filter])
Example #2
Source File: elastic.py From ivre with GNU General Public License v3.0 | 6 votes |
def searchport(port, protocol='tcp', state='open', neg=False):
    """Build a query matching records with `protocol`/`port` in `state`.

    When `neg` is true the query is negated, i.e. matching records are
    filtered out instead.

    Hosts with many filtered or closed ports only report a summary of them,
    so this filter may miss such ports. It is always reliable for open ports.
    """
    if port == "host":
        query = Q("nested", path="ports", query=Q("match", ports__port=-1))
    elif state == "open":
        # Open ports are looked up in a per-protocol field.
        open_ports_field = "openports.%s.ports" % protocol
        query = Q("match", **{open_ports_field: port})
    else:
        port_conditions = (
            Q("match", ports__port=port)
            & Q("match", ports__protocol=protocol)
            & Q("match", ports__state_state=state)
        )
        query = Q("nested", path="ports", query=port_conditions)
    return ~query if neg else query
Example #3
Source File: util.py From open-ledger with MIT License | 6 votes |
def correct_license_capitalization(self, provider='europeana', end=None):
    """[#186] Lower-case the license of every indexed record of `provider`.

    Scans all search-engine records whose `provider` term matches, and
    updates each record whose license is not already lower-case. The `end`
    argument is accepted but not used here.
    """
    s = Search().query(Q('term', provider=provider))
    response = s.execute()
    total = response.hits.total
    log.info("Using search engine instance %s", settings.ELASTICSEARCH_URL)

    mod_count = 0
    for count, r in enumerate(s.scan()):
        if r.license.islower():
            continue
        img = search.Image.get(id=r.identifier)
        lowered = img.license.lower()
        log.debug("[%d] Changing license %s to %s", count, img.license, lowered)
        img.update(license=img.license.lower())
        mod_count += 1
    log.info("Modified %d records in search engine", mod_count)
Example #4
Source File: es_search.py From seqr with GNU Affero General Public License v3.0 | 6 votes |
def _pathogenicity_filter(pathogenicity):
    """Build a `terms` query from the `clinvar` and `hgmd` selections.

    Each selected filter name is expanded through its lookup map. The two
    resulting queries are OR-ed together when both are present. Returns
    None when neither selection is given.
    """
    pathogenicity_filter = None

    clinvar_filters = pathogenicity.get('clinvar', [])
    if clinvar_filters:
        significance_terms = set().union(
            *(CLINVAR_SIGNFICANCE_MAP.get(name, []) for name in clinvar_filters)
        )
        pathogenicity_filter = Q(
            'terms', clinvar_clinical_significance=sorted(significance_terms))

    hgmd_filters = pathogenicity.get('hgmd', [])
    if hgmd_filters:
        hgmd_classes = set().union(
            *(HGMD_CLASS_MAP.get(name, []) for name in hgmd_filters)
        )
        hgmd_q = Q('terms', hgmd_class=sorted(hgmd_classes))
        if pathogenicity_filter:
            pathogenicity_filter = pathogenicity_filter | hgmd_q
        else:
            pathogenicity_filter = hgmd_q

    return pathogenicity_filter
Example #5
Source File: util.py From open-ledger with MIT License | 6 votes |
def correct_orphan_records(self, provider='europeana', end=None):
    """[#185] Find search-engine records that are missing from the database.

    Loads the set of known identifiers from a JSON dump and scans every
    indexed record of `provider`. Records whose identifier is not in that
    set are logged and counted. The `end` argument is accepted but not used
    here.

    NOTE(review): the loop only logs and counts orphans; no delete call is
    issued in this block, despite the final "Deleted" log message.
    """
    s = Search()
    q = Q('term', provider=provider)
    s = s.query(q)
    response = s.execute()
    total = response.hits.total

    # A file extracted from the production database listing all of the
    # europeana identifiers.
    identifier_file = '/tmp/europeana-identifiers.json'
    # Use a context manager so the file handle is closed deterministically.
    with open(identifier_file) as fh:
        db_identifiers = set(json.load(fh))
    total_in_db = len(db_identifiers)

    log.info("Using search engine instance %s", settings.ELASTICSEARCH_URL)
    log.info("Total records: %d (search engine), %d (database) [diff=%d]",
             total, total_in_db, total - total_in_db)

    deleted_count = 0
    for r in s.scan():
        if r.identifier not in db_identifiers:
            img = search.Image.get(id=r.identifier)
            log.debug("Going to delete image %s", img)
            deleted_count += 1
    log.info("Deleted %d from search engine", deleted_count)
Example #6
Source File: site_views.py From open-ledger with MIT License | 6 votes |
def about(request):
    """Information about the current site, its goals, and what content is loaded.

    Renders ``about.html`` with a list of per-provider hit counts followed
    by a ``Total`` entry. The list is read from the cache when available and
    rebuilt from the search engine otherwise.
    """
    # Provider counts
    providers = cache.get_or_set(CACHE_STATS_NAME, [], CACHE_STATS_DURATION)
    if not providers:
        for provider in sorted(settings.PROVIDERS.keys()):
            response = Search().query(Q('term', provider=provider)).execute()
            if response.hits.total > 0:
                # Copy the provider entry so the shared settings dict is not
                # mutated with request-time data.
                data = dict(settings.PROVIDERS[provider])
                data['hits'] = intcomma(response.hits.total)
                providers.append(data)

        # All results
        response = Search().execute()
        total = intcomma(response.hits.total)
        providers.append({'display_name': 'Total', 'hits': total})
        # Store with the same duration used for the initial lookup instead of
        # silently falling back to the cache backend's default timeout.
        cache.set(CACHE_STATS_NAME, providers, CACHE_STATS_DURATION)
    return render(request, "about.html", {'providers': providers})
Example #7
Source File: es_search.py From seqr with GNU Affero General Public License v3.0 | 6 votes |
def filter_by_frequency(self, frequencies):
    """Apply population frequency cutoffs as a filter on this search.

    `frequencies` maps a population key to a dict that may carry `af`, `ac`
    and `hh` cutoffs. `af` takes priority over `ac`; `hh` is applied to both
    the `Hom` and `Hemi` fields. All resulting filters are AND-ed together.
    """
    q = Q()
    for pop, freqs in sorted(frequencies.items()):
        af_cutoff = freqs.get('af')
        if af_cutoff is not None:
            candidates = POPULATIONS[pop]['filter_AF']
            # Fall back to the plain AF field when no candidate filter field
            # is present in any index's metadata.
            filter_field = POPULATIONS[pop]['AF']
            for field_key in candidates:
                if any(field_key in index_metadata['fields']
                       for index_metadata in self.index_metadata.values()):
                    filter_field = field_key
                    break
            q &= _pop_freq_filter(filter_field, af_cutoff)
        elif freqs.get('ac') is not None:
            q &= _pop_freq_filter(POPULATIONS[pop]['AC'], freqs['ac'])

        hh_cutoff = freqs.get('hh')
        if hh_cutoff is not None:
            for count_key in ('Hom', 'Hemi'):
                q &= _pop_freq_filter(POPULATIONS[pop][count_key], hh_cutoff)
    self.filter(q)
Example #8
Source File: update_politician_pictures.py From politicos with GNU Affero General Public License v3.0 | 6 votes |
def get_pictures(client):
    """Return one lookup record per politician document of `ANO_ELEICAO`.

    Scans the `politicians` index and builds, for every hit, a dict with the
    document id, the candidate lookup URL, and the `sg_ue` and
    `sq_candidato` fields.
    """
    candidates = Search(using=client, index='politicians')
    candidates = candidates.query(
        Q('bool', must=[Q('match', ano_eleicao=ANO_ELEICAO)])
    )
    candidates = candidates.source(['sq_candidato', 'sg_ue'])

    base_url = (
        'http://divulgacandcontas.tse.jus.br/divulga'
        '/rest/v1/candidatura/buscar'
    )
    return [
        {
            'doc_id': hit.meta.id,
            'url': (
                f'{base_url}/{ANO_ELEICAO}/{hit.sg_ue}/2022802018'
                f'/candidato/{hit.sq_candidato}'
            ),
            'sg_ue': hit.sg_ue,
            'sq_candidato': hit.sq_candidato,
        }
        for hit in candidates.scan()
    ]
Example #9
Source File: study_ceres_onion.py From grimoirelab-elk with GNU General Public License v3.0 | 6 votes |
def process(self, items_block):
    """Compute Onion data for a block of items.

    :param items_block: items to be processed. Expects to find a pandas
        DataFrame.
    :return: a ``ProcessResults`` carrying the number of resulting rows and
        the enriched DataFrame.
    """
    logger.debug("{} Authors to process: {}".format(self.__log_prefix, len(items_block)))

    df_onion = Onion(items_block).enrich(
        member_column=ESOnionConnector.AUTHOR_UUID,
        events_column=ESOnionConnector.CONTRIBUTIONS,
    )

    def _as_quarter(timeframe):
        # Store the quarter as a string.
        return str(pandas.Period(timeframe, 'Q'))

    timeframe_column = ESOnionConnector.TIMEFRAME
    df_onion['quarter'] = df_onion[timeframe_column].map(_as_quarter)

    # Metadata columns, including the enrichment timestamp.
    df_onion['metadata__enriched_on'] = datetime.utcnow().isoformat()
    df_onion['data_source'] = self.data_source
    df_onion['grimoire_creation_date'] = df_onion[timeframe_column]

    logger.debug("{} Final new events: {}".format(self.__log_prefix, len(df_onion)))

    return self.ProcessResults(processed=len(df_onion), out_items=df_onion)
Example #10
Source File: fields.py From udata with GNU Affero General Public License v3.0 | 6 votes |
def get_value_filter(self, filter_value):
    """Return the `range` query for the named range `filter_value`.

    Local fix until the upstream PR is merged:
    https://github.com/elastic/elasticsearch-dsl-py/pull/473
    """
    self.validate_parameter(filter_value)
    lower, upper = self._ranges[filter_value]
    # `gte` on the lower bound and `lt` on the upper bound keep adjacent
    # ranges from overlapping; an open-ended side is simply left out.
    limits = {
        operator: bound
        for operator, bound in (('gte', lower), ('lt', upper))
        if bound is not None
    }
    return Q('range', **{self._params['field']: limits})
Example #11
Source File: test_querysets.py From django-zombodb with MIT License | 6 votes |
def test_dsl_search_no_limit(self):
    """With ``limit=None`` every matching row, duplicates included, is returned."""
    # Clearing the pk before saving duplicates tj and soleil.
    for restaurant in (self.tj, self.soleil):
        restaurant.pk = None
        restaurant.save()

    street_query = ElasticsearchQ('match', street='skillman')
    results = Restaurant.objects.dsl_search(street_query, sort=True, limit=None)

    self.assertEqual(len(results), 4)
    expected_names = [self.soleil.name] * 2 + [self.tj.name] * 2
    self.assertEqual([r.name for r in results], expected_names)
Example #12
Source File: WASEQuery.py From WASE with GNU General Public License v3.0 | 6 votes |
def query_missing(s, field, name, methods=None, responsecodes=None, invert=False):
    """Set up `s` to find documents where `field` does not match `name`.

    With `invert` set, the match is used as-is instead of being negated.
    Optional filters restrict the request method and the response status;
    a status given as ``"low-high"`` becomes an inclusive range. Returns the
    filtered search.
    """
    # main query
    field_match = Q("match", **{field: name})
    s.query = field_match if invert else ~field_match

    # method filter
    if methods:
        s = s.filter("terms", **{'request.method': methods})

    # response code filters
    for rc in responsecodes or ():
        bounds = rc.split("-")
        if len(bounds) == 2:
            status_range = {"gte": int(bounds[0]), "lte": int(bounds[1])}
            s = s.filter("range", **{'response.status': status_range})
        else:
            s = s.filter("term", **{'response.status': rc})

    print_debug(s.to_dict())
    return s
Example #13
Source File: WASEQuery.py From WASE with GNU General Public License v3.0 | 6 votes |
def query_vals(s, field, name, values, invert):
    """Set up `s` to aggregate the values of entry `name` inside `field`.

    When `values` is given, only documents whose nested `field` has a value
    matching that wildcard are considered (or excluded, with `invert`).
    Bucket sizes come from the module-level `args.size`.
    """
    if not values:
        s.query = Q()
    else:
        value_match = Q("wildcard", **{field + ".value.keyword": values})
        nested_match = Q("nested", path=field, query=value_match)
        s.query = ~nested_match if invert else nested_match

    # 1. descend into response.headers/request.parameters
    nested_bucket = s.aggs.bucket("field", "nested", path=field)
    # 2. keep only the requested header/parameter
    name_bucket = nested_bucket.bucket(
        "valuefilter", "filter", Q("match", **{field + ".name": name}))
    # 3. aggregate its values
    values_bucket = name_bucket.bucket(
        "values", "terms", field=field + ".value.keyword", size=args.size)
    # 4. jump back into the main document
    main_bucket = values_bucket.bucket("main", "reverse_nested")
    # 5. aggregate URLs
    main_bucket.bucket("urls", "terms", field="request.url.keyword", size=args.size)
    return s
Example #14
Source File: test_querysets.py From django-zombodb with MIT License | 6 votes |
def test_dsl_search(self):
    """`dsl_search` accepts both composed bool queries and plain Term queries."""
    def search_on_skillman(**extra_match):
        # Bool query: the street must match and so must the extra field.
        return Restaurant.objects.dsl_search(ElasticsearchQ(
            'bool',
            must=[
                ElasticsearchQ('match', street='Skillman Ave'),
                ElasticsearchQ('match', **extra_match),
            ]
        ))

    self.assertCountEqual(search_on_skillman(categories='French'), [self.soleil])
    self.assertCountEqual(
        search_on_skillman(zip_code='11377'), [self.tj, self.soleil])

    by_email = Restaurant.objects.dsl_search(Term(email='alcove@example.org'))
    self.assertCountEqual(by_email, [self.alcove])
Example #15
Source File: es_wrapper.py From bitshares-explorer-api with MIT License | 6 votes |
def get_trade_history(size=10, from_date='2015-10-10', to_date='now', sort_by='-operation_id_num',
                      search_after=None, base="1.3.0", quote="1.3.121"):
    """Return maker fill operations for the `base`/`quote` pair as dicts.

    Results are limited to `size` hits inside the `from_date`..`to_date`
    block-time window and sorted by the comma-separated `sort_by` fields.
    `search_after`, when given, is a comma-separated cursor.
    """
    s = Search(using=es, index="bitshares-*").extra(size=size)
    if search_after:
        s = s.extra(search_after=search_after.split(','))

    conditions = (
        Q("match", operation_type=4),
        Q("match", operation_history__op_object__is_maker=True),
        Q("match", operation_history__op_object__fill_price__base__asset_id=base),
        Q("match", operation_history__op_object__fill_price__quote__asset_id=quote),
    )
    q = Q()
    for condition in conditions:
        q = q & condition

    time_window = {'gte': from_date, 'lte': to_date}
    s.query = q & Q("range", block_data__block_time=time_window)
    s = s.sort(*sort_by.split(','))

    response = s.execute()
    verify_es_response(response)

    return [hit.to_dict() for hit in response]
Example #16
Source File: test_search.py From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_query_can_be_assigned_to():
    """Assigning to ``Search.query`` stores the very same query object."""
    match_query = Q('match', title='python')
    s = search.Search()

    s.query = match_query

    assert s.query._proxied is match_query
Example #17
Source File: utils.py From userline with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_dsl_logoff_query(screen):
    """Build an OR query over the configured logoff event identifiers.

    Every id in ``config.EVENTS_LOGOFF`` contributes one `match` clause.
    When `screen` is exactly ``True``, the ids in
    ``config.EVENTS_LOGOFF_SCREEN`` are added as well. Returns None when
    there are no ids to match.
    """
    event_ids = list(config.EVENTS_LOGOFF)
    if screen is True:
        event_ids.extend(config.EVENTS_LOGOFF_SCREEN)

    q = None
    for evtid in event_ids:
        clause = Q("match", event_identifier=evtid)
        # Seed with the first clause; this also avoids `None | Q(...)` when
        # EVENTS_LOGOFF is empty but screen ids are present.
        q = clause if q is None else q | clause
    return q
Example #18
Source File: search.py From nyaa with GNU General Public License v3.0 | 5 votes |
def _es_name_exact_phrase(literal):
    """Return a phrase-match Query on `display_name.exact` for `literal`."""
    phrase_options = {
        'query': literal,
        'analyzer': 'exact_analyzer'
    }
    return Q({'match_phrase': {'display_name.exact': phrase_options}})
Example #19
Source File: test_querysets.py From django-zombodb with MIT License | 5 votes |
def test_dsl_search_validate(self):
    """With ``validate=True`` an invalid query raises InvalidElasticsearchQuery."""
    broken_query = ElasticsearchQ('bool')
    # Overwrite the query name so the query is no longer valid.
    broken_query.name = 'wrong'

    with self.assertRaises(InvalidElasticsearchQuery) as raised:
        Restaurant.objects.dsl_search(broken_query, validate=True)

    message = str(raised.exception)
    self.assertRegex(message, "Invalid Elasticsearch query: (.+)")
Example #20
Source File: test_querysets.py From django-zombodb with MIT License | 5 votes |
def test_dsl_search_score_attr(self):
    """`score_attr` names the attribute that carries each result's score."""
    street_query = ElasticsearchQ('match', street='skillman')
    results = Restaurant.objects.dsl_search(
        street_query, sort=True, score_attr='custom_score')

    self.assertEqual(len(results), 2)
    for restaurant in results:
        self.assertTrue(hasattr(restaurant, 'custom_score'))
        self.assertGreater(restaurant.custom_score, 0)
Example #21
Source File: crawl.py From weapp-zhihulive with Apache License 2.0 | 5 votes |
async def parse_zhuanlan_link(self, response):
    """Match column posts to Lives and update each matched Live.

    Each post with a cover image is matched to a Live in one of two ways:
    by a phrase match of the post title against the Live subject, restricted
    to the post author; or, failing that, by a Live id extracted from the
    post content with ``LIVE_REGEX``. Every match is passed to
    ``self.update_live``.

    This must be a coroutine function because the body awaits the response
    JSON, the search execution and the update calls.
    """
    posts = await response.json()

    if response.status == 200 and posts:
        for post in posts:
            cover = post['titleImage']
            if not cover:
                continue

            title = post['title']
            # Keep only the part after the last separator.
            for sep in ('-', '—'):
                if sep in title:
                    title = title.split(sep)[-1].strip()
            speaker_id = post['author']['hash']
            zid = post['url'].split('/')[-1]

            s = Live.search().query(Q('match_phrase', subject=title))
            lives = await s.execute()
            for live in lives:
                if live.speaker and live.speaker.speaker_id == speaker_id:
                    await self.update_live(zid, cover, live)
                    break
            else:
                # No Live by this speaker matched the title; fall back to a
                # Live id found in the post content.
                match = LIVE_REGEX.search(post['content'])
                if match:
                    live_id = match.group(2)
                    try:
                        live = await Live.get(live_id)
                    except NotFoundError:
                        pass
                    else:
                        await self.update_live(zid, cover, live)

        # NOTE(review): the next URL is returned only when this page had
        # posts (presumably so paging stops on an empty page) — confirm the
        # intended nesting against the original file.
        return get_next_url(response.url)
Example #22
Source File: more_like_this_validator.py From texta with GNU General Public License v3.0 | 5 votes |
def validate_filter(filter: dict):
    """Check that `filter` can be parsed into a query.

    Logs the failure and raises ``serializers.ValidationError`` when ``Q``
    cannot be built from the given dict.
    """
    try:
        Q(filter)
    except Exception:
        error_logger = logging.getLogger(ERROR_LOGGER)
        error_logger.exception("Could not parse filter query {}.".format(filter))
        raise serializers.ValidationError("Could not parse the filter, query. Make sure you have not included the top 'query' key!")
Example #23
Source File: test_querysets.py From django-zombodb with MIT License | 5 votes |
def test_dsl_search_sort(self):
    """With ``sort=True`` results come back in the expected order."""
    def any_category(*categories):
        # Bool query matching when at least one category matches.
        return ElasticsearchQ(
            'bool',
            should=[
                ElasticsearchQ('match', categories=category)
                for category in categories
            ],
            minimum_should_match=1
        )

    results = Restaurant.objects.dsl_search(
        any_category('sushi', 'asian', 'japanese', 'french'),
        validate=True,
        sort=True)
    self.assertEqual(list(results), [self.tj, self.soleil])

    results = Restaurant.objects.dsl_search(
        any_category('french', 'coffee', 'european', 'sushi'),
        sort=True)
    self.assertEqual(list(results), [self.soleil, self.tj])
Example #24
Source File: tor_elasticsearch.py From freshonions-torscraper with GNU Affero General Public License v3.0 | 5 votes |
def elasticsearch_retrieve_page_by_id(page_id):
    """Return the first hit whose `nid` equals `page_id`, or None if none match."""
    by_nid = Search().filter(Q("term", nid=int(page_id)))
    hits = by_nid[:1].execute().hits
    return None if hits.total == 0 else hits[0]
Example #25
Source File: esnotifications.py From stethoscope with Apache License 2.0 | 5 votes |
def create_query_for_email(self, search, email):
    """Return `search` narrowed by a `match` query on the `email` field."""
    email_match = elasticsearch_dsl.Q({"match": {'email': email}})
    return search.query(email_match)
Example #26
Source File: tor_elasticsearch.py From freshonions-torscraper with GNU Affero General Public License v3.0 | 5 votes |
def elasticsearch_delete_old():
    """Delete documents whose `visited_at` is between NEVER and 30 days ago."""
    cutoff = datetime.now() - timedelta(days=30)
    visited_window = Q("range", visited_at={'from': NEVER, 'to': cutoff})
    Search().filter(visited_window).delete()
Example #27
Source File: es_wrapper.py From bitshares-explorer-api with MIT License | 5 votes |
def get_trx(trx, from_=0, size=10):
    """Return the hits whose block transaction id matches `trx`, as dicts.

    `from_` and `size` are passed to the search as its paging options.
    """
    paging = {"size": size, "from": from_}
    s = Search(using=es, index="bitshares-*", extra=paging)
    s.query = Q("match", block_data__trx_id=trx)

    response = s.execute()
    verify_es_response(response)

    return [doc.to_dict() for doc in response]
Example #28
Source File: es_wrapper.py From bitshares-explorer-api with MIT License | 5 votes |
def is_alive():
    """Report whether the current month's index holds a recent block.

    Queries the last day of block times and returns a status dict. The
    status is ``"ok"`` when the newest block is less than 30 seconds away
    from the server time, and ``"out_of_sync"`` (with an ``error`` key)
    otherwise.
    """
    find_string = datetime.utcnow().strftime("%Y-%m")
    from_date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")

    s = Search(using=es, index="bitshares-" + find_string)
    s.query = Q("range", block_data__block_time={'gte': from_date, 'lte': "now"})
    s.aggs.metric("max_block_time", "max", field="block_data.block_time")

    json_response = {
        "server_time": datetime.utcnow(),
        "head_block_timestamp": None,
        "head_block_time": None
    }

    response = s.execute()
    verify_es_response(response)

    max_block_time = response.aggregations.max_block_time
    if max_block_time.value is None:
        # No block at all in the queried window.
        json_response.update({
            "status": "out_of_sync",
            "deltatime": "Infinite",
            "query_index": find_string,
            "query_from_date": from_date,
            "error": "no_blocks_last_24_hours",
        })
        return json_response

    json_response["head_block_time"] = str(max_block_time.value_as_string)
    json_response["head_block_timestamp"] = max_block_time.value

    # The aggregation value is divided by 1000 before being read as a
    # timestamp in seconds.
    head_block_dt = datetime.utcfromtimestamp(json_response["head_block_timestamp"] / 1000)
    deltatime = abs((head_block_dt - json_response["server_time"]).total_seconds())
    json_response["deltatime"] = deltatime

    if deltatime < 30:
        json_response["status"] = "ok"
    else:
        json_response["status"] = "out_of_sync"
        json_response["error"] = "last_block_too_old"

    return json_response
Example #29
Source File: es_wrapper.py From bitshares-explorer-api with MIT License | 5 votes |
def get_single_operation(operation_id):
    """Return the first hit matching `operation_id` as a dict.

    Raises IndexError when the search returns no hits.
    """
    s = Search(using=es, index="bitshares-*")
    s.query = Q("match", account_history__operation_id=operation_id)

    response = s.execute()
    verify_es_response(response)

    operations = [hit.to_dict() for hit in response]
    return operations[0]
Example #30
Source File: utils.py From userline with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_last_shutdown(index, maxtstamp, pattern):
    """Look for the last shutdown event of each computer.

    Searches `index` for shutdown event records dated at or before
    `maxtstamp`, optionally narrowed by the query string `pattern`. Returns
    a dict mapping each computer name to its top shutdown hit, or None when
    no computer bucket comes back.
    """
    conn = connections.get_connection()

    must_clauses = [
        Q('match', data_type='windows:evtx:record'),
        Q('match', event_identifier=config.EVENT_SHUTDOWN),
    ]
    if pattern:
        must_clauses.append(Q('query_string', query=pattern, analyze_wildcard=True))

    s = Search(using=conn, index=index).query(Q('bool', must=must_clauses))
    s = s.filter('range', datetime={'lte': maxtstamp})
    # The empty slice requests no plain hits; only the aggregation is read.
    s = s.sort('-datetime')[0:0]

    per_computer = s.aggs.bucket('computer', 'terms', field='computer_name.keyword')
    per_computer.bucket('shutdown', 'top_hits', size=1)

    res = s.execute()
    ret = {
        item['key']: item['shutdown']['hits']['hits'][0]
        for item in res.aggregations['computer']['buckets']
    }
    return ret or None