Python elasticsearch_dsl.Index() Examples
The following are 30 code examples of elasticsearch_dsl.Index(), collected from open-source projects. The source file, project, and license are noted above each example. You may also want to check out all available functions and classes of the elasticsearch_dsl module.
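Before the examples, here is a minimal sketch of the basic Index() lifecycle that most of them build on. It is not taken from any of the projects below; the connection host and index name are placeholders.

from elasticsearch_dsl import Index, connections

# Illustrative connection and index name; adjust to your cluster.
connections.create_connection(hosts=['localhost:9200'])

blog = Index('blog-example')
blog.settings(number_of_shards=1, number_of_replicas=0)

if not blog.exists():
    blog.create()

blog.refresh()           # make recently indexed documents searchable
blog.delete(ignore=404)  # clean up; a 404 is ignored if the index is already gone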
Example #1
Source File: test_index.py From elasticsearch-dsl-py with Apache License 2.0 | 6 votes |
def test_index_template_works(write_client):
    it = IndexTemplate('test-template', 'test-*')
    it.document(Post)
    it.settings(number_of_replicas=0, number_of_shards=1)
    it.save()

    i = Index('test-blog')
    i.create()

    assert {
        'test-blog': {
            'mappings': {
                'properties': {
                    'title': {'type': 'text', 'analyzer': 'my_analyzer'},
                    'published_from': {'type': 'date'},
                }
            }
        }
    } == write_client.indices.get_mapping(index='test-blog')
Example #2
Source File: index.py From series-tiempo-ar-api with MIT License | 6 votes |
def add_analyzer(index: Index):
    """Adds a new analyzer to the index, available for use in all of its
    fields. The analyzer applies lower case + ASCII folding: it strips
    accents and the use of ñ, among other things, to allow searching
    Spanish text.
    """
    synonyms = list(Synonym.objects.values_list('terms', flat=True))

    filters = ['lowercase', 'asciifolding']
    if synonyms:
        filters.append(token_filter(constants.SYNONYM_FILTER,
                                    type='synonym',
                                    synonyms=synonyms))

    index.analyzer(
        analyzer(constants.ANALYZER,
                 tokenizer='standard',
                 filter=filters)
    )
Example #3
Source File: ElasticBurp.py From WASE with GNU General Public License v3.0 | 6 votes |
def applyConfig(self):
    try:
        print("Connecting to '%s', index '%s'" % (self.confESHost, self.confESIndex))
        self.es = connections.create_connection(hosts=[self.confESHost])
        self.idx = Index(self.confESIndex)
        self.idx.doc_type(DocHTTPRequestResponse)
        if self.idx.exists():
            self.idx.open()
        else:
            self.idx.create()
        self.callbacks.saveExtensionSetting("elasticburp.host", self.confESHost)
        self.callbacks.saveExtensionSetting("elasticburp.index", self.confESIndex)
        self.callbacks.saveExtensionSetting("elasticburp.tools", str(self.confBurpTools))
        self.callbacks.saveExtensionSetting("elasticburp.onlyresp", str(int(self.confBurpOnlyResp)))
    except Exception as e:
        JOptionPane.showMessageDialog(self.panel, "<html><p style='width: 300px'>Error while initializing ElasticSearch: %s</p></html>" % (str(e)), "Error", JOptionPane.ERROR_MESSAGE)

### ITab ###
Example #4
Source File: index.py From series-tiempo-ar-api with MIT License | 6 votes |
def tseries_index(name: str) -> Index:
    index = Index(name)

    # Sets the upper bound on values in a response. If we filter by series, it
    # is the maximum number of values a single time series can have.
    index.settings(max_result_window=settings.MAX_SERIES_VALUES)

    if not index.exists():
        index.create()
        index.put_mapping(doc_type=settings.TS_DOC_TYPE, body=constants.MAPPING)

    index.save()

    # Update the mapping
    mapping = index.get_mapping(doc_type=settings.TS_DOC_TYPE)
    doc_properties = mapping[name]['mappings'][settings.TS_DOC_TYPE]['properties']
    if not doc_properties.get('raw_value'):
        index.put_mapping(doc_type=settings.TS_DOC_TYPE, body=constants.MAPPING)

    return index
Example #5
Source File: popularity.py From series-tiempo-ar-api with MIT License | 6 votes |
def update_popularity_metadata(distribution: Distribution):
    if not Index(SeriesQuery._doc_type.index).exists():
        return

    series = SeriesRepository.get_available_series(distribution=distribution)
    series_ids = series.values_list('identifier', flat=True)
    if not series_ids:
        return

    for meta_key, days in KEY_DAYS_PAIRS:
        s = SeriesQuery.search()
        if days:
            s = s.filter('range', timestamp={'gte': f'now-{days}d/d'})

        buckets = {serie_id: get_serie_filter(serie_id) for serie_id in series_ids}
        agg_result = popularity_aggregation(s, buckets)
        update_series_popularity_metadata(agg_result, meta_key, series)
Example #6
Source File: test_index.py From elasticsearch-dsl-py with Apache License 2.0 | 6 votes |
def test_multiple_indices_with_same_doc_type_work(write_client):
    i1 = Index('test-index-1', using=write_client)
    i2 = Index('test-index-2', using=write_client)

    for i in (i1, i2):
        i.document(Post)
        i.create()

    for i in ('test-index-1', 'test-index-2'):
        settings = write_client.indices.get_settings(index=i)
        assert settings[i]['settings']['index']['analysis'] == {
            'analyzer': {
                'my_analyzer': {
                    'type': 'custom',
                    'tokenizer': 'keyword'
                }
            }
        }
Example #7
Source File: indicator.py From bearded-avenger with Mozilla Public License 2.0 | 6 votes |
def _create_index(self):
    # https://github.com/csirtgadgets/massive-octo-spice/blob/develop/elasticsearch/observables.json
    # http://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.bulk

    # every time we check it does a HEAD req
    if self.last_index_value and (datetime.utcnow() - self.last_index_check) < timedelta(minutes=2):
        return self.last_index_value

    idx = self._current_index()

    if not self.handle.indices.exists(idx):
        index = Index(idx)
        index.aliases(live={})
        index.doc_type(Indicator)
        index.settings(max_result_window=WINDOW_LIMIT)
        index.create()
        self.handle.indices.flush(idx)

    self.last_index_check = datetime.utcnow()
    self.last_index_value = idx
    return idx
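This pattern (time-partitioned indices that all carry a `live` alias) also appears in Example #18 below. As a hedged illustration, searches can then target the alias rather than a dated index name; the `itype` field here is hypothetical and not part of the project's actual mapping.

from elasticsearch_dsl import Search

# Query the 'live' alias instead of a concrete 'indicators-YYYY.MM' index.
s = Search(index='live').filter('term', itype='ipv4')
response = s.execute()
print(response.hits.total)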
Example #8
Source File: elastic_logs.py From quay with Apache License 2.0 | 6 votes |
def init(cls, index_prefix, index_settings=None, skip_template_init=False):
    """
    Create the index template, and populate LogEntry's mapping and index settings.
    """
    wildcard_index = Index(name=index_prefix + "*")
    wildcard_index.settings(**(index_settings or {}))
    wildcard_index.document(cls)

    cls._index = wildcard_index
    cls._index_prefix = index_prefix

    if not skip_template_init:
        cls.create_or_update_template()

    # Since the elasticsearch-dsl API requires the document's index being defined as an inner class at the class level,
    # this function needs to be called first before being able to call `save`.
    cls._initialized = True
Example #9
Source File: tests.py From pyspark-elastic with Apache License 2.0 | 6 votes |
def test_dynamic_resource(self):
    Index('test-1').delete(ignore=404)
    Index('test-2').delete(ignore=404)

    docs1 = [
        dict(idx='test-1', body='something'),
        dict(idx='test-1', body='else'),
    ]
    docs2 = [
        dict(idx='test-2', body='abra'),
        dict(idx='test-2', body='ca'),
        dict(idx='test-2', body='dabra'),
    ]

    self.sc.parallelize(docs1 + docs2).saveToEs(resource_write='{idx}/docs')

    self.assertEqual(self.sc.esRDD('test-1/docs').count(), 2)
    self.assertEqual(self.sc.esRDD('test-2/docs').count(), 3)

    self.assertEqual(
        set(d['body'] for d in self.sc.esRDD('test-1/docs').loads().collectAsMap().values()),
        set(d['body'] for d in docs1)
    )
Example #10
Source File: es_config.py From georef-ar-api with MIT License | 5 votes |
def create_index(es, name, doc_class, shards, replicas, synonyms=None,
                 excluding_terms=None):
    """Creates an Elasticsearch index from a name and a document class.

    Args:
        es (elasticsearch.Elasticsearch): Elasticsearch client.
        name (str): Name of the index to create.
        doc_class (type): Document class (must inherit from Document).
        shards (int): Number of shards to use for the index.
        replicas (int): Number of replicas per shard.
        synonyms (list): List of synonyms to use when the
            'name_analyzer_synonyms' analyzer is needed.
        excluding_terms (list): List of excluding terms to use when the
            'name_analyzer_excluding_terms' analyzer is needed.

    """
    index = Index(name)

    # Create the 'name_analyzer_synonyms' analyzer only if it was explicitly
    # requested. If the 'doc_class' document uses the analyzer anywhere in its
    # mapping, the 'synonyms' list should be present.
    if synonyms is not None:
        index.analyzer(gen_name_analyzer_synonyms(synonyms))

    # Same reasoning as with 'name_analyzer_synonyms'.
    if excluding_terms is not None:
        index.analyzer(gen_name_analyzer_excluding_terms(excluding_terms))

    index.document(doc_class)
    index.settings(number_of_shards=shards, number_of_replicas=replicas)
    index.create(using=es)
Example #11
Source File: test_index.py From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_index_can_be_created_with_settings_and_mappings(write_client):
    i = Index('test-blog', using=write_client)
    i.document(Post)
    i.settings(number_of_replicas=0, number_of_shards=1)
    i.create()

    assert {
        'test-blog': {
            'mappings': {
                'properties': {
                    'title': {'type': 'text', 'analyzer': 'my_analyzer'},
                    'published_from': {'type': 'date'}
                }
            }
        }
    } == write_client.indices.get_mapping(index='test-blog')

    settings = write_client.indices.get_settings(index='test-blog')
    assert settings['test-blog']['settings']['index']['number_of_replicas'] == '0'
    assert settings['test-blog']['settings']['index']['number_of_shards'] == '1'
    assert settings['test-blog']['settings']['index']['analysis'] == {
        'analyzer': {
            'my_analyzer': {
                'type': 'custom',
                'tokenizer': 'keyword'
            }
        }
    }
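The Post document used in these tests is not shown on this page. A minimal sketch consistent with the mapping and analysis settings asserted above (an assumption, not the library's test fixture verbatim) could look like this:

from elasticsearch_dsl import Date, Document, Text, analyzer

# Custom analyzer matching the asserted analysis settings.
my_analyzer = analyzer('my_analyzer', tokenizer='keyword')

class Post(Document):
    title = Text(analyzer=my_analyzer)
    published_from = Date()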
Example #12
Source File: test_index.py From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_delete(write_client):
    write_client.indices.create(
        index='test-index',
        body={'settings': {'number_of_replicas': 0, 'number_of_shards': 1}}
    )

    i = Index('test-index', using=write_client)
    i.delete()
    assert not write_client.indices.exists(index='test-index')
Example #13
Source File: indexer.py From open-ledger with MIT License | 5 votes |
def index_all_images(self, chunk_size=DEFAULT_CHUNK_SIZE, num_iterations=DEFAULT_NUM_ITERATIONS,
                     num_threads=DEFAULT_NUM_THREADS):
    """Index every record in the database with a server-side cursor"""
    index = Index(settings.ELASTICSEARCH_INDEX)
    if not index.exists():
        log.info("Creating new index %s", settings.ELASTICSEARCH_INDEX)
        search.Image.init()
        mapping = search.Image._doc_type.mapping
        mapping.save(settings.ELASTICSEARCH_INDEX)
        log.info("Done creating new index")

    with Pool(num_threads) as pool:
        starts = [i * chunk_size for i in range(0, num_iterations)]
        pool.starmap(do_index, zip(starts, itertools.repeat(chunk_size, len(starts))))
Example #14
Source File: test_search.py From open-ledger with MIT License | 5 votes |
def tearDown(self):
    index = Index(settings.ELASTICSEARCH_INDEX)
    index.delete(ignore=404)
Example #15
Source File: test_search.py From open-ledger with MIT License | 5 votes |
def _index_img(self, img):
    """Index a single img and ensure that it's been propagated to the search engine"""
    image = search.db_image_to_index(img)
    image.save()

    index = Index(name=settings.ELASTICSEARCH_INDEX)
    index.flush(force=True)
    index.refresh()
Example #16
Source File: tor_elasticsearch.py From freshonions-torscraper with GNU Affero General Public License v3.0 | 5 votes |
def migrate():
    hidden_services = Index('hiddenservices')
    hidden_services.delete(ignore=404)

    hidden_services = Index('hiddenservices')
    hidden_services.doc_type(DomainDocType)
    hidden_services.doc_type(PageDocType)
    hidden_services.settings(number_of_shards=8, number_of_replicas=1)
    hidden_services.create()
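Index.doc_type() is the legacy (pre-7.x elasticsearch-dsl) way of attaching a document class; newer releases use Index.document() with Document subclasses. A rough 7.x-style equivalent of the migration above might look like the sketch below; the field definition is assumed for illustration, not taken from the project.

from elasticsearch_dsl import Document, Index, Text

class PageDoc(Document):
    body = Text()  # assumed field, for illustration only

hidden_services = Index('hiddenservices')
hidden_services.delete(ignore=404)
hidden_services.document(PageDoc)
hidden_services.settings(number_of_shards=8, number_of_replicas=1)
hidden_services.create()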
Example #17
Source File: admin_index_test.py From hepdata with GNU General Public License v2.0 | 5 votes |
def test_recreate_index(admin_idx):
    admin_idx.recreate_index()
    index = Index(admin_idx.index)
    assert index.exists()
Example #18
Source File: zelasticsearch.py From csirtg-smrt-v1 with Mozilla Public License 2.0 | 5 votes |
def _create_index(self):
    dt = datetime.utcnow()
    dt = dt.strftime('%Y.%m')

    es = connections.get_connection()

    if not es.indices.exists('indicators-{}'.format(dt)):
        index = Index('indicators-{}'.format(dt))
        index.aliases(live={})
        index.doc_type(Indicator)
        index.create()

        m = Mapping('indicator')
        m.field('indicator_ipv4', 'ip')
        m.field('indicator_ipv4_mask', 'integer')
        m.save('indicators-{}'.format(dt))

    return 'indicators-{}'.format(dt)
Example #19
Source File: index.py From series-tiempo-ar-api with MIT License | 5 votes |
def __init__(self, index: str = SERIES_QUERY_INDEX_NAME):
    self.es_index = Index(index)
    self.es_index.doc_type(SeriesQuery)
    self.es_connection = connections.get_connection()
Example #20
Source File: indexer_tests.py From series-tiempo-ar-api with MIT License | 5 votes |
def setUpClass(cls):
    super(IndexerTests, cls).setUpClass()
    Catalog.objects.all().delete()
    fake = faker.Faker()
    cls.fake_index = Index(fake.pystr(max_chars=50).lower())
    add_analyzer(cls.fake_index)
Example #21
Source File: api.py From hepdata with GNU General Public License v2.0 | 5 votes |
def recreate_index(self):
    """ Delete and then create a given index and set a default mapping.

    :param index: [string] name of the index. If None a default is used
    """
    submission = Index(self.index)
    submission.delete(ignore=404)

    ESSubmission.init(self.index)
Example #22
Source File: test_document.py From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_from_es_respects_underscored_non_meta_fields():
    doc = {
        "_index": "test-index",
        "_id": "elasticsearch",
        "_score": 12.0,

        "fields": {
            "hello": "world",
            "_routing": "es",
            "_tags": ["search"]
        },

        "_source": {
            "city": "Amsterdam",
            "name": "Elasticsearch",
            "_tagline": "You know, for search"
        }
    }

    class Company(document.Document):
        class Index:
            name = 'test-company'

    c = Company.from_es(doc)

    assert c.meta.fields._tags == ['search']
    assert c.meta.fields._routing == 'es'
    assert c._tagline == 'You know, for search'
Example #23
Source File: test_document.py From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_inherited_doc_types_can_override_index():
    class MyDocDifferentIndex(MySubDoc):
        class Index:
            name = 'not-default-index'
            settings = {
                'number_of_replicas': 0
            }
            aliases = {'a': {}}
            analyzers = [analyzer('my_analizer', tokenizer='keyword')]

    assert MyDocDifferentIndex._index._name == 'not-default-index'
    assert MyDocDifferentIndex()._get_index() == 'not-default-index'
    assert MyDocDifferentIndex._index.to_dict() == {
        'aliases': {'a': {}},
        'mappings': {
            'properties': {
                'created_at': {'type': 'date'},
                'inner': {
                    'type': 'object',
                    'properties': {
                        'old_field': {'type': 'text'}
                    },
                },
                'name': {'type': 'keyword'},
                'title': {'type': 'keyword'}
            }
        },
        'settings': {
            'analysis': {
                'analyzer': {
                    'my_analizer': {'tokenizer': 'keyword', 'type': 'custom'}
                }
            },
            'number_of_replicas': 0
        }
    }
Example #24
Source File: test_document.py From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_matches_accepts_wildcards():
    class MyDoc(document.Document):
        class Index:
            name = 'my-*'

    assert MyDoc._matches({
        '_index': 'my-index'
    })
    assert not MyDoc._matches({
        '_index': 'not-my-index'
    })
Example #25
Source File: test_index.py From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_index_exists(data_client):
    assert Index('git').exists()
    assert not Index('not-there').exists()
Example #26
Source File: test_document.py From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_conflicting_mapping_raises_error_in_index_to_dict():
    class A(document.Document):
        name = field.Text()

    class B(document.Document):
        name = field.Keyword()

    i = Index('i')
    i.document(A)
    i.document(B)

    with raises(ValueError):
        i.to_dict()
Example #27
Source File: tests.py From pyspark-elastic with Apache License 2.0 | 5 votes |
def test_save_exclude_fields(self):
    docs = [
        dict(title='1', body='a'),
        dict(title='2', body='b'),
        dict(title='1', body='c'),
    ]

    self.sc.parallelize(docs).saveToEs(self.resource, mapping_exclude='body')

    read = self.rdd().collect()
    self.assertEqual(len(read), 3)
    for doc in read:
        self.assertNotIn('body', doc)

# def test_save_with_script(self):
#     # es.update.script
#     # es.update.script.lang
#     # es.update.script.params
#     pass

# # TODO
# def test_autocreate_index(self):
#     index = Index('pyspark_elastic_non_existing')
#     index.delete(ignore=404)
#
#     def save():
#         self.docs.saveToEs(index._name + '/doc_type', index_auto_create='no')
#     self.assertRaises(Exception, save)
Example #28
Source File: tests.py From pyspark-elastic with Apache License 2.0 | 5 votes |
def setUp(self):
    self.index = index = Index('pyspark_elastic')
    index.settings(number_of_shards=4)
    index.create(ignore=400)
    index.doc_type(self.TestDoc)

    self.resource = self.index._name + '/' + self.TestDoc._doc_type.name
Example #29
Source File: elasticsearch.py From qb with MIT License | 5 votes |
def __init__(self, name='qb', similarity='default', bm25_b=None, bm25_k1=None):
    self.name = name
    self.ix = Index(self.name)
    self.answer_doc = create_doctype(self.name, similarity)
    if bm25_b is None:
        bm25_b = .75
    if bm25_k1 is None:
        bm25_k1 = 1.2
    self.bm25_b = bm25_b
    self.bm25_k1 = bm25_k1
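The example stores bm25_b and bm25_k1 but does not show how they are applied. One plausible way (an assumption, not the project's confirmed approach) is to register a custom BM25 similarity in the index settings before creating the index:

from elasticsearch_dsl import Index

ix = Index('qb')
# Register a BM25 similarity with the desired b/k1; fields can then reference
# 'custom_bm25' via their 'similarity' option in the mapping.
ix.settings(
    similarity={
        'custom_bm25': {'type': 'BM25', 'b': 0.75, 'k1': 1.2}
    }
)
ix.create()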
Example #30
Source File: elasticsearch.py From qb with MIT License | 5 votes |
def build_large_docs(self, documents: Dict[str, str], use_wiki=True, use_qb=True, rebuild_index=False):
    if rebuild_index or bool(int(os.getenv('QB_REBUILD_INDEX', 0))):
        log.info(f'Deleting index: {self.name}')
        self.delete()

    if self.exists():
        log.info(f'Index {self.name} exists')
    else:
        log.info(f'Index {self.name} does not exist')
        self.init()
        wiki_lookup = Wikipedia()
        log.info('Indexing questions and corresponding wikipedia pages as large docs...')
        for page in tqdm.tqdm(documents):
            if use_wiki and page in wiki_lookup:
                wiki_content = wiki_lookup[page].text
            else:
                wiki_content = ''

            if use_qb:
                qb_content = documents[page]
            else:
                qb_content = ''

            answer = self.answer_doc(
                page=page,
                wiki_content=wiki_content,
                qb_content=qb_content
            )
            answer.save(index=self.name)