Python elasticsearch_dsl.Index() Examples

The following are 30 code examples of elasticsearch_dsl.Index(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module elasticsearch_dsl, or try the search function.
Example #1
Source File: test_index.py    From elasticsearch-dsl-py with Apache License 2.0 6 votes vote down vote up
def test_index_template_works(write_client):
    """Save an index template for 'test-*', create 'test-blog', and check its mapping."""
    template = IndexTemplate('test-template', 'test-*')
    template.document(Post)
    template.settings(number_of_replicas=0, number_of_shards=1)
    template.save()

    # The index itself is created without any document or settings attached.
    blog_index = Index('test-blog')
    blog_index.create()

    expected_mapping = {
        'test-blog': {
            'mappings': {
                'properties': {
                    'title': {'type': 'text', 'analyzer': 'my_analyzer'},
                    'published_from': {'type': 'date'},
                }
            }
        }
    }
    assert expected_mapping == write_client.indices.get_mapping(index='test-blog')
Example #2
Source File: index.py    From series-tiempo-ar-api with MIT License 6 votes vote down vote up
def add_analyzer(index: Index):
    """Register a new analyzer on the index, available to all of its fields.

    The analyzer applies lower case + ASCII folding: it strips accents and
    the use of ñ, among others, to allow text search in Spanish. When any
    synonym terms are stored, a synonym token filter is appended as well.
    """
    synonym_terms = list(Synonym.objects.values_list('terms', flat=True))

    filter_chain = ['lowercase', 'asciifolding']
    if synonym_terms:
        synonym_filter = token_filter(
            constants.SYNONYM_FILTER,
            type='synonym',
            synonyms=synonym_terms,
        )
        filter_chain.append(synonym_filter)

    text_analyzer = analyzer(
        constants.ANALYZER,
        tokenizer='standard',
        filter=filter_chain,
    )
    index.analyzer(text_analyzer)
Example #3
Source File: ElasticBurp.py    From WASE with GNU General Public License v3.0 6 votes vote down vote up
def applyConfig(self):
        """Connect to the configured Elasticsearch host, prepare the index and persist the settings.

        Any exception raised along the way is reported to the user in an
        error dialog instead of being propagated.
        """
        try:
            host = self.confESHost
            index_name = self.confESIndex
            print("Connecting to '%s', index '%s'" % (host, index_name))
            self.es = connections.create_connection(hosts=[host])
            self.idx = Index(index_name)
            self.idx.doc_type(DocHTTPRequestResponse)

            # Create the index when it is missing, otherwise open the existing one.
            if not self.idx.exists():
                self.idx.create()
            else:
                self.idx.open()

            # Persist the working configuration as extension settings.
            tools_setting = str(self.confBurpTools)
            self.callbacks.saveExtensionSetting("elasticburp.host", host)
            self.callbacks.saveExtensionSetting("elasticburp.index", index_name)
            self.callbacks.saveExtensionSetting("elasticburp.tools", tools_setting)
            self.callbacks.saveExtensionSetting("elasticburp.onlyresp", str(int(self.confBurpOnlyResp)))
        except Exception as e:
            message = "<html><p style='width: 300px'>Error while initializing ElasticSearch: %s</p></html>" % (str(e))
            JOptionPane.showMessageDialog(self.panel, message, "Error", JOptionPane.ERROR_MESSAGE)

    ### ITab ### 
Example #4
Source File: index.py    From series-tiempo-ar-api with MIT License 6 votes vote down vote up
def tseries_index(name: str) -> Index:
    """Return the time-series index called *name*, creating it and its mapping when needed.

    The mapping from ``constants.MAPPING`` is (re)applied whenever the
    stored mapping has no truthy ``raw_value`` property.
    """
    ts_index = Index(name)
    doc_type = settings.TS_DOC_TYPE

    # Sets the upper bound on the number of values in a response. When filtering by
    # series, this would be the maximum number of values a single time series can hold.
    ts_index.settings(max_result_window=settings.MAX_SERIES_VALUES)

    index_missing = not ts_index.exists()
    if index_missing:
        ts_index.create()
        ts_index.put_mapping(doc_type=doc_type, body=constants.MAPPING)

    ts_index.save()

    # Update the mapping
    stored_mapping = ts_index.get_mapping(doc_type=doc_type)
    properties = stored_mapping[name]['mappings'][doc_type]['properties']
    has_raw_value = bool(properties.get('raw_value'))
    if not has_raw_value:
        ts_index.put_mapping(doc_type=doc_type, body=constants.MAPPING)

    return ts_index
Example #5
Source File: popularity.py    From series-tiempo-ar-api with MIT License 6 votes vote down vote up
def update_popularity_metadata(distribution: Distribution):
    """Recompute the popularity metadata of every available series of *distribution*.

    Does nothing when the query index does not exist or when the
    distribution has no available series.
    """
    query_index = Index(SeriesQuery._doc_type.index)
    if not query_index.exists():
        return

    series = SeriesRepository.get_available_series(distribution=distribution)
    series_ids = series.values_list('identifier', flat=True)
    if not series_ids:
        return

    for meta_key, days in KEY_DAYS_PAIRS:
        search = SeriesQuery.search()
        if days:
            # Restrict the aggregation to the last `days` days.
            search = search.filter('range', timestamp={'gte': f'now-{days}d/d'})

        filters_by_serie = {}
        for serie_id in series_ids:
            filters_by_serie[serie_id] = get_serie_filter(serie_id)

        aggregated = popularity_aggregation(search, filters_by_serie)
        update_series_popularity_metadata(aggregated, meta_key, series)
Example #6
Source File: test_index.py    From elasticsearch-dsl-py with Apache License 2.0 6 votes vote down vote up
def test_multiple_indices_with_same_doc_type_work(write_client):
    """Create two indices sharing the ``Post`` document and check the analysis settings of both."""
    index_names = ('test-index-1', 'test-index-2')
    indices = [Index(name, using=write_client) for name in index_names]

    for idx in indices:
        idx.document(Post)
        idx.create()

    expected_analysis = {
        'analyzer': {
            'my_analyzer': {
                'type': 'custom',
                'tokenizer': 'keyword'
            }
        }
    }
    for name in index_names:
        index_settings = write_client.indices.get_settings(index=name)
        assert index_settings[name]['settings']['index']['analysis'] == expected_analysis
Example #7
Source File: indicator.py    From bearded-avenger with Mozilla Public License 2.0 6 votes vote down vote up
def _create_index(self):
        """Return the current index name, creating the index first when it does not exist.

        The last returned name is reused for two minutes so that the
        existence check is not repeated on every call.
        """
        # https://github.com/csirtgadgets/massive-octo-spice/blob/develop/elasticsearch/observables.json
        # http://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.bulk

        # every time we check it does a HEAD req
        if self.last_index_value:
            since_last_check = datetime.utcnow() - self.last_index_check
            if since_last_check < timedelta(minutes=2):
                return self.last_index_value

        current_name = self._current_index()

        already_there = self.handle.indices.exists(current_name)
        if not already_there:
            new_index = Index(current_name)
            new_index.aliases(live={})
            new_index.doc_type(Indicator)
            new_index.settings(max_result_window=WINDOW_LIMIT)
            new_index.create()
            self.handle.indices.flush(current_name)

        # Remember when and what was checked for the short-circuit above.
        self.last_index_check = datetime.utcnow()
        self.last_index_value = current_name
        return current_name
Example #8
Source File: elastic_logs.py    From quay with Apache License 2.0 6 votes vote down vote up
def init(cls, index_prefix, index_settings=None, skip_template_init=False):
        """
        Bind this document class to the wildcard index ``<index_prefix>*`` with the given
        settings and, unless ``skip_template_init`` is set, create or update the template.
        """
        settings_kwargs = index_settings or {}

        prefix_index = Index(name=index_prefix + "*")
        prefix_index.settings(**settings_kwargs)
        prefix_index.document(cls)

        cls._index = prefix_index
        cls._index_prefix = index_prefix

        if not skip_template_init:
            cls.create_or_update_template()

        # elasticsearch-dsl requires the document's index to be defined as an inner class at
        # the class level, so this function must run before `save` can be called.
        cls._initialized = True
Example #9
Source File: tests.py    From pyspark-elastic with Apache License 2.0 6 votes vote down vote up
def test_dynamic_resource(self):
        """Write documents through the '{idx}/docs' resource pattern and check each target index."""
        for stale_index in ('test-1', 'test-2'):
            Index(stale_index).delete(ignore=404)

        docs1 = [{'idx': 'test-1', 'body': body} for body in ('something', 'else')]
        docs2 = [{'idx': 'test-2', 'body': body} for body in ('abra', 'ca', 'dabra')]

        self.sc.parallelize(docs1 + docs2).saveToEs(resource_write='{idx}/docs')
        self.assertEqual(self.sc.esRDD('test-1/docs').count(), 2)
        self.assertEqual(self.sc.esRDD('test-2/docs').count(), 3)

        stored = self.sc.esRDD('test-1/docs').loads().collectAsMap().values()
        stored_bodies = {doc['body'] for doc in stored}
        expected_bodies = {doc['body'] for doc in docs1}
        self.assertEqual(stored_bodies, expected_bodies)
Example #10
Source File: es_config.py    From georef-ar-api with MIT License 5 votes vote down vote up
def create_index(es, name, doc_class, shards, replicas, synonyms=None,
                 excluding_terms=None):
    """Create an Elasticsearch index from a name and a document class.

    Args:
        es (elasticsearch.Elasticsearch): Elasticsearch client.
        name (str): Name of the index to create.
        doc_class (type): Document class (must inherit from Document).
        shards (int): Number of shards to use for the index.
        replicas (int): Number of replicas per shard.
        synonyms (list): List of synonyms to use when the
            'name_analyzer_synonyms' analyzer is needed.
        excluding_terms (list): List of excluding terms to use when the
            'name_analyzer_excluding_terms' analyzer is needed.

    """
    new_index = Index(name)

    # Each optional analyzer is created only when explicitly requested. If
    # the 'doc_class' document uses one of them anywhere in its mapping, the
    # corresponding list should be present.
    optional_analyzers = (
        (synonyms, gen_name_analyzer_synonyms),
        (excluding_terms, gen_name_analyzer_excluding_terms),
    )
    for terms, build_analyzer in optional_analyzers:
        if terms is not None:
            new_index.analyzer(build_analyzer(terms))

    new_index.document(doc_class)
    new_index.settings(number_of_shards=shards, number_of_replicas=replicas)
    new_index.create(using=es)
Example #11
Source File: test_index.py    From elasticsearch-dsl-py with Apache License 2.0 5 votes vote down vote up
def test_index_can_be_created_with_settings_and_mappings(write_client):
    """Create 'test-blog' with a document and settings, then verify the stored mapping and settings."""
    blog_index = Index('test-blog', using=write_client)
    blog_index.document(Post)
    blog_index.settings(number_of_replicas=0, number_of_shards=1)
    blog_index.create()

    expected_mapping = {
        'test-blog': {
            'mappings': {
                'properties': {
                    'title': {'type': 'text', 'analyzer': 'my_analyzer'},
                    'published_from': {'type': 'date'}
                }
            }
        }
    }
    assert expected_mapping == write_client.indices.get_mapping(index='test-blog')

    all_settings = write_client.indices.get_settings(index='test-blog')
    index_settings = all_settings['test-blog']['settings']['index']
    # The numeric settings are compared against their string form.
    assert index_settings['number_of_replicas'] == '0'
    assert index_settings['number_of_shards'] == '1'
    assert index_settings['analysis'] == {
        'analyzer': {
            'my_analyzer': {
                'type': 'custom',
                'tokenizer': 'keyword'
            }
        }
    }
Example #12
Source File: test_index.py    From elasticsearch-dsl-py with Apache License 2.0 5 votes vote down vote up
def test_delete(write_client):
    """Create 'test-index' through the raw client and check that ``Index.delete`` removes it."""
    creation_body = {'settings': {'number_of_replicas': 0, 'number_of_shards': 1}}
    write_client.indices.create(index='test-index', body=creation_body)

    Index('test-index', using=write_client).delete()

    still_exists = write_client.indices.exists(index='test-index')
    assert not still_exists
Example #13
Source File: indexer.py    From open-ledger with MIT License 5 votes vote down vote up
def index_all_images(self, chunk_size=DEFAULT_CHUNK_SIZE, num_iterations=DEFAULT_NUM_ITERATIONS,
                         num_threads=DEFAULT_NUM_THREADS):
        """Index every record in the database with a server-side cursor

        The index and its mapping are created first when missing; the work is
        then split into ``num_iterations`` chunks of ``chunk_size`` records
        handed to ``do_index`` through a pool of ``num_threads`` workers.
        """
        index_name = settings.ELASTICSEARCH_INDEX
        if not Index(index_name).exists():
            log.info("Creating new index %s", index_name)
            search.Image.init()
            search.Image._doc_type.mapping.save(index_name)
            log.info("Done creating new index")

        with Pool(num_threads) as pool:
            # One (start offset, chunk size) argument pair per iteration.
            work_items = [(iteration * chunk_size, chunk_size) for iteration in range(num_iterations)]
            pool.starmap(do_index, work_items)
Example #14
Source File: test_search.py    From open-ledger with MIT License 5 votes vote down vote up
def tearDown(self):
        """Delete the configured index after the test, ignoring a 404 response."""
        Index(settings.ELASTICSEARCH_INDEX).delete(ignore=404)
Example #15
Source File: test_search.py    From open-ledger with MIT License 5 votes vote down vote up
def _index_img(self, img):
        """Index a single img and ensure that it's been propagated to the search engine"""
        search.db_image_to_index(img).save()

        # Flush and refresh the index after saving the document.
        es_index = Index(name=settings.ELASTICSEARCH_INDEX)
        es_index.flush(force=True)
        es_index.refresh()
Example #16
Source File: tor_elasticsearch.py    From freshonions-torscraper with GNU Affero General Public License v3.0 5 votes vote down vote up
def migrate():
    """Drop the 'hiddenservices' index (if any) and recreate it with both document types."""
    index_name = 'hiddenservices'
    Index(index_name).delete(ignore=404)

    fresh_index = Index(index_name)
    for doc_cls in (DomainDocType, PageDocType):
        fresh_index.doc_type(doc_cls)
    fresh_index.settings(number_of_shards=8, number_of_replicas=1)
    fresh_index.create()
Example #17
Source File: admin_index_test.py    From hepdata with GNU General Public License v2.0 5 votes vote down vote up
def test_recreate_index(admin_idx):
    """After ``recreate_index`` the index named by ``admin_idx.index`` must exist."""
    admin_idx.recreate_index()

    recreated = Index(admin_idx.index)
    assert recreated.exists()
Example #18
Source File: zelasticsearch.py    From csirtg-smrt-v1 with Mozilla Public License 2.0 5 votes vote down vote up
def _create_index(self):
        """Return the name of this month's indicators index, creating it when missing.

        The name has the form ``indicators-YYYY.MM`` based on the current UTC
        date. A newly created index gets the ``live`` alias, the ``Indicator``
        document type and an extra ``indicator`` mapping.
        """
        month_stamp = datetime.utcnow().strftime('%Y.%m')
        client = connections.get_connection()
        index_name = 'indicators-{}'.format(month_stamp)

        if client.indices.exists(index_name):
            return index_name

        monthly_index = Index(index_name)
        monthly_index.aliases(live={})
        monthly_index.doc_type(Indicator)
        monthly_index.create()

        extra_mapping = Mapping('indicator')
        extra_mapping.field('indicator_ipv4', 'ip')
        extra_mapping.field('indicator_ipv4_mask', 'integer')
        extra_mapping.save(index_name)
        return index_name
Example #19
Source File: index.py    From series-tiempo-ar-api with MIT License 5 votes vote down vote up
def __init__(self, index: str = SERIES_QUERY_INDEX_NAME):
        """Bind the ``SeriesQuery`` document type to *index* and keep the default connection."""
        query_index = Index(index)
        query_index.doc_type(SeriesQuery)
        self.es_index = query_index
        self.es_connection = connections.get_connection()
Example #20
Source File: indexer_tests.py    From series-tiempo-ar-api with MIT License 5 votes vote down vote up
def setUpClass(cls):
        """Clear all catalogs and prepare a randomly named index with the custom analyzer."""
        super(IndexerTests, cls).setUpClass()
        Catalog.objects.all().delete()

        # The random name is lower-cased before being used as an index name.
        random_name = faker.Faker().pystr(max_chars=50).lower()
        cls.fake_index = Index(random_name)
        add_analyzer(cls.fake_index)
Example #21
Source File: api.py    From hepdata with GNU General Public License v2.0 5 votes vote down vote up
def recreate_index(self):
        """ Delete the index named by ``self.index`` and initialise it again.

        A missing index (404) is ignored on deletion; the index is then
        set up through ``ESSubmission.init``.
        """
        index_name = self.index
        Index(index_name).delete(ignore=404)

        ESSubmission.init(index_name)
Example #22
Source File: test_document.py    From elasticsearch-dsl-py with Apache License 2.0 5 votes vote down vote up
def test_from_es_respects_underscored_non_meta_fields():
    """Underscore-prefixed keys in 'fields' and '_source' must survive ``from_es``."""
    class Company(document.Document):
        class Index:
            name = 'test-company'

    hit = {
        "_index": "test-index",
        "_id": "elasticsearch",
        "_score": 12.0,
        "fields": {
            "hello": "world",
            "_routing": "es",
            "_tags": ["search"]
        },
        "_source": {
            "city": "Amsterdam",
            "name": "Elasticsearch",
            "_tagline": "You know, for search"
        }
    }

    company = Company.from_es(hit)

    assert company.meta.fields._tags == ['search']
    assert company.meta.fields._routing == 'es'
    assert company._tagline == 'You know, for search'
Example #23
Source File: test_document.py    From elasticsearch-dsl-py with Apache License 2.0 5 votes vote down vote up
def test_inherited_doc_types_can_override_index():
    """A subclass with its own inner ``Index`` class must use that index definition."""
    class MyDocDifferentIndex(MySubDoc):
        class Index:
            name = 'not-default-index'
            settings = {
                'number_of_replicas': 0
            }
            aliases = {'a': {}}
            analyzers = [analyzer('my_analizer', tokenizer='keyword')]

    expected_mappings = {
        'properties': {
            'created_at': {'type': 'date'},
            'inner': {
                'type': 'object',
                'properties': {
                    'old_field': {'type': 'text'}
                },
            },
            'name': {'type': 'keyword'},
            'title': {'type': 'keyword'}
        }
    }
    expected_settings = {
        'analysis': {
            'analyzer': {
                'my_analizer': {'tokenizer': 'keyword', 'type': 'custom'}
            }
        },
        'number_of_replicas': 0
    }

    assert MyDocDifferentIndex._index._name == 'not-default-index'
    assert MyDocDifferentIndex()._get_index() == 'not-default-index'
    assert MyDocDifferentIndex._index.to_dict() == {
        'aliases': {'a': {}},
        'mappings': expected_mappings,
        'settings': expected_settings,
    }
Example #24
Source File: test_document.py    From elasticsearch-dsl-py with Apache License 2.0 5 votes vote down vote up
def test_matches_accepts_wildcards():
    """A document whose index name is 'my-*' matches 'my-index' but not 'not-my-index'."""
    class MyDoc(document.Document):
        class Index:
            name = 'my-*'

    matching_hit = {'_index': 'my-index'}
    other_hit = {'_index': 'not-my-index'}

    assert MyDoc._matches(matching_hit)
    assert not MyDoc._matches(other_hit)
Example #25
Source File: test_index.py    From elasticsearch-dsl-py with Apache License 2.0 5 votes vote down vote up
def test_index_exists(data_client):
    """``exists()`` is true for the 'git' index and false for 'not-there'."""
    present = Index('git')
    assert present.exists()

    absent = Index('not-there')
    assert not absent.exists()
Example #26
Source File: test_document.py    From elasticsearch-dsl-py with Apache License 2.0 5 votes vote down vote up
def test_conflicting_mapping_raises_error_in_index_to_dict():
    """Two documents typing 'name' differently must make ``Index.to_dict`` raise ValueError."""
    class A(document.Document):
        name = field.Text()

    class B(document.Document):
        name = field.Keyword()

    shared_index = Index('i')
    for doc_cls in (A, B):
        shared_index.document(doc_cls)

    with raises(ValueError):
        shared_index.to_dict()
Example #27
Source File: tests.py    From pyspark-elastic with Apache License 2.0 5 votes vote down vote up
def test_save_exclude_fields(self):
        """Documents saved with ``mapping_exclude='body'`` must come back without a 'body' key."""
        records = [
            {'title': title, 'body': body}
            for title, body in (('1', 'a'), ('2', 'b'), ('1', 'c'))
        ]

        self.sc.parallelize(records).saveToEs(self.resource, mapping_exclude='body')

        stored = self.rdd().collect()
        self.assertEqual(len(stored), 3)
        for item in stored:
            self.assertNotIn('body', item)

#     def test_save_with_script(self):
#         # es.update.script
#         # es.update.script.lang
#         # es.update.script.params
#         pass
#
    # TODO
    # def test_autocreate_index(self):
    #     index = Index('pyspark_elastic_non_existing')
    #     index.delete(ignore=404)
    #
    #     def save():
    #         self.docs.saveToEs(index._name + '/doc_type', index_auto_create='no')
    #     self.assertRaises(Exception, save) 
Example #28
Source File: tests.py    From pyspark-elastic with Apache License 2.0 5 votes vote down vote up
def setUp(self):
        """Create the 'pyspark_elastic' index (a 400 response is ignored) and derive the resource path."""
        test_index = Index('pyspark_elastic')
        self.index = test_index
        test_index.settings(number_of_shards=4)
        test_index.create(ignore=400)

        # The document type is registered on the Index object after create().
        test_index.doc_type(self.TestDoc)

        # Resource path has the form '<index name>/<doc type name>'.
        self.resource = '/'.join((self.index._name, self.TestDoc._doc_type.name))
Example #29
Source File: elasticsearch.py    From qb with MIT License 5 votes vote down vote up
def __init__(self, name='qb', similarity='default', bm25_b=None, bm25_k1=None):
        """Set up the index handle and answer document type for *name*.

        ``bm25_b`` and ``bm25_k1`` fall back to 0.75 and 1.2 respectively
        when passed as ``None``.
        """
        self.name = name
        self.ix = Index(self.name)
        self.answer_doc = create_doctype(self.name, similarity)
        self.bm25_b = .75 if bm25_b is None else bm25_b
        self.bm25_k1 = 1.2 if bm25_k1 is None else bm25_k1
Example #30
Source File: elasticsearch.py    From qb with MIT License 5 votes vote down vote up
def build_large_docs(self, documents: Dict[str, str], use_wiki=True, use_qb=True, rebuild_index=False):
        """Index one large document per page, combining wiki text and question text.

        The index is deleted first when ``rebuild_index`` is true or the
        ``QB_REBUILD_INDEX`` environment variable parses to a non-zero integer.
        If the index then exists, only a log message is written; otherwise it
        is initialised and one ``answer_doc`` is saved per key of ``documents``.

        :param documents: mapping whose keys are used as pages and whose values
            are used as the question content of each page
        :param use_wiki: when true, include the ``Wikipedia`` lookup text for
            pages present in the lookup; otherwise store an empty string
        :param use_qb: when true, include ``documents[page]``; otherwise store
            an empty string
        :param rebuild_index: force deletion of the index before building
        """
        # The environment variable acts as an external override for rebuild_index.
        if rebuild_index or bool(int(os.getenv('QB_REBUILD_INDEX', 0))):
            log.info(f'Deleting index: {self.name}')
            self.delete()

        if self.exists():
            # Existing index: nothing is (re)indexed in this branch.
            log.info(f'Index {self.name} exists')
        else:
            log.info(f'Index {self.name} does not exist')
            self.init()
            wiki_lookup = Wikipedia()
            log.info('Indexing questions and corresponding wikipedia pages as large docs...')
            for page in tqdm.tqdm(documents):
                # Pages missing from the lookup get empty wiki content.
                if use_wiki and page in wiki_lookup:
                    wiki_content = wiki_lookup[page].text
                else:
                    wiki_content = ''

                if use_qb:
                    qb_content = documents[page]
                else:
                    qb_content = ''

                answer = self.answer_doc(
                    page=page,
                    wiki_content=wiki_content, qb_content=qb_content
                )
                answer.save(index=self.name)