Python Examples of elasticsearch_dsl.Index

Source File: test_index.py From elasticsearch-dsl-py with Apache License 2.0

6 votes

def test_index_template_works(write_client):
    """Save an index template, create a matching index and check its mapping."""
    template = IndexTemplate('test-template', 'test-*')
    template.document(Post)
    template.settings(number_of_replicas=0, number_of_shards=1)
    template.save()

    # 'test-blog' falls under the 'test-*' pattern registered above.
    blog_index = Index('test-blog')
    blog_index.create()

    expected_mapping = {
        'test-blog': {
            'mappings': {
                'properties': {
                    'title': {'type': 'text', 'analyzer': 'my_analyzer'},
                    'published_from': {'type': 'date'},
                }
            }
        }
    }
    assert expected_mapping == write_client.indices.get_mapping(index='test-blog')

Source File: index.py From series-tiempo-ar-api with MIT License

6 votes

def add_analyzer(index: Index):
    """Add a new analyzer to the index, available to all of its fields.

    The analyzer applies lower-casing plus ASCII folding: it strips
    accents and the use of "ñ", among others, to allow searching text
    written in Spanish. When synonyms are stored, a synonym token filter
    is appended to the filter chain as well.
    """
    synonym_terms = list(Synonym.objects.values_list('terms', flat=True))

    token_filters = ['lowercase', 'asciifolding']
    if synonym_terms:
        synonym_filter = token_filter(
            constants.SYNONYM_FILTER,
            type='synonym',
            synonyms=synonym_terms,
        )
        token_filters.append(synonym_filter)

    custom_analyzer = analyzer(
        constants.ANALYZER,
        tokenizer='standard',
        filter=token_filters,
    )
    index.analyzer(custom_analyzer)

Source File: ElasticBurp.py From WASE with GNU General Public License v3.0

6 votes

def applyConfig(self):
        """Connect to ElasticSearch, prepare the index and save the settings.

        The index is opened when it already exists and created otherwise.
        Host, index, tools and only-response settings are then stored
        through ``self.callbacks``. Any exception raised along the way is
        shown to the user in an error dialog instead of being propagated.
        """
        try:
            print("Connecting to '%s', index '%s'" % (self.confESHost, self.confESIndex))
            self.es = connections.create_connection(hosts=[self.confESHost])
            self.idx = Index(self.confESIndex)
            self.idx.doc_type(DocHTTPRequestResponse)
            if not self.idx.exists():
                self.idx.create()
            else:
                self.idx.open()

            save_setting = self.callbacks.saveExtensionSetting
            save_setting("elasticburp.host", self.confESHost)
            save_setting("elasticburp.index", self.confESIndex)
            save_setting("elasticburp.tools", str(self.confBurpTools))
            save_setting("elasticburp.onlyresp", str(int(self.confBurpOnlyResp)))
        except Exception as e:
            # UI boundary: report the failure in a dialog rather than raising.
            message = "<html><p style='width: 300px'>Error while initializing ElasticSearch: %s</p></html>" % (str(e))
            JOptionPane.showMessageDialog(self.panel, message, "Error", JOptionPane.ERROR_MESSAGE)

    ### ITab ###

Source File: index.py From series-tiempo-ar-api with MIT License

6 votes

def tseries_index(name: str) -> Index:
    """Return the index called *name*, creating it and its mapping if needed.

    The mapping from ``constants.MAPPING`` is put right after creation,
    and again whenever the stored mapping has no truthy ``raw_value``
    property.
    """
    ts_index = Index(name)

    # Sets the upper bound on the number of values in a response. When
    # filtering by series, this is the maximum number of values a single
    # time series can have.
    ts_index.settings(max_result_window=settings.MAX_SERIES_VALUES)

    def put_series_mapping():
        # Single place for the mapping upload used by both branches below.
        ts_index.put_mapping(doc_type=settings.TS_DOC_TYPE,
                             body=constants.MAPPING)

    if not ts_index.exists():
        ts_index.create()
        put_series_mapping()

    ts_index.save()

    # Update the mapping
    stored_mapping = ts_index.get_mapping(doc_type=settings.TS_DOC_TYPE)
    type_mapping = stored_mapping[name]['mappings'][settings.TS_DOC_TYPE]
    if not type_mapping['properties'].get('raw_value'):
        put_series_mapping()

    return ts_index

Source File: popularity.py From series-tiempo-ar-api with MIT License

6 votes

def update_popularity_metadata(distribution: Distribution):
    """Refresh the popularity metadata of the series of *distribution*.

    Does nothing when the series-query index does not exist or when the
    distribution has no available series. Otherwise, for every
    ``(meta_key, days)`` pair in ``KEY_DAYS_PAIRS``, the queries are
    aggregated per series (restricted to the last ``days`` days when
    ``days`` is truthy) and the result is handed to
    ``update_series_popularity_metadata``.
    """
    query_index = Index(SeriesQuery._doc_type.index)
    if not query_index.exists():
        return

    series = SeriesRepository.get_available_series(distribution=distribution)
    series_ids = series.values_list('identifier', flat=True)
    if not series_ids:
        return

    for meta_key, days in KEY_DAYS_PAIRS:
        search = SeriesQuery.search()
        if days:
            time_window = {'gte': f'now-{days}d/d'}
            search = search.filter('range', timestamp=time_window)

        # One filter bucket per series identifier.
        buckets = {
            serie_id: get_serie_filter(serie_id)
            for serie_id in series_ids
        }
        aggregated = popularity_aggregation(search, buckets)

        update_series_popularity_metadata(aggregated, meta_key, series)

Source File: test_index.py From elasticsearch-dsl-py with Apache License 2.0

6 votes

def test_multiple_indices_with_same_doc_type_work(write_client):
    """Create two indices with the same document and check both analysis settings."""
    index_names = ('test-index-1', 'test-index-2')
    indices = [Index(index_name, using=write_client) for index_name in index_names]

    for index in indices:
        index.document(Post)
        index.create()

    expected_analysis = {
        'analyzer': {
            'my_analyzer': {
                'type': 'custom',
                'tokenizer': 'keyword'
            }
        }
    }
    for index_name in index_names:
        index_settings = write_client.indices.get_settings(index=index_name)
        assert index_settings[index_name]['settings']['index']['analysis'] == expected_analysis

Source File: indicator.py From bearded-avenger with Mozilla Public License 2.0

6 votes

def _create_index(self):
        """Return the current index name, creating the index if it is missing.

        The last returned name is cached on the instance and reused while
        the previous check is less than two minutes old.
        """
        # https://github.com/csirtgadgets/massive-octo-spice/blob/develop/elasticsearch/observables.json
        # http://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.bulk

        # every existence check does a HEAD req, so reuse a recent answer
        if self.last_index_value:
            since_last_check = datetime.utcnow() - self.last_index_check
            if since_last_check < timedelta(minutes=2):
                return self.last_index_value

        index_name = self._current_index()

        if not self.handle.indices.exists(index_name):
            new_index = Index(index_name)
            new_index.aliases(live={})
            new_index.doc_type(Indicator)
            new_index.settings(max_result_window=WINDOW_LIMIT)
            new_index.create()
            self.handle.indices.flush(index_name)

        self.last_index_check = datetime.utcnow()
        self.last_index_value = index_name
        return index_name

Source File: elastic_logs.py From quay with Apache License 2.0

6 votes

def init(cls, index_prefix, index_settings=None, skip_template_init=False):
        """
        Bind the class to the wildcard index ``index_prefix + "*"``.

        The index gets ``index_settings`` (none when omitted) and this
        document class registered on it. Unless ``skip_template_init`` is
        set, ``create_or_update_template`` is called afterwards.
        """
        index = Index(name=index_prefix + "*")
        extra_settings = index_settings or {}
        index.settings(**extra_settings)
        index.document(cls)

        cls._index = index
        cls._index_prefix = index_prefix

        if not skip_template_init:
            cls.create_or_update_template()

        # The elasticsearch-dsl API requires the document's index to be defined as an inner class at the class level,
        # so this function has to be called before `save` can be used.
        cls._initialized = True

Source File: tests.py From pyspark-elastic with Apache License 2.0

6 votes

def test_dynamic_resource(self):
        """Save with a '{idx}/docs' write resource and check the per-index results."""
        # Start from a clean slate; a missing index (404) is fine.
        for index_name in ('test-1', 'test-2'):
            Index(index_name).delete(ignore=404)

        docs1 = [dict(idx='test-1', body=body) for body in ('something', 'else')]
        docs2 = [dict(idx='test-2', body=body) for body in ('abra', 'ca', 'dabra')]

        self.sc.parallelize(docs1 + docs2).saveToEs(resource_write='{idx}/docs')
        self.assertEqual(self.sc.esRDD('test-1/docs').count(), 2)
        self.assertEqual(self.sc.esRDD('test-2/docs').count(), 3)

        stored = self.sc.esRDD('test-1/docs').loads().collectAsMap()
        self.assertEqual(
            {doc['body'] for doc in stored.values()},
            {doc['body'] for doc in docs1}
        )

Source File: es_config.py From georef-ar-api with MIT License

5 votes

def create_index(es, name, doc_class, shards, replicas, synonyms=None,
                 excluding_terms=None):
    """Create an Elasticsearch index from a name and a document class.

    Args:
        es (elasticsearch.Elasticsearch): Elasticsearch client.
        name (str): Name of the index to create.
        doc_class (type): Document class (must inherit from Document).
        shards (int): Number of shards to use for the index.
        replicas (int): Number of replicas per shard.
        synonyms (list): List of synonyms to use when the
            'name_analyzer_synonyms' analyzer is needed.
        excluding_terms (list): List of excluding terms to use when the
            'name_analyzer_excluding_terms' analyzer is needed.

    """
    new_index = Index(name)

    # Create the 'name_analyzer_synonyms' analyzer only when explicitly
    # requested. If documents of type 'doc_class' use the analyzer anywhere
    # in their mapping, the 'synonyms' list should be present.
    if synonyms is not None:
        synonyms_analyzer = gen_name_analyzer_synonyms(synonyms)
        new_index.analyzer(synonyms_analyzer)

    # Same reasoning as for 'name_analyzer_synonyms'.
    if excluding_terms is not None:
        excluding_analyzer = gen_name_analyzer_excluding_terms(excluding_terms)
        new_index.analyzer(excluding_analyzer)

    new_index.document(doc_class)
    new_index.settings(number_of_shards=shards, number_of_replicas=replicas)
    new_index.create(using=es)

Source File: test_index.py From elasticsearch-dsl-py with Apache License 2.0

5 votes

def test_index_can_be_created_with_settings_and_mappings(write_client):
    """Create an index with a document and settings, then check what the client reports."""
    blog_index = Index('test-blog', using=write_client)
    blog_index.document(Post)
    blog_index.settings(number_of_replicas=0, number_of_shards=1)
    blog_index.create()

    expected_mapping = {
        'test-blog': {
            'mappings': {
                'properties': {
                    'title': {'type': 'text', 'analyzer': 'my_analyzer'},
                    'published_from': {'type': 'date'}
                }
            }
        }
    }
    assert expected_mapping == write_client.indices.get_mapping(index='test-blog')

    reported = write_client.indices.get_settings(index='test-blog')
    # The assertions below compare the numeric settings against strings.
    assert reported['test-blog']['settings']['index']['number_of_replicas'] == '0'
    assert reported['test-blog']['settings']['index']['number_of_shards'] == '1'

    expected_analysis = {
        'analyzer': {
            'my_analyzer': {
                'type': 'custom',
                'tokenizer': 'keyword'
            }
        }
    }
    assert reported['test-blog']['settings']['index']['analysis'] == expected_analysis

Source File: test_index.py From elasticsearch-dsl-py with Apache License 2.0

5 votes

def test_delete(write_client):
    """Create an index through the client, delete it via Index and check it is gone."""
    creation_body = {'settings': {'number_of_replicas': 0, 'number_of_shards': 1}}
    write_client.indices.create(index='test-index', body=creation_body)

    doomed_index = Index('test-index', using=write_client)
    doomed_index.delete()

    still_there = write_client.indices.exists(index='test-index')
    assert not still_there

Source File: indexer.py From open-ledger with MIT License

5 votes

def index_all_images(self, chunk_size=DEFAULT_CHUNK_SIZE, num_iterations=DEFAULT_NUM_ITERATIONS,
                         num_threads=DEFAULT_NUM_THREADS):
        """Index every record in the database in chunks.

        The search index is created first when it does not exist yet.
        ``do_index`` is then run through a pool of ``num_threads`` workers,
        once per chunk, with ``(start offset, chunk_size)`` as arguments.
        NOTE(review): the original docstring mentions a server-side cursor;
        presumably that lives inside ``do_index`` -- not visible here.
        """
        search_index = Index(settings.ELASTICSEARCH_INDEX)
        if not search_index.exists():
            log.info("Creating new index %s", settings.ELASTICSEARCH_INDEX)
            search.Image.init()
            image_mapping = search.Image._doc_type.mapping
            image_mapping.save(settings.ELASTICSEARCH_INDEX)
            log.info("Done creating new index")

        with Pool(num_threads) as pool:
            # One (start offset, chunk size) argument pair per iteration.
            work_items = [(step * chunk_size, chunk_size) for step in range(num_iterations)]
            pool.starmap(do_index, work_items)

Source File: test_search.py From open-ledger with MIT License

5 votes

def tearDown(self):
        """Delete the configured search index; a missing index (404) is ignored."""
        Index(settings.ELASTICSEARCH_INDEX).delete(ignore=404)

Source File: test_search.py From open-ledger with MIT License

5 votes

def _index_img(self, img):
        """Index a single img, then flush and refresh the index so it is propagated to the search engine"""
        search.db_image_to_index(img).save()

        search_index = Index(name=settings.ELASTICSEARCH_INDEX)
        search_index.flush(force=True)
        search_index.refresh()

Source File: tor_elasticsearch.py From freshonions-torscraper with GNU Affero General Public License v3.0

5 votes

def migrate():
    """Drop the 'hiddenservices' index (a 404 is ignored) and recreate it.

    The new index registers both document types and is created with
    8 shards and 1 replica.
    """
    Index('hiddenservices').delete(ignore=404)

    recreated = Index('hiddenservices')
    for doc_type in (DomainDocType, PageDocType):
        recreated.doc_type(doc_type)
    recreated.settings(number_of_shards=8, number_of_replicas=1)
    recreated.create()

Source File: admin_index_test.py From hepdata with GNU General Public License v2.0

5 votes

def test_recreate_index(admin_idx):
    """After recreate_index() the admin index must exist."""
    admin_idx.recreate_index()

    recreated = Index(admin_idx.index)
    assert recreated.exists()

Source File: zelasticsearch.py From csirtg-smrt-v1 with Mozilla Public License 2.0

5 votes

def _create_index(self):
        """Make sure this month's indicators index exists and return its name.

        The name is 'indicators-' followed by the current UTC year and
        month. When the index is missing it is created with a 'live' alias
        and the Indicator doc type, and an extra 'indicator' mapping with
        the IPv4 fields is saved on it.
        """
        month_stamp = datetime.utcnow().strftime('%Y.%m')
        es = connections.get_connection()
        index_name = 'indicators-{}'.format(month_stamp)

        if es.indices.exists(index_name):
            return index_name

        index = Index(index_name)
        index.aliases(live={})
        index.doc_type(Indicator)
        index.create()

        ip_mapping = Mapping('indicator')
        ip_mapping.field('indicator_ipv4', 'ip')
        ip_mapping.field('indicator_ipv4_mask', 'integer')
        ip_mapping.save(index_name)
        return index_name

Source File: index.py From series-tiempo-ar-api with MIT License

5 votes

def __init__(self, index: str = SERIES_QUERY_INDEX_NAME):
        """Set up the index object (with SeriesQuery registered) and the connection.

        :param index: name of the index to wrap; defaults to
            ``SERIES_QUERY_INDEX_NAME``.
        """
        self.es_index = query_index = Index(index)
        query_index.doc_type(SeriesQuery)
        self.es_connection = connections.get_connection()

Source File: indexer_tests.py From series-tiempo-ar-api with MIT License

5 votes

def setUpClass(cls):
        """Clear all catalogs and prepare a randomly named index with the analyzer added."""
        super(IndexerTests, cls).setUpClass()
        Catalog.objects.all().delete()

        # Random, lower-cased string used as the index name.
        random_name = faker.Faker().pystr(max_chars=50).lower()
        cls.fake_index = Index(random_name)
        add_analyzer(cls.fake_index)

Source File: api.py From hepdata with GNU General Public License v2.0

5 votes

def recreate_index(self):
        """ Delete the index named by ``self.index`` and initialise it again.

        The delete is issued with ``ignore=404``; the index is then set up
        through ``ESSubmission.init``.
        """
        Index(self.index).delete(ignore=404)

        ESSubmission.init(self.index)

Source File: test_document.py From elasticsearch-dsl-py with Apache License 2.0

5 votes

def test_from_es_respects_underscored_non_meta_fields():
    """Underscore-prefixed keys in 'fields' and '_source' must survive from_es()."""
    hit_fields = {
        "hello": "world",
        "_routing": "es",
        "_tags": ["search"]
    }
    hit_source = {
        "city": "Amsterdam",
        "name": "Elasticsearch",
        "_tagline": "You know, for search"
    }
    hit = {
        "_index": "test-index",
        "_id": "elasticsearch",
        "_score": 12.0,
        "fields": hit_fields,
        "_source": hit_source
    }

    class Company(document.Document):
        class Index:
            name = 'test-company'

    company = Company.from_es(hit)

    # Values from 'fields' are reachable via meta.fields, '_source' values directly.
    assert company.meta.fields._tags == ['search']
    assert company.meta.fields._routing == 'es'
    assert company._tagline == 'You know, for search'

Source File: test_document.py From elasticsearch-dsl-py with Apache License 2.0

5 votes

def test_inherited_doc_types_can_override_index():
    """A subclass with its own inner Index class gets its own name and index definition."""
    class MyDocDifferentIndex(MySubDoc):
        class Index:
            name = 'not-default-index'
            settings = {
                'number_of_replicas': 0
            }
            aliases = {'a': {}}
            analyzers = [analyzer('my_analizer', tokenizer='keyword')]

    assert MyDocDifferentIndex._index._name == 'not-default-index'
    assert MyDocDifferentIndex()._get_index() == 'not-default-index'

    expected_mappings = {
        'properties': {
            'created_at': {'type': 'date'},
            'inner': {
                'type': 'object',
                'properties': {
                    'old_field': {'type': 'text'}
                },
            },
            'name': {'type': 'keyword'},
            'title': {'type': 'keyword'}
        }
    }
    expected_settings = {
        'analysis': {
            'analyzer': {
                'my_analizer': {'tokenizer': 'keyword', 'type': 'custom'}
            }
        },
        'number_of_replicas': 0
    }
    assert MyDocDifferentIndex._index.to_dict() == {
        'aliases': {'a': {}},
        'mappings': expected_mappings,
        'settings': expected_settings
    }

Source File: test_document.py From elasticsearch-dsl-py with Apache License 2.0

5 votes

def test_matches_accepts_wildcards():
    """A document whose index name is 'my-*' matches 'my-index' but not 'not-my-index'."""
    class MyDoc(document.Document):
        class Index:
            name = 'my-*'

    matching_hit = {'_index': 'my-index'}
    other_hit = {'_index': 'not-my-index'}

    assert MyDoc._matches(matching_hit)
    assert not MyDoc._matches(other_hit)

Source File: test_index.py From elasticsearch-dsl-py with Apache License 2.0

5 votes

def test_index_exists(data_client):
    """exists() is true for 'git' and false for 'not-there'.

    NOTE(review): 'git' is presumably loaded by the data_client fixture --
    not visible here.
    """
    present = Index('git')
    assert present.exists()

    absent = Index('not-there')
    assert not absent.exists()

Source File: test_document.py From elasticsearch-dsl-py with Apache License 2.0

5 votes

def test_conflicting_mapping_raises_error_in_index_to_dict():
    """Two documents typing 'name' differently make Index.to_dict() raise ValueError."""
    class A(document.Document):
        name = field.Text()

    class B(document.Document):
        name = field.Keyword()

    shared_index = Index('i')
    for doc_class in (A, B):
        shared_index.document(doc_class)

    with raises(ValueError):
        shared_index.to_dict()

Source File: tests.py From pyspark-elastic with Apache License 2.0

5 votes

def test_save_exclude_fields(self):
        """Saving with mapping_exclude='body' must leave no 'body' key in what is read back."""
        docs = [
            dict(title=title, body=body)
            for title, body in (('1', 'a'), ('2', 'b'), ('1', 'c'))
        ]

        self.sc.parallelize(docs).saveToEs(self.resource, mapping_exclude='body')

        stored = self.rdd().collect()
        self.assertEqual(len(stored), 3)
        for stored_doc in stored:
            self.assertNotIn('body', stored_doc)

#     def test_save_with_script(self):
#         # es.update.script
#         # es.update.script.lang
#         # es.update.script.params
#         pass
#
    # TODO
    # def test_autocreate_index(self):
    #     index = Index('pyspark_elastic_non_existing')
    #     index.delete(ignore=404)
    #
    #     def save():
    #         self.docs.saveToEs(index._name + '/doc_type', index_auto_create='no')
    #     self.assertRaises(Exception, save)

Source File: tests.py From pyspark-elastic with Apache License 2.0

5 votes

def setUp(self):
        """Create the 'pyspark_elastic' index and compute the test resource path.

        A 400 response on creation is ignored. ``self.resource`` becomes
        '<index name>/<doc type name>'.
        """
        index = Index('pyspark_elastic')
        self.index = index
        index.settings(number_of_shards=4)
        index.create(ignore=400)

        index.doc_type(self.TestDoc)

        doc_type_name = self.TestDoc._doc_type.name
        self.resource = self.index._name + '/' + doc_type_name

Source File: elasticsearch.py From qb with MIT License

5 votes

def __init__(self, name='qb', similarity='default', bm25_b=None, bm25_k1=None):
        """Store the index name, its Index object, the answer doc type and BM25 parameters.

        ``bm25_b`` falls back to .75 and ``bm25_k1`` to 1.2 when left as None.
        """
        self.name = name
        self.ix = Index(self.name)
        self.answer_doc = create_doctype(self.name, similarity)
        self.bm25_b = .75 if bm25_b is None else bm25_b
        self.bm25_k1 = 1.2 if bm25_k1 is None else bm25_k1

Source File: elasticsearch.py From qb with MIT License

5 votes

def build_large_docs(self, documents: Dict[str, str], use_wiki=True, use_qb=True, rebuild_index=False):
        """Build the index with one large document per page, unless it already exists.

        The index is deleted first when ``rebuild_index`` is true or the
        ``QB_REBUILD_INDEX`` environment variable parses to a non-zero
        integer. Each saved document holds the page's Wikipedia text (when
        ``use_wiki`` is set and the page is found) and the text from
        ``documents`` (when ``use_qb`` is set); unused parts are empty strings.
        """
        if rebuild_index or bool(int(os.getenv('QB_REBUILD_INDEX', 0))):
            log.info(f'Deleting index: {self.name}')
            self.delete()

        if self.exists():
            log.info(f'Index {self.name} exists')
            return

        log.info(f'Index {self.name} does not exist')
        self.init()
        wiki_lookup = Wikipedia()
        log.info('Indexing questions and corresponding wikipedia pages as large docs...')
        for page in tqdm.tqdm(documents):
            has_wiki_page = use_wiki and page in wiki_lookup
            wiki_content = wiki_lookup[page].text if has_wiki_page else ''
            qb_content = documents[page] if use_qb else ''

            large_doc = self.answer_doc(
                page=page,
                wiki_content=wiki_content, qb_content=qb_content
            )
            large_doc.save(index=self.name)

Python elasticsearch_dsl.Index() Examples