Python elasticsearch_dsl.analyzer() Examples

The following are 15 code examples of elasticsearch_dsl.analyzer(), collected from open-source projects. The source file, originating project, and license are noted above each example.
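As a primer, here is a minimal sketch of what analyzer() builds: a named custom analyzer that, once registered on an Index, serializes into the index's analysis settings. The analyzer name and filter chain below are illustrative only, not taken from any of the projects that follow.

from elasticsearch_dsl import Index, analyzer

# Build a custom analyzer: standard tokenizer followed by lowercasing.
my_analyzer = analyzer('my_custom_analyzer',
                       tokenizer='standard',
                       filter=['lowercase'])

# Registering it on an Index places it under settings.analysis.analyzer.
idx = Index('example-index')
idx.analyzer(my_analyzer)
assert idx.to_dict()['settings']['analysis']['analyzer'] == {
    'my_custom_analyzer': {
        'type': 'custom',
        'tokenizer': 'standard',
        'filter': ['lowercase'],
    }
}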
Example #1
Source File: test_index.py    From elasticsearch-dsl-py with Apache License 2.0
import string
from random import choice

from elasticsearch_dsl import Index, analyzer


def test_cloned_index_has_analysis_attribute():
    """
    Regression test for Issue #582 in which `Index.clone()` was not copying
    over the `_analysis` attribute.
    """
    client = object()
    i = Index('my-index', using=client)

    random_analyzer_name = ''.join((choice(string.ascii_letters) for _ in range(100)))
    random_analyzer = analyzer(random_analyzer_name, tokenizer="standard", filter="standard")

    i.analyzer(random_analyzer)

    i2 = i.clone('my-clone-index')

    assert i.to_dict()['settings']['analysis'] == i2.to_dict()['settings']['analysis'] 
Example #2
Source File: index.py    From series-tiempo-ar-api with MIT License
from elasticsearch_dsl import Index, analyzer, token_filter

# 'Synonym' is a Django model and 'constants' a settings module, both
# defined elsewhere in the series-tiempo-ar-api project.
def add_analyzer(index: Index):
    """Adds a new analyzer to the index, available for use in all of its
    fields. The analyzer applies lowercasing plus ASCII folding: it strips
    accents, the letter ñ and similar marks, to allow searching text in
    Spanish.
    """

    synonyms = list(Synonym.objects.values_list('terms', flat=True))

    filters = ['lowercase', 'asciifolding']
    if synonyms:
        filters.append(token_filter(constants.SYNONYM_FILTER,
                                    type='synonym',
                                    synonyms=synonyms))

    index.analyzer(
        analyzer(constants.ANALYZER,
                 tokenizer='standard',
                 filter=filters)
    ) 
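To see the effect of the lowercase + asciifolding chain in isolation, here is a small sketch using Analyzer.simulate(); it assumes a reachable cluster registered as the default connection, and the analyzer name is illustrative:

from elasticsearch_dsl import analyzer

spanish_text = analyzer('spanish_text',  # illustrative name
                        tokenizer='standard',
                        filter=['lowercase', 'asciifolding'])

# simulate() runs the analyzer server-side via the _analyze API.
tokens = spanish_text.simulate('Búsqueda en Español').tokens
assert [t.token for t in tokens] == ['busqueda', 'en', 'espanol']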
Example #3
Source File: fields.py    From resolwe with Apache License 2.0
# Method of a custom text-field class; ngrams_analyzer and
# ngrams_search_analyzer are analyzers defined at module level in fields.py.
def __init__(self, *args, **kwargs):
    """Construct field."""
    kwargs.setdefault("fields", {})["ngrams"] = {
        "type": "text",
        "analyzer": ngrams_analyzer,
        "search_analyzer": ngrams_search_analyzer,
    }
    super().__init__(*args, **kwargs)
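resolwe defines ngrams_analyzer and ngrams_search_analyzer elsewhere in fields.py. The snippet below is a hypothetical reconstruction of that common pattern (an edge-n-gram index analyzer paired with a plain search analyzer), not resolwe's actual definition:

from elasticsearch_dsl import analyzer, tokenizer

# Hypothetical edge-n-gram setup: the index-time analyzer emits prefixes
# ('proc' -> 'p', 'pr', 'pro', 'proc'), while the search analyzer leaves
# the query text whole so queries match those stored prefixes.
ngrams_tokenizer = tokenizer('ngrams_tokenizer', 'edge_ngram',
                             min_gram=1, max_gram=15,
                             token_chars=['letter', 'digit'])
ngrams_analyzer = analyzer('ngrams_analyzer',
                           tokenizer=ngrams_tokenizer,
                           filter=['lowercase'])
ngrams_search_analyzer = analyzer('ngrams_search_analyzer',
                                  tokenizer='standard',
                                  filter=['lowercase'])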
Example #4
Source File: fields.py    From resolwe with Apache License 2.0
# Method of a custom field class; name_analyzer is an analyzer defined at
# module level in fields.py.
def __init__(self, *args, **kwargs):
    """Construct field."""
    kwargs.setdefault("analyzer", name_analyzer)
    super().__init__(*args, **kwargs)
Example #5
Source File: fields.py    From resolwe with Apache License 2.0
# Method of a custom field class; process_type_analyzer and
# process_type_search_analyzer are analyzers defined at module level in
# fields.py.
def __init__(self, *args, **kwargs):
    """Construct field."""
    kwargs.setdefault("analyzer", process_type_analyzer)
    kwargs.setdefault("search_analyzer", process_type_search_analyzer)
    super().__init__(*args, **kwargs)
Example #6
Source File: test_document.py    From elasticsearch-dsl-py with Apache License 2.0
from elasticsearch_dsl import analyzer


# MySubDoc is a Document subclass defined earlier in the test module.
def test_inherited_doc_types_can_override_index():
    class MyDocDifferentIndex(MySubDoc):
        class Index:
            name = 'not-default-index'
            settings = {
                'number_of_replicas': 0
            }
            aliases = {'a': {}}
            analyzers = [analyzer('my_analizer', tokenizer='keyword')]

    assert MyDocDifferentIndex._index._name == 'not-default-index'
    assert MyDocDifferentIndex()._get_index() == 'not-default-index'
    assert MyDocDifferentIndex._index.to_dict() == {
        'aliases': {'a': {}},
        'mappings': {
            'properties': {
                'created_at': {'type': 'date'},
                'inner': {
                    'type': 'object',
                    'properties': {
                        'old_field': {'type': 'text'}
                    },
                },
                'name': {'type': 'keyword'},
                'title': {'type': 'keyword'}
            }
        },
        'settings': {
            'analysis': {
                'analyzer': {
                    'my_analizer': {'tokenizer': 'keyword', 'type': 'custom'}
                }
            },
            'number_of_replicas': 0
        }
    } 
Example #7
Source File: test_analysis.py    From elasticsearch-dsl-py with Apache License 2.0
from elasticsearch_dsl import analyzer


# 'client' is a pytest fixture that yields a connected Elasticsearch client.
def test_simulate_with_just__builtin_tokenizer(client):
    a = analyzer('my-analyzer', tokenizer='keyword')
    tokens = a.simulate('Hello World!', using=client).tokens

    assert len(tokens) == 1
    assert tokens[0].token == 'Hello World!' 
Example #8
Source File: test_analysis.py    From elasticsearch-dsl-py with Apache License 2.0
from elasticsearch_dsl import analyzer, token_filter, tokenizer


def test_simulate_complex(client):
    a = analyzer('my-analyzer',
                 tokenizer=tokenizer('split_words', 'simple_pattern_split', pattern=':'),
                 filter=['lowercase', token_filter('no-ifs', 'stop', stopwords=['if'])])

    tokens = a.simulate('if:this:works', using=client).tokens

    assert len(tokens) == 2
    assert ['this', 'works'] == [t.token for t in tokens] 
Example #9
Source File: test_analysis.py    From elasticsearch-dsl-py with Apache License 2.0
from elasticsearch_dsl import analyzer


def test_simulate_builtin(client):
    a = analyzer('my-analyzer', 'english')
    # Pass the client explicitly so the simulation does not depend on a
    # default connection being registered.
    tokens = a.simulate('fixes running', using=client).tokens

    assert ['fix', 'run'] == [t.token for t in tokens] 
Example #10
Source File: test_index.py    From elasticsearch-dsl-py with Apache License 2.0
import string
from random import choice

from elasticsearch_dsl import Index, analyzer


def test_analyzers_returned_from_to_dict():
    random_analyzer_name = ''.join((choice(string.ascii_letters) for _ in range(100)))
    random_analyzer = analyzer(random_analyzer_name, tokenizer="standard", filter="standard")
    index = Index('i', using='alias')
    index.analyzer(random_analyzer)

    assert index.to_dict()["settings"]["analysis"]["analyzer"][random_analyzer_name] == {"filter": ["standard"], "type": "custom", "tokenizer": "standard"} 
Example #11
Source File: test_index.py    From elasticsearch-dsl-py with Apache License 2.0
from pytest import raises

from elasticsearch_dsl import Index


def test_conflicting_analyzer_raises_error():
    i = Index('i')
    i.analyzer('my_analyzer', tokenizer='whitespace', filter=['lowercase', 'stop'])

    with raises(ValueError):
        i.analyzer('my_analyzer', tokenizer='keyword', filter=['lowercase', 'stop']) 
Example #12
Source File: es_config.py    From georef-ar-api with MIT License
def gen_name_analyzer_synonyms(synonyms):
    """Creates an analyzer for names with synonyms.

    Args:
        synonyms (list): List of synonyms to use, in Solr format.

    Returns:
        elasticsearch_dsl.analysis.Analyzer: text analyzer named
            'name_analyzer_synonyms'.

    """
    name_synonyms_filter = token_filter(
        'name_synonyms_filter',
        type='synonym',
        synonyms=synonyms
    )

    # name_analyzer_synonyms and spanish_stopwords_filter are module-level
    # names defined elsewhere in es_config.py.
    return analyzer(
        name_analyzer_synonyms,
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            name_synonyms_filter,
            spanish_stopwords_filter
        ]
    ) 
Example #13
Source File: es_config.py    From georef-ar-api with MIT License
def gen_name_analyzer_excluding_terms(excluding_terms):
    """Creates an analyzer for names that returns only excluding terms
    (ET).

    For example, if the ET configuration file contains the following
    rules:

    santa, salta, santo
    caba, cba

    then applying the analyzer to the query 'salta' should return 'santa'
    and 'santo', while searching for 'caba' should return 'cba'.

    The analyzer is used to exclude results from specific searches.

    Args:
        excluding_terms (list): List of ET to use, specified as Solr
            synonyms.

    Returns:
        elasticsearch_dsl.analysis.Analyzer: text analyzer named
            'name_analyzer_excluding_terms'.

    """
    name_excluding_terms_filter = token_filter(
        'name_excluding_terms_filter',
        type='synonym',
        synonyms=excluding_terms
    )

    # name_analyzer_excluding_terms, synonyms_only_filter and
    # spanish_stopwords_filter are module-level names defined elsewhere in
    # es_config.py.
    return analyzer(
        name_analyzer_excluding_terms,
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            name_excluding_terms_filter,
            synonyms_only_filter,
            spanish_stopwords_filter
        ]
    ) 
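A hypothetical call mirroring the rules from the docstring; each list element is one Solr-format synonym rule. Note the function also depends on synonyms_only_filter and spanish_stopwords_filter from es_config.py:

# Hypothetical input; each string is one Solr-format rule.
excluding_terms = [
    'santa, salta, santo',
    'caba, cba',
]
name_analyzer = gen_name_analyzer_excluding_terms(excluding_terms)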
Example #14
Source File: es_config.py    From georef-ar-api with MIT License
def create_index(es, name, doc_class, shards, replicas, synonyms=None,
                 excluding_terms=None):
    """Crea un índice Elasticsearch utilizando un nombre y una clase de
    documento.

    Args:
        es (elasticsearch.Elasticsearch): Cliente Elasticsearch.
        name (str): Nombre del índice a crear.
        doc_class (type): Clase del documento (debe heredar de Document).
        shards (int): Cantidad de "shards" a utilizar para el índice.
        replicas (int): Cantidad de réplicas por "shard".
        synonyms (list): Lista de sinónimos a utilizar en caso de necesitar el
            analizador 'name_analyzer_synonyms'.
        excluding_terms (list): Lista de términos excluyentes a utilizar en
            caso de necesitar el analizador 'name_analyzer_excluding_terms'.

    """
    index = Index(name)

    # Create the 'name_analyzer_synonyms' analyzer only if it was requested
    # explicitly. If the 'doc_class' document uses the analyzer anywhere in
    # its mapping, the 'synonyms' list must be present.
    if synonyms is not None:
        index.analyzer(gen_name_analyzer_synonyms(synonyms))

    # Same reasoning as with 'name_analyzer_synonyms'.
    if excluding_terms is not None:
        index.analyzer(gen_name_analyzer_excluding_terms(excluding_terms))

    index.document(doc_class)
    index.settings(number_of_shards=shards, number_of_replicas=replicas)
    index.create(using=es) 
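A hypothetical invocation; StreetDoc stands in for any Document subclass defined in the project:

from elasticsearch import Elasticsearch

es = Elasticsearch()  # assumes a reachable cluster
create_index(es, 'streets', StreetDoc, shards=3, replicas=1,
             synonyms=['santa, salta, santo'],
             excluding_terms=['caba, cba'])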
Example #15
Source File: documents.py    From libreborme with GNU Affero General Public License v3.0
from elasticsearch_dsl import analyzer, token_filter


def configure_index(idx):
    """Configure ES index settings.

    NOTE: This is unused at the moment. Current issues:
    1. The index needs to be created (index.create() or search_index --create)
    with update_all_types=True, because the 'name' attribute is the same in
    Person and Company.
    https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.client.IndicesClient.create

    name = fields.TextField(attr="fullname", analyzer=lb_analyzer)

    2. How to specify a token filter for an attribute?

    Therefore the index needs to be configured outside Django.
    """
    idx.settings(number_of_shards=1, number_of_replicas=0)
    lb_filter = token_filter(
        "lb_filter",
        "stop",
        stopwords=["i"]
    )
    lb_analyzer = analyzer(
        "lb_analyzer",
        tokenizer="standard",
        filter=["standard", "lb_filter", "asciifolding", "lowercase"]
    )
    return lb_analyzer, lb_filter
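A sketch of how the return values might be wired up outside Django, as the docstring suggests; lb_analyzer carries its token-filter definitions with it, so registering it on the index is enough:

from elasticsearch_dsl import Index

idx = Index('libreborme')  # illustrative index name
lb_analyzer, lb_filter = configure_index(idx)
idx.analyzer(lb_analyzer)
idx.create()  # requires a configured default connection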