Python elasticsearch_dsl.analyzer() Examples

The following are 15 code examples of elasticsearch_dsl.analyzer(), collected from open-source projects. The source file, originating project, and license are noted above each example.
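As a primer, here is a minimal sketch of what analyzer() builds: a named custom analyzer that, once registered on an Index, serializes into the index's analysis settings. The analyzer name and filter chain below are illustrative only, not taken from any of the projects that follow.

from elasticsearch_dsl import Index, analyzer

# Build a custom analyzer: standard tokenizer followed by lowercasing.
my_analyzer = analyzer('my_custom_analyzer',
                       tokenizer='standard',
                       filter=['lowercase'])

# Registering it on an Index places it under settings.analysis.analyzer.
idx = Index('example-index')
idx.analyzer(my_analyzer)
assert idx.to_dict()['settings']['analysis']['analyzer'] == {
    'my_custom_analyzer': {
        'type': 'custom',
        'tokenizer': 'standard',
        'filter': ['lowercase'],
    }
}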
Example #1
Source File: test_index.py    From elasticsearch-dsl-py with Apache License 2.0
import string
from random import choice

from elasticsearch_dsl import Index, analyzer


def test_cloned_index_has_analysis_attribute():
    """
    Regression test for Issue #582 in which `Index.clone()` was not copying
    over the `_analysis` attribute.
    """
    client = object()
    i = Index('my-index', using=client)

    random_analyzer_name = ''.join((choice(string.ascii_letters) for _ in range(100)))
    random_analyzer = analyzer(random_analyzer_name, tokenizer="standard", filter="standard")

    i.analyzer(random_analyzer)

    i2 = i.clone('my-clone-index')

    assert i.to_dict()['settings']['analysis'] == i2.to_dict()['settings']['analysis'] 
Example #2
Source File: index.py    From series-tiempo-ar-api with MIT License
from elasticsearch_dsl import Index, analyzer, token_filter

# 'Synonym' is a Django model and 'constants' a settings module, both
# defined elsewhere in the series-tiempo-ar-api project.
def add_analyzer(index: Index):
    """Adds a new analyzer to the index, available for use in all of its
    fields. The analyzer applies lowercasing plus ASCII folding: it strips
    accents, the letter ñ and similar marks, to allow searching text in
    Spanish.
    """

    synonyms = list(Synonym.objects.values_list('terms', flat=True))

    filters = ['lowercase', 'asciifolding']
    if synonyms:
        filters.append(token_filter(constants.SYNONYM_FILTER,
                                    type='synonym',
                                    synonyms=synonyms))

    index.analyzer(
        analyzer(constants.ANALYZER,
                 tokenizer='standard',
                 filter=filters)
    ) 
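To see the effect of the lowercase + asciifolding chain in isolation, here is a small sketch using Analyzer.simulate(); it assumes a reachable cluster registered as the default connection, and the analyzer name is illustrative:

from elasticsearch_dsl import analyzer

spanish_text = analyzer('spanish_text',  # illustrative name
                        tokenizer='standard',
                        filter=['lowercase', 'asciifolding'])

# simulate() runs the analyzer server-side via the _analyze API.
tokens = spanish_text.simulate('Búsqueda en Español').tokens
assert [t.token for t in tokens] == ['busqueda', 'en', 'espanol']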
Example #3
Source File: fields.py    From resolwe with Apache License 2.0
# Method of a custom text-field class; ngrams_analyzer and
# ngrams_search_analyzer are analyzers defined at module level in fields.py.
def __init__(self, *args, **kwargs):
    """Construct field."""
    kwargs.setdefault("fields", {})["ngrams"] = {
        "type": "text",
        "analyzer": ngrams_analyzer,
        "search_analyzer": ngrams_search_analyzer,
    }
    super().__init__(*args, **kwargs)
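resolwe defines ngrams_analyzer and ngrams_search_analyzer elsewhere in fields.py. The snippet below is a hypothetical reconstruction of that common pattern (an edge-n-gram index analyzer paired with a plain search analyzer), not resolwe's actual definition:

from elasticsearch_dsl import analyzer, tokenizer

# Hypothetical edge-n-gram setup: the index-time analyzer emits prefixes
# ('proc' -> 'p', 'pr', 'pro', 'proc'), while the search analyzer leaves
# the query text whole so queries match those stored prefixes.
ngrams_tokenizer = tokenizer('ngrams_tokenizer', 'edge_ngram',
                             min_gram=1, max_gram=15,
                             token_chars=['letter', 'digit'])
ngrams_analyzer = analyzer('ngrams_analyzer',
                           tokenizer=ngrams_tokenizer,
                           filter=['lowercase'])
ngrams_search_analyzer = analyzer('ngrams_search_analyzer',
                                  tokenizer='standard',
                                  filter=['lowercase'])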
Example #4
Source File: fields.py    From resolwe with Apache License 2.0
# Method of a custom field class; name_analyzer is an analyzer defined at
# module level in fields.py.
def __init__(self, *args, **kwargs):
    """Construct field."""
    kwargs.setdefault("analyzer", name_analyzer)
    super().__init__(*args, **kwargs)
Example #5
Source File: fields.py    From resolwe with Apache License 2.0
# Method of a custom field class; process_type_analyzer and
# process_type_search_analyzer are analyzers defined at module level in
# fields.py.
def __init__(self, *args, **kwargs):
    """Construct field."""
    kwargs.setdefault("analyzer", process_type_analyzer)
    kwargs.setdefault("search_analyzer", process_type_search_analyzer)
    super().__init__(*args, **kwargs)
Example #6
Source File: test_document.py    From elasticsearch-dsl-py with Apache License 2.0
from elasticsearch_dsl import analyzer


# MySubDoc is a Document subclass defined earlier in the test module.
def test_inherited_doc_types_can_override_index():
    class MyDocDifferentIndex(MySubDoc):
        class Index:
            name = 'not-default-index'
            settings = {
                'number_of_replicas': 0
            }
            aliases = {'a': {}}
            analyzers = [analyzer('my_analizer', tokenizer='keyword')]

    assert MyDocDifferentIndex._index._name == 'not-default-index'
    assert MyDocDifferentIndex()._get_index() == 'not-default-index'
    assert MyDocDifferentIndex._index.to_dict() == {
        'aliases': {'a': {}},
        'mappings': {
            'properties': {
                'created_at': {'type': 'date'},
                'inner': {
                    'type': 'object',
                    'properties': {
                        'old_field': {'type': 'text'}
                    },
                },
                'name': {'type': 'keyword'},
                'title': {'type': 'keyword'}
            }
        },
        'settings': {
            'analysis': {
                'analyzer': {
                    'my_analizer': {'tokenizer': 'keyword', 'type': 'custom'}
                }
            },
            'number_of_replicas': 0
        }
    } 
Example #7
Source File: test_analysis.py    From elasticsearch-dsl-py with Apache License 2.0
from elasticsearch_dsl import analyzer


# 'client' is a pytest fixture that yields a connected Elasticsearch client.
def test_simulate_with_just__builtin_tokenizer(client):
    a = analyzer('my-analyzer', tokenizer='keyword')
    tokens = a.simulate('Hello World!', using=client).tokens

    assert len(tokens) == 1
    assert tokens[0].token == 'Hello World!' 
Example #8
Source File: test_analysis.py    From elasticsearch-dsl-py with Apache License 2.0
from elasticsearch_dsl import analyzer, token_filter, tokenizer


def test_simulate_complex(client):
    a = analyzer('my-analyzer',
                 tokenizer=tokenizer('split_words', 'simple_pattern_split', pattern=':'),
                 filter=['lowercase', token_filter('no-ifs', 'stop', stopwords=['if'])])

    tokens = a.simulate('if:this:works', using=client).tokens

    assert len(tokens) == 2
    assert ['this', 'works'] == [t.token for t in tokens] 
Example #9
Source File: test_analysis.py    From elasticsearch-dsl-py with Apache License 2.0
from elasticsearch_dsl import analyzer


def test_simulate_builtin(client):
    a = analyzer('my-analyzer', 'english')
    # Pass the client explicitly so the simulation does not depend on a
    # default connection being registered.
    tokens = a.simulate('fixes running', using=client).tokens

    assert ['fix', 'run'] == [t.token for t in tokens] 
Example #10
Source File: test_index.py    From elasticsearch-dsl-py with Apache License 2.0
import string
from random import choice

from elasticsearch_dsl import Index, analyzer


def test_analyzers_returned_from_to_dict():
    random_analyzer_name = ''.join((choice(string.ascii_letters) for _ in range(100)))
    random_analyzer = analyzer(random_analyzer_name, tokenizer="standard", filter="standard")
    index = Index('i', using='alias')
    index.analyzer(random_analyzer)

    assert index.to_dict()["settings"]["analysis"]["analyzer"][random_analyzer_name] == {"filter": ["standard"], "type": "custom", "tokenizer": "standard"} 
Example #11
Source File: test_index.py    From elasticsearch-dsl-py with Apache License 2.0
from pytest import raises

from elasticsearch_dsl import Index


def test_conflicting_analyzer_raises_error():
    i = Index('i')
    i.analyzer('my_analyzer', tokenizer='whitespace', filter=['lowercase', 'stop'])

    with raises(ValueError):
        i.analyzer('my_analyzer', tokenizer='keyword', filter=['lowercase', 'stop']) 
Example #12
Source File: es_config.py    From georef-ar-api with MIT License
def gen_name_analyzer_synonyms(synonyms):
    """Creates an analyzer for names with synonyms.

    Args:
        synonyms (list): List of synonyms to use, in Solr format.

    Returns:
        elasticsearch_dsl.analysis.Analyzer: text analyzer named
            'name_analyzer_synonyms'.

    """
    name_synonyms_filter = token_filter(
        'name_synonyms_filter',
        type='synonym',
        synonyms=synonyms
    )

    # name_analyzer_synonyms and spanish_stopwords_filter are module-level
    # names defined elsewhere in es_config.py.
    return analyzer(
        name_analyzer_synonyms,
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            name_synonyms_filter,
            spanish_stopwords_filter
        ]
    ) 
Example #13
Source File: es_config.py    From georef-ar-api with MIT License
def gen_name_analyzer_excluding_terms(excluding_terms):
    """Creates an analyzer for names that returns only excluding terms
    (ET).

    For example, if the ET configuration file contains the following
    rules:

    santa, salta, santo
    caba, cba

    then applying the analyzer to the query 'salta' should return 'santa'
    and 'santo', while searching for 'caba' should return 'cba'.

    The analyzer is used to exclude results from specific searches.

    Args:
        excluding_terms (list): List of ET to use, specified as Solr
            synonyms.

    Returns:
        elasticsearch_dsl.analysis.Analyzer: text analyzer named
            'name_analyzer_excluding_terms'.

    """
    name_excluding_terms_filter = token_filter(
        'name_excluding_terms_filter',
        type='synonym',
        synonyms=excluding_terms
    )

    # name_analyzer_excluding_terms, synonyms_only_filter and
    # spanish_stopwords_filter are module-level names defined elsewhere in
    # es_config.py.
    return analyzer(
        name_analyzer_excluding_terms,
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            name_excluding_terms_filter,
            synonyms_only_filter,
            spanish_stopwords_filter
        ]
    ) 
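A hypothetical call mirroring the rules from the docstring; each list element is one Solr-format synonym rule. Note the function also depends on synonyms_only_filter and spanish_stopwords_filter from es_config.py:

# Hypothetical input; each string is one Solr-format rule.
excluding_terms = [
    'santa, salta, santo',
    'caba, cba',
]
name_analyzer = gen_name_analyzer_excluding_terms(excluding_terms)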
Example #14
Source File: es_config.py    From georef-ar-api with MIT License
def create_index(es, name, doc_class, shards, replicas, synonyms=None,
                 excluding_terms=None):
    """Crea un índice Elasticsearch utilizando un nombre y una clase de
    documento.

    Args:
        es (elasticsearch.Elasticsearch): Cliente Elasticsearch.
        name (str): Nombre del índice a crear.
        doc_class (type): Clase del documento (debe heredar de Document).
        shards (int): Cantidad de "shards" a utilizar para el índice.
        replicas (int): Cantidad de réplicas por "shard".
        synonyms (list): Lista de sinónimos a utilizar en caso de necesitar el
            analizador 'name_analyzer_synonyms'.
        excluding_terms (list): Lista de términos excluyentes a utilizar en
            caso de necesitar el analizador 'name_analyzer_excluding_terms'.

    """
    index = Index(name)

    # Create the 'name_analyzer_synonyms' analyzer only if it was requested
    # explicitly. If the 'doc_class' document uses the analyzer anywhere in
    # its mapping, the 'synonyms' list must be present.
    if synonyms is not None:
        index.analyzer(gen_name_analyzer_synonyms(synonyms))

    # Same reasoning as with 'name_analyzer_synonyms'.
    if excluding_terms is not None:
        index.analyzer(gen_name_analyzer_excluding_terms(excluding_terms))

    index.document(doc_class)
    index.settings(number_of_shards=shards, number_of_replicas=replicas)
    index.create(using=es) 
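A hypothetical invocation; StreetDoc stands in for any Document subclass defined in the project:

from elasticsearch import Elasticsearch

es = Elasticsearch()  # assumes a reachable cluster
create_index(es, 'streets', StreetDoc, shards=3, replicas=1,
             synonyms=['santa, salta, santo'],
             excluding_terms=['caba, cba'])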
Example #15
Source File: documents.py    From libreborme with GNU Affero General Public License v3.0
from elasticsearch_dsl import analyzer, token_filter


def configure_index(idx):
    """Configure ES index settings.

    NOTE: This is unused at the moment. Current issues:
    1. The index needs to be created (index.create() or search_index --create)
    with update_all_types=True, because the 'name' attribute is the same in
    Person and Company.
    https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.client.IndicesClient.create

    name = fields.TextField(attr="fullname", analyzer=lb_analyzer)

    2. How to specify a token filter for an attribute?

    Therefore the index needs to be configured outside Django.
    """
    idx.settings(number_of_shards=1, number_of_replicas=0)
    lb_filter = token_filter(
        "lb_filter",
        "stop",
        stopwords=["i"]
    )
    lb_analyzer = analyzer(
        "lb_analyzer",
        tokenizer="standard",
        filter=["standard", "lb_filter", "asciifolding", "lowercase"]
    )
    return lb_analyzer, lb_filter
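A sketch of how the return values might be wired up outside Django, as the docstring suggests; lb_analyzer carries its token-filter definitions with it, so registering it on the index is enough:

from elasticsearch_dsl import Index

idx = Index('libreborme')  # illustrative index name
lb_analyzer, lb_filter = configure_index(idx)
idx.analyzer(lb_analyzer)
idx.create()  # requires a configured default connection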