Python elasticsearch_dsl.token_filter() Examples

The following are 5 code examples of elasticsearch_dsl.token_filter(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module elasticsearch_dsl , or try the search function .
Example #1
Source File: index.py    From series-tiempo-ar-api with MIT License 6 votes vote down vote up
def add_analyzer(index: Index):
    """Agrega un nuevo analyzer al índice, disponible para ser usado
    en todos sus fields. El analyzer aplica lower case + ascii fold:
    quita acentos y uso de ñ, entre otros, para permitir búsqueda de
    texto en español
    """

    synonyms = list(Synonym.objects.values_list('terms', flat=True))

    filters = ['lowercase', 'asciifolding']
    if synonyms:
        filters.append(token_filter(constants.SYNONYM_FILTER,
                                    type='synonym',
                                    synonyms=synonyms))

    index.analyzer(
        analyzer(constants.ANALYZER,
                 tokenizer='standard',
                 filter=filters)
    ) 
Example #2
Source File: test_analysis.py    From elasticsearch-dsl-py with Apache License 2.0 5 votes vote down vote up
def test_simulate_complex(client):
    a = analyzer('my-analyzer',
                 tokenizer=tokenizer('split_words', 'simple_pattern_split', pattern=':'),
                 filter=['lowercase', token_filter('no-ifs', 'stop', stopwords=['if'])])

    tokens = a.simulate('if:this:works', using=client).tokens

    assert len(tokens) == 2
    assert ['this', 'works'] == [t.token for t in tokens] 
Example #3
Source File: es_config.py    From georef-ar-api with MIT License 5 votes vote down vote up
def gen_name_analyzer_synonyms(synonyms):
    """Crea un analizador para nombres con sinónimos.

    Args:
        synonyms (list): Lista de sinónimos a utilizar, en formato Solr.

    Returns:
        elasticsearch_dsl.analysis.Analyzer: analizador de texto con nombre
            'name_analyzer_synonyms'.

    """
    name_synonyms_filter = token_filter(
        'name_synonyms_filter',
        type='synonym',
        synonyms=synonyms
    )

    return analyzer(
        name_analyzer_synonyms,
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            name_synonyms_filter,
            spanish_stopwords_filter
        ]
    ) 
Example #4
Source File: es_config.py    From georef-ar-api with MIT License 5 votes vote down vote up
def gen_name_analyzer_excluding_terms(excluding_terms):
    """Crea un analizador para nombres que sólo retorna TE (términos
    excluyentes).

    Por ejemplo, si el archivo de configuración de TE contiene las siguientes
    reglas:

    santa, salta, santo
    caba, cba

    Entonces, aplicar el analizador a la búsqueda 'salta' debería retornar
    'santa' y 'santo', mientras que buscar 'caba' debería retornar 'cba'.

    El analizador se utiliza para excluir resultados de búsquedas específicas.

    Args:
        excluding_terms (list): Lista de TE a utilizar especificados como
            sinónimos Solr.

    Returns:
        elasticsearch_dsl.analysis.Analyzer: analizador de texto con nombre
            'name_analyzer_excluding_terms'.

    """
    name_excluding_terms_filter = token_filter(
        'name_excluding_terms_filter',
        type='synonym',
        synonyms=excluding_terms
    )

    return analyzer(
        name_analyzer_excluding_terms,
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            name_excluding_terms_filter,
            synonyms_only_filter,
            spanish_stopwords_filter
        ]
    ) 
Example #5
Source File: documents.py    From libreborme with GNU Affero General Public License v3.0 5 votes vote down vote up
def configure_index(idx):
    """Configure ES index settings.

    NOTE: This is unused at the moment. Current issues:
    1. The index needs to be created (index.create() or search_index --create)
    setting update_all_types=True because of the attribute name being the same
    in Person and Company.
    https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.client.IndicesClient.create

    name = fields.TextField(attr="fullname", analyzer=lb_analyzer)

    2. How to specifiy token filter for an attribute?

    Therefore the index needs to be configured outside Django.
    """
    idx.settings(number_of_shards=1, number_of_replicas=0)
    lb_filter = token_filter(
        "lb_filter",
        "stop",
        stopwords=["i"]
    )
    lb_analyzer = analyzer(
        "lb_analyzer",
        tokenizer="standard",
        filter=["standard", "lb_filter", "asciifolding", "lowercase"]
    )
    return lb_analyzer, lb_filter