Python elasticsearch_dsl.analyzer() Examples
The following are 15 code examples of `elasticsearch_dsl.analyzer()`.
Example #1
Source File: From elasticsearch-dsl-py with Apache License 2.0 | 6 votes |
def test_cloned_index_has_analysis_attribute():
    """Check that a cloned index reports the same analysis settings.

    Regression test for Issue #582, in which `Index.clone()` was not
    copying over the `_analysis` attribute.
    """
    # A bare object stands in for a real client.
    stub_client = object()
    source_index = Index('my-index', using=stub_client)

    # Register a custom analyzer under a random 100-letter name.
    analyzer_name = ''.join(choice(string.ascii_letters) for _ in range(100))
    source_index.analyzer(
        analyzer(analyzer_name, tokenizer="standard", filter="standard")
    )

    cloned_index = source_index.clone('my-clone-index')

    source_analysis = source_index.to_dict()['settings']['analysis']
    cloned_analysis = cloned_index.to_dict()['settings']['analysis']
    assert source_analysis == cloned_analysis
Example #2
Source File: From series-tiempo-ar-api with MIT License | 6 votes |
def add_analyzer(index: Index):
    """Register a new analyzer on the index, usable by all of its fields.

    The analyzer applies lowercase + ASCII folding: it strips accents and
    the use of ñ, among others, to allow text search in Spanish. When any
    ``Synonym`` terms exist, a synonym token filter built from them is
    appended to the filter chain.
    """
    synonym_terms = list(Synonym.objects.values_list('terms', flat=True))

    filter_chain = ['lowercase', 'asciifolding']
    if synonym_terms:
        synonym_filter = token_filter(
            constants.SYNONYM_FILTER,
            type='synonym',
            synonyms=synonym_terms,
        )
        filter_chain.append(synonym_filter)

    custom_analyzer = analyzer(
        constants.ANALYZER,
        tokenizer='standard',
        filter=filter_chain,
    )
    index.analyzer(custom_analyzer)
Example #3
Source File: From resolwe with Apache License 2.0 | 5 votes |
def __init__(self, *args, **kwargs):
    """Construct the field, adding an ``ngrams`` text sub-field.

    The sub-field is stored under ``kwargs["fields"]["ngrams"]`` (creating
    the ``fields`` mapping when the caller did not supply one) and replaces
    any ``ngrams`` entry already present there.
    """
    ngrams_subfield = {
        "type": "text",
        "analyzer": ngrams_analyzer,
        "search_analyzer": ngrams_search_analyzer,
    }
    subfields = kwargs.setdefault("fields", {})
    subfields["ngrams"] = ngrams_subfield
    super().__init__(*args, **kwargs)
Example #4
Source File: From resolwe with Apache License 2.0 | 5 votes |
def __init__(self, *args, **kwargs):
    """Construct the field, using ``name_analyzer`` unless an analyzer is given."""
    if "analyzer" not in kwargs:
        kwargs["analyzer"] = name_analyzer
    super().__init__(*args, **kwargs)
Example #5
Source File: From resolwe with Apache License 2.0 | 5 votes |
def __init__(self, *args, **kwargs):
    """Construct the field with process-type analyzers as defaults.

    ``analyzer`` and ``search_analyzer`` are only filled in when the caller
    did not pass them.
    """
    fallback_options = (
        ("analyzer", process_type_analyzer),
        ("search_analyzer", process_type_search_analyzer),
    )
    for option, fallback in fallback_options:
        kwargs.setdefault(option, fallback)
    super().__init__(*args, **kwargs)
Example #6
Source File: From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_inherited_doc_types_can_override_index():
    """Check the index config of a subclass that declares its own ``Index``."""

    class MyDocDifferentIndex(MySubDoc):
        class Index:
            name = 'not-default-index'
            settings = {'number_of_replicas': 0}
            aliases = {'a': {}}
            analyzers = [analyzer('my_analizer', tokenizer='keyword')]

    expected_properties = {
        'created_at': {'type': 'date'},
        'inner': {
            'type': 'object',
            'properties': {
                'old_field': {'type': 'text'},
            },
        },
        'name': {'type': 'keyword'},
        'title': {'type': 'keyword'},
    }
    expected_settings = {
        'analysis': {
            'analyzer': {
                'my_analizer': {'tokenizer': 'keyword', 'type': 'custom'},
            },
        },
        'number_of_replicas': 0,
    }
    expected_index = {
        'aliases': {'a': {}},
        'mappings': {'properties': expected_properties},
        'settings': expected_settings,
    }

    # The overriding name is visible both on the class and on instances.
    assert MyDocDifferentIndex._index._name == 'not-default-index'
    assert MyDocDifferentIndex()._get_index() == 'not-default-index'
    assert MyDocDifferentIndex._index.to_dict() == expected_index
Example #7
Source File: From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_simulate_with_just__builtin_tokenizer(client):
    """Simulate an analyzer configured with only the ``keyword`` tokenizer.

    The whole input is expected back as a single token.
    """
    keyword_analyzer = analyzer('my-analyzer', tokenizer='keyword')
    simulation = keyword_analyzer.simulate('Hello World!', using=client)
    produced = simulation.tokens

    assert len(produced) == 1
    assert produced[0].token == 'Hello World!'
Example #8
Source File: From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_simulate_complex(client):
    """Simulate an analyzer built from a custom tokenizer and a custom filter.

    The tokenizer splits on ``:`` and the ``stop`` filter drops ``if``, so
    ``'if:this:works'`` is expected to yield ``this`` and ``works``.
    """
    colon_splitter = tokenizer('split_words', 'simple_pattern_split', pattern=':')
    drop_ifs = token_filter('no-ifs', 'stop', stopwords=['if'])
    custom_analyzer = analyzer(
        'my-analyzer',
        tokenizer=colon_splitter,
        filter=['lowercase', drop_ifs],
    )

    produced = custom_analyzer.simulate('if:this:works', using=client).tokens

    assert len(produced) == 2
    assert ['this', 'works'] == [tok.token for tok in produced]
Example #9
Source File: From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_simulate_builtin(client):
    """Simulate the builtin ``english`` analyzer on a short phrase.

    The client is passed explicitly, matching the other ``simulate`` tests,
    so the test does not depend on whatever connection ``simulate`` falls
    back to when ``using`` is omitted.
    """
    a = analyzer('my-analyzer', 'english')
    tokens = a.simulate('fixes running', using=client).tokens

    assert ['fix', 'run'] == [t.token for t in tokens]
Example #10
Source File: From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_analyzers_returned_from_to_dict():
    """Check that an analyzer registered on an index shows up in ``to_dict()``."""
    analyzer_name = ''.join(choice(string.ascii_letters) for _ in range(100))
    custom_analyzer = analyzer(analyzer_name, tokenizer="standard", filter="standard")

    index = Index('i', using='alias')
    index.analyzer(custom_analyzer)

    registered = index.to_dict()["settings"]["analysis"]["analyzer"]
    expected_definition = {
        "filter": ["standard"],
        "type": "custom",
        "tokenizer": "standard",
    }
    assert registered[analyzer_name] == expected_definition
Example #11
Source File: From elasticsearch-dsl-py with Apache License 2.0 | 5 votes |
def test_conflicting_analyzer_raises_error():
    """Registering the same analyzer name with a different tokenizer must fail."""
    index = Index('i')
    first_definition = {'tokenizer': 'whitespace', 'filter': ['lowercase', 'stop']}
    conflicting_definition = {'tokenizer': 'keyword', 'filter': ['lowercase', 'stop']}

    index.analyzer('my_analyzer', **first_definition)

    with raises(ValueError):
        index.analyzer('my_analyzer', **conflicting_definition)
Example #12
Source File: From georef-ar-api with MIT License | 5 votes |
def gen_name_analyzer_synonyms(synonyms):
    """Create a name analyzer that applies synonyms.

    Args:
        synonyms (list): List of synonyms to use, in Solr format.

    Returns:
        elasticsearch_dsl.analysis.Analyzer: text analyzer named
            'name_analyzer_synonyms'.

    """
    synonym_filter = token_filter(
        'name_synonyms_filter',
        type='synonym',
        synonyms=synonyms,
    )
    filter_chain = [
        'lowercase',
        'asciifolding',
        synonym_filter,
        spanish_stopwords_filter,
    ]
    return analyzer(
        name_analyzer_synonyms,
        tokenizer='standard',
        filter=filter_chain,
    )
Example #13
Source File: From georef-ar-api with MIT License | 5 votes |
def gen_name_analyzer_excluding_terms(excluding_terms):
    """Create a name analyzer that only returns ET (excluding terms).

    For example, if the ET configuration file contains the following rules:

        santa, salta, santo
        caba, cba

    then applying the analyzer to the search 'salta' should return 'santa'
    and 'santo', while searching for 'caba' should return 'cba'.

    The analyzer is used to exclude results from specific searches.

    Args:
        excluding_terms (list): List of ET to use, specified as Solr
            synonyms.

    Returns:
        elasticsearch_dsl.analysis.Analyzer: text analyzer named
            'name_analyzer_excluding_terms'.

    """
    excluding_filter = token_filter(
        'name_excluding_terms_filter',
        type='synonym',
        synonyms=excluding_terms,
    )
    filter_chain = [
        'lowercase',
        'asciifolding',
        excluding_filter,
        synonyms_only_filter,
        spanish_stopwords_filter,
    ]
    return analyzer(
        name_analyzer_excluding_terms,
        tokenizer='standard',
        filter=filter_chain,
    )
Example #14
Source File: From georef-ar-api with MIT License | 5 votes |
def create_index(es, name, doc_class, shards, replicas, synonyms=None,
                 excluding_terms=None):
    """Create an Elasticsearch index from a name and a document class.

    Args:
        es (elasticsearch.Elasticsearch): Elasticsearch client.
        name (str): Name of the index to create.
        doc_class (type): Document class (must inherit from Document).
        shards (int): Number of shards to use for the index.
        replicas (int): Number of replicas per shard.
        synonyms (list): List of synonyms to use when the
            'name_analyzer_synonyms' analyzer is needed.
        excluding_terms (list): List of excluding terms to use when the
            'name_analyzer_excluding_terms' analyzer is needed.

    """
    index = Index(name)

    # Each optional analyzer is created only when explicitly requested. If
    # the 'doc_class' document uses one of them somewhere in its mapping,
    # the matching list ('synonyms' / 'excluding_terms') should be present.
    optional_analyzers = (
        (synonyms, gen_name_analyzer_synonyms),
        (excluding_terms, gen_name_analyzer_excluding_terms),
    )
    for terms, build_analyzer in optional_analyzers:
        if terms is not None:
            index.analyzer(build_analyzer(terms))

    index.document(doc_class)
    index.settings(number_of_shards=shards, number_of_replicas=replicas)
    index.create(using=es)
Example #15
Source File: From libreborme with GNU Affero General Public License v3.0 | 5 votes |
def configure_index(idx):
    """Configure ES index settings.

    NOTE: This is unused at the moment. Current issues:

    1. The index needs to be created (index.create() or
       search_index --create) setting update_all_types=True because of the
       attribute name being the same in Person and Company:

           name = fields.TextField(attr="fullname", analyzer=lb_analyzer)

    2. How to specify a token filter for an attribute?

    Therefore the index needs to be configured outside Django.

    Returns a ``(analyzer, token_filter)`` tuple.
    """
    idx.settings(number_of_shards=1, number_of_replicas=0)

    stop_filter = token_filter("lb_filter", "stop", stopwords=["i"])

    # NOTE(review): the analyzer refers to the stop filter by the string
    # "lb_filter" rather than by the token_filter object built above --
    # confirm the filter definition is registered on the index elsewhere.
    analyzer_filters = ["standard", "lb_filter", "asciifolding", "lowercase"]
    custom_analyzer = analyzer(
        "lb_analyzer",
        tokenizer="standard",
        filter=analyzer_filters,
    )
    return custom_analyzer, stop_filter