Python elasticsearch_dsl.analyzer() Examples
The following are 15 code examples of elasticsearch_dsl.analyzer(), collected from open-source projects. The original project, source file, and license are noted above each example.
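All of the examples follow the same basic pattern: build a custom analyzer with analyzer(), register it on an Index, and optionally reference it from a text field. A minimal sketch of that pattern (the index and field names here are hypothetical, not taken from any of the projects below):

from elasticsearch_dsl import Index, Text, analyzer

# A custom analyzer: standard tokenizer plus lowercasing.
my_analyzer = analyzer(
    'my_analyzer',
    tokenizer='standard',
    filter=['lowercase'],
)

# Registering it on an index places it under the index's
# "analysis" settings.
blog = Index('blog')          # hypothetical index name
blog.analyzer(my_analyzer)

# A text field can also use the analyzer directly.
title_field = Text(analyzer=my_analyzer)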
Example #1
Source File: test_index.py From elasticsearch-dsl-py with Apache License 2.0
from random import choice
import string

from elasticsearch_dsl import Index, analyzer


def test_cloned_index_has_analysis_attribute():
    """
    Regression test for Issue #582 in which `Index.clone()` was not
    copying over the `_analysis` attribute.
    """
    client = object()
    i = Index('my-index', using=client)

    random_analyzer_name = ''.join(choice(string.ascii_letters) for _ in range(100))
    random_analyzer = analyzer(
        random_analyzer_name, tokenizer="standard", filter="standard"
    )
    i.analyzer(random_analyzer)

    i2 = i.clone('my-clone-index')

    assert i.to_dict()['settings']['analysis'] == i2.to_dict()['settings']['analysis']
Example #2
Source File: index.py From series-tiempo-ar-api with MIT License
def add_analyzer(index: Index):
    """Add a new analyzer to the index, available for use in all of its
    fields.

    The analyzer applies lower case + ASCII folding: it strips accents
    and the ñ character, among others, to support searching text in
    Spanish.
    """
    synonyms = list(Synonym.objects.values_list('terms', flat=True))
    filters = ['lowercase', 'asciifolding']

    if synonyms:
        filters.append(
            token_filter(constants.SYNONYM_FILTER,
                         type='synonym',
                         synonyms=synonyms)
        )

    index.analyzer(
        analyzer(constants.ANALYZER, tokenizer='standard', filter=filters)
    )
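Once add_analyzer() has registered the analyzer on the index, mapping fields can refer to it by name. A minimal sketch of that second step, assuming constants.ANALYZER is the string 'spanish_asciifold' (the name is hypothetical):

from elasticsearch_dsl import Index, Text, analyzer

index = Index('series')  # hypothetical index name
index.analyzer(
    analyzer('spanish_asciifold',  # stands in for constants.ANALYZER
             tokenizer='standard',
             filter=['lowercase', 'asciifolding'])
)

# Fields in the index's mappings can now reference the analyzer by name.
description = Text(analyzer='spanish_asciifold')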
Example #3
Source File: fields.py From resolwe with Apache License 2.0
def __init__(self, *args, **kwargs):
    """Construct field."""
    kwargs.setdefault("fields", {})["ngrams"] = {
        "type": "text",
        "analyzer": ngrams_analyzer,
        "search_analyzer": ngrams_search_analyzer,
    }
    super().__init__(*args, **kwargs)
Example #4
Source File: fields.py From resolwe with Apache License 2.0
def __init__(self, *args, **kwargs):
    """Construct field."""
    kwargs.setdefault("analyzer", name_analyzer)
    super().__init__(*args, **kwargs)
Example #5
Source File: fields.py From resolwe with Apache License 2.0
def __init__(self, *args, **kwargs):
    """Construct field."""
    kwargs.setdefault("analyzer", process_type_analyzer)
    kwargs.setdefault("search_analyzer", process_type_search_analyzer)
    super().__init__(*args, **kwargs)
Example #6
Source File: test_document.py From elasticsearch-dsl-py with Apache License 2.0
def test_inherited_doc_types_can_override_index():
    # MySubDoc is a Document subclass defined earlier in the test module.
    class MyDocDifferentIndex(MySubDoc):
        class Index:
            name = 'not-default-index'
            settings = {
                'number_of_replicas': 0
            }
            aliases = {'a': {}}
            analyzers = [analyzer('my_analizer', tokenizer='keyword')]

    assert MyDocDifferentIndex._index._name == 'not-default-index'
    assert MyDocDifferentIndex()._get_index() == 'not-default-index'
    assert MyDocDifferentIndex._index.to_dict() == {
        'aliases': {'a': {}},
        'mappings': {
            'properties': {
                'created_at': {'type': 'date'},
                'inner': {
                    'type': 'object',
                    'properties': {
                        'old_field': {'type': 'text'}
                    },
                },
                'name': {'type': 'keyword'},
                'title': {'type': 'keyword'}
            }
        },
        'settings': {
            'analysis': {
                'analyzer': {
                    'my_analizer': {'tokenizer': 'keyword', 'type': 'custom'}
                }
            },
            'number_of_replicas': 0
        }
    }
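Outside of the test harness, the same Index-with-analyzers pattern reduces to a few lines. A minimal sketch (the document class and index name are hypothetical):

from elasticsearch_dsl import Document, Keyword, analyzer

my_analyzer = analyzer('my_analyzer', tokenizer='keyword')

class Article(Document):
    title = Keyword()

    class Index:
        name = 'articles'                    # hypothetical index name
        settings = {'number_of_replicas': 0}
        analyzers = [my_analyzer]            # emitted under settings.analysis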
Example #7
Source File: test_analysis.py From elasticsearch-dsl-py with Apache License 2.0
from elasticsearch_dsl import analyzer


def test_simulate_with_just__builtin_tokenizer(client):
    # 'client' is a pytest fixture providing an Elasticsearch client.
    a = analyzer('my-analyzer', tokenizer='keyword')
    tokens = a.simulate('Hello World!', using=client).tokens

    assert len(tokens) == 1
    assert tokens[0].token == 'Hello World!'
Example #8
Source File: test_analysis.py From elasticsearch-dsl-py with Apache License 2.0
from elasticsearch_dsl import analyzer, token_filter, tokenizer


def test_simulate_complex(client):
    a = analyzer(
        'my-analyzer',
        tokenizer=tokenizer('split_words', 'simple_pattern_split', pattern=':'),
        filter=['lowercase', token_filter('no-ifs', 'stop', stopwords=['if'])]
    )

    tokens = a.simulate('if:this:works', using=client).tokens

    assert len(tokens) == 2
    assert ['this', 'works'] == [t.token for t in tokens]
Example #9
Source File: test_analysis.py From elasticsearch-dsl-py with Apache License 2.0
from elasticsearch_dsl import analyzer


def test_simulate_builtin(client):
    a = analyzer('my-analyzer', 'english')
    tokens = a.simulate('fixes running').tokens

    assert ['fix', 'run'] == [t.token for t in tokens]
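The simulate() calls in Examples #7 through #9 run the analyzer against a live cluster's _analyze API, which makes them handy for inspecting an analysis chain before indexing anything. A standalone sketch, assuming a cluster reachable on localhost:

from elasticsearch import Elasticsearch
from elasticsearch_dsl import analyzer

client = Elasticsearch()  # assumes a local cluster on the default port

probe = analyzer('probe', tokenizer='standard', filter=['lowercase'])
for t in probe.simulate('Hello World!', using=client).tokens:
    print(t.token)  # prints "hello", then "world"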
Example #10
Source File: test_index.py From elasticsearch-dsl-py with Apache License 2.0
from random import choice
import string

from elasticsearch_dsl import Index, analyzer


def test_analyzers_returned_from_to_dict():
    random_analyzer_name = ''.join(choice(string.ascii_letters) for _ in range(100))
    random_analyzer = analyzer(
        random_analyzer_name, tokenizer="standard", filter="standard"
    )
    index = Index('i', using='alias')
    index.analyzer(random_analyzer)

    assert index.to_dict()["settings"]["analysis"]["analyzer"][random_analyzer_name] == {
        "filter": ["standard"], "type": "custom", "tokenizer": "standard"
    }
Example #11
Source File: test_index.py From elasticsearch-dsl-py with Apache License 2.0
from pytest import raises

from elasticsearch_dsl import Index


def test_conflicting_analyzer_raises_error():
    i = Index('i')
    i.analyzer('my_analyzer', tokenizer='whitespace', filter=['lowercase', 'stop'])

    with raises(ValueError):
        i.analyzer('my_analyzer', tokenizer='keyword', filter=['lowercase', 'stop'])
Example #12
Source File: es_config.py From georef-ar-api with MIT License
def gen_name_analyzer_synonyms(synonyms):
    """Create an analyzer for names with synonyms.

    Args:
        synonyms (list): List of synonyms to use, in Solr format.

    Returns:
        elasticsearch_dsl.analysis.Analyzer: text analyzer named
            'name_analyzer_synonyms'.
    """
    name_synonyms_filter = token_filter(
        'name_synonyms_filter',
        type='synonym',
        synonyms=synonyms
    )

    return analyzer(
        name_analyzer_synonyms,
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            name_synonyms_filter,
            spanish_stopwords_filter
        ]
    )
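The synonyms argument above is expected in Solr synonym syntax, which Elasticsearch's synonym token filter understands. A hypothetical illustration of that format (these rules are not from georef-ar-api):

synonyms = [
    # Comma-separated terms are treated as mutually equivalent.
    'caba, capital federal',
    # '=>' rewrites the left-hand terms to the right-hand terms.
    'pcia => provincia',
]

analyzer_with_synonyms = gen_name_analyzer_synonyms(synonyms)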
Example #13
Source File: es_config.py From georef-ar-api with MIT License
def gen_name_analyzer_excluding_terms(excluding_terms):
    """Create an analyzer for names that only returns ET (excluding
    terms).

    For example, if the ET configuration file contains the following
    rules:

        santa, salta, santo
        caba, cba

    then applying the analyzer to the search 'salta' should return
    'santa' and 'santo', while searching for 'caba' should return
    'cba'.

    The analyzer is used to exclude results from specific searches.

    Args:
        excluding_terms (list): List of ET to use, specified as Solr
            synonyms.

    Returns:
        elasticsearch_dsl.analysis.Analyzer: text analyzer named
            'name_analyzer_excluding_terms'.
    """
    name_excluding_terms_filter = token_filter(
        'name_excluding_terms_filter',
        type='synonym',
        synonyms=excluding_terms
    )

    return analyzer(
        name_analyzer_excluding_terms,
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            name_excluding_terms_filter,
            synonyms_only_filter,
            spanish_stopwords_filter
        ]
    )
Example #14
Source File: es_config.py From georef-ar-api with MIT License
def create_index(es, name, doc_class, shards, replicas, synonyms=None,
                 excluding_terms=None):
    """Create an Elasticsearch index from a name and a document class.

    Args:
        es (elasticsearch.Elasticsearch): Elasticsearch client.
        name (str): Name of the index to create.
        doc_class (type): Document class (must inherit from Document).
        shards (int): Number of shards to use for the index.
        replicas (int): Number of replicas per shard.
        synonyms (list): List of synonyms to use, if the
            'name_analyzer_synonyms' analyzer is needed.
        excluding_terms (list): List of excluding terms to use, if the
            'name_analyzer_excluding_terms' analyzer is needed.
    """
    index = Index(name)

    # Create the 'name_analyzer_synonyms' analyzer only when explicitly
    # requested. If the 'doc_class' document uses the analyzer anywhere
    # in its mapping, the 'synonyms' list should be present.
    if synonyms is not None:
        index.analyzer(gen_name_analyzer_synonyms(synonyms))

    # Same reasoning as with 'name_analyzer_synonyms'.
    if excluding_terms is not None:
        index.analyzer(gen_name_analyzer_excluding_terms(excluding_terms))

    index.document(doc_class)
    index.settings(number_of_shards=shards, number_of_replicas=replicas)
    index.create(using=es)
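A usage sketch for create_index(); the document class, cluster address, and synonym list here are hypothetical:

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Document, Text

class State(Document):  # minimal hypothetical document class
    name = Text(analyzer='name_analyzer_synonyms')

es = Elasticsearch()  # assumes a local cluster

create_index(es, 'states', State, shards=1, replicas=0,
             synonyms=['caba, capital federal'])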
Example #15
Source File: documents.py From libreborme with GNU Affero General Public License v3.0
from elasticsearch_dsl import analyzer, token_filter


def configure_index(idx):
    """Configure ES index settings.

    NOTE: This is unused at the moment. Current issues:

    1. The index needs to be created (index.create() or search_index
       --create) setting update_all_types=True because the attribute
       name is the same in Person and Company.
       https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.client.IndicesClient.create

           name = fields.TextField(attr="fullname", analyzer=lb_analyzer)

    2. How to specify a token filter for an attribute?

    Therefore the index needs to be configured outside Django.
    """
    idx.settings(number_of_shards=1, number_of_replicas=0)

    lb_filter = token_filter(
        "lb_filter",
        "stop",
        stopwords=["i"]
    )

    lb_analyzer = analyzer(
        "lb_analyzer",
        tokenizer="standard",
        filter=["standard", "lb_filter", "asciifolding", "lowercase"]
    )

    return lb_analyzer, lb_filter
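A sketch of how the returned objects might be used, following the docstring's own hint; the index name is hypothetical, and the fields.TextField line assumes django_elasticsearch_dsl:

from elasticsearch_dsl import Index

idx = Index('libreborme')  # hypothetical index name
lb_analyzer, lb_filter = configure_index(idx)

# As the docstring hints, the analyzer could then be attached to a
# django_elasticsearch_dsl field:
# name = fields.TextField(attr="fullname", analyzer=lb_analyzer)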