Python Examples of elasticsearch.exceptions.RequestError

Source File: baseapi.py From elasticsearch-dbapi with Apache License 2.0

6 votes

def elastic_query(self, query: str, csv=False):
        """
        Request an http SQL query to elasticsearch
        """
        self.description = None
        # Sanitize query
        query = self.sanitize_query(query)
        payload = {"query": query}
        if csv:
            path = f"/{self.sql_path}/?format=csv"
        else:
            path = f"/{self.sql_path}/"
        try:
            resp = self.es.transport.perform_request("POST", path, body=payload)
        except es_exceptions.ConnectionError as e:
            raise exceptions.OperationalError(
                f"Error connecting to {self.url}: {e.info}"
            )
        except es_exceptions.RequestError as e:
            raise exceptions.ProgrammingError(
                f"Error ({e.error}): {e.info['error']['reason']}"
            )
        return resp

Source File: views.py From cmdb with GNU Lesser General Public License v3.0

6 votes

def create(self, request, *args, **kwargs):
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        data = serializer.validated_data
        # indices = data["indices"] if data["indices"] else "_all"
        indices = self.get_indices(data["indices"])
        sort = ",".join(reversed(list(map(lambda i: ":".join(i), data["sort"].items()))))
        try:
            res = es.search(index=indices,
                            doc_type=self._doc_type,
                            from_=data["page_size"] * (data["page"]-1),
                            size=data["page_size"],
                            sort=sort,
                            body=data["body"],
                            analyze_wildcard=True)
        except NotFoundError as exc:
            return Response({
                "hits": [],
                "max_score": None,
                "total": 0
            })
        except RequestError as exc:
            raise exceptions.ParseError("Search statement error: "+str(exc))
        return Response(res["hits"])

Source File: views.py From cmdb with GNU Lesser General Public License v3.0

6 votes

def create(self, request, *args, **kwargs):
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        data = serializer.validated_data
        # indices = data["indices"] if data["indices"] else "_all"
        indices = self.get_indices(data["indices"])
        sort = ",".join(reversed(list(map(lambda i: ":".join(i), data["sort"].items()))))
        try:
            res = es.search(index=indices,
                            doc_type=self._doc_type,
                            from_=data["page_size"] * (data["page"]-1),
                            size=data["page_size"],
                            sort=sort,
                            q=data["query"],
                            analyze_wildcard=True)
        except NotFoundError as exc:
            return Response({
                "hits": [],
                "max_score": None,
                "total": 0
            })
        except RequestError as exc:
            raise exceptions.ParseError("Search statement error: "+str(exc))
        return Response(res["hits"])

Source File: elastic.py From timesketch with Apache License 2.0

6 votes

def count(self, indices):
        """Count number of documents.

        Args:
            indices: List of indices.

        Returns:
            Number of documents.
        """
        if not indices:
            return 0
        try:
            result = self.client.count(index=indices)
        except (NotFoundError, RequestError) as e:
            es_logger.error(
                'Unable to count indexes (index not found), with '
                'error: {0!s}'.format(e))
            return 0
        return result.get('count', 0)

Source File: tasks.py From lexpredict-contraxsuite with GNU Affero General Public License v3.0

6 votes

def process(self, **kwargs):
        self.set_push_steps(1)
        es = Elasticsearch(hosts=settings.ELASTICSEARCH_CONFIG['hosts'])

        es_index = settings.ELASTICSEARCH_CONFIG['index']

        try:
            es.indices.create(index=es_index)
            self.log_info('Created index: {0}'.format(es_index))
        except RequestError:
            self.log_info('Index already exists: {0}'.format(es_index))

        count = 0
        for tu in TextUnit.objects.iterator():
            self.elastic_index(es, tu)
            count += 1
            if count % 100 == 0:
                self.log_info('Indexing text units: {0} done'.format(count))
        self.log_info('Finished indexing text units. Refreshing ES index.')
        es.indices.refresh(index=es_index)
        self.log_info('Done')
        self.push()

Source File: api.py From hepdata with GNU General Public License v2.0

5 votes

def get_record(record_id, index=None):
    """ Fetch a given record from ES.

    :param record_id: [int] ES record id
    :param index: [string] name of the index. If None a default is used

    :return: [dict] Fetched record
    """
    try:
        result = es.get(index=index, id=record_id)
        return result.get('_source', result)
    except (NotFoundError, RequestError):
        return None

Source File: search.py From arches with GNU Affero General Public License v3.0

5 votes

def search(self, **kwargs):
        """
        Search for an item in the index.
        Pass an index and id (or list of ids) to get a specific document(s)
        Pass a body with a query dsl to perform a search

        """

        kwargs = self._add_prefix(**kwargs)
        body = kwargs.get("body", None)
        id = kwargs.pop("id", None)

        if id:
            if isinstance(id, str):
                id = id.split(",")
                if len(id) == 1:
                    id = id[0]
            if isinstance(id, list):
                kwargs["body"] = {"ids": id}
                return self.es.mget(**kwargs)
            else:
                kwargs.pop("body", None)  # remove body param
                kwargs["id"] = id
                return self.es.get(**kwargs)

        ret = None
        try:
            ret = self.es.search(**kwargs)
        except RequestError as detail:
            self.logger.exception("%s: WARNING: search failed for query: %s \nException detail: %s\n" % (datetime.now(), body, detail))

        return ret

Source File: es.py From nboost with Apache License 2.0

5 votes

def index(self):
        """send csv to ES index"""
        self.logger.info('Setting up Elasticsearch index...')
        elastic = Elasticsearch(host=self.host, port=self.port, timeout=10000)
        try:
            self.logger.info('Creating index %s...' % self.index_name)
            elastic.indices.create(self.index_name, self.mapping)
        except RequestError:
            self.logger.info('Index already exists, skipping...')

        self.logger.info('Indexing %s...' % self.file)
        act = (self.format(passage, cid=cid) for cid, passage in self.csv_generator())
        bulk(elastic, actions=act)

Source File: es.py From hoover-search with MIT License

5 votes

def elasticsearch():
    try:
        yield Elasticsearch(settings.HOOVER_ELASTICSEARCH_URL)
    except ConnectionError:
        raise SearchError('Could not connect to Elasticsearch.')
    except RequestError as e:
        reason = 'reason unknown'
        try:
            if e.info:
                reason = e.info['error']['root_cause'][0]['reason']
        except LookupError:
            pass
        raise SearchError('Elasticsearch failed: ' + reason)

Source File: helper.py From searchlight with Apache License 2.0

5 votes

def _is_multiple_alias_exception(e):
    """Verify that this exception is specifically the IllegalArgument
       exception when there are multiple indexes for an alias. There
       is no clean way of verifying this is the case. There are multiple
       ES RequestError exceptions that return a 400 IllegalArgument.
       In this particular case, we are expecting a message in the
       exception like this:

           ElasticsearchIllegalArgumentException[Alias [alias] has more
           than one indices associated with it [[idx1, idx2]], can't
           execute a single index op]

       We will be dirty and parse the exception message. We need to
       check the validity of ALIAS_EXCEPTION_STRING in future
       ElasticSearch versions.
    """
    if ALIAS_EXCEPTION_STRING in getattr(e, 'error', ''):
        # Elasticsearch 1.x
        return True

    # ES 2 - the error info's in e.info.error.reason
    exc_info_error = getattr(e, 'info', {}).get('error', {})
    if ALIAS_EXCEPTION_STRING in exc_info_error.get('reason', ''):
        return True

    return False

Source File: test_search.py From searchlight with Apache License 2.0

5 votes

def test_search_badrequest(self):
        request = unit_test_utils.get_fake_request()
        with mock.patch(REPO_SEARCH, side_effect=es_exc.RequestError):
            query = {}
            index_name = "searchlight"
            doc_type = "OS::Glance::Metadef"
            offset = 0
            limit = 10

            self.assertRaises(
                webob.exc.HTTPBadRequest, self.search_controller.search,
                request, query, index_name, doc_type, offset, limit)

Source File: test_indexing_helper.py From searchlight with Apache License 2.0

5 votes

def test_multiple_alias_exception_elasticsearch2(self):
        # The es-2 format is different
        alias_exc = es_exceptions.RequestError(
            400,
            'illegal_argument_exception',
            {"error": {
                "root_cause": [{
                    "type": "illegal_argument_exception",
                    "reason": "Something " + helper.ALIAS_EXCEPTION_STRING
                }],
                "type": "illegal_argument_exception",
                "reason": "Something " + helper.ALIAS_EXCEPTION_STRING
            }})
        self.assertTrue(helper._is_multiple_alias_exception(alias_exc))

Source File: test_indexing_helper.py From searchlight with Apache License 2.0

5 votes

def test_multiple_alias_exception_detection(self):
        alias_exc = es_exceptions.RequestError(
            400, "Something " + helper.ALIAS_EXCEPTION_STRING, {})
        self.assertTrue(helper._is_multiple_alias_exception(alias_exc))

        other_exc = Exception("Blah blah")
        self.assertFalse(helper._is_multiple_alias_exception(other_exc))

Source File: manage.py From open-raadsinformatie with MIT License

5 votes

def create_indexes(mapping_dir):
    """
    Create all indexes for which a mapping- and settings file is available.

    It is assumed that mappings in the specified directory follow the
    following naming convention: "ocd_mapping_{SOURCE_NAME}.json".
    For example: "ocd_mapping_rijksmuseum.json".
    """
    click.echo('Creating indexes for ES mappings in %s' % mapping_dir)

    for mapping_file_path in glob('%s/ori_mapping_*.json' % mapping_dir):
        # Extract the index name from the filename
        index_name = DEFAULT_INDEX_PREFIX
        mapping_file = os.path.split(mapping_file_path)[-1].split('.')[0]
        index_name = '%s_%s' % (DEFAULT_INDEX_PREFIX,
                                '_'.join(mapping_file.rsplit('_')[2:]))

        click.echo('Creating ES index %s' % index_name)

        mapping_file = open(mapping_file_path, 'rb')
        mapping = json.load(mapping_file)
        mapping_file.close()

        try:
            es.indices.create(index=index_name, body=mapping)
        except RequestError as e:
            error_msg = click.style('Failed to create index %s due to ES '
                                    'error: %s' % (index_name, e.error),
                                    fg='red')
            click.echo(error_msg)

Source File: helper.py From searchlight with Apache License 2.0

4 votes

def update_document(self, document, doc_id, update_as_script,
                        expected_version=None):
        """Updates are a little simpler than inserts because the documents
        already exist. Note that scripted updates are not filtered in the same
        way as partial document updates. Script updates should be passed as
        a dict {"script": .., "parameters": ..}. Partial document updates
        should be the raw document.
        """
        def _get_update_action(source, id_suffix=''):
            action = {'_id': doc_id + id_suffix, '_op_type': 'update'}
            if expected_version:
                action['_version'] = expected_version
            if update_as_script:
                action.update(source)
            else:
                action['doc'] = source

            routing_field = self.plugin.routing_field
            if routing_field:
                action['_routing'] = source[routing_field]

            return action

        if self.plugin.requires_role_separation:
            user_doc = (self._remove_admin_fields(document)
                        if update_as_script else document)
            actions = [_get_update_action(document, ADMIN_ID_SUFFIX),
                       _get_update_action(user_doc, USER_ID_SUFFIX)]
        else:
            actions = [_get_update_action(document)]
        try:
            result = helpers.bulk(
                client=self.engine,
                index=self.alias_name,
                doc_type=self.document_type,
                chunk_size=self.index_chunk_size,
                actions=actions)
            LOG.debug("Update result: %s", result)
        except es_exc.RequestError as e:
            if _is_multiple_alias_exception(e):
                LOG.error("Alias [%(a)s] with multiple indexes error" %
                          {'a': self.alias_name})
                self._index_alias_multiple_indexes_bulk(actions=actions)

Source File: elasticsearch.py From zentral with Apache License 2.0

4 votes

def wait_and_configure(self):
        for i in range(self.MAX_CONNECTION_ATTEMPTS):
            # get or create index
            try:
                info = self._es.info()
                self.version = [int(i) for i in info["version"]["number"].split(".")]
                if not self._es.indices.exists(self.index):
                    self._es.indices.create(self.index, body=self.get_index_conf())
                    self.use_mapping_types = False
                    logger.info("Index %s created", self.index)
            except ConnectionError:
                s = (i + 1) * random.uniform(0.9, 1.1)
                logger.warning('Could not connect to server %d/%d. Sleep %ss',
                               i + 1, self.MAX_CONNECTION_ATTEMPTS, s)
                time.sleep(s)
                continue
            except RequestError as exception:
                error = exception.error.lower()
                if "already" in error and "exist" in error:
                    # race
                    logger.info('Index %s exists', self.index)
                else:
                    raise
            # wait for index recovery
            waiting_for_recovery = False
            while True:
                recovery = self._es.indices.recovery(self.index, params={"active_only": "true"})
                shards = recovery.get(self.index, {}).get("shards", [])
                if any(c["stage"] != "DONE" for c in shards):
                    waiting_for_recovery = True
                    s = 1000 / random.randint(1000, 3000)
                    time.sleep(s)
                    logger.warning("Elasticsearch index recovering")
                else:
                    if waiting_for_recovery:
                        logger.warning("Elasticsearch index recovery done")
                    break
            self.configured = True
            break
        else:
            raise Exception('Could not connect to server')

        # use_mapping_types
        if self.use_mapping_types is None:
            if self.version >= [7]:
                self.use_mapping_types = False
            else:
                mappings = set(list(self._es.indices.get_mapping(self.index).values())[0]['mappings'])
                self.use_mapping_types = self.LEGACY_DOC_TYPE not in mappings

Source File: helper.py From searchlight with Apache License 2.0

4 votes

def save_documents(self, documents, versions=None, index=None):
        """Send list of serialized documents into search engine.

           Warning: Index vs Alias usage.
           Listeners [plugins/*/notification_handlers.py]:
           When the plugin listeners are indexing documents, we will want
           to use the normal ES alias for their resource group. In this case
           the index parameter will not be set. Listeners are by far the most
           common usage case.

           Re-Indexing [plugins/base.py::index_initial_data()]:
           When we are re-indexing we will want to use the new ES index.
           Bypassing the alias means we will not send duplicate documents
           to the old index. In this case the index will be set. Re-indexing
           is an event that will rarely happen.
        """
        if not index:
            use_index = self.alias_name
        else:
            use_index = index

        for document in documents:
            # NOTE: In Elasticsearch 2.0 field names cannot contain '.', change
            # '.' to '__'.
            utils.replace_dots_in_field_names(document)

        try:
            result = helpers.bulk(
                client=self.engine,
                index=use_index,
                doc_type=self.document_type,
                chunk_size=self.index_chunk_size,
                actions=self._prepare_actions(documents, versions))
        except helpers.BulkIndexError as e:
            err_msg = []
            for err in e.errors:
                if (err['index']['error']['type'] !=
                    VERSION_CONFLICT_EXCEPTION and
                        err['index']['status'] != 409):
                    raise
                err_msg.append("id %(_id)s: %(error)s" % err['index'])
            LOG.warning('Version conflict %s' % ';'.join(err_msg))
            result = 0
        except es_exc.RequestError as e:
            if _is_multiple_alias_exception(e):
                LOG.error("Alias [%(a)s] with multiple indexes error" %
                          {'a': self.alias_name})
                self._index_alias_multiple_indexes_bulk(documents=documents,
                                                        versions=versions)

            result = 0
        LOG.debug("Indexing result: %s", result)

Source File: elastic.py From timesketch with Apache License 2.0

4 votes

def create_index(self, index_name=uuid4().hex, doc_type='generic_event'):
        """Create index with Timesketch settings.

        Args:
            index_name: Name of the index. Default is a generated UUID.
            doc_type: Name of the document type. Default id generic_event.

        Returns:
            Index name in string format.
            Document type in string format.
        """
        _document_mapping = {
            'properties': {
                'timesketch_label': {
                    'type': 'nested'
                },
                'datetime': {
                    'type': 'date'
                }
            }
        }

        # TODO: Remove when we deprecate Elasticsearch version 6.x
        if self.version.startswith('6'):
            _document_mapping = {doc_type: _document_mapping}

        if not self.client.indices.exists(index_name):
            try:
                self.client.indices.create(
                    index=index_name, body={'mappings': _document_mapping})
            except ConnectionError:
                raise RuntimeError('Unable to connect to Timesketch backend.')
            except RequestError:
                index_exists = self.client.indices.exists(index_name)
                es_logger.warning(
                    'Attempting to create an index that already exists '
                    '({0:s} - {1:s})'.format(index_name, str(index_exists)))

        # We want to return unicode here to keep SQLalchemy happy.
        if six.PY2:
            if not isinstance(index_name, six.text_type):
                index_name = codecs.decode(index_name, 'utf-8')

            if not isinstance(doc_type, six.text_type):
                doc_type = codecs.decode(doc_type, 'utf-8')

        return index_name, doc_type

Source File: search.py From searchlight with Apache License 2.0

4 votes

def search(self, req, query, index=None, doc_type=None,
               from_=0, size=None, **kwargs):
        """Supported kwargs:
        :param _source:
        :param _source_include:
        :param _source_exclude:
        :return:
        """
        if size is None:
            size = CONF.limit_param_default

        try:
            search_repo = self.gateway.get_catalog_search_repo(req.context)
            result = search_repo.search(index,
                                        doc_type,
                                        query,
                                        from_,
                                        size,
                                        ignore_unavailable=True,
                                        **kwargs)

            hits = result.get('hits', {}).get('hits', [])
            try:
                # Note that there's an assumption that the plugin resource
                # type is always the same as the document type. If this is not
                # the case in future, a reverse lookup's needed
                for hit in hits:
                    plugin = self.plugins[hit['_type']].obj
                    plugin.filter_result(hit, req.context)
            except KeyError as e:
                raise Exception("No registered plugin for type %s" % e.message)
            return result
        except exception.Forbidden as e:
            raise webob.exc.HTTPForbidden(explanation=e.msg)
        except exception.NotFound as e:
            raise webob.exc.HTTPNotFound(explanation=e.msg)
        except exception.Duplicate as e:
            raise webob.exc.HTTPConflict(explanation=e.msg)
        except es_exc.RequestError:
            msg = _("Query malformed or search parse failure, please check "
                    "the syntax")
            raise webob.exc.HTTPBadRequest(explanation=msg)
        except Exception as e:
            LOG.error(encodeutils.exception_to_unicode(e))
            raise webob.exc.HTTPInternalServerError()

Python elasticsearch.exceptions.RequestError() Examples