Python elasticsearch.helpers.BulkIndexError() Examples

The following are 13 code examples of elasticsearch.helpers.BulkIndexError(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module elasticsearch.helpers, or try the search function.
Example #1
Source File: elasticdatastore.py    From ethdrain with MIT License 6 votes vote down vote up
def save(self):
        """Bulk-index the queued actions and report what was stored.

        Returns a summary string when the bulk call succeeds. Returns
        nothing when there are no queued actions, or when the bulk call
        raises ``BulkIndexError``; in that case the failure is printed and
        the ``_id`` of every queued block action is logged as an error.
        """
        block_count = len([a for a in self.actions if a["_type"] == "b"])
        tx_count = len([a for a in self.actions if a["_type"] == "tx"])

        if not self.actions:
            return None

        try:
            helpers.bulk(self.elastic, self.actions)
        except helpers.BulkIndexError as bulk_error:
            print("Issue with {} blocks:\n{}\n".format(block_count, bulk_error))
            # The whole batch is suspect, so list every block that was in it.
            for action in self.actions:
                if action["_type"] == "b":
                    logging.error("block: " + str(action["_id"]))
        else:
            return "{} blocks and {} transactions indexed".format(
                block_count, tx_count
            )
Example #2
Source File: index.py    From lego with MIT License 5 votes vote down vote up
def update(self):
        """
        Rebuilds the entire index.
        Rows are pushed to the backend in primary-key windows to optimize
        performance. NB: Requires automatic IDs.
        """

        def as_triple(row):
            # The backend receives (content_type, pk, data) tuples.
            fields = self.prepare(row)
            return fields["content_type"], fields["pk"], fields["data"]

        def index_rows(rows):
            # A failed bulk call is logged; later windows are still attempted.
            try:
                self.get_backend().update_many(map(as_triple, rows))
            except BulkIndexError as error:
                log.critical(error)

        def walk(queryset, handler, window=100, lower=0):
            if not queryset.exists():
                return

            try:
                # The highest pk is looked up again on every pass.
                while lower < queryset.order_by("pk").last().pk:
                    upper = lower + window
                    handler(
                        queryset.filter(pk__gt=lower, pk__lte=upper).iterator()
                    )
                    lower = upper
            except TypeError:
                # Any TypeError raised above (presumably a pk that cannot be
                # compared with an int) falls back to one pass over all rows.
                handler(queryset.all().iterator())

        walk(self.get_queryset(), index_rows)
Example #3
Source File: test_indexing_helper.py    From searchlight with Apache License 2.0 5 votes vote down vote up
def test_bulk_index_error_handling(self):
        """Bulk failures with status 404 or 409 must not escape the helper"""
        from elasticsearch import helpers

        engine = mock.Mock()
        simple_plugin = fake_plugins.FakeSimplePlugin(es_engine=engine)
        under_test = helper.IndexingHelper(simple_plugin)

        failure_message = "1 document(s) failed to index"
        patch_target = 'searchlight.elasticsearch.plugins.helper.helpers.bulk'

        # Delete path: the bulk call reports a 404 for the document.
        with mock.patch(patch_target) as patched_bulk:
            not_found = {'delete': {"_id": "1",
                                    "error": "Some error",
                                    "status": 404,
                                    "exception": helpers.TransportError()}}
            patched_bulk.side_effect = helpers.BulkIndexError(
                failure_message, [not_found])

            under_test.delete_documents([{'_id': '1'}])

            self.assertEqual(1, patched_bulk.call_count)

        # Save path: the bulk call reports a 409 version conflict.
        with mock.patch(patch_target) as patched_bulk:
            conflict = {'index': {
                "_id": "1",
                "error": {"type": "version_conflict_engine_exception"},
                "status": 409}}
            patched_bulk.side_effect = helpers.BulkIndexError(
                failure_message, [conflict])

            under_test.save_documents([{'id': '1'}])
            self.assertEqual(1, patched_bulk.call_count)
Example #4
Source File: test_neutron_securitygroup_plugin.py    From searchlight with Apache License 2.0 5 votes vote down vote up
def test_rule_update_exception(self):
        """Saving a rule retries on version conflicts, up to a limit."""
        # Notification payload and the document the patched helper returns.
        payload = _secgrouprule_fixture(ID1, TENANT1)
        doc = {'_source': {'security_group_rules': [], 'id': 1},
               '_version': 1}
        event = 'security_group_rule.create.end'

        handler = self.plugin.get_notification_handler()
        with mock.patch.object(self.plugin.index_helper,
                               'get_document') as mock_get, \
                mock.patch.object(self.plugin.index_helper,
                                  'save_document') as mock_save:
            mock_get.return_value = doc
            conflict = helpers.BulkIndexError(
                "Version conflict",
                [{'index': {"_id": "1",
                            "error": "Some error",
                            "status": 409}}]
            )

            # A single conflict followed by a successful save.
            mock_save.side_effect = [conflict, {}]
            handler.create_or_update_rule(event, payload, None)
            # 1 retry + 1 success = 2 calls.
            self.assertEqual(2, mock_get.call_count)
            self.assertEqual(2, mock_save.call_count)

            # Queue 24 conflicts, which is more than the retry limit, so
            # not all of them will be consumed.
            mock_get.reset_mock()
            mock_save.reset_mock()
            mock_save.side_effect = [conflict] * 24 + [{}]
            handler.create_or_update_rule(event, payload, None)
            # Verify we bailed out after 20 retries.
            self.assertEqual(20, mock_get.call_count)
            self.assertEqual(20, mock_save.call_count)
Example #5
Source File: projects.py    From repoxplorer with Apache License 2.0 5 votes vote down vote up
def create(self, docs):
        """Bulk-create the given ``(id, document)`` pairs, then refresh.

        A ``BulkIndexError`` raised by the bulk call is handed to
        ``self.manage_bulk_err``; the index refresh runs afterwards either
        way.
        """
        def actions():
            # One 'create' action per (id, document) pair.
            for doc_id, body in docs:
                yield {
                    '_index': self.index,
                    '_type': self.dbname,
                    '_op_type': 'create',
                    '_id': doc_id,
                    '_source': body,
                }

        try:
            bulk(self.es, actions())
        except BulkIndexError as bulk_error:
            self.manage_bulk_err(bulk_error)
        self.es.indices.refresh(index=self.index)
Example #6
Source File: elasticsearch_client.py    From MozDef with Mozilla Public License 2.0 5 votes vote down vote up
def save_documents(self, documents):
        """Tag each document with the document type and bulk-index them.

        The documents are modified in place. A ``BulkIndexError`` from the
        bulk call is logged as an error and not re-raised.
        """
        # ES library still requires _type to be set
        for doc in documents:
            doc['_type'] = DOCUMENT_TYPE

        try:
            bulk(self.es_connection, documents)
        except BulkIndexError as bulk_error:
            message = "Error bulk indexing: " + str(bulk_error)
            logger.error(message)
Example #7
Source File: elastic.py    From edx-search with GNU Affero General Public License v3.0 5 votes vote down vote up
def remove(self, doc_type, doc_ids, **kwargs):
        """ Implements the call that deletes the given documents from the index """

        def delete_action(doc_id):
            # One bulk 'delete' action for a single document id.
            return {
                '_op_type': 'delete',
                "_index": self.index_name,
                "_type": doc_type,
                "_id": doc_id
            }

        try:
            # ignore is flagged as an unexpected-keyword-arg; ES python client documents that it can be used
            # pylint: disable=unexpected-keyword-arg
            actions = []
            for doc_id in doc_ids:
                log.debug("Removing document of type %s and index %s", doc_type, doc_id)  # lint-amnesty, pylint: disable=unicode-format-string
                actions.append(delete_action(doc_id))
            bulk(self._es, actions, **kwargs)
        except BulkIndexError as ex:
            # A 404 on delete is not treated as a failure; anything else is.
            unexpected = []
            for error in ex.errors:
                if error['delete']['status'] != 404:
                    unexpected.append(error)

            if not unexpected:
                return

            log.exception("An error occurred while removing documents from the index.")
            raise

    # A few disabled pylint violations here:
    # This procedure takes each of the possible input parameters and builds the query with each argument
    # I tried doing this in separate steps, but IMO it makes it more difficult to follow instead of less
    # So, reasoning:
    #
    #   too-many-arguments: We have all these different parameters to which we
    #       wish to pay attention, it makes more sense to have them listed here
    #       instead of burying them within kwargs
    #
    #   too-many-locals: I think this counts all the arguments as well, but
    #       there are some local variables used herein that are there for transient
    #       purposes and actually promote the ease of understanding
    #
    #   too-many-branches: There's a lot of logic of the form 'if I have this
    #       optional argument then...'. The reasoning is again that it's easier
    #       to read the (somewhat linear) flow than to jump to other locations in the code
Example #8
Source File: test_engines.py    From edx-search with GNU Affero General Public License v3.0 5 votes vote down vote up
def test_remove_failure_bulk(self):
        """ a bulk delete that fails with a non-404 status should raise """
        doc_id = 'test_id'
        doc_type = 'test_doc'
        failure = {
            'delete': {
                'status': 500,
                '_type': doc_type,
                '_index': 'test_index',
                '_version': 1,
                'found': True,
                '_id': doc_id,
            }
        }
        simulated = BulkIndexError('Simulated error', [failure])
        with patch('search.elastic.bulk', side_effect=simulated), \
                self.assertRaises(BulkIndexError):
            self.searcher.remove(doc_type, [doc_id])
Example #9
Source File: test_pandas_to_eland.py    From eland with Apache License 2.0 5 votes vote down vote up
def test_es_if_exists_append_es_type_coerce_error(self):
        """Appending a value that does not fit the mapped type must fail."""
        # Destination settings shared by both uploads.
        destination = {
            "es_client": ES_TEST_CLIENT,
            "es_dest_index": "test-index",
            "es_if_exists": "append",
        }

        # First upload maps column "a" as 'byte'.
        df1 = pandas_to_eland(
            pd_df,
            es_refresh=True,
            es_type_overrides={"a": "byte"},
            **destination,
        )
        assert_pandas_eland_frame_equal(pd_df, df1)

        out_of_range = pd.DataFrame(
            {
                "a": [128],  # This value is too large for 'byte'
                "b": [-1.0],
                "c": ["A"],
                "d": [dt],
            },
            index=["3"],
        )

        with pytest.raises(BulkIndexError) as excinfo:
            pandas_to_eland(out_of_range, **destination)

        # Assert that the value 128 caused the index error
        assert "Value [128] is out of range for a byte" in str(excinfo.value)
Example #10
Source File: test_neutron_securitygroup_plugin.py    From searchlight with Apache License 2.0 4 votes vote down vote up
def test_rule_delete_exception(self):
        """Deleting a rule retries on version conflicts, up to a limit."""
        # Notification payload and the documents the patched helper returns.
        payload = {'security_group_rule_id': ID1}
        doc_get = {'_source': {'security_group_rules': [], 'id': 1},
                   '_version': 1}
        doc_nest = {'hits': {'hits': [{
                    '_id': 123456789,
                    '_source': {'security_group_rules': []},
                    '_version': 1}]}}
        event = 'security_group_rule.delete.end'

        handler = self.plugin.get_notification_handler()
        with mock.patch.object(self.plugin.index_helper,
                               'get_docs_by_nested_field') as mo_nest, \
                mock.patch.object(self.plugin.index_helper,
                                  'get_document') as mock_get, \
                mock.patch.object(self.plugin.index_helper,
                                  'save_document') as mock_save:
            mo_nest.return_value = doc_nest
            mock_get.return_value = doc_get
            conflict = helpers.BulkIndexError(
                "Version conflict",
                [{'index': {"_id": "1",
                            "error": "Some error",
                            "status": 409}}]
            )

            # A single conflict followed by a successful save.
            mock_save.side_effect = [conflict, {}]
            handler.delete_rule(event, payload, None)
            # 1 retry + 1 success = 2 save calls; the nested search and the
            # document re-read each happen once.
            self.assertEqual(1, mo_nest.call_count)
            self.assertEqual(1, mock_get.call_count)
            self.assertEqual(2, mock_save.call_count)

            # Queue 24 conflicts, which is more than the retry limit, so
            # not all of them will be consumed.
            mo_nest.reset_mock()
            mock_get.reset_mock()
            mock_save.reset_mock()
            mock_save.side_effect = [conflict] * 24 + [{}]
            handler.delete_rule(event, payload, None)
            # Verify we bailed out after 20 retries.
            self.assertEqual(1, mo_nest.call_count)
            self.assertEqual(20, mock_get.call_count)
            self.assertEqual(20, mock_save.call_count)
Example #11
Source File: notification_handler.py    From searchlight with Apache License 2.0 4 votes vote down vote up
def _update_server_group_members(self, sg_id, member_id, delete=False):
        """Add ``member_id`` to, or remove it from, a stored server group.

        The notification is not complete: it carries only the single member
        to add to (or remove from) an existing group. Other workers may be
        modifying the same ES document at the same time, so the update is a
        versioned read-modify-write with an aggressive retry policy. After
        a conflict the document has changed, so the latest version is
        fetched again before the next attempt.

        :param sg_id: id of the server group document to update.
        :param member_id: the member to add or remove.
        :param delete: remove the member when true, otherwise append it.
        :returns: the updated document body on success; ``None`` when the
                  document is missing, has no ``members`` field, or every
                  attempt ended in a version conflict.
        :raises helpers.BulkIndexError: when saving fails with a status
                  other than 409.
        """
        LOG.debug("Updating server group member information for %s", sg_id)

        for _attempt in range(SERVERGROUP_RETRIES):
            # Read, modify, write of an existing server group document.
            doc = self.index_helper.get_document(sg_id)

            if not doc:
                return
            body = doc['_source']
            if not body or 'members' not in body:
                return

            if delete:
                body['members'] = [
                    member for member in body['members']
                    if member != member_id]
            else:
                body['members'].append(member_id)

            version = doc['_version'] + 1
            try:
                self.index_helper.save_document(body, version=version)
                return body
            except helpers.BulkIndexError as e:
                if e.errors[0]['index']['status'] != 409:
                    raise
                # Conflict error: loop again with a freshly read document.

        # Reaching this point means no attempt returned or raised, i.e. the
        # retries are exhausted. The loop variable is deliberately not
        # consulted: it is unbound when SERVERGROUP_RETRIES is 0.
        LOG.error('Error updating server group member %(id)s:'
                  ' Too many retries', {'id': member_id})
Example #12
Source File: helper.py    From searchlight with Apache License 2.0 4 votes vote down vote up
def save_documents(self, documents, versions=None, index=None):
        """Bulk-index a list of serialized documents.

           Warning: Index vs Alias usage.
           Listeners [plugins/*/notification_handlers.py]:
           Plugin listeners leave ``index`` unset, so documents are sent to
           the normal ES alias for their resource group. This is by far
           the most common usage case.

           Re-Indexing [plugins/base.py::index_initial_data()]:
           Re-indexing passes the new ES index explicitly. Bypassing the
           alias means duplicate documents are not sent to the old index.
           Re-indexing is an event that will rarely happen.

           Bulk failures that are version conflicts are logged as a
           warning; any other bulk failure is re-raised.
        """
        use_index = index or self.alias_name

        # NOTE: In Elasticsearch 2.0 field names cannot contain '.', change
        # '.' to '__'.
        for doc in documents:
            utils.replace_dots_in_field_names(doc)

        try:
            result = helpers.bulk(
                client=self.engine,
                index=use_index,
                doc_type=self.document_type,
                chunk_size=self.index_chunk_size,
                actions=self._prepare_actions(documents, versions))
        except helpers.BulkIndexError as e:
            conflicts = []
            for failure in e.errors:
                detail = failure['index']
                is_conflict = (
                    detail['error']['type'] == VERSION_CONFLICT_EXCEPTION or
                    detail['status'] == 409)
                if not is_conflict:
                    raise
                conflicts.append("id %(_id)s: %(error)s" % detail)
            LOG.warning('Version conflict %s' % ';'.join(conflicts))
            result = 0
        except es_exc.RequestError as e:
            # Only the multiple-index alias case is handled; any other
            # RequestError is dropped here and the result reported as 0.
            if _is_multiple_alias_exception(e):
                LOG.error("Alias [%(a)s] with multiple indexes error" %
                          {'a': self.alias_name})
                self._index_alias_multiple_indexes_bulk(documents=documents,
                                                        versions=versions)

            result = 0
        LOG.debug("Indexing result: %s", result)
Example #13
Source File: notification_handlers.py    From searchlight with Apache License 2.0 4 votes vote down vote up
def delete_rule(self, event_type, payload, timestamp):
        """Remove a security group rule from its stored security group.

        See comment for create_or_update_rule() for details.

        :param event_type: notification event type, passed through to the
                           returned index item.
        :param payload: notification payload; must contain
                        ``security_group_rule_id``.
        :param timestamp: not used by this method.
        :returns: a ``pipeline.IndexItem`` for the updated document on
                  success; ``None`` when no matching document is found,
                  the document lacks the rules field, or every attempt
                  ended in a version conflict.
        :raises helpers.BulkIndexError: when saving fails with a status
                  other than 409.
        """
        rule_id = payload['security_group_rule_id']
        LOG.debug("Updating security group rule information for %s", rule_id)

        field = 'security_group_rules'

        # Read, modify, write of an existing security group.
        # To avoid a race condition, we are searching for the document
        # in a round-about way. Outside of the retry loop, we will
        # search for the document and save the document ID. This way we
        # do not need to search inside the loop. We will access the document
        # directly by the ID which will always return the latest version.
        orig_doc = self.index_helper.get_docs_by_nested_field(
            "security_group_rules", "id", rule_id, version=True)
        if not orig_doc:
            return
        hits = orig_doc['hits']['hits']
        if not hits:
            # A search result with no hits has nothing to update; indexing
            # hits[0] here would raise IndexError.
            return
        doc = hits[0]
        doc_id = doc['_id']

        for _attempt in range(SECGROUP_RETRIES):
            body = doc['_source']
            if not body or field not in body:
                return

            body[field] = [
                rule for rule in body[field] if rule['id'] != rule_id]

            version = doc['_version'] + 1
            try:
                self.index_helper.save_document(body, version=version)
                return pipeline.IndexItem(self.index_helper.plugin,
                                          event_type,
                                          payload,
                                          body)
            except helpers.BulkIndexError as e:
                if e.errors[0]['index']['status'] != 409:
                    raise
                # Conflict. Retry with new version.
                doc = self.index_helper.get_document(doc_id)
                if not doc:
                    return

        # Reaching this point means no attempt returned or raised, i.e. the
        # retries are exhausted. The loop variable is deliberately not
        # consulted: it is unbound when SECGROUP_RETRIES is 0.
        LOG.error('Error deleting security group rule %(id)s:'
                  ' Too many retries', {'id': rule_id})