Python elasticsearch.helpers.BulkIndexError() Examples
The following are 13 code examples of elasticsearch.helpers.BulkIndexError().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module elasticsearch.helpers, or try the search function.
Example #1
Source File: elasticdatastore.py From ethdrain with MIT License | 6 votes |
def save(self):
    """Bulk-index the pending actions and report how many were sent.

    Returns:
        A summary string when there are pending actions and the bulk
        request succeeds. ``None`` when there is nothing to index, or when
        the bulk request raises ``helpers.BulkIndexError`` (the failure is
        printed and the id of every block action is logged).
    """
    block_count = sum(1 for action in self.actions if action["_type"] == "b")
    tx_count = sum(1 for action in self.actions if action["_type"] == "tx")

    # Nothing pending: skip the round-trip entirely.
    if not self.actions:
        return None

    try:
        helpers.bulk(self.elastic, self.actions)
    except helpers.BulkIndexError as exception:
        print("Issue with {} blocks:\n{}\n".format(block_count, exception))
        # Record which blocks were part of the failed request.
        for action in self.actions:
            if action["_type"] == "b":
                logging.error("block: " + str(action["_id"]))
        return None

    return "{} blocks and {} transactions indexed".format(block_count, tx_count)
Example #2
Source File: index.py From lego with MIT License | 5 votes |
def update(self):
    """
    Rebuild the entire index, sending rows to the backend in batches
    selected by primary-key range.

    NB: Requires automatic IDs.
    """

    def to_backend_entry(instance):
        # Flatten the prepared dict into the (content_type, pk, data) triple
        # that is handed to the backend's update_many().
        fields = self.prepare(instance)
        return fields["content_type"], fields["pk"], fields["data"]

    def push(instances):
        # A failed bulk request is logged as critical and not re-raised.
        try:
            self.get_backend().update_many(map(to_backend_entry, instances))
        except BulkIndexError as e:
            log.critical(e)

    def in_pk_windows(queryset, handler, chunk=100, start=0):
        if not queryset.exists():
            return
        try:
            while True:
                # The highest pk is re-read on every pass, as in the
                # original loop condition.
                highest_pk = queryset.order_by("pk").last().pk
                if not start < highest_pk:
                    break
                upper = start + chunk
                handler(queryset.filter(pk__gt=start, pk__lte=upper).iterator())
                start = upper
        except TypeError:
            # Any TypeError raised inside the loop (presumably a pk that
            # cannot be compared with an int -- verify) falls back to a
            # single pass over the whole queryset.
            handler(queryset.all().iterator())

    in_pk_windows(self.get_queryset(), push)
Example #3
Source File: test_indexing_helper.py From searchlight with Apache License 2.0 | 5 votes |
def test_bulk_index_error_handling(self):
    """Check that 404 and 409 errors are appropriately ignored"""
    from elasticsearch import helpers

    indexing_helper = helper.IndexingHelper(
        fake_plugins.FakeSimplePlugin(es_engine=mock.Mock()))

    bulk_target = 'searchlight.elasticsearch.plugins.helper.helpers.bulk'
    failure_message = "1 document(s) failed to index"

    # Case 1: a delete that comes back with status 404.
    with mock.patch(bulk_target) as patched_bulk:
        not_found = {'delete': {"_id": "1",
                                "error": "Some error",
                                "status": 404,
                                "exception": helpers.TransportError()}}
        patched_bulk.side_effect = helpers.BulkIndexError(
            failure_message, [not_found])

        indexing_helper.delete_documents([{'_id': '1'}])

        self.assertEqual(1, patched_bulk.call_count)

    # Case 2: an index operation that comes back with a 409 version conflict.
    with mock.patch(bulk_target) as patched_bulk:
        conflict = {'index': {"_id": "1",
                              "error": {
                                  "type": "version_conflict_engine_exception"},
                              "status": 409}}
        patched_bulk.side_effect = helpers.BulkIndexError(
            failure_message, [conflict])

        indexing_helper.save_documents([{'id': '1'}])

        self.assertEqual(1, patched_bulk.call_count)
Example #4
Source File: test_neutron_securitygroup_plugin.py From searchlight with Apache License 2.0 | 5 votes |
def test_rule_update_exception(self):
    """Rule create/update retries on 409 conflicts and stops after 20 tries."""
    # Set up the return documents.
    payload = _secgrouprule_fixture(ID1, TENANT1)
    doc = {'_source': {'security_group_rules': [], 'id': 1},
           '_version': 1}

    handler = self.plugin.get_notification_handler()
    with mock.patch.object(self.plugin.index_helper,
                           'get_document') as mock_get, \
            mock.patch.object(self.plugin.index_helper,
                              'save_document') as mock_save:
        mock_get.return_value = doc
        conflict = helpers.BulkIndexError(
            "Version conflict", [{'index': {
                "_id": "1", "error": "Some error", "status": 409}}]
        )

        # 1 retry (exception).
        mock_save.side_effect = [conflict, {}]
        handler.create_or_update_rule(
            'security_group_rule.create.end', payload, None)
        # 1 retry + 1 success = 2 calls.
        self.assertEqual(2, mock_get.call_count)
        self.assertEqual(2, mock_save.call_count)

        # 24 retries (exceptions) that exceed the retry limit.
        # Not all retries will be used.
        mock_get.reset_mock()
        mock_save.reset_mock()
        mock_save.side_effect = [conflict] * 24 + [{}]
        handler.create_or_update_rule(
            'security_group_rule.create.end', payload, None)
        # Verify we bailed out after 20 retries.
        self.assertEqual(20, mock_get.call_count)
        self.assertEqual(20, mock_save.call_count)
Example #5
Source File: projects.py From repoxplorer with Apache License 2.0 | 5 votes |
def create(self, docs):
    """Bulk-create the given documents, then refresh the index.

    Args:
        docs: iterable of ``(pid, doc)`` pairs; ``pid`` is used as the
            document ``_id`` and ``doc`` as its ``_source``.

    A ``BulkIndexError`` raised by the bulk call is handed to
    ``self.manage_bulk_err`` instead of propagating. The index refresh
    runs afterwards in either case.
    """
    def create_actions():
        # One 'create' action per incoming (id, document) pair.
        for pid, doc in docs:
            yield {
                '_index': self.index,
                '_type': self.dbname,
                '_op_type': 'create',
                '_id': pid,
                '_source': doc,
            }

    try:
        bulk(self.es, create_actions())
    except BulkIndexError as exc:
        self.manage_bulk_err(exc)
    self.es.indices.refresh(index=self.index)
Example #6
Source File: elasticsearch_client.py From MozDef with Mozilla Public License 2.0 | 5 votes |
def save_documents(self, documents):
    """Stamp every document with the document type and bulk-index them.

    Each document is modified in place (its ``_type`` key is set). A
    ``BulkIndexError`` from the bulk call is logged and not re-raised.
    """
    # ES library still requires _type to be set
    for doc in documents:
        doc['_type'] = DOCUMENT_TYPE

    try:
        bulk(self.es_connection, documents)
    except BulkIndexError as bulk_error:
        logger.error("Error bulk indexing: " + str(bulk_error))
Example #7
Source File: elastic.py From edx-search with GNU Affero General Public License v3.0 | 5 votes |
def remove(self, doc_type, doc_ids, **kwargs):
    """
    Delete the documents with the given ids from the index in one bulk call.

    Extra keyword arguments are forwarded to ``bulk``. Failures whose status
    is 404 are ignored; if any other failure is present, the error is logged
    and the ``BulkIndexError`` is re-raised.
    """
    try:
        # ignore is flagged as an unexpected-keyword-arg; ES python client documents that it can be used
        # pylint: disable=unexpected-keyword-arg
        delete_actions = []
        for doc_id in doc_ids:
            log.debug("Removing document of type %s and index %s", doc_type, doc_id)  # lint-amnesty, pylint: disable=unicode-format-string
            delete_actions.append({
                '_op_type': 'delete',
                "_index": self.index_name,
                "_type": doc_type,
                "_id": doc_id
            })
        bulk(self._es, delete_actions, **kwargs)
    except BulkIndexError as ex:
        # Only failures other than 404 count as real errors.
        real_failures = [failure for failure in ex.errors if failure['delete']['status'] != 404]
        if not real_failures:
            return
        log.exception("An error occurred while removing documents from the index.")
        raise

# A few disabled pylint violations here:
# This procedure takes each of the possible input parameters and builds the query with each argument
# I tried doing this in separate steps, but IMO it makes it more difficult to follow instead of less
# So, reasoning:
#
#   too-many-arguments: We have all these different parameters to which we
#       wish to pay attention, it makes more sense to have them listed here
#       instead of burying them within kwargs
#
#   too-many-locals: I think this counts all the arguments as well, but
#       there are some local variables used herein that are there for transient
#       purposes and actually promote the ease of understanding
#
#   too-many-branches: There's a lot of logic on the 'if I have this
#       optional argument then...'. Reasoning goes back to its easier to read
#       the (somewhat linear) flow rather than to jump up to other locations in code
Example #8
Source File: test_engines.py From edx-search with GNU Affero General Public License v3.0 | 5 votes |
def test_remove_failure_bulk(self):
    """ the remove operation should fail """
    doc_id = 'test_id'
    doc_type = 'test_doc'
    # A delete failure with a non-404 status, which remove() must re-raise.
    server_error = {
        'delete': {
            'status': 500,
            '_type': doc_type,
            '_index': 'test_index',
            '_version': 1,
            'found': True,
            '_id': doc_id,
        }
    }
    simulated = BulkIndexError('Simulated error', [server_error])
    with patch('search.elastic.bulk', side_effect=simulated), \
            self.assertRaises(BulkIndexError):
        self.searcher.remove(doc_type, [doc_id])
Example #9
Source File: test_pandas_to_eland.py From eland with Apache License 2.0 | 5 votes |
def test_es_if_exists_append_es_type_coerce_error(self):
    """Appending a value that does not fit the 'byte' override must fail."""
    # First upload: column "a" is explicitly mapped as 'byte'.
    uploaded = pandas_to_eland(
        pd_df,
        es_client=ES_TEST_CLIENT,
        es_dest_index="test-index",
        es_if_exists="append",
        es_refresh=True,
        es_type_overrides={"a": "byte"},
    )
    assert_pandas_eland_frame_equal(pd_df, uploaded)

    out_of_range_row = {
        "a": [128],  # This value is too large for 'byte'
        "b": [-1.0],
        "c": ["A"],
        "d": [dt],
    }
    pd_df_short = pd.DataFrame(out_of_range_row, index=["3"])

    with pytest.raises(BulkIndexError) as e:
        pandas_to_eland(
            pd_df_short,
            es_client=ES_TEST_CLIENT,
            es_dest_index="test-index",
            es_if_exists="append",
        )

    # Assert that the value 128 caused the index error
    assert "Value [128] is out of range for a byte" in str(e.value)
Example #10
Source File: test_neutron_securitygroup_plugin.py From searchlight with Apache License 2.0 | 4 votes |
def test_rule_delete_exception(self):
    """Rule deletion retries on 409 conflicts and stops after 20 tries."""
    # Set up the return documents.
    payload = {'security_group_rule_id': ID1}
    doc_get = {'_source': {'security_group_rules': [], 'id': 1},
               '_version': 1}
    nested_hit = {'_id': 123456789,
                  '_source': {'security_group_rules': []},
                  '_version': 1}
    doc_nest = {'hits': {'hits': [nested_hit]}}

    handler = self.plugin.get_notification_handler()
    with mock.patch.object(self.plugin.index_helper,
                           'get_docs_by_nested_field') as mo_nest, \
            mock.patch.object(self.plugin.index_helper,
                              'get_document') as mock_get, \
            mock.patch.object(self.plugin.index_helper,
                              'save_document') as mock_save:
        mo_nest.return_value = doc_nest
        mock_get.return_value = doc_get
        conflict = helpers.BulkIndexError(
            "Version conflict", [{'index': {
                "_id": "1", "error": "Some error", "status": 409}}]
        )

        # 1 retry (exception).
        mock_save.side_effect = [conflict, {}]
        handler.delete_rule(
            'security_group_rule.delete.end', payload, None)
        # 1 retry + 1 success = 2 calls.
        self.assertEqual(1, mo_nest.call_count)
        self.assertEqual(1, mock_get.call_count)
        self.assertEqual(2, mock_save.call_count)

        # 24 retries (exceptions) that exceed the retry limit.
        # Not all retries will be used.
        mo_nest.reset_mock()
        mock_get.reset_mock()
        mock_save.reset_mock()
        mock_save.side_effect = [conflict] * 24 + [{}]
        handler.delete_rule(
            'security_group_rule.delete.end', payload, None)
        # Verify we bailed out after 20 retries.
        self.assertEqual(1, mo_nest.call_count)
        self.assertEqual(20, mock_get.call_count)
        self.assertEqual(20, mock_save.call_count)
Example #11
Source File: notification_handler.py From searchlight with Apache License 2.0 | 4 votes |
def _update_server_group_members(self, sg_id, member_id, delete=False):
    """Add ``member_id`` to, or remove it from, the indexed group ``sg_id``.

    Performs a versioned read-modify-write of the document, retrying on
    version conflicts (status 409) up to ``SERVERGROUP_RETRIES`` times.

    Returns:
        The saved document body on success. ``None`` when the document is
        missing, has no body or no ``members`` field, or when every attempt
        ended in a conflict (an error is logged in that last case).

    Raises:
        helpers.BulkIndexError: when the save fails with a status other
            than 409.
    """
    # The issue here is that the notification is not complete.
    # We have only a single member that needs to be added to an
    # existing group. A major issue is that we may be updating
    # the ES document while other workers are modifying the rules
    # in the same ES document. This requires an aggressive retry policy,
    # using the "version" field. Since the ES document will have been
    # modified after a conflict, we will need to grab the latest version
    # of the document before continuing. After "retries" number of times,
    # we will admit failure and not try the update anymore.
    LOG.debug("Updating server group member information for %s", sg_id)

    for attempt in range(SERVERGROUP_RETRIES):
        # Read, modify, write of an existing group document. The document
        # is re-read on every attempt so a retry sees the latest version.
        doc = self.index_helper.get_document(sg_id)
        if not doc:
            return
        body = doc['_source']
        if not body or 'members' not in body:
            return

        if delete:
            body['members'] = [
                member for member in body['members'] if member != member_id]
        else:
            body['members'].append(member_id)

        next_version = doc['_version'] + 1
        try:
            self.index_helper.save_document(body, version=next_version)
        except helpers.BulkIndexError as e:
            if e.errors[0]['index']['status'] != 409:
                raise
            # Conflict error, retry with new version of doc.
        else:
            return body

        if attempt == (SERVERGROUP_RETRIES - 1):
            LOG.error('Error updating server group member %(id)s:'
                      ' Too many retries' % {'id': member_id})
Example #12
Source File: helper.py From searchlight with Apache License 2.0 | 4 votes |
def save_documents(self, documents, versions=None, index=None):
    """Send list of serialized documents into search engine.

    Warning: Index vs Alias usage.
    Listeners [plugins/*/notification_handlers.py]:
    When the plugin listeners are indexing documents, we will want
    to use the normal ES alias for their resource group. In this case
    the index parameter will not be set. Listeners are by far the most
    common usage case.

    Re-Indexing [plugins/base.py::index_initial_data()]:
    When we are re-indexing we will want to use the new ES index.
    Bypassing the alias means we will not send duplicate documents
    to the old index. In this case the index will be set. Re-indexing
    is an event that will rarely happen.

    Bulk failures made up only of version conflicts are logged as a
    warning; any other bulk failure is re-raised.
    """
    # Fall back to the alias when no explicit index was given.
    use_index = index if index else self.alias_name

    for document in documents:
        # NOTE: In Elasticsearch 2.0 field names cannot contain '.', change
        # '.' to '__'.
        utils.replace_dots_in_field_names(document)

    try:
        result = helpers.bulk(
            client=self.engine,
            index=use_index,
            doc_type=self.document_type,
            chunk_size=self.index_chunk_size,
            actions=self._prepare_actions(documents, versions))
    except helpers.BulkIndexError as e:
        conflicts = []
        for err in e.errors:
            info = err['index']
            is_conflict = (
                info['error']['type'] == VERSION_CONFLICT_EXCEPTION or
                info['status'] == 409)
            if not is_conflict:
                raise
            conflicts.append("id %(_id)s: %(error)s" % info)
        LOG.warning('Version conflict %s' % ';'.join(conflicts))
        result = 0
    except es_exc.RequestError as e:
        if _is_multiple_alias_exception(e):
            LOG.error("Alias [%(a)s] with multiple indexes error" %
                      {'a': self.alias_name})
            self._index_alias_multiple_indexes_bulk(documents=documents,
                                                    versions=versions)

        result = 0
    LOG.debug("Indexing result: %s", result)
Example #13
Source File: notification_handlers.py From searchlight with Apache License 2.0 | 4 votes |
def delete_rule(self, event_type, payload, timestamp):
    """Remove a security group rule from its indexed security group document.

    The owning document is located once through a nested-field search on the
    rule id, then rewritten with a versioned read-modify-write that retries
    on version conflicts (status 409) up to ``SECGROUP_RETRIES`` times.

    Args:
        event_type: notification event type, passed through to the
            returned ``pipeline.IndexItem``.
        payload: notification payload; must contain
            ``'security_group_rule_id'``.
        timestamp: not used by this method.

    Returns:
        A ``pipeline.IndexItem`` for the updated document on success.
        ``None`` when no document holds the rule, when the document has no
        body or no ``security_group_rules`` field, when the document
        disappears between retries, or when every attempt ended in a
        conflict (an error is logged in that last case).

    Raises:
        helpers.BulkIndexError: when the save fails with a status other
            than 409.
    """
    # See comment for create_or_update_rule() for details.
    rule_id = payload['security_group_rule_id']
    LOG.debug("Updating security group rule information for %s", rule_id)

    field = 'security_group_rules'

    # Read, modify, write of an existing security group.
    # To avoid a race condition, we are searching for the document
    # in a round-about way. Outside of the retry loop, we will
    # search for the document and save the document ID. This way we
    # do not need to search inside the loop. We will access the document
    # directly by the ID which will always return the latest version.
    orig_doc = self.index_helper.get_docs_by_nested_field(
        "security_group_rules", "id", rule_id, version=True)
    if not orig_doc:
        return
    hits = orig_doc['hits']['hits']
    if not hits:
        # The search succeeded but matched nothing: no indexed security
        # group holds this rule, so there is nothing to update. Without
        # this guard the [0] lookup below raised IndexError.
        return
    doc = hits[0]
    doc_id = doc['_id']

    for attempts in range(SECGROUP_RETRIES):
        body = doc['_source']
        if not body or field not in body:
            return

        body[field] = [rule for rule in body[field] if rule['id'] != rule_id]

        version = doc['_version']
        try:
            version += 1
            self.index_helper.save_document(body, version=version)
            return pipeline.IndexItem(self.index_helper.plugin,
                                      event_type,
                                      payload,
                                      body)
        except helpers.BulkIndexError as e:
            if e.errors[0]['index']['status'] == 409:
                # Conflict. Retry with new version.
                doc = self.index_helper.get_document(doc_id)
                if not doc:
                    return
            else:
                raise

        if attempts == (SECGROUP_RETRIES - 1):
            LOG.error('Error deleting security group rule %(id)s:'
                      ' Too many retries' % {'id': rule_id})