Python elasticsearch.RequestsHttpConnection() Examples
The following are 18 code examples of elasticsearch.RequestsHttpConnection(), collected from open source projects.
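All of the examples below share one pattern: passing connection_class=RequestsHttpConnection to the Elasticsearch client constructor so that HTTP traffic is handled by the requests library, which is what allows proxies and requests-compatible signing auth objects such as AWS4Auth to work. Here is a minimal sketch of that pattern; the localhost host and port are placeholder assumptions, and RequestsHttpConnection exists in elasticsearch-py 7.x and earlier (it was removed in 8.x, which uses elastic-transport instead):

# Minimal sketch of the shared pattern, assuming elasticsearch-py 7.x
# and a cluster at localhost:9200 (both placeholder assumptions).
from elasticsearch import Elasticsearch, RequestsHttpConnection

es = Elasticsearch(
    hosts=[{'host': 'localhost', 'port': 9200}],
    connection_class=RequestsHttpConnection,  # route HTTP through the requests library
)
print(es.info())  # simple smoke test of the connection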
Example #1
Source File: lambda_handler.py From aws-media-insights-engine with Apache License 2.0
def connect_es(endpoint):
    # Handle aws auth for es
    session = boto3.Session()
    credentials = session.get_credentials()
    awsauth = AWS4Auth(credentials.access_key, credentials.secret_key,
                       session.region_name, 'es',
                       session_token=credentials.token)
    print('Connecting to the ES Endpoint: {endpoint}'.format(endpoint=endpoint))
    try:
        es_client = Elasticsearch(
            hosts=[{'host': endpoint, 'port': 443}],
            use_ssl=True,
            verify_certs=True,
            http_auth=awsauth,
            connection_class=RequestsHttpConnection)
    except Exception as e:
        print("Unable to connect to {endpoint}:".format(endpoint=endpoint), e)
    else:
        print('Connected to elasticsearch')
        return es_client
Example #2
Source File: __init__.py From elastalert with Apache License 2.0
def __init__(self, conf):
    """
    :arg conf: es_conn_config dictionary. Ref. :func:`~util.build_es_conn_config`
    """
    super(ElasticSearchClient, self).__init__(host=conf['es_host'],
                                              port=conf['es_port'],
                                              url_prefix=conf['es_url_prefix'],
                                              use_ssl=conf['use_ssl'],
                                              verify_certs=conf['verify_certs'],
                                              ca_certs=conf['ca_certs'],
                                              connection_class=RequestsHttpConnection,
                                              http_auth=conf['http_auth'],
                                              timeout=conf['es_conn_timeout'],
                                              send_get_body_as=conf['send_get_body_as'],
                                              client_cert=conf['client_cert'],
                                              client_key=conf['client_key'])
    self._conf = copy.copy(conf)
    self._es_version = None
Example #3
Source File: es_plugin.py From paclair with Apache License 2.0
def __init__(self, clair, hosts, index, doc_type, suffix=None, timedelta=None):
    """
    Constructor

    :param clair: ClairRequest object
    :param hosts: elasticsearch hosts ex: [{'host': '172.18.8.10', 'port': 9200}]
    :param index: elasticsearch index
    :param doc_type: elasticsearch doc_type
    :param suffix: index suffix (ex: one index a day)
    :param timedelta: timedelta from today for suffix
    """
    super().__init__(clair, "Legacy")
    self._es = Elasticsearch(hosts, connection_class=RequestsHttpConnection)
    self.index = index
    self.doc_type = doc_type
    if suffix is not None:
        timedelta = timedelta or {}
        self.index += (datetime.datetime.today()
                       + datetime.timedelta(**timedelta)).strftime(suffix)
Example #4
Source File: processing.py From Gather-Deployment with MIT License
def return_es():
    return Elasticsearch(
        hosts=[{'host': 'elasticsearch', 'port': 9200}],
        connection_class=RequestsHttpConnection,
    )
Example #5
Source File: melasticsearch.py From mining with MIT License
def __init__(self):
    """Open connection on Elasticsearch DataBase"""
    super(Elasticsearch, self).__init__()
    self.search = True
    self.conn = ES([{"host": self.conf.get('host'),
                     "port": int(self.conf.get('port'))}],
                   connection_class=RequestsHttpConnection)
    self.base_url = self.conn.transport.get_connection().base_url
Example #6
Source File: lambdas.py From quickstart-datalake-47lining with Apache License 2.0
def make_elasticsearch_client(elasticsearch_endpoint):
    awsauth = AWSRequestsAuth(
        aws_access_key=os.environ['AWS_ACCESS_KEY_ID'],
        aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
        aws_token=os.environ['AWS_SESSION_TOKEN'],
        aws_host=elasticsearch_endpoint,
        aws_region=os.environ['AWS_REGION'],
        aws_service='es'
    )
    return Elasticsearch(
        hosts=['{0}:443'.format(elasticsearch_endpoint)],
        use_ssl=True,
        connection_class=RequestsHttpConnection,
        http_auth=awsauth
    )
Example #7
Source File: connection.py From data_pipeline with Apache License 2.0
def new_es_client(hosts):
    return Elasticsearch(hosts=hosts,
                         maxsize=50,
                         timeout=1800,
                         # sniff_on_connection_fail=True,
                         # sniff_on_start=True,
                         # sniffer_timeout=60,
                         retry_on_timeout=True,
                         max_retries=10,
                         connection_class=RequestsHttpConnection,
                         verify_certs=True)
Example #8
Source File: search.py From open-ledger with MIT License
def init_es(timeout=TIMEOUT):
    log.info("connecting to %s %s", settings.ELASTICSEARCH_URL, settings.ELASTICSEARCH_PORT)
    auth = AWSRequestsAuth(aws_access_key=settings.AWS_ACCESS_KEY_ID,
                           aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
                           aws_host=settings.ELASTICSEARCH_URL,
                           aws_region='us-west-1',
                           aws_service='es')
    auth.encode = lambda x: bytes(x.encode('utf-8'))
    es = Elasticsearch(host=settings.ELASTICSEARCH_URL,
                       port=settings.ELASTICSEARCH_PORT,
                       connection_class=RequestsHttpConnection,
                       timeout=timeout,
                       max_retries=10,
                       retry_on_timeout=True,
                       http_auth=auth)
    return es
Example #9
Source File: integration_tests.py From cccatalog-api with MIT License
def test05_removed_from_source_not_indexed(self):
    id_to_check = 10494466  # Index for which we manually changed False to True
    es = Elasticsearch(
        host='localhost',
        port=60001,
        connection_class=RequestsHttpConnection,
        timeout=10,
        max_retries=10,
        retry_on_timeout=True,
        http_auth=None,
        wait_for_status='yellow'
    )
    es_query = {
        "query": {
            "match": {
                "_id": id_to_check
            }
        }
    }
    es.indices.refresh(index='image')
    search_response = es.search(
        index="image",
        body=es_query
    )
    num_hits = search_response['hits']['total']
    msg = "id {} should not show up in search results.".format(id_to_check)
    self.assertEqual(0, num_hits, msg)
Example #10
Source File: integration_tests.py From cccatalog-api with MIT License
def test03_upstream_indexed(self):
    """
    Check that the data has been successfully indexed in Elasticsearch.
    """
    es = Elasticsearch(
        host='localhost',
        port=60001,
        connection_class=RequestsHttpConnection,
        timeout=10,
        max_retries=10,
        retry_on_timeout=True,
        http_auth=None,
        wait_for_status='yellow'
    )
    es_query = {
        "query": {
            "match_all": {}
        }
    }
    es.indices.refresh(index='image')
    search_response = es.search(
        index="image",
        body=es_query
    )
    msg = 'There should be 1000 documents in Elasticsearch after ingestion.'
    self.assertEquals(search_response['hits']['total'], 1000, msg)
Example #11
Source File: search_controller.py From cccatalog-api with MIT License
def _elasticsearch_connect():
    """
    Connect to configured Elasticsearch domain.

    :return: An Elasticsearch connection object.
    """
    auth = AWSRequestsAuth(
        aws_access_key=settings.AWS_ACCESS_KEY_ID,
        aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
        aws_host=settings.ELASTICSEARCH_URL,
        aws_region=settings.ELASTICSEARCH_AWS_REGION,
        aws_service='es'
    )
    auth.encode = lambda x: bytes(x.encode('utf-8'))
    _es = Elasticsearch(
        host=settings.ELASTICSEARCH_URL,
        port=settings.ELASTICSEARCH_PORT,
        connection_class=RequestsHttpConnection,
        timeout=10,
        max_retries=1,
        retry_on_timeout=True,
        http_auth=auth,
        wait_for_status='yellow'
    )
    _es.info()
    return _es
Example #12
Source File: elastic_logs.py From quay with Apache License 2.0
def _initialize(self):
    """
    Initialize a connection to an ES cluster and create an index template
    if it does not exist.
    """
    if not self._initialized:
        http_auth = None
        if self._access_key and self._secret_key and self._aws_region:
            http_auth = AWS4Auth(self._access_key, self._secret_key, self._aws_region, "es")
        elif self._access_key and self._secret_key:
            http_auth = (self._access_key, self._secret_key)
        else:
            logger.warn("Connecting to Elasticsearch without HTTP auth")

        self._client = connections.create_connection(
            hosts=[{"host": self._host, "port": self._port}],
            http_auth=http_auth,
            use_ssl=self._use_ssl,
            verify_certs=True,
            connection_class=RequestsHttpConnection,
            timeout=ELASTICSEARCH_DEFAULT_CONNECTION_TIMEOUT,
        )

        # Create a second connection with a timeout of 60s vs 10s.
        # For some reason the PUT template API can take anywhere between
        # 10s and 30s on the test cluster.
        # This only needs to be done once to initialize the index template
        connections.create_connection(
            alias=ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS,
            hosts=[{"host": self._host, "port": self._port}],
            http_auth=http_auth,
            use_ssl=self._use_ssl,
            verify_certs=True,
            connection_class=RequestsHttpConnection,
            timeout=ELASTICSEARCH_TEMPLATE_CONNECTION_TIMEOUT,
        )

        try:
            force_template_update = ELASTICSEARCH_FORCE_INDEX_TEMPLATE_UPDATE.lower() == "true"
            self._client.indices.get_template(self._index_prefix)
            LogEntry.init(
                self._index_prefix,
                self._index_settings,
                skip_template_init=not force_template_update,
            )
        except NotFoundError:
            LogEntry.init(self._index_prefix, self._index_settings, skip_template_init=False)
        finally:
            try:
                connections.remove_connection(ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS)
            except KeyError as ke:
                logger.exception(
                    "Elasticsearch connection not found to remove %s: %s",
                    ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS,
                    ke,
                )

        self._initialized = True
Example #13
Source File: FeedElasticsearch.py From content with MIT License
def _elasticsearch_builder(self):
    """Builds an Elasticsearch obj with the necessary credentials, proxy settings and secure connection."""
    return Elasticsearch(hosts=[self._server],
                         connection_class=RequestsHttpConnection,
                         http_auth=self._http_auth,
                         verify_certs=self._insecure,
                         proxies=self._proxy,
                         api_key=self._api_key)
Example #14
Source File: es.py From apm-integration-testing with Apache License 2.0
def es():
    class Elasticsearch(object):
        def __init__(self, url):
            verify = default.from_env("PYTHONHTTPSVERIFY") == "1"
            self.es = elasticsearch.Elasticsearch(url,
                                                  verify_certs=verify,
                                                  connection_class=elasticsearch.RequestsHttpConnection)
            self.index = "apm-*"

        def clean(self):
            self.es.indices.delete(self.index)
            self.es.indices.refresh()

        def term_q(self, filters):
            clauses = []
            for field, value in filters:
                if isinstance(value, list):
                    clause = {"terms": {field: value}}
                else:
                    clause = {"term": {field: {"value": value}}}
                clauses.append(clause)
            return {"query": {"bool": {"must": clauses}}}

        @timeout_decorator.timeout(10)
        def count(self, q):
            ct = 0
            while ct == 0:
                time.sleep(3)
                s = self.es.count(index=self.index, body=q)
                ct = s['count']
            return ct

    return Elasticsearch(getElasticsearchURL())
Example #15
Source File: task_enrich.py From grimoirelab-sirmordred with GNU General Public License v3.0
def __autorefresh_studies(self, cfg):
    """Execute autorefresh for areas of code study if configured"""
    if 'studies' not in self.conf[self.backend_section] or \
            'enrich_areas_of_code:git' not in self.conf[self.backend_section]['studies']:
        logger.debug("Not doing autorefresh for studies, Areas of Code study is not active.")
        return

    aoc_index = self.conf['enrich_areas_of_code:git'].get('out_index', GitEnrich.GIT_AOC_ENRICHED)

    # if `out_index` exists but has no value, use default
    if not aoc_index:
        aoc_index = GitEnrich.GIT_AOC_ENRICHED

    logger.debug("Autorefresh for Areas of Code study index: %s", aoc_index)

    es = Elasticsearch([self.conf['es_enrichment']['url']], timeout=100,
                       retry_on_timeout=True,
                       verify_certs=self._get_enrich_backend().elastic.requests.verify,
                       connection_class=RequestsHttpConnection)

    if not es.indices.exists(index=aoc_index):
        logger.debug("Not doing autorefresh, index doesn't exist for Areas of Code study")
        return

    logger.debug("Doing autorefresh for Areas of Code study")

    # Create a GitEnrich backend tweaked to work with AOC index
    aoc_backend = GitEnrich(self.db_sh, None,
                            cfg['projects']['projects_file'],
                            self.db_user, self.db_password, self.db_host)
    aoc_backend.mapping = None
    aoc_backend.roles = ['author']
    elastic_enrich = get_elastic(self.conf['es_enrichment']['url'],
                                 aoc_index, clean=False, backend=aoc_backend)
    aoc_backend.set_elastic(elastic_enrich)
    self.__autorefresh(aoc_backend, studies=True)
Example #16
Source File: git.py From grimoirelab-elk with GNU General Public License v3.0
def enrich_areas_of_code(self, ocean_backend, enrich_backend, no_incremental=False,
                         in_index="git-raw", out_index=GIT_AOC_ENRICHED,
                         sort_on_field='metadata__timestamp'):
    log_prefix = "[git] study areas_of_code"
    logger.info("{} Starting study - Input: {} Output: {}".format(log_prefix, in_index, out_index))

    # Creating connections
    es_in = Elasticsearch([ocean_backend.elastic.url], retry_on_timeout=True, timeout=100,
                          verify_certs=self.elastic.requests.verify,
                          connection_class=RequestsHttpConnection)
    es_out = Elasticsearch([enrich_backend.elastic.url], retry_on_timeout=True, timeout=100,
                           verify_certs=self.elastic.requests.verify,
                           connection_class=RequestsHttpConnection)

    in_conn = ESPandasConnector(es_conn=es_in, es_index=in_index, sort_on_field=sort_on_field)
    out_conn = ESPandasConnector(es_conn=es_out, es_index=out_index, sort_on_field=sort_on_field,
                                 read_only=False)

    exists_index = out_conn.exists()
    if no_incremental or not exists_index:
        logger.info("{} Creating out ES index".format(log_prefix))
        # Initialize out index
        if self.elastic.major == '7':
            filename = pkg_resources.resource_filename('grimoire_elk', 'enriched/mappings/git_aoc_es7.json')
        else:
            filename = pkg_resources.resource_filename('grimoire_elk', 'enriched/mappings/git_aoc.json')
        out_conn.create_index(filename, delete=exists_index)

    repos = []
    for source in self.json_projects.values():
        items = source.get('git')
        if items:
            repos.extend(items)

    for repo in repos:
        anonymize_repo = anonymize_url(repo)
        logger.info("{} Processing repo: {}".format(log_prefix, anonymize_repo))
        in_conn.update_repo(anonymize_repo)
        out_conn.update_repo(anonymize_repo)
        areas_of_code(git_enrich=enrich_backend, in_conn=in_conn, out_conn=out_conn)

        # delete the documents in the AOC index which correspond to commits
        # that don't exist in the raw index
        if out_conn.exists():
            self.update_items_aoc(ocean_backend, es_out, out_index, anonymize_repo)

    # Create alias if output index exists and alias does not
    if out_conn.exists():
        if not out_conn.exists_alias(AREAS_OF_CODE_ALIAS) \
                and not enrich_backend.elastic.alias_in_use(AREAS_OF_CODE_ALIAS):
            logger.info("{} creating alias: {}".format(log_prefix, AREAS_OF_CODE_ALIAS))
            out_conn.create_alias(AREAS_OF_CODE_ALIAS)
        else:
            logger.warning("{} alias already exists: {}.".format(log_prefix, AREAS_OF_CODE_ALIAS))

    logger.info("{} end".format(log_prefix))
Example #17
Source File: enrich.py From grimoirelab-elk with GNU General Public License v3.0
def enrich_onion(self, enrich_backend, in_index, out_index, data_source,
                 contribs_field, timeframe_field, sort_on_field,
                 seconds=ONION_INTERVAL, no_incremental=False):
    log_prefix = "[" + data_source + "] study onion"
    logger.info("{} starting study - Input: {} Output: {}".format(log_prefix, in_index, out_index))

    # Creating connections
    es = ES([enrich_backend.elastic.url], retry_on_timeout=True, timeout=100,
            verify_certs=self.elastic.requests.verify,
            connection_class=RequestsHttpConnection)

    in_conn = ESOnionConnector(es_conn=es, es_index=in_index,
                               contribs_field=contribs_field,
                               timeframe_field=timeframe_field,
                               sort_on_field=sort_on_field)
    out_conn = ESOnionConnector(es_conn=es, es_index=out_index,
                                contribs_field=contribs_field,
                                timeframe_field=timeframe_field,
                                sort_on_field=sort_on_field,
                                read_only=False)

    if not in_conn.exists():
        logger.info("{} missing index {}".format(log_prefix, in_index))
        return

    # Check last execution date
    latest_date = None
    if out_conn.exists():
        latest_date = out_conn.latest_enrichment_date()

    if latest_date:
        logger.info("{} Latest enrichment date: {}".format(log_prefix, latest_date.isoformat()))
        update_after = latest_date + timedelta(seconds=seconds)
        logger.info("{} update after date: {}".format(log_prefix, update_after.isoformat()))
        if update_after >= datetime_utcnow():
            logger.info("{} too soon to update. Next update will be at {}".format(
                log_prefix, update_after.isoformat()))
            return

    # Onion currently does not support incremental option
    logger.info("{} Creating out ES index".format(log_prefix))
    # Initialize out index
    if self.elastic.major == '7':
        filename = pkg_resources.resource_filename('grimoire_elk', 'enriched/mappings/onion_es7.json')
    else:
        filename = pkg_resources.resource_filename('grimoire_elk', 'enriched/mappings/onion.json')

    out_conn.create_index(filename, delete=out_conn.exists())

    onion_study(in_conn=in_conn, out_conn=out_conn, data_source=data_source)

    # Create alias if output index exists (index is always created from scratch, so
    # alias need to be created each time)
    if out_conn.exists() and not out_conn.exists_alias(out_index, ONION_ALIAS):
        logger.info("{} Creating alias: {}".format(log_prefix, ONION_ALIAS))
        out_conn.create_alias(ONION_ALIAS)

    logger.info("{} end".format(log_prefix))
Example #18
Source File: index-custom-resource.py From aws-media-services-vod-automation with Apache License 2.0
def lambda_handler(event, context):
    '''
    lambda handler to create index templates
    '''
    status = True
    host = os.environ['ElasticsearchEndpoint']
    logger.info('REQUEST RECEIVED:\n {}'.format(event))
    logger.info('REQUEST RECEIVED:\n {}'.format(context))
    try:
        if event['RequestType'] == 'Create':
            logger.info('CREATE!')
            session = boto3.Session()
            credentials = session.get_credentials()
            awsauth = AWS4Auth(credentials.access_key, credentials.secret_key,
                               session.region_name, 'es',
                               session_token=credentials.token)
            es = Elasticsearch([{'host': host, 'port': 443}],
                               http_auth=awsauth,
                               use_ssl=True,
                               verify_certs=True,
                               connection_class=RequestsHttpConnection)
            result = es.indices.put_template(name='jobtemplate', body=job_index_template)
            status1 = result.get('acknowledged', False)
            result = es.indices.put_template(name='metrictemplate', body=metric_index_template)
            status2 = result.get('acknowledged', False)
            if status1 == False or status2 == False:
                send(event, context, "FAILED", {"Message": "Resource creation failed!"}, None)
            else:
                send(event, context, "SUCCESS", {"Message": "Resource creation successful!"}, None)
        elif event['RequestType'] == 'Update':
            logger.info('UPDATE!')
            send(event, context, "SUCCESS", {"Message": "Resource update successful!"}, None)
        elif event['RequestType'] == 'Delete':
            logger.info('DELETE!')
            send(event, context, "SUCCESS", {"Message": "Resource deletion successful!"}, None)
        else:
            logger.info('FAILED!')
            send(event, context, "FAILED", {"Message": "Unexpected event received from CloudFormation"}, None)
    except Exception as e:
        message = "Unexpected error creating mapping: {}".format(e)
        send(event, context, "FAILED", {"Message": message}, None)
    return status