Python Examples of elasticsearch.Elasticsearch

Source File: function.py From Gather-Deployment with MIT License

10 votes

def run(self):
        """Bulk-index the JSON documents of the 'Emotion' input into Elasticsearch.

        The documents are sent in slices of ``self.batch_size``. Each document
        is written to ``self.index`` with the id
        ``<position in the full list>-<self.summary>``.
        """
        with self.input()['Emotion'].open('r') as source:
            documents = json.load(source)
        client = Elasticsearch()
        for start in range(0, len(documents), self.batch_size):
            # Slicing clamps at the end of the sequence, so the last chunk
            # may simply be shorter than batch_size.
            chunk = documents[start : start + self.batch_size]
            actions = [
                {
                    '_index': self.index,
                    '_type': 'text',
                    '_id': '%d-%s' % (position, self.summary),
                    '_source': document,
                }
                for position, document in enumerate(chunk, start)
            ]
            helpers.bulk(client, actions)

Source File: __main__.py From elyzer with Apache License 2.0

7 votes

def main():
    """Command-line entry point: run the step-wise analysis of the given text.

    Keyboard interrupts, unknown analyzers and Elasticsearch transport errors
    are reported on stdout instead of propagating.
    """
    try:
        options = parse_args()
        client = Elasticsearch(options['es'])
        index_name = options['index']
        # Resolve the analyzer first, then hand everything to stepWise.
        analyzer = getAnalyzer(indexName=index_name,
                               analyzerName=options['analyzer'],
                               es=client)
        stepWise(es=client,
                 text=options['text'],
                 indexName=index_name,
                 analyzer=analyzer)

    except KeyboardInterrupt:
        print('Interrupted')
    except AnalyzerNotFound as not_found:
        print(not_found.error)
    except TransportError as transport_error:
        print("Unexpected Elasticsearch Transport Exception:")
        print(transport_error.error)
        print(transport_error.info)

Source File: kafka_bulk_daemon.py From search-MjoLniR with MIT License

6 votes

def configure(parser: ArgumentParser) -> Callable:
    """Register the daemon's command-line options on *parser*.

    Adds the required ``--brokers``, ``--es-clusters`` and ``--topic`` options
    plus the optional ``--group-id`` and ``--prometheus-port`` options.

    Returns the module's ``main`` callable.
    """
    parser.add_argument(
        '-b', '--brokers',
        dest='brokers',
        type=str,
        required=True,
        help='Kafka brokers to bootstrap from as a comma separated list of <host>:<port>',
    )
    parser.add_argument(
        '-c', '--es-clusters',
        dest='es_clusters',
        type=str,
        required=True,
        help='Elasticsearch servers to bootstrap from as a comma separated list of <host>:<port>',
    )
    parser.add_argument(
        '-t', '--topic',
        dest='topics',
        type=str,
        nargs='+',
        required=True,
        help='Kafka topic(s) to read indexing requests from. Multiple topics may be provided.',
    )
    parser.add_argument(
        '-g', '--group-id',
        dest='group_id',
        type=str,
        default='TODO',
        help='Kafka consumer group to join',
    )
    parser.add_argument(
        '--prometheus-port',
        dest='prometheus_port',
        type=int,
        default=9170,
        required=False,
        help='Port to export prometheus metrics over.',
    )
    return main

Source File: es_indexing.py From BERT with Apache License 2.0

6 votes

def __init__(self, config=None):
        """Load connection settings and create the Elasticsearch client.

        :param config: a dict of settings, the path of a JSON file containing
            such a dict, or ``None`` (the default) to use the built-in
            defaults. Recognised keys are ``username``, ``password`` and
            ``es_url`` (a single URL or a list of URLs).
        """
        if isinstance(config, dict):
            # OrderedDict is a dict subclass, so it is covered here as well.
            self.config = config
        elif isinstance(config, str):
            try:
                # Context manager so the file handle is always closed.
                with open(config, "r") as config_file:
                    self.config = json.load(config_file)
            except (OSError, ValueError):
                # Best effort: an unreadable or malformed file falls back to
                # the defaults (JSON decoding errors are ValueError subclasses).
                self.config = {}
        else:
            # Covers config=None and unsupported types, so the lookups below
            # always have a mapping to read from.
            self.config = {}

        # NOTE(review): the fallback credentials and host are hard-coded and
        # kept only for backward compatibility; they should be supplied through
        # configuration or a secret store instead of living in source control.
        self.username = self.config.get("username", "data_security_es_45")
        self.password = self.config.get("password", "Nb6121ca7ffe3")
        es_url = self.config.get("es_url", ['http://zsearch.alipay.com:9999'])

        # The client is always given a list of hosts.
        self.es_url = es_url if isinstance(es_url, list) else [es_url]

        self.es = Elasticsearch(self.es_url, http_auth=(self.username, self.password))

Source File: __init__.py From superelasticsearch with MIT License

6 votes

def __init__(self, client, params=None, **kwargs):
        '''
        Set up an empty bulk operation bound to an Elasticsearch client.

        :arg client: instance of official Elasticsearch Python client.
        :arg params: stored unchanged on the instance (``None`` by default).

        Options described for a bulk operation:

        :arg index: Default index for items which don't provide one
        :arg doc_type: Default document type for items which don't provide one
        :arg consistency: Explicit write consistency setting for the operation
        :arg refresh: Refresh the index after performing the operation
        :arg routing: Specific routing value
        :arg replication: Explicitly set the replication type (default: sync)
        :arg timeout: Explicit operation timeout

        .. Note:: every argument given when the bulk operation is created can
                  be overridden when :meth:`BulkOperation.execute` is called.

        NOTE(review): this initialiser only stores ``client`` and ``params``;
        ``**kwargs`` is accepted but not read here - confirm how the options
        above are expected to be supplied.
        '''

        # Start with no queued actions.
        self._actions = []
        self._params = params
        self._client = client

Source File: ssdeep_querying.py From ssdeep-elastic with MIT License

6 votes

def insert_record_to_ssdeep_index(ssdeep_value, sha256):
    """
    Index one ssdeep record in elasticsearch and refresh the index afterwards.

    The hash is split on ':' into exactly three fields (chunksize, chunk,
    double_chunk); the chunksize is stored as an integer.

    :param ssdeep_value: The ssdeep hash value of the item
    :param sha256: The sha256 hash value of the item
    """
    raw_chunksize, chunk, double_chunk = ssdeep_value.split(':')

    record = {
        'chunksize': int(raw_chunksize),
        'chunk': chunk,
        'double_chunk': double_chunk,
        'ssdeep': ssdeep_value,
        'sha256': sha256,
    }

    client = elasticsearch.Elasticsearch(['localhost:9200'])
    client.index('ssdeep-index', 'record', record)
    # Refresh after indexing the record.
    client.indices.refresh('ssdeep-index')

Source File: api.py From elasticsearch-dbapi with Apache License 2.0

6 votes

def __init__(
        self,
        host="localhost",
        port=443,
        path="",
        scheme="https",
        user=None,
        password=None,
        context=None,
        **kwargs,
    ):
        """Initialise the parent connection, then create the Elasticsearch client.

        All arguments are forwarded unchanged to the parent initialiser. The
        client is built from ``self.url`` and ``self.kwargs`` (presumably set
        by the parent - confirm). HTTP basic auth is passed only when both
        *user* and *password* are truthy.
        """
        super().__init__(
            host=host,
            port=port,
            path=path,
            scheme=scheme,
            user=user,
            password=password,
            context=context,
            **kwargs,
        )
        # Empty mapping when credentials are incomplete, so one call covers both cases.
        auth_options = {"http_auth": (user, password)} if user and password else {}
        self.es = Elasticsearch(self.url, **auth_options, **self.kwargs)

Source File: api.py From elasticsearch-dbapi with Apache License 2.0

6 votes

def __init__(
        self,
        host="localhost",
        port=9200,
        path="",
        scheme="http",
        user=None,
        password=None,
        context=None,
        **kwargs,
    ):
        """Initialise the parent connection, then create the Elasticsearch client.

        All arguments are forwarded unchanged to the parent initialiser. The
        client is built from ``self.url`` and ``self.kwargs`` (presumably set
        by the parent - confirm). HTTP basic auth is passed only when both
        *user* and *password* are truthy.
        """
        super().__init__(
            host=host,
            port=port,
            path=path,
            scheme=scheme,
            user=user,
            password=password,
            context=context,
            **kwargs,
        )
        # Empty mapping when credentials are incomplete, so one call covers both cases.
        auth_options = {"http_auth": (user, password)} if user and password else {}
        self.es = Elasticsearch(self.url, **auth_options, **self.kwargs)

Source File: search.py From arches with GNU Affero General Public License v3.0

6 votes

def _add_prefix(self, *args, **kwargs):
        """Prepend the configured prefix to every index name of a request.

        The index specification is the first positional argument when any
        positional arguments are given, otherwise the ``index`` keyword. It may
        hold several comma-separated names; each one gets ``"<prefix>_"``
        prepended. Nothing is prepended when ``self.prefix`` is empty or blank.

        :returns: the prefixed index string when called positionally,
            otherwise a copy of ``kwargs`` with ``index`` replaced by the
            prefixed string.
        :raises NotImplementedError: if the index is missing, ``None`` or blank.
        """
        index = args[0] if args else kwargs.get("index")
        # Check for None before calling str methods, so a None index raises
        # the error below rather than AttributeError.
        index = index.strip() if index is not None else ""
        if index == "":
            raise NotImplementedError("Elasticsearch index not specified.")

        stripped_prefix = self.prefix.strip() if self.prefix else ""
        prefix = "%s_" % stripped_prefix if stripped_prefix else ""
        index = ",".join("%s%s" % (prefix, idx) for idx in index.split(","))

        if args:
            return index
        return dict(kwargs, index=index)

Source File: esltr.py From search-MjoLniR with MIT License

6 votes

def __init__(self, client: Elasticsearch):
        """Initialise the parent with *client* and create the sub-clients.

        One helper client per area is built from the same *client* and exposed
        as ``cache``, ``store``, ``feature``, ``feature_set`` and ``model``.
        """
        super().__init__(client)
        self.cache = CacheClient(client)
        self.store = FeatureStoreClient(client)
        self.feature = FeatureClient(client)
        self.feature_set = FeatureSetClient(client)
        self.model = ModelClient(client)


# Domain objects stored in the plugin. These offer a very simple interface for
# constructing requests and interpreting results of objects stored in the ltr
# plugin.
#
# Note that when encoding these objects to send to the plugin they are almost always
# wrapped in a single-value dict containing the type. So for example to add a feature
# to a feature store:
#
#  feature = StoredFeature('test', ['keywords'], 'mustache', {"match":{"title":"{{keywords}}"}})
#  response = ltr_client.feature.create(feature.name, {'feature': feature.to_dict()})

Source File: __main__.py From elyzer with Apache License 2.0

6 votes

def parse_args():
    """Parse the command line and return the options as a plain dict.

    ``--es``, ``--index`` and ``--analyzer`` use the ``EnvDefault`` action with
    the environment variable named in ``envvar``; ``text`` is positional.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--es',
        type=str,
        action=EnvDefault,
        envvar='ELYZER_ES_URL',
        required=True,
        default='http://localhost:9200',
        help='Root URL to Elasticsearch, ie http://localhost:9200 (defaults to envvar ELYZER_ES_URL or localhost:9200)',
    )
    parser.add_argument(
        '--index',
        type=str,
        action=EnvDefault,
        envvar='ELYZER_INDEX',
        required=True,
        help='Name of the index to find the analyzer, ie tweets (defaults to envvar ELYZER_INDEX)',
    )
    parser.add_argument(
        '--analyzer',
        type=str,
        action=EnvDefault,
        envvar='ELYZER_ANALYZER',
        required=True,
        help='Name of the custom analyzer, ie my_text_analyzer (defaults to envvar ELYZER_ANALYZER)',
    )
    parser.add_argument(
        'text',
        type=str,
        help='Text to analyze, ie "mary had a little lamb"',
    )
    namespace = parser.parse_args()
    return vars(namespace)

Source File: sentiment_to_elastic.py From Gather-Deployment with MIT License

6 votes

def pull_to_elastic(**kwargs):
    """Pull the 'sentiment' XCom of task 'push_sentiment' and bulk-index it.

    Expects the task instance under ``kwargs['ti']``. Documents are written to
    'test_index' in slices of the module-level ``batch_size``, each with the id
    ``<position in the full list>-text``.
    """
    task_instance = kwargs['ti']
    documents = task_instance.xcom_pull(task_ids='push_sentiment', key='sentiment')
    client = Elasticsearch()
    for start in range(0, len(documents), batch_size):
        # Slicing clamps at the end of the sequence, so the last chunk
        # may simply be shorter than batch_size.
        chunk = documents[start : start + batch_size]
        actions = [
            {
                '_index': 'test_index',
                '_type': 'text',
                '_id': '%d-text' % position,
                '_source': document,
            }
            for position, document in enumerate(chunk, start)
        ]
        helpers.bulk(client, actions)

Source File: kafka_bulk_daemon.py From search-MjoLniR with MIT License

6 votes

def indices_map(clusters: List[Elasticsearch]) -> Mapping[str, Elasticsearch]:
    """Map each addressable index name to the one client that serves it.

    Both index names and their aliases are considered. A name seen on more
    than one cluster is ambiguous and is left out of the result, so only
    names unique to a single cluster are returned.
    """
    owners = cast(Dict[str, Optional[Elasticsearch]], dict())
    for client in clusters:
        for index_name, data in client.indices.get_alias().items():
            for name in [index_name, *data['aliases'].keys()]:
                if name in owners:
                    # Seen before: a different owner makes the name ambiguous,
                    # which is recorded as None.
                    if owners[name] != client:
                        owners[name] = None
                else:
                    owners[name] = client

    unique = {}
    for name, owner in owners.items():
        if owner is not None:
            unique[name] = owner
    return unique

Source File: connections.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def create_connection(self, alias='default', **kwargs):
        """
        Build an ``elasticsearch.Elasticsearch`` client from ``kwargs``, store
        it under ``alias`` and return it. The module-level ``serializer`` is
        used unless the caller supplies its own.
        """
        if 'serializer' not in kwargs:
            kwargs['serializer'] = serializer
        connection = Elasticsearch(**kwargs)
        self._conns[alias] = connection
        return connection

Source File: fixtures.py From elasticsearch-dbapi with Apache License 2.0

5 votes

def delete_index(base_url, index_name):
    """Delete every document in *index_name* with a match-all delete-by-query.

    A ``NotFoundError`` raised by the client is ignored.
    """
    client = Elasticsearch(base_url)
    match_everything = {"query": {"match_all": {}}}
    try:
        client.delete_by_query(index=index_name, body=match_everything)
    except NotFoundError:
        pass

Source File: app.py From Gather-Deployment with MIT License

5 votes

def get_es(index = 'test_index'):
    """Return a ``Search`` over *index* that uses a default Elasticsearch client."""
    return Search(using=Elasticsearch(), index=index)

Source File: es_manager.py From freezer-api with Apache License 2.0

5 votes

def __init__(self, **options):
        """Connect to Elasticsearch with *options* and verify the cluster is reachable.

        ``options`` must contain ``index``; the whole mapping is also passed
        to the Elasticsearch client constructor.

        :raises Exception: if the cluster does not answer a ping.
        """
        self.mappings = db_mappings.get_mappings().copy()
        self.conf = dict(options)
        self.index = self.conf['index']

        self.elk = elasticsearch.Elasticsearch(**options)
        # Fail early when the cluster cannot be reached.
        cluster_is_up = self.elk.ping()
        if not cluster_is_up:
            raise Exception('ElasticSearch cluster is not available. '
                            'Cannot ping it')
        # Clearing the index cache is best effort: failures are only logged.
        try:
            self.elk.indices.clear_cache(index=self.conf['index'])
        except Exception as exc:
            LOG.warning(exc)

Source File: queryset.py From pyeqs with MIT License

5 votes

def _get_connection(self):
        """Return the cached client, creating it from ``self._host`` on first use."""
        if self._conn:
            return self._conn
        connection_info = self._parse_host_connection_info(self._host)
        self._conn = Elasticsearch(connection_info)
        return self._conn

Source File: engine.py From EQUEL with GNU Lesser General Public License v3.0

5 votes

def execute(self, *args, **kwargs):
        """Run the query against Elasticsearch and wrap the response in an EQUELResult.

        Extra positional and keyword arguments are forwarded to the client's
        ``search`` call. Every entry of ``self.output`` is registered on the
        result through ``addOutput`` before it is returned.
        """
        engine = self.engine
        client = Elasticsearch(hosts=engine.host, timeout=engine.timeout)
        response = client.search(*args, index=engine.index, body=self.jsonQuery(), **kwargs)
        result = EQUELResult(response)
        for name in self.output:
            result.addOutput(name, self.output[name])
        return result

Source File: es.py From KubeOperator with Apache License 2.0

5 votes

def ensure_index_exists(client: Elasticsearch, name):
    """Create index *name* unless the client reports that it already exists."""
    if client.indices.exists(name):
        return
    client.indices.create(name)

Source File: conftest.py From idunn with Apache License 2.0

5 votes

def wiki_client_ko(wiki_es_ko):
    """Return an Elasticsearch client whose only host is *wiki_es_ko*."""
    hosts = [wiki_es_ko]
    return Elasticsearch(hosts)

Source File: conftest.py From idunn with Apache License 2.0

5 votes

def wiki_client(wiki_es):
    """Return an Elasticsearch client whose only host is *wiki_es*."""
    hosts = [wiki_es]
    return Elasticsearch(hosts)

Source File: conftest.py From idunn with Apache License 2.0

5 votes

def mimir_client(mimir_es):
    """Return an Elasticsearch client whose only host is *mimir_es*."""
    hosts = [mimir_es]
    return Elasticsearch(hosts)

Source File: es_wrapper.py From idunn with Apache License 2.0

5 votes

def get_elasticsearch():
    """Return the module-wide Elasticsearch client, creating it on first use.

    The client targets ``settings["MIMIR_ES"]``. When
    ``settings["VERIFY_HTTPS"]`` is exactly ``False`` it is created with
    certificate verification disabled and ``RequestsHttpConnection``.
    """
    global ES_CONNECTION

    if ES_CONNECTION is not None:
        return ES_CONNECTION

    extra_options = {}
    if settings["VERIFY_HTTPS"] is False:
        extra_options = {
            "verify_certs": False,
            "connection_class": RequestsHttpConnection,
        }
    ES_CONNECTION = Elasticsearch(settings["MIMIR_ES"], **extra_options)
    return ES_CONNECTION

Source File: pages_jaunes.py From idunn with Apache License 2.0

5 votes

def __init__(self):
        """Create the client when the ``PJ_ES`` setting is present.

        ``self.enabled`` records whether a client was created; ``self.es`` is
        only set in that case (with a 3 second timeout).
        """
        pj_es_url = settings.get("PJ_ES")

        if not pj_es_url:
            self.enabled = False
            return

        self.es = Elasticsearch(pj_es_url, timeout=3.0)
        self.enabled = True

Source File: msearch_daemon.py From search-MjoLniR with MIT License

5 votes

def es_query_total(cls, cluster: Elasticsearch, index: str, group: str, **kwargs) -> 'MetricMonitor':
        """Build a monitor over the search ``query_total`` of one stats group.

        The monitor is constructed as ``cls(fetch_stat, StreamingEMA(), **kwargs)``
        where ``fetch_stat`` reads the counter for *group* from the index stats
        of *index* on *cluster*.
        """
        def fetch_stat() -> Optional[float]:
            """Return query_total, 0.0 when the group has no stats, None on errors."""
            try:
                stats = cluster.indices.stats(index=index, groups=[group], metric='search')
            except elasticsearch.NotFoundError:
                # Without the index nothing can proceed: report the metric
                # as unavailable until the index exists. Listed first so it
                # is not shadowed if it derives from TransportError.
                log.exception('Index not found while fetching index stats for %s', index)
                return None
            except elasticsearch.TransportError:
                # Connection-level failure: network trouble, restarts, etc.
                log.exception('Transport error while fetching index stats for %s', index)
                return None

            try:
                group_stats = stats['_all']['total']['search']['groups'][group]
                query_total = group_stats['query_total']
                log.debug('Group %s in index %s reported query_total of %d', group, index, query_total)
                return query_total
            except KeyError:
                # Usually the group simply has not collected any stats yet,
                # e.g. after a full cluster restart and before production
                # traffic arrives. Always answering 0 is slightly worrying,
                # but it is correct.
                log.info('No stats in index %s for group %s', index, group)
                return 0.0
        return cls(fetch_stat, StreamingEMA(), **kwargs)

Source File: esltr.py From search-MjoLniR with MIT License

5 votes

def __init__(
        self,
        elastic: Elasticsearch,
        model_name: str,
        model_type: str,
        model: Sequence[Mapping],
        feature_source_definition: str,
        features: Sequence[str],
        validation: Optional[ValidationRequest]
    ) -> None:
        """Store the model description and split up its feature source definition.

        The *model_type* argument is currently ignored: the stored type is
        always ``'model/xgboost+json'``.

        :raises NotImplementedError: if the feature source definition is not
            of type ``'featureset'``.
        """
        self.elastic = elastic
        self.ltr = LtrClient(elastic)
        self.model_name = model_name
        self.model_type = 'model/xgboost+json'  # todo: stop hardcoding
        self.model = model
        self.feature_source_definition = feature_source_definition
        # Break the definition down into its pieces
        definition_parts = explode_ltr_model_definition(self.feature_source_definition)
        self.feature_def_type, self.feature_set_name, self.feature_store_name = definition_parts
        if self.feature_def_type != 'featureset':
            # This limitation comes from the forced featureset minimization;
            # things could be abstracted to support multiple paths.
            raise NotImplementedError('Can only derive featuresets from other featuresets currently')
        self.features = features
        self.validation = validation

Source File: esltr.py From search-MjoLniR with MIT License

5 votes

def __init__(self, client: Elasticsearch):
        """Initialise the parent class with *client* and the fixed name ``'model'``.

        NOTE(review): the second argument is presumably the store type
        expected by the parent - confirm against the parent class.
        """
        super().__init__(client, 'model')

Source File: esltr.py From search-MjoLniR with MIT License

5 votes

def __init__(self, client: Elasticsearch):
        """Initialise the parent class with *client* and the fixed name ``'featureset'``.

        NOTE(review): the second argument is presumably the store type
        expected by the parent - confirm against the parent class.
        """
        super().__init__(client, 'featureset')

    # TODO: self._req('POST', store, [feature_set_name, '_createmodel'], body, params)
    # TODO: self._req('POST', store, [feature_set_name, '_addfeatures', feature_prefix_query], params=params)

Source File: esltr.py From search-MjoLniR with MIT License

5 votes

def __init__(self, client: Elasticsearch, store_type: str):
        """Initialise the parent with *client* and remember the store type.

        :param client: Elasticsearch client passed on to the parent class.
        :param store_type: type name; kept on the instance with a leading
            underscore.
        """
        super().__init__(client)
        # Stored with a leading underscore, e.g. 'model' becomes '_model'.
        self.store_type = '_' + store_type

Python elasticsearch.Elasticsearch() Examples