Python kafka.KafkaConsumer() Examples
The following are 30 code examples of kafka.KafkaConsumer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module kafka, or try the search function.
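
Before the examples, a minimal consumer loop for orientation. This is a sketch; the broker address, topic and group are placeholders:

from kafka import KafkaConsumer

# Placeholder broker, topic and group; substitute real values.
consumer = KafkaConsumer(
    'my-topic',                          # topic to subscribe to
    bootstrap_servers='localhost:9092',  # broker(s) to bootstrap from
    group_id='my-group',                 # consumer group for offset tracking
    auto_offset_reset='earliest',        # start from the beginning when no offset is committed
)

# Iterating the consumer blocks and yields ConsumerRecord objects.
for message in consumer:
    print(message.topic, message.partition, message.offset, message.value)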
Example #1
Source File: kafka_monitor.py From scrapy-cluster with MIT License | 11 votes |
def _create_consumer(self):
    """Tries to establish the Kafka consumer connection"""
    try:
        brokers = self.settings['KAFKA_HOSTS']
        self.logger.debug("Creating new kafka consumer using brokers: " +
                          str(brokers) + ' and topic ' +
                          self.settings['KAFKA_INCOMING_TOPIC'])

        return KafkaConsumer(
            self.settings['KAFKA_INCOMING_TOPIC'],
            group_id=self.settings['KAFKA_GROUP'],
            bootstrap_servers=brokers,
            consumer_timeout_ms=self.settings['KAFKA_CONSUMER_TIMEOUT'],
            auto_offset_reset=self.settings['KAFKA_CONSUMER_AUTO_OFFSET_RESET'],
            auto_commit_interval_ms=self.settings['KAFKA_CONSUMER_COMMIT_INTERVAL_MS'],
            enable_auto_commit=self.settings['KAFKA_CONSUMER_AUTO_COMMIT_ENABLE'],
            max_partition_fetch_bytes=self.settings['KAFKA_CONSUMER_FETCH_MESSAGE_MAX_BYTES'])
    except KeyError as e:
        self.logger.error('Missing setting named ' + str(e),
                          {'ex': traceback.format_exc()})
    except:
        self.logger.error("Couldn't initialize kafka consumer for topic",
                          {'ex': traceback.format_exc(),
                           'topic': self.settings['KAFKA_INCOMING_TOPIC']})
        raise
Example #2
Source File: rest_service.py From scrapy-cluster with MIT License | 7 votes |
def _create_consumer(self):
    """Tries to establish the Kafka consumer connection"""
    if not self.closed:
        try:
            self.logger.debug("Creating new kafka consumer using brokers: " +
                              str(self.settings['KAFKA_HOSTS']) + ' and topic ' +
                              self.settings['KAFKA_TOPIC_PREFIX'] +
                              ".outbound_firehose")

            return KafkaConsumer(
                self.settings['KAFKA_TOPIC_PREFIX'] + ".outbound_firehose",
                group_id=None,
                bootstrap_servers=self.settings['KAFKA_HOSTS'],
                consumer_timeout_ms=self.settings['KAFKA_CONSUMER_TIMEOUT'],
                auto_offset_reset=self.settings['KAFKA_CONSUMER_AUTO_OFFSET_RESET'],
                auto_commit_interval_ms=self.settings['KAFKA_CONSUMER_COMMIT_INTERVAL_MS'],
                enable_auto_commit=self.settings['KAFKA_CONSUMER_AUTO_COMMIT_ENABLE'],
                max_partition_fetch_bytes=self.settings['KAFKA_CONSUMER_FETCH_MESSAGE_MAX_BYTES'])
        except KeyError as e:
            self.logger.error('Missing setting named ' + str(e),
                              {'ex': traceback.format_exc()})
        except:
            self.logger.error("Couldn't initialize kafka consumer for topic",
                              {'ex': traceback.format_exc()})
            raise
Example #3
Source File: client.py From search-MjoLniR with MIT License | 7 votes |
def offsets_for_times(consumer, partitions, timestamp):
    """Augment KafkaConsumer.offsets_for_times to not return None

    Parameters
    ----------
    consumer : kafka.KafkaConsumer
        This consumer must only be used for collecting metadata, and not
        consuming. APIs will be used that invalidate consuming.
    partitions : list of kafka.TopicPartition
    timestamp : number
        Timestamp, in seconds since unix epoch, to return offsets for.

    Returns
    -------
    dict from kafka.TopicPartition to integer offset
    """
    # Kafka uses millisecond timestamps
    timestamp_ms = int(timestamp * 1000)
    response = consumer.offsets_for_times({p: timestamp_ms for p in partitions})
    offsets = {}
    for tp, offset_and_timestamp in response.items():
        if offset_and_timestamp is None:
            # No messages exist after timestamp. Fetch latest offset.
            consumer.assign([tp])
            consumer.seek_to_end(tp)
            offsets[tp] = consumer.position(tp)
        else:
            offsets[tp] = offset_and_timestamp.offset
    return offsets
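
A hypothetical call site for this helper, assuming a metadata-only consumer; the broker, topic and timestamp are placeholders:

from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
partitions = [TopicPartition('my-topic', p)
              for p in consumer.partitions_for_topic('my-topic')]

# Earliest offsets at or after 2021-01-01 00:00:00 UTC (epoch seconds).
offsets = offsets_for_times(consumer, partitions, 1609459200)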
Example #4
Source File: kafka_to_rabbitmq.py From dino with Apache License 2.0 | 6 votes |
def run(self) -> None:
    self.create_loggers()

    logger.info('sleeping for 3 seconds before consuming')
    time.sleep(3)

    kafka_conf = self.conf.get(ConfigKeys.EXTERNAL_QUEUE)
    bootstrap_servers = kafka_conf.get(ConfigKeys.HOST)
    logger.info('bootstrapping from servers: %s' % (str(bootstrap_servers)))

    topic_name = kafka_conf.get(ConfigKeys.QUEUE)
    logger.info('consuming from topic {}'.format(topic_name))

    group_id = 'dino-kafka-to-rabbitmq'
    logger.info('using Group ID {}'.format(group_id))

    self.consumer = KafkaConsumer(
        topic_name,
        group_id=group_id,
        bootstrap_servers=bootstrap_servers,
        enable_auto_commit=True,
        connections_max_idle_ms=180 * ONE_MINUTE,  # default: 9min
        max_poll_interval_ms=10 * ONE_MINUTE,  # default: 5min
        session_timeout_ms=ONE_MINUTE,  # default: 10s
        max_poll_records=10  # default: 500
    )

    while True:
        try:
            self.try_to_read()
        except InterruptedError:
            logger.info('got interrupted, shutting down...')
            break
        except Exception as e:
            logger.error('could not read from kafka: {}'.format(str(e)))
            logger.exception(e)
            time.sleep(1)
Example #5
Source File: consumer_manager.py From karapace with Apache License 2.0 | 6 votes |
async def create_kafka_consumer(self, fetch_min_bytes, group_name, internal_name, request_data):
    while True:
        try:
            c = KafkaConsumer(
                bootstrap_servers=self.config["bootstrap_uri"],
                client_id=internal_name,
                security_protocol=self.config["security_protocol"],
                ssl_cafile=self.config["ssl_cafile"],
                ssl_certfile=self.config["ssl_certfile"],
                ssl_keyfile=self.config["ssl_keyfile"],
                group_id=group_name,
                fetch_min_bytes=fetch_min_bytes,
                fetch_max_bytes=self.config["consumer_request_max_bytes"],
                request_timeout_ms=request_data["consumer.request.timeout.ms"],
                enable_auto_commit=request_data["auto.commit.enable"],
                auto_offset_reset=request_data["auto.offset.reset"]
            )
            return c
        except:  # pylint: disable=bare-except
            self.log.exception("Unable to create consumer, retrying")
            await asyncio.sleep(1)
Example #6
Source File: messaging.py From Ad-Insertion-Sample with BSD 3-Clause "New" or "Revised" License | 6 votes |
def debug(self, topic):
    c = KafkaConsumer(bootstrap_servers=kafka_hosts,
                      client_id=self._client_id,
                      group_id=None,
                      api_version=(0, 10))

    # assign/subscribe topic
    partitions = c.partitions_for_topic(topic)
    if not partitions:
        raise Exception("Topic " + topic + " does not exist")
    c.assign([TopicPartition(topic, p) for p in partitions])

    # seek to beginning if needed
    c.seek_to_beginning()

    # fetch messages
    while True:
        partitions = c.poll(100)
        if partitions:
            for p in partitions:
                for msg in partitions[p]:
                    yield msg.value.decode('utf-8')
        yield ""

    c.close()
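
Since debug() is a generator, a caller drains it with a for loop. A sketch, where the messaging instance and the topic name are assumptions:

# 'messaging' is an instance of the class above; 'some-topic' is a placeholder.
for text in messaging.debug('some-topic'):
    if text == "":
        continue  # the generator yields an empty string after each poll() batch
    print(text)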
Example #7
Source File: test_worker.py From kq with MIT License | 6 votes |
def test_worker_initialization_with_bad_args(hosts, consumer):
    with pytest.raises(AssertionError) as e:
        Worker(topic=True, consumer=consumer)
    assert str(e.value) == 'topic must be a str'

    with pytest.raises(AssertionError) as e:
        Worker(topic='topic', consumer='bar')
    assert str(e.value) == 'bad consumer instance'

    with pytest.raises(AssertionError) as e:
        bad_consumer = KafkaConsumer(bootstrap_servers=hosts)
        Worker(topic='topic', consumer=bad_consumer)
    assert str(e.value) == 'consumer must have group_id'

    with pytest.raises(AssertionError) as e:
        Worker(topic='topic', consumer=consumer, callback=1)
    assert str(e.value) == 'callback must be a callable'

    with pytest.raises(AssertionError) as e:
        Worker(topic='topic', consumer=consumer, deserializer=1)
    assert str(e.value) == 'deserializer must be a callable'

    with pytest.raises(AssertionError) as e:
        Worker(topic='topic', consumer=consumer, logger=1)
    assert str(e.value) == 'bad logger instance'
Example #8
Source File: test_worker.py From kq with MIT License | 6 votes |
def test_worker_properties(worker, hosts, topic, group):
    assert hosts in repr(worker)
    assert topic in repr(worker)
    assert group in repr(worker)
    assert worker.consumer.config['bootstrap_servers'] == hosts
    assert worker.consumer.config['group_id'] == group

    assert isinstance(worker.hosts, str) and worker.hosts == hosts
    assert isinstance(worker.topic, str) and worker.topic == topic
    assert isinstance(worker.group, str) and worker.group == group
    assert isinstance(worker.consumer, KafkaConsumer)
    assert callable(worker.deserializer)
    assert callable(worker.callback) or worker.callback is None

# noinspection PyTypeChecker
Example #9
Source File: client.py From kzmonitor with MIT License | 6 votes |
def getOffsets(self, topic, partitions, group):
    """ Given a topic, partitions and group, return the offsets data """

    try:
        # Try the zookeeper-storage API first to fetch the offsets.
        # If no offsets exist for the given group, an
        # UnknownTopicOrPartitionError is raised.
        tp = self.client.send_offset_fetch_request(
            group,
            [OffsetRequestPayload(topic, p, -1, 1) for p in partitions])
        offsets = {p.partition: p.offset for p in tp}
    except UnknownTopicOrPartitionError:
        # On that exception, fall back to the kafka-storage API.
        consumer = KafkaConsumer(group_id=group,
                                 bootstrap_servers=self.broker,
                                 enable_auto_commit=False)
        tp = [TopicPartition(topic, p) for p in partitions]
        consumer.assign(tp)
        offsets = {p.partition: consumer.position(p) for p in tp}

    return offsets
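
A hypothetical call, assuming a client wired to a broker and a topic with three partitions (all names and numbers are placeholders):

# Returns a dict mapping partition id to committed offset, e.g. {0: 42, 1: 37, 2: 40}.
offsets = client.getOffsets('my-topic', [0, 1, 2], 'my-group')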
Example #10
Source File: test_extension.py From scrapy-kafka-export with MIT License | 6 votes |
def setup_class(cls):
    cls.broker = os.getenv('KAFKA_BROKER')
    if not cls.topic:
        topic = "%s-%s" % ('topic_test_', random_string(10))
        cls.topic = topic

    create_topic(cls.topic)
    cls._deserializer = ScrapyJSONDecoder()
    cls.consumer = KafkaConsumer(
        bootstrap_servers=[cls.broker],
        auto_offset_reset='earliest',
        group_id=None,
        value_deserializer=lambda x: cls._deserializer.decode(x.decode('utf8'))
    )
    cls.consumer.subscribe([cls.topic])
Example #11
Source File: test_kafka.py From py-timeexecution with Apache License 2.0 | 6 votes |
def _query_backend(self):
    consumer = KafkaConsumer(
        bootstrap_servers=KAFKA_HOST,
        value_deserializer=lambda v: JSONSerializer().loads(v.decode('utf-8'))
    )

    tp = TopicPartition(self.topic, 0)
    consumer.assign([tp])
    count = consumer.position(tp)

    consumer.seek(tp, 0)

    metrics = []
    for i in range(count):
        metrics.append(next(consumer))

    return metrics
Example #12
Source File: __init__.py From platypush with MIT License | 6 votes |
def run(self):
    from kafka import KafkaConsumer

    super().run()
    self.consumer = KafkaConsumer(self.topic, bootstrap_servers=self.server)

    self.logger.info('Initialized kafka backend - server: {}, topic: {}'
                     .format(self.server, self.topic))

    try:
        for msg in self.consumer:
            self._on_record(msg)
            if self.should_stop():
                break
    except Exception as e:
        self.logger.warning('Kafka connection error, reconnecting in {} seconds'
                            .format(self._conn_retry_secs))
        self.logger.exception(e)
        time.sleep(self._conn_retry_secs)

# vim:sw=4:ts=4:et:
Example #13
Source File: bot.py From fooltrader with MIT License | 6 votes |
def run(self):
    self.logger.info("start bot:{}".format(self))

    funcs = set(dir(self)) & self.func_map_topic.keys()

    consumer = KafkaConsumer(bootstrap_servers=[KAFKA_HOST])
    current_topics = consumer.topics()

    for func in funcs:
        topic = self.func_map_topic.get(func)
        if topic not in current_topics:
            self.logger.exception(
                "you implemented func:{}, but the topic:{} for it does not exist".format(func, topic))
            continue

        self._threads.append(
            threading.Thread(target=self.consume_topic_with_func,
                             args=(self.func_map_topic.get(func), func)))

    for the_thread in self._threads:
        the_thread.start()

    self.consume_topic_with_func(self.quote_topic, 'on_event')

    self.logger.info("finish bot:{}".format(self))
Example #14
Source File: base_bot.py From fooltrader with MIT License | 6 votes |
def run(self):
    self.logger.info("start bot:{}".format(self))

    funcs = set(dir(self)) & self.func_map_topic.keys()

    consumer = KafkaConsumer(bootstrap_servers=[KAFKA_HOST])
    current_topics = consumer.topics()

    for func in funcs:
        topic = self.func_map_topic.get(func)
        if topic not in current_topics:
            self.logger.exception(
                "you implemented func:{}, but the topic:{} for it does not exist".format(func, topic))
            continue

        self.threads.append(
            threading.Thread(target=self.consume_topic_with_func,
                             args=(self.func_map_topic.get(func), func)))

    for the_thread in self.threads:
        the_thread.start()

    self.consume_topic_with_func(self.quote_topic, 'on_event')

    self.logger.info("finish bot:{}".format(self))
Example #15
Source File: collector.py From operations-software-druid_exporter with Apache License 2.0 | 6 votes |
def pull_datapoints_from_kafka(self, kafka_config, stop_threads):
    log.debug('Kafka datapoints puller thread starting..')

    consumer = KafkaConsumer(
        kafka_config['topic'],
        group_id=kafka_config['group_id'],
        bootstrap_servers=kafka_config['bootstrap_servers'])

    while not stop_threads.isSet():
        consumer.poll()
        for message in consumer:
            try:
                json_message = json.loads(message.value.decode())
                log.debug('Datapoint from kafka: %s', json_message)
                if type(json_message) == list:
                    for datapoint in json_message:
                        self.register_datapoint(datapoint)
                else:
                    self.register_datapoint(json_message)
            except json.JSONDecodeError:
                log.exception("Failed to decode message from Kafka, skipping..")
            except Exception as e:
                log.exception("Generic exception while pulling datapoints from Kafka")

    log.debug('Kafka datapoints puller thread shutting down..')
Example #16
Source File: open_nti_input_syslog_lib.py From open-nti with Apache License 2.0 | 6 votes |
def check_kafka_msg(topic='events', nbr_msg=100):
    ## Collect Messages from Bus
    consumer = KafkaConsumer(
        bootstrap_servers=get_external_ip() + ':' + str(KAFKA_BROKER_PORT),
        auto_offset_reset='earliest')

    consumer.subscribe([topic])

    counter = 0
    for message in consumer:
        counter = counter + 1
        if counter == nbr_msg:
            break

    return counter
Example #17
Source File: kafka_manager.py From python-socketio with MIT License | 5 votes |
def __init__(self, url='kafka://localhost:9092', channel='socketio',
             write_only=False):
    if kafka is None:
        raise RuntimeError('kafka-python package is not installed '
                           '(Run "pip install kafka-python" in your '
                           'virtualenv).')

    super(KafkaManager, self).__init__(channel=channel,
                                       write_only=write_only)

    self.kafka_url = url[8:] if url != 'kafka://' else 'localhost:9092'
    self.producer = kafka.KafkaProducer(bootstrap_servers=self.kafka_url)
    self.consumer = kafka.KafkaConsumer(self.channel,
                                        bootstrap_servers=self.kafka_url)
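
For context, this manager is normally passed to a python-socketio server as its client manager; a sketch, assuming the documented python-socketio API and a placeholder broker URL:

import socketio

mgr = socketio.KafkaManager(url='kafka://localhost:9092', channel='socketio')
sio = socketio.Server(client_manager=mgr)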
Example #18
Source File: offsets_for_timestamp.py From kafka-utils with Apache License 2.0 | 5 votes |
def run(cls, args, cluster_config):
    # Set up the KafkaConsumer
    consumer = KafkaConsumer(
        bootstrap_servers=cluster_config.broker_list,
        client_id="kafka-consumer-manager",
        group_id=args.groupid,
        enable_auto_commit=False,
    )

    if args.groupid:
        args.topics = cls.get_topics_from_consumer_group_id(
            cluster_config,
            args.groupid,
        )

    partition_to_offset = topic_offsets_for_timestamp(
        consumer,
        args.timestamp,
        topics=args.topics,
    )

    cls.print_offsets(partition_to_offset, args.timestamp)
Example #19
Source File: open_nti_input_syslog_lib.py From open-nti with Apache License 2.0 | 5 votes |
def check_kafka_is_running():
    # Verify we can connect to Kafka
    time.sleep(2)
    consumer = KafkaConsumer(bootstrap_servers=get_external_ip() + ':' + str(KAFKA_BROKER_PORT),
                             auto_offset_reset='earliest')
    mytopic = consumer.topics()

    return 1
Example #20
Source File: KafkaSource.py From grease with MIT License | 5 votes |
def create_consumer(ioc, config):
    """Creates a KafkaConsumer object from the params in config

    Args:
        ioc (GreaseContainer): Used for logging since we can't use self in threads
        config (dict): Configuration for a Kafka Model

    Returns:
        kafka.KafkaConsumer: KafkaConsumer object initialized with params from config

    """
    consumer = None
    while not consumer:
        try:
            consumer = KafkaConsumer(
                group_id=config.get('name'),
                *config.get('topics'),
                **{'bootstrap_servers': ",".join(config.get('servers'))}
            )
        except kafka.errors.NoBrokersAvailable:
            ioc.getLogger().error("No Kafka brokers available for config: {0}, retrying.".format(config.get('name')))
            KafkaSource.sleep(SLEEP_TIME)

    ioc.getLogger().info("Kafka consumer created under group_id: {0}".format(config.get('name')))
    KafkaSource.sleep(SLEEP_TIME)  # Gives the consumer time to initialize
    return consumer
Example #21
Source File: KafkaSource.py From grease with MIT License | 5 votes |
def reallocate_consumers(ioc, config, monitor_consumer, threads):
    """Determines whether to create or kill a consumer based on current message backlog, then performs that action

    Args:
        ioc (GreaseContainer): Used for logging since we can't use self in threads
        config (dict): Configuration for a Kafka model
        monitor_consumer (kafka.KafkaConsumer): KafkaConsumer used solely for measuring message backlog
        threads (list[(threading.Thread, multiprocessing.Pipe)]): List of current consumer thread/pipe pairs

    Returns:
        int: Number of threads created (negative value if a thread was killed)

    """
    min_backlog = config.get("min_backlog", MIN_BACKLOG)
    max_backlog = config.get("max_backlog", MAX_BACKLOG)
    max_consumers = config.get("max_consumers", MAX_CONSUMERS)

    backlog1 = KafkaSource.get_backlog(ioc, monitor_consumer)
    KafkaSource.sleep(SLEEP_TIME)  # We want to wait before checking again in case there is a message spike
    backlog2 = KafkaSource.get_backlog(ioc, monitor_consumer)

    if backlog1 > max_backlog and backlog2 > max_backlog and len(threads) < max_consumers:
        threads.append(KafkaSource.create_consumer_thread(ioc, config))
        ioc.getLogger().info("Backlog max reached, spawning a new consumer for {0}".format(config.get('name')), verbose=True)
        return 1
    elif backlog1 <= min_backlog and backlog2 <= min_backlog and len(threads) > 1:
        KafkaSource.kill_consumer_thread(ioc, threads[0])
        ioc.getLogger().info("Backlog min reached, killing a consumer for {0}".format(config.get('name')), verbose=True)
        return -1

    ioc.getLogger().info("No reallocation needed for {0}".format(config.get('name')))
    return 0
Example #22
Source File: KafkaSource.py From grease with MIT License | 5 votes |
def get_backlog(ioc, consumer):
    """Gets the current message backlog for a given consumer

    Args:
        ioc (GreaseContainer): Used for logging since we can't use self in threads
        consumer (kafka.KafkaConsumer): The consumer used to poll backlog offsets

    Returns:
        float: The average number of messages across all partitions in the backlog. -1 if
            there is an error and excess consumers should be killed

    """
    if not consumer.assignment():
        ioc.getLogger().trace("Assigning consumer to topic", trace=True)
        consumer.poll()  # We need to poll the topic to actually get assigned

    partitions = consumer.assignment()
    if not partitions:
        ioc.getLogger().error("No partitions found for kafka consumer")
        return -1.

    try:
        current_offsets = [consumer.position(part) for part in partitions]
        end_offsets = list(consumer.end_offsets(partitions).values())
    except kafka.errors.KafkaTimeoutError:
        ioc.getLogger().error("KafkaTimeout during backlog check")
        return -1.
    except kafka.errors.UnsupportedVersionError:
        ioc.getLogger().error("This version of kafka does not support backlog lookups")
        return -1.

    if not current_offsets or not end_offsets or len(current_offsets) != len(end_offsets):
        ioc.getLogger().error("Backlog check failed for kafka consumer - invalid offsets")
        return -1.

    return float(sum(end_offsets) - sum(current_offsets)) / len(partitions)
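
The same backlog arithmetic works without the GREASE wrappers; a minimal sketch, assuming a kafka-python consumer and placeholder broker/topic/group names:

from kafka import KafkaConsumer

consumer = KafkaConsumer('my-topic',
                         bootstrap_servers='localhost:9092',
                         group_id='my-group')
consumer.poll()  # poll once so the group coordinator assigns partitions

partitions = consumer.assignment()
current = sum(consumer.position(p) for p in partitions)
end = sum(consumer.end_offsets(partitions).values())
print('average backlog per partition:',
      float(end - current) / max(len(partitions), 1))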
Example #23
Source File: consumer_group.py From yelp_kafka with Apache License 2.0 | 5 votes |
def _acquire(self, partitions):
    if not self.consumer:
        self.consumer = KafkaConsumer(partitions, **self.config)
    else:
        self.consumer.set_topic_partitions(partitions)
    if self.post_rebalance_callback:
        self.post_rebalance_callback(partitions)

# set_topic_partitions causes a metadata request, which may fail on the
# first try.
Example #24
Source File: sniffer_nebula_test.py From sniffer with Apache License 2.0 | 5 votes |
def __init__(self, bootstrap_servers, kafkatopic):
    self.kafkatopic = kafkatopic
    self.consumer = KafkaConsumer(self.kafkatopic,
                                  bootstrap_servers=bootstrap_servers)
Example #25
Source File: kafkaio.py From beam-nuggets with MIT License | 5 votes |
def process(self, config):
    consumer_config = dict(config)
    topic = consumer_config.pop('topic')
    consumer = KafkaConsumer(topic, **consumer_config)
    for msg in consumer:
        try:
            yield (msg.key, msg.value.decode())
        except Exception as e:
            print(e)
            continue
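
A hypothetical consumer_config for this DoFn; every key except topic is forwarded verbatim to KafkaConsumer:

config = {
    'topic': 'my-topic',                    # popped off before constructing the consumer
    'bootstrap_servers': 'localhost:9092',  # passed through as a KafkaConsumer kwarg
    'group_id': 'my-group',
    'auto_offset_reset': 'earliest',
}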
Example #26
Source File: 13_3_snas_log_consumer.py From Python-Network-Programming-Cookbook-Second-Edition with MIT License | 5 votes |
def main(conf):
    # Topics/feeds to enable
    topics = [
        'openbmp.parsed.router', 'openbmp.parsed.peer', 'openbmp.parsed.collector',
        'openbmp.parsed.bmp_stat', 'openbmp.parsed.unicast_prefix', 'openbmp.parsed.ls_node',
        'openbmp.parsed.ls_link', 'openbmp.parsed.ls_prefix', 'openbmp.parsed.l3vpn'
    ]

    # Read config file
    with open(conf, 'r') as f:
        config_content = yaml.load(f)

    bootstrap_server = config_content['bootstrap_servers']

    try:
        # connect and bind to topics
        print("Connecting to kafka... takes a minute to load offsets and topics, please wait")
        consumer = kafka.KafkaConsumer(
            *topics,
            bootstrap_servers=bootstrap_server,
            client_id="dev-testing" + str(time.time()),
            group_id="dev-testing" + str(time.time()),
            enable_auto_commit=True,
            auto_commit_interval_ms=1000,
            auto_offset_reset="largest"
        )

        print("Now consuming/waiting for messages...")
        for m in consumer:
            process_message(m)
    except kafka.common.KafkaUnavailableError as err:
        print("Kafka Error: %s" % str(err))
    except KeyboardInterrupt:
        print("User stop requested")
Example #27
Source File: app.py From Python-DevOps with MIT License | 5 votes |
def get_topics(topic):
    consumer = KafkaConsumer(
        topic,
        auto_offset_reset='earliest',
        bootstrap_servers=['localhost:9092'],
        api_version=(0, 10),
        consumer_timeout_ms=1000,
    )
    return json.dumps(
        [json.loads(msg.value.decode('utf-8')) for msg in consumer]
    )
Example #28
Source File: 22_3_snas_log_consumer.py From Python-Network-Programming with MIT License | 5 votes |
def main(conf):
    # Topics/feeds to enable
    topics = [
        'openbmp.parsed.router', 'openbmp.parsed.peer', 'openbmp.parsed.collector',
        'openbmp.parsed.bmp_stat', 'openbmp.parsed.unicast_prefix', 'openbmp.parsed.ls_node',
        'openbmp.parsed.ls_link', 'openbmp.parsed.ls_prefix', 'openbmp.parsed.l3vpn'
    ]

    # Read config file
    with open(conf, 'r') as f:
        config_content = yaml.load(f)

    bootstrap_server = config_content['bootstrap_servers']

    try:
        # connect and bind to topics
        print("Connecting to kafka... takes a minute to load offsets and topics, please wait")
        consumer = kafka.KafkaConsumer(
            *topics,
            bootstrap_servers=bootstrap_server,
            client_id="dev-testing" + str(time.time()),
            group_id="dev-testing" + str(time.time()),
            enable_auto_commit=True,
            auto_commit_interval_ms=1000,
            auto_offset_reset="largest"
        )

        print("Now consuming/waiting for messages...")
        for m in consumer:
            process_message(m)
    except kafka.common.KafkaUnavailableError as err:
        print("Kafka Error: %s" % str(err))
    except KeyboardInterrupt:
        print("User stop requested")
Example #29
Source File: conftest.py From kq with MIT License | 5 votes |
def consumer(hosts, group):
    return KafkaConsumer(
        bootstrap_servers=hosts,
        group_id=group,
        auto_offset_reset='earliest',
    )

# noinspection PyShadowingNames
Example #30
Source File: kafkadriver.py From sniffer with Apache License 2.0 | 5 votes |
def start(self):
    self.consumer = KafkaConsumer(self.topics, **self.config)
    self.bg_task = run_in_thread(self.bg_processing)