Python kafka.KafkaClient() Examples
The following are 25 code examples of kafka.KafkaClient().
Each example notes the original project and source file it was taken from.
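Most of the examples below follow the same basic pattern: build a KafkaClient from a broker list, hand it to a producer or consumer, and close it when done. Here is a minimal sketch of that pattern, assuming the legacy kafka-python API (the one that ships KafkaClient and SimpleProducer) and a broker reachable at localhost:9092; the topic name "test" is only an illustration.

from kafka import KafkaClient, SimpleProducer

# Assumptions: a broker at localhost:9092 and a "test" topic that either
# already exists or is auto-created by the broker.
client = KafkaClient("localhost:9092")
producer = SimpleProducer(client)
producer.send_messages(b"test", b"hello kafka")  # topic and payloads as bytes
client.close()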
Example #1
Source File: main.py From kafka-utils with Apache License 2.0 | 6 votes |
def get_partition_leaders(cluster_config):
    """Return the current leaders of all partitions. Partitions are returned
    as a "topic-partition" string.

    :param cluster_config: the cluster
    :type cluster_config: kafka_utils.utils.config.ClusterConfig
    :returns: leaders for partitions
    :rtype: map of ("topic-partition", broker_id) pairs
    """
    client = KafkaClient(cluster_config.broker_list)
    result = {}
    for topic, topic_data in six.iteritems(client.topic_partitions):
        for partition, p_data in six.iteritems(topic_data):
            topic_partition = topic + "-" + str(partition)
            result[topic_partition] = p_data.leader
    return result
Example #2
Source File: test_consumer.py From yelp_kafka with Apache License 2.0 | 6 votes |
def test_commit_message_zk(self, config):
    if getattr(KafkaClient, 'send_offset_commit_request_kafka', None) is None:
        return
    with mock_kafka() as (mock_client, mock_consumer):
        config._config['offset_storage'] = 'zookeeper'
        consumer = KafkaSimpleConsumer('test_topic', config)
        consumer.connect()
        actual = consumer.commit_message(
            Message(0, 100, 'mykey', 'myvalue'),
        )
        assert actual is True
        mock_client.return_value.send_offset_commit_request \
            .assert_called_once_with(
                'test_group'.encode(),
                [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)],
            )
Example #3
Source File: test_consumer.py From yelp_kafka with Apache License 2.0 | 6 votes |
def test_commit_message_kafka(self, config):
    if getattr(KafkaClient, 'send_offset_commit_request_kafka', None) is None:
        return
    with mock_kafka() as (mock_client, mock_consumer):
        config._config['offset_storage'] = 'kafka'
        consumer = KafkaSimpleConsumer('test_topic', config)
        consumer.connect()
        actual = consumer.commit_message(
            Message(0, 100, 'mykey', 'myvalue'),
        )
        assert actual is True
        assert not mock_client.return_value.send_offset_commit_request.called
        mock_client.return_value.send_offset_commit_request_kafka \
            .assert_called_once_with(
                'test_group'.encode(),
                [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)],
            )
Example #4
Source File: discovery.py From yelp_kafka with Apache License 2.0 | 6 votes |
def discover_topics(cluster):
    """Get all the topics in a cluster.

    :param cluster: config of the cluster to get topics from
    :type cluster: ClusterConfig
    :returns: a dict <topic>: <[partitions]>
    :raises DiscoveryError: upon failure to request topics from kafka
    """
    client = KafkaClient(cluster.broker_list)
    try:
        topics = get_kafka_topics(client)
        return dict([(topic.decode(), partitions)
                     for topic, partitions in six.iteritems(topics)])
    except:
        log.exception(
            "Topics discovery failed for %s",
            cluster.broker_list
        )
        raise DiscoveryError("Failed to get topics information from "
                             "{cluster}".format(cluster=cluster))
Example #5
Source File: discovery.py From yelp_kafka with Apache License 2.0 | 6 votes |
def get_kafka_connection(cluster_type, client_id, **kwargs):
    """Get a kafka connection for the local region kafka cluster at Yelp.

    :param cluster_type: kafka cluster type (ex. 'scribe' or 'standard').
    :type cluster_type: string
    :param client_id: client_id to be used to connect to kafka.
    :type client_id: string
    :param kwargs: parameters to pass along when creating the KafkaClient instance.
    :returns: KafkaClient
    :raises DiscoveryError: :py:class:`yelp_kafka.error.DiscoveryError` upon
        failure connecting to a cluster.
    """
    cluster = get_region_cluster(cluster_type, client_id)
    try:
        return KafkaClient(cluster.broker_list, client_id=client_id, **kwargs)
    except:
        log.exception(
            "Connection to kafka cluster %s using broker list %s failed",
            cluster.name,
            cluster.broker_list
        )
        raise DiscoveryError("Failed to connect to cluster {0}".format(
            cluster.name))
Example #6
Source File: consumer.py From yelp_kafka with Apache License 2.0 | 6 votes |
def connect(self):
    """Connect to kafka and create a consumer.

    It uses config parameters to create a kafka-python KafkaClient and
    SimpleConsumer.
    """
    # Instantiate a kafka client connected to kafka.
    self.client = KafkaClient(
        self.config.broker_list,
        client_id=self.config.client_id
    )
    # Create a kafka SimpleConsumer.
    self.kafka_consumer = SimpleConsumer(
        client=self.client,
        topic=self.topic,
        partitions=self.partitions,
        **self.config.get_simple_consumer_args()
    )
    self.log.debug(
        "Connected to kafka. Topic %s, partitions %s, %s",
        self.topic,
        self.partitions,
        ','.join(['{0} {1}'.format(k, v)
                  for k, v in six.iteritems(self.config.get_simple_consumer_args())])
    )
    self.kafka_consumer.provide_partition_info()
Example #7
Source File: redis-monitor.py From openslack-crawler with Apache License 2.0 | 5 votes |
def setup(self):
    '''
    Connection stuff here so we can mock it
    '''
    self.redis_conn = redis.Redis(host=REDIS_HOST, port=REDIS_PORT)
    # set up kafka
    self.kafka_conn = KafkaClient(KAFKA_HOSTS)
    self.producer = SimpleProducer(self.kafka_conn)
    self.topic_prefix = KAFKA_TOPIC_PREFIX
Example #8
Source File: context.py From panoptes with Apache License 2.0 | 5 votes |
def _get_kafka_client(self):
    """
    Create and return a Kafka Client

    Returns:
        KafkaClient: The created Kafka client

    Raises:
        PanoptesContextError: Passes through any exceptions that happen in trying to create the Kafka client
    """
    # The logic of the weird check that follows is this: KafkaClient initialization can fail if there is a
    # problem connecting with even one broker. What we want to do is: succeed if the client was able to connect
    # to even one broker. So, we catch the exception and pass it through - and then check the number of brokers
    # connected to the client in the next statement (if not kafka_client.brokers) and fail if the client is not
    # connected to any broker
    self.__logger.info(u'Attempting to connect Kafka')
    config = self.__config
    kafka_client = None

    try:
        kafka_client = KafkaClient(config.kafka_brokers)
    except ConnectionError:
        pass

    if not kafka_client.brokers:
        raise PanoptesContextError(u'Could not connect to any Kafka broker from this list: %s'
                                   % config.kafka_brokers)

    self.__logger.info(u'Successfully connected to Kafka brokers: %s' % kafka_client.brokers)

    return kafka_client
Example #9
Source File: containers_test.py From data_pipeline with Apache License 2.0 | 5 votes |
def test_get_kafka_connection(containers):
    """
    Asserts that the method returns a working kafka client connection.
    """
    kafka_connection = containers.get_kafka_connection(timeout_seconds=1)
    assert isinstance(kafka_connection, KafkaClient)
Example #10
Source File: kafka_docker.py From data_pipeline with Apache License 2.0 | 5 votes |
def setup_capture_new_messages_consumer(topic):
    """Seeks to the tail of the topic then returns a function that can
    consume messages from that point.
    """
    kafka = KafkaClient(get_config().cluster_config.broker_list)
    group = str('data_pipeline_clientlib_test')
    consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=_ONE_MEGABYTE)
    consumer.seek(0, 2)  # seek to tail, 0 is the offset, and 2 is the tail

    yield consumer

    kafka.close()
Example #11
Source File: _kafka_producer.py From data_pipeline with Apache License 2.0 | 5 votes |
def __init__(self, producer_position_callback, dry_run=False):
    self.producer_position_callback = producer_position_callback
    self.dry_run = dry_run
    self.kafka_client = KafkaClient(get_config().cluster_config.broker_list)
    self.position_data_tracker = PositionDataTracker()
    self._reset_message_buffer()
    self.skip_messages_with_pii = get_config().skip_messages_with_pii
    self._publish_retry_policy = RetryPolicy(
        ExpBackoffPolicy(with_jitter=True),
        max_retry_count=get_config().producer_max_publish_retry_count
    )
    self._automatic_flush_enabled = True
Example #12
Source File: tailer.py From data_pipeline with Apache License 2.0 | 5 votes |
def _configure_tools(self):
    load_default_config(
        self.options.config_file,
        self.options.env_config_file
    )

    # We setup logging 'early' since we want it available for setup_topics
    self._setup_logging()

    self.kafka_client = KafkaClient(get_config().cluster_config.broker_list)

    self._setup_topics()
    if len(self.topic_to_offsets_map) == 0:
        self.option_parser.error("At least one topic must be specified.")

    if self.options.start_timestamp is not None and self.options.start_timestamp >= int(time.time()):
        self.option_parser.error("--start-timestamp should not be later than current time")

    if self.options.start_timestamp is not None and self.options.end_timestamp and (
        self.options.start_timestamp > self.options.end_timestamp
    ):
        self.option_parser.error("--end-timestamp must not be smaller than --start-timestamp")

    if self.options.all_fields:
        self.options.fields = self._public_message_field_names

    self._verify_offset_ranges()
Example #13
Source File: base_command.py From data_pipeline with Apache License 2.0 | 5 votes |
def _kafka_client(self):
    kafka_client = KafkaClient(self.config.cluster_config.broker_list)
    try:
        yield kafka_client
    finally:
        kafka_client.close()
Example #14
Source File: base_consumer.py From data_pipeline with Apache License 2.0 | 5 votes |
def kafka_client(self):
    """Returns the `KafkaClient` object."""
    return KafkaClient(self._region_cluster_config.broker_list)
Example #15
Source File: client.py From kzmonitor with MIT License | 5 votes |
def __init__(self, broker):
    self.broker = broker
    self.client = KafkaClient(broker, timeout=3)
Example #16
Source File: consumer.py From yelp_kafka with Apache License 2.0 | 5 votes |
def close(self):
    """Disconnect from kafka.

    If auto_commit is enabled commit offsets before disconnecting.
    """
    if self.kafka_consumer.auto_commit is True:
        try:
            self.commit()
        except:
            self.log.exception("Commit error. "
                               "Offsets may not have been committed")
    # Close all the connections to kafka brokers. KafkaClient open
    # connections to all the partition leaders.
    self.client.close()
Example #17
Source File: discovery.py From yelp_kafka with Apache License 2.0 | 5 votes |
def get_all_kafka_connections(cluster_type, client_id, **kwargs):
    """Get a kafka connection for each available kafka cluster at Yelp.

    :param cluster_type: kafka cluster type (ex. 'scribe' or 'standard').
    :type cluster_type: string
    :param client_id: client_id to be used to connect to kafka.
    :type client_id: string
    :param kwargs: parameters to pass along when creating the KafkaClient instance.
    :returns: list (cluster_name, KafkaClient)
    :raises DiscoveryError: :py:class:`yelp_kafka.error.DiscoveryError` upon
        failure connecting to a cluster.

    .. note:: This function creates a KafkaClient for each cluster in a region
       and tries to connect to it. If a cluster is not available it fails and
       closes all the previous connections.
    """
    clusters = get_all_clusters(cluster_type, client_id)
    connected_clusters = []
    for cluster in clusters:
        try:
            client = KafkaClient(cluster.broker_list, client_id=client_id, **kwargs)
            connected_clusters.append((cluster.name, client))
        except:
            log.exception(
                "Connection to kafka cluster %s using broker list %s failed",
                cluster.name,
                cluster.broker_list
            )
            for _, client in connected_clusters:
                client.close()
            raise DiscoveryError("Failed to connect to cluster {0}".format(
                cluster.name))
    return connected_clusters
Example #18
Source File: test_consumer.py From yelp_kafka with Apache License 2.0 | 5 votes |
def mock_kafka():
    with mock.patch('yelp_kafka.consumer.KafkaClient', autospec=True) as mock_client:
        with mock.patch('yelp_kafka.consumer.SimpleConsumer', autospec=True) as mock_consumer:
            mock_consumer.return_value.auto_commit = True
            yield mock_client, mock_consumer
Example #19
Source File: test_consumer.py From yelp_kafka with Apache License 2.0 | 5 votes |
def test_simple_consumer():
    topic = create_random_topic(1, 1)

    messages = [str(i).encode("UTF-8") for i in range(100)]

    cluster_config = ClusterConfig(None, None, [KAFKA_URL], ZOOKEEPER_URL)
    producer = YelpKafkaSimpleProducer(
        cluster_config=cluster_config,
        report_metrics=False,
        client=KafkaClient(KAFKA_URL),
    )
    producer.send_messages(topic, *messages)

    config = KafkaConsumerConfig(
        'test',
        cluster_config,
        auto_offset_reset='smallest',
        auto_commit=False,
        consumer_timeout_ms=1000
    )
    consumer = KafkaSimpleConsumer(topic, config)

    with consumer:
        for expected_offset in range(100):
            message = consumer.get_message()
            assert message.offset == expected_offset
            assert message.partition == 0
            assert message.value == str(expected_offset).encode("UTF-8")
Example #20
Source File: api.py From flasfka with MIT License | 5 votes |
def get_kafka_client():
    if not hasattr(flask.g, "kafka_client"):
        flask.g.kafka_client = kafka.KafkaClient(app.config["HOSTS"])
    return flask.g.kafka_client
Example #21
Source File: kafka_random_reader.py From exporters with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, *args, **kwargs):
    import kafka
    super(KafkaRandomReader, self).__init__(*args, **kwargs)
    brokers = self.read_option('brokers')
    group = self.read_option('group')
    topic = self.read_option('topic')

    client = kafka.KafkaClient(map(bytes, brokers))

    # TODO: Remove this comments when next steps are decided.
    # If resume is set to true, then child should not load initial offsets
    # child_loads_initial_offsets = False if settings.get('RESUME') else True

    # self.consumer = kafka.MultiProcessConsumer(client, group, topic, num_procs=1,
    #                                            child_loads_initial_offsets=child_loads_initial_offsets,
    #                                            auto_commit=False)

    self.consumer = kafka.SimpleConsumer(client, group, topic,
                                         auto_commit=False)
    self.decompress_fun = zlib.decompress
    self.processor = self.create_processor()
    self.partitions = client.get_partition_ids_for_topic(topic)

    self.logger.info(
        'KafkaRandomReader has been initiated. '
        'Topic: {}. Group: {}'.format(self.read_option('topic'),
                                      self.read_option('group')))

    self.logger.info('Running random sampling')
    self._reservoir = self.fill_reservoir()
    self.logger.info('Random sampling completed, ready to process batches')
Example #22
Source File: tweet_sampler.py From straw with MIT License | 5 votes |
def __init__(self, APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET, host, port):
    super(KafkaStrawStreamer, self).__init__(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)

    # connect to Kafka
    print("Connecting to Kafka node {0}:{1}".format(host, port))
    kafka = KafkaClient("{0}:{1}".format(host, port))
    self.producer = BufferedSimpleProducer(kafka, 100)
Example #23
Source File: context.py From panoptes with Apache License 2.0 | 5 votes |
def kafka_client(self):
    """
    A Kafka client

    Returns:
        KafkaClient
    """
    return self._kafka_client
Example #24
Source File: test_consumer.py From yelp_kafka with Apache License 2.0 | 4 votes |
def run_kafka_consumer_group_test(num_consumers, num_partitions):
    topic = create_random_topic(1, num_partitions)
    cluster_config = ClusterConfig(None, None, [KAFKA_URL], ZOOKEEPER_URL)
    config = KafkaConsumerConfig(
        'test',
        cluster_config,
        auto_offset_reset='smallest',
        partitioner_cooldown=5,
        auto_commit_interval_messages=1,
    )
    queue = Queue()

    def create_consumer():
        def consume():
            consumer = KafkaConsumerGroup([topic], config)
            with consumer:
                while True:
                    try:
                        message = consumer.next()
                        queue.put(message)
                        consumer.task_done(message)
                    except ConsumerTimeout:
                        return

        p = Process(target=consume)
        p.daemon = True
        return p

    consumer_processes = [create_consumer() for _ in range(num_consumers)]

    for consumer_process in consumer_processes:
        consumer_process.start()

    producer = YelpKafkaSimpleProducer(
        cluster_config=cluster_config,
        report_metrics=False,
        client=KafkaClient(KAFKA_URL),
    )
    for i in range(100):
        producer.send_messages(topic, str(i).encode("UTF-8"))

    # wait until all 100 messages have been consumed
    while queue.qsize() < 100:
        time.sleep(0.1)

    received_messages = []
    while True:
        try:
            message = queue.get(block=True, timeout=0.5)
        except Empty:
            break
        received_messages.append(int(message.value))

    assert [i for i in range(100)] == sorted(received_messages)
Example #25
Source File: straw_app.py From straw with MIT License | 4 votes |
def __init__(self, config):
    app = Flask(__name__)
    app.secret_key = 'i love to search full text in real time'

    # attach a redis connection pool
    app.pool = redis.ConnectionPool(host="localhost", port=6379)

    # user -> channels mapping
    app.user_channels = {}

    # how to handle messages that enter the stream from redis pub sub
    def redis_message_handler(msg):
        redis_connection = redis.Redis(connection_pool=app.pool)

        # get channel and content of incoming message
        channel = msg['channel']
        data = msg['data']

        # word highlighting -- TODO: this would be better to do in the search engine!
        query = redis_connection.get(channel)
        words = list(set(query.split(" ")))
        for w in words:
            data = data.lower().replace(w.lower(), highlight(w.lower()))

        # find users subscribed to this channel
        if app.user_channels.get(channel) is not None:
            for user in app.user_channels.get(channel):
                redis_connection.lpush(user, data)
        else:
            # no more users for this channel, unsubscribe from it
            redis_connection.unsubscribe(channel)

    # Add Redis query subscriber to app
    app.disp = []
    app.subscriber = QuerySubscriber("localhost", 6379, redis_message_handler)

    # setup kafka producer in the app
    kafka = KafkaClient("{0}:{1}".format(config["zookeeper_host"], 9092))
    app.producer = SimpleProducer(kafka)

    # add the app
    self.app = app