Python kafka.TopicPartition() Examples
The following are 14 code examples of kafka.TopicPartition(). Each example notes the project and source file it comes from, along with the project's license, so you can trace it back to the original source. You may also want to check out the other available functions and classes of the kafka module.
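
For orientation, here is a minimal sketch of the pattern most of these examples share: build a TopicPartition from a topic name and partition number, assign it to a consumer, and seek within it. The broker address and topic name below are placeholders, not values from any of the projects.

from kafka import KafkaConsumer, TopicPartition

# Placeholder broker and topic, for illustration only.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
tp = TopicPartition('my-topic', 0)  # topic name plus partition number

consumer.assign([tp])           # manual assignment, no group rebalancing
consumer.seek_to_beginning(tp)  # rewind to the oldest available message
print(consumer.position(tp))    # current offset within the partition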
Example #1
Source File: client.py From search-MjoLniR with MIT License
def offsets_for_times(consumer, partitions, timestamp):
    """Augment KafkaConsumer.offsets_for_times to not return None

    Parameters
    ----------
    consumer : kafka.KafkaConsumer
        This consumer must only be used for collecting metadata, and not
        consuming. APIs will be used that invalidate consuming.
    partitions : list of kafka.TopicPartition
    timestamp : number
        Timestamp, in seconds since unix epoch, to return offsets for.

    Returns
    -------
    dict from kafka.TopicPartition to integer offset
    """
    # Kafka uses millisecond timestamps
    timestamp_ms = int(timestamp * 1000)
    response = consumer.offsets_for_times({p: timestamp_ms for p in partitions})
    offsets = {}
    for tp, offset_and_timestamp in response.items():
        if offset_and_timestamp is None:
            # No messages exist after timestamp. Fetch latest offset.
            consumer.assign([tp])
            consumer.seek_to_end(tp)
            offsets[tp] = consumer.position(tp)
        else:
            offsets[tp] = offset_and_timestamp.offset
    return offsets
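
A hypothetical call site for the helper above could look like the following; the broker address, topic name, and timestamp are assumptions for illustration, not part of the original project.

import kafka

# Assumed broker and topic, for illustration only.
consumer = kafka.KafkaConsumer(bootstrap_servers='localhost:9092')
partitions = [kafka.TopicPartition('my-topic', p)
              for p in consumer.partitions_for_topic('my-topic')]

# Offsets of the first messages at or after 2020-01-01T00:00:00Z.
offsets = offsets_for_times(consumer, partitions, 1577836800)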
Example #2
Source File: test_kafka.py From py-timeexecution with Apache License 2.0
def _query_backend(self):
    consumer = KafkaConsumer(
        bootstrap_servers=KAFKA_HOST,
        value_deserializer=lambda v: JSONSerializer().loads(v.decode('utf-8'))
    )
    # The position right after assignment (the end of the log, with the
    # default auto_offset_reset) doubles as the message count; then rewind
    # to offset 0 and read everything back.
    tp = TopicPartition(self.topic, 0)
    consumer.assign([tp])
    count = consumer.position(tp)
    consumer.seek(tp, 0)
    metrics = []
    for i in range(count):
        metrics.append(next(consumer))
    return metrics
Example #3
Source File: kafka_utils_class.py From warriorframework with Apache License 2.0
def assign_partitions(self, partitions):
    """
    Assign partitions to consumer.

    Arguments:
      partitions(list) : list of [topic, partition] lists
        example : [[topic1, 1], [topic2, 1]]

    Returns:
      result(bool) : False if exception occurs, True otherwise.
    """
    print_info("assigning partitions to consumer {}".format(partitions))
    topic_partitions = [TopicPartition(topic=tup[0], partition=tup[1]) for tup in partitions]
    try:
        self.kafka_consumer.assign(topic_partitions)
        result = True
    except KafkaError as exc:
        print_error("Exception during assigning partitions - {}".format(exc))
        result = False
    return result
Example #4
Source File: kafka_utils_class.py From warriorframework with Apache License 2.0
def seek_to_position(self, topic, partition, offset):
    """
    Seek to the given offset.

    Arguments:
      topic(str): topic name
      partition(int): partition number
      offset(int): offset number

    Returns:
      result(bool) : False if exception occurs, True otherwise.
    """
    print_info("seeking to position {}:{}:{}".format(topic, partition, offset))
    topic_partition = TopicPartition(topic=topic, partition=partition)
    try:
        self.kafka_consumer.seek(partition=topic_partition, offset=offset)
        result = True
    except KafkaError as exc:
        print_error("Exception during seek - {}".format(exc))
        result = False
    return result
Example #5
Source File: messaging.py From Ad-Insertion-Sample with BSD 3-Clause "New" or "Revised" License
def debug(self, topic):
    c = KafkaConsumer(bootstrap_servers=kafka_hosts, client_id=self._client_id,
                      group_id=None, api_version=(0, 10))

    # assign/subscribe topic
    partitions = c.partitions_for_topic(topic)
    if not partitions:
        raise Exception("Topic " + topic + " does not exist")
    c.assign([TopicPartition(topic, p) for p in partitions])

    # seek to beginning if needed
    c.seek_to_beginning()

    # fetch messages
    while True:
        partitions = c.poll(100)
        if partitions:
            for p in partitions:
                for msg in partitions[p]:
                    yield msg.value.decode('utf-8')
        # yield an empty string when a poll round comes back empty
        yield ""
    c.close()
Example #6
Source File: test_client.py From search-MjoLniR with MIT License
def test_offset_for_times(mocker):
    partitions = [kafka.TopicPartition('ut_topic', 0)]
    offsets_for_times = {tp: OffsetAndTimestamp(42, -1) for tp in partitions}
    positions = {tp: 747 for tp in partitions}

    mock = mocker.Mock()
    mock.offsets_for_times.return_value = offsets_for_times
    mock.position.side_effect = lambda tp: positions.get(tp, 0)

    # Uses returned offset for time when provided
    offsets = client.offsets_for_times(mock, partitions, 987654321)
    assert len(offsets) == len(partitions)
    assert all(tp in offsets for tp in partitions)
    assert offsets[partitions[0]] == 42

    # When offsets_for_times returns None returns position at end
    offsets_for_times[partitions[0]] = None
    offsets = client.offsets_for_times(mock, partitions, 987654321)
    assert len(offsets) == len(partitions)
    assert all(tp in offsets for tp in partitions)
    assert offsets[partitions[0]] == 747
Example #7
Source File: client.py From search-MjoLniR with MIT License
def offset_range_for_timestamp_range(brokers, start, end, topic):
    """Determine OffsetRange for a given timestamp range

    Parameters
    ----------
    brokers : list of str
        Kafka brokers to bootstrap from
    start : number
        Unix timestamp in seconds
    end : number
        Unix timestamp in seconds
    topic : str
        Topic to fetch offsets for

    Returns
    -------
    list of OffsetRange or None
        Per-partition ranges of offsets to read
    """
    consumer = kafka.KafkaConsumer(bootstrap_servers=brokers)
    partitions = consumer.partitions_for_topic(topic)
    if partitions is None:
        # Topic does not exist.
        return None
    partitions = [kafka.TopicPartition(topic, p) for p in partitions]
    o_start = offsets_for_times(consumer, partitions, start)
    o_end = offsets_for_times(consumer, partitions, end)
    return [OffsetRange(tp, o_start[tp], o_end[tp]) for tp in partitions]
Example #8
Source File: inference_cache.py From rafiki with Apache License 2.0
def pop_queries_for_worker(self, worker_id: str, batch_size: int) -> List[Query]:
    name = f'workers_{worker_id}_queries'
    query_consumer = KafkaConsumer(name,
                                   bootstrap_servers=self.connection_url,
                                   auto_offset_reset='earliest',
                                   group_id=QUERIES_QUEUE)
    partition = TopicPartition(name, 0)
    partitiondic = query_consumer.end_offsets([partition])
    offsetend = partitiondic.get(partition, None)
    if offsetend == 0:
        # Queue is empty, nothing to pop.
        query_consumer.close()
        return []
    try:
        queries = []
        while True:
            record = next(query_consumer)
            queries.append(record.value)
            query_consumer.commit()
            # Stop at the end offset captured above, or once the batch is full.
            if record.offset >= offsetend - 1 or len(queries) == batch_size:
                break
        queries = [pickle.loads(x) for x in queries]
        query_consumer.close()
        return queries
    except KafkaError:
        query_consumer.close()
        return []
Example #9
Source File: kafka_utils.py From fooltrader with MIT License
def get_latest_timestamp_order_from_topic(topic):
    consumer = KafkaConsumer(topic,
                             # client_id='fooltrader',
                             # group_id='fooltrader',
                             value_deserializer=lambda m: json.loads(m.decode('utf8')),
                             bootstrap_servers=[KAFKA_HOST])
    topic_partition = TopicPartition(topic=topic, partition=0)
    end_offset = consumer.end_offsets([topic_partition])[topic_partition]
    if end_offset > 0:
        # partition assigned after poll, and we could seek
        consumer.poll(5, 1)
        consumer.seek(topic_partition, end_offset - 1)

        message = consumer.poll(10000, 500)
        msgs = message[topic_partition]
        if len(msgs) > 0:
            record = msgs[-1]
            timestamp = to_timestamp(record.value['timestamp'])
            order = None
            if 'order' in record.value:
                order = record.value['order']
            return timestamp, order

    return None, None
Example #10
Source File: kafka.py From django-logpipe with ISC License
def seek(self, consumer, topic, partition):
    KafkaOffset = apps.get_model(app_label='logpipe', model_name='KafkaOffset')
    tp = kafka.TopicPartition(topic=topic, partition=partition)
    try:
        obj = KafkaOffset.objects.get(topic=topic, partition=partition)
        logger.debug('Seeking to offset "%s" on topic "%s", partition "%s"' % (obj.offset, topic, partition))
        consumer.client.seek(tp, obj.offset)
    except KafkaOffset.DoesNotExist:
        logger.debug('Seeking to beginning of topic "%s", partition "%s"' % (topic, partition))
        consumer.client.seek_to_beginning(tp)
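
The KafkaOffset model looked up above is defined inside django-logpipe itself. As a rough sketch of what the code relies on (field names inferred from the objects.get() lookup and the obj.offset access, not copied from the library's actual model), it could be approximated as:

from django.db import models

# Inferred sketch only; not django-logpipe's actual model definition.
class KafkaOffset(models.Model):
    topic = models.CharField(max_length=200)
    partition = models.PositiveIntegerField()
    offset = models.PositiveIntegerField(default=0)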
Example #11
Source File: kafka.py From django-logpipe with ISC License
def _get_topic_partitions(self):
    p = []
    partitions = self.client.partitions_for_topic(self.topic_name)
    if not partitions:
        raise MissingTopicError('Could not find topic %s. Does it exist?' % self.topic_name)
    for partition in partitions:
        tp = kafka.TopicPartition(self.topic_name, partition=partition)
        p.append(tp)
    return p
Example #12
Source File: msearch_daemon.py From search-MjoLniR with MIT License
def iter_records(self) -> Generator[Mapping, None, None]:
    consumer = kafka.KafkaConsumer(bootstrap_servers=self.brokers,
                                   group_id='mjolnir_msearch',
                                   enable_auto_commit=False,
                                   auto_offset_reset='latest',
                                   value_deserializer=lambda x: json.loads(x.decode('utf8')),
                                   api_version=mjolnir.kafka.BROKER_VERSION,
                                   # Msearch requests are relatively heavy at a few tens of ms each.
                                   # 50 requests at 50ms each gives us ~2.5s to process a batch. We
                                   # keep this low so kafka regularly gets re-pinged.
                                   max_poll_records=min(500, 50 * self.n_workers))
    consumer.subscribe([self.topic_work])
    try:
        last_commit = 0.0
        offset_commit_interval_sec = 60
        offsets = cast(Dict[kafka.TopicPartition, kafka.OffsetAndMetadata], dict())
        while self.load_monitor.is_below_threshold:
            now = time.monotonic()
            if offsets and now - last_commit > offset_commit_interval_sec:
                consumer.commit_async(offsets)
                last_commit = now
                offsets = {}
            # By polling directly, rather than using the iter based api, we
            # have the opportunity to regularly re-check the load monitor
            # and transition out of the consuming state if needed.
            poll_response = consumer.poll(timeout_ms=60000)
            if not poll_response:
                continue
            with Metric.PROCESS_BATCH.time():
                for tp, records in poll_response.items():
                    for record in records:
                        self.load_monitor.notify()
                        yield record.value
            # Wait for all the work to complete
            self.work_queue.join()
            for tp, records in poll_response.items():
                offsets[tp] = kafka.OffsetAndMetadata(records[-1].offset + 1, '')
            Metric.RECORDS_PROCESSED.inc(sum(len(x) for x in poll_response.values()))
    finally:
        if offsets:
            consumer.commit(offsets)
        consumer.close()
Example #13
Source File: client.py From search-MjoLniR with MIT License
def kafka_to_rdd(sc, client_config, offset_ranges):
    """Read ranges of kafka partitions into an RDD.

    Parameters
    ----------
    sc : pyspark.SparkContext
    client_config : ClientConfig
    offset_ranges : list of OffsetRange
        List of topic partitions along with ranges to read. Start and end
        of range are inclusive.

    Returns
    -------
    pyspark.RDD
        Contents of the specified offset_ranges
    """
    def read_offset_range(offset_range):
        if offset_range.end <= offset_range.start:
            # Raise exception?
            return
        # After serialization round trip these fail an isinstance check.
        # re-instantiate so we have the expected thing.
        tp = kafka.TopicPartition(*offset_range.tp)
        consumer = kafka.KafkaConsumer(bootstrap_servers=client_config.brokers,
                                       value_deserializer=lambda x: json.loads(x.decode('utf8')))
        try:
            consumer.assign([tp])
            consumer.seek(tp, offset_range.start)
            while True:
                poll_response = consumer.poll(timeout_ms=10000)
                if poll_response and tp in poll_response:
                    for message in poll_response[tp]:
                        if message.offset > offset_range.end:
                            break
                        yield message.value
                if consumer.position(tp) >= offset_range.end:
                    break
        finally:
            consumer.close()

    return (
        # TODO: This isn't the same as assigning each offset_range to a separate
        # partition, but it doesn't seem like pyspark allows us to do that. Often
        # enough this seems to achieve the same thing, but without guarantees.
        sc.parallelize(offset_ranges, len(offset_ranges))
        .flatMap(read_offset_range)
    )
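
Examples #7 and #13 are written to compose: the OffsetRange list produced from a timestamp range feeds straight into kafka_to_rdd. A hedged sketch of the round trip, assuming a client_config object with a brokers attribute and placeholder timestamps and topic:

# Sketch only; client_config, the timestamps, and the topic are assumptions.
ranges = offset_range_for_timestamp_range(
    client_config.brokers, start=1577836800, end=1577923200, topic='my-topic')
if ranges is not None:
    rdd = kafka_to_rdd(sc, client_config, ranges)
    print(rdd.count())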
Example #14
Source File: bot.py From fooltrader with MIT License
def consume_topic_with_func(self, topic, func):
    consumer = KafkaConsumer(topic,
                             client_id='fooltrader',
                             group_id=self.bot_name,
                             value_deserializer=lambda m: json.loads(m.decode('utf8')),
                             bootstrap_servers=[KAFKA_HOST])
    topic_partition = TopicPartition(topic=topic, partition=0)

    if self.start_timestamp:
        start_timestamp = int(self.start_timestamp.timestamp() * 1000)
        end_offset = consumer.end_offsets([topic_partition])[topic_partition]
        if end_offset == 0:
            self.logger.warning("topic:{} end offset:{}".format(topic, end_offset))
            self.logger.error("the topic:{} has no data, but you want to backtest".format(self.quote_topic))
            return

        # find the offset from start_timestamp
        offset_and_timestamp = consumer.offsets_for_times({topic_partition: start_timestamp})

        if offset_and_timestamp:
            offset_and_timestamp = offset_and_timestamp[topic_partition]
            if offset_and_timestamp:
                # partition assigned after poll, and we could seek
                consumer.poll(5, 1)
                # move to the offset
                consumer.seek(topic_partition, offset_and_timestamp.offset)

                for message in consumer:
                    if 'timestamp' in message.value:
                        message_time = to_timestamp(message.value['timestamp'])
                    else:
                        message_time = to_timestamp(message.timestamp)

                    if self.end_timestamp and (message_time > self.end_timestamp):
                        consumer.close()
                        break

                    getattr(self, func)(message.value)
            else:
                latest_timestamp, _ = get_latest_timestamp_order_from_topic(self.quote_topic)
                self.logger.warning(
                    "start:{} is after the last record:{}".format(self.start_timestamp, latest_timestamp))