Python confluent_kafka.TopicPartition() Examples

The following are 16 code examples of confluent_kafka.TopicPartition(), drawn from open-source projects. The source file and its license are noted above each example. You may also want to check out the other available functions and classes of the confluent_kafka module.
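Before the examples, here is a minimal, self-contained sketch of how TopicPartition is commonly constructed and used with a Consumer. The broker address, topic name, and group id below are placeholder assumptions, not values taken from any of the examples.

from confluent_kafka import Consumer, TopicPartition

consumer = Consumer({
    'bootstrap.servers': 'localhost:9092',  # assumed broker address
    'group.id': 'example-group',            # assumed consumer group
    'enable.auto.commit': False,
})

# TopicPartition(topic, partition[, offset]) identifies a partition and,
# optionally, an offset within it.
tp = TopicPartition('example-topic', 0)     # assumed topic name
consumer.assign([tp])

msg = consumer.poll(timeout=10.0)
if msg is not None and msg.error() is None:
    # Commit the *next* offset, as several of the examples below also do.
    consumer.commit(offsets=[TopicPartition(msg.topic(), msg.partition(), msg.offset() + 1)],
                    asynchronous=False)

consumer.close()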
Example #1
Source File: _stream_task.py    From winton-kafka-streams with Apache License 2.0
def commit_offsets(self):
        """ Commit consumed offsets if needed """

        # may be asked to commit on rebalance or shutdown but
        # should only commit if the processor has requested.
        try:
            if self.commitOffsetNeeded:
                offsets_to_commit = [TopicPartition(t, p, o + 1) for ((t, p), o) in self.consumedOffsets.items()]
                self.consumer.commit(offsets=offsets_to_commit, asynchronous=False)
                self.consumedOffsets.clear()
                self.commitOffsetNeeded = False

        except KafkaException as ke:
            kafka_error = ke.args[0].code()

            if kafka_error in _taskMigratedErrorCodes:
                raise TaskMigratedError(f'{self} migrated.')
            else:
                raise 
Example #2
Source File: test_proto_serializers.py    From confluent-kafka-python with Apache License 2.0
def test_protobuf_message_serialization(kafka_cluster, pb2, data):
    """
    Validates that we get the same message back that we put in.

    """
    topic = kafka_cluster.create_topic("serialization-proto")
    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})

    value_serializer = ProtobufSerializer(pb2, sr)
    value_deserializer = ProtobufDeserializer(pb2)

    producer = kafka_cluster.producer(value_serializer=value_serializer)
    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    expect = pb2(**data)
    producer.produce(topic, value=expect, partition=0)
    producer.flush()

    msg = consumer.poll()
    actual = msg.value()

    assert all(getattr(expect, k) == getattr(actual, k) for k in data.keys()) 
Example #3
Source File: integration_test.py    From confluent-kafka-python with Apache License 2.0
def verify_consumer_seek(c, seek_to_msg):
    """ Seek to message and verify the next consumed message matches.
        Must only be performed on an actively consuming consumer. """

    tp = confluent_kafka.TopicPartition(seek_to_msg.topic(),
                                        seek_to_msg.partition(),
                                        seek_to_msg.offset())
    print('seek: Seeking to %s' % tp)
    c.seek(tp)

    while True:
        msg = c.poll()
        assert msg is not None
        if msg.error():
            print('seek: Ignoring non-message: %s' % msg.error())
            continue

        if msg.topic() != seek_to_msg.topic() or msg.partition() != seek_to_msg.partition():
            continue

        print('seek: message at offset %d' % msg.offset())
        assert msg.offset() == seek_to_msg.offset(), \
            'expected message at offset %d, not %d' % (seek_to_msg.offset(), msg.offset())
        break 
Example #4
Source File: kafka_listener.py    From koku with GNU Affero General Public License v3.0
def listen_for_messages(msg, consumer, application_source_id):  # noqa: C901
    """
    Listen for Platform-Sources kafka messages.

    Args:
        msg (Message): Kafka message to process
        consumer (Consumer): Kafka consumer object
        application_source_id (Integer): Cost Management's current Application Source ID. Used for
            kafka message filtering.

    Returns:
        None

    """
    try:
        try:
            msg = get_sources_msg_data(msg, application_source_id)
            offset = msg.get("offset")
            partition = msg.get("partition")
        except SourcesMessageError:
            return
        if msg:
            LOG.info(f"Processing message offset: {offset} partition: {partition}")
            topic_partition = TopicPartition(topic=Config.SOURCES_TOPIC, partition=partition, offset=offset)
            LOG.info(f"Cost Management Message to process: {str(msg)}")
            try:
                with transaction.atomic():
                    process_message(application_source_id, msg)
                    consumer.commit()
            except (IntegrityError, InterfaceError, OperationalError) as err:
                connection.close()
                LOG.error(f"{type(err).__name__}: {err}")
                rewind_consumer_to_retry(consumer, topic_partition)
            except SourcesHTTPClientError as err:
                LOG.error(err)
                rewind_consumer_to_retry(consumer, topic_partition)
            except SourceNotFoundError:
                LOG.warning(f"Source not found in platform sources. Skipping msg: {msg}")
                consumer.commit()

    except KafkaError as error:
        LOG.error(f"[listen_for_messages] Kafka error encountered: {type(error).__name__}: {error}", exc_info=True)
    except Exception as error:
        LOG.error(f"[listen_for_messages] UNKNOWN error encountered: {type(error).__name__}: {error}", exc_info=True) 
Example #5
Source File: consumer.py    From openwhisk-package-kafka with Apache License 2.0
def __sizeMessage(self, message):
        messagePayload = self.__getMessagePayload(message)
        return len(json.dumps(messagePayload).encode('utf-8'))

    # return a list of TopicPartitions that represent the _next_ offset to consume
Example #6
Source File: consumer.py    From openwhisk-package-kafka with Apache License 2.0
def __getOffsetList(self, messages):
        offsets = []
        for message in messages:
            # Add one to the offset, otherwise we'll consume this message again.
            # That's just how Kafka works, you place the bookmark at the *next* message.
            offsets.append(TopicPartition(message.topic(), message.partition(), message.offset() + 1))

        return offsets 
Example #7
Source File: test_avro_serializers.py    From confluent-kafka-python with Apache License 2.0
def test_avro_record_serialization(kafka_cluster, load_file, avsc, data, record_type):
    """
    Tests basic Avro serializer functionality

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_file (callable(str)): Avro file reader
        avsc (str): Avro schema file
        data (object): data to be serialized

    """
    topic = kafka_cluster.create_topic("serialization-avro")
    sr = kafka_cluster.schema_registry()

    schema_str = load_file(avsc)
    value_serializer = AvroSerializer(schema_str, sr)

    value_deserializer = AvroDeserializer(schema_str, sr)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    producer.produce(topic, value=data, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    if record_type == 'record':
        assert all(v == actual[k] for k, v in data.items())
    elif record_type == 'float':
        assert data == pytest.approx(actual)
    else:
        assert actual == data 
Example #8
Source File: test_avro_serializers.py    From confluent-kafka-python with Apache License 2.0
def test_avro_record_serialization_custom(kafka_cluster):
    """
    Tests basic Avro serializer to_dict and from_dict object hook functionality.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture

    """
    topic = kafka_cluster.create_topic("serialization-avro")
    sr = kafka_cluster.schema_registry()

    user = User('Bowie', 47, 'purple')
    value_serializer = AvroSerializer(User.schema_str, sr,
                                      lambda user, ctx:
                                      dict(name=user.name,
                                           favorite_number=user.favorite_number,
                                           favorite_color=user.favorite_color))

    value_deserializer = AvroDeserializer(User.schema_str, sr,
                                          lambda user_dict, ctx:
                                          User(**user_dict))

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    producer.produce(topic, value=user, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    user2 = msg.value()

    assert user2 == user 
Example #9
Source File: test_proto_serializers.py    From confluent-kafka-python with Apache License 2.0
def test_protobuf_deserializer_type_mismatch(kafka_cluster):
    """
    Ensures an Exception is raised when deserializing an unexpected type.

    """
    pb2_1 = PublicTestProto_pb2.TestMessage
    pb2_2 = metadata_proto_pb2.HDFSOptions

    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})
    topic = kafka_cluster.create_topic("serialization-proto-refs")
    serializer = ProtobufSerializer(pb2_1, sr)
    deserializer = ProtobufDeserializer(pb2_2)

    producer = kafka_cluster.producer(key_serializer=serializer)
    consumer = kafka_cluster.consumer(key_deserializer=deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    def dr(err, msg):
        print("dr msg {} {}".format(msg.key(), msg.value()))

    producer.produce(topic, key=pb2_1(test_string='abc',
                                      test_bool=True,
                                      test_bytes=b'def'),
                     partition=0)
    producer.flush()

    with pytest.raises(ConsumeError,
                       match="Error parsing message"):
        consumer.poll() 
Example #10
Source File: test_json_serializers.py    From confluent-kafka-python with Apache License 2.0
def test_json_record_deserialization_mismatch(kafka_cluster, load_file):
    """
    Ensures a ConsumeError is raised when the consumed record does not validate against the deserializer's schema.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture

        load_file (callable(str)): JSON Schema file reader

    """
    topic = kafka_cluster.create_topic("serialization-json")
    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})

    schema_str = load_file("contractor.json")
    schema_str2 = load_file("product.json")

    value_serializer = JSONSerializer(schema_str, sr)
    value_deserializer = JSONDeserializer(schema_str2)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    record = {"contractorId": 2,
              "contractorName": "Magnus Edenhill",
              "contractRate": 30,
              "trades": ["pickling"]}

    producer.produce(topic, value=record, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    with pytest.raises(
            ConsumeError,
            match="'productId' is a required property"):
        consumer.poll() 
Example #11
Source File: test_TopicPartition.py    From confluent-kafka-python with Apache License 2.0
def test_sort():
    """ TopicPartition sorting (rich comparator) """

    # sorting uses the comparator
    correct = [TopicPartition('topic1', 3),
               TopicPartition('topic3', 0),
               TopicPartition('topicA', 5),
               TopicPartition('topicA', 5)]

    tps = sorted([TopicPartition('topicA', 5),
                  TopicPartition('topic3', 0),
                  TopicPartition('topicA', 5),
                  TopicPartition('topic1', 3)])

    assert correct == tps 
Example #12
Source File: test_TopicPartition.py    From confluent-kafka-python with Apache License 2.0
def test_cmp():
    """ TopicPartition comparator """

    assert TopicPartition('aa', 19002) > TopicPartition('aa', 0)
    assert TopicPartition('aa', 13) >= TopicPartition('aa', 12)
    assert TopicPartition('BaB', 9) != TopicPartition('Card', 9)
    assert TopicPartition('b3x', 4) == TopicPartition('b3x', 4)
    assert TopicPartition('ulv', 2) < TopicPartition('xy', 0)
    assert TopicPartition('ulv', 2) <= TopicPartition('ulv', 3) 
Example #13
Source File: test_TopicPartition.py    From confluent-kafka-python with Apache License 2.0
def test_subclassing():
    class SubTopicPartition(TopicPartition):
        def __init__(self, topic_part_str):
            topic, part = topic_part_str.split(":")
            super(SubTopicPartition, self).__init__(topic=topic, partition=int(part))

    st = SubTopicPartition("topic1:0")
    assert st.topic == "topic1"
    assert st.partition == 0

    st = SubTopicPartition("topic2:920")
    assert st.topic == "topic2"
    assert st.partition == 920 
Example #14
Source File: confluent.py    From kafka-influxdb with Apache License 2.0
def _subscribe(self):
        """
        Subscribe to Kafka topics.

        A workaround for missing Zookeeper support in the confluent-kafka Python client is required here.
        Automatic partition rebalancing does not work with Kafka versions < 0.9.0,
        so we manually assign the partitions to the consumer for legacy Kafka versions.
        """
        if self.broker_version < self.KAFKA_VERSION_ZOOKEEPER_OPTIONAL:
            self.consumer.assign([TopicPartition(self.topic, p)
                                  for p in range(0, 10)])
        else:
            self.consumer.subscribe([self.topic]) 
Example #15
Source File: test_avro_serializers.py    From confluent-kafka-python with Apache License 2.0
def test_delivery_report_serialization(kafka_cluster, load_file, avsc, data, record_type):
    """
    Tests Avro serialization with deserialization performed inside the delivery report callback

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_file (callable(str)): Avro file reader
        avsc (str): Avro schema file
        data (object): data to be serialized

    """
    topic = kafka_cluster.create_topic("serialization-avro-dr")
    sr = kafka_cluster.schema_registry()
    schema_str = load_file(avsc)

    value_serializer = AvroSerializer(schema_str, sr)

    value_deserializer = AvroDeserializer(schema_str, sr)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    def assert_cb(err, msg):
        actual = value_deserializer(msg.value(),
                                    SerializationContext(topic, MessageField.VALUE))

        if record_type == "record":
            assert all(v == actual[k] for k, v in data.items())
        elif record_type == 'float':
            assert data == pytest.approx(actual)
        else:
            assert actual == data

    producer.produce(topic, value=data, partition=0, on_delivery=assert_cb)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    # the schema may include defaults that need not exist in the original data
    if record_type == 'record':
        assert all(v == actual[k] for k, v in data.items())
    elif record_type == 'float':
        assert data == pytest.approx(actual)
    else:
        assert actual == data 
Example #16
Source File: test_json_serializers.py    From confluent-kafka-python with Apache License 2.0
def test_json_record_serialization_custom(kafka_cluster, load_file):
    """
    Ensures to_dict and from_dict hooks are properly applied by the serializer.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture

        load_file (callable(str)): JSON Schema file reader

    """
    topic = kafka_cluster.create_topic("serialization-json")
    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})

    schema_str = load_file("product.json")
    value_serializer = JSONSerializer(schema_str, sr,
                                      to_dict=_testProduct_to_dict)
    value_deserializer = JSONDeserializer(schema_str,
                                          from_dict=_testProduct_from_dict)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    record = _TestProduct(product_id=1,
                          name="The ice sculpture",
                          price=12.50,
                          tags=["cold", "ice"],
                          dimensions={"length": 7.0,
                                      "width": 12.0,
                                      "height": 9.5},
                          location={"latitude": -78.75,
                                    "longitude": 20.4})

    producer.produce(topic, value=record, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    assert all([getattr(actual, attribute) == getattr(record, attribute)
                for attribute in vars(record)])
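The _TestProduct class and the _testProduct_to_dict/_testProduct_from_dict hooks used above are defined elsewhere in the test suite. A plausible sketch, assuming the field names of the standard product.json example schema (productId, productName, price, tags, dimensions, warehouseLocation), could look like this:

class _TestProduct(object):
    def __init__(self, product_id, name, price, tags, dimensions, location):
        self.product_id = product_id
        self.name = name
        self.price = price
        self.tags = tags
        self.dimensions = dimensions
        self.location = location


def _testProduct_to_dict(product, ctx):
    # Map the object's attributes onto the (assumed) JSON Schema field names.
    return {"productId": product.product_id,
            "productName": product.name,
            "price": product.price,
            "tags": product.tags,
            "dimensions": product.dimensions,
            "warehouseLocation": product.location}


def _testProduct_from_dict(record, ctx):
    return _TestProduct(record["productId"],
                        record["productName"],
                        record["price"],
                        record["tags"],
                        record["dimensions"],
                        record["warehouseLocation"])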