kafka.common.TopicAndPartition Scala Examples
The following examples show how to use kafka.common.TopicAndPartition. They are drawn from open-source projects; each example names its source file and the project it comes from.
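For orientation, TopicAndPartition is the legacy Kafka Scala API's case class identifying one partition of a topic, and in the examples below it is most often used as a map key for per-partition offsets. A minimal sketch (topic name, partition id and offset value are illustrative only):

import kafka.common.TopicAndPartition

val tp = TopicAndPartition("events", 0)                      // topic name and partition id (illustrative)
val offsets: Map[TopicAndPartition, Long] = Map(tp -> 42L)   // e.g. a per-partition offset map
println(s"${tp.topic} / ${tp.partition} -> ${offsets(tp)}")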
Example 1
Source File: KafkaClient.scala From incubator-retired-gearpump with Apache License 2.0
package org.apache.gearpump.streaming.kafka.lib.util

import kafka.admin.AdminUtils
import kafka.cluster.Broker
import kafka.common.TopicAndPartition
import kafka.consumer.SimpleConsumer
import kafka.utils.{ZKStringSerializer, ZkUtils}
import org.I0Itec.zkclient.ZkClient
import org.apache.gearpump.streaming.kafka.lib.source.consumer.KafkaConsumer
import org.apache.gearpump.streaming.kafka.util.KafkaConfig
import org.apache.gearpump.util.LogUtil
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.common.serialization.Serializer

object KafkaClient {
  private val LOG = LogUtil.getLogger(classOf[KafkaClient])

  val factory = new KafkaClientFactory

  class KafkaClientFactory extends java.io.Serializable {
    def getKafkaClient(config: KafkaConfig): KafkaClient = {
      val consumerConfig = config.getConsumerConfig
      val zkClient = new ZkClient(consumerConfig.zkConnect, consumerConfig.zkSessionTimeoutMs,
        consumerConfig.zkConnectionTimeoutMs, ZKStringSerializer)
      new KafkaClient(config, zkClient)
    }
  }
}

class KafkaClient(config: KafkaConfig, zkClient: ZkClient) {
  import org.apache.gearpump.streaming.kafka.lib.util.KafkaClient._

  private val consumerConfig = config.getConsumerConfig

  def getTopicAndPartitions(consumerTopics: List[String]): Array[TopicAndPartition] = {
    try {
      ZkUtils.getPartitionsForTopics(zkClient, consumerTopics).flatMap {
        case (topic, partitions) => partitions.map(TopicAndPartition(topic, _))
      }.toArray
    } catch {
      case e: Exception =>
        LOG.error(e.getMessage)
        throw e
    }
  }

  def getBroker(topic: String, partition: Int): Broker = {
    try {
      val leader = ZkUtils.getLeaderForPartition(zkClient, topic, partition)
        .getOrElse(throw new RuntimeException(
          s"leader not available for TopicAndPartition($topic, $partition)"))
      ZkUtils.getBrokerInfo(zkClient, leader)
        .getOrElse(throw new RuntimeException(s"broker info not found for leader $leader"))
    } catch {
      case e: Exception =>
        LOG.error(e.getMessage)
        throw e
    }
  }

  def createConsumer(topic: String, partition: Int, startOffsetTime: Long): KafkaConsumer = {
    val broker = getBroker(topic, partition)
    val soTimeout = consumerConfig.socketTimeoutMs
    val soBufferSize = consumerConfig.socketReceiveBufferBytes
    val clientId = consumerConfig.clientId
    val fetchSize = consumerConfig.fetchMessageMaxBytes
    val consumer = new SimpleConsumer(broker.host, broker.port, soTimeout, soBufferSize, clientId)
    KafkaConsumer(topic, partition, startOffsetTime, fetchSize, consumer)
  }

  def createProducer[K, V](keySerializer: Serializer[K],
      valueSerializer: Serializer[V]): KafkaProducer[K, V] = {
    new KafkaProducer[K, V](config.getProducerConfig, keySerializer, valueSerializer)
  }

  def createTopic(topic: String, partitions: Int, replicas: Int): Boolean = {
    try {
      if (AdminUtils.topicExists(zkClient, topic)) {
        LOG.info(s"topic $topic exists")
        true
      } else {
        AdminUtils.createTopic(zkClient, topic, partitions, replicas)
        LOG.info(s"created topic $topic")
        false
      }
    } catch {
      case e: Exception =>
        LOG.error(e.getMessage)
        throw e
    }
  }

  def close(): Unit = {
    zkClient.close()
  }
}
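A rough usage sketch of the client above; the KafkaConfig instance (kafkaConfig below) is assumed to come from the surrounding Gearpump job configuration and is not constructed here:

// Sketch only: kafkaConfig is an assumed, pre-built KafkaConfig instance
val client = KafkaClient.factory.getKafkaClient(kafkaConfig)
val partitions: Array[TopicAndPartition] = client.getTopicAndPartitions(List("clicks"))
val first = partitions.head
val consumer = client.createConsumer(first.topic, first.partition, kafka.api.OffsetRequest.EarliestTime)
// ... consume, then release the ZooKeeper connection
client.close()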
Example 2
Source File: KafkaProvider.scala From bandar-log with Apache License 2.0
package com.aol.one.dwh.bandarlog.providers

import com.aol.one.dwh.bandarlog.connectors.KafkaConnector
import com.aol.one.dwh.bandarlog.metrics.AtomicValue
import com.aol.one.dwh.infra.config.Topic
import kafka.common.TopicAndPartition

  // Excerpt: only the lag-computation helper is shown here; the declaration of the
  // enclosing provider class is omitted in this snippet.
  private def getLag(heads: Map[TopicAndPartition, Long], offsets: Map[TopicAndPartition, Long]): Long = {
    if (offsets.keySet.isEmpty) {
      heads.values.sum
    } else {
      val lags = heads.map { case (key, _) =>
        val lagValue = heads(key) - offsets.getOrElse(key, 0L)
        lagValue.max(0)
      }
      lags.sum
    }
  }
}
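To make the arithmetic above concrete, a small standalone check with made-up values (partition 1 has no committed offset, so its entire head counts as lag):

val heads   = Map(TopicAndPartition("t", 0) -> 10L, TopicAndPartition("t", 1) -> 5L)
val offsets = Map(TopicAndPartition("t", 0) -> 7L)
val lag = heads.map { case (tp, head) => (head - offsets.getOrElse(tp, 0L)).max(0L) }.sum
// (10 - 7) + (5 - 0) == 8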
Example 3
Source File: KafkaConsumer.scala From Swallow with Apache License 2.0
package com.intel.hibench.common.streaming.metrics

import java.util.Properties

import kafka.api.{OffsetRequest, FetchRequestBuilder}
import kafka.common.ErrorMapping._
import kafka.common.TopicAndPartition
import kafka.consumer.{ConsumerConfig, SimpleConsumer}
import kafka.message.MessageAndOffset
import kafka.utils.{ZKStringSerializer, ZkUtils, Utils}
import org.I0Itec.zkclient.ZkClient

class KafkaConsumer(zookeeperConnect: String, topic: String, partition: Int) {

  private val CLIENT_ID = "metrics_reader"
  private val props = new Properties()
  props.put("zookeeper.connect", zookeeperConnect)
  props.put("group.id", CLIENT_ID)
  private val config = new ConsumerConfig(props)
  private val consumer = createConsumer

  private val earliestOffset = consumer
    .earliestOrLatestOffset(TopicAndPartition(topic, partition), OffsetRequest.EarliestTime, -1)
  private var nextOffset: Long = earliestOffset
  private var iterator: Iterator[MessageAndOffset] = getIterator(nextOffset)

  def next(): Array[Byte] = {
    val mo = iterator.next()
    val message = mo.message

    nextOffset = mo.nextOffset

    Utils.readBytes(message.payload)
  }

  def hasNext: Boolean = {
    @annotation.tailrec
    def hasNextHelper(iter: Iterator[MessageAndOffset], newIterator: Boolean): Boolean = {
      if (iter.hasNext) true
      else if (newIterator) false
      else {
        iterator = getIterator(nextOffset)
        hasNextHelper(iterator, newIterator = true)
      }
    }
    hasNextHelper(iterator, newIterator = false)
  }

  def close(): Unit = {
    consumer.close()
  }

  private def createConsumer: SimpleConsumer = {
    val zkClient = new ZkClient(zookeeperConnect, 6000, 6000, ZKStringSerializer)
    try {
      val leader = ZkUtils.getLeaderForPartition(zkClient, topic, partition)
        .getOrElse(throw new RuntimeException(
          s"leader not available for TopicAndPartition($topic, $partition)"))
      val broker = ZkUtils.getBrokerInfo(zkClient, leader)
        .getOrElse(throw new RuntimeException(s"broker info not found for leader $leader"))
      new SimpleConsumer(broker.host, broker.port,
        config.socketTimeoutMs, config.socketReceiveBufferBytes, CLIENT_ID)
    } catch {
      case e: Exception =>
        throw e
    } finally {
      zkClient.close()
    }
  }

  private def getIterator(offset: Long): Iterator[MessageAndOffset] = {
    val request = new FetchRequestBuilder()
      .addFetch(topic, partition, offset, config.fetchMessageMaxBytes)
      .build()

    val response = consumer.fetch(request)
    response.errorCode(topic, partition) match {
      case NoError => response.messageSet(topic, partition).iterator
      case error => throw exceptionFor(error)
    }
  }
}
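A minimal way this reader might be driven; the ZooKeeper address, topic name and partition are placeholders:

val reader = new KafkaConsumer("localhost:2181", "metrics", 0)  // placeholders
try {
  while (reader.hasNext) {
    val payload: Array[Byte] = reader.next()
    println(new String(payload, "UTF-8"))
  }
} finally {
  reader.close()
}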
Example 4
Source File: KafkaClusterSuite.scala From BigDatalog with Apache License 2.0
package org.apache.spark.streaming.kafka

import scala.util.Random

import kafka.common.TopicAndPartition
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.SparkFunSuite

class KafkaClusterSuite extends SparkFunSuite with BeforeAndAfterAll {
  private val topic = "kcsuitetopic" + Random.nextInt(10000)
  private val topicAndPartition = TopicAndPartition(topic, 0)
  private var kc: KafkaCluster = null

  private var kafkaTestUtils: KafkaTestUtils = _

  override def beforeAll() {
    kafkaTestUtils = new KafkaTestUtils
    kafkaTestUtils.setup()

    kafkaTestUtils.createTopic(topic)
    kafkaTestUtils.sendMessages(topic, Map("a" -> 1))
    kc = new KafkaCluster(Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress))
  }

  override def afterAll() {
    if (kafkaTestUtils != null) {
      kafkaTestUtils.teardown()
      kafkaTestUtils = null
    }
  }

  test("metadata apis") {
    val leader = kc.findLeaders(Set(topicAndPartition)).right.get(topicAndPartition)
    val leaderAddress = s"${leader._1}:${leader._2}"
    assert(leaderAddress === kafkaTestUtils.brokerAddress, "didn't get leader")

    val parts = kc.getPartitions(Set(topic)).right.get
    assert(parts(topicAndPartition), "didn't get partitions")

    val err = kc.getPartitions(Set(topic + "BAD"))
    assert(err.isLeft, "getPartitions for a nonexistant topic should be an error")
  }

  test("leader offset apis") {
    val earliest = kc.getEarliestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(earliest(topicAndPartition).offset === 0, "didn't get earliest")

    val latest = kc.getLatestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(latest(topicAndPartition).offset === 1, "didn't get latest")
  }

  test("consumer offset apis") {
    val group = "kcsuitegroup" + Random.nextInt(10000)
    val offset = Random.nextInt(10000)
    val set = kc.setConsumerOffsets(group, Map(topicAndPartition -> offset))
    assert(set.isRight, "didn't set consumer offsets")

    val get = kc.getConsumerOffsets(group, Set(topicAndPartition)).right.get
    assert(get(topicAndPartition) === offset, "didn't get consumer offsets")
  }
}
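The same KafkaCluster API can be exercised outside of a test, assuming a Spark 1.x streaming-kafka build in which KafkaCluster is visible from your package; the broker address and topic are placeholders:

val cluster = new KafkaCluster(Map("metadata.broker.list" -> "localhost:9092"))
val tp = TopicAndPartition("events", 0)
cluster.getLatestLeaderOffsets(Set(tp)) match {
  case Right(offsets) => println(s"latest offset for $tp: ${offsets(tp).offset}")
  case Left(errs)     => println(s"offset lookup failed: $errs")
}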
Example 5
Source File: KafkaInMessagesProviderTest.scala From bandar-log with Apache License 2.0
package com.aol.one.dwh.bandarlog.providers

import com.aol.one.dwh.bandarlog.connectors.KafkaConnector
import com.aol.one.dwh.infra.config.Topic
import kafka.common.TopicAndPartition
import org.mockito.Mockito.when
import org.scalatest.FunSuite
import org.scalatest.mock.MockitoSugar

class KafkaInMessagesProviderTest extends FunSuite with MockitoSugar {

  private val kafkaConnector = mock[KafkaConnector]
  private val topic = Topic("topic_id", Set("topic_1", "topic_2"), "group_id")

  test("check count of in messages/heads over all topic partitions") {
    val heads = Some(Map(
      TopicAndPartition("topic_1", 1) -> 1L,
      TopicAndPartition("topic_2", 2) -> 2L,
      TopicAndPartition("topic_3", 3) -> 3L
    ))
    when(kafkaConnector.getHeads(topic)).thenReturn(heads)

    val result = new KafkaInMessagesProvider(kafkaConnector, topic).provide()

    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 6) // 1 + 2 + 3
  }

  test("check count of in messages/heads for empty heads result") {
    when(kafkaConnector.getHeads(topic)).thenReturn(Some(Map[TopicAndPartition, Long]()))

    val result = new KafkaInMessagesProvider(kafkaConnector, topic).provide()

    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 0)
  }

  test("return none if can't retrieve heads") {
    when(kafkaConnector.getHeads(topic)).thenReturn(None)

    val result = new KafkaInMessagesProvider(kafkaConnector, topic).provide()

    assert(result.getValue.isEmpty)
  }
}
Example 6
Source File: KafkaLagProviderTest.scala From bandar-log with Apache License 2.0
package com.aol.one.dwh.bandarlog.providers

import com.aol.one.dwh.infra.config.Topic
import com.aol.one.dwh.bandarlog.connectors.KafkaConnector
import kafka.common.TopicAndPartition
import org.mockito.Mockito.when
import org.scalatest.FunSuite
import org.scalatest.mock.MockitoSugar

class KafkaLagProviderTest extends FunSuite with MockitoSugar {

  private val kafkaConnector = mock[KafkaConnector]
  private val topic = Topic("topic_id", Set("topic_1", "topic_2", "topic_3"), "group_id")

  test("check lag per topic") {
    val heads = Map(
      TopicAndPartition("topic_1", 1) -> 4L,
      TopicAndPartition("topic_2", 2) -> 5L,
      TopicAndPartition("topic_3", 3) -> 6L
    )
    val offsets = Map(
      TopicAndPartition("topic_1", 1) -> 1L,
      TopicAndPartition("topic_2", 2) -> 2L,
      TopicAndPartition("topic_3", 3) -> 3L
    )
    val kafkaState = Option((heads, offsets))
    when(kafkaConnector.getKafkaState(topic)).thenReturn(kafkaState)

    val result = new KafkaLagProvider(kafkaConnector, topic).provide()

    // topic    partition  heads  offsets  lag
    // topic_1  1          4      1        4-1=3
    // topic_2  2          5      2        5-2=3
    // topic_3  3          6      3        6-3=3
    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 9) // lag sum 3 + 3 + 3
  }

  test("check 0 lag case per topic") {
    val heads = Map(
      TopicAndPartition("topic_1", 1) -> 1L,
      TopicAndPartition("topic_2", 2) -> 2L,
      TopicAndPartition("topic_3", 3) -> 3L
    )
    val offsets = Map(
      TopicAndPartition("topic_1", 1) -> 4L,
      TopicAndPartition("topic_2", 2) -> 5L,
      TopicAndPartition("topic_3", 3) -> 6L
    )
    val kafkaState = Option((heads, offsets))
    when(kafkaConnector.getKafkaState(topic)).thenReturn(kafkaState)

    val result = new KafkaLagProvider(kafkaConnector, topic).provide()

    // topic    partition  heads  offsets  lag
    // topic_1  1          1      4        1-4 = -3
    // topic_2  2          2      5        2-5 = -3
    // topic_3  3          3      6        3-6 = -3
    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 0) // lag.max(0) = 0
  }

  test("check lag for empty heads and offsets") {
    val kafkaState = Option((Map[TopicAndPartition, Long](), Map[TopicAndPartition, Long]()))
    when(kafkaConnector.getKafkaState(topic)).thenReturn(kafkaState)

    val result = new KafkaLagProvider(kafkaConnector, topic).provide()

    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 0)
  }

  test("return none if can't retrieve kafka state") {
    when(kafkaConnector.getKafkaState(topic)).thenReturn(None)

    val result = new KafkaLagProvider(kafkaConnector, topic).provide()

    assert(result.getValue.isEmpty)
  }
}
Example 7
Source File: KafkaOutMessagesProviderTest.scala From bandar-log with Apache License 2.0
package com.aol.one.dwh.bandarlog.providers

import com.aol.one.dwh.bandarlog.connectors.KafkaConnector
import com.aol.one.dwh.infra.config.Topic
import kafka.common.TopicAndPartition
import org.mockito.Mockito.when
import org.scalatest.FunSuite
import org.scalatest.mock.MockitoSugar

class KafkaOutMessagesProviderTest extends FunSuite with MockitoSugar {

  private val kafkaConnector = mock[KafkaConnector]
  private val topic = Topic("topic_id", Set("topic_1", "topic_2"), "group_id")

  test("check count of out messages/offsets over all topic partitions") {
    val offsets = Option(Map(
      TopicAndPartition("topic_1", 1) -> 1L,
      TopicAndPartition("topic_2", 2) -> 2L,
      TopicAndPartition("topic_3", 3) -> 3L
    ))
    when(kafkaConnector.getOffsets(topic)).thenReturn(offsets)

    val result = new KafkaOutMessagesProvider(kafkaConnector, topic).provide()

    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 6) // 1 + 2 + 3
  }

  test("check count of out messages/offsets for empty offsets result") {
    when(kafkaConnector.getOffsets(topic)).thenReturn(Some(Map[TopicAndPartition, Long]()))

    val result = new KafkaOutMessagesProvider(kafkaConnector, topic).provide()

    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 0)
  }

  test("return none if can't retrieve offsets") {
    when(kafkaConnector.getOffsets(topic)).thenReturn(None)

    val result = new KafkaOutMessagesProvider(kafkaConnector, topic).provide()

    assert(result.getValue.isEmpty)
  }
}
Example 8
Source File: KafkaConnector.scala From bandar-log with Apache License 2.0
package com.aol.one.dwh.bandarlog.connectors

import com.aol.one.dwh.bandarlog.connectors.KafkaConnector._
import com.aol.one.dwh.infra.config.Topic
import com.aol.one.dwh.infra.kafka.KafkaCluster
import com.aol.one.dwh.infra.util.LogTrait
import kafka.common.TopicAndPartition

object KafkaConnector {
  type Offset = Long
  type KafkaPartitions = Set[TopicAndPartition]
  type KafkaHeads = Map[TopicAndPartition, Offset]
  type KafkaOffsets = Map[TopicAndPartition, Offset]
  type KafkaState = (KafkaHeads, KafkaOffsets)
}

class KafkaConnector(kafkaCluster: KafkaCluster) extends LogTrait {

  def getKafkaState(topic: Topic): Option[KafkaState] = {
    for {
      heads <- getHeads(topic)
      offsets <- getOffsets(topic)
    } yield (heads, offsets)
  }

  def getHeads(topic: Topic): Option[KafkaHeads] = {
    kafkaCluster.getLatestOffsets(topic.groupId, topic.values) match {
      case Left(l) =>
        logger.error(s"Cannot obtain leaders offsets for topic:[${topic.values}], cause {}", l.toString)
        None
      case Right(r) => Some(r.map { case (key, value) => key -> value })
    }
  }

  def getOffsets(topic: Topic): Option[KafkaOffsets] = {
    kafkaCluster.getConsumerOffsets(topic.groupId, topic.values) match {
      case Left(l) =>
        logger.error(s"Cannot obtain consumers offsets for topic:[${topic.values}], cause {}", l.toString)
        None
      case Right(r) => Some(r)
    }
  }
}
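A sketch of how the connector above might be queried for per-partition lag; kafkaCluster (a com.aol.one.dwh.infra.kafka.KafkaCluster) and topic (a Topic) are assumed to already exist, and the lag formula mirrors the getLag helper from Example 2:

val connector = new KafkaConnector(kafkaCluster)
connector.getKafkaState(topic).foreach { case (heads, offsets) =>
  heads.foreach { case (tp, head) =>
    val lag = (head - offsets.getOrElse(tp, 0L)).max(0L)
    println(s"$tp lag=$lag")
  }
}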
Example 9
Source File: KafkaSourceOffset.scala From spark-kafka-0-8-sql with Apache License 2.0
package com.hortonworks.spark.sql.kafka08

import kafka.common.TopicAndPartition

import org.apache.spark.sql.execution.streaming.Offset
import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset

// Excerpt: only the companion object is shown; the KafkaSourceOffset case class
// (which carries partitionToOffsets) is not included in this snippet.
object KafkaSourceOffset {

  def getPartitionOffsets(offset: Offset): Map[TopicAndPartition, LeaderOffset] = {
    offset match {
      case o: KafkaSourceOffset => o.partitionToOffsets
      case _ =>
        throw new IllegalArgumentException(
          s"Invalid conversion from offset of ${offset.getClass} to KafkaSourceOffset")
    }
  }
}
Example 10
Source File: EtlProcessor.scala From etl-light with MIT License
package yamrcraft.etlite.processors

import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.DefaultDecoder
import org.apache.spark._
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka._
import org.slf4j.LoggerFactory
import yamrcraft.etlite.Settings
import yamrcraft.etlite.state.{KafkaOffsetsState, KafkaStateManager}
import yamrcraft.etlite.transformers.InboundMessage

object EtlProcessor {

  val logger = LoggerFactory.getLogger(this.getClass)

  def run(settings: Settings) = {
    val context = createContext(settings)

    val stateManager = new KafkaStateManager(settings.etl.state)

    val lastState = stateManager.readState
    logger.info(s"last persisted state: $lastState")

    val currState = stateManager.fetchNextState(lastState, settings)
    logger.info(s"batch working state: $currState")

    val rdd = createRDD(context, currState, settings)
    processRDD(rdd, currState.jobId, settings)

    logger.info("committing state")
    stateManager.commitState(currState)
  }

  private def createContext(settings: Settings) = {
    val sparkConf = new SparkConf()
      .setAppName(settings.spark.appName)
      .setAll(settings.spark.conf)

    new SparkContext(sparkConf)
  }

  private def createRDD(context: SparkContext, state: KafkaOffsetsState, settings: Settings): RDD[InboundMessage] = {
    KafkaUtils.createRDD[Array[Byte], Array[Byte], DefaultDecoder, DefaultDecoder, InboundMessage](
      context,
      settings.kafka.properties,
      state.ranges.toArray,
      Map[TopicAndPartition, Broker](),
      (msgAndMeta: MessageAndMetadata[Array[Byte], Array[Byte]]) => {
        InboundMessage(msgAndMeta.topic, msgAndMeta.key(), msgAndMeta.message())
      }
    )
  }

  private def processRDD(kafkaRDD: RDD[InboundMessage], jobId: Long, settings: Settings) = {
    // passed to remote workers
    val etlSettings = settings.etl

    logger.info(s"RDD processing started [rdd=${kafkaRDD.id}, jobId=$jobId]")

    val rdd = settings.etl.maxNumOfOutputFiles.map(kafkaRDD.coalesce(_)).getOrElse(kafkaRDD)
    rdd.foreachPartition { partition =>
      // executed at the worker
      new PartitionProcessor(jobId, TaskContext.get.partitionId(), etlSettings)
        .processPartition(partition)
    }

    logger.info(s"RDD processing ended [rdd=${kafkaRDD.id}, jobId=$jobId]")
  }
}
Example 11
Source File: DefaultPartitionGrouperSpec.scala From incubator-retired-gearpump with Apache License 2.0
package org.apache.gearpump.streaming.kafka.lib.source.grouper

import kafka.common.TopicAndPartition
import org.scalacheck.Gen
import org.scalatest.prop.PropertyChecks
import org.scalatest.{Matchers, PropSpec}

class DefaultPartitionGrouperSpec extends PropSpec with PropertyChecks with Matchers {
  property("KafkaDefaultGrouper should group TopicAndPartitions in a round-robin way") {
    forAll(Gen.posNum[Int], Gen.posNum[Int], Gen.posNum[Int]) {
      (topicNum: Int, partitionNum: Int, taskNum: Int) => {
        val topicAndPartitions = for {
          t <- 0.until(topicNum)
          p <- 0.until(partitionNum)
        } yield TopicAndPartition("topic" + t, p)
        0.until(taskNum).foreach { taskIndex =>
          val grouper = new DefaultPartitionGrouper
          grouper.group(taskNum, taskIndex, topicAndPartitions.toArray).forall(
            tp => topicAndPartitions.indexOf(tp) % taskNum == taskIndex)
        }
      }
    }
  }
}
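A concrete instance of the round-robin property being tested: with 2 topics of 3 partitions each and 3 tasks, task index 1 should receive exactly the partitions whose index in the input array is congruent to 1 modulo 3. A small sketch:

val tps = (for (t <- 0 until 2; p <- 0 until 3) yield TopicAndPartition("topic" + t, p)).toArray
val grouper = new DefaultPartitionGrouper
val assigned = grouper.group(3, 1, tps)  // taskNum = 3, taskIndex = 1
// per the property above, assigned holds the elements at indices 1 and 4 of tps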
Example 12
Source File: KafkaConsumerSpec.scala From incubator-retired-gearpump with Apache License 2.0
package org.apache.gearpump.streaming.kafka.lib.source.consumer

import com.twitter.bijection.Injection
import kafka.api.OffsetRequest
import kafka.common.TopicAndPartition
import kafka.consumer.SimpleConsumer
import kafka.message.{Message, MessageAndOffset}
import org.mockito.Mockito._
import org.scalacheck.Gen
import org.scalatest.mock.MockitoSugar
import org.scalatest.prop.PropertyChecks
import org.scalatest.{Matchers, PropSpec}

class KafkaConsumerSpec extends PropSpec with PropertyChecks with Matchers with MockitoSugar {

  val messageGen = Gen.alphaStr map (msg => new Message(Injection[String, Array[Byte]](msg)))
  val messageNumGen = Gen.choose[Int](0, 1000)
  val topicAndPartitionGen = for {
    topic <- Gen.alphaStr
    partition <- Gen.choose[Int](0, Int.MaxValue)
  } yield (topic, partition)

  property("KafkaConsumer should iterate MessageAndOffset calling hasNext and next") {
    forAll(messageGen, messageNumGen, topicAndPartitionGen) {
      (message: Message, num: Int, topicAndPartition: (String, Int)) =>
        val (topic, partition) = topicAndPartition
        val consumer = mock[SimpleConsumer]
        when(consumer.earliestOrLatestOffset(TopicAndPartition(topic, partition),
          OffsetRequest.EarliestTime, -1)).thenReturn(0)
        val iterator = 0.until(num).map(index => MessageAndOffset(message, index.toLong)).iterator
        val getIterator = (offset: Long) => iterator
        val kafkaConsumer = new KafkaConsumer(consumer, topic, partition, getIterator)
        0.until(num).foreach { i =>
          kafkaConsumer.hasNext shouldBe true
          val kafkaMessage = kafkaConsumer.next
          kafkaMessage.offset shouldBe i.toLong
          kafkaMessage.key shouldBe None
        }
        kafkaConsumer.hasNext shouldBe false
    }
  }

  val startOffsetGen = Gen.choose[Long](1L, 1000L)

  property("KafkaConsumer setStartOffset should reset internal iterator") {
    forAll(topicAndPartitionGen, startOffsetGen) {
      (topicAndPartition: (String, Int), startOffset: Long) =>
        val (topic, partition) = topicAndPartition
        val consumer = mock[SimpleConsumer]
        val getIterator = mock[Long => Iterator[MessageAndOffset]]
        when(consumer.earliestOrLatestOffset(TopicAndPartition(topic, partition),
          OffsetRequest.EarliestTime, -1)).thenReturn(0)
        val kafkaConsumer = new KafkaConsumer(consumer, topic, partition, getIterator)
        kafkaConsumer.setStartOffset(startOffset)
        verify(getIterator).apply(startOffset)
    }
  }

  property("KafkaConsumer close should close SimpleConsumer") {
    forAll(topicAndPartitionGen) { (topicAndPartition: (String, Int)) =>
      val (topic, partition) = topicAndPartition
      val consumer = mock[SimpleConsumer]
      when(consumer.earliestOrLatestOffset(TopicAndPartition(topic, partition),
        OffsetRequest.EarliestTime, -1)).thenReturn(0)
      val getIterator = mock[Long => Iterator[MessageAndOffset]]
      val kafkaConsumer = new KafkaConsumer(consumer, topic, partition, getIterator)
      kafkaConsumer.close()
      verify(consumer).close()
    }
  }
}
Example 13
Source File: KafkaConsumer.scala From incubator-retired-gearpump with Apache License 2.0
package org.apache.gearpump.streaming.kafka.lib.source.consumer

import kafka.api.{FetchRequestBuilder, OffsetRequest}
import kafka.common.ErrorMapping._
import kafka.common.TopicAndPartition
import kafka.consumer.SimpleConsumer
import kafka.message.MessageAndOffset
import kafka.utils.Utils

object KafkaConsumer {
  def apply(topic: String, partition: Int, startOffsetTime: Long, fetchSize: Int,
      consumer: SimpleConsumer): KafkaConsumer = {
    val getIterator = (offset: Long) => {
      val request = new FetchRequestBuilder()
        .addFetch(topic, partition, offset, fetchSize)
        .build()

      val response = consumer.fetch(request)
      response.errorCode(topic, partition) match {
        case NoError => response.messageSet(topic, partition).iterator
        case error => throw exceptionFor(error)
      }
    }
    new KafkaConsumer(consumer, topic, partition, getIterator, startOffsetTime)
  }
}

class KafkaConsumer(consumer: SimpleConsumer, topic: String, partition: Int,
    getIterator: (Long) => Iterator[MessageAndOffset],
    startOffsetTime: Long = OffsetRequest.EarliestTime) {

  private val earliestOffset = consumer
    .earliestOrLatestOffset(TopicAndPartition(topic, partition), startOffsetTime, -1)
  private var nextOffset: Long = earliestOffset
  private var iterator: Iterator[MessageAndOffset] = getIterator(nextOffset)

  def setStartOffset(startOffset: Long): Unit = {
    nextOffset = startOffset
    iterator = getIterator(nextOffset)
  }

  def next(): KafkaMessage = {
    val mo = iterator.next()
    val message = mo.message

    nextOffset = mo.nextOffset

    val offset = mo.offset
    val payload = Utils.readBytes(message.payload)
    new KafkaMessage(topic, partition, offset, Option(message.key).map(Utils.readBytes), payload)
  }

  def hasNext: Boolean = {
    @annotation.tailrec
    def hasNextHelper(iter: Iterator[MessageAndOffset], newIterator: Boolean): Boolean = {
      if (iter.hasNext) true
      else if (newIterator) false
      else {
        iterator = getIterator(nextOffset)
        hasNextHelper(iterator, newIterator = true)
      }
    }
    hasNextHelper(iterator, newIterator = false)
  }

  def getNextOffset: Long = nextOffset

  def close(): Unit = {
    consumer.close()
  }
}
Example 14
Source File: FetchThread.scala From incubator-retired-gearpump with Apache License 2.0
package org.apache.gearpump.streaming.kafka.lib.source.consumer

import java.nio.channels.ClosedByInterruptException
import java.util.concurrent.LinkedBlockingQueue

import kafka.common.TopicAndPartition
import org.apache.gearpump.streaming.kafka.lib.util.KafkaClient
import org.apache.gearpump.streaming.kafka.util.KafkaConfig
import org.slf4j.Logger

import org.apache.gearpump.util.LogUtil

object FetchThread {
  private val LOG: Logger = LogUtil.getLogger(classOf[FetchThread])

  val factory = new FetchThreadFactory

  class FetchThreadFactory extends java.io.Serializable {
    def getFetchThread(config: KafkaConfig, client: KafkaClient): FetchThread = {
      val fetchThreshold = config.getInt(KafkaConfig.FETCH_THRESHOLD_CONFIG)
      val fetchSleepMS = config.getLong(KafkaConfig.FETCH_SLEEP_MS_CONFIG)
      val startOffsetTime = config.getLong(KafkaConfig.CONSUMER_START_OFFSET_CONFIG)
      FetchThread(fetchThreshold, fetchSleepMS, startOffsetTime, client)
    }
  }

  def apply(fetchThreshold: Int, fetchSleepMS: Long, startOffsetTime: Long,
      client: KafkaClient): FetchThread = {
    val createConsumer = (tp: TopicAndPartition) =>
      client.createConsumer(tp.topic, tp.partition, startOffsetTime)
    val incomingQueue = new LinkedBlockingQueue[KafkaMessage]()
    val sleeper = new ExponentialBackoffSleeper(
      backOffMultiplier = 2.0,
      initialDurationMs = 100L,
      maximumDurationMs = 10000L)
    new FetchThread(createConsumer, incomingQueue, sleeper, fetchThreshold, fetchSleepMS)
  }
}

// Excerpt: the FetchThread class declaration (which defines consumers, topicAndPartitions,
// incomingQueue, createConsumer and fetchThreshold) is not included in this snippet;
// only the following private helpers are shown.

  private def fetchMessage: Boolean = {
    if (incomingQueue.size >= fetchThreshold) {
      false
    } else {
      consumers.foldLeft(false) { (hasNext, tpAndConsumer) =>
        val (_, consumer) = tpAndConsumer
        if (consumer.hasNext) {
          incomingQueue.put(consumer.next())
          true
        } else {
          hasNext
        }
      }
    }
  }

  private def createAllConsumers: Map[TopicAndPartition, KafkaConsumer] = {
    topicAndPartitions.map(tp => tp -> createConsumer(tp)).toMap
  }

  private def resetConsumers(nextOffsets: Map[TopicAndPartition, Long]): Unit = {
    consumers.values.foreach(_.close())
    consumers = createAllConsumers
    consumers.foreach { case (tp, consumer) =>
      consumer.setStartOffset(nextOffsets(tp))
    }
  }
}
Example 15
Source File: CheckpointedDirectKafkaInputDStream.scala From streamliner-examples with Apache License 2.0
package org.apache.spark.streaming.kafka

import kafka.common.TopicAndPartition

// Excerpt: the class declaration and the beginning of the batch-computation override are
// not included in this snippet; only the tail of the offset bookkeeping and the
// current-offset accessors are shown.

    prevOffsets = currentOffsets
    currentOffsets = untilOffsets.map(kv => kv._1 -> kv._2.offset)

    prevOffsets == currentOffsets match {
      case false => Some(rdd)
      case true => None
    }
  }

  def getCurrentOffsets(): Map[TopicAndPartition, Long] = currentOffsets

  def setCurrentOffsets(offsets: Map[TopicAndPartition, Long]): Unit = {
    currentOffsets = offsets
  }
}
Example 16
Source File: StreamHelper.scala From incubator-s2graph with Apache License 2.0
package org.apache.spark.streaming.kafka

import kafka.KafkaHelper
import kafka.common.TopicAndPartition
import kafka.consumer.PartitionTopicInfo
import kafka.message.MessageAndMetadata
import kafka.serializer.Decoder
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.{Logging, SparkException}

import scala.reflect.ClassTag

case class StreamHelper(kafkaParams: Map[String, String]) extends Logging {
  // helper for kafka zookeeper
  lazy val kafkaHelper = KafkaHelper(kafkaParams)
  lazy val kc = new KafkaCluster(kafkaParams)

  // 1. get leader's earliest and latest offset
  // 2. get consumer offset
  // 3-1. if (2) is bounded in (1) use (2) for stream
  // 3-2. else use (1) by "auto.offset.reset"
  private def getStartOffsets(topics: Set[String]): Map[TopicAndPartition, Long] = {
    lazy val reset = kafkaParams.get("auto.offset.reset").map(_.toLowerCase)
    lazy val consumerOffsets = kafkaHelper.getConsumerOffsets(topics.toSeq)

    {
      for {
        topicPartitions <- kc.getPartitions(topics).right
        smallOffsets <- kc.getEarliestLeaderOffsets(topicPartitions).right
        largeOffsets <- kc.getLatestLeaderOffsets(topicPartitions).right
      } yield {
        {
          for {
            tp <- topicPartitions
          } yield {
            val co = consumerOffsets.getOrElse(tp, PartitionTopicInfo.InvalidOffset)
            val so = smallOffsets.get(tp).map(_.offset).get
            val lo = largeOffsets.get(tp).map(_.offset).get

            logWarning(s"$tp: $co $so $lo")

            if (co >= so && co <= lo) {
              (tp, co)
            } else {
              (tp, reset match {
                case Some("smallest") => so
                case _ => lo
              })
            }
          }
        }.toMap
      }
    }.fold(errs => throw new SparkException(errs.mkString("\n")), ok => ok)
  }

  def createStream[K: ClassTag, V: ClassTag, KD <: Decoder[K]: ClassTag, VD <: Decoder[V]: ClassTag](
      ssc: StreamingContext, topics: Set[String]): InputDStream[(K, V)] = {
    type R = (K, V)
    val messageHandler = (mmd: MessageAndMetadata[K, V]) => (mmd.key(), mmd.message())

    kafkaHelper.registerConsumerInZK(topics)

    new DirectKafkaInputDStream[K, V, KD, VD, R](ssc, kafkaParams, getStartOffsets(topics), messageHandler)
  }

  def commitConsumerOffsets(offsets: HasOffsetRanges): Unit = {
    val offsetsMap = {
      for {
        range <- offsets.offsetRanges if range.fromOffset < range.untilOffset
      } yield {
        logDebug(range.toString())
        TopicAndPartition(range.topic, range.partition) -> range.untilOffset
      }
    }.toMap

    kafkaHelper.commitConsumerOffsets(offsetsMap)
  }

  def commitConsumerOffset(range: OffsetRange): Unit = {
    if (range.fromOffset < range.untilOffset) {
      try {
        val tp = TopicAndPartition(range.topic, range.partition)
        logDebug("Committed offset " + range.untilOffset + " for topic " + tp)
        kafkaHelper.commitConsumerOffset(tp, range.untilOffset)
      } catch {
        case t: Throwable =>
          // log it and let it go
          logWarning("exception during commitOffsets", t)
          throw t
      }
    }
  }

  def commitConsumerOffsets[R](stream: InputDStream[R]): Unit = {
    stream.foreachRDD { rdd =>
      commitConsumerOffsets(rdd.asInstanceOf[HasOffsetRanges])
    }
  }
}
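Finally, a sketch of wiring this helper into a streaming job; kafkaParams and ssc are assumed to exist, the topic name is a placeholder, and StringDecoder is just one possible decoder choice:

import kafka.serializer.StringDecoder

val helper = StreamHelper(kafkaParams)
val stream = helper.createStream[String, String, StringDecoder, StringDecoder](ssc, Set("events"))
stream.foreachRDD { rdd =>
  rdd.foreach { case (k, v) => println(s"$k -> $v") }
}
helper.commitConsumerOffsets(stream)  // commit each batch's offset ranges back through kafkaHelper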