kafka.message.MessageAndMetadata Scala Examples
The following examples show how to use kafka.message.MessageAndMetadata.
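Each MessageAndMetadata carries the decoded key and message together with the topic, partition, and offset of the consumed record. Before the project examples, here is a minimal sketch of its most common use, as the messageHandler of Spark's direct Kafka stream (the String decoders, kafkaParams, and fromOffsets are whatever your own job supplies; nothing here comes from the projects below):

import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.StringDecoder
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.kafka.KafkaUtils

// Sketch only: turn each record into a tuple of its metadata and payload.
def recordsWithMetadata(ssc: StreamingContext,
                        kafkaParams: Map[String, String],
                        fromOffsets: Map[TopicAndPartition, Long]) = {
  val handler = (mmd: MessageAndMetadata[String, String]) =>
    (mmd.topic, mmd.partition, mmd.offset, mmd.key(), mmd.message())

  KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder,
    (String, Int, Long, String, String)](ssc, kafkaParams, fromOffsets, handler)
}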
Example 1
Source File: StreamHelper.scala, from incubator-s2graph (Apache License 2.0)
package org.apache.spark.streaming.kafka

import kafka.KafkaHelper
import kafka.common.TopicAndPartition
import kafka.consumer.PartitionTopicInfo
import kafka.message.MessageAndMetadata
import kafka.serializer.Decoder
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.{Logging, SparkException}

import scala.reflect.ClassTag

case class StreamHelper(kafkaParams: Map[String, String]) extends Logging {
  // helper for kafka zookeeper
  lazy val kafkaHelper = KafkaHelper(kafkaParams)
  lazy val kc = new KafkaCluster(kafkaParams)

  // 1. get leader's earliest and latest offset
  // 2. get consumer offset
  // 3-1. if (2) is bounded in (1) use (2) for stream
  // 3-2. else use (1) by "auto.offset.reset"
  private def getStartOffsets(topics: Set[String]): Map[TopicAndPartition, Long] = {
    lazy val reset = kafkaParams.get("auto.offset.reset").map(_.toLowerCase)
    lazy val consumerOffsets = kafkaHelper.getConsumerOffsets(topics.toSeq)

    {
      for {
        topicPartitions <- kc.getPartitions(topics).right
        smallOffsets <- kc.getEarliestLeaderOffsets(topicPartitions).right
        largeOffsets <- kc.getLatestLeaderOffsets(topicPartitions).right
      } yield {
        {
          for {
            tp <- topicPartitions
          } yield {
            val co = consumerOffsets.getOrElse(tp, PartitionTopicInfo.InvalidOffset)
            val so = smallOffsets.get(tp).map(_.offset).get
            val lo = largeOffsets.get(tp).map(_.offset).get

            logWarning(s"$tp: $co $so $lo")

            if (co >= so && co <= lo) {
              (tp, co)
            } else {
              (tp, reset match {
                case Some("smallest") => so
                case _ => lo
              })
            }
          }
        }.toMap
      }
    }.fold(errs => throw new SparkException(errs.mkString("\n")), ok => ok)
  }

  def createStream[K: ClassTag, V: ClassTag, KD <: Decoder[K]: ClassTag, VD <: Decoder[V]: ClassTag](
      ssc: StreamingContext, topics: Set[String]): InputDStream[(K, V)] = {
    type R = (K, V)
    val messageHandler = (mmd: MessageAndMetadata[K, V]) => (mmd.key(), mmd.message())

    kafkaHelper.registerConsumerInZK(topics)

    new DirectKafkaInputDStream[K, V, KD, VD, R](ssc, kafkaParams, getStartOffsets(topics), messageHandler)
  }

  def commitConsumerOffsets(offsets: HasOffsetRanges): Unit = {
    val offsetsMap = {
      for {
        range <- offsets.offsetRanges if range.fromOffset < range.untilOffset
      } yield {
        logDebug(range.toString())
        TopicAndPartition(range.topic, range.partition) -> range.untilOffset
      }
    }.toMap

    kafkaHelper.commitConsumerOffsets(offsetsMap)
  }

  def commitConsumerOffset(range: OffsetRange): Unit = {
    if (range.fromOffset < range.untilOffset) {
      try {
        val tp = TopicAndPartition(range.topic, range.partition)
        logDebug("Committed offset " + range.untilOffset + " for topic " + tp)
        kafkaHelper.commitConsumerOffset(tp, range.untilOffset)
      } catch {
        case t: Throwable =>
          // log it and let it go
          logWarning("exception during commitOffsets", t)
          throw t
      }
    }
  }

  def commitConsumerOffsets[R](stream: InputDStream[R]): Unit = {
    stream.foreachRDD { rdd =>
      commitConsumerOffsets(rdd.asInstanceOf[HasOffsetRanges])
    }
  }
}
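A usage sketch for the helper above (not part of the project): the broker list, consumer group, and topic name are placeholders, kafka.serializer.StringDecoder is assumed for both key and value, and the processing body is only a stand-in.

import kafka.serializer.StringDecoder
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.kafka.{HasOffsetRanges, StreamHelper}

// Sketch only: create a direct stream through StreamHelper and commit the
// consumer offsets after each batch has been handled.
def wireStream(ssc: StreamingContext): Unit = {
  val kafkaParams = Map(
    "metadata.broker.list" -> "localhost:9092", // placeholder broker list
    "group.id"             -> "example-group",  // placeholder consumer group
    "auto.offset.reset"    -> "smallest"
  )
  val helper = StreamHelper(kafkaParams)
  val stream = helper.createStream[String, String, StringDecoder, StringDecoder](ssc, Set("events"))

  stream.foreachRDD { rdd =>
    // replace with real processing of the (key, value) pairs
    rdd.foreach(println)
    helper.commitConsumerOffsets(rdd.asInstanceOf[HasOffsetRanges])
  }
}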
Example 2
Source File: CheckpointedDirectKafkaInputDStream.scala (excerpt), from streamliner-examples (Apache License 2.0)
package org.apache.spark.streaming.kafka

    prevOffsets = currentOffsets
    currentOffsets = untilOffsets.map(kv => kv._1 -> kv._2.offset)

    prevOffsets == currentOffsets match {
      case false => Some(rdd)
      case true => None
    }
  }

  def getCurrentOffsets(): Map[TopicAndPartition, Long] = currentOffsets

  def setCurrentOffsets(offsets: Map[TopicAndPartition, Long]): Unit = {
    currentOffsets = offsets
  }
}
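The excerpt keeps the previous and current offset maps so a batch is emitted only when the offsets actually advanced, and it exposes them through getCurrentOffsets/setCurrentOffsets, presumably so the caller can checkpoint them outside Spark. A sketch of one way such a Map[TopicAndPartition, Long] could be serialized for external storage; the plain-text encoding is an assumption (not part of the project) and it assumes topic names contain no commas.

import kafka.common.TopicAndPartition

// Sketch only: encode the offset map as "topic,partition,offset" lines so it
// can be written to and read back from any external store (file, ZK, DB ...).
def encodeOffsets(offsets: Map[TopicAndPartition, Long]): String =
  offsets.map { case (tp, offset) => s"${tp.topic},${tp.partition},$offset" }.mkString("\n")

def decodeOffsets(encoded: String): Map[TopicAndPartition, Long] =
  encoded.split("\n").filter(_.nonEmpty).map { line =>
    val Array(topic, partition, offset) = line.split(",")
    TopicAndPartition(topic, partition.toInt) -> offset.toLong
  }.toMap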
Example 3
Source File: EtlProcessor.scala, from etl-light (MIT License)
package yamrcraft.etlite.processors

import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.DefaultDecoder
import org.apache.spark._
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka._
import org.slf4j.LoggerFactory
import yamrcraft.etlite.Settings
import yamrcraft.etlite.state.{KafkaOffsetsState, KafkaStateManager}
import yamrcraft.etlite.transformers.InboundMessage

object EtlProcessor {

  val logger = LoggerFactory.getLogger(this.getClass)

  def run(settings: Settings) = {
    val context = createContext(settings)

    val stateManager = new KafkaStateManager(settings.etl.state)

    val lastState = stateManager.readState
    logger.info(s"last persisted state: $lastState")

    val currState = stateManager.fetchNextState(lastState, settings)
    logger.info(s"batch working state: $currState")

    val rdd = createRDD(context, currState, settings)
    processRDD(rdd, currState.jobId, settings)

    logger.info("committing state")
    stateManager.commitState(currState)
  }

  private def createContext(settings: Settings) = {
    val sparkConf = new SparkConf()
      .setAppName(settings.spark.appName)
      .setAll(settings.spark.conf)
    new SparkContext(sparkConf)
  }

  private def createRDD(context: SparkContext, state: KafkaOffsetsState, settings: Settings): RDD[InboundMessage] = {
    KafkaUtils.createRDD[Array[Byte], Array[Byte], DefaultDecoder, DefaultDecoder, InboundMessage](
      context,
      settings.kafka.properties,
      state.ranges.toArray,
      Map[TopicAndPartition, Broker](),
      (msgAndMeta: MessageAndMetadata[Array[Byte], Array[Byte]]) => {
        InboundMessage(msgAndMeta.topic, msgAndMeta.key(), msgAndMeta.message())
      }
    )
  }

  private def processRDD(kafkaRDD: RDD[InboundMessage], jobId: Long, settings: Settings) = {
    // passed to remote workers
    val etlSettings = settings.etl

    logger.info(s"RDD processing started [rdd=${kafkaRDD.id}, jobId=$jobId]")

    val rdd = settings.etl.maxNumOfOutputFiles.map(kafkaRDD.coalesce(_)).getOrElse(kafkaRDD)
    rdd.foreachPartition { partition =>
      // executed at the worker
      new PartitionProcessor(jobId, TaskContext.get.partitionId(), etlSettings)
        .processPartition(partition)
    }

    logger.info(s"RDD processing ended [rdd=${kafkaRDD.id}, jobId=$jobId]")
  }
}
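Here createRDD hands KafkaUtils.createRDD the offset ranges held in the project's KafkaOffsetsState, and the message handler maps each MessageAndMetadata onto an InboundMessage. As a hedged aside, offset ranges of that kind are built with Spark's OffsetRange factory; the topic name, partitions, and offsets below are placeholder values, not taken from the project.

import org.apache.spark.streaming.kafka.OffsetRange

// Sketch only: offset ranges describing which slice of each partition to read.
// Arguments are topic, partition, fromOffset, untilOffset.
val ranges: Array[OffsetRange] = Array(
  OffsetRange.create("events", 0, 0L, 100L),
  OffsetRange.create("events", 1, 0L, 150L)
)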
Example 4
Source File: KafkaJsonConsumerSpec.scala, from coral (Apache License 2.0)
package io.coral.lib

import java.util.Properties

import kafka.consumer._
import kafka.message.MessageAndMetadata
import org.json4s.JsonAST.{JNothing, JValue}
import org.json4s.jackson.JsonMethods._
import org.mockito.Mockito._
import org.scalatest.mock.MockitoSugar
import org.scalatest.{Matchers, WordSpec}

class KafkaJsonConsumerSpec extends WordSpec with Matchers with MockitoSugar {

  "KafkaJsonConsumer" should {
    "provide a stream" in {
      val consumer = KafkaJsonConsumer()
      intercept[IllegalArgumentException] {
        consumer.stream("abc", new Properties())
      }
    }
  }

  "KafkaJsonStream" should {
    val fakeConnection = mock[ConsumerConnector]
    doNothing.when(fakeConnection).commitOffsets

    val fakeMessage = mock[MessageAndMetadata[Array[Byte], JValue]]
    when(fakeMessage.key()).thenReturn("TestKey".getBytes)
    when(fakeMessage.message()).thenReturn(parse("""{ "json": "test" }"""))

    val fakeIterator = mock[ConsumerIterator[Array[Byte], JValue]]
    when(fakeIterator.hasNext()).thenReturn(true).thenReturn(false)
    when(fakeIterator.next()).thenReturn(fakeMessage)

    val fakeStream = mock[KafkaStream[Array[Byte], JValue]]
    when(fakeStream.iterator()).thenReturn(fakeIterator)

    "provide a next value" in {
      val kjs = new KafkaJsonStream(fakeConnection, fakeStream)
      kjs.hasNextInTime shouldBe true
      kjs.next shouldBe parse("""{ "json": "test" }""")
    }
  }

  "JsonDecoder" should {
    "convert bytes to Json object" in {
      val jsonString = """{ "hello": "json" }"""
      val bytes = jsonString.getBytes
      val jsonValue = parse(jsonString)
      JsonDecoder.fromBytes(bytes) shouldBe jsonValue
    }

    "return JNothing for invalid JSon" in {
      val jsonString = """hello"""
      val bytes = jsonString.getBytes
      JsonDecoder.fromBytes(bytes) shouldBe JNothing
    }
  }
}
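The spec exercises a JsonDecoder object from the same project, which lets the consumer yield MessageAndMetadata[Array[Byte], JValue] values instead of raw bytes. The following is only an illustrative reconstruction of what such a decoder could look like, matching the behaviour the spec expects; it is not the project's actual code.

import kafka.serializer.Decoder
import org.json4s.JsonAST.{JNothing, JValue}
import org.json4s.jackson.JsonMethods._

// Sketch only: decode raw Kafka bytes into a json4s JValue, falling back to
// JNothing when the payload is not valid JSON.
object JsonDecoder extends Decoder[JValue] {
  override def fromBytes(bytes: Array[Byte]): JValue =
    parseOpt(new String(bytes, "UTF-8")).getOrElse(JNothing)
}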