kafka.serializer.Decoder Scala Examples
The following examples show how to use kafka.serializer.Decoder.
The examples are drawn from open-source projects; each one names the original project and source file it comes from.
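For reference, the Decoder trait in the old Kafka 0.8.x Scala client exposes a single fromBytes method. The following is a minimal illustrative implementation (not taken from any of the projects below): a UTF-8 string decoder, with the VerifiableProperties constructor parameter that helpers such as Spark's KafkaUtils expect when instantiating decoders reflectively.

import kafka.serializer.Decoder
import kafka.utils.VerifiableProperties

// Minimal illustration: a Decoder[String] that interprets the raw message
// payload as UTF-8 text. The VerifiableProperties parameter is the argument
// typically passed when a decoder is constructed reflectively.
class Utf8Decoder(props: VerifiableProperties = null) extends Decoder[String] {
  override def fromBytes(bytes: Array[Byte]): String = new String(bytes, "UTF-8")
}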
Example 1
Source File: StreamHelper.scala, from incubator-s2graph (Apache License 2.0)
package org.apache.spark.streaming.kafka

import kafka.KafkaHelper
import kafka.common.TopicAndPartition
import kafka.consumer.PartitionTopicInfo
import kafka.message.MessageAndMetadata
import kafka.serializer.Decoder
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.{Logging, SparkException}

import scala.reflect.ClassTag

case class StreamHelper(kafkaParams: Map[String, String]) extends Logging {
  // helper for kafka zookeeper
  lazy val kafkaHelper = KafkaHelper(kafkaParams)
  lazy val kc = new KafkaCluster(kafkaParams)

  // 1. get leader's earliest and latest offset
  // 2. get consumer offset
  // 3-1. if (2) is bounded in (1) use (2) for stream
  // 3-2. else use (1) by "auto.offset.reset"
  private def getStartOffsets(topics: Set[String]): Map[TopicAndPartition, Long] = {
    lazy val reset = kafkaParams.get("auto.offset.reset").map(_.toLowerCase)
    lazy val consumerOffsets = kafkaHelper.getConsumerOffsets(topics.toSeq)

    {
      for {
        topicPartitions <- kc.getPartitions(topics).right
        smallOffsets <- kc.getEarliestLeaderOffsets(topicPartitions).right
        largeOffsets <- kc.getLatestLeaderOffsets(topicPartitions).right
      } yield {
        {
          for {
            tp <- topicPartitions
          } yield {
            val co = consumerOffsets.getOrElse(tp, PartitionTopicInfo.InvalidOffset)
            val so = smallOffsets.get(tp).map(_.offset).get
            val lo = largeOffsets.get(tp).map(_.offset).get

            logWarning(s"$tp: $co $so $lo")

            if (co >= so && co <= lo) {
              (tp, co)
            } else {
              (tp, reset match {
                case Some("smallest") => so
                case _ => lo
              })
            }
          }
        }.toMap
      }
    }.fold(errs => throw new SparkException(errs.mkString("\n")), ok => ok)
  }

  def createStream[K: ClassTag, V: ClassTag, KD <: Decoder[K]: ClassTag, VD <: Decoder[V]: ClassTag](ssc: StreamingContext, topics: Set[String]): InputDStream[(K, V)] = {
    type R = (K, V)
    val messageHandler = (mmd: MessageAndMetadata[K, V]) => (mmd.key(), mmd.message())

    kafkaHelper.registerConsumerInZK(topics)

    new DirectKafkaInputDStream[K, V, KD, VD, R](ssc, kafkaParams, getStartOffsets(topics), messageHandler)
  }

  def commitConsumerOffsets(offsets: HasOffsetRanges): Unit = {
    val offsetsMap = {
      for {
        range <- offsets.offsetRanges if range.fromOffset < range.untilOffset
      } yield {
        logDebug(range.toString())
        TopicAndPartition(range.topic, range.partition) -> range.untilOffset
      }
    }.toMap

    kafkaHelper.commitConsumerOffsets(offsetsMap)
  }

  def commitConsumerOffset(range: OffsetRange): Unit = {
    if (range.fromOffset < range.untilOffset) {
      try {
        val tp = TopicAndPartition(range.topic, range.partition)
        logDebug("Committed offset " + range.untilOffset + " for topic " + tp)
        kafkaHelper.commitConsumerOffset(tp, range.untilOffset)
      } catch {
        case t: Throwable =>
          // log it and let it go
          logWarning("exception during commitOffsets", t)
          throw t
      }
    }
  }

  def commitConsumerOffsets[R](stream: InputDStream[R]): Unit = {
    stream.foreachRDD { rdd =>
      commitConsumerOffsets(rdd.asInstanceOf[HasOffsetRanges])
    }
  }
}
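As a rough usage sketch (not part of the project), the helper could be wired into a streaming job along these lines, assuming an existing StreamingContext named ssc; the broker and ZooKeeper addresses, group id and topic name are placeholders.

// Minimal usage sketch under the assumptions stated above.
import kafka.serializer.StringDecoder

val kafkaParams = Map(
  "metadata.broker.list" -> "localhost:9092",
  "zookeeper.connect"    -> "localhost:2181",
  "group.id"             -> "stream-helper-example",
  "auto.offset.reset"    -> "smallest")

val helper = StreamHelper(kafkaParams)

val stream = helper.createStream[String, String, StringDecoder, StringDecoder](ssc, Set("my-topic"))
stream.foreachRDD { rdd =>
  // process the batch here ...
}

// registers a foreachRDD that writes the consumed offset ranges back to ZooKeeper
helper.commitConsumerOffsets(stream)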
Example 2
Source File: CheckpointedDirectKafkaInputDStream.scala, from streamliner-examples (Apache License 2.0)
package org.apache.spark.streaming.kafka

// Note: this snippet is an excerpt; the original file's imports, class
// declaration and the beginning of the enclosing method are not shown.

    prevOffsets = currentOffsets
    currentOffsets = untilOffsets.map(kv => kv._1 -> kv._2.offset)

    prevOffsets == currentOffsets match {
      case false => Some(rdd)
      case true => None
    }
  }

  def getCurrentOffsets(): Map[TopicAndPartition, Long] = currentOffsets

  def setCurrentOffsets(offsets: Map[TopicAndPartition, Long]): Unit = {
    currentOffsets = offsets
  }
}
Example 3
Source File: AvroDecoder.scala, from cuesheet (Apache License 2.0)
package com.kakao.cuesheet.convert

import java.util.Arrays.copyOfRange

import kafka.serializer.Decoder
import kafka.utils.VerifiableProperties
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}

sealed trait AvroDecoder[T] extends Decoder[T] {

  def props: VerifiableProperties

  protected val schema = new Schema.Parser().parse(props.getString(Avro.SCHEMA))
  protected val skipBytes = props.getInt(Avro.SKIP_BYTES, 0)

  protected val reader = new GenericDatumReader[GenericRecord](schema)
  protected val decoder = Avro.recordDecoder(reader)

  private def skip(bytes: Array[Byte], size: Int): Array[Byte] = {
    val length = bytes.length
    length - size match {
      case remaining if remaining > 0 => copyOfRange(bytes, size, length)
      case _ => new Array[Byte](0)
    }
  }

  def parse(bytes: Array[Byte]): GenericRecord = {
    val data = if (skipBytes == 0) bytes else skip(bytes, skipBytes)
    decoder(data)
  }
}

class AvroRecordDecoder(val props: VerifiableProperties) extends AvroDecoder[GenericRecord] {
  override def fromBytes(bytes: Array[Byte]): GenericRecord = parse(bytes)
}

class AvroMapDecoder(val props: VerifiableProperties) extends AvroDecoder[Map[String, Any]] {
  override def fromBytes(bytes: Array[Byte]): Map[String, Any] = Avro.toMap(parse(bytes))
}

class AvroJsonDecoder(val props: VerifiableProperties) extends AvroDecoder[String] {
  override def fromBytes(bytes: Array[Byte]): String = Avro.toJson(parse(bytes))
}
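A rough usage sketch (not from the project): it assumes Avro.SCHEMA and Avro.SKIP_BYTES, used above, are plain String property keys, and feeds a toy record schema through the JSON variant of the decoder.

// Usage sketch under the assumptions stated above; `bytes` stands for a raw
// Kafka message payload and is not defined here.
import java.util.Properties
import kafka.utils.VerifiableProperties

val schemaJson =
  """{"type":"record","name":"User","fields":[{"name":"name","type":"string"}]}"""

val props = new Properties()
props.setProperty(Avro.SCHEMA, schemaJson) // schema the decoder parses records with
props.setProperty(Avro.SKIP_BYTES, "0")    // no leading bytes to strip

val decoder = new AvroJsonDecoder(new VerifiableProperties(props))
// val json: String = decoder.fromBytes(bytes)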
Example 4
Source File: KafkaConsumerActor.scala, from coral (Apache License 2.0)
package io.coral.actors.connector

import java.util.Properties

import akka.actor.Props
import io.coral.actors.CoralActor
import io.coral.actors.connector.KafkaConsumerActor.{StopReadingMessageQueue, ReadMessageQueue}
import io.coral.lib.{ConfigurationBuilder, KafkaJsonConsumer}
import kafka.serializer.Decoder
import kafka.tools.MessageFormatter
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.json4s.JsonAST.{JNothing, JObject, JValue}

object KafkaConsumerActor {
  case class ReadMessageQueue()
  case class StopReadingMessageQueue()

  implicit val formats = org.json4s.DefaultFormats

  val builder = new ConfigurationBuilder("kafka.consumer")

  def getParams(json: JValue) = {
    for {
      kafka <- (json \ "params" \ "kafka").extractOpt[JObject]
      topic <- (json \ "params" \ "topic").extractOpt[String]
    } yield {
      val properties = consumerProperties(kafka)
      (properties, topic)
    }
  }

  def consumerProperties(json: JObject): Properties = {
    val properties = builder.properties
    json.values.foreach { case (k: String, v: String) => properties.setProperty(k, v) }
    properties
  }

  def apply(json: JValue): Option[Props] = {
    getParams(json).map(_ => Props(classOf[KafkaConsumerActor], json, KafkaJsonConsumer()))
  }

  def apply(json: JValue, decoder: Decoder[JValue]): Option[Props] = {
    getParams(json).map(_ => Props(classOf[KafkaConsumerActor], json, KafkaJsonConsumer(decoder)))
  }
}

class KafkaConsumerActor(json: JObject, connection: KafkaJsonConsumer) extends CoralActor(json) {
  val (properties, topic) = KafkaConsumerActor.getParams(json).get

  lazy val stream = connection.stream(topic, properties)
  var shouldStop = false

  override def preStart(): Unit = {
    super.preStart()
  }

  override def receiveExtra: Receive = {
    case ReadMessageQueue() if stream.hasNextInTime =>
      val message: JValue = stream.next
      stream.commitOffsets

      if (message != JNothing) {
        emit(message)
      }

      if (!shouldStop) {
        self ! ReadMessageQueue()
      }
    case ReadMessageQueue() =>
      self ! ReadMessageQueue()
    case StopReadingMessageQueue() =>
      shouldStop = true
  }
}
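As a rough sketch (not from the project) of how the actor might be created from a JSON definition: the broker/ZooKeeper addresses, group id, topic and actor-system name are placeholders, and consumption is kicked off here by sending ReadMessageQueue() directly, which may differ from how the surrounding coral framework drives its actors.

// Usage sketch under the assumptions stated above.
import akka.actor.ActorSystem
import org.json4s.jackson.JsonMethods._

val system = ActorSystem("coral-example")

val definition = parse("""
  {
    "params": {
      "kafka": {
        "zookeeper.connect": "localhost:2181",
        "group.id": "coral-example"
      },
      "topic": "my-topic"
    }
  }
""")

KafkaConsumerActor(definition).foreach { props =>
  val consumer = system.actorOf(props, "kafka-consumer")
  consumer ! KafkaConsumerActor.ReadMessageQueue()
}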
Example 5
Source File: KafkaJsonConsumer.scala, from coral (Apache License 2.0)
package io.coral.lib

import java.util.Properties

import com.fasterxml.jackson.core.JsonParseException
import kafka.consumer._
import kafka.serializer.{Decoder, DefaultDecoder}
import org.json4s.JsonAST.{JNothing, JValue}
import org.json4s.jackson.JsonMethods._

object KafkaJsonConsumer {
  def apply() = new KafkaJsonConsumer(JsonDecoder)
  def apply(decoder: Decoder[JValue]) = new KafkaJsonConsumer(decoder)
}

class KafkaJsonConsumer(decoder: Decoder[JValue]) {
  def stream(topic: String, properties: Properties): KafkaJsonStream = {
    val connection = Consumer.create(new ConsumerConfig(properties))
    val stream = connection.createMessageStreamsByFilter(
      Whitelist(topic), 1, new DefaultDecoder, decoder)(0)
    new KafkaJsonStream(connection, stream)
  }
}

class KafkaJsonStream(connection: ConsumerConnector, stream: KafkaStream[Array[Byte], JValue]) {
  private lazy val it = stream.iterator

  // this method relies on a timeout value having been set
  @inline def hasNextInTime: Boolean =
    try {
      it.hasNext
    } catch {
      case cte: ConsumerTimeoutException => false
    }

  @inline def next: JValue = it.next.message

  @inline def commitOffsets = connection.commitOffsets
}

object JsonDecoder extends Decoder[JValue] {
  val encoding = "UTF8"

  override def fromBytes(bytes: Array[Byte]): JValue = {
    val s = new String(bytes, encoding)
    try {
      parse(s)
    } catch {
      case jpe: JsonParseException => JNothing
    }
  }
}
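A minimal usage sketch (not part of the original file): the ZooKeeper address, group id and topic are placeholders, and consumer.timeout.ms is set because hasNextInTime relies on a consumer timeout being configured.

// Usage sketch under the assumptions stated above.
import java.util.Properties
import org.json4s.JsonAST.JNothing
import org.json4s.jackson.JsonMethods._

val props = new Properties()
props.setProperty("zookeeper.connect", "localhost:2181")
props.setProperty("group.id", "json-example")
props.setProperty("consumer.timeout.ms", "500")

val stream = KafkaJsonConsumer().stream("my-topic", props)

// drain whatever is currently available, skipping unparseable messages
while (stream.hasNextInTime) {
  val json = stream.next
  if (json != JNothing) println(compact(render(json)))
  stream.commitOffsets
}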