Example 1
import java.util
import java.util.{Collections, Properties}

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRebalanceListener, ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.TopicPartition

/**
  * Minimal Kafka consumer: subscribes to one topic and prints every record it receives.
  *
  * Expected arguments: `<kafkaServerURL> <kafkaServerPort> <topic>`.
  */
object BasicConsumerExample {
  def main(args:Array[String]): Unit = {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    require(args.length >= 3, "Usage: BasicConsumerExample <kafkaServerURL> <kafkaServerPort> <topic>")
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val topic = args(2)

    println("Setting up parameters")
    val props = new Properties()
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, s"$kafkaServerURL:$kafkaServerPort")
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "TrainingConsumer")
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
    props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000")
    props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000")
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")

    println("Creating Consumer")
    val consumer = new KafkaConsumer[String,String](props)

    // The listener is notified whenever partitions are assigned to or revoked from this consumer.
    val listener = new RebalanceListener

    consumer.subscribe(Collections.singletonList(topic), listener)

    println("Starting Consumer")
    // Poll forever; each poll waits up to 1000 ms for new records.
    while (true) {
      val records = consumer.poll(1000)
      val it = records.iterator()
      while (it.hasNext) {
        // Advance the iterator; without this the inner loop would never terminate.
        val record = it.next()
        println(s"Received message: (${record.key()}, ${record.value()}) at offset ${record.offset()}")
      }
    }
  }
}

/** Prints the partition numbers handed to, or taken away from, the consumer on a rebalance. */
class RebalanceListener extends ConsumerRebalanceListener {

  /** Prints the partition number of every newly assigned partition on one line. */
  override def onPartitionsAssigned(collection: util.Collection[TopicPartition]): Unit = {
    print("Assigned Partitions:")
    val it = collection.iterator()
    while (it.hasNext) {
      // it.next() both reads the element and advances the iterator.
      print(s"${it.next().partition()},")
    }
    // Terminate the line so the next log message starts on a fresh one.
    println()
  }

  /** Prints the partition number of every revoked partition on one line. */
  override def onPartitionsRevoked(collection: util.Collection[TopicPartition]): Unit = {
    print("Revoked Partitions:")
    val it = collection.iterator()
    while (it.hasNext) {
      print(s"${it.next().partition()},")
    }
    println()
  }
}
Example 2
package com.aliyun.odps.spark.examples.streaming.kafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
  * Word count over a Kafka topic using the Spark Streaming direct stream (kafka010 integration).
  *
  * Every 5-second batch is split into words and the per-batch counts are printed.
  */
object KafkaStreamingDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("KafkaStreamingDemo")
      .getOrCreate()

    val ssc = new StreamingContext(spark.sparkContext, Seconds(5))

    // Use OSS as the checkpoint store, e.g. ssc.checkpoint("oss://<bucket>/<checkpoint-dir>/").

    // Kafka consumer configuration.
    val kafkaParams = Map[String, Object](
      // Placeholder broker list: replace with the real comma-separated host:port entries.
      "bootstrap.servers" -> "host1:9092,host2:9092,host3:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "testGroupId",
      "auto.offset.reset" -> "latest",
      // Offsets are not auto-committed; the direct stream tracks them itself.
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val topics = Set("event_topic")
    val recordDstream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
      )

    val dstream = recordDstream.map(f => (f.key(), f.value()))
    // Only the record value carries the text to count.
    val data: DStream[String] = dstream.map(_._2)
    val wordsDStream: DStream[String] = data.flatMap(_.split(" "))
    val wordAndOneDstream: DStream[(String, Int)] = wordsDStream.map((_, 1))
    val result: DStream[(String, Int)] = wordAndOneDstream.reduceByKey(_ + _)
    // A streaming job needs at least one output operation before it can be started.
    result.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

Example 3
package com.aliyun.odps.spark.examples.streaming.kafka

import com.aliyun.odps.spark.examples.streaming.common.SparkSessionSingleton
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

// Spark Streaming job that opens a Kafka direct stream on topic "test", splits the record text
// into words and processes each resulting RDD with a SparkSession obtained from
// SparkSessionSingleton. According to the inline comment the data is meant to be written to ODPS.
// NOTE(review): this listing is truncated in several places (flagged below) and does not compile
// as shown; the statements that perform the ODPS write are not visible.
object Kafka2OdpsDemo {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("test")
    val ssc = new StreamingContext(sparkConf, Seconds(10))

    // Use OSS as the checkpoint store and change this to a valid OSS path. See the OSS access documentation.
    // NOTE(review): no checkpoint call is visible after this comment - confirm whether one was dropped.

    // Kafka configuration parameters
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      // NOTE(review): empty key; judging by the value this is presumably "group.id" - confirm.
      "" -> "testGroupId",
      "auto.offset.reset" -> "latest",
      // NOTE(review): empty key again, presumably "enable.auto.commit" - confirm. As written it
      // collides with the "" entry above.
      "" -> (false: java.lang.Boolean)
    // NOTE(review): the closing ")" of Map(...) is missing here.

    // Create the Kafka DStream
    val topics = Set("test")
    val recordDstream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        // NOTE(review): only the consumer strategy is passed and the call is never closed; the
        // streaming context / location strategy arguments appear to be missing - confirm.
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
    // NOTE(review): the receiver of this lambda is missing; presumably recordDstream.map(f => ...) - confirm.
    val dstream = => (f.key(), f.value()))
    // Parse the Kafka data and write it to ODPS
    // NOTE(review): the right-hand side of `data` is missing.
    val data: DStream[String] =
    val wordsDStream: DStream[String] = data.flatMap(_.split(" "))
    wordsDStream.foreachRDD(rdd => {
      // Obtain a SparkSession from the RDD's SparkContext configuration.
      val spark = SparkSessionSingleton.getInstance(rdd.sparkContext.getConf)
      import spark.implicits._


Example 4
package net.manub.embeddedkafka.schemaregistry

import org.apache.kafka.clients.consumer.ConsumerRecord

/** Implicit decoders that turn a `ConsumerRecord` with a String key into plain values. */
object Codecs {

  /** Decodes a record into its `(key, value)` pair. */
  implicit def stringKeyGenericValueCrDecoder[V]
      : ConsumerRecord[String, V] => (String, V) =
    cr => (cr.key, cr.value)

  /** Decodes a record into its value only. */
  implicit def genericValueCrDecoder[V]: ConsumerRecord[String, V] => V =
    _.value

  /** Decodes a record into its `(topic, key, value)` triple. */
  implicit def stringKeyGenericValueTopicCrDecoder[V]
      : ConsumerRecord[String, V] => (String, String, V) =
    cr => (cr.topic, cr.key, cr.value)
}
Example 5
package net.manub.embeddedkafka.schemaregistry.avro

import org.apache.avro.generic.GenericRecord
import org.apache.avro.specific.SpecificRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

  "Avro-related classes will be removed soon",
  since = "5.5.0"
object Codecs {
  implicit def stringKeyAvroValueCrDecoder[V <: SpecificRecord]
      : ConsumerRecord[String, V] => (String, V) =
    cr => (cr.key, cr.value)
  implicit def avroValueCrDecoder[V <: SpecificRecord]
      : ConsumerRecord[String, V] => V =
  implicit def stringKeyAvroValueTopicCrDecoder[V <: SpecificRecord]
      : ConsumerRecord[String, V] => (String, String, V) =
    cr => (cr.topic, cr.key, cr.value)

  implicit def stringKeyGenericValueCrDecoder
      : ConsumerRecord[String, GenericRecord] => (String, GenericRecord) =
    cr => (cr.key, cr.value)

  implicit def genericKeyGenericValueCrDecoder
      : ConsumerRecord[GenericRecord, GenericRecord] => (
      ) =
    cr => (cr.key, cr.value)
Example 6
package com.lightbend.scala.kafka.client

import com.lightbend.scala.kafka.RecordProcessorTrait
import org.apache.kafka.clients.consumer.ConsumerRecord

/** Prints a one-line summary of every record it receives and counts the records seen so far. */
class RecordProcessor extends RecordProcessorTrait[Array[Byte], Array[Byte]] {

  /** Increments the shared counter and prints the record's key and value summaries. */
  override def processRecord(record: ConsumerRecord[Array[Byte], Array[Byte]]): Unit = {
    RecordProcessor.count += 1
    val key = record.key()
    val value = record.value()
    println(s"Retrieved message #${RecordProcessor.count}: " +
      mkString("key", key) + ", " + mkString("value", value))
  }

  /**
    * Describes a byte array: its default string form, its size and its first five elements.
    * A null array (Kafka keys and values may be null) is rendered as "label = null".
    */
  private def mkString(label: String, array: Array[Byte]): String =
    if (array == null) s"$label = null"
    else s"$label = $array, size = ${array.length}, first 5 elements = ${array.take(5).mkString("[", ",", "]")}"
}

object RecordProcessor {
  // Number of records processed so far, shared by all RecordProcessor instances.
  // NOTE(review): plain var with no synchronisation - confirm it is only updated from one thread.
  var count = 0L
}
Example 7
package com.example.consumer

import java.util.concurrent.atomic.AtomicLong

import akka.Done
import akka.kafka.Subscriptions
import akka.kafka.scaladsl.Consumer
import com.example._
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition

import scala.concurrent.Future

// Consumes partition 0 of a topic starting at an offset loaded from the DB mock: the loaded
// offset is passed to Subscriptions.assignmentWithOffset and the subscription to Consumer.plainSource.
// NOTE(review): `topic` and `consumerSettings` are not defined in this listing; presumably they
// come from `import com.example._` - confirm.
// NOTE(review): the listing is truncated - the assignmentWithOffset call is never closed, the
// stream stages after plainSource are missing, and the closing braces are absent.
object PlainSourceConsumer extends App {

  val db = new DB
  // Runs once the stored offset has been loaded.
  db.loadOffset().foreach { fromOffset =>
    val partition = 0
    val subscription = Subscriptions.assignmentWithOffset(
      new TopicPartition(topic, partition) -> fromOffset

    // NOTE(review): only the source is visible here; how it is run is not shown.
    val done =
      Consumer.plainSource(consumerSettings, subscription)


//Zookeeper or DB storage mock
/** In-memory stand-in for external storage: it remembers a single offset and logs what it is given. */
class DB {

  // Last stored offset; starts at 2.
  private val offset = new AtomicLong(2)

  /** Logs the record value, remembers the record's offset and completes immediately. */
  def save(record: ConsumerRecord[Array[Byte], String]): Future[Done] = {
    println(s"DB.save: ${record.value}")
    offset.set(record.offset)
    Future.successful(Done)
  }

  /** Returns the currently stored offset as an already completed future. */
  def loadOffset(): Future[Long] =
    Future.successful(offset.get)

  /** Logs the data and completes immediately. */
  def update(data: String): Future[Done] = {
    println(s"DB.update: $data")
    Future.successful(Done)
  }
}
Example 8
package com.example.kafka010

import java.{util => ju}

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010._
import org.apache.spark.{SparkContext, TaskContext}

import scala.collection.JavaConversions._
import com.example._

// Examples for the Spark Streaming Kafka 0-10 integration: a direct stream on "sample_topic"
// whose offset ranges are printed per partition, and a KafkaRDD built over fixed offset ranges.
// NOTE(review): `launchWithItself` and `kafkaParams` are not defined in this listing; presumably
// they come from `import com.example._` - confirm.
// NOTE(review): the listing is truncated in several places (flagged below) and does not compile as shown.
object KafkaStreamingLatestExample {

  // NOTE(review): the body and closing brace of main are missing.
  def main(args: Array[String]): Unit = {

  // Launches the direct-stream example under the application name "Kafka010_DirectStream".
  def kafkaStream010Itself() =
    launchWithItself(kafkaStreaming010, appName = "Kafka010_DirectStream")

  // Creates a direct stream on "sample_topic" and prints the offset range handled by each partition.
  private def kafkaStreaming010(streamingContext: StreamingContext): Unit = {
    val topics = Array("sample_topic")
    // NOTE(review): the streaming context argument of createDirectStream is not visible - confirm.
    val stream = KafkaUtils.createDirectStream[String, String](
      PreferConsistent, //It will consistently distribute partitions across all executors.
      Subscribe[String, String](topics, kafkaParams)
    // NOTE(review): the receiver of the lambda below is missing; presumably stream.map(record => ...) - confirm.
    ) => (record.key, record.value)).print()

    stream.foreachRDD { rdd =>
      // The RDDs of a direct stream expose the Kafka offset ranges they cover.
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      rdd.foreachPartition { _ =>
        // Look up the offset range by the id of the partition being processed.
        val o: OffsetRange = offsetRanges(TaskContext.get.partitionId)
        println(s"${o.topic} ${o.partition} ${o.fromOffset} ${o.untilOffset}")


  // NOTE(review): only the offset-range lookup is visible; the code that stores the offsets is missing.
  private def storingOffsetsItself(stream: InputDStream[ConsumerRecord[String, String]]) = {
    stream.foreachRDD { rdd =>
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

  // Builds a KafkaRDD over offsets [10, 20) of partitions 0 and 1 of "sample_topic" using a local SparkContext.
  private def kafkaRdd010() = {
    val sparkContext = new SparkContext("local[*]", "kafkaRdd010")

    val offsetRanges = Array(
      // topic, partition, inclusive starting offset, exclusive ending offset
      OffsetRange("sample_topic", 0, 10, 20),
      OffsetRange("sample_topic", 1, 10, 20)
    // NOTE(review): the closing ")" of Array(...) is missing above.
    // createRDD takes a java.util.Map, so the params are copied into a HashMap.
    val params = new ju.HashMap[String, Object](kafkaParams)
    val kafkaRDD =  KafkaUtils.createRDD[String, String](sparkContext, params , offsetRanges, PreferConsistent)

Example 9
package cloudflow.flink

import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.streaming.connectors.kafka._

import cloudflow.streamlets.{ CodecInlet, CodecOutlet }

/**
  * Kafka serialization schema that encodes each element with the outlet's codec and wraps the
  * bytes in a `ProducerRecord` for `topic`. No key is set, and the `timestamp` argument is not used.
  */
private[flink] class FlinkKafkaCodecSerializationSchema[T: TypeInformation](outlet: CodecOutlet[T], topic: String)
    extends KafkaSerializationSchema[T] {
  override def serialize(value: T, timestamp: java.lang.Long): ProducerRecord[Array[Byte], Array[Byte]] =
    new ProducerRecord(topic, outlet.codec.encode(value))
}

/**
  * Kafka deserialization schema that decodes the record value with the inlet's codec.
  * The stream never ends, and the produced type is the `TypeInformation` supplied for `T`.
  */
private[flink] class FlinkKafkaCodecDeserializationSchema[T: TypeInformation](inlet: CodecInlet[T]) extends KafkaDeserializationSchema[T] {
  override def deserialize(record: ConsumerRecord[Array[Byte], Array[Byte]]): T = inlet.codec.decode(record.value)
  override def isEndOfStream(value: T): Boolean                                 = false
  override def getProducedType: TypeInformation[T]                              = implicitly[TypeInformation[T]]
}
Example 10
package asura.kafka

import akka.kafka.scaladsl.Consumer
import akka.kafka.scaladsl.Consumer.Control
import akka.kafka.{ConsumerSettings, Subscriptions}
import io.confluent.kafka.serializers.{AbstractKafkaAvroSerDeConfig, KafkaAvroDeserializer, KafkaAvroDeserializerConfig}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.common.serialization.{Deserializer, StringDeserializer}

import scala.collection.JavaConverters._

/** Factory for Alpakka Kafka sources that read Avro-encoded values. */
object ConsumerBuilder {

  /**
    * Builds a plain (non-committing) source of records with String keys and Avro values.
    *
    * @param brokerUrl         Kafka bootstrap servers
    * @param schemaRegisterUrl URL of the schema registry used by the Avro deserializer
    * @param group             consumer group id
    * @param topics            topics to subscribe to
    * @param resetType         value for `auto.offset.reset`, "latest" by default
    */
  def buildAvroSource[V](
                          brokerUrl: String,
                          schemaRegisterUrl: String,
                          group: String,
                          topics: Set[String],
                          resetType: String = "latest",
                        )(implicit system: ActorSystem): Source[ConsumerRecord[String, V], Control] = {

    val kafkaAvroSerDeConfig = Map[String, Any](
      AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG -> schemaRegisterUrl,
      KafkaAvroDeserializerConfig.SPECIFIC_AVRO_READER_CONFIG -> true.toString
    )
    val consumerSettings: ConsumerSettings[String, V] = {
      val kafkaAvroDeserializer = new KafkaAvroDeserializer()
      // isKey = false: this deserializer is used for record values.
      kafkaAvroDeserializer.configure(kafkaAvroSerDeConfig.asJava, false)
      // Unchecked cast: the caller is responsible for V matching the Avro type on the topic.
      val deserializer = kafkaAvroDeserializer.asInstanceOf[Deserializer[V]]

      ConsumerSettings(system, new StringDeserializer, deserializer)
        // Apply the broker and group arguments; they were accepted but never used before.
        .withBootstrapServers(brokerUrl)
        .withGroupId(group)
        .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, resetType)
    }
    Consumer.plainSource(consumerSettings, Subscriptions.topics(topics))
  }
}
Example 11
package com.banno.kafka

import org.scalacheck.{Arbitrary, Cogen, Gen}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

/** ScalaCheck instances for Kafka producer and consumer records, for use in tests. */
package object test {

  /** Generates producer records with an identifier as topic and arbitrary key and value. */
  implicit def arbitraryProducerRecord[K: Arbitrary, V: Arbitrary]
      : Arbitrary[ProducerRecord[K, V]] = Arbitrary {
    for {
      t <- Gen.identifier
      k <- Arbitrary.arbitrary[K]
      v <- Arbitrary.arbitrary[V]
    } yield new ProducerRecord(t, k, v)
  }

  /** Generates consumer records with an identifier as topic, positive partition and offset, and arbitrary key and value. */
  implicit def arbitraryConsumerRecord[K: Arbitrary, V: Arbitrary]
      : Arbitrary[ConsumerRecord[K, V]] = Arbitrary {
    for {
      t <- Gen.identifier
      p <- Gen.posNum[Int]
      o <- Gen.posNum[Long]
      k <- Arbitrary.arbitrary[K]
      v <- Arbitrary.arbitrary[V]
    } yield new ConsumerRecord(t, p, o, k, v)
  }

  //these things are necessary for EqSpec
  // Both Cogens derive their seed from the string lengths of key and value.
  // NOTE(review): `toString` is called directly, so a record with a null key or value would throw here.
  implicit def producerRecordCogen[K, V]: Cogen[ProducerRecord[K, V]] =
    Cogen(pr => pr.key.toString.length.toLong + pr.value.toString.length.toLong) // ¯\_(ツ)_/¯
  implicit def consumerRecordCogen[K, V]: Cogen[ConsumerRecord[K, V]] =
    Cogen(cr => cr.key.toString.length.toLong + cr.value.toString.length.toLong) // ¯\_(ツ)_/¯
}
Example 12
package com.banno.kafka.metrics.prometheus

import cats.effect.Sync
import cats.implicits._
import org.apache.kafka.clients.consumer.ConsumerRecord
import io.prometheus.client._
import scala.math.max

/** Prometheus counter tracking the last consumed offset per client, topic and partition. */
object CurrentOffsetCounter {

  /**
    * Registers the `<prefix>_current_offset` counter in `cr` and returns a function that, for each
    * consumed record, raises the counter of the record's topic partition up to the record's offset.
    *
    * @param cr       registry the counter is registered with
    * @param prefix   metric name prefix
    * @param clientId value of the `client_id` label
    */
  def apply[F[_]](cr: CollectorRegistry, prefix: String, clientId: String)(
      implicit F: Sync[F]
  ): F[ConsumerRecord[_, _] => F[Unit]] =
    F.delay {
        Counter
          .build()
          .name(prefix + "_current_offset")
          .help("Counter for last consumed (not necessarily committed) offset of topic partition.")
          .labelNames("client_id", "topic", "partition")
          .register(cr)
      }
      .map { counter => (record: ConsumerRecord[_, _]) =>
        for {
          value <- F.delay(counter.labels(clientId, record.topic, record.partition.toString).get)
          // Counters can only grow, so the increment is clamped at zero for offsets not above the current value.
          delta = max(0, record.offset.toDouble - value)
          _ <- F.delay(counter.labels(clientId, record.topic, record.partition.toString).inc(delta))
        } yield ()
      }
}
Example 13
Source File: DirectKafkaWordCount.scala (from spark-secure-kafka-app, Apache License 2.0)
import org.apache.kafka.clients.consumer.ConsumerRecord

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, LocationStrategies, KafkaUtils}
import org.apache.spark.streaming._

/**
  * Word count over one or more Kafka topics on a SASL-secured cluster, using the Spark Streaming
  * direct stream.
  *
  * Arguments: `<brokers> <topics> <ssl>`, where `<ssl>` selects SASL_SSL (true) or SASL_PLAINTEXT (false).
  */
object DirectKafkaWordCount {
    def main(args: Array[String]): Unit = {
      // All three arguments are required: the destructuring below fails with fewer.
      if (args.length < 3) {
        System.err.println("""
                              |Usage: DirectKafkaWordCount <brokers> <topics> <ssl>
                              |  <brokers> is a list of one or more Kafka brokers
                              |  <topics> is a list of one or more kafka topics to consume from
                              |  <ssl> true if using SSL, false otherwise.
        """.stripMargin)
        System.exit(1)
      }

      // take(3) keeps the pattern total even if extra arguments are passed.
      val Array(brokers, topics, ssl) = args.take(3)

      // Create context with 2 second batch interval
      val sparkConf = new SparkConf().setAppName("DirectKafkaWordCount")
      val ssc = new StreamingContext(sparkConf, Seconds(2))
      val isUsingSsl = ssl.toBoolean

      // Create direct kafka stream with brokers and topics
      val topicsSet = topics.split(",").toSet
      val commonParams = Map[String, Object](
        "bootstrap.servers" -> brokers,
        "security.protocol" -> (if (isUsingSsl) "SASL_SSL" else "SASL_PLAINTEXT"),
        "sasl.kerberos.service.name" -> "kafka",
        "auto.offset.reset" -> "earliest",
        "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
        "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
        "group.id" -> "default",
        "enable.auto.commit" -> (false: java.lang.Boolean)
      )

      // NOTE(review): truststore path and password are hard-coded; move them to configuration.
      val additionalSslParams = if (isUsingSsl) {
        Map(
          "ssl.truststore.location" -> "/etc/cdep-ssl-conf/CA_STANDARD/truststore.jks",
          "ssl.truststore.password" -> "cloudera"
        )
      } else {
        Map.empty[String, String]
      }

      val kafkaParams = commonParams ++ additionalSslParams

      val messages: InputDStream[ConsumerRecord[String, String]] =
        KafkaUtils.createDirectStream[String, String](
          ssc,
          LocationStrategies.PreferConsistent,
          ConsumerStrategies.Subscribe[String, String](topicsSet, kafkaParams)
        )

      // Get the lines, split them into words, count the words and print
      val lines = messages.map(_.value)
      val words = lines.flatMap(_.split(" "))
      val wordCounts = words.map(x => (x, 1L)).reduceByKey(_ + _)
      wordCounts.print()

      // Start the computation
      ssc.start()
      ssc.awaitTermination()
    }
}
Example 14
package com.knoldus.streaming.kafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

/** Helper for creating Kafka direct streams with a fixed consumer configuration. */
object KafkaUtility {

  //TODO It should read from config
  private val kafkaParams = Map[String, Object](
    "bootstrap.servers" -> "localhost:9092",
    "key.deserializer" -> classOf[StringDeserializer],
    "value.deserializer" -> classOf[StringDeserializer],
    "auto.offset.reset" -> "earliest",
    "group.id" -> "tweet-consumer"
  )

  private val preferredHosts = LocationStrategies.PreferConsistent

  /**
    * Creates a direct stream subscribed to the given topics.
    *
    * @param ssc    streaming context the stream is attached to
    * @param topics topics to subscribe to; duplicates are removed
    * @return stream of String-keyed, String-valued consumer records
    */
  def createDStreamFromKafka(ssc: StreamingContext, topics: List[String]): InputDStream[ConsumerRecord[String, String]] =
    KafkaUtils.createDirectStream[String, String](
      ssc,
      preferredHosts,
      ConsumerStrategies.Subscribe[String, String](topics.distinct, kafkaParams)
    )
}

Example 15
Source File: ProcessingKafkaApplication.scala (from Akka-Cookbook, MIT License)
import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerSettings, Subscriptions}
import akka.stream.{ActorMaterializer, ClosedShape}
import akka.stream.scaladsl.{Flow, GraphDSL, RunnableGraph, Sink, Source}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}

import scala.concurrent.duration._

/**
  * Akka Streams graph with two independent flows: one publishes a greeting to a Kafka topic
  * every five seconds, the other reads partition 0 of the same topic and prints each value.
  */
object ProcessingKafkaApplication extends App {
  implicit val actorSystem: ActorSystem = ActorSystem("SimpleStream")
  implicit val actorMaterializer: ActorMaterializer = ActorMaterializer()

  val bootstrapServers = "localhost:9092"
  val kafkaTopic = "akka_streams_topic"
  val partition = 0
  val subscription = Subscriptions.assignment(new TopicPartition(kafkaTopic, partition))

  val consumerSettings = ConsumerSettings(actorSystem, new ByteArrayDeserializer, new StringDeserializer)
    .withBootstrapServers(bootstrapServers)
    .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  val producerSettings = ProducerSettings(actorSystem, new ByteArraySerializer, new StringSerializer)
    .withBootstrapServers(bootstrapServers)

  val runnableGraph = RunnableGraph.fromGraph(GraphDSL.create() { implicit builder =>
    import GraphDSL.Implicits._

    val tickSource = Source.tick(0.seconds, 5.seconds, "Hello from Akka Streams using Kafka!")
    val kafkaSource = Consumer.plainSource(consumerSettings, subscription)
    val kafkaSink = Producer.plainSink(producerSettings)
    val printlnSink = Sink.foreach(println)

    val mapToProducerRecord = Flow[String].map(elem => new ProducerRecord[Array[Byte], String](kafkaTopic, elem))
    val mapFromConsumerRecord = Flow[ConsumerRecord[Array[Byte], String]].map(record => record.value())

    tickSource  ~> mapToProducerRecord   ~> kafkaSink
    kafkaSource ~> mapFromConsumerRecord ~> printlnSink

    // Every inlet and outlet is connected, so the graph has a closed shape.
    ClosedShape
  })

  // Building the graph does nothing by itself; it has to be run.
  runnableGraph.run()
}

Example 16
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization._

/** Implicit serializers, deserializers and `ConsumerRecord` decoders for String and byte-array payloads. */
object Codecs {
  implicit val stringSerializer: Serializer[String] = new StringSerializer()
  implicit val nullSerializer: Serializer[Array[Byte]] =
    new ByteArraySerializer()

  implicit val stringDeserializer: Deserializer[String] =
    new StringDeserializer()
  implicit val nullDeserializer: Deserializer[Array[Byte]] =
    new ByteArrayDeserializer()

  // Decoders for records with String values: (key, value), value only, (topic, key, value).
  implicit val stringKeyValueCrDecoder
      : ConsumerRecord[String, String] => (String, String) =
    cr => (cr.key(), cr.value)
  implicit val stringValueCrDecoder: ConsumerRecord[String, String] => String =
    _.value()
  implicit val stringKeyValueTopicCrDecoder
      : ConsumerRecord[String, String] => (String, String, String) = cr =>
    (cr.topic(), cr.key(), cr.value())

  // Decoders for records with byte-array values, in the same three shapes.
  implicit val keyNullValueCrDecoder
      : ConsumerRecord[String, Array[Byte]] => (String, Array[Byte]) =
    cr => (cr.key(), cr.value)
  implicit val nullValueCrDecoder
      : ConsumerRecord[String, Array[Byte]] => Array[Byte] = _.value()
  implicit val keyNullValueTopicCrDecoder
      : ConsumerRecord[String, Array[Byte]] => (String, String, Array[Byte]) =
    cr => (cr.topic(), cr.key(), cr.value())
}
Example 17
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.{ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.KafkaException

import scala.jdk.CollectionConverters._
import scala.concurrent.duration._
import scala.util.Try

    /**
      * Polls the consumer once and decodes every record that was returned.
      *
      * @param poll    maximum time to block in the poll
      * @param topics  topics to subscribe to before polling
      * @param decoder converts each consumer record into the result type
      * @return the decoded records of this poll, possibly empty
      * @throws KafkaUnavailableException if the Kafka client reports a KafkaException
      */
    private def getNextBatch[T](poll: FiniteDuration, topics: Seq[String])(
        implicit decoder: ConsumerRecord[K, V] => T
    ): Seq[T] =
      Try {
        // Polling without a subscription throws, so subscribe to the requested topics first.
        consumer.subscribe(topics.asJava)
        val records = consumer.poll(duration2JavaDuration(poll))
        // use toList to force eager evaluation. toSeq is lazy
        records.iterator().asScala.toList.map(decoder)
      }.recover {
        case ex: KafkaException => throw new KafkaUnavailableException(ex)
      }.get
Example 18
Source File: CommittableRecord.scala (from zio-kafka, Apache License 2.0)
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import zio.{ RIO, Task }
import zio.kafka.serde.Deserializer

/** A consumed Kafka record together with the offset handle used to commit it. */
final case class CommittableRecord[K, V](record: ConsumerRecord[K, V], offset: Offset) {

  /**
    * Deserializes a record whose key and value are raw bytes.
    *
    * All record metadata is carried over unchanged; only key and value are replaced by the
    * deserialized values. The commit handle (`offset`) is kept as is.
    */
  def deserializeWith[R, K1, V1](
    keyDeserializer: Deserializer[R, K1],
    valueDeserializer: Deserializer[R, V1]
  )(implicit ev1: K <:< Array[Byte], ev2: V <:< Array[Byte]): RIO[R, CommittableRecord[K1, V1]] =
    for {
      key   <- keyDeserializer.deserialize(record.topic(), record.headers(), record.key())
      value <- valueDeserializer.deserialize(record.topic(), record.headers(), record.value())
    } yield {
      copy(
        record = new ConsumerRecord[K1, V1](
          record.topic(),
          record.partition(),
          record.offset(),
          record.timestamp(),
          record.timestampType(),
          ConsumerRecord.NULL_CHECKSUM, // Checksum is deprecated
          record.serializedKeySize(),
          record.serializedValueSize(),
          key,
          value,
          record.headers()
        )
      )
    }

  def key: K          = record.key
  def value: V        = record.value()
  def partition: Int  = record.partition()
  def timestamp: Long = record.timestamp()
}

object CommittableRecord {

  /** Wraps a record with an offset built from the record's topic, partition and offset plus the commit callback. */
  def apply[K, V](
    record: ConsumerRecord[K, V],
    commitHandle: Map[TopicPartition, Long] => Task[Unit]
  ): CommittableRecord[K, V] =
    new CommittableRecord(
      record,
      OffsetImpl(new TopicPartition(record.topic(), record.partition()), record.offset(), commitHandle)
    )
}
Example 19
Source File: TopicsEndpoint.scala (from hydra, Apache License 2.0)
import akka.http.scaladsl.common.EntityStreamingSupport
import akka.kafka.Subscriptions
import akka.kafka.scaladsl.Consumer
import akka.pattern.ask
import akka.util.Timeout
import hydra.core.http.RouteSupport
import hydra.kafka.consumer.KafkaConsumerProxy.{GetLatestOffsets, LatestOffsetsResponse}
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition

import scala.collection.immutable.Map
import scala.concurrent.duration._
import scala.concurrent.{Await, ExecutionContext, Future}

// HTTP endpoint that reads records from a Kafka topic: it serves
// GET transports/kafka/consumer/topics/<topicName> by building an Alpakka plain source for the
// topic and mapping each record value to its string form.
// NOTE(review): this listing is truncated in several places (flagged below) and does not compile
// as shown; how the response is completed is not visible.
class TopicsEndpoint(consumerProxy:ActorSelection)(implicit ec:ExecutionContext) extends RouteSupport {

  import hydra.kafka.util.KafkaUtils._

  implicit val jsonStreamingSupport = EntityStreamingSupport.json()

  override val route =
    path("transports" / "kafka" / "consumer" / "topics" / Segment) {
      topicName =>
        get {
          extractRequestContext { ctx =>
            // Query parameters: optional format and group, n (default 10), start (default "earliest").
            parameters('format.?, 'group.?, 'n ? 10, 'start ? "earliest") {
              (format, groupId, n, startOffset) =>
                // NOTE(review): the arguments of loadConsumerSettings are missing.
                val settings = loadConsumerSettings[Any, Any](
                val offsets = latestOffsets(topicName)
                val source = Consumer
                  .plainSource(settings, Subscriptions.topics(topicName))
                  // NOTE(review): rec is used as a pair below, so a stage that pairs each record
                  // with a number (presumably zipWithIndex) seems to be missing here - confirm.
                  // Stop once more than n elements were seen or the record reached the latest offset.
                  .takeWhile(rec =>
                    rec._2 <= n && !shouldCancel(offsets, rec._1)
                  .map(rec => rec._1.value().toString)
                  .watchTermination()((_, termination) =>
                    termination.failed.foreach {
                      // NOTE(review): the failure handler body is missing.
                      case cause =>


  // Returns true when the latest offsets are already available and the record's offset is at or
  // beyond the latest offset of its partition; false when the partition is not in the map.
  // NOTE(review): the else branch (offsets not yet available) is missing from this listing.
  def shouldCancel(
      fpartitions: Future[Map[TopicPartition, Long]],
      record: ConsumerRecord[Any, Any]
  ): Boolean = {
    if (fpartitions.isCompleted) {
      // The future is already completed, so this Await returns without blocking.
      val partitions = Await.result(fpartitions, 1.millis)
      val tp = new TopicPartition(record.topic(), record.partition())
      partitions.get(tp) match {
        case Some(offset) => record.offset() >= offset
        case None         => false
    } else {


  // Asks the consumer proxy for the latest offsets of the topic, with a 5 second ask timeout.
  // NOTE(review): the conversion of the reply to Map[TopicPartition, Long] is not visible.
  private def latestOffsets(
      topic: String
  ): Future[Map[TopicPartition, Long]] = {
    implicit val timeout = Timeout(5 seconds)
    (consumerProxy ? GetLatestOffsets(topic))

Example 20
Source File: Codecs.scala (from scalatest-embedded-kafka, MIT License)
import org.apache.avro.specific.SpecificRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

/** Implicit decoders for String-keyed `ConsumerRecord`s carrying Avro specific records. */
object Codecs {

  /** Decodes a record into its `(key, value)` pair. */
  implicit def stringKeyAvroValueCrDecoder[V <: SpecificRecord]
    : ConsumerRecord[String, V] => (String, V) =
    cr => (cr.key(), cr.value)

  /** Decodes a record into its value only. */
  implicit def avroValueCrDecoder[V <: SpecificRecord]
    : ConsumerRecord[String, V] => V =
    _.value()

  /** Decodes a record into its `(topic, key, value)` triple. */
  implicit def stringKeyAvroValueTopicCrDecoder[V <: SpecificRecord]
    : ConsumerRecord[String, V] => (String, String, V) =
    cr => (cr.topic(), cr.key(), cr.value())
}

Example 21
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization._

/** Implicit serializers, deserializers and `ConsumerRecord` decoders for String and byte-array payloads. */
object Codecs {
  implicit val stringSerializer: Serializer[String] = new StringSerializer()
  implicit val nullSerializer: Serializer[Array[Byte]] =
    new ByteArraySerializer()

  implicit val stringDeserializer: Deserializer[String] =
    new StringDeserializer()
  implicit val nullDeserializer: Deserializer[Array[Byte]] =
    new ByteArrayDeserializer()

  // Decoders for records with String values: (key, value), value only, (topic, key, value).
  implicit val stringKeyValueCrDecoder
    : ConsumerRecord[String, String] => (String, String) =
    cr => (cr.key(), cr.value)
  implicit val stringValueCrDecoder: ConsumerRecord[String, String] => String =
    _.value()
  implicit val stringKeyValueTopicCrDecoder
    : ConsumerRecord[String, String] => (String, String, String) = cr =>
    (cr.topic(), cr.key(), cr.value())

  // Decoders for records with byte-array values, in the same three shapes.
  implicit val keyNullValueCrDecoder
    : ConsumerRecord[String, Array[Byte]] => (String, Array[Byte]) =
    cr => (cr.key(), cr.value)
  implicit val nullValueCrDecoder
    : ConsumerRecord[String, Array[Byte]] => Array[Byte] = _.value()
  implicit val keyNullValueTopicCrDecoder
    : ConsumerRecord[String, Array[Byte]] => (String, String, Array[Byte]) =
    cr => (cr.topic(), cr.key(), cr.value())
}
Example 22
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.{ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.KafkaException

import scala.util.Try

    /**
      * Polls the consumer once and decodes every record that was returned.
      *
      * @param poll    maximum time to block in the poll, in milliseconds
      * @param topics  topics to subscribe to before polling
      * @param decoder converts each consumer record into the result type
      * @return the decoded records of this poll, possibly empty
      * @throws KafkaUnavailableException if the Kafka client reports a KafkaException
      */
    private def getNextBatch[T](poll: Long, topics: Seq[String])(
        implicit decoder: ConsumerRecord[K, V] => T): Seq[T] =
      Try {
        import scala.collection.JavaConverters._
        // Polling without a subscription throws, so subscribe to the requested topics first.
        consumer.subscribe(topics.asJava)
        val records = consumer.poll(java.time.Duration.ofMillis(poll))
        // use toList to force eager evaluation. toSeq is lazy
        records.iterator().asScala.toList.map(decoder)
      }.recover {
        case ex: KafkaException => throw new KafkaUnavailableException(ex)
      }.get

Example 23
Source File: CachedKafkaConsumer.scala (from drizzle-spark, Apache License 2.0)
import java.{util => ju}

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.TopicPartition

import org.apache.spark.{SparkEnv, SparkException, TaskContext}
import org.apache.spark.internal.Logging

  def getOrCreate(
      topic: String,
      partition: Int,
      kafkaParams: ju.Map[String, Object]): CachedKafkaConsumer = synchronized {
    val groupId = kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String]
    val topicPartition = new TopicPartition(topic, partition)
    val key = CacheKey(groupId, topicPartition)

    // If this is reattempt at running the task, then invalidate cache and start with
    // a new consumer
    if (TaskContext.get != null && TaskContext.get.attemptNumber > 1) {
      new CachedKafkaConsumer(topicPartition, kafkaParams)
    } else {
      if (!cache.containsKey(key)) {
        cache.put(key, new CachedKafkaConsumer(topicPartition, kafkaParams))