org.apache.kafka.common.serialization.StringDeserializer Scala Examples

The following examples show how to use org.apache.kafka.common.serialization.StringDeserializer. They are taken from open-source projects; the originating project and source file are noted in the header above each example.
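As a quick orientation before the project examples, here is a minimal standalone sketch of the most direct way to plug StringDeserializer into a plain KafkaConsumer. The broker address, group id, and topic name are placeholders rather than values taken from any of the projects below.

import java.time.Duration
import java.util.Properties

import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConverters._

object StringDeserializerSketch extends App {
  val props = new Properties()
  props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") // placeholder broker
  props.put(ConsumerConfig.GROUP_ID_CONFIG, "example-group")           // placeholder group id
  props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  // Passing deserializer instances to the constructor avoids setting
  // key.deserializer / value.deserializer in the properties map.
  val consumer = new KafkaConsumer[String, String](props, new StringDeserializer, new StringDeserializer)
  consumer.subscribe(List("test-topic").asJava) // placeholder topic

  val records = consumer.poll(Duration.ofSeconds(1))
  records.asScala.foreach(r => println(s"${r.key()} -> ${r.value()}"))
  consumer.close()
}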
Example 1
Source File: StreamingKafka10.scala    From BigData-News   with Apache License 2.0
package com.vita.spark

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe

object StreamingKafka10 {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[2]")
      .appName("streaming")
      .getOrCreate()


    val sc = spark.sparkContext
    val ssc = new StreamingContext(sc, Seconds(5))

    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "node6:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "0001",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val topics = Array("weblogs")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    val lines = stream.map(x => x.value())
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L)).reduceByKey(_ + _)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
} 
Example 2
Source File: KafkaTopicInfo.scala    From matcher   with MIT License
package tools

import java.io.File

import akka.actor.ActorSystem
import com.typesafe.config.ConfigFactory
import com.wavesplatform.dex.queue.KafkaMatcherQueue.eventDeserializer
import com.wavesplatform.dex.queue.{QueueEvent, QueueEventWithMeta}
import com.wavesplatform.dex.settings.toConfigOps
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConverters._
import scala.concurrent.duration.DurationInt

object KafkaTopicInfo extends App {
  implicit val system: ActorSystem = ActorSystem()

  val configFile = new File(args(0))
  val topic      = args(1)
  val from       = args(2).toLong
  val max        = args(3).toInt

  println(s"""configFile: ${configFile.getAbsolutePath}
             |topic: $topic
             |from: $from
             |max: $max""".stripMargin)

  val requestTimeout = java.time.Duration.ofNanos(5.seconds.toNanos)

  val config = ConfigFactory
    .parseString("""waves.dex.events-queue.kafka.consumer.client {
                   |  client.id = "kafka-topics-info"
                   |  enable.auto.commit = false
                   |  auto.offset.reset = earliest
                   |}
                   |
                   |""".stripMargin)
    .withFallback {
      ConfigFactory
        .parseFile(configFile)
        .withFallback(ConfigFactory.defaultApplication())
        .withFallback(ConfigFactory.defaultReference())
        .resolve()
        .getConfig("waves.dex.events-queue.kafka")
    }

  val consumer = new KafkaConsumer[String, QueueEvent](
    config.getConfig("waves.dex.events-queue.kafka.consumer.client").toProperties,
    new StringDeserializer,
    eventDeserializer
  )

  try {
    val topicPartition  = new TopicPartition(topic, 0)
    val topicPartitions = java.util.Collections.singletonList(topicPartition)
    consumer.assign(topicPartitions)

    {
      val r = consumer.partitionsFor(topic, requestTimeout)
      println(s"Partitions:\n${r.asScala.mkString("\n")}")
    }

    {
      val r = consumer.endOffsets(topicPartitions, requestTimeout)
      println(s"End offsets for $topicPartition: ${r.asScala.mkString(", ")}")
    }

    consumer.seek(topicPartition, from)

    val pollDuration = java.time.Duration.ofNanos(1.seconds.toNanos)
    val lastOffset    = from + max
    var continue      = true
    while (continue) {
      println(s"Reading from Kafka")

      val xs = consumer.poll(pollDuration).asScala.toVector
      xs.foreach { msg =>
        println(QueueEventWithMeta(msg.offset(), msg.timestamp(), msg.value()))
      }

      xs.lastOption.foreach { x =>
        if (x.offset() == lastOffset) continue = false
      }
    }
  } finally {
    consumer.close()
  }
} 
Example 3
Source File: IntegrationTest.scala    From kmq   with Apache License 2.0
package com.softwaremill.kmq.redelivery

import java.time.Duration
import java.util.Random

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerMessage, ProducerSettings, Subscriptions}
import akka.stream.ActorMaterializer
import akka.testkit.TestKit
import com.softwaremill.kmq._
import com.softwaremill.kmq.redelivery.infrastructure.KafkaSpec
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.scalatest.concurrent.Eventually
import org.scalatest.time.{Seconds, Span}
import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers}

import scala.collection.mutable.ArrayBuffer

class IntegrationTest extends TestKit(ActorSystem("test-system")) with FlatSpecLike with KafkaSpec with BeforeAndAfterAll with Eventually with Matchers {

  implicit val materializer = ActorMaterializer()
  import system.dispatcher

  "KMQ" should "resend message if not committed" in {
    val bootstrapServer = s"localhost:${testKafkaConfig.kafkaPort}"
    val kmqConfig = new KmqConfig("queue", "markers", "kmq_client", "kmq_redelivery",
      Duration.ofSeconds(1).toMillis, 1000)

    val consumerSettings = ConsumerSettings(system, new StringDeserializer, new StringDeserializer)
      .withBootstrapServers(bootstrapServer)
      .withGroupId(kmqConfig.getMsgConsumerGroupId)
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

    val markerProducerSettings = ProducerSettings(system,
      new MarkerKey.MarkerKeySerializer(), new MarkerValue.MarkerValueSerializer())
      .withBootstrapServers(bootstrapServer)
      .withProperty(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[ParititionFromMarkerKey].getName)
    val markerProducer = markerProducerSettings.createKafkaProducer()

    val random = new Random()

    lazy val processedMessages = ArrayBuffer[String]()
    lazy val receivedMessages = ArrayBuffer[String]()

    val control = Consumer.committableSource(consumerSettings, Subscriptions.topics(kmqConfig.getMsgTopic)) // 1. get messages from topic
      .map { msg =>
        ProducerMessage.Message(
          new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic, MarkerKey.fromRecord(msg.record), new StartMarker(kmqConfig.getMsgTimeoutMs)), msg)
      }
      .via(Producer.flow(markerProducerSettings, markerProducer)) // 2. write the "start" marker
      .map(_.message.passThrough)
      .mapAsync(1) { msg =>
        msg.committableOffset.commitScaladsl().map(_ => msg.record) // this should be batched
      }
      .map { msg =>
        receivedMessages += msg.value
        msg
      }
      .filter(_ => random.nextInt(5) != 0)
      .map { processedMessage =>
        processedMessages += processedMessage.value
        new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic, MarkerKey.fromRecord(processedMessage), EndMarker.INSTANCE)
      }
      .to(Producer.plainSink(markerProducerSettings, markerProducer)) // 5. write "end" markers
      .run()

    val redeliveryHook = RedeliveryTracker.start(new KafkaClients(bootstrapServer), kmqConfig)

    val messages = (0 to 20).map(_.toString)
    messages.foreach(msg => sendToKafka(kmqConfig.getMsgTopic,msg))

    eventually {
      receivedMessages.size should be > processedMessages.size
      processedMessages.sortBy(_.toInt).distinct shouldBe messages
    }(PatienceConfig(timeout = Span(15, Seconds)), implicitly)

    redeliveryHook.close()
    control.shutdown()
  }

  override def afterAll(): Unit = {
    super.afterAll()
    TestKit.shutdownActorSystem(system)
  }
} 
Example 4
Source File: KafkaSpec.scala    From kmq   with Apache License 2.0
package com.softwaremill.kmq.redelivery.infrastructure

import net.manub.embeddedkafka.{EmbeddedKafka, EmbeddedKafkaConfig}
import org.apache.kafka.common.serialization.StringDeserializer
import org.scalatest.{BeforeAndAfterEach, Suite}

trait KafkaSpec extends BeforeAndAfterEach { self: Suite =>

  val testKafkaConfig = EmbeddedKafkaConfig(9092, 2182)
  private implicit val stringDeserializer = new StringDeserializer()

  def sendToKafka(topic: String, message: String): Unit = {
    EmbeddedKafka.publishStringMessageToKafka(topic, message)(testKafkaConfig)
  }

  def consumeFromKafka(topic: String): String = {
    EmbeddedKafka.consumeFirstStringMessageFrom(topic)(testKafkaConfig)
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
    EmbeddedKafka.start()(testKafkaConfig)
  }

  override def afterEach(): Unit = {
    super.afterEach()
    EmbeddedKafka.stop()
  }
} 
Example 5
Source File: ConsumerBuilder.scala    From asura   with MIT License
package asura.kafka

import akka.actor.ActorSystem
import akka.kafka.scaladsl.Consumer
import akka.kafka.scaladsl.Consumer.Control
import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.Source
import io.confluent.kafka.serializers.{AbstractKafkaAvroSerDeConfig, KafkaAvroDeserializer, KafkaAvroDeserializerConfig}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.common.serialization.{Deserializer, StringDeserializer}

import scala.collection.JavaConverters._

object ConsumerBuilder {

  def buildAvroSource[V](
                          brokerUrl: String,
                          schemaRegisterUrl: String,
                          group: String,
                          topics: Set[String],
                          resetType: String = "latest",
                        )(implicit system: ActorSystem): Source[ConsumerRecord[String, V], Control] = {

    val kafkaAvroSerDeConfig = Map[String, Any](
      AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG -> schemaRegisterUrl,
      KafkaAvroDeserializerConfig.SPECIFIC_AVRO_READER_CONFIG -> true.toString
    )
    val consumerSettings: ConsumerSettings[String, V] = {
      val kafkaAvroDeserializer = new KafkaAvroDeserializer()
      kafkaAvroDeserializer.configure(kafkaAvroSerDeConfig.asJava, false)
      val deserializer = kafkaAvroDeserializer.asInstanceOf[Deserializer[V]]

      ConsumerSettings(system, new StringDeserializer, deserializer)
        .withBootstrapServers(brokerUrl)
        .withGroupId(group)
        .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, resetType)
    }
    Consumer.plainSource(consumerSettings, Subscriptions.topics(topics))
  }
} 
Example 6
Source File: Consumers.scala    From Fast-Data-Processing-Systems-with-SMACK-Stack   with MIT License
import akka.actor.ActorRef
import akka.kafka._
import akka.kafka.ConsumerMessage.CommittableOffsetBatch
import akka.kafka.scaladsl._
import akka.stream.scaladsl.{Keep, Sink}
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.ByteArrayDeserializer
import org.apache.kafka.common.serialization.StringDeserializer

val consumerSettings = ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
  .withBootstrapServers("localhost:9092")
  .withGroupId("group1")
  .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

// Consume messages and store a representation, including offset, in DB example:
db.loadOffset().foreach { fromOffset =>
  val subscription = Subscriptions.assignmentWithOffset(new TopicPartition("topic1", 1) -> fromOffset)
  Consumer.plainSource(consumerSettings, subscription)
    .mapAsync(1)(db.save)
}

// Consume messages at-most-once example:
Consumer.atMostOnceSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))
  .mapAsync(1) { record =>
    rocket.launch(record.value)
  }

// Consume messages at-least-once example:
Consumer.committableSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))
  .mapAsync(1) { msg =>
    db.update(msg.record.value).flatMap(_ =>
      msg.committableOffset.commitScaladsl())
  }

// Connect a Consumer to Producer example:
Consumer.committableSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))
  .map(msg => ProducerMessage.Message(
    new ProducerRecord[Array[Byte], String]("topic2", msg.record.value),
    msg.committableOffset))
  .to(Producer.committableSink(producerSettings))

// Consume messages at-least-once, and commit in batches example:
Consumer.committableSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))
  .mapAsync(1) { msg =>
    db.update(msg.record.value).map(_ => msg.committableOffset)
  }
  .batch(max = 10, first =>
    CommittableOffsetBatch.empty.updated(first)) { (batch, elem) =>
    batch.updated(elem)
  }
  .mapAsync(1)(_.commitScaladsl())

// A reusable Kafka consumer example:
val consumer: ActorRef = system.actorOf(KafkaConsumerActor.props(consumerSettings))

// Manually assign topic partition to it
val stream1 = Consumer
    .plainExternalSource[Array[Byte], String](consumer, Subscriptions.assignment(new TopicPartition("topic1", 1)))
    .via(business)
    .to(Sink.ignore)

// Manually assign another topic partition
val stream2 = Consumer
    .plainExternalSource[Array[Byte], String](consumer, Subscriptions.assignment(new TopicPartition("topic1", 2)))
    .via(business)
    .to(Sink.ignore)

// Consumer group example:
val consumerGroup = Consumer.committablePartitionedSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))

// Process each assigned partition separately
consumerGroup.map {
  case (topicPartition, source) =>
    source
      .via(business)
      .toMat(Sink.ignore)(Keep.both)
      .run()
}.mapAsyncUnordered(maxPartitions)(_._2)
Example 7
Source File: TestConsumer.scala    From asura   with MIT License
package asura.kafka.consumer

import akka.actor.ActorSystem
import akka.kafka.scaladsl.Consumer
import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Sink
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.common.serialization.StringDeserializer

object TestConsumer extends StrictLogging {

  def main(args: Array[String]): Unit = {

    logger.info("Start consumer")

    implicit val system = ActorSystem("consumer")
    implicit val materializer = ActorMaterializer()
    implicit val ec = system.dispatcher

    val consumerSettings = ConsumerSettings(system, new StringDeserializer, new StringDeserializer)
      .withGroupId("test-group1")

    val done = Consumer
      .plainSource(consumerSettings, Subscriptions.topics("test-topic"))
      .runWith(Sink.foreach(record =>
        logger.info(s"topic:${record.topic()}, partition:${record.partition()}, offset:${record.offset()}, key:${record.key()}, value: ${record.value()}"))
      )
    done onComplete {
      case scala.util.Success(_) => logger.info("Done"); system.terminate()
      case scala.util.Failure(err) => logger.error(err.toString); system.terminate()
    }
  }
} 
Example 8
Source File: CountIntByStreaming.scala    From wow-spark   with MIT License
package com.sev7e0.wow.kafka

import com.sev7e0.wow.spark_streaming.StreamingLogger
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object CountIntByStreaming {

  val brokerList = "localhost:9092"
  val topic = "randomCount"
  val groupId = "group"
  val path = "temp/checkpoint/CountIntBySS"
  val master = "local"

  def main(args: Array[String]): Unit = {
    val prop = initProperties()
    val topics = Array(topic)

    // Set the logging level
    StreamingLogger.setLoggerLevel()

    val sparkConf = new SparkConf()
      .setAppName(CountIntByStreaming.getClass.getName)
      .setMaster(master)

    // Create the StreamingContext with a 2-second batch interval
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Set the checkpoint path
    ssc.checkpoint(path)


    // Use KafkaUtils to create the DStream
    val kafkaDS = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topics, prop))


    kafkaDS.map(record => {
      val value = record.value().toLong
      value
    }).reduce(_ + _).print()

    ssc.start()
    ssc.awaitTermination()
  }

  def initProperties(): Map[String, Object] = Map[String, Object](
    ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
    ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
    ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokerList,
    ConsumerConfig.GROUP_ID_CONFIG -> groupId,
    ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest",
    ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean)
  )
} 
Example 9
Source File: package.scala    From kafka-scala-api   with Apache License 2.0
package com

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.{Seconds, StreamingContext}

package object example {

  def setupLogging(): Unit = {
    import org.apache.log4j.{Level, Logger}
    val rootLogger = Logger.getRootLogger
    rootLogger.setLevel(Level.ERROR)
  }

  def kafkaParams = Map[String, Object](
    "bootstrap.servers" -> "127.0.0.1:9092",
    "key.deserializer" -> classOf[StringDeserializer],
    "value.deserializer" -> classOf[StringDeserializer],
    "group.id" -> "mygroup1",
    "auto.offset.reset" -> "latest",
    "enable.auto.commit" -> (false: java.lang.Boolean)
  )

  def launchWithCheckpointing(logic: StreamingContext => Unit, appName:String, checkpointPath:String): Unit = {
    val streamingContext = new StreamingContext("local[*]", appName, Seconds(2))
    setupLogging()
    logic.apply(streamingContext)

    streamingContext.checkpoint(checkpointPath)
    streamingContext.start()
    streamingContext.awaitTermination()
  }

  def launchWithItself(logic: StreamingContext => Unit, appName:String): Unit = {
    val streamingContext = new StreamingContext("local[*]", appName, Seconds(2))
    setupLogging()
    logic.apply(streamingContext)

    streamingContext.start()
    streamingContext.awaitTermination()
  }
} 
Example 10
Source File: package.scala    From kafka-scala-api   with Apache License 2.0
package com

import java.util.Collections

import cakesolutions.kafka.{KafkaConsumer, KafkaProducer}
import cakesolutions.kafka.KafkaProducer.Conf
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

package object example {

  val topic = "sample_topic"

  val kafkaProducer = KafkaProducer(
    Conf(new StringSerializer(), new StringSerializer(), bootstrapServers = "localhost:9092")
  )

  val kafkaProducerConf = KafkaProducer.Conf(
    new StringSerializer, new StringSerializer,
    bootstrapServers = "localhost:9092"
  )

  val kafkaConsumerConf = KafkaConsumer.Conf(
    new StringDeserializer,
    new StringDeserializer,
    groupId = "test_group",
    enableAutoCommit = false,
    autoOffsetReset = OffsetResetStrategy.EARLIEST,
    bootstrapServers = "localhost:9092")
} 
Example 11
Source File: package.scala    From kafka-scala-api   with Apache License 2.0
package com

import akka.actor.ActorSystem
import akka.kafka.{ConsumerSettings, ProducerSettings}
import akka.stream.ActorMaterializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}

package object example {
  implicit val system = ActorSystem("FlowProducerMain")
  implicit val materializer = ActorMaterializer()

  val producerSettings = ProducerSettings(system, new ByteArraySerializer, new StringSerializer)
    .withBootstrapServers("localhost:9092")

  val topic = "sample_topic"
  val topic1 = "topic1"
  val topic2 = "topic2"


  val consumerSettings =
    ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
      .withBootstrapServers("localhost:9092")
      .withGroupId("group1")
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest")
} 
Example 12
Source File: ConsumerSelfManaged.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka.examples

import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props}
import cakesolutions.kafka.KafkaConsumer
import cakesolutions.kafka.akka.KafkaConsumerActor.{Confirm, Subscribe}
import cakesolutions.kafka.akka.{ConsumerRecords, Extractor, KafkaConsumerActor, Offsets}
import com.typesafe.config.Config
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer

import scala.concurrent.duration._


object ConsumerSelfManaged {

  def apply(config: Config): ActorRef = {
    val consumerConf = KafkaConsumer.Conf(
      new StringDeserializer,
      new StringDeserializer,
      groupId = "groupId",
      enableAutoCommit = false,
      autoOffsetReset = OffsetResetStrategy.EARLIEST)
      .withConf(config)

    val actorConf = KafkaConsumerActor.Conf(1.seconds, 3.seconds)

    val system = ActorSystem()
    system.actorOf(Props(new ConsumerSelfManaged(consumerConf, actorConf)))
  }
}

class ConsumerSelfManaged(
  kafkaConfig: KafkaConsumer.Conf[String, String],
  actorConfig: KafkaConsumerActor.Conf) extends Actor with ActorLogging {

  val recordsExt: Extractor[Any, ConsumerRecords[String, String]] = ConsumerRecords.extractor[String, String]

  val consumer: ActorRef = context.actorOf(
    KafkaConsumerActor.props(kafkaConfig, actorConfig, self)
  )

  consumer ! Subscribe.ManualOffset(Offsets(Map((new TopicPartition("topic1", 0), 1))))

  override def receive: Receive = {

    // Records from Kafka
    case recordsExt(records) =>
      processRecords(records)
      sender() ! Confirm(records.offsets)
  }

  private def processRecords(records: ConsumerRecords[String, String]) = {
    records.pairs.foreach { case (key, value) =>
      log.info(s"Received [$key,$value]")
    }
    log.info(s"Batch complete, offsets: ${records.offsets}")
  }
} 
Example 13
Source File: AutoPartitionConsumer.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka.examples

import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props}
import cakesolutions.kafka.KafkaConsumer
import cakesolutions.kafka.akka.KafkaConsumerActor.{Confirm, Subscribe}
import cakesolutions.kafka.akka.{ConsumerRecords, KafkaConsumerActor}
import com.typesafe.config.{Config, ConfigFactory}
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.serialization.StringDeserializer

import scala.concurrent.duration._


object AutoPartitionConsumer {

  def apply(config: Config): ActorRef = {
    val consumerConf = KafkaConsumer.Conf(
      new StringDeserializer,
      new StringDeserializer,
      groupId = "test_group",
      enableAutoCommit = false,
      autoOffsetReset = OffsetResetStrategy.EARLIEST)
      .withConf(config)

    val actorConf = KafkaConsumerActor.Conf(1.seconds, 3.seconds)

    val system = ActorSystem()
    system.actorOf(Props(new AutoPartitionConsumer(consumerConf, actorConf)))
  }
}

class AutoPartitionConsumer(
  kafkaConfig: KafkaConsumer.Conf[String, String],
  actorConfig: KafkaConsumerActor.Conf) extends Actor with ActorLogging {

  private val recordsExt = ConsumerRecords.extractor[String, String]

  private val consumer = context.actorOf(
    KafkaConsumerActor.props(kafkaConfig, actorConfig, self)
  )
  context.watch(consumer)

  consumer ! Subscribe.AutoPartition(List("topic1"))

  override def receive: Receive = {

    // Records from Kafka
    case recordsExt(records) =>
      processRecords(records.pairs)
      sender() ! Confirm(records.offsets, commit = true)
  }

  private def processRecords(records: Seq[(Option[String], String)]) =
    records.foreach { case (key, value) =>
      log.info(s"Received [$key,$value]")
    }
} 
Example 14
Source File: ConsumerToProducer.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka.examples

import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props}
import cakesolutions.kafka.akka.KafkaConsumerActor.{Confirm, Subscribe}
import cakesolutions.kafka.akka._
import cakesolutions.kafka.{KafkaConsumer, KafkaProducer}
import com.typesafe.config.{Config, ConfigFactory}
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

import scala.concurrent.duration._


object ConsumerToProducer {

  def apply(consumerConfig: Config, producerConfig: Config): ActorRef = {

    // Create KafkaConsumerActor config with bootstrap.servers specified in Typesafe config
    val consumerConf = KafkaConsumer.Conf(
      new StringDeserializer,
      new StringDeserializer,
      groupId = "test_group",
      enableAutoCommit = false,
      autoOffsetReset = OffsetResetStrategy.EARLIEST)
      .withConf(consumerConfig)

    val actorConf = KafkaConsumerActor.Conf(1.seconds, 3.seconds, 5)

    // Create KafkaProducerActor config with defaults and bootstrap.servers specified in Typesafe config
    val producerConf = KafkaProducer.Conf(new StringSerializer, new StringSerializer).withConf(producerConfig)

    val system = ActorSystem()
    system.actorOf(Props(new ConsumerToProducer(consumerConf, actorConf, producerConf)))
  }
}

class ConsumerToProducer(
  kafkaConfig: KafkaConsumer.Conf[String, String],
  actorConfig: KafkaConsumerActor.Conf,
  producerConf: KafkaProducer.Conf[String, String]) extends Actor with ActorLogging {

  private val recordsExt = ConsumerRecords.extractor[String, String]

  // The KafkaConsumerActor
  private val consumer = context.actorOf(
    KafkaConsumerActor.props(kafkaConfig, actorConfig, self)
  )
  context.watch(consumer)

  // The KafkaProducerActor
  private val producer = context.actorOf(KafkaProducerActor.props(producerConf))

  consumer ! Subscribe.AutoPartition(List("topic1"))

  override def receive: Receive = {

    // Records from Kafka
    case recordsExt(records) =>
      processRecords(records)

    // Confirmed Offsets from KafkaProducer
    case o: Offsets =>
      consumer ! Confirm(o, commit = true)
  }

  // Demonstrates some transformation of the messages before forwarding to KafkaProducer
  private def processRecords(records: ConsumerRecords[String, String]) = {
    val transformedRecords = records.pairs.map { case (key, value) =>
      (key, value + ".")
    }

    // Send records to Topic2.  Offsets will be sent back to this actor once confirmed.
    producer ! ProducerRecords.fromKeyValues[String, String]("topic2", transformedRecords, Some(records.offsets), None)

    // Could have sent them like this if we didn't first transform:
    // producer ! ProducerRecords.fromConsumerRecords("topic2", records, None)
  }
} 
Example 15
Source File: ConsumerRecovery.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka.examples

import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, OneForOneStrategy, Props, SupervisorStrategy}
import cakesolutions.kafka.KafkaConsumer
import cakesolutions.kafka.akka.KafkaConsumerActor.{Confirm, Subscribe}
import cakesolutions.kafka.akka.{ConsumerRecords, Extractor, KafkaConsumerActor}
import com.typesafe.config.{Config, ConfigFactory}
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.serialization.StringDeserializer

import scala.concurrent.duration._


class ConsumerRecovery(
  kafkaConfig: KafkaConsumer.Conf[String, String],
  actorConfig: KafkaConsumerActor.Conf) extends Actor with ActorLogging {

  override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy(maxNrOfRetries = 10) {
    case _: KafkaConsumerActor.ConsumerException =>
      log.info("Consumer exception caught. Restarting consumer.")
      SupervisorStrategy.Restart
    case _ =>
      SupervisorStrategy.Escalate
  }

  val recordsExt: Extractor[Any, ConsumerRecords[String, String]] = ConsumerRecords.extractor[String, String]

  val consumer: ActorRef = context.actorOf(
    KafkaConsumerActor.props(kafkaConfig, actorConfig, self)
  )

  consumer ! Subscribe.AutoPartition(List("topic1"))

  override def receive: Receive = {

    // Records from Kafka
    case recordsExt(records) =>
      processRecords(records.pairs)
      sender() ! Confirm(records.offsets, commit = true)
  }

  private def processRecords(records: Seq[(Option[String], String)]) =
    records.foreach { case (key, value) =>
      log.info(s"Received [$key,$value]")
    }
} 
Example 16
Source File: AutoPartitionConsumerWithManualOffset.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka.examples

import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props}
import cakesolutions.kafka.KafkaConsumer
import cakesolutions.kafka.akka.KafkaConsumerActor._
import cakesolutions.kafka.akka.{ConsumerRecords, KafkaConsumerActor, Offsets}
import com.typesafe.config.{Config, ConfigFactory}
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer

import scala.concurrent.duration._


object AutoPartitionConsumerWithManualOffset {

  def apply(config: Config): ActorRef = {
    val consumerConf = KafkaConsumer.Conf(
      new StringDeserializer,
      new StringDeserializer,
      groupId = "test_group",
      enableAutoCommit = false,
      autoOffsetReset = OffsetResetStrategy.EARLIEST)
      .withConf(config)

    val actorConf = KafkaConsumerActor.Conf(1.seconds, 3.seconds)

    val system = ActorSystem()
    system.actorOf(Props(new AutoPartitionConsumerWithManualOffset(consumerConf, actorConf)))
  }
}

class AutoPartitionConsumerWithManualOffset(
  kafkaConfig: KafkaConsumer.Conf[String, String],
  actorConfig: KafkaConsumerActor.Conf) extends Actor with ActorLogging {

  private val recordsExt = ConsumerRecords.extractor[String, String]

  private val consumer = context.actorOf(
    KafkaConsumerActor.props(kafkaConfig, actorConfig, self)
  )

  consumer ! Subscribe.AutoPartitionWithManualOffset(List("topic1"), assignedListener, revokedListener)

  override def receive: Receive = {

    // Records from Kafka
    case recordsExt(records) =>
      processRecords(records.pairs)
      sender() ! Confirm(records.offsets)
  }

  private def processRecords(records: Seq[(Option[String], String)]) =
    records.foreach { case (key, value) =>
      log.info(s"Received [$key,$value]")
    }

  private def assignedListener(tps: List[TopicPartition]): Offsets = {
    log.info("Partitions have been assigned" + tps.toString())

    // Should load the offsets from a persistent store and any related state
    val offsetMap = tps.map{ tp =>
      tp -> 0L
    }.toMap

    // Return the required offsets for the assigned partitions
    Offsets(offsetMap)
  }

  private def revokedListener(tps: List[TopicPartition]): Unit = {
    log.info("Partitions have been revoked" + tps.toString())
    // Opportunity to clear any state for the revoked partitions
    ()
  }
} 
Example 17
Source File: KafkaConsumerPerfSpec.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka

import cakesolutions.kafka.KafkaConsumer.Conf
import com.typesafe.config.ConfigFactory
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.util.Random


class KafkaConsumerPerfSpec extends FlatSpecLike
  with Matchers
  with BeforeAndAfterAll {

  val log = LoggerFactory.getLogger(getClass)

  val config = ConfigFactory.load()

  val msg1k = scala.io.Source.fromInputStream(getClass.getResourceAsStream("/1k.txt")).mkString

  val consumer = KafkaConsumer(
    Conf(config.getConfig("consumer"),
      new StringDeserializer,
      new StringDeserializer)
  )

  private def randomString: String = Random.alphanumeric.take(5).mkString("")

  "Kafka Consumer with single partition topic" should "perform" in {
    val topic = randomString
    val producerConf = KafkaProducer.Conf(config.getConfig("producer"), new StringSerializer, new StringSerializer)
    val producer = KafkaProducer[String, String](producerConf)

    1 to 100000 foreach { n =>
      producer.send(KafkaProducerRecord(topic, None, msg1k))
    }
    producer.flush()
    log.info("Delivered 100000 msg to topic {}", topic)

    consumer.subscribe(List(topic).asJava)

    var start = 0L

    var total = 0

    while (total < 100000) {
      if(total == 0)
        start = System.currentTimeMillis()
      val count = consumer.poll(1000).count()
      total += count
    }

    val totalTime = System.currentTimeMillis() - start
    val messagesPerSec = 100000L * 1000 / totalTime
    log.info("Total Time millis : {}", totalTime)
    log.info("Messages per sec  : {}", messagesPerSec)

    totalTime should be < 4000L

    consumer.close()
    producer.close()
  }
} 
Example 18
Source File: IdempotentProducerSpec.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka

import org.apache.kafka.clients.consumer.ConsumerRecords
import org.apache.kafka.common.KafkaException
import org.apache.kafka.common.requests.IsolationLevel
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.util.Random

class IdempotentProducerSpec extends KafkaIntSpec {
  private val log = LoggerFactory.getLogger(getClass)

  private def randomString: String = Random.alphanumeric.take(5).mkString("")

  val idempotentProducerConfig: KafkaProducer.Conf[String, String] =
    KafkaProducer.Conf(new StringSerializer(),
      new StringSerializer(),
      bootstrapServers = s"localhost:$kafkaPort",
      enableIdempotence = true)

  val transactionalProducerConfig: KafkaProducer.Conf[String, String] =
    KafkaProducer.Conf(new StringSerializer(),
      new StringSerializer(),
      bootstrapServers = s"localhost:$kafkaPort",
      transactionalId = Some("t1"),
      enableIdempotence = true)

  val consumerConfig: KafkaConsumer.Conf[String, String] =
    KafkaConsumer.Conf(new StringDeserializer(),
      new StringDeserializer(),
      bootstrapServers = s"localhost:$kafkaPort",
      groupId = randomString,
      enableAutoCommit = false)

  val transactionConsumerConfig: KafkaConsumer.Conf[String, String] =
    KafkaConsumer.Conf(new StringDeserializer(),
      new StringDeserializer(),
      bootstrapServers = s"localhost:$kafkaPort",
      groupId = randomString,
      enableAutoCommit = false,
      isolationLevel = IsolationLevel.READ_COMMITTED)

  "Producer with idempotent config" should "deliver batch" in {
    val topic = randomString
    log.info(s"Using topic [$topic] and kafka port [$kafkaPort]")

    val producer = KafkaProducer(idempotentProducerConfig)
    val consumer = KafkaConsumer(consumerConfig)

    consumer.subscribe(List(topic).asJava)

    val records1 = consumer.poll(1000)
    records1.count() shouldEqual 0

    log.info("Kafka producer connecting on port: [{}]", kafkaPort)
    producer.send(KafkaProducerRecord(topic, Some("key"), "value"))
    producer.flush()

    val records2: ConsumerRecords[String, String] = consumer.poll(1000)
    records2.count() shouldEqual 1

    producer.close()
    consumer.close()
  }

  "Producer with transaction" should "deliver batch" in {
    val topic = randomString
    log.info(s"Using topic [$topic] and kafka port [$kafkaPort]")

    val producer = KafkaProducer(transactionalProducerConfig)
    val consumer = KafkaConsumer(transactionConsumerConfig)

    consumer.subscribe(List(topic).asJava)

    val records1 = consumer.poll(1000)
    records1.count() shouldEqual 0

    log.info("Kafka producer connecting on port: [{}]", kafkaPort)

    producer.initTransactions()

    try {
      producer.beginTransaction()
      producer.send(KafkaProducerRecord(topic, Some("key"), "value"))
      producer.commitTransaction()
    } catch {
      case ex: KafkaException =>
        log.error(ex.getMessage, ex)
        producer.abortTransaction()
    }

    val records2: ConsumerRecords[String, String] = consumer.poll(1000)
    records2.count() shouldEqual 1

    producer.close()
    consumer.close()
  }
} 
Example 19
Source File: KafkaProducerActorSpec.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka.akka

import akka.actor.ActorSystem
import akka.testkit.TestProbe
import cakesolutions.kafka.{KafkaConsumer, KafkaProducer, KafkaProducerRecord}
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

import scala.util.Random

class KafkaProducerActorSpec(system_ : ActorSystem) extends KafkaIntSpec(system_) {

  def this() = this(ActorSystem("KafkaProducerActorSpec"))

  private def randomString: String = Random.alphanumeric.take(5).mkString("")

  val deserializer = new StringDeserializer
  val consumerConf = KafkaConsumer.Conf(
    deserializer, deserializer,
    bootstrapServers = s"localhost:$kafkaPort",
    groupId = "test",
    enableAutoCommit = false,
    autoOffsetReset = OffsetResetStrategy.EARLIEST
  )

  val serializer = new StringSerializer
  val producerConf = KafkaProducer.Conf(serializer, serializer, bootstrapServers = s"localhost:$kafkaPort")

  "KafkaProducerActor" should "write a given batch to Kafka" in {
    val topic = randomString
    val probe = TestProbe()
    val producer = system.actorOf(KafkaProducerActor.props(producerConf))
    val batch: Seq[ProducerRecord[String, String]] = Seq(
      KafkaProducerRecord(topic, "foo"),
      KafkaProducerRecord(topic, "key", "value"),
      KafkaProducerRecord(topic, "bar"))
    val message = ProducerRecords(batch, Some('response))

    probe.send(producer, message)

    probe.expectMsg('response)

    val results = consumeFromTopic(topic, 3, 10000)

    results(0) shouldEqual ((None, "foo"))
    results(1) shouldEqual ((Some("key"), "value"))
    results(2) shouldEqual ((None, "bar"))
  }

  "KafkaProducerActor" should "write a given batch to Kafka, requiring no response" in {
    import scala.concurrent.duration._

    val topic = randomString
    val probe = TestProbe()
    val producer = system.actorOf(KafkaProducerActor.props(producerConf))
    val batch: Seq[ProducerRecord[String, String]] = Seq(
      KafkaProducerRecord(topic, "foo"),
      KafkaProducerRecord(topic, "key", "value"),
      KafkaProducerRecord(topic, "bar")
    )
    val message = ProducerRecords(batch)

    probe.send(producer, message)

    probe.expectNoMessage(3.seconds)

    val results = consumeFromTopic(topic, 3, 10000)

    results(0) shouldEqual ((None, "foo"))
    results(1) shouldEqual ((Some("key"), "value"))
    results(2) shouldEqual ((None, "bar"))
  }

  private def consumeFromTopic(topic: String, expectedNumOfMessages: Int, timeout: Long) =
    kafkaServer.consume(topic, expectedNumOfMessages, timeout, deserializer, deserializer)
} 
Example 20
Source File: Kafka2OdpsDemo.scala    From MaxCompute-Spark   with Apache License 2.0
package com.aliyun.odps.spark.examples.streaming.kafka

import com.aliyun.odps.spark.examples.streaming.common.SparkSessionSingleton
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

object Kafka2OdpsDemo {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("test")
    val ssc = new StreamingContext(sparkConf, Seconds(10))

    // Use OSS for checkpoint storage; replace with a valid OSS path. For OSS access, see https://github.com/aliyun/MaxCompute-Spark/wiki/08.-Oss-Access%E6%96%87%E6%A1%A3%E8%AF%B4%E6%98%8E
    ssc.checkpoint("oss://bucket/checkpointdir")

    // Kafka configuration parameters
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "testGroupId",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    // Create the Kafka DStream
    val topics = Set("test")
    val recordDstream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
      )
    val dstream = recordDstream.map(f => (f.key(), f.value()))
    // Parse the Kafka data and write it to ODPS
    val data: DStream[String] = dstream.map(_._2)
    val wordsDStream: DStream[String] = data.flatMap(_.split(" "))
    wordsDStream.foreachRDD(rdd => {
      val spark = SparkSessionSingleton.getInstance(rdd.sparkContext.getConf)
      import spark.implicits._

      rdd.toDF("id").write.mode("append").saveAsTable("test_table")
    })

    ssc.start()
    ssc.awaitTermination()
  }
} 
Example 21
Source File: KafkaStreamingDemo.scala    From MaxCompute-Spark   with Apache License 2.0
package com.aliyun.odps.spark.examples.streaming.kafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaStreamingDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("KafkaStreamingDemo")
      .getOrCreate()

    val ssc = new StreamingContext(spark.sparkContext, Seconds(5))

    // Use OSS for checkpoint storage
    ssc.checkpoint("oss://bucket/checkpointDir/")

    // Kafka configuration parameters
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "192.168.1.1:9200,192.168.1.2:9200,192.168.1.3:9200",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "testGroupId",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val topics = Set("event_topic")
    val recordDstream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
      )


    val dstream = recordDstream.map(f => (f.key(), f.value()))
    val data: DStream[String] = dstream.map(_._2)
    val wordsDStream: DStream[String] = data.flatMap(_.split(" "))
    val wordAndOneDstream: DStream[(String, Int)] = wordsDStream.map((_, 1))
    val result: DStream[(String, Int)] = wordAndOneDstream.reduceByKey(_ + _)
    result.print()

    ssc.start()
    ssc.awaitTermination()
  }
} 
Example 22
Source File: UseCase.scala    From Fast-Data-Processing-Systems-with-SMACK-Stack   with MIT License
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{Sink, Source}
import com.softwaremill.react.kafka.KafkaMessages._
import org.apache.kafka.common.serialization.{StringSerializer, StringDeserializer}
import com.softwaremill.react.kafka.{ProducerMessage, ConsumerProperties, ProducerProperties, ReactiveKafka}
import org.reactivestreams.{ Publisher, Subscriber }

implicit val actorSystem = ActorSystem("ReactiveKafka")
implicit val materializer = ActorMaterializer()

val kafka = new ReactiveKafka()
val publisher: Publisher[StringConsumerRecord] = kafka.consume(ConsumerProperties(
 bootstrapServers = "localhost:9092",
 topic = "lowercaseStrings",
 groupId = "groupName",
 valueDeserializer = new StringDeserializer()
))

val subscriber: Subscriber[StringProducerMessage] = kafka.publish(ProducerProperties(
  bootstrapServers = "localhost:9092",
  topic = "uppercaseStrings",
  valueSerializer = new StringSerializer()
))

Source.fromPublisher(publisher).map(m => ProducerMessage(m.value().toUpperCase))
  .to(Sink.fromSubscriber(subscriber)).run() 
Example 23
Source File: EventAggregationSpec.scala    From spark-summit-2018   with GNU General Public License v3.0
package com.twilio.open.streaming.trend.discovery

import java.util

import com.twilio.open.protocol.Calls.CallEvent
import com.twilio.open.protocol.Metrics
import com.twilio.open.streaming.trend.discovery.streams.EventAggregation
import org.apache.kafka.common.serialization.{Deserializer, Serializer, StringDeserializer, StringSerializer}
import org.apache.spark.sql.streaming.{OutputMode, Trigger}
import org.apache.spark.sql._
import org.apache.spark.sql.kafka010.KafkaTestUtils
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.{Logger, LoggerFactory}

class EventAggregationSpec extends KafkaBackedTest[String, CallEvent] {
  override val testUtils = new KafkaTestUtils[String, CallEvent] {
    override val keySerializer: Serializer[String] = new StringSerializer
    override val keyDeserializer: Deserializer[String] = new StringDeserializer
    override val valueSerializer: Serializer[CallEvent] = new CallEventSerializer
    override val valueDeserializer: Deserializer[CallEvent] = new CallEventDeserializer
  }
  override protected val kafkaTopic = "spark.summit.call.events"
  override protected val partitions = 8

  private val pathToTestScenarios = "src/test/resources/scenarios"

  val log: Logger = LoggerFactory.getLogger(classOf[EventAggregation])

  lazy val session: SparkSession = sparkSql

  override def conf: SparkConf = {
    new SparkConf()
      .setMaster("local[*]")
      .setAppName("aggregation-test-app")
      .set("spark.ui.enabled", "false")
      .set("spark.app.id", appID)
      .set("spark.driver.host", "localhost")
      .set("spark.sql.shuffle.partitions", "32")
      .set("spark.executor.cores", "4")
      .set("spark.executor.memory", "1g")
      .set("spark.ui.enabled", "false")
      .setJars(SparkContext.jarOfClass(classOf[EventAggregation]).toList)
  }

  test("Should aggregate call events") {
    import session.implicits._
    val appConfig = appConfigForTest()
    val scenario = TestHelper.loadScenario[CallEvent](s"$pathToTestScenarios/pdd_events.json")
    val scenarioIter = scenario.toIterator
    scenario.nonEmpty shouldBe true

    testUtils.createTopic(kafkaTopic, partitions, overwrite = true)
    sendNextMessages(scenarioIter, 30, _.getEventId, _.getLoggedEventTime)

    val trendDiscoveryApp = new TrendDiscoveryApp(appConfigForTest(), session)
    val eventAggregation = EventAggregation(appConfig)

    eventAggregation.process(trendDiscoveryApp.readKafkaStream())(session)
      .writeStream
      .queryName("calleventaggs")
      .format("memory")
      .outputMode(eventAggregation.outputMode)
      .start()
      .processAllAvailable()

    val df = session.sql("select * from calleventaggs")
    df.printSchema()
    df.show

    val res = session
      .sql("select avg(stats.p99) from calleventaggs")
      .collect()
      .map { r =>
        r.getAs[Double](0) }
      .head

    DiscoveryUtils.round(res) shouldEqual 7.13

  }


}

class CallEventSerializer extends Serializer[CallEvent] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
  override def serialize(topic: String, data: CallEvent): Array[Byte] = data.toByteArray
  override def close(): Unit = {}
}

class CallEventDeserializer extends Deserializer[CallEvent] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
  override def deserialize(topic: String, data: Array[Byte]): CallEvent = CallEvent.parseFrom(data)
  override def close(): Unit = {}
} 
Example 24
Source File: KafkaStream.scala    From piflow   with BSD 2-Clause "Simplified" License
package cn.piflow.bundle.streaming

import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import cn.piflow.conf.{ConfigurableStreamingStop, Port, StopGroup}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe


class KafkaStream extends ConfigurableStreamingStop{
  override var batchDuration: Int = _
  override val authorEmail: String = "[email protected]"
  override val description: String = "Read data from kafka"
  override val inportList: List[String] = List(Port.DefaultPort)
  override val outportList: List[String] = List(Port.DefaultPort)

  var brokers:String = _
  var groupId:String = _
  var topics:Array[String] = _
  override def setProperties(map: Map[String, Any]): Unit = {
    brokers = MapUtil.get(map, key = "brokers").asInstanceOf[String]
    groupId = MapUtil.get(map, key = "groupId").asInstanceOf[String]
    topics = MapUtil.get(map, key = "topics").asInstanceOf[String].split(",").map(x => x.trim)
    val timing = MapUtil.get(map, key = "batchDuration")
    batchDuration = if (timing == None) new Integer(1) else timing.asInstanceOf[String].toInt
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor : List[PropertyDescriptor] = List()
    val brokers = new PropertyDescriptor().name("brokers").displayName("brokers").description("kafka brokers, separated by ','").defaultValue("").required(true)
    val groupId = new PropertyDescriptor().name("groupId").displayName("groupId").description("kafka consumer group").defaultValue("group").required(true)
    val topics = new PropertyDescriptor().name("topics").displayName("topics").description("kafka topics").defaultValue("").required(true)
    val batchDuration = new PropertyDescriptor().name("batchDuration").displayName("batchDuration").description("the streaming batch duration").defaultValue("1").required(true)
    descriptor = brokers :: descriptor
    descriptor = groupId :: descriptor
    descriptor = topics :: descriptor
    descriptor = batchDuration :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/streaming/KafkaStream.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.StreamingGroup)
  }

  override def getDStream(ssc: StreamingContext): DStream[String] = {
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> brokers,
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupId,
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false:java.lang.Boolean)
    )
    val stream = KafkaUtils.createDirectStream[String,String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )
    stream.map(record => record.key() + "," + record.value())
    //stream.asInstanceOf[DStream[ConsumerRecord]]

  }

  override def initialize(ctx: ProcessContext): Unit = {}

  override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {}
} 
Example 25
Source File: ConfManager.scala    From HadoopLearning   with MIT License
package com.utils

import java.util.regex.Pattern

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.mutable.HashMap

/**
  * Description: Spark Streaming configuration
  *
  * @author liumm
  * @since 2018-07-27 20:27
  */
object ConfManager {

  /**
    * Maximum number of records written per batch
    */
  val maxRecords = 1000

  /**
    * Configure Kafka
    *
    * @param streamConf
    * @return
    */
  def kafkaParam(streamConf: StreamConf): (Map[String, Object], Pattern) = {
    (getConsumerConfig(streamConf.brokers, streamConf.groupId), Pattern.compile(streamConf.topics))
  }

  def kafkaParamForMetadata(streamConf: StreamConf): Map[String, String] = {
    val kafkaParams = new HashMap[String, String]()
    kafkaParams += (ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> streamConf.brokers)
    kafkaParams += ("metadata.broker.list" -> streamConf.brokers)
    kafkaParams += (ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "smallest")
    kafkaParams += (ConsumerConfig.GROUP_ID_CONFIG -> streamConf.groupId)
    kafkaParams.toMap
  }

  /**
    * Build the Kafka consumer configuration
    *
    * @return the Kafka consumer configuration
    */
  private def getConsumerConfig(brokers: String, groupId: String): Map[String, Object] = {
    val kafkaParams = new HashMap[String, Object]()

    kafkaParams += (ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers)
    kafkaParams += (ConsumerConfig.GROUP_ID_CONFIG -> groupId)
    kafkaParams += (ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])
    kafkaParams += (ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])

    kafkaParams += (ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> new Integer(3 * 1024 * 1024))
    kafkaParams += (ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> new Integer(100))

    kafkaParams += (ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest")
    // Disable Kafka's automatic offset commits
    kafkaParams += (ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean))

    kafkaParams.toMap
  }

  def newStreamConf() = {
    val conf = new StreamConf()
    conf.zkUrl = "hdp01:2181"
    conf.brokers = "hdp01:9092"
    conf.groupId = "liumm_group"
    conf.topics = "i57_.*"
    conf
  }

} 
Example 26
Source File: WordCountConsumer.scala    From akka_streams_tutorial   with MIT License
package alpakka.kafka

import akka.Done
import akka.actor.{ActorSystem, Props}
import akka.kafka.scaladsl.Consumer.DrainingControl
import akka.kafka.scaladsl.{Committer, Consumer}
import akka.kafka.{CommitterSettings, ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.Sink
import akka.util.Timeout
import alpakka.kafka.TotalFake.{IncrementMessage, IncrementWord}
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.{LongDeserializer, StringDeserializer}

import scala.concurrent.Future
import scala.concurrent.duration._


object WordCountConsumer extends App {
  implicit val system = ActorSystem("WordCountConsumer")
  implicit val ec = system.dispatcher

  val total = system.actorOf(Props[TotalFake], "totalFake")

  val committerSettings = CommitterSettings(system).withMaxBatch(1)

  def createConsumerSettings(group: String): ConsumerSettings[String, java.lang.Long] = {
    ConsumerSettings(system, new StringDeserializer , new LongDeserializer)
      .withBootstrapServers("localhost:9092")
      .withGroupId(group)
      //Define consumer behavior upon starting to read a partition for which it does not have a committed offset or if the committed offset it has is invalid
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
  }

  def createAndRunConsumerWordCount(id: String) = {
    Consumer.committableSource(createConsumerSettings("wordcount consumer group"), Subscriptions.topics("wordcount-output"))
      .mapAsync(1) { msg =>
        //println(s"$id - Offset: ${msg.record.offset()} - Partition: ${msg.record.partition()} Consume msg with key: ${msg.record.key()} and value: ${msg.record.value()}")
        if (msg.record.key().equalsIgnoreCase("fakeNews")) { //hardcoded because WordCountProducer.fakeNewsKeyword does not work
          import akka.pattern.ask
          implicit val askTimeout: Timeout = Timeout(3.seconds)
          (total ? IncrementWord(msg.record.value.toInt, id))
            .mapTo[Done]
            .map(_ => msg.committableOffset)
        } else {
          Future(msg).map(_ => msg.committableOffset)
        }
      }
      .via(Committer.flow(committerSettings))
      .toMat(Sink.seq)(DrainingControl.apply)
      .run()
  }

  def createAndRunConsumerMessageCount(id: String) = {
    Consumer.committableSource(createConsumerSettings("messagecount consumer group"), Subscriptions.topics("messagecount-output"))
      .mapAsync(1) { msg =>
        //println(s"$id - Offset: ${msg.record.offset()} - Partition: ${msg.record.partition()} Consume msg with key: ${msg.record.key()} and value: ${msg.record.value()}")
        import akka.pattern.ask
        implicit val askTimeout: Timeout = Timeout(3.seconds)
        (total ? IncrementMessage(msg.record.value.toInt, id))
          .mapTo[Done]
          .map(_ => msg.committableOffset)
      }
      .via(Committer.flow(committerSettings))
      .toMat(Sink.seq)(DrainingControl.apply)
      .run()
  }

  val drainingControlW1 = createAndRunConsumerWordCount("W.1")
  val drainingControlW2 = createAndRunConsumerWordCount("W.2")
  val drainingControlM = createAndRunConsumerMessageCount("M")


  sys.addShutdownHook{
    println("Got control-c cmd from shell, about to shutdown...")
    drainingControlW1.drainAndShutdown()
    drainingControlW2.drainAndShutdown()
    drainingControlM.drainAndShutdown()
  }
} 
Example 27
Source File: SKRSpec.scala    From spark-kafka-writer   with Apache License 2.0 5 votes vote down vote up
package com.github.benfradet.spark.kafka.writer

import java.util.concurrent.atomic.AtomicInteger

import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.scalatest.concurrent.Eventually
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}

import scala.collection.mutable.ArrayBuffer
import scala.util.Random
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

case class Foo(a: Int, b: String)

trait SKRSpec
  extends AnyWordSpec
  with Matchers
  with BeforeAndAfterEach
  with BeforeAndAfterAll
  with Eventually {

  val sparkConf = new SparkConf()
    .setMaster("local[1]")
    .setAppName(getClass.getSimpleName)

  var ktu: KafkaTestUtils = _
  override def beforeAll(): Unit = {
    ktu = new KafkaTestUtils
    ktu.setup()
  }
  override def afterAll(): Unit = {
    SKRSpec.callbackTriggerCount.set(0)
    if (ktu != null) {
      ktu.tearDown()
      ktu = null
    }
  }

  var topic: String = _
  var ssc: StreamingContext = _
  var spark: SparkSession = _
  override def afterEach(): Unit = {
    if (ssc != null) {
      ssc.stop()
      ssc = null
    }
    if (spark != null) {
      spark.stop()
      spark = null
    }
  }
  override def beforeEach(): Unit = {
    ssc = new StreamingContext(sparkConf, Seconds(1))
    spark = SparkSession.builder
      .config(sparkConf)
      .getOrCreate()
    topic = s"topic-${Random.nextInt()}"
    ktu.createTopics(topic)
  }

  def collect(ssc: StreamingContext, topic: String): ArrayBuffer[String] = {
    val kafkaParams = Map(
      "bootstrap.servers" -> ktu.brokerAddress,
      "auto.offset.reset" -> "earliest",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "test-collect"
    )
    val results = new ArrayBuffer[String]
    KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Set(topic), kafkaParams)
    ).map(_.value())
      .foreachRDD { rdd =>
        results ++= rdd.collect()
        ()
      }
    results
  }

  val producerConfig = Map(
    "bootstrap.servers" -> "127.0.0.1:9092",
    "key.serializer" -> classOf[StringSerializer].getName,
    "value.serializer" -> classOf[StringSerializer].getName
  )
}

object SKRSpec {
  val callbackTriggerCount = new AtomicInteger()
} 
Example 28
Source File: WebSocket.scala    From trucking-iot   with Apache License 2.0 5 votes vote down vote up
package controllers

import javax.inject.{Inject, Singleton}

import akka.actor.{Actor, ActorRef, ActorSystem, Props}
import akka.kafka.scaladsl.Consumer
import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.Sink
import akka.stream.{Materializer, ThrottleMode}
import com.typesafe.config.ConfigFactory
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, StringDeserializer}
import play.api.libs.streams.ActorFlow
import play.api.mvc.{Controller, WebSocket}

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import scala.concurrent.duration._


//@Singleton
class KafkaWebSocket @Inject() (implicit system: ActorSystem, materializer: Materializer) extends Controller {

  def kafkaWS = WebSocket.accept[String, String] { request =>
    ActorFlow.actorRef(out => KafkaWSActor.props(out))
  }

  object KafkaWSActor {
    def props(outRef: ActorRef) = Props(new KafkaWSActor(outRef))
  }

  class KafkaWSActor(outRef: ActorRef) extends Actor {

    val config = ConfigFactory.load()
    val combinedConfig = ConfigFactory.defaultOverrides()
      .withFallback(config)
      .withFallback(ConfigFactory.defaultApplication())
      .getConfig("trucking-web-application.backend")

    val consumerSettings = ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
      //.withBootstrapServers("sandbox-hdf.hortonworks.com:6667")
      .withBootstrapServers(combinedConfig.getString("kafka.bootstrap-servers"))
      .withGroupId("group1")
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

    Consumer.committableSource(consumerSettings, Subscriptions.topics("trucking_data_joined"))
      .mapAsync(1) { msg => Future(outRef ! msg.record.value).map(_ => msg) }
      //.mapAsync(1) { msg => msg.committableOffset.commitScaladsl() } // TODO: Disabling commits for debug
      .throttle(1, 250.milliseconds, 1, ThrottleMode.Shaping)
      .runWith(Sink.ignore)

    def receive = {
      case msg: String => outRef ! s"Ack: $msg"
    }
  }

} 
Example 29
Source File: ConsumerStream.scala    From reactive-kafka-microservice-template   with Apache License 2.0 5 votes vote down vote up
package com.omearac.consumers

import akka.actor.{ActorRef, ActorSystem}
import akka.kafka.ConsumerMessage.CommittableOffsetBatch
import akka.kafka.scaladsl.Consumer
import akka.kafka.{ConsumerMessage, ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.{Flow, Sink}
import com.omearac.shared.EventMessages.FailedMessageConversion
import com.omearac.shared.JsonMessageConversion.Conversion
import com.omearac.shared.{AkkaStreams, EventSourcing}
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, StringDeserializer}

import scala.collection.mutable.ArrayBuffer
import scala.concurrent.Future



trait ConsumerStream extends AkkaStreams with EventSourcing {
    implicit val system: ActorSystem
    def self: ActorRef


    def createStreamSink(consumerActorSink : ActorRef) = {
        Sink.actorRefWithAck(consumerActorSink, "STREAM_INIT", "OK", "STREAM_DONE")
    }

    def createStreamSource(consumerProperties: Map[String,String])  = {
        val kafkaMBAddress = consumerProperties("bootstrap-servers")
        val groupID = consumerProperties("groupId")
        val topicSubscription = consumerProperties("subscription-topic")
        val consumerSettings = ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
            .withBootstrapServers(kafkaMBAddress)
            .withGroupId(groupID)
            .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

        Consumer.committableSource(consumerSettings, Subscriptions.topics(topicSubscription))
    }

    def createStreamFlow[msgType: Conversion] = {
        Flow[ConsumerMessage.CommittableMessage[Array[Byte], String]]
            .map(msg => (msg.committableOffset, Conversion[msgType].convertFromJson(msg.record.value)))
            //Publish the conversion error event messages returned from the JSONConversion
            .map (tuple => publishConversionErrors[msgType](tuple))
            .filter(result => result.isRight)
            .map(test => test.right.get)
            //Group the commit offsets and correctly converted messages for more efficient Kafka commits
            .batch(max = 20, tuple => (CommittableOffsetBatch.empty.updated(tuple._1), ArrayBuffer[msgType](tuple._2)))
            {(tupleOfCommitOffsetAndMsgs, tuple) =>
            (tupleOfCommitOffsetAndMsgs._1.updated(tuple._1), tupleOfCommitOffsetAndMsgs._2 :+ tuple._2)
            }
            //Take the first element of the tuple (set of commit numbers) to add to kafka commit log and then return the collection of grouped case class messages
            .mapAsync(4)(tupleOfCommitOffsetAndMsgs => commitOffsetsToKafka[msgType](tupleOfCommitOffsetAndMsgs))
            .map(msgGroup => msgGroup._2)
    }

    def commitOffsetsToKafka[msgType](tupleOfCommitOffsetAndMsgs: (ConsumerMessage.CommittableOffsetBatch, ArrayBuffer[msgType])) = Future {
        (tupleOfCommitOffsetAndMsgs._1.commitScaladsl(), tupleOfCommitOffsetAndMsgs._2)
    }

    def publishConversionErrors[msgType](tupleOfCommitOffsetAndConversionResults: (ConsumerMessage.CommittableOffset, Either[FailedMessageConversion,msgType]))
    : Either[Unit,(ConsumerMessage.CommittableOffset,msgType)] = {

        if (tupleOfCommitOffsetAndConversionResults._2.isLeft) {

            //Publish a local event that there was a failure in conversion
            publishLocalEvent(tupleOfCommitOffsetAndConversionResults._2.left.get)

            //Commit the Kafka Offset to acknowledge that the message was consumed
            Left(tupleOfCommitOffsetAndConversionResults._1.commitScaladsl())
        }
        else
            Right(tupleOfCommitOffsetAndConversionResults._1,tupleOfCommitOffsetAndConversionResults._2.right.get)
    }
} 
Example 30
Source File: OpenWhiskEvents.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.core.monitoring.metrics

import akka.actor.{ActorSystem, CoordinatedShutdown}
import akka.event.slf4j.SLF4JLogging
import akka.http.scaladsl.Http
import akka.kafka.ConsumerSettings
import akka.stream.ActorMaterializer
import com.typesafe.config.Config
import kamon.Kamon
import kamon.prometheus.PrometheusReporter
import org.apache.kafka.common.serialization.StringDeserializer
import pureconfig._
import pureconfig.generic.auto._

import scala.concurrent.duration.FiniteDuration
import scala.concurrent.{ExecutionContext, Future}

object OpenWhiskEvents extends SLF4JLogging {

  case class MetricConfig(port: Int,
                          enableKamon: Boolean,
                          ignoredNamespaces: Set[String],
                          renameTags: Map[String, String],
                          retry: RetryConfig)

  case class RetryConfig(minBackoff: FiniteDuration, maxBackoff: FiniteDuration, randomFactor: Double, maxRestarts: Int)

  def start(config: Config)(implicit system: ActorSystem,
                            materializer: ActorMaterializer): Future[Http.ServerBinding] = {
    implicit val ec: ExecutionContext = system.dispatcher

    val prometheusReporter = new PrometheusReporter()
    Kamon.registerModule("prometheus", prometheusReporter)
    Kamon.init(config)

    val metricConfig = loadConfigOrThrow[MetricConfig](config, "whisk.user-events")

    val prometheusRecorder = PrometheusRecorder(prometheusReporter, metricConfig)
    val recorders = if (metricConfig.enableKamon) Seq(prometheusRecorder, KamonRecorder) else Seq(prometheusRecorder)
    val eventConsumer = EventConsumer(eventConsumerSettings(defaultConsumerConfig(config)), recorders, metricConfig)

    CoordinatedShutdown(system).addTask(CoordinatedShutdown.PhaseBeforeServiceUnbind, "shutdownConsumer") { () =>
      eventConsumer.shutdown()
    }
    val port = metricConfig.port
    val api = new PrometheusEventsApi(eventConsumer, prometheusRecorder)
    val httpBinding = Http().bindAndHandle(api.routes, "0.0.0.0", port)
    httpBinding.foreach(_ => log.info(s"Started the http server on http://localhost:$port"))(system.dispatcher)
    httpBinding
  }

  def eventConsumerSettings(config: Config): ConsumerSettings[String, String] =
    ConsumerSettings(config, new StringDeserializer, new StringDeserializer)

  def defaultConsumerConfig(globalConfig: Config): Config = globalConfig.getConfig("akka.kafka.consumer")
} 
Example 31
Source File: ProcessingKafkaApplication.scala    From Akka-Cookbook   with MIT License 5 votes vote down vote up
package com.packt.chapter8

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerSettings, Subscriptions}
import akka.stream.{ActorMaterializer, ClosedShape}
import akka.stream.scaladsl.{Flow, GraphDSL, RunnableGraph, Sink, Source}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}

import scala.concurrent.duration._

object ProcessingKafkaApplication extends App {
  implicit val actorSystem = ActorSystem("SimpleStream")
  implicit val actorMaterializer = ActorMaterializer()

  val bootstrapServers = "localhost:9092"
  val kafkaTopic = "akka_streams_topic"
  val partition = 0
  val subscription = Subscriptions.assignment(new TopicPartition(kafkaTopic, partition))

  val consumerSettings = ConsumerSettings(actorSystem, new ByteArrayDeserializer, new StringDeserializer)
    .withBootstrapServers(bootstrapServers)
    .withGroupId("akka_streams_group")
    .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  val producerSettings = ProducerSettings(actorSystem, new ByteArraySerializer, new StringSerializer)
    .withBootstrapServers(bootstrapServers)

  val runnableGraph = RunnableGraph.fromGraph(GraphDSL.create() { implicit builder =>
    import GraphDSL.Implicits._

    val tickSource = Source.tick(0 seconds, 5 seconds, "Hello from Akka Streams using Kafka!")
    val kafkaSource = Consumer.plainSource(consumerSettings, subscription)
    val kafkaSink = Producer.plainSink(producerSettings)
    val printlnSink = Sink.foreach(println)

    val mapToProducerRecord = Flow[String].map(elem => new ProducerRecord[Array[Byte], String](kafkaTopic, elem))
    val mapFromConsumerRecord = Flow[ConsumerRecord[Array[Byte], String]].map(record => record.value())

    tickSource  ~> mapToProducerRecord   ~> kafkaSink
    kafkaSource ~> mapFromConsumerRecord ~> printlnSink

    ClosedShape
  })

  runnableGraph.run()
} 
Example 32
Source File: KafkaClient.scala    From mist   with Apache License 2.0 5 votes vote down vote up
package io.hydrosphere.mist.master.interfaces.async.kafka

import java.util.UUID
import java.util.concurrent.atomic.AtomicBoolean

import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

import scala.collection.JavaConverters._
import scala.concurrent.{Future, Promise}

class TopicProducer[K, V](
  producer: KafkaProducer[K, V],
  topic: String
) {

  def send(key:K, value: V): Unit = {
    val record = new ProducerRecord(topic, key, value)
    producer.send(record)
  }
  def close(): Unit = {
    producer.close()
  }

}

object TopicProducer {

  def apply(
    host: String,
    port: Int,
    topic: String): TopicProducer[String, String] = {

    val props = new java.util.Properties()
    props.put("bootstrap.servers", s"$host:$port")

    val producer = new KafkaProducer(props, new StringSerializer, new StringSerializer)
    new TopicProducer(producer, topic)
  }
}

class TopicConsumer[K, V](
  consumer: KafkaConsumer[K, V],
  topic: String,
  timeout: Long = 100
) {

  private val promise = Promise[Unit]
  private val stopped = new AtomicBoolean(false)

  def subscribe(f: (K, V) => Unit): Future[Unit] = {
    run(f)
    promise.future
  }

  private def run(f: (K, V) => Unit): Unit = {
    consumer.subscribe(Seq(topic).asJava)
    val thread = new Thread(new Runnable {
      override def run(): Unit = {
        while (!stopped.get()) {
          val records = consumer.poll(timeout).asScala
          records.foreach(r => f(r.key(), r.value()))
        }
        promise.success(())
      }
    })
    thread.setName(s"kafka-topic-consumer-$topic")
    thread.start()
  }

  def close(): Future[Unit] = {
    stopped.set(true)
    promise.future
  }
}

object TopicConsumer {

  def apply(
    host: String,
    port: Int,
    topic: String): TopicConsumer[String, String] = {

    val props = new java.util.Properties()
    props.put("bootstrap.servers", s"$host:$port")
    props.put("group.id", "mist-" + UUID.randomUUID().toString)
    props.put("enable.auto.commit", "true")
    props.put("auto.commit.interval.ms", "1000")
    props.put("session.timeout.ms", "30000")

    val consumer = new KafkaConsumer(props, new StringDeserializer, new StringDeserializer)
    new TopicConsumer(consumer, topic)
  }

} 
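Usage note (not part of the original project): a minimal, hypothetical smoke test wiring the TopicProducer and TopicConsumer above together against a local broker; the topic name and broker address are assumptions.

object KafkaClientSketch extends App {
  val producer = TopicProducer("localhost", 9092, "mist-events")
  val consumer = TopicConsumer("localhost", 9092, "mist-events")

  // Print every record as it arrives, then publish a single message
  consumer.subscribe((key, value) => println(s"$key -> $value"))
  producer.send("job-1", """{"status":"started"}""")

  Thread.sleep(2000)
  producer.close()
  consumer.close()
}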
Example 33
Source File: InitialSpec.scala    From embedded-kafka   with Apache License 2.0 5 votes vote down vote up
package com.tuplejump.embedded.kafka

import java.util.concurrent.{TimeUnit, CountDownLatch}

import org.apache.kafka.common.serialization.StringDeserializer
import org.scalatest.concurrent.Eventually
import org.scalatest.concurrent.PatienceConfiguration.Timeout
import org.scalatest.time.{Millis, Span}

class InitialSpec extends AbstractSpec with Eventually with Logging {

  private val timeout = Timeout(Span(10000, Millis))

  "Initially, EmbeddedKafka" must {
    val kafka = new EmbeddedKafka()
    val topic = "test"
    val total = 1000
    val latch = new CountDownLatch(total)

    "start embedded zookeeper and embedded kafka" in {
      kafka.isRunning should be (false)
      kafka.start()
      eventually(timeout)(kafka.isRunning)
    }
    "create a topic" in {
      kafka.createTopic(topic, 1, 1)
    }
    "publish messages to the embedded kafka instance" in {
      val config = kafka.consumerConfig(
        group = "some.group",
        kafkaConnect = kafka.kafkaConfig.hostName + ":" + kafka.kafkaConfig.port,
        zkConnect = kafka.kafkaConfig.zkConnect,
        offsetPolicy = "largest",//latest with new consumer
        autoCommitEnabled = true,
        kDeserializer = classOf[StringDeserializer],
        vDeserializer = classOf[StringDeserializer])
      val consumer = new SimpleConsumer(latch, config, topic, "consumer.group", 1, 1)

      val batch1 = for (n <- 0 until total) yield s"message-test-$n"

      logger.info(s"Publishing ${batch1.size} messages...")

      kafka.sendMessages(topic, batch1)
      latch.await(10000, TimeUnit.MILLISECONDS)
      latch.getCount should be (0)

      consumer.shutdown()
    }
    "shut down relatively cleanly for now" in {
      kafka.shutdown()
      eventually(timeout)(!kafka.isRunning)
    }
  }
} 
Example 34
Source File: KafkaUtility.scala    From real-time-stream-processing-engine   with Apache License 2.0 5 votes vote down vote up
package com.knoldus.streaming.kafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}


object KafkaUtility {

  // TODO: these values should be read from configuration
  private val kafkaParams = Map(
    "bootstrap.servers" -> "localhost:9092",
    "key.deserializer" -> classOf[StringDeserializer],
    "value.deserializer" -> classOf[StringDeserializer],
    "auto.offset.reset" -> "earliest",
    "group.id" -> "tweet-consumer"
  )

  private val preferredHosts = LocationStrategies.PreferConsistent


  def createDStreamFromKafka(ssc: StreamingContext, topics: List[String]): InputDStream[ConsumerRecord[String, String]] =
    KafkaUtils.createDirectStream[String, String](
      ssc,
      preferredHosts,
      ConsumerStrategies.Subscribe[String, String](topics.distinct, kafkaParams)
    )

} 
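Usage note (not part of the original project): a short sketch, under the assumption of a locally created StreamingContext, of how createDStreamFromKafka might be consumed; the topic name is hypothetical.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaUtilitySketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("kafka-utility-sketch")
    val ssc = new StreamingContext(conf, Seconds(10))

    // Each record is a ConsumerRecord[String, String]; keep only the value for printing
    val tweets = KafkaUtility.createDStreamFromKafka(ssc, List("tweets"))
    tweets.map(_.value()).print()

    ssc.start()
    ssc.awaitTermination()
  }
}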
Example 35
Source File: Streams.scala    From haystack-trends   with Apache License 2.0 5 votes vote down vote up
package com.expedia.www.haystack.trends.kstream

import java.util.function.Supplier

import com.expedia.metrics.MetricData
import com.expedia.www.haystack.commons.kstreams.serde.metricdata.{MetricDataSerde, MetricTankSerde}
import com.expedia.www.haystack.trends.aggregation.TrendMetric
import com.expedia.www.haystack.trends.config.AppConfiguration
import com.expedia.www.haystack.trends.kstream.processor.{AdditionalTagsProcessorSupplier, ExternalKafkaProcessorSupplier, MetricAggProcessorSupplier}
import com.expedia.www.haystack.trends.kstream.store.HaystackStoreBuilder
import org.apache.kafka.common.serialization.{Serde, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.Topology
import org.apache.kafka.streams.state.{KeyValueStore, StoreBuilder}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters

class Streams(appConfiguration: AppConfiguration) extends Supplier[Topology] {

  private val LOGGER = LoggerFactory.getLogger(classOf[Streams])
  private val TOPOLOGY_SOURCE_NAME = "metricpoint-source"
  private val TOPOLOGY_EXTERNAL_SINK_NAME = "metricpoint-aggregated-sink-external"
  private val TOPOLOGY_INTERNAL_SINK_NAME = "metric-data-aggregated-sink-internal"
  private val TOPOLOGY_AGGREGATOR_PROCESSOR_NAME = "metricpoint-aggregator-process"
  private val TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME = "additional-tags-process"
  private val TOPOLOGY_AGGREGATOR_TREND_METRIC_STORE_NAME = "trend-metric-store"
  private val kafkaConfig = appConfiguration.kafkaConfig

  private def initialize(topology: Topology): Topology = {

    //add source - topic where the raw metricpoints are pushed by the span-timeseries-transformer
    topology.addSource(
      kafkaConfig.autoOffsetReset,
      TOPOLOGY_SOURCE_NAME,
      kafkaConfig.timestampExtractor,
      new StringDeserializer,
      new MetricTankSerde().deserializer(),
      kafkaConfig.consumeTopic)


    //The processor which performs aggregations on the metrics
    topology.addProcessor(
      TOPOLOGY_AGGREGATOR_PROCESSOR_NAME,
      new MetricAggProcessorSupplier(TOPOLOGY_AGGREGATOR_TREND_METRIC_STORE_NAME, appConfiguration.encoder),
      TOPOLOGY_SOURCE_NAME)


    //key-value, state store associated with each kstreams task(partition)
    // which keeps the trend-metrics which are currently being computed in memory
    topology.addStateStore(createTrendMetricStateStore(), TOPOLOGY_AGGREGATOR_PROCESSOR_NAME)

    // topology to add additional tags if any
    topology.addProcessor(TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME, new AdditionalTagsProcessorSupplier(appConfiguration.additionalTags), TOPOLOGY_AGGREGATOR_PROCESSOR_NAME)

    if (appConfiguration.kafkaConfig.producerConfig.enableExternalKafka) {
      topology.addProcessor(
        TOPOLOGY_EXTERNAL_SINK_NAME,
        new ExternalKafkaProcessorSupplier(appConfiguration.kafkaConfig.producerConfig),
        TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME
        )
    }

    // adding sinks
    appConfiguration.kafkaConfig.producerConfig.kafkaSinkTopics.foreach(sinkTopic => {
      if(sinkTopic.enabled){
        val serde = Class.forName(sinkTopic.serdeClassName).newInstance().asInstanceOf[Serde[MetricData]]
        topology.addSink(
          s"${TOPOLOGY_INTERNAL_SINK_NAME}-${sinkTopic.topic}",
          sinkTopic.topic,
          new StringSerializer,
          serde.serializer(),
          TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME)
      }
    })

    topology
  }


  private def createTrendMetricStateStore(): StoreBuilder[KeyValueStore[String, TrendMetric]] = {

    val stateStoreConfiguration = appConfiguration.stateStoreConfig

    val storeBuilder = new HaystackStoreBuilder(TOPOLOGY_AGGREGATOR_TREND_METRIC_STORE_NAME, stateStoreConfiguration.stateStoreCacheSize)

    if (stateStoreConfiguration.enableChangeLogging) {
      storeBuilder
        .withLoggingEnabled(JavaConverters.mapAsJavaMap(stateStoreConfiguration.changeLogTopicConfiguration))

    } else {
      storeBuilder
        .withLoggingDisabled()
    }
  }


  override def get(): Topology = {
    val topology = new Topology
    initialize(topology)
  }
} 
Example 36
Source File: WordCountTestableSpec.scala    From kafka-streams   with Apache License 2.0 5 votes vote down vote up
package com.supergloo.examples

import com.supergloo.WordCountTestable
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.apache.kafka.streams.TopologyTestDriver
import org.apache.kafka.streams.state.KeyValueStore
import org.apache.kafka.streams.test.ConsumerRecordFactory
import org.scalatest.{FlatSpec, Matchers}

class WordCountTestableSpec extends FlatSpec with Matchers with KafkaTestSetup {

  val wordCountApplication = new WordCountTestable

  "Convert streaming data into lowercase and publish into output topic" should "push lower text to kafka" in {
    val driver = new TopologyTestDriver(wordCountApplication.toLowerCaseStream("input-topic", "output-topic"), config)
    val recordFactory = new ConsumerRecordFactory("input-topic", new StringSerializer(), new StringSerializer())
    val words = "Hello, WORLDY, World worlD Test"
    driver.pipeInput(recordFactory.create(words))
    val record: ProducerRecord[String, String] = driver.readOutput("output-topic", new StringDeserializer(), new StringDeserializer())
    record.value() shouldBe words.toLowerCase
    driver.close()
  }

  "WordCountTestable" should "count number of words" in {
    val driver = new TopologyTestDriver(wordCountApplication.countNumberOfWords("input-topic", "output-topic", "counts-store"), config)
    val recordFactory = new ConsumerRecordFactory("input-topic", new StringSerializer(), new StringSerializer())
    val words = "Hello Kafka Streams, All streams lead to Kafka"
    driver.pipeInput(recordFactory.create(words))
    val store: KeyValueStore[String, java.lang.Long] = driver.getKeyValueStore("counts-store")
    store.get("hello") shouldBe 1
    store.get("kafka") shouldBe 2
    store.get("streams") shouldBe 2
    store.get("lead") shouldBe 1
    store.get("to") shouldBe 1
    driver.close()

  }

} 
Example 37
Source File: TheFlashTweetsConsumer.scala    From KafkaPlayground   with GNU General Public License v3.0 5 votes vote down vote up
package com.github.pedrovgs.kafkaplayground.flash

import cakesolutions.kafka.KafkaConsumer.Conf
import com.github.pedrovgs.kafkaplayground.flash.elasticsearch.ElasticClient
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConverters._
import scala.concurrent.duration._

object TheFlashTweetsConsumer {
  private val groupId = "kafka-elasticsearch-consumer"
}

class TheFlashTweetsConsumer(private val brokerAddress: String,
                             private val topic: String,
                             private val elasticClient: ElasticClient) {

  import TheFlashTweetsConsumer._

  private val consumer = cakesolutions.kafka.KafkaConsumer(
    Conf(
      bootstrapServers = brokerAddress,
      keyDeserializer = new StringDeserializer(),
      valueDeserializer = new StringDeserializer(),
      groupId = s"$topic-$groupId",
      autoOffsetReset = OffsetResetStrategy.EARLIEST
    )
  )
  consumer.subscribe(List(topic).asJava)

  def poll(): Unit = {
    println(s"Polling messages from the kafka consumer at topic: $topic.")
    val records = consumer.poll(10.seconds.toMillis)
    println(s"We've fetched ${records.count()} records.")
    records.forEach { record =>
      val id      = s"${record.topic()}_${record.partition()}_${record.offset()}"
      val content = record.value()
      println(s"Saving content from the topic $topic content into elastic: $content")
      elasticClient.insertOrUpdate(id, content)
    }
  }

} 
Example 38
Source File: EmbeddedKafkaServer.scala    From KafkaPlayground   with GNU General Public License v3.0 5 votes vote down vote up
package com.github.pedrovgs.kafkaplayground.utils

import cakesolutions.kafka.KafkaProducerRecord
import cakesolutions.kafka.testkit.KafkaServer
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.scalatest.{BeforeAndAfter, Suite}

import scala.concurrent.duration._

trait EmbeddedKafkaServer extends BeforeAndAfter {
  this: Suite =>

  private var kafkaServer: KafkaServer = _

  before {
    kafkaServer = new KafkaServer
    startKafkaServer()
  }

  after {
    stopKafkaServer()
  }

  def startKafkaServer(): Unit = kafkaServer.startup()

  def stopKafkaServer(): Unit = kafkaServer.close()

  def kafkaServerAddress(): String = s"localhost:${kafkaServer.kafkaPort}"

  def zookeeperServerAddress(): String = s"localhost:${kafkaServer.zookeeperPort}"

  def recordsForTopic(topic: String, expectedNumberOfRecords: Int = 1): Iterable[String] =
    kafkaServer
      .consume[String, String](
        topic = topic,
        keyDeserializer = new StringDeserializer,
        valueDeserializer = new StringDeserializer,
        expectedNumOfRecords = expectedNumberOfRecords,
        timeout = 10.seconds.toMillis
      )
      .map(_._2)

  def produceMessage(topic: String, content: String): Unit =
    kafkaServer.produce(
      topic = topic,
      records = Seq(KafkaProducerRecord[String, String](topic = topic, value = content)),
      keySerializer = new StringSerializer(),
      valueSerializer = new StringSerializer()
    )

} 
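Usage note (not part of the original project): a hedged sketch of a spec mixing in the EmbeddedKafkaServer trait above; the topic name and message content are hypothetical.

import org.scalatest.{FlatSpec, Matchers}

class EmbeddedKafkaRoundTripSpec extends FlatSpec with Matchers with EmbeddedKafkaServer {

  "The embedded Kafka server" should "round-trip a produced message" in {
    produceMessage(topic = "greetings", content = "hello kafka")
    // recordsForTopic blocks until the expected number of records arrive (or the timeout hits)
    recordsForTopic("greetings") should contain("hello kafka")
  }
}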
Example 39
Source File: KafkaWirings.scala    From ticket-booking-aecor   with Apache License 2.0 5 votes vote down vote up
package ru.pavkin.booking

import aecor.data.{ Committable, ConsumerId }
import cats.data.NonEmptyList
import cats.effect._
import fs2.kafka.{ AutoOffsetReset, ConsumerSettings, _ }
import org.apache.kafka.common.serialization.StringDeserializer
import ru.pavkin.payment.event.PaymentReceived
import ru.pavkin.payment.kafka.PaymentReceivedEventDeserializer

import scala.concurrent.ExecutionContext

final class KafkaWirings[F[_]](
  val paymentReceivedEventStream: ConsumerId => fs2.Stream[F, Committable[F, PaymentReceived]]
)

object KafkaWirings {

  def apply[F[_]: ConcurrentEffect: ContextShift: Timer]: KafkaWirings[F] = {

    def paymentReceivedEventStream(
      consumerId: ConsumerId
    ): fs2.Stream[F, Committable[F, PaymentReceived]] =
      for {
        executionContext <- consumerExecutionContextStream[F]
        settings = bookingPaymentProcessSourceSettings(executionContext).withGroupId(
          consumerId.value
        )
        consumer <- consumerStream[F].using(settings)
        _ <- consumer.subscribe(paymentReceivedTopic)
        stream <- consumer.stream.map(
                   m => Committable(m.committableOffset.commit, m.record.value())
                 )
      } yield stream

    new KafkaWirings[F](paymentReceivedEventStream)
  }

  def bookingPaymentProcessSourceSettings(
    ec: ExecutionContext
  ): ConsumerSettings[String, PaymentReceived] =
    ConsumerSettings(
      keyDeserializer = new StringDeserializer,
      valueDeserializer = new PaymentReceivedEventDeserializer,
      executionContext = ec
    ).withAutoOffsetReset(AutoOffsetReset.Earliest).withBootstrapServers("0.0.0.0:9092")

  val paymentReceivedTopic: NonEmptyList[String] = NonEmptyList.one("PaymentReceived")
} 
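Usage note (not part of the original project): a minimal sketch of draining the payment stream with cats-effect's IOApp, which supplies the ConcurrentEffect, ContextShift and Timer instances KafkaWirings needs; the consumer id and the println handler are hypothetical.

import aecor.data.ConsumerId
import cats.effect.{ExitCode, IO, IOApp}
import cats.implicits._

object PaymentStreamSketch extends IOApp {
  def run(args: List[String]): IO[ExitCode] =
    KafkaWirings[IO]
      .paymentReceivedEventStream(ConsumerId("booking-payment-sketch"))
      // Handle each event, then commit its offset
      .evalMap(c => IO(println(s"payment received: ${c.value}")) *> c.commit)
      .compile
      .drain
      .as(ExitCode.Success)
}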
Example 40
Source File: KafkaTestClient.scala    From haystack-traces   with Apache License 2.0 5 votes vote down vote up
package com.expedia.www.haystack.trace.indexer.integration.clients

import java.util.Properties

import com.expedia.www.haystack.trace.indexer.config.entities.KafkaConfiguration
import com.expedia.www.haystack.trace.indexer.integration.serdes.{SnappyCompressedSpanBufferProtoDeserializer, SpanProtoSerializer}
import com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster

object KafkaTestClient {
  val KAFKA_CLUSTER = new EmbeddedKafkaCluster(1)
  KAFKA_CLUSTER.start()
}

class KafkaTestClient {
  import KafkaTestClient._

  val INPUT_TOPIC = "spans"
  val OUTPUT_TOPIC = "span-buffer"

  val APP_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[ByteArraySerializer])
    props
  }

  val APP_CONSUMER_CONFIG: Properties = new Properties()

  val TEST_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[SpanProtoSerializer])
    props
  }

  val RESULT_CONSUMER_CONFIG = new Properties()

  def buildConfig = KafkaConfiguration(numStreamThreads = 1,
    pollTimeoutMs = 100,
    APP_CONSUMER_CONFIG, APP_PRODUCER_CONFIG, OUTPUT_TOPIC, INPUT_TOPIC,
    consumerCloseTimeoutInMillis = 3000,
    commitOffsetRetries = 3,
    commitBackoffInMillis = 250,
    maxWakeups = 5, wakeupTimeoutInMillis = 3000)

  def prepare(appId: String): Unit = {
    APP_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    APP_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-app-consumer")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SpanDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")

    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-result-consumer")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SnappyCompressedSpanBufferProtoDeserializer])

    deleteTopics(INPUT_TOPIC, OUTPUT_TOPIC)
    KAFKA_CLUSTER.createTopic(INPUT_TOPIC, 2, 1)
    KAFKA_CLUSTER.createTopic(OUTPUT_TOPIC)
  }

  private def deleteTopics(topics: String*): Unit = KAFKA_CLUSTER.deleteTopicsAndWait(topics:_*)
}