org.apache.kafka.common.serialization.StringSerializer Scala Examples

The following examples show how to use org.apache.kafka.common.serialization.StringSerializer in Scala. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
Example 1
Source File: package.scala    From kafka-scala-api   with Apache License 2.0
package com.example

import java.util.Properties

import org.apache.kafka.common.serialization.StringSerializer

package object writer {

  val topic = "first_topic"
  val numbersProducerConfig = Map(
    "bootstrap.servers" -> "127.0.0.1:9092",
    "key.serializer" -> classOf[StringSerializer].getName,
    "value.serializer" -> classOf[StringSerializer].getName
  )

  implicit def buildPropertiesFromMap(properties: Map[String, String]): Properties =
    (new Properties /: properties) {
      case (a, (k, v)) =>
        a.put(k,v)
        a
    }

  def setupLogging(): Unit = {
    import org.apache.log4j.{Level, Logger}
    val rootLogger = Logger.getRootLogger
    rootLogger.setLevel(Level.ERROR)
  }

} 
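A minimal usage sketch (not part of the original project) showing how the config map and the implicit Properties conversion above could drive a producer; the NumbersWriterExample name and the message value are illustrative.

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import com.example.writer._

object NumbersWriterExample extends App {
  // buildPropertiesFromMap converts the Map config into the java.util.Properties the producer expects.
  val producer = new KafkaProducer[String, String](numbersProducerConfig: java.util.Properties)
  producer.send(new ProducerRecord[String, String](topic, "42"))
  producer.close()
}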
Example 2
Source File: WordCountTestableSpec.scala    From kafka-streams   with Apache License 2.0
package com.supergloo.examples

import com.supergloo.WordCountTestable
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.apache.kafka.streams.TopologyTestDriver
import org.apache.kafka.streams.state.KeyValueStore
import org.apache.kafka.streams.test.ConsumerRecordFactory
import org.scalatest.{FlatSpec, Matchers}

class WordCountTestableSpec extends FlatSpec with Matchers with KafkaTestSetup {

  val wordCountApplication = new WordCountTestable

  "Convert streaming data into lowercase and publish into output topic" should "push lower text to kafka" in {
    val driver = new TopologyTestDriver(wordCountApplication.toLowerCaseStream("input-topic", "output-topic"), config)
    val recordFactory = new ConsumerRecordFactory("input-topic", new StringSerializer(), new StringSerializer())
    val words = "Hello, WORLDY, World worlD Test"
    driver.pipeInput(recordFactory.create(words))
    val record: ProducerRecord[String, String] = driver.readOutput("output-topic", new StringDeserializer(), new StringDeserializer())
    record.value() shouldBe words.toLowerCase
    driver.close()
  }

  "WordCountTestable" should "count number of words" in {
    val driver = new TopologyTestDriver(wordCountApplication.countNumberOfWords("input-topic", "output-topic", "counts-store"), config)
    val recordFactory = new ConsumerRecordFactory("input-topic", new StringSerializer(), new StringSerializer())
    val words = "Hello Kafka Streams, All streams lead to Kafka"
    driver.pipeInput(recordFactory.create(words))
    val store: KeyValueStore[String, java.lang.Long] = driver.getKeyValueStore("counts-store")
    store.get("hello") shouldBe 1
    store.get("kafka") shouldBe 2
    store.get("streams") shouldBe 2
    store.get("lead") shouldBe 1
    store.get("to") shouldBe 1
    driver.close()

  }

} 
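The spec above mixes in a KafkaTestSetup trait that is not shown. Below is a plausible sketch of the config it supplies; the application id and bootstrap address are placeholders, since TopologyTestDriver never contacts a real broker.

import java.util.Properties

import org.apache.kafka.common.serialization.Serdes
import org.apache.kafka.streams.StreamsConfig

trait KafkaTestSetupSketch {
  val config: Properties = {
    val p = new Properties()
    p.put(StreamsConfig.APPLICATION_ID_CONFIG, "word-count-test")
    p.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234")
    p.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass.getName)
    p.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass.getName)
    p
  }
}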
Example 3
Source File: TheFlashTweetsProducer.scala    From KafkaPlayground   with GNU General Public License v3.0
package com.github.pedrovgs.kafkaplayground.flash

import cakesolutions.kafka.KafkaProducer.Conf
import cakesolutions.kafka.{KafkaProducer, KafkaProducerRecord}
import com.danielasfregola.twitter4s.entities.{Geo, Tweet}
import org.apache.commons.lang.StringEscapeUtils
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.{ExecutionContext, Future}

object TheFlashTweetsProducer {
  private val unknownLocationFlashTopic = "the-flash-tweets"
  private val locatedFlashTopic         = "the-flash-tweets-with-location"
}

class TheFlashTweetsProducer(private val brokerAddress: String,
                             implicit val ec: ExecutionContext = ExecutionContext.global) {

  import TheFlashTweetsProducer._

  private val flashProducer = KafkaProducer(
    Conf(
      keySerializer = new StringSerializer(),
      valueSerializer = new StringSerializer(),
      bootstrapServers = brokerAddress,
      enableIdempotence = true,
      lingerMs = 20,
      batchSize = 32 * 1024
    ).withProperty("compression.type", "snappy")
  )

  def apply(tweet: Tweet): Future[Tweet] = {
    println(s"Sending tweet to the associated topic: ${tweet.text}")
    tweet.geo match {
      case Some(coordinates) => sendGeoLocatedFlashAdvertisement(tweet, coordinates)
      case _                 => sendUnknownLocationFlashAdvertisement(tweet)
    }
  }

  private def sendGeoLocatedFlashAdvertisement(tweet: Tweet, coordinates: Geo): Future[Tweet] =
    sendRecordToProducer(
      topic = locatedFlashTopic,
      message = s"""
           |{
           |  "latitude": ${coordinates.coordinates.head},
           |  "longitude": ${coordinates.coordinates.last},
           |  "id": "${tweet.id}",
           |  "message": "${StringEscapeUtils.escapeJava(tweet.text)}"
           |}
       """.stripMargin
    ).map(_ => tweet)

  private def sendUnknownLocationFlashAdvertisement(tweet: Tweet): Future[Tweet] =
    sendRecordToProducer(
      topic = unknownLocationFlashTopic,
      message = s"""
           |{
           |  "message": "${StringEscapeUtils.escapeJava(tweet.text)}"
           |}
        """.stripMargin
    ).map(_ => tweet)

  private def sendRecordToProducer(topic: String, message: String) =
    flashProducer.send(
      KafkaProducerRecord[String, String](topic = topic, value = message)
    )
} 
Example 4
Source File: EmbeddedKafkaServer.scala    From KafkaPlayground   with GNU General Public License v3.0
package com.github.pedrovgs.kafkaplayground.utils

import cakesolutions.kafka.KafkaProducerRecord
import cakesolutions.kafka.testkit.KafkaServer
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.scalatest.{BeforeAndAfter, Suite}

import scala.concurrent.duration._

trait EmbeddedKafkaServer extends BeforeAndAfter {
  this: Suite =>

  private var kafkaServer: KafkaServer = _

  before {
    kafkaServer = new KafkaServer
    startKafkaServer()
  }

  after {
    stopKafkaServer()
  }

  def startKafkaServer(): Unit = kafkaServer.startup()

  def stopKafkaServer(): Unit = kafkaServer.close()

  def kafkaServerAddress(): String = s"localhost:${kafkaServer.kafkaPort}"

  def zookeeperServerAddress(): String = s"localhost:${kafkaServer.zookeeperPort}"

  def recordsForTopic(topic: String, expectedNumberOfRecords: Int = 1): Iterable[String] =
    kafkaServer
      .consume[String, String](
        topic = topic,
        keyDeserializer = new StringDeserializer,
        valueDeserializer = new StringDeserializer,
        expectedNumOfRecords = expectedNumberOfRecords,
        timeout = 10.seconds.toMillis
      )
      .map(_._2)

  def produceMessage(topic: String, content: String): Unit =
    kafkaServer.produce(
      topic = topic,
      records = Seq(KafkaProducerRecord[String, String](topic = topic, value = content)),
      keySerializer = new StringSerializer(),
      valueSerializer = new StringSerializer()
    )

} 
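A short hedged sketch (not from the original repository) of a test mixing in the trait above; the topic and message are arbitrary.

import com.github.pedrovgs.kafkaplayground.utils.EmbeddedKafkaServer
import org.scalatest.{FlatSpec, Matchers}

class EmbeddedKafkaRoundTripSpec extends FlatSpec with Matchers with EmbeddedKafkaServer {

  "The embedded Kafka server" should "round-trip a message" in {
    produceMessage(topic = "greetings", content = "hello")
    recordsForTopic("greetings") should contain("hello")
  }
}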
Example 5
Source File: KafkaJsonSerializer.scala    From ticket-booking-aecor   with Apache License 2.0
package ru.pavkin.payment.kafka
import java.nio.charset.StandardCharsets
import java.util

import io.circe.parser._
import io.circe.Encoder
import org.apache.kafka.common.serialization.{ Deserializer, Serializer, StringSerializer }
import ru.pavkin.payment.event.PaymentReceived

class PaymentReceivedEventSerializer extends Serializer[PaymentReceived] {
  private val stringSerializer = new StringSerializer

  def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()

  def serialize(topic: String, data: PaymentReceived): Array[Byte] =
    stringSerializer.serialize(topic, Encoder[PaymentReceived].apply(data).noSpaces)

  def close(): Unit = ()
}

class PaymentReceivedEventDeserializer extends Deserializer[PaymentReceived] {
  def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()

  def close(): Unit = ()

  def deserialize(topic: String, data: Array[Byte]): PaymentReceived =
    if (data ne null)
      decode[PaymentReceived](new String(data, StandardCharsets.UTF_8)).fold(throw _, identity)
    else null

} 
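A hedged sketch of how the serializer above might be wired into a plain Kafka producer; the bootstrap address is illustrative, and PaymentReceived with its circe Encoder comes from the original project.

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig}
import org.apache.kafka.common.serialization.StringSerializer
import ru.pavkin.payment.event.PaymentReceived
import ru.pavkin.payment.kafka.PaymentReceivedEventSerializer

object PaymentProducerWiring {
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[PaymentReceivedEventSerializer].getName)

  // Values of type PaymentReceived are serialized to JSON by the custom serializer above.
  val producer = new KafkaProducer[String, PaymentReceived](props)
}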
Example 6
Source File: KafkaTestClient.scala    From haystack-traces   with Apache License 2.0
package com.expedia.www.haystack.trace.indexer.integration.clients

import java.util.Properties

import com.expedia.www.haystack.trace.indexer.config.entities.KafkaConfiguration
import com.expedia.www.haystack.trace.indexer.integration.serdes.{SnappyCompressedSpanBufferProtoDeserializer, SpanProtoSerializer}
import com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster

object KafkaTestClient {
  val KAFKA_CLUSTER = new EmbeddedKafkaCluster(1)
  KAFKA_CLUSTER.start()
}

class KafkaTestClient {
  import KafkaTestClient._

  val INPUT_TOPIC = "spans"
  val OUTPUT_TOPIC = "span-buffer"

  val APP_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[ByteArraySerializer])
    props
  }

  val APP_CONSUMER_CONFIG: Properties = new Properties()

  val TEST_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[SpanProtoSerializer])
    props
  }

  val RESULT_CONSUMER_CONFIG = new Properties()

  def buildConfig = KafkaConfiguration(numStreamThreads = 1,
    pollTimeoutMs = 100,
    APP_CONSUMER_CONFIG, APP_PRODUCER_CONFIG, OUTPUT_TOPIC, INPUT_TOPIC,
    consumerCloseTimeoutInMillis = 3000,
    commitOffsetRetries = 3,
    commitBackoffInMillis = 250,
    maxWakeups = 5, wakeupTimeoutInMillis = 3000)

  def prepare(appId: String): Unit = {
    APP_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    APP_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-app-consumer")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SpanDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")

    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-result-consumer")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SnappyCompressedSpanBufferProtoDeserializer])

    deleteTopics(INPUT_TOPIC, OUTPUT_TOPIC)
    KAFKA_CLUSTER.createTopic(INPUT_TOPIC, 2, 1)
    KAFKA_CLUSTER.createTopic(OUTPUT_TOPIC)
  }

  private def deleteTopics(topics: String*): Unit = KAFKA_CLUSTER.deleteTopicsAndWait(topics:_*)
} 
Example 7
Source File: KafkaClient.scala    From mist   with Apache License 2.0
package io.hydrosphere.mist.master.interfaces.async.kafka

import java.util.UUID
import java.util.concurrent.atomic.AtomicBoolean

import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

import scala.collection.JavaConverters._
import scala.concurrent.{Future, Promise}

class TopicProducer[K, V](
  producer: KafkaProducer[K, V],
  topic: String
) {

  def send(key:K, value: V): Unit = {
    val record = new ProducerRecord(topic, key, value)
    producer.send(record)
  }
  def close(): Unit = {
    producer.close()
  }

}

object TopicProducer {

  def apply(
    host: String,
    port: Int,
    topic: String): TopicProducer[String, String] = {

    val props = new java.util.Properties()
    props.put("bootstrap.servers", s"$host:$port")

    val producer = new KafkaProducer(props, new StringSerializer, new StringSerializer)
    new TopicProducer(producer, topic)
  }
}

class TopicConsumer[K, V](
  consumer: KafkaConsumer[K, V],
  topic: String,
  timeout: Long = 100
) {

  private val promise = Promise[Unit]
  private val stopped = new AtomicBoolean(false)

  def subscribe(f: (K, V) => Unit): Future[Unit] = {
    run(f)
    promise.future
  }

  private def run(f: (K, V) => Unit): Unit = {
    consumer.subscribe(Seq(topic).asJava)
    val thread = new Thread(new Runnable {
      override def run(): Unit = {
        while (!stopped.get()) {
          val records = consumer.poll(timeout).asScala
          records.foreach(r => f(r.key(), r.value()))
        }
        promise.success(())
      }
    })
    thread.setName(s"kafka-topic-consumer-$topic")
    thread.start()
  }

  def close(): Future[Unit] = {
    stopped.set(true)
    promise.future
  }
}

object TopicConsumer {

  def apply(
    host: String,
    port: Int,
    topic: String): TopicConsumer[String, String] = {

    val props = new java.util.Properties()
    props.put("bootstrap.servers", s"$host:$port")
    props.put("group.id", "mist-" + UUID.randomUUID().toString)
    props.put("enable.auto.commit", "true")
    props.put("auto.commit.interval.ms", "1000")
    props.put("session.timeout.ms", "30000")

    val consumer = new KafkaConsumer(props, new StringDeserializer, new StringDeserializer)
    new TopicConsumer(consumer, topic)
  }

} 
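A brief usage sketch (not part of mist) showing how the two helpers above fit together; host, port, topic and the message are placeholders.

import io.hydrosphere.mist.master.interfaces.async.kafka.{TopicConsumer, TopicProducer}

object TopicClientExample extends App {
  val producer = TopicProducer("localhost", 9092, "mist-topic")
  val consumer = TopicConsumer("localhost", 9092, "mist-topic")

  // subscribe starts a background poll loop; its Future completes after close() is called.
  consumer.subscribe((key, value) => println(s"$key -> $value"))

  producer.send("job-1", """{"status": "started"}""")

  producer.close()
  consumer.close()
}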
Example 8
Source File: TestProducer.scala    From asura   with MIT License
package asura.kafka.producer

import akka.Done
import akka.actor.ActorSystem
import akka.kafka.ProducerSettings
import akka.kafka.scaladsl.Producer
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Source
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.Future

object TestProducer extends StrictLogging {

  def main(args: Array[String]): Unit = {

    logger.info("Start producer")

    implicit val system = ActorSystem("producer")
    implicit val materializer = ActorMaterializer()
    implicit val ec = system.dispatcher

    val producerSettings = ProducerSettings(system, new StringSerializer, new StringSerializer)
    val done: Future[Done] =
      Source(1 to 100)
        .map(value => new ProducerRecord[String, String]("test-topic", s"msg ${value}"))
        .runWith(Producer.plainSink(producerSettings))

    done onComplete {
      case scala.util.Success(_) => logger.info("Done"); system.terminate()
      case scala.util.Failure(err) => logger.error(err.toString); system.terminate()
    }
  }
} 
Example 9
Source File: package.scala    From kafka-scala-api   with Apache License 2.0
package com

import java.util.Collections

import cakesolutions.kafka.{KafkaConsumer, KafkaProducer}
import cakesolutions.kafka.KafkaProducer.Conf
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

package object example {

  val topic = "sample_topic"

  val kafkaProducer = KafkaProducer(
    Conf(new StringSerializer(), new StringSerializer(), bootstrapServers = "localhost:9092")
  )

  val kafkaProducerConf = KafkaProducer.Conf(
    new StringSerializer, new StringSerializer,
    bootstrapServers = "localhost:9092"
  )

  val kafkaConsumerConf = KafkaConsumer.Conf(
    new StringDeserializer,
    new StringDeserializer,
    groupId = "test_group",
    enableAutoCommit = false,
    autoOffsetReset = OffsetResetStrategy.EARLIEST,
    bootstrapServers = "localhost:9092")
} 
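A small hedged sketch using the producer and consumer configuration from the package object above; the key and value are arbitrary.

import cakesolutions.kafka.{KafkaConsumer, KafkaProducerRecord}
import com.example._

import scala.collection.JavaConverters._

object RoundTripExample extends App {
  // Send one record with the preconfigured producer, then read it back.
  kafkaProducer.send(KafkaProducerRecord(topic, Some("key"), "value"))
  kafkaProducer.flush()

  val consumer = KafkaConsumer(kafkaConsumerConf)
  consumer.subscribe(List(topic).asJava)
  consumer.poll(1000).asScala.foreach(r => println(s"${r.key} -> ${r.value}"))
  consumer.close()
}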
Example 10
Source File: package.scala    From kafka-scala-api   with Apache License 2.0
package com

import akka.actor.ActorSystem
import akka.kafka.{ConsumerSettings, ProducerSettings}
import akka.stream.ActorMaterializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}

package object example {
  implicit val system = ActorSystem("FlowProducerMain")
  implicit val materializer = ActorMaterializer()

  val producerSettings = ProducerSettings(system, new ByteArraySerializer, new StringSerializer)
    .withBootstrapServers("localhost:9092")

  val topic = "sample_topic"
  val topic1 = "topic1"
  val topic2 = "topic2"


  val consumerSettings =
    ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
      .withBootstrapServers("localhost:9092")
      .withGroupId("group1")
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest")
} 
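A hedged sketch (not in the original sources) showing the settings above in use with Alpakka Kafka's plain sink and source; the message contents are illustrative.

import akka.kafka.Subscriptions
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.stream.scaladsl.{Sink, Source}
import com.example._
import org.apache.kafka.clients.producer.ProducerRecord

object PlainSinkAndSourceExample extends App {
  // Write ten messages to the sample topic...
  Source(1 to 10)
    .map(n => new ProducerRecord[Array[Byte], String](topic, s"message $n"))
    .runWith(Producer.plainSink(producerSettings))

  // ...and stream the topic back, printing each value.
  Consumer.plainSource(consumerSettings, Subscriptions.topics(topic))
    .map(_.value)
    .runWith(Sink.foreach(println))
}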
Example 11
Source File: PredictionLogger.scala    From ForestFlow   with Apache License 2.0
package ai.forestflow.event.subscribers

import java.nio.ByteOrder

import ai.forestflow.domain.{PredictionEvent, PredictionEventGP}
import ai.forestflow.serving.config.ApplicationEnvironment
import akka.actor.{Actor, ActorLogging, Props}
import akka.kafka.ProducerSettings
import graphpipe.InferRequest
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringSerializer}
//import scalapb.json4s.JsonFormat

import scala.util.{Success, Try}

object PredictionLogger {
  // The companion object's contents are elided in this excerpt.
}

// The class declaration below is reconstructed from context: the original file defines an Akka
// actor parameterised by two optional topic names.
class PredictionLogger(basic_topic: Option[String], gp_topic: Option[String]) extends Actor with ActorLogging {

  // `producerConfig` is defined elsewhere in the original file; reading the standard
  // akka.kafka.producer section is an assumption made here to keep the excerpt self-contained.
  private lazy val producerConfig = context.system.settings.config.getConfig("akka.kafka.producer")

  private lazy val binaryProducerSettings =
    ProducerSettings(producerConfig, new StringSerializer, new ByteArraySerializer)
  private lazy val binaryProducer = binaryProducerSettings.createKafkaProducer()

  override def preStart(): Unit = {
    if (basic_topic.isDefined)
      context.system.eventStream.subscribe(self, classOf[PredictionEvent])

    if (gp_topic.isDefined)
      context.system.eventStream.subscribe(self, classOf[PredictionEventGP])
    super.preStart()
  }
  override def receive: Receive = {
    case event@PredictionEvent(prediction, servedRequest, inferenceRequest, loggingSettings) =>

      val key = loggingSettings
        .keyFeatures
        .flatMap(inferenceRequest.configs.get)
        .mkString(loggingSettings.getKeyFeaturesSeparator)

      if (key.length > 0 )
        binaryProducer.send(new ProducerRecord(basic_topic.get, key, event.toByteArray))
      else
        binaryProducer.send(new ProducerRecord(basic_topic.get, event.toByteArray))

    case event@PredictionEventGP(prediction, servedRequest, inferBytes, loggingSettings) =>
      Try {
        val req = graphpipe.Request.getRootAsRequest(inferBytes.asReadOnlyByteBuffer().order(ByteOrder.LITTLE_ENDIAN))
        val inferRequest = req.req(new InferRequest()).asInstanceOf[InferRequest]
        val inferConfigs = inferRequest.config()
          .split(",")
          .map(_.split(":"))
          .flatMap {
            case Array(k, v) => Some((k, v))
            case _           => None
          }
          .toMap

        loggingSettings
          .keyFeatures
          .flatMap(inferConfigs.get)
          .mkString(loggingSettings.getKeyFeaturesSeparator)

      } match {
        case Success(key) =>
          binaryProducer.send(new ProducerRecord(gp_topic.get, key, event.toByteArray))
        case _ =>
          binaryProducer.send(new ProducerRecord(gp_topic.get, event.toByteArray))
      }

    case _ => // ignore
  }
} 
Example 12
Source File: KafkaReporter.scala    From Swallow   with Apache License 2.0
package com.intel.hibench.common.streaming.metrics

import java.util.Properties

import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer}
import org.apache.kafka.common.serialization.StringSerializer


class KafkaReporter(topic: String, bootstrapServers: String) extends LatencyReporter {

  private val producer = ProducerSingleton.getInstance(bootstrapServers)

  override def report(startTime: Long, endTime: Long): Unit = {
    producer.send(new ProducerRecord[String, String](topic, null, s"$startTime:$endTime"))
  }
}

object ProducerSingleton {
  @volatile private var instance : Option[KafkaProducer[String, String]] = None

  def getInstance(bootstrapServers: String): KafkaProducer[String, String] = synchronized {
    if (!instance.isDefined) {
      synchronized {
        if(!instance.isDefined) {
          val props = new Properties()
          props.put("bootstrap.servers", bootstrapServers)
          instance = Some(new KafkaProducer(props, new StringSerializer, new StringSerializer))
        }
      }
    }
    instance.get
  }
} 
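A short usage sketch (not from the original benchmark) of the reporter above; the topic, broker address, and timestamps are placeholders.

import com.intel.hibench.common.streaming.metrics.KafkaReporter

object LatencyReportExample extends App {
  val reporter = new KafkaReporter("latency-topic", "localhost:9092")

  val start = System.currentTimeMillis()
  Thread.sleep(50) // stand-in for the work being measured
  val end = System.currentTimeMillis()

  // Publishes the record "start:end" (with a null key) to the latency topic.
  reporter.report(start, end)
}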
Example 13
Source File: KafkaProducerUtils.scala    From bigdata-examples   with Apache License 2.0
package com.timeyang.common.util

import java.util.Properties

import com.timeyang.common.config.BaseConf
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object KafkaProducerUtils {

  @volatile lazy private val producer: KafkaProducer[String, String] = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BaseConf.kafkaBrokerList)
    props.put("acks", "all")
    props.put("retries", 1: Integer)
    props.put("batch.size", 16384: Integer)
    props.put("linger.ms", 1: Integer)
    props.put("buffer.memory", 33554432: Integer)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])

    new KafkaProducer[String, String](props)
  }

  def sendJsonMessages(topic: String, event: Object, events: Object*): Unit = {
    for (event <- event +: events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, events: List[Object]): Unit = {
    for (event <- events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, event: Object): Unit = {
    val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
    producer.send(record)
  }

} 
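A hedged usage sketch of the utility above; DeviceEvent and the topic name are made up for illustration, and serialization goes through the project's JsonUtils.

import com.timeyang.common.util.KafkaProducerUtils

case class DeviceEvent(id: String, temperature: Double)

object KafkaProducerUtilsExample extends App {
  KafkaProducerUtils.send("device-events", DeviceEvent("d-1", 21.5))
  KafkaProducerUtils.sendJsonMessages("device-events", DeviceEvent("d-2", 19.0), DeviceEvent("d-3", 23.4))
  KafkaProducerUtils.send("device-events", List(DeviceEvent("d-4", 18.2), DeviceEvent("d-5", 25.1)))
}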
Example 14
Source File: ConsumerToProducer.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka.examples

import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props}
import cakesolutions.kafka.akka.KafkaConsumerActor.{Confirm, Subscribe}
import cakesolutions.kafka.akka._
import cakesolutions.kafka.{KafkaConsumer, KafkaProducer}
import com.typesafe.config.{Config, ConfigFactory}
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

import scala.concurrent.duration._


// The enclosing companion object declaration was dropped from this excerpt; it is restored here
// so the braces balance (the original object also contains bootstrap code, elided).
object ConsumerToProducer {

  def apply(consumerConfig: Config, producerConfig: Config): ActorRef = {

    // Create KafkaConsumerActor config with bootstrap.servers specified in Typesafe config
    val consumerConf = KafkaConsumer.Conf(
      new StringDeserializer,
      new StringDeserializer,
      groupId = "test_group",
      enableAutoCommit = false,
      autoOffsetReset = OffsetResetStrategy.EARLIEST)
      .withConf(consumerConfig)

    val actorConf = KafkaConsumerActor.Conf(1.seconds, 3.seconds, 5)

    // Create KafkaProducerActor config with defaults and bootstrap.servers specified in Typesafe config
    val producerConf = KafkaProducer.Conf(new StringSerializer, new StringSerializer).withConf(producerConfig)

    val system = ActorSystem()
    system.actorOf(Props(new ConsumerToProducer(consumerConf, actorConf, producerConf)))
  }
}

class ConsumerToProducer(
  kafkaConfig: KafkaConsumer.Conf[String, String],
  actorConfig: KafkaConsumerActor.Conf,
  producerConf: KafkaProducer.Conf[String, String]) extends Actor with ActorLogging {

  private val recordsExt = ConsumerRecords.extractor[String, String]

  // The KafkaConsumerActor
  private val consumer = context.actorOf(
    KafkaConsumerActor.props(kafkaConfig, actorConfig, self)
  )
  context.watch(consumer)

  // The KafkaProducerActor
  private val producer = context.actorOf(KafkaProducerActor.props(producerConf))

  consumer ! Subscribe.AutoPartition(List("topic1"))

  override def receive: Receive = {

    // Records from Kafka
    case recordsExt(records) =>
      processRecords(records)

    // Confirmed Offsets from KafkaProducer
    case o: Offsets =>
      consumer ! Confirm(o, commit = true)
  }

  // Demonstrates some transformation of the messages before forwarding to KafkaProducer
  private def processRecords(records: ConsumerRecords[String, String]) = {
    val transformedRecords = records.pairs.map { case (key, value) =>
      (key, value + ".")
    }

    // Send records to Topic2.  Offsets will be sent back to this actor once confirmed.
    producer ! ProducerRecords.fromKeyValues[String, String]("topic2", transformedRecords, Some(records.offsets), None)

    // Could have sent them like this if we didn't first transform:
    // producer ! ProducerRecords.fromConsumerRecords("topic2", records, None)
  }
} 
Example 15
Source File: KafkaConsumerPerfSpec.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka

import cakesolutions.kafka.KafkaConsumer.Conf
import com.typesafe.config.ConfigFactory
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.util.Random


class KafkaConsumerPerfSpec extends FlatSpecLike
  with Matchers
  with BeforeAndAfterAll {

  val log = LoggerFactory.getLogger(getClass)

  val config = ConfigFactory.load()

  val msg1k = scala.io.Source.fromInputStream(getClass.getResourceAsStream("/1k.txt")).mkString

  val consumer = KafkaConsumer(
    Conf(config.getConfig("consumer"),
      new StringDeserializer,
      new StringDeserializer)
  )

  private def randomString: String = Random.alphanumeric.take(5).mkString("")

  "Kafka Consumer with single partition topic" should "perform" in {
    val topic = randomString
    val producerConf = KafkaProducer.Conf(config.getConfig("producer"), new StringSerializer, new StringSerializer)
    val producer = KafkaProducer[String, String](producerConf)

    1 to 100000 foreach { n =>
      producer.send(KafkaProducerRecord(topic, None, msg1k))
    }
    producer.flush()
    log.info("Delivered 100000 msg to topic {}", topic)

    consumer.subscribe(List(topic).asJava)

    var start = 0L

    var total = 0

    while (total < 100000) {
      if(total == 0)
        start = System.currentTimeMillis()
      val count = consumer.poll(1000).count()
      total += count
    }

    val totalTime = System.currentTimeMillis() - start
    val messagesPerSec = 100000L * 1000 / totalTime // multiply before dividing to avoid integer truncation
    log.info("Total Time millis : {}", totalTime)
    log.info("Messages per sec  : {}", messagesPerSec)

    totalTime should be < 4000L

    consumer.close()
    producer.close()
  }
} 
Example 16
Source File: IdempotentProducerSpec.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka

import org.apache.kafka.clients.consumer.ConsumerRecords
import org.apache.kafka.common.KafkaException
import org.apache.kafka.common.requests.IsolationLevel
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.util.Random

class IdempotentProducerSpec extends KafkaIntSpec {
  private val log = LoggerFactory.getLogger(getClass)

  private def randomString: String = Random.alphanumeric.take(5).mkString("")

  val idempotentProducerConfig: KafkaProducer.Conf[String, String] =
    KafkaProducer.Conf(new StringSerializer(),
      new StringSerializer(),
      bootstrapServers = s"localhost:$kafkaPort",
      enableIdempotence = true)

  val transactionalProducerConfig: KafkaProducer.Conf[String, String] =
    KafkaProducer.Conf(new StringSerializer(),
      new StringSerializer(),
      bootstrapServers = s"localhost:$kafkaPort",
      transactionalId = Some("t1"),
      enableIdempotence = true)

  val consumerConfig: KafkaConsumer.Conf[String, String] =
    KafkaConsumer.Conf(new StringDeserializer(),
      new StringDeserializer(),
      bootstrapServers = s"localhost:$kafkaPort",
      groupId = randomString,
      enableAutoCommit = false)

  val transactionConsumerConfig: KafkaConsumer.Conf[String, String] =
    KafkaConsumer.Conf(new StringDeserializer(),
      new StringDeserializer(),
      bootstrapServers = s"localhost:$kafkaPort",
      groupId = randomString,
      enableAutoCommit = false,
      isolationLevel = IsolationLevel.READ_COMMITTED)

  "Producer with idempotent config" should "deliver batch" in {
    val topic = randomString
    log.info(s"Using topic [$topic] and kafka port [$kafkaPort]")

    val producer = KafkaProducer(idempotentProducerConfig)
    val consumer = KafkaConsumer(consumerConfig)

    consumer.subscribe(List(topic).asJava)

    val records1 = consumer.poll(1000)
    records1.count() shouldEqual 0

    log.info("Kafka producer connecting on port: [{}]", kafkaPort)
    producer.send(KafkaProducerRecord(topic, Some("key"), "value"))
    producer.flush()

    val records2: ConsumerRecords[String, String] = consumer.poll(1000)
    records2.count() shouldEqual 1

    producer.close()
    consumer.close()
  }

  "Producer with transaction" should "deliver batch" in {
    val topic = randomString
    log.info(s"Using topic [$topic] and kafka port [$kafkaPort]")

    val producer = KafkaProducer(transactionalProducerConfig)
    val consumer = KafkaConsumer(transactionConsumerConfig)

    consumer.subscribe(List(topic).asJava)

    val records1 = consumer.poll(1000)
    records1.count() shouldEqual 0

    log.info("Kafka producer connecting on port: [{}]", kafkaPort)

    producer.initTransactions()

    try {
      producer.beginTransaction()
      producer.send(KafkaProducerRecord(topic, Some("key"), "value"))
      producer.commitTransaction()
    } catch {
      case ex: KafkaException =>
        log.error(ex.getMessage, ex)
        producer.abortTransaction()
    }

    val records2: ConsumerRecords[String, String] = consumer.poll(1000)
    records2.count() shouldEqual 1

    producer.close()
    consumer.close()
  }
} 
Example 17
Source File: KafkaProducerActorSpec.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka.akka

import akka.actor.ActorSystem
import akka.testkit.TestProbe
import cakesolutions.kafka.{KafkaConsumer, KafkaProducer, KafkaProducerRecord}
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

import scala.util.Random

class KafkaProducerActorSpec(system_ : ActorSystem) extends KafkaIntSpec(system_) {

  def this() = this(ActorSystem("KafkaProducerActorSpec"))

  private def randomString: String = Random.alphanumeric.take(5).mkString("")

  val deserializer = new StringDeserializer
  val consumerConf = KafkaConsumer.Conf(
    deserializer, deserializer,
    bootstrapServers = s"localhost:$kafkaPort",
    groupId = "test",
    enableAutoCommit = false,
    autoOffsetReset = OffsetResetStrategy.EARLIEST
  )

  val serializer = new StringSerializer
  val producerConf = KafkaProducer.Conf(serializer, serializer, bootstrapServers = s"localhost:$kafkaPort")

  "KafkaProducerActor" should "write a given batch to Kafka" in {
    val topic = randomString
    val probe = TestProbe()
    val producer = system.actorOf(KafkaProducerActor.props(producerConf))
    val batch: Seq[ProducerRecord[String, String]] = Seq(
      KafkaProducerRecord(topic, "foo"),
      KafkaProducerRecord(topic, "key", "value"),
      KafkaProducerRecord(topic, "bar"))
    val message = ProducerRecords(batch, Some('response))

    probe.send(producer, message)

    probe.expectMsg('response)

    val results = consumeFromTopic(topic, 3, 10000)

    results(0) shouldEqual ((None, "foo"))
    results(1) shouldEqual ((Some("key"), "value"))
    results(2) shouldEqual ((None, "bar"))
  }

  "KafkaProducerActor" should "write a given batch to Kafka, requiring no response" in {
    import scala.concurrent.duration._

    val topic = randomString
    val probe = TestProbe()
    val producer = system.actorOf(KafkaProducerActor.props(producerConf))
    val batch: Seq[ProducerRecord[String, String]] = Seq(
      KafkaProducerRecord(topic, "foo"),
      KafkaProducerRecord(topic, "key", "value"),
      KafkaProducerRecord(topic, "bar")
    )
    val message = ProducerRecords(batch)

    probe.send(producer, message)

    probe.expectNoMessage(3.seconds)

    val results = consumeFromTopic(topic, 3, 10000)

    results(0) shouldEqual ((None, "foo"))
    results(1) shouldEqual ((Some("key"), "value"))
    results(2) shouldEqual ((None, "bar"))
  }

  private def consumeFromTopic(topic: String, expectedNumOfMessages: Int, timeout: Long) =
    kafkaServer.consume(topic, expectedNumOfMessages, timeout, deserializer, deserializer)
} 
Example 18
Source File: UseCase.scala    From Fast-Data-Processing-Systems-with-SMACK-Stack   with MIT License
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{Sink, Source}
import com.softwaremill.react.kafka.KafkaMessages._
import org.apache.kafka.common.serialization.{StringSerializer, StringDeserializer}
import com.softwaremill.react.kafka.{ProducerMessage, ConsumerProperties, ProducerProperties, ReactiveKafka}
import org.reactivestreams.{ Publisher, Subscriber }

implicit val actorSystem = ActorSystem("ReactiveKafka")
implicit val materializer = ActorMaterializer()

val kafka = new ReactiveKafka()
val publisher: Publisher[StringConsumerRecord] = kafka.consume(ConsumerProperties(
 bootstrapServers = "localhost:9092",
 topic = "lowercaseStrings",
 groupId = "groupName",
 valueDeserializer = new StringDeserializer()
))

val subscriber: Subscriber[StringProducerMessage] = kafka.publish(ProducerProperties(
  bootstrapServers = "localhost:9092",
  topic = "uppercaseStrings",
  valueSerializer = new StringSerializer()
))

Source.fromPublisher(publisher).map(m => ProducerMessage(m.value().toUpperCase))
  .to(Sink.fromSubscriber(subscriber)).run() 
Example 19
import akka.actor.ActorSystem
import akka.kafka._
import akka.kafka.scaladsl._
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{Sink, Source}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.kafka.common.serialization.ByteArraySerializer

// The snippet assumes an ActorSystem and materializer; they are spelled out here.
implicit val system = ActorMaterializer // placeholder removed below
implicit val materializer = ActorMaterializer()

val producerSettings = ProducerSettings(system, new ByteArraySerializer, new StringSerializer)
  .withBootstrapServers("localhost:9092")

// Plain sink: every element becomes a ProducerRecord and is written to topic1.
Source(1 to 10000)
  .map(_.toString)
  .map(elem => new ProducerRecord[Array[Byte], String]("topic1", elem))
  .runWith(Producer.plainSink(producerSettings))

// Producer.flow emits one result per message, carrying the record, its offset and the pass-through value.
Source(1 to 10000)
  .map(elem => ProducerMessage.Message(new ProducerRecord[Array[Byte], String]("topic1", elem.toString), elem))
  .via(Producer.flow(producerSettings))
  .map { result =>
    val record = result.message.record
    println(s"${record.topic}/${record.partition} ${result.offset}: ${record.value} (${result.message.passThrough})")
    result
  }
  .runWith(Sink.ignore)
Example 20
Source File: DataStreamer.scala    From structured-streaming-application   with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random


object DataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val someWords = List("about", "above", "after", "again", "against")

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 200 milliseconds) {
    Random.shuffle(someWords).headOption.foreach { word =>
      producer.send(new ProducerRecord[String, String](topic, word))
    }
  }
} 
Example 21
Source File: WordCountProducer.scala    From akka_streams_tutorial   with MIT License
package alpakka.kafka

import java.util
import java.util.concurrent.ThreadLocalRandom

import akka.actor.ActorSystem
import akka.kafka.ProducerMessage.Message
import akka.kafka.ProducerSettings
import akka.kafka.scaladsl.Producer
import akka.stream.ThrottleMode
import akka.stream.scaladsl.{Keep, Sink, Source}
import akka.{Done, NotUsed}
import org.apache.kafka.clients.producer.{Partitioner, ProducerRecord}
import org.apache.kafka.common.errors.{NetworkException, UnknownTopicOrPartitionException}
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.kafka.common.{Cluster, PartitionInfo}

import scala.concurrent.Future
import scala.concurrent.duration._


class CustomPartitioner extends Partitioner {
  override def partition(topic: String, key: Any, keyBytes: Array[Byte], value: Any, valueBytes: Array[Byte], cluster: Cluster): Int = {
    val partitionInfoList: util.List[PartitionInfo] = cluster.availablePartitionsForTopic(topic)
    val partitionCount = partitionInfoList.size
    val fakeNewsPartition = 0

    //println("CustomPartitioner received key: " + key + " and value: " + value)

    if (value.toString.contains(WordCountProducer.fakeNewsKeyword)) {
      //println("CustomPartitioner send message: " + value + " to fakeNewsPartition")
      fakeNewsPartition
    }
    else ThreadLocalRandom.current.nextInt(1, partitionCount) //round robin
  }

  override def close(): Unit = {
    println("CustomPartitioner: " + Thread.currentThread + " received close")
  }

  override def configure(configs: util.Map[String, _]): Unit = {
    println("CustomPartitioner received configure with configuration: " + configs)
  }
}

object CustomPartitioner {
  private def deserialize[V](objectData: Array[Byte]): V = org.apache.commons.lang3.SerializationUtils.deserialize(objectData).asInstanceOf[V]
} 
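The WordCountProducer object itself is not shown in this excerpt; the hedged sketch below illustrates one way the custom partitioner above could be registered with a producer, via the standard partitioner.class property (names and addresses are placeholders).

import akka.actor.ActorSystem
import akka.kafka.ProducerSettings
import alpakka.kafka.CustomPartitioner
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.StringSerializer

object CustomPartitionerWiring {
  implicit val system: ActorSystem = ActorSystem("WordCountProducerSketch")

  // Kafka instantiates the partitioner itself, so it is passed by class name.
  val producerSettings: ProducerSettings[String, String] =
    ProducerSettings(system, new StringSerializer, new StringSerializer)
      .withBootstrapServers("localhost:9092")
      .withProperty(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[CustomPartitioner].getName)
}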
Example 22
Source File: SKRSpec.scala    From spark-kafka-writer   with Apache License 2.0
package com.github.benfradet.spark.kafka.writer

import java.util.concurrent.atomic.AtomicInteger

import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.scalatest.concurrent.Eventually
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}

import scala.collection.mutable.ArrayBuffer
import scala.util.Random
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

case class Foo(a: Int, b: String)

trait SKRSpec
  extends AnyWordSpec
  with Matchers
  with BeforeAndAfterEach
  with BeforeAndAfterAll
  with Eventually {

  val sparkConf = new SparkConf()
    .setMaster("local[1]")
    .setAppName(getClass.getSimpleName)

  var ktu: KafkaTestUtils = _
  override def beforeAll(): Unit = {
    ktu = new KafkaTestUtils
    ktu.setup()
  }
  override def afterAll(): Unit = {
    SKRSpec.callbackTriggerCount.set(0)
    if (ktu != null) {
      ktu.tearDown()
      ktu = null
    }
  }

  var topic: String = _
  var ssc: StreamingContext = _
  var spark: SparkSession = _
  override def afterEach(): Unit = {
    if (ssc != null) {
      ssc.stop()
      ssc = null
    }
    if (spark != null) {
      spark.stop()
      spark = null
    }
  }
  override def beforeEach(): Unit = {
    ssc = new StreamingContext(sparkConf, Seconds(1))
    spark = SparkSession.builder
      .config(sparkConf)
      .getOrCreate()
    topic = s"topic-${Random.nextInt()}"
    ktu.createTopics(topic)
  }

  def collect(ssc: StreamingContext, topic: String): ArrayBuffer[String] = {
    val kafkaParams = Map(
      "bootstrap.servers" -> ktu.brokerAddress,
      "auto.offset.reset" -> "earliest",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "test-collect"
    )
    val results = new ArrayBuffer[String]
    KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Set(topic), kafkaParams)
    ).map(_.value())
      .foreachRDD { rdd =>
        results ++= rdd.collect()
        ()
      }
    results
  }

  val producerConfig = Map(
    "bootstrap.servers" -> "127.0.0.1:9092",
    "key.serializer" -> classOf[StringSerializer].getName,
    "value.serializer" -> classOf[StringSerializer].getName
  )
}

object SKRSpec {
  val callbackTriggerCount = new AtomicInteger()
} 
Example 23
Source File: KafkaProducerCacheSpec.scala    From spark-kafka-writer   with Apache License 2.0
package com.github.benfradet.spark.kafka.writer

import com.google.common.cache.Cache
import org.apache.kafka.clients.producer._
import org.apache.kafka.common.serialization.StringSerializer
import org.scalatest.PrivateMethodTester

import scala.concurrent.duration._

class KafkaProducerCacheSpec extends SKRSpec with PrivateMethodTester {
  val cache = PrivateMethod[Cache[Seq[(String, Object)], KafkaProducer[_, _]]]('cache)
  val m1 = Map(
    "bootstrap.servers" -> "127.0.0.1:9092",
    "key.serializer" -> classOf[StringSerializer].getName,
    "value.serializer" -> classOf[StringSerializer].getName
  )
  val m2 = m1 + ("acks" -> "0")

  override def beforeAll(): Unit = {
    super.beforeAll()
    KafkaProducerCache.invokePrivate(cache()).invalidateAll()
  }

  "A KafkaProducerCache" when {
    "calling getProducer" should {
      "create the producer if it doesn't exist and retrieve it if it exists" in {
        cacheSize shouldBe 0
        val p1 = KafkaProducerCache.getProducer[String, String](m1)
        cacheSize shouldBe 1
        val p2 = KafkaProducerCache.getProducer[String, String](m1)
        p1 shouldBe p2
        cacheSize shouldBe 1
      }
    }

    "closing a producer" should {
      "close the correct producer" in {
        cacheSize shouldBe 1
        val p1 = KafkaProducerCache.getProducer[String, String](m1)
        cacheSize shouldBe 1
        val p2 = KafkaProducerCache.getProducer[String, String](m2)
        cacheSize shouldBe 2
        p1 should not be p2
        KafkaProducerCache.close(m1)
        cacheSize shouldBe 1
      }
    }
  }

  private def cacheSize: Int = KafkaProducerCache.invokePrivate(cache()).asMap.size
} 
Example 24
Source File: EventProducer.scala    From rokku   with Apache License 2.0
package com.ing.wbaa.rokku.proxy.provider.kafka

import akka.Done
import akka.http.scaladsl.model.HttpMethod
import com.ing.wbaa.rokku.proxy.config.KafkaSettings
import com.ing.wbaa.rokku.proxy.data.RequestId
import com.ing.wbaa.rokku.proxy.handler.LoggerHandlerWithId
import com.ing.wbaa.rokku.proxy.metrics.MetricsFactory
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata }
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.{ ExecutionContext, Future }

trait EventProducer {

  private val logger = new LoggerHandlerWithId

  import scala.collection.JavaConverters._

  protected[this] implicit val kafkaSettings: KafkaSettings

  protected[this] implicit val executionContext: ExecutionContext

  private lazy val config: Map[String, Object] =
    Map[String, Object](
      "bootstrap.servers" -> kafkaSettings.bootstrapServers,
      ProducerConfig.RETRIES_CONFIG -> kafkaSettings.retries,
      ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG -> kafkaSettings.retriesBackOff,
      ProducerConfig.RECONNECT_BACKOFF_MAX_MS_CONFIG -> kafkaSettings.retriesBackOffMax,
      CommonClientConfigs.SECURITY_PROTOCOL_CONFIG -> kafkaSettings.protocol,
      ProducerConfig.MAX_BLOCK_MS_CONFIG -> kafkaSettings.maxblock,
      ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG -> kafkaSettings.requestTimeoutMs,
      "ssl.truststore.location" -> kafkaSettings.sslTruststoreLocation,
      "ssl.truststore.password" -> kafkaSettings.sslTruststorePassword,
      "ssl.keystore.location" -> kafkaSettings.sslKeystoreLocation,
      "ssl.keystore.password" -> kafkaSettings.sslKeystorePassword,
      "ssl.key.password" -> kafkaSettings.sslKeyPassword
    )

  private lazy val kafkaProducer: KafkaProducer[String, String] = new KafkaProducer(config.asJava, new StringSerializer, new StringSerializer)

  def sendSingleMessage(event: String, topic: String, httpMethod: Option[HttpMethod] = None)(implicit id: RequestId): Future[Done] = {
    kafkaProducer
      .send(new ProducerRecord[String, String](topic, event), (metadata: RecordMetadata, exception: Exception) => {
        exception match {
          case e: Exception =>
            MetricsFactory.incrementKafkaSendErrors
            logger.error("error in sending event {} to topic {}, error={}", event, topic, e)
            throw new Exception(e)
          case _ =>
            httpMethod.map { m => MetricsFactory.incrementKafkaNotificationsSent(m) }
            logger.debug("Message sent {} to kafka, offset {}", event, metadata.offset())
        }
      }) match {
        case _ => Future(Done)
      }
  }
} 
Example 25
Source File: ProducerStream.scala    From reactive-kafka-microservice-template   with Apache License 2.0
package com.omearac.producers

import akka.actor.{ActorRef, ActorSystem}
import akka.kafka.ProducerSettings
import akka.kafka.scaladsl.Producer
import akka.stream.OverflowStrategy
import akka.stream.scaladsl.{Flow, Source}
import com.omearac.shared.JsonMessageConversion.Conversion
import com.omearac.shared.{AkkaStreams, EventSourcing}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringSerializer}



trait ProducerStream extends AkkaStreams with EventSourcing {
    implicit val system: ActorSystem
    def self: ActorRef

    def createStreamSource[msgType] = {
        Source.queue[msgType](Int.MaxValue,OverflowStrategy.backpressure)
    }

    def createStreamSink(producerProperties: Map[String, String]) = {
        val kafkaMBAddress = producerProperties("bootstrap-servers")
        val producerSettings = ProducerSettings(system, new ByteArraySerializer, new StringSerializer).withBootstrapServers(kafkaMBAddress)

        Producer.plainSink(producerSettings)
    }

    def createStreamFlow[msgType: Conversion](producerProperties: Map[String, String]) = {
        val numberOfPartitions = producerProperties("num.partitions").toInt -1
        val topicToPublish = producerProperties("publish-topic")
        val rand = new scala.util.Random
        val range = 0 to numberOfPartitions

        Flow[msgType].map { msg =>
            val partition = range(rand.nextInt(range.length))
            val stringJSONMessage = Conversion[msgType].convertToJson(msg)
            new ProducerRecord[Array[Byte], String](topicToPublish, partition, null, stringJSONMessage)
        }
    }
} 
Example 26
Source File: KafkaProducerConnector.scala    From openwhisk   with Apache License 2.0
package org.apache.openwhisk.connector.kafka

import akka.actor.ActorSystem
import akka.pattern.after
import org.apache.kafka.clients.producer._
import org.apache.kafka.common.errors._
import org.apache.kafka.common.serialization.StringSerializer
import pureconfig._
import pureconfig.generic.auto._
import org.apache.openwhisk.common.{Counter, Logging, TransactionId}
import org.apache.openwhisk.connector.kafka.KafkaConfiguration._
import org.apache.openwhisk.core.ConfigKeys
import org.apache.openwhisk.core.connector.{Message, MessageProducer}
import org.apache.openwhisk.core.entity.{ByteSize, UUIDs}
import org.apache.openwhisk.utils.Exceptions

import scala.collection.JavaConverters._
import scala.concurrent.duration._
import scala.concurrent.{blocking, ExecutionContext, Future, Promise}
import scala.util.{Failure, Success}

class KafkaProducerConnector(
  kafkahosts: String,
  id: String = UUIDs.randomUUID().toString,
  maxRequestSize: Option[ByteSize] = None)(implicit logging: Logging, actorSystem: ActorSystem)
    extends MessageProducer
    with Exceptions {

  implicit val ec: ExecutionContext = actorSystem.dispatcher
  private val gracefulWaitTime = 100.milliseconds

  override def sentCount(): Long = sentCounter.cur

  // The send(...) implementation from the original file is elided in this excerpt.

  override def close(): Unit = {
    logging.info(this, "closing producer")
    producer.close()
  }

  private val sentCounter = new Counter()

  private def createProducer(): KafkaProducer[String, String] = {
    val config = Map(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> kafkahosts) ++
      configMapToKafkaConfig(loadConfigOrThrow[Map[String, String]](ConfigKeys.kafkaCommon)) ++
      configMapToKafkaConfig(loadConfigOrThrow[Map[String, String]](ConfigKeys.kafkaProducer)) ++
      (maxRequestSize map { max =>
        Map("max.request.size" -> max.size.toString)
      } getOrElse Map.empty)

    verifyConfig(config, ProducerConfig.configNames().asScala.toSet)

    tryAndThrow("creating producer")(new KafkaProducer(config, new StringSerializer, new StringSerializer))
  }

  private def recreateProducer(): Unit = {
    logging.info(this, s"recreating producer")
    tryAndSwallow("closing old producer")(producer.close())
    logging.info(this, s"old producer closed")
    producer = createProducer()
  }

  @volatile private var producer = createProducer()
} 
Example 27
Source File: CacheInvalidator.scala    From openwhisk   with Apache License 2.0
package org.apache.openwhisk.core.database.cosmosdb.cache

import akka.Done
import akka.actor.{ActorSystem, CoordinatedShutdown}
import akka.kafka.ProducerSettings
import akka.stream.ActorMaterializer
import com.google.common.base.Throwables
import com.typesafe.config.Config
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.openwhisk.common.Logging
import org.apache.openwhisk.core.database.RemoteCacheInvalidation.cacheInvalidationTopic

import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}

object CacheInvalidator {

  val instanceId = "cache-invalidator"
  val whisksCollection = "whisks"

  def start(
    globalConfig: Config)(implicit system: ActorSystem, materializer: ActorMaterializer, log: Logging): Future[Done] = {
    implicit val ec: ExecutionContext = system.dispatcher
    val config = CacheInvalidatorConfig(globalConfig)
    val producer =
      KafkaEventProducer(
        kafkaProducerSettings(defaultProducerConfig(globalConfig)),
        cacheInvalidationTopic,
        config.eventProducerConfig)
    val observer = new WhiskChangeEventObserver(config.invalidatorConfig, producer)
    val feedConsumer = new ChangeFeedConsumer(whisksCollection, config, observer)
    feedConsumer.isStarted.andThen {
      case Success(_) =>
        registerShutdownTasks(system, feedConsumer, producer)
        log.info(this, s"Started the Cache invalidator service. ClusterId [${config.invalidatorConfig.clusterId}]")
      case Failure(t) =>
        log.error(this, "Error occurred while starting the Consumer" + Throwables.getStackTraceAsString(t))
    }
  }

  private def registerShutdownTasks(system: ActorSystem,
                                    feedConsumer: ChangeFeedConsumer,
                                    producer: KafkaEventProducer)(implicit ec: ExecutionContext, log: Logging): Unit = {
    CoordinatedShutdown(system).addTask(CoordinatedShutdown.PhaseBeforeServiceUnbind, "closeFeedListeners") { () =>
      feedConsumer
        .close()
        .flatMap { _ =>
          producer.close().andThen {
            case Success(_) =>
              log.info(this, "Kafka producer successfully shutdown")
          }
        }
    }
  }

  def kafkaProducerSettings(config: Config): ProducerSettings[String, String] =
    ProducerSettings(config, new StringSerializer, new StringSerializer)

  def defaultProducerConfig(globalConfig: Config): Config = globalConfig.getConfig("akka.kafka.producer")

} 
Example 28
Source File: StreamStreamDataGenerator.scala    From structured-streaming-application   with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config._
import knolx.KnolXLogger
import knolx.spark.Stock
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random


object StreamStreamDataGenerator extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay")
  val orderTypes = List("buy", "sell")
  val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9)
  val randomCompanyNames = Random.shuffle(companyNames).drop(Random.shuffle((1 to 3).toList).head)

  implicit val formats = Serialization.formats(NoTypeHints)

  info("Streaming companies listed into Kafka...")
  system.scheduler.schedule(0 seconds, 20 seconds) {
    randomCompanyNames.foreach { name =>
      producer.send(new ProducerRecord[String, String](companiesTopic, name))
    }
  }

  info("Streaming stocks data into Kafka...")
  system.scheduler.schedule(0 seconds, 5 seconds) {
    companyNames.foreach { name =>
      val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head)
      producer.send(new ProducerRecord[String, String](stocksTopic, write(stock)))
    }
  }
} 
Example 29
Source File: EmbeddedKafkaUnavailableSpec.scala    From scalatest-embedded-kafka   with MIT License
package net.manub.embeddedkafka

import org.apache.kafka.common.serialization.StringSerializer
import org.scalatest.BeforeAndAfterAll
import org.scalatest.tagobjects.Slow

class EmbeddedKafkaUnavailableSpec
    extends EmbeddedKafkaSpecSupport
    with EmbeddedKafka
    with BeforeAndAfterAll {

  "the publishToKafka method" should {
    "throw a KafkaUnavailableException when Kafka is unavailable when trying to publish" in {
      a[KafkaUnavailableException] shouldBe thrownBy {
        implicit val serializer = new StringSerializer()
        publishToKafka("non_existing_topic", "a message")
      }
    }
  }

  "the consumeFirstStringMessageFrom method" should {
    "throw a KafkaUnavailableException when there's no running instance of Kafka" taggedAs Slow ignore {
      // TODO: This test is *really* slow. The request.max.timeout.ms in the underlying consumer should be changed.
      a[KafkaUnavailableException] shouldBe thrownBy {
        consumeFirstStringMessageFrom("non_existing_topic")
      }
    }
  }
} 
Example 30
Source File: StreamStaticDataGenerator.scala    From structured-streaming-application   with Apache License 2.0 5 votes vote down vote up
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import knolx.spark.Stock
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.language.postfixOps
import scala.util.Random


object StreamStaticDataGenerator extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay")
  val orderTypes = List("buy", "sell")
  val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9)

  implicit val formats = Serialization.formats(NoTypeHints)
  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 5 seconds) {
    companyNames.foreach { name =>
      val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head)
      producer.send(new ProducerRecord[String, String](topic, write(stock)))
    }
  }
} 
Example 31
Source File: MultiDataStreamer.scala    From structured-streaming-application   with Apache License 2.0 5 votes vote down vote up
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.language.postfixOps
import scala.util.Random


object MultiDataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 3000 milliseconds) {
    (1 to Random.nextInt(100)).foreach { id =>
      producer.send(new ProducerRecord[String, String](topic, s"device$id", (Math.random * 2 + 1).toString))
    }
  }
} 
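None of the three generators above (StreamStreamDataGenerator, StreamStaticDataGenerator, MultiDataStreamer) flushes or closes its KafkaProducer, so records still buffered in the producer can be lost when the JVM exits. A hedged sketch of the shutdown-hook pattern, shown as a self-contained app with an assumed local broker:

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig}
import org.apache.kafka.common.serialization.StringSerializer

object GracefulProducerShutdownSketch extends App {
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") // assumed broker
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  // Flush anything still buffered and release network resources before the JVM exits.
  sys.addShutdownHook {
    producer.flush()
    producer.close()
  }
}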
Example 32
Source File: EmbeddedKafkaUnavailableSpec.scala    From embedded-kafka   with MIT License 5 votes vote down vote up
package net.manub.embeddedkafka

import net.manub.embeddedkafka.EmbeddedKafka._
import org.apache.kafka.common.serialization.StringSerializer
import org.scalatest.BeforeAndAfterAll
import org.scalatest.tagobjects.Slow

class EmbeddedKafkaUnavailableSpec
    extends EmbeddedKafkaSpecSupport
    with BeforeAndAfterAll {
  "the publishToKafka method" should {
    "throw a KafkaUnavailableException when Kafka is unavailable when trying to publish" in {
      a[KafkaUnavailableException] shouldBe thrownBy {
        implicit val serializer: StringSerializer = new StringSerializer()
        publishToKafka("non_existing_topic", "a message")
      }
    }
  }

  "the consumeFirstStringMessageFrom method" should {
    "throw a KafkaUnavailableException when there's no running instance of Kafka" taggedAs Slow ignore {
      // TODO: This test is *really* slow. The request.max.timeout.ms in the underlying consumer should be changed.
      a[KafkaUnavailableException] shouldBe thrownBy {
        consumeFirstStringMessageFrom("non_existing_topic")
      }
    }
  }
} 
Example 33
Source File: KafkaOutput.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.plugin.output.kafka

import java.io.{Serializable => JSerializable}
import java.util.Properties

import com.stratio.sparta.plugin.input.kafka.KafkaBase
import com.stratio.sparta.sdk.pipeline.output.Output._
import com.stratio.sparta.sdk.pipeline.output.{Output, OutputFormatEnum, SaveModeEnum}
import com.stratio.sparta.sdk.properties.CustomProperties
import com.stratio.sparta.sdk.properties.ValidatingPropertyMap._
import org.apache.kafka.clients.producer.ProducerConfig._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.spark.sql._

import scala.collection.mutable

class KafkaOutput(name: String, properties: Map[String, JSerializable])
  extends Output(name, properties) with KafkaBase with CustomProperties {

  val DefaultKafkaSerializer = classOf[StringSerializer].getName
  val DefaultAck = "0"
  val DefaultBatchNumMessages = "200"
  val DefaultProducerPort = "9092"

  override val customKey = "KafkaProperties"
  override val customPropertyKey = "kafkaPropertyKey"
  override val customPropertyValue = "kafkaPropertyValue"

  val outputFormat = OutputFormatEnum.withName(properties.getString("format", "json").toUpperCase)
  val rowSeparator = properties.getString("rowSeparator", ",")

  override def supportedSaveModes: Seq[SaveModeEnum.Value] = Seq(SaveModeEnum.Append)

  override def save(dataFrame: DataFrame, saveMode: SaveModeEnum.Value, options: Map[String, String]): Unit = {
    val tableName = getTableNameFromOptions(options)

    validateSaveMode(saveMode)

    outputFormat match {
      case OutputFormatEnum.ROW => dataFrame.rdd.foreachPartition(messages =>
        messages.foreach(message => send(tableName, message.mkString(rowSeparator))))
      case _ => dataFrame.toJSON.foreachPartition { messages =>
        messages.foreach(message => send(tableName, message))
      }
    }
  }

  def send(topic: String, message: String): Unit = {
    val record = new ProducerRecord[String, String](topic, message)
    KafkaOutput.getProducer(getProducerConnectionKey, createProducerProps).send(record)
  }

  private[kafka] def getProducerConnectionKey: String =
    getHostPort(BOOTSTRAP_SERVERS_CONFIG, DefaultHost, DefaultProducerPort)
      .getOrElse(BOOTSTRAP_SERVERS_CONFIG, throw new Exception("Invalid metadata broker list"))

  private[kafka] def createProducerProps: Properties = {
    val props = new Properties()
    properties.filter(_._1 != customKey).foreach { case (key, value) => props.put(key, value.toString) }
    mandatoryOptions.foreach { case (key, value) => props.put(key, value) }
    getCustomProperties.foreach { case (key, value) => props.put(key, value) }
    props
  }

  private[kafka] def mandatoryOptions: Map[String, String] =
    getHostPort(BOOTSTRAP_SERVERS_CONFIG, DefaultHost, DefaultProducerPort) ++
      Map(
        KEY_SERIALIZER_CLASS_CONFIG -> properties.getString(KEY_SERIALIZER_CLASS_CONFIG, DefaultKafkaSerializer),
        VALUE_SERIALIZER_CLASS_CONFIG -> properties.getString(VALUE_SERIALIZER_CLASS_CONFIG, DefaultKafkaSerializer),
        ACKS_CONFIG -> properties.getString(ACKS_CONFIG, DefaultAck),
        BATCH_SIZE_CONFIG -> properties.getString(BATCH_SIZE_CONFIG, DefaultBatchNumMessages)
      )

  override def cleanUp(options: Map[String, String]): Unit = {
    log.info(s"Closing Kafka producer in Kafka Output: $name")
    KafkaOutput.closeProducers()
  }
}

object KafkaOutput {

  private val producers: mutable.Map[String, KafkaProducer[String, String]] = mutable.Map.empty

  def getProducer(producerKey: String, properties: Properties): KafkaProducer[String, String] = {
    getInstance(producerKey, properties)
  }

  def closeProducers(): Unit = {
    producers.values.foreach(producer => producer.close())
  }

  private[kafka] def getInstance(key: String, properties: Properties): KafkaProducer[String, String] = {
    producers.getOrElse(key, {
      val producer = new KafkaProducer[String, String](properties)
      producers.put(key, producer)
      producer
    })
  }
} 
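The producer cache in the KafkaOutput companion object is a plain mutable.Map that Spark executor threads can hit concurrently, so two partitions may race in getInstance and build duplicate producers. A hedged, stand-alone variant (not the project's code) that guards the cache with synchronized and getOrElseUpdate:

import java.util.Properties

import scala.collection.mutable

import org.apache.kafka.clients.producer.KafkaProducer

object CachedProducers {
  private val producers = mutable.Map.empty[String, KafkaProducer[String, String]]

  // synchronized + getOrElseUpdate guarantees at most one producer per connection key,
  // even when several partitions call this at the same time.
  def getProducer(key: String, props: Properties): KafkaProducer[String, String] =
    producers.synchronized {
      producers.getOrElseUpdate(key, new KafkaProducer[String, String](props))
    }

  def closeAll(): Unit = producers.synchronized {
    producers.values.foreach(_.close())
    producers.clear()
  }
}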
Example 34
Source File: ProcessingKafkaApplication.scala    From Akka-Cookbook   with MIT License 5 votes vote down vote up
package com.packt.chapter8

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerSettings, Subscriptions}
import akka.stream.{ActorMaterializer, ClosedShape}
import akka.stream.scaladsl.{Flow, GraphDSL, RunnableGraph, Sink, Source}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}

import scala.concurrent.duration._

object ProcessingKafkaApplication extends App {
  implicit val actorSystem = ActorSystem("SimpleStream")
  implicit val actorMaterializer = ActorMaterializer()

  val bootstrapServers = "localhost:9092"
  val kafkaTopic = "akka_streams_topic"
  val partition = 0
  val subscription = Subscriptions.assignment(new TopicPartition(kafkaTopic, partition))

  val consumerSettings = ConsumerSettings(actorSystem, new ByteArrayDeserializer, new StringDeserializer)
    .withBootstrapServers(bootstrapServers)
    .withGroupId("akka_streams_group")
    .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  val producerSettings = ProducerSettings(actorSystem, new ByteArraySerializer, new StringSerializer)
    .withBootstrapServers(bootstrapServers)

  val runnableGraph = RunnableGraph.fromGraph(GraphDSL.create() { implicit builder =>
    import GraphDSL.Implicits._

    val tickSource = Source.tick(0 seconds, 5 seconds, "Hello from Akka Streams using Kafka!")
    val kafkaSource = Consumer.plainSource(consumerSettings, subscription)
    val kafkaSink = Producer.plainSink(producerSettings)
    val printlnSink = Sink.foreach(println)

    val mapToProducerRecord = Flow[String].map(elem => new ProducerRecord[Array[Byte], String](kafkaTopic, elem))
    val mapFromConsumerRecord = Flow[ConsumerRecord[Array[Byte], String]].map(record => record.value())

    tickSource  ~> mapToProducerRecord   ~> kafkaSink
    kafkaSource ~> mapFromConsumerRecord ~> printlnSink

    ClosedShape
  })

  runnableGraph.run()
} 
Example 35
Source File: EventAggregationSpec.scala    From spark-summit-2018   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import java.util

import com.twilio.open.protocol.Calls.CallEvent
import com.twilio.open.protocol.Metrics
import com.twilio.open.streaming.trend.discovery.streams.EventAggregation
import org.apache.kafka.common.serialization.{Deserializer, Serializer, StringDeserializer, StringSerializer}
import org.apache.spark.sql.streaming.{OutputMode, Trigger}
import org.apache.spark.sql._
import org.apache.spark.sql.kafka010.KafkaTestUtils
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.{Logger, LoggerFactory}

class EventAggregationSpec extends KafkaBackedTest[String, CallEvent] {
  override val testUtils = new KafkaTestUtils[String, CallEvent] {
    override val keySerializer: Serializer[String] = new StringSerializer
    override val keyDeserializer: Deserializer[String] = new StringDeserializer
    override val valueSerializer: Serializer[CallEvent] = new CallEventSerializer
    override val valueDeserializer: Deserializer[CallEvent] = new CallEventDeserializer
  }
  override protected val kafkaTopic = "spark.summit.call.events"
  override protected val partitions = 8

  private val pathToTestScenarios = "src/test/resources/scenarios"

  val log: Logger = LoggerFactory.getLogger(classOf[EventAggregation])

  lazy val session: SparkSession = sparkSql

  override def conf: SparkConf = {
    new SparkConf()
      .setMaster("local[*]")
      .setAppName("aggregation-test-app")
      .set("spark.ui.enabled", "false")
      .set("spark.app.id", appID)
      .set("spark.driver.host", "localhost")
      .set("spark.sql.shuffle.partitions", "32")
      .set("spark.executor.cores", "4")
      .set("spark.executor.memory", "1g")
      .set("spark.ui.enabled", "false")
      .setJars(SparkContext.jarOfClass(classOf[EventAggregation]).toList)
  }

  test("Should aggregate call events") {
    import session.implicits._
    val appConfig = appConfigForTest()
    val scenario = TestHelper.loadScenario[CallEvent](s"$pathToTestScenarios/pdd_events.json")
    val scenarioIter = scenario.toIterator
    scenario.nonEmpty shouldBe true

    testUtils.createTopic(kafkaTopic, partitions, overwrite = true)
    sendNextMessages(scenarioIter, 30, _.getEventId, _.getLoggedEventTime)

    val trendDiscoveryApp = new TrendDiscoveryApp(appConfigForTest(), session)
    val eventAggregation = EventAggregation(appConfig)

    eventAggregation.process(trendDiscoveryApp.readKafkaStream())(session)
      .writeStream
      .queryName("calleventaggs")
      .format("memory")
      .outputMode(eventAggregation.outputMode)
      .start()
      .processAllAvailable()

    val df = session.sql("select * from calleventaggs")
    df.printSchema()
    df.show

    val res = session
      .sql("select avg(stats.p99) from calleventaggs")
      .collect()
      .map { r =>
        r.getAs[Double](0) }
      .head

    DiscoveryUtils.round(res) shouldEqual 7.13

  }


}

class CallEventSerializer extends Serializer[CallEvent] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
  override def serialize(topic: String, data: CallEvent): Array[Byte] = data.toByteArray
  override def close(): Unit = {}
}

class CallEventDeserializer extends Deserializer[CallEvent] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
  override def deserialize(topic: String, data: Array[Byte]): CallEvent = CallEvent.parseFrom(data)
  override def close(): Unit = {}
} 
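The CallEventSerializer/CallEventDeserializer pair above is handed to the test harness as instances; with a plain producer, a custom Serializer of this kind is usually wired in by class name through the standard config keys. A minimal sketch using a hypothetical UpperCaseSerializer and an assumed local broker:

import java.util
import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.{Serializer, StringSerializer}

// Hypothetical serializer, only to illustrate the wiring; it upper-cases values on the way out.
class UpperCaseSerializer extends Serializer[String] {
  private val inner = new StringSerializer
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
  override def serialize(topic: String, data: String): Array[Byte] =
    inner.serialize(topic, if (data == null) null else data.toUpperCase)
  override def close(): Unit = {}
}

object CustomSerializerSketch extends App {
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") // assumed broker
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[UpperCaseSerializer].getName)

  val producer = new KafkaProducer[String, String](props)
  producer.send(new ProducerRecord[String, String]("sketch-topic", "key", "value"))
  producer.close()
}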
Example 36
Source File: KafkaBean.scala    From estuary   with Apache License 2.0 5 votes vote down vote up
package com.neighborhood.aka.laplace.estuary.bean.datasink


import com.neighborhood.aka.laplace.estuary.bean.key._
import com.neighborhood.aka.laplace.estuary.core.sink.mysql.MysqlSinkFunc
import org.apache.kafka.common.serialization.StringSerializer


// NOTE: the original class definition is truncated in this listing; only two fields
// remain. KafkaBean.buildConfig below reads the rest of the settings (ack, lingerMs,
// kafkaRetries, bootstrapServers, serializers, partitionerClass, ...) from this bean.
class KafkaBean {

  var isSync = false

  var maxInFlightRequestsPerConnection = "1"

}

object KafkaBean {
  def buildConfig(kafkaBean: KafkaBean): java.util.HashMap[String, String] = {
    val config: java.util.HashMap[String, String] = new java.util.HashMap[String, String]()
    config.put("acks", kafkaBean.ack)
    config.put("linger.ms", kafkaBean.lingerMs)
    config.put("retries", kafkaBean.kafkaRetries)
    config.put("bootstrap.servers", kafkaBean.bootstrapServers)
    config.put("max.block.ms", kafkaBean.maxBlockMs)
    config.put("max.request.size", kafkaBean.maxRequestSize)
    config.put("request.timeout.ms", kafkaBean.requestTimeoutMs)
    config.put("key.serializer", kafkaBean.keySerializer)
    config.put("value.serializer", kafkaBean.valueSerializer)
    config.put("partitioner.class", kafkaBean.partitionerClass)
    config.put("compression.type", kafkaBean.compressionType)
    config.put("batch.size", kafkaBean.batchSize)
    config.put("retry.backoff.ms", kafkaBean.retryBackoffMs)
    config.put("max.in.flight.requests.per.connection", kafkaBean.maxInFlightRequestsPerConnection)
    config
  }
} 
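KafkaBean.buildConfig returns a java.util.HashMap[String, String]; a hedged sketch (not part of estuary) of copying that map into Properties before constructing a producer:

import java.util.Properties

import org.apache.kafka.clients.producer.KafkaProducer

object KafkaBeanProducerSketch {
  // Copy the bean-derived config entries into Properties and build the producer from them.
  def producerFrom(config: java.util.HashMap[String, String]): KafkaProducer[String, String] = {
    val props = new Properties()
    props.putAll(config)
    new KafkaProducer[String, String](props)
  }
}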
Example 37
Source File: Streams.scala    From haystack-trends   with Apache License 2.0 5 votes vote down vote up
package com.expedia.www.haystack.trends.kstream

import java.util.function.Supplier

import com.expedia.metrics.MetricData
import com.expedia.www.haystack.commons.kstreams.serde.metricdata.{MetricDataSerde, MetricTankSerde}
import com.expedia.www.haystack.trends.aggregation.TrendMetric
import com.expedia.www.haystack.trends.config.AppConfiguration
import com.expedia.www.haystack.trends.kstream.processor.{AdditionalTagsProcessorSupplier, ExternalKafkaProcessorSupplier, MetricAggProcessorSupplier}
import com.expedia.www.haystack.trends.kstream.store.HaystackStoreBuilder
import org.apache.kafka.common.serialization.{Serde, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.Topology
import org.apache.kafka.streams.state.{KeyValueStore, StoreBuilder}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters

class Streams(appConfiguration: AppConfiguration) extends Supplier[Topology] {

  private val LOGGER = LoggerFactory.getLogger(classOf[Streams])
  private val TOPOLOGY_SOURCE_NAME = "metricpoint-source"
  private val TOPOLOGY_EXTERNAL_SINK_NAME = "metricpoint-aggegated-sink-external"
  private val TOPOLOGY_INTERNAL_SINK_NAME = "metric-data-aggegated-sink-internal"
  private val TOPOLOGY_AGGREGATOR_PROCESSOR_NAME = "metricpoint-aggregator-process"
  private val TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME = "additional-tags-process"
  private val TOPOLOGY_AGGREGATOR_TREND_METRIC_STORE_NAME = "trend-metric-store"
  private val kafkaConfig = appConfiguration.kafkaConfig

  private def initialize(topology: Topology): Topology = {

    //add source - topic where the raw metricpoints are pushed by the span-timeseries-transformer
    topology.addSource(
      kafkaConfig.autoOffsetReset,
      TOPOLOGY_SOURCE_NAME,
      kafkaConfig.timestampExtractor,
      new StringDeserializer,
      new MetricTankSerde().deserializer(),
      kafkaConfig.consumeTopic)


    //The processor which performs aggregations on the metrics
    topology.addProcessor(
      TOPOLOGY_AGGREGATOR_PROCESSOR_NAME,
      new MetricAggProcessorSupplier(TOPOLOGY_AGGREGATOR_TREND_METRIC_STORE_NAME, appConfiguration.encoder),
      TOPOLOGY_SOURCE_NAME)


    //key-value, state store associated with each kstreams task(partition)
    // which keeps the trend-metrics which are currently being computed in memory
    topology.addStateStore(createTrendMetricStateStore(), TOPOLOGY_AGGREGATOR_PROCESSOR_NAME)

    // topology to add additional tags if any
    topology.addProcessor(TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME, new AdditionalTagsProcessorSupplier(appConfiguration.additionalTags), TOPOLOGY_AGGREGATOR_PROCESSOR_NAME)

    if (appConfiguration.kafkaConfig.producerConfig.enableExternalKafka) {
      topology.addProcessor(
        TOPOLOGY_EXTERNAL_SINK_NAME,
        new ExternalKafkaProcessorSupplier(appConfiguration.kafkaConfig.producerConfig),
        TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME
        )
    }

    // adding sinks
    appConfiguration.kafkaConfig.producerConfig.kafkaSinkTopics.foreach(sinkTopic => {
      if (sinkTopic.enabled) {
        val serde = Class.forName(sinkTopic.serdeClassName).newInstance().asInstanceOf[Serde[MetricData]]
        topology.addSink(
          s"${TOPOLOGY_INTERNAL_SINK_NAME}-${sinkTopic.topic}",
          sinkTopic.topic,
          new StringSerializer,
          serde.serializer(),
          TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME)
      }
    })

    topology
  }


  private def createTrendMetricStateStore(): StoreBuilder[KeyValueStore[String, TrendMetric]] = {

    val stateStoreConfiguration = appConfiguration.stateStoreConfig

    val storeBuilder = new HaystackStoreBuilder(TOPOLOGY_AGGREGATOR_TREND_METRIC_STORE_NAME, stateStoreConfiguration.stateStoreCacheSize)

    if (stateStoreConfiguration.enableChangeLogging) {
      storeBuilder
        .withLoggingEnabled(JavaConverters.mapAsJavaMap(stateStoreConfiguration.changeLogTopicConfiguration))

    } else {
      storeBuilder
        .withLoggingDisabled()
    }
  }


  override def get(): Topology = {
    val topology = new Topology
    initialize(topology)
  }
}
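Streams only supplies the Topology; a minimal sketch (outside the haystack-trends codebase) of starting it with KafkaStreams, where the application id and bootstrap servers are assumed values that would normally come from AppConfiguration:

import java.util.Properties

import org.apache.kafka.streams.{KafkaStreams, StreamsConfig, Topology}

object StreamsRunnerSketch {
  def start(topology: Topology): KafkaStreams = {
    val props = new Properties()
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "haystack-trends-sketch") // assumed
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")      // assumed
    val streams = new KafkaStreams(topology, props)
    streams.start()
    streams
  }
}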