org.apache.kafka.common.serialization.StringSerializer Scala Examples

The following examples show how to use org.apache.kafka.common.serialization.StringSerializer in Scala. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
Example 1
Source File: package.scala    From kafka-scala-api   with Apache License 2.0
package com.example

import java.util.Properties

import org.apache.kafka.common.serialization.StringSerializer

package object writer {

  val topic = "first_topic"
  val numbersProducerConfig = Map(
    "bootstrap.servers" -> "127.0.0.1:9092",
    "key.serializer" -> classOf[StringSerializer].getName,
    "value.serializer" -> classOf[StringSerializer].getName
  )

  implicit def buildPropertiesFromMap(properties: Map[String, String]): Properties =
    (new Properties /: properties) {
      case (a, (k, v)) =>
        a.put(k,v)
        a
    }

  def setupLogging(): Unit = {
    import org.apache.log4j.{Level, Logger}
    val rootLogger = Logger.getRootLogger
    rootLogger.setLevel(Level.ERROR)
  }

} 
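A minimal usage sketch (not part of the original project) showing how the config map and the implicit Properties conversion above could drive a producer; the NumbersWriterExample name and the message value are illustrative.

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import com.example.writer._

object NumbersWriterExample extends App {
  // buildPropertiesFromMap converts the Map config into the java.util.Properties the producer expects.
  val producer = new KafkaProducer[String, String](numbersProducerConfig: java.util.Properties)
  producer.send(new ProducerRecord[String, String](topic, "42"))
  producer.close()
}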
Example 2
Source File: WordCountTestableSpec.scala    From kafka-streams   with Apache License 2.0
package com.supergloo.examples

import com.supergloo.WordCountTestable
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.apache.kafka.streams.TopologyTestDriver
import org.apache.kafka.streams.state.KeyValueStore
import org.apache.kafka.streams.test.ConsumerRecordFactory
import org.scalatest.{FlatSpec, Matchers}

class WordCountTestableSpec extends FlatSpec with Matchers with KafkaTestSetup {

  val wordCountApplication = new WordCountTestable

  "Convert streaming data into lowercase and publish into output topic" should "push lower text to kafka" in {
    val driver = new TopologyTestDriver(wordCountApplication.toLowerCaseStream("input-topic", "output-topic"), config)
    val recordFactory = new ConsumerRecordFactory("input-topic", new StringSerializer(), new StringSerializer())
    val words = "Hello, WORLDY, World worlD Test"
    driver.pipeInput(recordFactory.create(words))
    val record: ProducerRecord[String, String] = driver.readOutput("output-topic", new StringDeserializer(), new StringDeserializer())
    record.value() shouldBe words.toLowerCase
    driver.close()
  }

  "WordCountTestable" should "count number of words" in {
    val driver = new TopologyTestDriver(wordCountApplication.countNumberOfWords("input-topic", "output-topic", "counts-store"), config)
    val recordFactory = new ConsumerRecordFactory("input-topic", new StringSerializer(), new StringSerializer())
    val words = "Hello Kafka Streams, All streams lead to Kafka"
    driver.pipeInput(recordFactory.create(words))
    val store: KeyValueStore[String, java.lang.Long] = driver.getKeyValueStore("counts-store")
    store.get("hello") shouldBe 1
    store.get("kafka") shouldBe 2
    store.get("streams") shouldBe 2
    store.get("lead") shouldBe 1
    store.get("to") shouldBe 1
    driver.close()

  }

} 
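The spec above mixes in a KafkaTestSetup trait that is not shown. Below is a plausible sketch of the config it supplies; the application id and bootstrap address are placeholders, since TopologyTestDriver never contacts a real broker.

import java.util.Properties

import org.apache.kafka.common.serialization.Serdes
import org.apache.kafka.streams.StreamsConfig

trait KafkaTestSetupSketch {
  val config: Properties = {
    val p = new Properties()
    p.put(StreamsConfig.APPLICATION_ID_CONFIG, "word-count-test")
    p.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234")
    p.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass.getName)
    p.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass.getName)
    p
  }
}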
Example 3
Source File: TheFlashTweetsProducer.scala    From KafkaPlayground   with GNU General Public License v3.0
package com.github.pedrovgs.kafkaplayground.flash

import cakesolutions.kafka.KafkaProducer.Conf
import cakesolutions.kafka.{KafkaProducer, KafkaProducerRecord}
import com.danielasfregola.twitter4s.entities.{Geo, Tweet}
import org.apache.commons.lang.StringEscapeUtils
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.{ExecutionContext, Future}

object TheFlashTweetsProducer {
  private val unknownLocationFlashTopic = "the-flash-tweets"
  private val locatedFlashTopic         = "the-flash-tweets-with-location"
}

class TheFlashTweetsProducer(private val brokerAddress: String,
                             implicit val ec: ExecutionContext = ExecutionContext.global) {

  import TheFlashTweetsProducer._

  private val flashProducer = KafkaProducer(
    Conf(
      keySerializer = new StringSerializer(),
      valueSerializer = new StringSerializer(),
      bootstrapServers = brokerAddress,
      enableIdempotence = true,
      lingerMs = 20,
      batchSize = 32 * 1024
    ).withProperty("compression.type", "snappy")
  )

  def apply(tweet: Tweet): Future[Tweet] = {
    println(s"Sending tweet to the associated topic: ${tweet.text}")
    tweet.geo match {
      case Some(coordinates) => sendGeoLocatedFlashAdvertisement(tweet, coordinates)
      case _                 => sendUnknownLocationFlashAdvertisement(tweet)
    }
  }

  private def sendGeoLocatedFlashAdvertisement(tweet: Tweet, coordinates: Geo): Future[Tweet] =
    sendRecordToProducer(
      topic = locatedFlashTopic,
      message = s"""
           |{
           |  "latitude": ${coordinates.coordinates.head},
           |  "longitude": ${coordinates.coordinates.last},
           |  "id": "${tweet.id}",
           |  "message": "${StringEscapeUtils.escapeJava(tweet.text)}"
           |}
       """.stripMargin
    ).map(_ => tweet)

  private def sendUnknownLocationFlashAdvertisement(tweet: Tweet): Future[Tweet] =
    sendRecordToProducer(
      topic = unknownLocationFlashTopic,
      message = s"""
           |{
           |  "message": "${StringEscapeUtils.escapeJava(tweet.text)}"
           |}
        """.stripMargin
    ).map(_ => tweet)

  private def sendRecordToProducer(topic: String, message: String) =
    flashProducer.send(
      KafkaProducerRecord[String, String](topic = topic, value = message)
    )
} 
Example 4
Source File: EmbeddedKafkaServer.scala    From KafkaPlayground   with GNU General Public License v3.0
package com.github.pedrovgs.kafkaplayground.utils

import cakesolutions.kafka.KafkaProducerRecord
import cakesolutions.kafka.testkit.KafkaServer
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.scalatest.{BeforeAndAfter, Suite}

import scala.concurrent.duration._

trait EmbeddedKafkaServer extends BeforeAndAfter {
  this: Suite =>

  private var kafkaServer: KafkaServer = _

  before {
    kafkaServer = new KafkaServer
    startKafkaServer()
  }

  after {
    stopKafkaServer()
  }

  def startKafkaServer(): Unit = kafkaServer.startup()

  def stopKafkaServer(): Unit = kafkaServer.close()

  def kafkaServerAddress(): String = s"localhost:${kafkaServer.kafkaPort}"

  def zookeeperServerAddress(): String = s"localhost:${kafkaServer.zookeeperPort}"

  def recordsForTopic(topic: String, expectedNumberOfRecords: Int = 1): Iterable[String] =
    kafkaServer
      .consume[String, String](
        topic = topic,
        keyDeserializer = new StringDeserializer,
        valueDeserializer = new StringDeserializer,
        expectedNumOfRecords = expectedNumberOfRecords,
        timeout = 10.seconds.toMillis
      )
      .map(_._2)

  def produceMessage(topic: String, content: String): Unit =
    kafkaServer.produce(
      topic = topic,
      records = Seq(KafkaProducerRecord[String, String](topic = topic, value = content)),
      keySerializer = new StringSerializer(),
      valueSerializer = new StringSerializer()
    )

} 
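A short hedged sketch (not from the original repository) of a test mixing in the trait above; the topic and message are arbitrary.

import com.github.pedrovgs.kafkaplayground.utils.EmbeddedKafkaServer
import org.scalatest.{FlatSpec, Matchers}

class EmbeddedKafkaRoundTripSpec extends FlatSpec with Matchers with EmbeddedKafkaServer {

  "The embedded Kafka server" should "round-trip a message" in {
    produceMessage(topic = "greetings", content = "hello")
    recordsForTopic("greetings") should contain("hello")
  }
}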
Example 5
Source File: KafkaJsonSerializer.scala    From ticket-booking-aecor   with Apache License 2.0
package ru.pavkin.payment.kafka
import java.nio.charset.StandardCharsets
import java.util

import io.circe.parser._
import io.circe.Encoder
import org.apache.kafka.common.serialization.{ Deserializer, Serializer, StringSerializer }
import ru.pavkin.payment.event.PaymentReceived

class PaymentReceivedEventSerializer extends Serializer[PaymentReceived] {
  private val stringSerializer = new StringSerializer

  def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()

  def serialize(topic: String, data: PaymentReceived): Array[Byte] =
    stringSerializer.serialize(topic, Encoder[PaymentReceived].apply(data).noSpaces)

  def close(): Unit = ()
}

class PaymentReceivedEventDeserializer extends Deserializer[PaymentReceived] {
  def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()

  def close(): Unit = ()

  def deserialize(topic: String, data: Array[Byte]): PaymentReceived =
    if (data ne null)
      decode[PaymentReceived](new String(data, StandardCharsets.UTF_8)).fold(throw _, identity)
    else null

} 
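A hedged sketch of how the serializer above might be wired into a plain Kafka producer; the bootstrap address is illustrative, and PaymentReceived with its circe Encoder comes from the original project.

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig}
import org.apache.kafka.common.serialization.StringSerializer
import ru.pavkin.payment.event.PaymentReceived
import ru.pavkin.payment.kafka.PaymentReceivedEventSerializer

object PaymentProducerWiring {
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[PaymentReceivedEventSerializer].getName)

  // Values of type PaymentReceived are serialized to JSON by the custom serializer above.
  val producer = new KafkaProducer[String, PaymentReceived](props)
}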
Example 6
Source File: KafkaTestClient.scala    From haystack-traces   with Apache License 2.0
package com.expedia.www.haystack.trace.indexer.integration.clients

import java.util.Properties

import com.expedia.www.haystack.trace.indexer.config.entities.KafkaConfiguration
import com.expedia.www.haystack.trace.indexer.integration.serdes.{SnappyCompressedSpanBufferProtoDeserializer, SpanProtoSerializer}
import com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster

object KafkaTestClient {
  val KAFKA_CLUSTER = new EmbeddedKafkaCluster(1)
  KAFKA_CLUSTER.start()
}

class KafkaTestClient {
  import KafkaTestClient._

  val INPUT_TOPIC = "spans"
  val OUTPUT_TOPIC = "span-buffer"

  val APP_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[ByteArraySerializer])
    props
  }

  val APP_CONSUMER_CONFIG: Properties = new Properties()

  val TEST_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[SpanProtoSerializer])
    props
  }

  val RESULT_CONSUMER_CONFIG = new Properties()

  def buildConfig = KafkaConfiguration(numStreamThreads = 1,
    pollTimeoutMs = 100,
    APP_CONSUMER_CONFIG, APP_PRODUCER_CONFIG, OUTPUT_TOPIC, INPUT_TOPIC,
    consumerCloseTimeoutInMillis = 3000,
    commitOffsetRetries = 3,
    commitBackoffInMillis = 250,
    maxWakeups = 5, wakeupTimeoutInMillis = 3000)

  def prepare(appId: String): Unit = {
    APP_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    APP_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-app-consumer")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SpanDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")

    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-result-consumer")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SnappyCompressedSpanBufferProtoDeserializer])

    deleteTopics(INPUT_TOPIC, OUTPUT_TOPIC)
    KAFKA_CLUSTER.createTopic(INPUT_TOPIC, 2, 1)
    KAFKA_CLUSTER.createTopic(OUTPUT_TOPIC)
  }

  private def deleteTopics(topics: String*): Unit = KAFKA_CLUSTER.deleteTopicsAndWait(topics:_*)
} 
Example 7
Source File: KafkaClient.scala    From mist   with Apache License 2.0
package io.hydrosphere.mist.master.interfaces.async.kafka

import java.util.UUID
import java.util.concurrent.atomic.AtomicBoolean

import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

import scala.collection.JavaConverters._
import scala.concurrent.{Future, Promise}

class TopicProducer[K, V](
  producer: KafkaProducer[K, V],
  topic: String
) {

  def send(key:K, value: V): Unit = {
    val record = new ProducerRecord(topic, key, value)
    producer.send(record)
  }
  def close(): Unit = {
    producer.close()
  }

}

object TopicProducer {

  def apply(
    host: String,
    port: Int,
    topic: String): TopicProducer[String, String] = {

    val props = new java.util.Properties()
    props.put("bootstrap.servers", s"$host:$port")

    val producer = new KafkaProducer(props, new StringSerializer, new StringSerializer)
    new TopicProducer(producer, topic)
  }
}

class TopicConsumer[K, V](
  consumer: KafkaConsumer[K, V],
  topic: String,
  timeout: Long = 100
) {

  private val promise = Promise[Unit]
  private val stopped = new AtomicBoolean(false)

  def subscribe(f: (K, V) => Unit): Future[Unit] = {
    run(f)
    promise.future
  }

  private def run(f: (K, V) => Unit): Unit = {
    consumer.subscribe(Seq(topic).asJava)
    val thread = new Thread(new Runnable {
      override def run(): Unit = {
        while (!stopped.get()) {
          val records = consumer.poll(timeout).asScala
          records.foreach(r => f(r.key(), r.value()))
        }
        promise.success(())
      }
    })
    thread.setName(s"kafka-topic-consumer-$topic")
    thread.start()
  }

  def close(): Future[Unit] = {
    stopped.set(true)
    promise.future
  }
}

object TopicConsumer {

  def apply(
    host: String,
    port: Int,
    topic: String): TopicConsumer[String, String] = {

    val props = new java.util.Properties()
    props.put("bootstrap.servers", s"$host:$port")
    props.put("group.id", "mist-" + UUID.randomUUID().toString)
    props.put("enable.auto.commit", "true")
    props.put("auto.commit.interval.ms", "1000")
    props.put("session.timeout.ms", "30000")

    val consumer = new KafkaConsumer(props, new StringDeserializer, new StringDeserializer)
    new TopicConsumer(consumer, topic)
  }

} 
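A brief usage sketch (not part of mist) showing how the two helpers above fit together; host, port, topic and the message are placeholders.

import io.hydrosphere.mist.master.interfaces.async.kafka.{TopicConsumer, TopicProducer}

object TopicClientExample extends App {
  val producer = TopicProducer("localhost", 9092, "mist-topic")
  val consumer = TopicConsumer("localhost", 9092, "mist-topic")

  // subscribe starts a background poll loop; its Future completes after close() is called.
  consumer.subscribe((key, value) => println(s"$key -> $value"))

  producer.send("job-1", """{"status": "started"}""")

  producer.close()
  consumer.close()
}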
Example 8
Source File: TestProducer.scala    From asura   with MIT License
package asura.kafka.producer

import akka.Done
import akka.actor.ActorSystem
import akka.kafka.ProducerSettings
import akka.kafka.scaladsl.Producer
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Source
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.Future

object TestProducer extends StrictLogging {

  def main(args: Array[String]): Unit = {

    logger.info("Start producer")

    implicit val system = ActorSystem("producer")
    implicit val materializer = ActorMaterializer()
    implicit val ec = system.dispatcher

    val producerSettings = ProducerSettings(system, new StringSerializer, new StringSerializer)
    val done: Future[Done] =
      Source(1 to 100)
        .map(value => new ProducerRecord[String, String]("test-topic", s"msg ${value}"))
        .runWith(Producer.plainSink(producerSettings))

    done onComplete {
      case scala.util.Success(_) => logger.info("Done"); system.terminate()
      case scala.util.Failure(err) => logger.error(err.toString); system.terminate()
    }
  }
} 
Example 9
Source File: package.scala    From kafka-scala-api   with Apache License 2.0
package com

import java.util.Collections

import cakesolutions.kafka.{KafkaConsumer, KafkaProducer}
import cakesolutions.kafka.KafkaProducer.Conf
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

package object example {

  val topic = "sample_topic"

  val kafkaProducer = KafkaProducer(
    Conf(new StringSerializer(), new StringSerializer(), bootstrapServers = "localhost:9092")
  )

  val kafkaProducerConf = KafkaProducer.Conf(
    new StringSerializer, new StringSerializer,
    bootstrapServers = "localhost:9092"
  )

  val kafkaConsumerConf = KafkaConsumer.Conf(
    new StringDeserializer,
    new StringDeserializer,
    groupId = "test_group",
    enableAutoCommit = false,
    autoOffsetReset = OffsetResetStrategy.EARLIEST,
    bootstrapServers = "localhost:9092")
} 
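A small hedged sketch using the producer and consumer configuration from the package object above; the key and value are arbitrary.

import cakesolutions.kafka.{KafkaConsumer, KafkaProducerRecord}
import com.example._

import scala.collection.JavaConverters._

object RoundTripExample extends App {
  // Send one record with the preconfigured producer, then read it back.
  kafkaProducer.send(KafkaProducerRecord(topic, Some("key"), "value"))
  kafkaProducer.flush()

  val consumer = KafkaConsumer(kafkaConsumerConf)
  consumer.subscribe(List(topic).asJava)
  consumer.poll(1000).asScala.foreach(r => println(s"${r.key} -> ${r.value}"))
  consumer.close()
}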
Example 10
Source File: package.scala    From kafka-scala-api   with Apache License 2.0
package com

import akka.actor.ActorSystem
import akka.kafka.{ConsumerSettings, ProducerSettings}
import akka.stream.ActorMaterializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}

package object example {
  implicit val system = ActorSystem("FlowProducerMain")
  implicit val materializer = ActorMaterializer()

  val producerSettings = ProducerSettings(system, new ByteArraySerializer, new StringSerializer)
    .withBootstrapServers("localhost:9092")

  val topic = "sample_topic"
  val topic1 = "topic1"
  val topic2 = "topic2"


  val consumerSettings =
    ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
      .withBootstrapServers("localhost:9092")
      .withGroupId("group1")
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest")
} 
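A hedged sketch (not in the original sources) showing the settings above in use with Alpakka Kafka's plain sink and source; the message contents are illustrative.

import akka.kafka.Subscriptions
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.stream.scaladsl.{Sink, Source}
import com.example._
import org.apache.kafka.clients.producer.ProducerRecord

object PlainSinkAndSourceExample extends App {
  // Write ten messages to the sample topic...
  Source(1 to 10)
    .map(n => new ProducerRecord[Array[Byte], String](topic, s"message $n"))
    .runWith(Producer.plainSink(producerSettings))

  // ...and stream the topic back, printing each value.
  Consumer.plainSource(consumerSettings, Subscriptions.topics(topic))
    .map(_.value)
    .runWith(Sink.foreach(println))
}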
Example 11
Source File: PredictionLogger.scala    From ForestFlow   with Apache License 2.0
package ai.forestflow.event.subscribers

import java.nio.ByteOrder

import ai.forestflow.domain.{PredictionEvent, PredictionEventGP}
import ai.forestflow.serving.config.ApplicationEnvironment
import akka.actor.{Actor, ActorLogging, Props}
import akka.kafka.ProducerSettings
import graphpipe.InferRequest
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringSerializer}
//import scalapb.json4s.JsonFormat

import scala.util.{Success, Try}

object PredictionLogger {
  // The companion object's contents are elided in this excerpt.
}

// The class declaration below is reconstructed from context: the original file defines an Akka
// actor parameterised by two optional topic names.
class PredictionLogger(basic_topic: Option[String], gp_topic: Option[String]) extends Actor with ActorLogging {

  // `producerConfig` is defined elsewhere in the original file; reading the standard
  // akka.kafka.producer section is an assumption made here to keep the excerpt self-contained.
  private lazy val producerConfig = context.system.settings.config.getConfig("akka.kafka.producer")

  private lazy val binaryProducerSettings =
    ProducerSettings(producerConfig, new StringSerializer, new ByteArraySerializer)
  private lazy val binaryProducer = binaryProducerSettings.createKafkaProducer()

  override def preStart(): Unit = {
    if (basic_topic.isDefined)
      context.system.eventStream.subscribe(self, classOf[PredictionEvent])

    if (gp_topic.isDefined)
      context.system.eventStream.subscribe(self, classOf[PredictionEventGP])
    super.preStart()
  }
  override def receive: Receive = {
    case event@PredictionEvent(prediction, servedRequest, inferenceRequest, loggingSettings) =>

      val key = loggingSettings
        .keyFeatures
        .flatMap(inferenceRequest.configs.get)
        .mkString(loggingSettings.getKeyFeaturesSeparator)

      if (key.length > 0 )
        binaryProducer.send(new ProducerRecord(basic_topic.get, key, event.toByteArray))
      else
        binaryProducer.send(new ProducerRecord(basic_topic.get, event.toByteArray))

    case event@PredictionEventGP(prediction, servedRequest, inferBytes, loggingSettings) =>
      Try {
        val req = graphpipe.Request.getRootAsRequest(inferBytes.asReadOnlyByteBuffer().order(ByteOrder.LITTLE_ENDIAN))
        val inferRequest = req.req(new InferRequest()).asInstanceOf[InferRequest]
        val inferConfigs = inferRequest.config()
          .split(",")
          .map(_.split(":"))
          .flatMap {
            case Array(k, v) => Some((k, v))
            case _           => None
          }
          .toMap

        loggingSettings
          .keyFeatures
          .flatMap(inferConfigs.get)
          .mkString(loggingSettings.getKeyFeaturesSeparator)

      } match {
        case Success(key) =>
          binaryProducer.send(new ProducerRecord(gp_topic.get, key, event.toByteArray))
        case _ =>
          binaryProducer.send(new ProducerRecord(gp_topic.get, event.toByteArray))
      }

    case _ => // ignore
  }
} 
Example 12
Source File: KafkaReporter.scala    From Swallow   with Apache License 2.0
package com.intel.hibench.common.streaming.metrics

import java.util.Properties

import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer}
import org.apache.kafka.common.serialization.StringSerializer


class KafkaReporter(topic: String, bootstrapServers: String) extends LatencyReporter {

  private val producer = ProducerSingleton.getInstance(bootstrapServers)

  override def report(startTime: Long, endTime: Long): Unit = {
    producer.send(new ProducerRecord[String, String](topic, null, s"$startTime:$endTime"))
  }
}

object ProducerSingleton {
  @volatile private var instance : Option[KafkaProducer[String, String]] = None

  def getInstance(bootstrapServers: String): KafkaProducer[String, String] = synchronized {
    if (!instance.isDefined) {
      synchronized {
        if(!instance.isDefined) {
          val props = new Properties()
          props.put("bootstrap.servers", bootstrapServers)
          instance = Some(new KafkaProducer(props, new StringSerializer, new StringSerializer))
        }
      }
    }
    instance.get
  }
} 
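A short usage sketch (not from the original benchmark) of the reporter above; the topic, broker address, and timestamps are placeholders.

import com.intel.hibench.common.streaming.metrics.KafkaReporter

object LatencyReportExample extends App {
  val reporter = new KafkaReporter("latency-topic", "localhost:9092")

  val start = System.currentTimeMillis()
  Thread.sleep(50) // stand-in for the work being measured
  val end = System.currentTimeMillis()

  // Publishes the record "start:end" (with a null key) to the latency topic.
  reporter.report(start, end)
}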
Example 13
Source File: KafkaProducerUtils.scala    From bigdata-examples   with Apache License 2.0
package com.timeyang.common.util

import java.util.Properties

import com.timeyang.common.config.BaseConf
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object KafkaProducerUtils {

  @volatile lazy private val producer: KafkaProducer[String, String] = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BaseConf.kafkaBrokerList)
    props.put("acks", "all")
    props.put("retries", 1: Integer)
    props.put("batch.size", 16384: Integer)
    props.put("linger.ms", 1: Integer)
    props.put("buffer.memory", 33554432: Integer)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])

    new KafkaProducer[String, String](props)
  }

  def sendJsonMessages(topic: String, event: Object, events: Object*): Unit = {
    for (event <- event +: events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, events: List[Object]): Unit = {
    for (event <- events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, event: Object): Unit = {
    val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
    producer.send(record)
  }

} 
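A hedged usage sketch of the utility above; DeviceEvent and the topic name are made up for illustration, and serialization goes through the project's JsonUtils.

import com.timeyang.common.util.KafkaProducerUtils

case class DeviceEvent(id: String, temperature: Double)

object KafkaProducerUtilsExample extends App {
  KafkaProducerUtils.send("device-events", DeviceEvent("d-1", 21.5))
  KafkaProducerUtils.sendJsonMessages("device-events", DeviceEvent("d-2", 19.0), DeviceEvent("d-3", 23.4))
  KafkaProducerUtils.send("device-events", List(DeviceEvent("d-4", 18.2), DeviceEvent("d-5", 25.1)))
}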
Example 14
Source File: ConsumerToProducer.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka.examples

import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props}
import cakesolutions.kafka.akka.KafkaConsumerActor.{Confirm, Subscribe}
import cakesolutions.kafka.akka._
import cakesolutions.kafka.{KafkaConsumer, KafkaProducer}
import com.typesafe.config.{Config, ConfigFactory}
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

import scala.concurrent.duration._


// The enclosing companion object declaration was dropped from this excerpt; it is restored here
// so the braces balance (the original object also contains bootstrap code, elided).
object ConsumerToProducer {

  def apply(consumerConfig: Config, producerConfig: Config): ActorRef = {

    // Create KafkaConsumerActor config with bootstrap.servers specified in Typesafe config
    val consumerConf = KafkaConsumer.Conf(
      new StringDeserializer,
      new StringDeserializer,
      groupId = "test_group",
      enableAutoCommit = false,
      autoOffsetReset = OffsetResetStrategy.EARLIEST)
      .withConf(consumerConfig)

    val actorConf = KafkaConsumerActor.Conf(1.seconds, 3.seconds, 5)

    // Create KafkaProducerActor config with defaults and bootstrap.servers specified in Typesafe config
    val producerConf = KafkaProducer.Conf(new StringSerializer, new StringSerializer).withConf(producerConfig)

    val system = ActorSystem()
    system.actorOf(Props(new ConsumerToProducer(consumerConf, actorConf, producerConf)))
  }
}

class ConsumerToProducer(
  kafkaConfig: KafkaConsumer.Conf[String, String],
  actorConfig: KafkaConsumerActor.Conf,
  producerConf: KafkaProducer.Conf[String, String]) extends Actor with ActorLogging {

  private val recordsExt = ConsumerRecords.extractor[String, String]

  // The KafkaConsumerActor
  private val consumer = context.actorOf(
    KafkaConsumerActor.props(kafkaConfig, actorConfig, self)
  )
  context.watch(consumer)

  // The KafkaProducerActor
  private val producer = context.actorOf(KafkaProducerActor.props(producerConf))

  consumer ! Subscribe.AutoPartition(List("topic1"))

  override def receive: Receive = {

    // Records from Kafka
    case recordsExt(records) =>
      processRecords(records)

    // Confirmed Offsets from KafkaProducer
    case o: Offsets =>
      consumer ! Confirm(o, commit = true)
  }

  // Demonstrates some transformation of the messages before forwarding to KafkaProducer
  private def processRecords(records: ConsumerRecords[String, String]) = {
    val transformedRecords = records.pairs.map { case (key, value) =>
      (key, value + ".")
    }

    // Send records to Topic2.  Offsets will be sent back to this actor once confirmed.
    producer ! ProducerRecords.fromKeyValues[String, String]("topic2", transformedRecords, Some(records.offsets), None)

    // Could have sent them like this if we didn't first transform:
    // producer ! ProducerRecords.fromConsumerRecords("topic2", records, None)
  }
} 
Example 15
Source File: KafkaConsumerPerfSpec.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka

import cakesolutions.kafka.KafkaConsumer.Conf
import com.typesafe.config.ConfigFactory
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.util.Random


class KafkaConsumerPerfSpec extends FlatSpecLike
  with Matchers
  with BeforeAndAfterAll {

  val log = LoggerFactory.getLogger(getClass)

  val config = ConfigFactory.load()

  val msg1k = scala.io.Source.fromInputStream(getClass.getResourceAsStream("/1k.txt")).mkString

  val consumer = KafkaConsumer(
    Conf(config.getConfig("consumer"),
      new StringDeserializer,
      new StringDeserializer)
  )

  private def randomString: String = Random.alphanumeric.take(5).mkString("")

  "Kafka Consumer with single partition topic" should "perform" in {
    val topic = randomString
    val producerConf = KafkaProducer.Conf(config.getConfig("producer"), new StringSerializer, new StringSerializer)
    val producer = KafkaProducer[String, String](producerConf)

    1 to 100000 foreach { n =>
      producer.send(KafkaProducerRecord(topic, None, msg1k))
    }
    producer.flush()
    log.info("Delivered 100000 msg to topic {}", topic)

    consumer.subscribe(List(topic).asJava)

    var start = 0L

    var total = 0

    while (total < 100000) {
      if(total == 0)
        start = System.currentTimeMillis()
      val count = consumer.poll(1000).count()
      total += count
    }

    val totalTime = System.currentTimeMillis() - start
    val messagesPerSec = 100000L * 1000 / totalTime // multiply before dividing to avoid integer truncation
    log.info("Total Time millis : {}", totalTime)
    log.info("Messages per sec  : {}", messagesPerSec)

    totalTime should be < 4000L

    consumer.close()
    producer.close()
  }
} 
Example 16
Source File: IdempotentProducerSpec.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka

import org.apache.kafka.clients.consumer.ConsumerRecords
import org.apache.kafka.common.KafkaException
import org.apache.kafka.common.requests.IsolationLevel
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.util.Random

class IdempotentProducerSpec extends KafkaIntSpec {
  private val log = LoggerFactory.getLogger(getClass)

  private def randomString: String = Random.alphanumeric.take(5).mkString("")

  val idempotentProducerConfig: KafkaProducer.Conf[String, String] =
    KafkaProducer.Conf(new StringSerializer(),
      new StringSerializer(),
      bootstrapServers = s"localhost:$kafkaPort",
      enableIdempotence = true)

  val transactionalProducerConfig: KafkaProducer.Conf[String, String] =
    KafkaProducer.Conf(new StringSerializer(),
      new StringSerializer(),
      bootstrapServers = s"localhost:$kafkaPort",
      transactionalId = Some("t1"),
      enableIdempotence = true)

  val consumerConfig: KafkaConsumer.Conf[String, String] =
    KafkaConsumer.Conf(new StringDeserializer(),
      new StringDeserializer(),
      bootstrapServers = s"localhost:$kafkaPort",
      groupId = randomString,
      enableAutoCommit = false)

  val transactionConsumerConfig: KafkaConsumer.Conf[String, String] =
    KafkaConsumer.Conf(new StringDeserializer(),
      new StringDeserializer(),
      bootstrapServers = s"localhost:$kafkaPort",
      groupId = randomString,
      enableAutoCommit = false,
      isolationLevel = IsolationLevel.READ_COMMITTED)

  "Producer with idempotent config" should "deliver batch" in {
    val topic = randomString
    log.info(s"Using topic [$topic] and kafka port [$kafkaPort]")

    val producer = KafkaProducer(idempotentProducerConfig)
    val consumer = KafkaConsumer(consumerConfig)

    consumer.subscribe(List(topic).asJava)

    val records1 = consumer.poll(1000)
    records1.count() shouldEqual 0

    log.info("Kafka producer connecting on port: [{}]", kafkaPort)
    producer.send(KafkaProducerRecord(topic, Some("key"), "value"))
    producer.flush()

    val records2: ConsumerRecords[String, String] = consumer.poll(1000)
    records2.count() shouldEqual 1

    producer.close()
    consumer.close()
  }

  "Producer with transaction" should "deliver batch" in {
    val topic = randomString
    log.info(s"Using topic [$topic] and kafka port [$kafkaPort]")

    val producer = KafkaProducer(transactionalProducerConfig)
    val consumer = KafkaConsumer(transactionConsumerConfig)

    consumer.subscribe(List(topic).asJava)

    val records1 = consumer.poll(1000)
    records1.count() shouldEqual 0

    log.info("Kafka producer connecting on port: [{}]", kafkaPort)

    producer.initTransactions()

    try {
      producer.beginTransaction()
      producer.send(KafkaProducerRecord(topic, Some("key"), "value"))
      producer.commitTransaction()
    } catch {
      case ex: KafkaException =>
        log.error(ex.getMessage, ex)
        producer.abortTransaction()
    }

    val records2: ConsumerRecords[String, String] = consumer.poll(1000)
    records2.count() shouldEqual 1

    producer.close()
    consumer.close()
  }
} 
Example 17
Source File: KafkaProducerActorSpec.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka.akka

import akka.actor.ActorSystem
import akka.testkit.TestProbe
import cakesolutions.kafka.{KafkaConsumer, KafkaProducer, KafkaProducerRecord}
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

import scala.util.Random

class KafkaProducerActorSpec(system_ : ActorSystem) extends KafkaIntSpec(system_) {

  def this() = this(ActorSystem("KafkaProducerActorSpec"))

  private def randomString: String = Random.alphanumeric.take(5).mkString("")

  val deserializer = new StringDeserializer
  val consumerConf = KafkaConsumer.Conf(
    deserializer, deserializer,
    bootstrapServers = s"localhost:$kafkaPort",
    groupId = "test",
    enableAutoCommit = false,
    autoOffsetReset = OffsetResetStrategy.EARLIEST
  )

  val serializer = new StringSerializer
  val producerConf = KafkaProducer.Conf(serializer, serializer, bootstrapServers = s"localhost:$kafkaPort")

  "KafkaProducerActor" should "write a given batch to Kafka" in {
    val topic = randomString
    val probe = TestProbe()
    val producer = system.actorOf(KafkaProducerActor.props(producerConf))
    val batch: Seq[ProducerRecord[String, String]] = Seq(
      KafkaProducerRecord(topic, "foo"),
      KafkaProducerRecord(topic, "key", "value"),
      KafkaProducerRecord(topic, "bar"))
    val message = ProducerRecords(batch, Some('response))

    probe.send(producer, message)

    probe.expectMsg('response)

    val results = consumeFromTopic(topic, 3, 10000)

    results(0) shouldEqual ((None, "foo"))
    results(1) shouldEqual ((Some("key"), "value"))
    results(2) shouldEqual ((None, "bar"))
  }

  "KafkaProducerActor" should "write a given batch to Kafka, requiring no response" in {
    import scala.concurrent.duration._

    val topic = randomString
    val probe = TestProbe()
    val producer = system.actorOf(KafkaProducerActor.props(producerConf))
    val batch: Seq[ProducerRecord[String, String]] = Seq(
      KafkaProducerRecord(topic, "foo"),
      KafkaProducerRecord(topic, "key", "value"),
      KafkaProducerRecord(topic, "bar")
    )
    val message = ProducerRecords(batch)

    probe.send(producer, message)

    probe.expectNoMessage(3.seconds)

    val results = consumeFromTopic(topic, 3, 10000)

    results(0) shouldEqual ((None, "foo"))
    results(1) shouldEqual ((Some("key"), "value"))
    results(2) shouldEqual ((None, "bar"))
  }

  private def consumeFromTopic(topic: String, expectedNumOfMessages: Int, timeout: Long) =
    kafkaServer.consume(topic, expectedNumOfMessages, timeout, deserializer, deserializer)
} 
Example 18
Source File: UseCase.scala    From Fast-Data-Processing-Systems-with-SMACK-Stack   with MIT License
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{Sink, Source}
import com.softwaremill.react.kafka.KafkaMessages._
import org.apache.kafka.common.serialization.{StringSerializer, StringDeserializer}
import com.softwaremill.react.kafka.{ProducerMessage, ConsumerProperties, ProducerProperties, ReactiveKafka}
import org.reactivestreams.{ Publisher, Subscriber }

implicit val actorSystem = ActorSystem("ReactiveKafka")
implicit val materializer = ActorMaterializer()

val kafka = new ReactiveKafka()
val publisher: Publisher[StringConsumerRecord] = kafka.consume(ConsumerProperties(
 bootstrapServers = "localhost:9092",
 topic = "lowercaseStrings",
 groupId = "groupName",
 valueDeserializer = new StringDeserializer()
))

val subscriber: Subscriber[StringProducerMessage] = kafka.publish(ProducerProperties(
  bootstrapServers = "localhost:9092",
  topic = "uppercaseStrings",
  valueSerializer = new StringSerializer()
))

Source.fromPublisher(publisher).map(m => ProducerMessage(m.value().toUpperCase))
  .to(Sink.fromSubscriber(subscriber)).run() 
Example 19
import akka.actor.ActorSystem
import akka.kafka._
import akka.kafka.scaladsl._
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{Sink, Source}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.kafka.common.serialization.ByteArraySerializer

// The snippet assumes an ActorSystem and materializer; they are spelled out here.
implicit val system = ActorMaterializer // placeholder removed below
implicit val materializer = ActorMaterializer()

val producerSettings = ProducerSettings(system, new ByteArraySerializer, new StringSerializer)
  .withBootstrapServers("localhost:9092")

// Plain sink: every element becomes a ProducerRecord and is written to topic1.
Source(1 to 10000)
  .map(_.toString)
  .map(elem => new ProducerRecord[Array[Byte], String]("topic1", elem))
  .runWith(Producer.plainSink(producerSettings))

// Producer.flow emits one result per message, carrying the record, its offset and the pass-through value.
Source(1 to 10000)
  .map(elem => ProducerMessage.Message(new ProducerRecord[Array[Byte], String]("topic1", elem.toString), elem))
  .via(Producer.flow(producerSettings))
  .map { result =>
    val record = result.message.record
    println(s"${record.topic}/${record.partition} ${result.offset}: ${record.value} (${result.message.passThrough})")
    result
  }
  .runWith(Sink.ignore)
Example 20
Source File: DataStreamer.scala    From structured-streaming-application   with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random


object DataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val someWords = List("about", "above", "after", "again", "against")

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 200 milliseconds) {
    Random.shuffle(someWords).headOption.foreach { word =>
      producer.send(new ProducerRecord[String, String](topic, word))
    }
  }
} 
Example 21
Source File: WordCountProducer.scala    From akka_streams_tutorial   with MIT License
package alpakka.kafka

import java.util
import java.util.concurrent.ThreadLocalRandom

import akka.actor.ActorSystem
import akka.kafka.ProducerMessage.Message
import akka.kafka.ProducerSettings
import akka.kafka.scaladsl.Producer
import akka.stream.ThrottleMode
import akka.stream.scaladsl.{Keep, Sink, Source}
import akka.{Done, NotUsed}
import org.apache.kafka.clients.producer.{Partitioner, ProducerRecord}
import org.apache.kafka.common.errors.{NetworkException, UnknownTopicOrPartitionException}
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.kafka.common.{Cluster, PartitionInfo}

import scala.concurrent.Future
import scala.concurrent.duration._


class CustomPartitioner extends Partitioner {
  override def partition(topic: String, key: Any, keyBytes: Array[Byte], value: Any, valueBytes: Array[Byte], cluster: Cluster): Int = {
    val partitionInfoList: util.List[PartitionInfo] = cluster.availablePartitionsForTopic(topic)
    val partitionCount = partitionInfoList.size
    val fakeNewsPartition = 0

    //println("CustomPartitioner received key: " + key + " and value: " + value)

    if (value.toString.contains(WordCountProducer.fakeNewsKeyword)) {
      //println("CustomPartitioner send message: " + value + " to fakeNewsPartition")
      fakeNewsPartition
    }
    else ThreadLocalRandom.current.nextInt(1, partitionCount) //round robin
  }

  override def close(): Unit = {
    println("CustomPartitioner: " + Thread.currentThread + " received close")
  }

  override def configure(configs: util.Map[String, _]): Unit = {
    println("CustomPartitioner received configure with configuration: " + configs)
  }
}

object CustomPartitioner {
  private def deserialize[V](objectData: Array[Byte]): V = org.apache.commons.lang3.SerializationUtils.deserialize(objectData).asInstanceOf[V]
} 
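The WordCountProducer object itself is not shown in this excerpt; the hedged sketch below illustrates one way the custom partitioner above could be registered with a producer, via the standard partitioner.class property (names and addresses are placeholders).

import akka.actor.ActorSystem
import akka.kafka.ProducerSettings
import alpakka.kafka.CustomPartitioner
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.StringSerializer

object CustomPartitionerWiring {
  implicit val system: ActorSystem = ActorSystem("WordCountProducerSketch")

  // Kafka instantiates the partitioner itself, so it is passed by class name.
  val producerSettings: ProducerSettings[String, String] =
    ProducerSettings(system, new StringSerializer, new StringSerializer)
      .withBootstrapServers("localhost:9092")
      .withProperty(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[CustomPartitioner].getName)
}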
Example 22
Source File: SKRSpec.scala    From spark-kafka-writer   with Apache License 2.0
package com.github.benfradet.spark.kafka.writer

import java.util.concurrent.atomic.AtomicInteger

import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.scalatest.concurrent.Eventually
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}

import scala.collection.mutable.ArrayBuffer
import scala.util.Random
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

case class Foo(a: Int, b: String)

trait SKRSpec
  extends AnyWordSpec
  with Matchers
  with BeforeAndAfterEach
  with BeforeAndAfterAll
  with Eventually {

  val sparkConf = new SparkConf()
    .setMaster("local[1]")
    .setAppName(getClass.getSimpleName)

  var ktu: KafkaTestUtils = _
  override def beforeAll(): Unit = {
    ktu = new KafkaTestUtils
    ktu.setup()
  }
  override def afterAll(): Unit = {
    SKRSpec.callbackTriggerCount.set(0)
    if (ktu != null) {
      ktu.tearDown()
      ktu = null
    }
  }

  var topic: String = _
  var ssc: StreamingContext = _
  var spark: SparkSession = _
  override def afterEach(): Unit = {
    if (ssc != null) {
      ssc.stop()
      ssc = null
    }
    if (spark != null) {
      spark.stop()
      spark = null
    }
  }
  override def beforeEach(): Unit = {
    ssc = new StreamingContext(sparkConf, Seconds(1))
    spark = SparkSession.builder
      .config(sparkConf)
      .getOrCreate()
    topic = s"topic-${Random.nextInt()}"
    ktu.createTopics(topic)
  }

  def collect(ssc: StreamingContext, topic: String): ArrayBuffer[String] = {
    val kafkaParams = Map(
      "bootstrap.servers" -> ktu.brokerAddress,
      "auto.offset.reset" -> "earliest",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "test-collect"
    )
    val results = new ArrayBuffer[String]
    KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Set(topic), kafkaParams)
    ).map(_.value())
      .foreachRDD { rdd =>
        results ++= rdd.collect()
        ()
      }
    results
  }

  val producerConfig = Map(
    "bootstrap.servers" -> "127.0.0.1:9092",
    "key.serializer" -> classOf[StringSerializer].getName,
    "value.serializer" -> classOf[StringSerializer].getName
  )
}

object SKRSpec {
  val callbackTriggerCount = new AtomicInteger()
} 
Example 23
Source File: KafkaProducerCacheSpec.scala    From spark-kafka-writer   with Apache License 2.0
package com.github.benfradet.spark.kafka.writer

import com.google.common.cache.Cache
import org.apache.kafka.clients.producer._
import org.apache.kafka.common.serialization.StringSerializer
import org.scalatest.PrivateMethodTester

import scala.concurrent.duration._

class KafkaProducerCacheSpec extends SKRSpec with PrivateMethodTester {
  val cache = PrivateMethod[Cache[Seq[(String, Object)], KafkaProducer[_, _]]]('cache)
  val m1 = Map(
    "bootstrap.servers" -> "127.0.0.1:9092",
    "key.serializer" -> classOf[StringSerializer].getName,
    "value.serializer" -> classOf[StringSerializer].getName
  )
  val m2 = m1 + ("acks" -> "0")

  override def beforeAll(): Unit = {
    super.beforeAll()
    KafkaProducerCache.invokePrivate(cache()).invalidateAll()
  }

  "A KafkaProducerCache" when {
    "calling getProducer" should {
      "create the producer if it doesn't exist and retrieve it if it exists" in {
        cacheSize shouldBe 0
        val p1 = KafkaProducerCache.getProducer[String, String](m1)
        cacheSize shouldBe 1
        val p2 = KafkaProducerCache.getProducer[String, String](m1)
        p1 shouldBe p2
        cacheSize shouldBe 1
      }
    }

    "closing a producer" should {
      "close the correct producer" in {
        cacheSize shouldBe 1
        val p1 = KafkaProducerCache.getProducer[String, String](m1)
        cacheSize shouldBe 1
        val p2 = KafkaProducerCache.getProducer[String, String](m2)
        cacheSize shouldBe 2
        p1 should not be p2
        KafkaProducerCache.close(m1)
        cacheSize shouldBe 1
      }
    }
  }

  private def cacheSize: Int = KafkaProducerCache.invokePrivate(cache()).asMap.size
} 
Example 24
Source File: EventProducer.scala    From rokku   with Apache License 2.0
package com.ing.wbaa.rokku.proxy.provider.kafka

import akka.Done
import akka.http.scaladsl.model.HttpMethod
import com.ing.wbaa.rokku.proxy.config.KafkaSettings
import com.ing.wbaa.rokku.proxy.data.RequestId
import com.ing.wbaa.rokku.proxy.handler.LoggerHandlerWithId
import com.ing.wbaa.rokku.proxy.metrics.MetricsFactory
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata }
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.{ ExecutionContext, Future }

trait EventProducer {

  private val logger = new LoggerHandlerWithId

  import scala.collection.JavaConverters._

  protected[this] implicit val kafkaSettings: KafkaSettings

  protected[this] implicit val executionContext: ExecutionContext

  private lazy val config: Map[String, Object] =
    Map[String, Object](
      "bootstrap.servers" -> kafkaSettings.bootstrapServers,
      ProducerConfig.RETRIES_CONFIG -> kafkaSettings.retries,
      ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG -> kafkaSettings.retriesBackOff,
      ProducerConfig.RECONNECT_BACKOFF_MAX_MS_CONFIG -> kafkaSettings.retriesBackOffMax,
      CommonClientConfigs.SECURITY_PROTOCOL_CONFIG -> kafkaSettings.protocol,
      ProducerConfig.MAX_BLOCK_MS_CONFIG -> kafkaSettings.maxblock,
      ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG -> kafkaSettings.requestTimeoutMs,
      "ssl.truststore.location" -> kafkaSettings.sslTruststoreLocation,
      "ssl.truststore.password" -> kafkaSettings.sslTruststorePassword,
      "ssl.keystore.location" -> kafkaSettings.sslKeystoreLocation,
      "ssl.keystore.password" -> kafkaSettings.sslKeystorePassword,
      "ssl.key.password" -> kafkaSettings.sslKeyPassword
    )

  private lazy val kafkaProducer: KafkaProducer[String, String] = new KafkaProducer(config.asJava, new StringSerializer, new StringSerializer)

  def sendSingleMessage(event: String, topic: String, httpMethod: Option[HttpMethod] = None)(implicit id: RequestId): Future[Done] = {
    kafkaProducer
      .send(new ProducerRecord[String, String](topic, event), (metadata: RecordMetadata, exception: Exception) => {
        exception match {
          case e: Exception =>
            MetricsFactory.incrementKafkaSendErrors
            logger.error("error in sending event {} to topic {}, error={}", event, topic, e)
            throw new Exception(e)
          case _ =>
            httpMethod.map { m => MetricsFactory.incrementKafkaNotificationsSent(m) }
            logger.debug("Message sent {} to kafka, offset {}", event, metadata.offset())
        }
      }) match {
        case _ => Future(Done)
      }
  }
} 
Example 25
Source File: ProducerStream.scala    From reactive-kafka-microservice-template   with Apache License 2.0
package com.omearac.producers

import akka.actor.{ActorRef, ActorSystem}
import akka.kafka.ProducerSettings
import akka.kafka.scaladsl.Producer
import akka.stream.OverflowStrategy
import akka.stream.scaladsl.{Flow, Source}
import com.omearac.shared.JsonMessageConversion.Conversion
import com.omearac.shared.{AkkaStreams, EventSourcing}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringSerializer}



trait ProducerStream extends AkkaStreams with EventSourcing {
    implicit val system: ActorSystem
    def self: ActorRef

    def createStreamSource[msgType] = {
        Source.queue[msgType](Int.MaxValue,OverflowStrategy.backpressure)
    }

    def createStreamSink(producerProperties: Map[String, String]) = {
        val kafkaMBAddress = producerProperties("bootstrap-servers")
        val producerSettings = ProducerSettings(system, new ByteArraySerializer, new StringSerializer).withBootstrapServers(kafkaMBAddress)

        Producer.plainSink(producerSettings)
    }

    def createStreamFlow[msgType: Conversion](producerProperties: Map[String, String]) = {
        val numberOfPartitions = producerProperties("num.partitions").toInt -1
        val topicToPublish = producerProperties("publish-topic")
        val rand = new scala.util.Random
        val range = 0 to numberOfPartitions

        Flow[msgType].map { msg =>
            val partition = range(rand.nextInt(range.length))
            val stringJSONMessage = Conversion[msgType].convertToJson(msg)
            new ProducerRecord[Array[Byte], String](topicToPublish, partition, null, stringJSONMessage)
        }
    }
} 
Example 26
Source File: KafkaProducerConnector.scala    From openwhisk   with Apache License 2.0
package org.apache.openwhisk.connector.kafka

import akka.actor.ActorSystem
import akka.pattern.after
import org.apache.kafka.clients.producer._
import org.apache.kafka.common.errors._
import org.apache.kafka.common.serialization.StringSerializer
import pureconfig._
import pureconfig.generic.auto._
import org.apache.openwhisk.common.{Counter, Logging, TransactionId}
import org.apache.openwhisk.connector.kafka.KafkaConfiguration._
import org.apache.openwhisk.core.ConfigKeys
import org.apache.openwhisk.core.connector.{Message, MessageProducer}
import org.apache.openwhisk.core.entity.{ByteSize, UUIDs}
import org.apache.openwhisk.utils.Exceptions

import scala.collection.JavaConverters._
import scala.concurrent.duration._
import scala.concurrent.{blocking, ExecutionContext, Future, Promise}
import scala.util.{Failure, Success}

class KafkaProducerConnector(
  kafkahosts: String,
  id: String = UUIDs.randomUUID().toString,
  maxRequestSize: Option[ByteSize] = None)(implicit logging: Logging, actorSystem: ActorSystem)
    extends MessageProducer
    with Exceptions {

  implicit val ec: ExecutionContext = actorSystem.dispatcher
  private val gracefulWaitTime = 100.milliseconds

  override def sentCount(): Long = sentCounter.cur

  // The send(...) implementation from the original file is elided in this excerpt.

  override def close(): Unit = {
    logging.info(this, "closing producer")
    producer.close()
  }

  private val sentCounter = new Counter()

  private def createProducer(): KafkaProducer[String, String] = {
    val config = Map(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> kafkahosts) ++
      configMapToKafkaConfig(loadConfigOrThrow[Map[String, String]](ConfigKeys.kafkaCommon)) ++
      configMapToKafkaConfig(loadConfigOrThrow[Map[String, String]](ConfigKeys.kafkaProducer)) ++
      (maxRequestSize map { max =>
        Map("max.request.size" -> max.size.toString)
      } getOrElse Map.empty)

    verifyConfig(config, ProducerConfig.configNames().asScala.toSet)

    tryAndThrow("creating producer")(new KafkaProducer(config, new StringSerializer, new StringSerializer))
  }

  private def recreateProducer(): Unit = {
    logging.info(this, s"recreating producer")
    tryAndSwallow("closing old producer")(producer.close())
    logging.info(this, s"old producer closed")
    producer = createProducer()
  }

  @volatile private var producer = createProducer()
} 
Example 27
Source File: CacheInvalidator.scala    From openwhisk   with Apache License 2.0
package org.apache.openwhisk.core.database.cosmosdb.cache

import akka.Done
import akka.actor.{ActorSystem, CoordinatedShutdown}
import akka.kafka.ProducerSettings
import akka.stream.ActorMaterializer
import com.google.common.base.Throwables
import com.typesafe.config.Config
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.openwhisk.common.Logging
import org.apache.openwhisk.core.database.RemoteCacheInvalidation.cacheInvalidationTopic

import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}

object CacheInvalidator {

  val instanceId = "cache-invalidator"
  val whisksCollection = "whisks"

  def start(
    globalConfig: Config)(implicit system: ActorSystem, materializer: ActorMaterializer, log: Logging): Future[Done] = {
    implicit val ec: ExecutionContext = system.dispatcher
    val config = CacheInvalidatorConfig(globalConfig)
    val producer =
      KafkaEventProducer(
        kafkaProducerSettings(defaultProducerConfig(globalConfig)),
        cacheInvalidationTopic,
        config.eventProducerConfig)
    val observer = new WhiskChangeEventObserver(config.invalidatorConfig, producer)
    val feedConsumer = new ChangeFeedConsumer(whisksCollection, config, observer)
    feedConsumer.isStarted.andThen {
      case Success(_) =>
        registerShutdownTasks(system, feedConsumer, producer)
        log.info(this, s"Started the Cache invalidator service. ClusterId [${config.invalidatorConfig.clusterId}]")
      case Failure(t) =>
        log.error(this, "Error occurred while starting the Consumer" + Throwables.getStackTraceAsString(t))
    }
  }

  private def registerShutdownTasks(system: ActorSystem,
                                    feedConsumer: ChangeFeedConsumer,
                                    producer: KafkaEventProducer)(implicit ec: ExecutionContext, log: Logging): Unit = {
    CoordinatedShutdown(system).addTask(CoordinatedShutdown.PhaseBeforeServiceUnbind, "closeFeedListeners") { () =>
      feedConsumer
        .close()
        .flatMap { _ =>
          producer.close().andThen {
            case Success(_) =>
              log.info(this, "Kafka producer successfully shutdown")
          }
        }
    }
  }

  def kafkaProducerSettings(config: Config): ProducerSettings[String, String] =
    ProducerSettings(config, new StringSerializer, new StringSerializer)

  def defaultProducerConfig(globalConfig: Config): Config = globalConfig.getConfig("akka.kafka.producer")

} 
Example 28
Source File: StreamStreamDataGenerator.scala    From structured-streaming-application   with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config._
import knolx.KnolXLogger
import knolx.spark.Stock
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random


object StreamStreamDataGenerator extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay")
  val orderTypes = List("buy", "sell")
  val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9)
  val randomCompanyNames = Random.shuffle(companyNames).drop(Random.shuffle((1 to 3).toList).head)

  implicit val formats = Serialization.formats(NoTypeHints)

  info("Streaming companies listed into Kafka...")
  system.scheduler.schedule(0 seconds, 20 seconds) {
    randomCompanyNames.foreach { name =>
      producer.send(new ProducerRecord[String, String](companiesTopic, name))
    }
  }

  info("Streaming stocks data into Kafka...")
  system.scheduler.schedule(0 seconds, 5 seconds) {
    companyNames.foreach { name =>
      val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head)
      producer.send(new ProducerRecord[String, String](stocksTopic, write(stock)))
    }
  }
} 
Example 29
Source File: EmbeddedKafkaUnavailableSpec.scala    From scalatest-embedded-kafka   with MIT License
package net.manub.embeddedkafka

import org.apache.kafka.common.serialization.StringSerializer
import org.scalatest.BeforeAndAfterAll
import org.scalatest.tagobjects.Slow

class EmbeddedKafkaUnavailableSpec
    extends EmbeddedKafkaSpecSupport
    with EmbeddedKafka
    with BeforeAndAfterAll {

  "the publishToKafka method" should {
    "throw a KafkaUnavailableException when Kafka is unavailable when trying to publish" in {
      a[KafkaUnavailableException] shouldBe thrownBy {
        implicit val serializer = new StringSerializer()
        publishToKafka("non_existing_topic", "a message")
      }
    }
  }

  "the consumeFirstStringMessageFrom method" should {
    "throw a KafkaUnavailableException when there's no running instance of Kafka" taggedAs Slow ignore {
      // TODO: This test is *really* slow. The request.max.timeout.ms in the underlying consumer should be changed.
      a[KafkaUnavailableException] shouldBe thrownBy {
        consumeFirstStringMessageFrom("non_existing_topic")
      }
    }
  }
} 
Example 30
Source File: StreamStaticDataGenerator.scala    From structured-streaming-application   with Apache License 2.0 5 votes vote down vote up
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import knolx.spark.Stock
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.language.postfixOps
import scala.util.Random


object StreamStaticDataGenerator extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay")
  val orderTypes = List("buy", "sell")
  val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9)

  implicit val formats = Serialization.formats(NoTypeHints)
  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 5 seconds) {
    companyNames.foreach { name =>
      val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head)
      producer.send(new ProducerRecord[String, String](topic, write(stock)))
    }
  }
} 
Example 31
Source File: MultiDataStreamer.scala    From structured-streaming-application   with Apache License 2.0 5 votes vote down vote up
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.language.postfixOps
import scala.util.Random


object MultiDataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 3000 milliseconds) {
    (1 to Random.nextInt(100)).foreach { id =>
      producer.send(new ProducerRecord[String, String](topic, s"device$id", (Math.random * 2 + 1).toString))
    }
  }
} 
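None of the three generators above (StreamStreamDataGenerator, StreamStaticDataGenerator, MultiDataStreamer) flushes or closes its KafkaProducer, so records still buffered in the producer can be lost when the JVM exits. A hedged sketch of the shutdown-hook pattern, shown as a self-contained app with an assumed local broker:

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig}
import org.apache.kafka.common.serialization.StringSerializer

object GracefulProducerShutdownSketch extends App {
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") // assumed broker
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  // Flush anything still buffered and release network resources before the JVM exits.
  sys.addShutdownHook {
    producer.flush()
    producer.close()
  }
}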
Example 32
Source File: EmbeddedKafkaUnavailableSpec.scala    From embedded-kafka   with MIT License 5 votes vote down vote up
package net.manub.embeddedkafka

import net.manub.embeddedkafka.EmbeddedKafka._
import org.apache.kafka.common.serialization.StringSerializer
import org.scalatest.BeforeAndAfterAll
import org.scalatest.tagobjects.Slow

class EmbeddedKafkaUnavailableSpec
    extends EmbeddedKafkaSpecSupport
    with BeforeAndAfterAll {
  "the publishToKafka method" should {
    "throw a KafkaUnavailableException when Kafka is unavailable when trying to publish" in {
      a[KafkaUnavailableException] shouldBe thrownBy {
        implicit val serializer: StringSerializer = new StringSerializer()
        publishToKafka("non_existing_topic", "a message")
      }
    }
  }

  "the consumeFirstStringMessageFrom method" should {
    "throw a KafkaUnavailableException when there's no running instance of Kafka" taggedAs Slow ignore {
      // TODO: This test is *really* slow. The request.max.timeout.ms in the underlying consumer should be changed.
      a[KafkaUnavailableException] shouldBe thrownBy {
        consumeFirstStringMessageFrom("non_existing_topic")
      }
    }
  }
} 
Example 33
Source File: KafkaOutput.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.plugin.output.kafka

import java.io.{Serializable => JSerializable}
import java.util.Properties

import com.stratio.sparta.plugin.input.kafka.KafkaBase
import com.stratio.sparta.sdk.pipeline.output.Output._
import com.stratio.sparta.sdk.pipeline.output.{Output, OutputFormatEnum, SaveModeEnum}
import com.stratio.sparta.sdk.properties.CustomProperties
import com.stratio.sparta.sdk.properties.ValidatingPropertyMap._
import org.apache.kafka.clients.producer.ProducerConfig._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.spark.sql._

import scala.collection.mutable

class KafkaOutput(name: String, properties: Map[String, JSerializable])
  extends Output(name, properties) with KafkaBase with CustomProperties {

  val DefaultKafkaSerializer = classOf[StringSerializer].getName
  val DefaultAck = "0"
  val DefaultBatchNumMessages = "200"
  val DefaultProducerPort = "9092"

  override val customKey = "KafkaProperties"
  override val customPropertyKey = "kafkaPropertyKey"
  override val customPropertyValue = "kafkaPropertyValue"

  val outputFormat = OutputFormatEnum.withName(properties.getString("format", "json").toUpperCase)
  val rowSeparator = properties.getString("rowSeparator", ",")

  override def supportedSaveModes: Seq[SaveModeEnum.Value] = Seq(SaveModeEnum.Append)

  override def save(dataFrame: DataFrame, saveMode: SaveModeEnum.Value, options: Map[String, String]): Unit = {
    val tableName = getTableNameFromOptions(options)

    validateSaveMode(saveMode)

    outputFormat match {
      case OutputFormatEnum.ROW => dataFrame.rdd.foreachPartition(messages =>
        messages.foreach(message => send(tableName, message.mkString(rowSeparator))))
      case _ => dataFrame.toJSON.foreachPartition { messages =>
        messages.foreach(message => send(tableName, message))
      }
    }
  }

  def send(topic: String, message: String): Unit = {
    val record = new ProducerRecord[String, String](topic, message)
    KafkaOutput.getProducer(getProducerConnectionKey, createProducerProps).send(record)
  }

  private[kafka] def getProducerConnectionKey: String =
    getHostPort(BOOTSTRAP_SERVERS_CONFIG, DefaultHost, DefaultProducerPort)
      .getOrElse(BOOTSTRAP_SERVERS_CONFIG, throw new Exception("Invalid metadata broker list"))

  private[kafka] def createProducerProps: Properties = {
    val props = new Properties()
    properties.filter(_._1 != customKey).foreach { case (key, value) => props.put(key, value.toString) }
    mandatoryOptions.foreach { case (key, value) => props.put(key, value) }
    getCustomProperties.foreach { case (key, value) => props.put(key, value) }
    props
  }

  private[kafka] def mandatoryOptions: Map[String, String] =
    getHostPort(BOOTSTRAP_SERVERS_CONFIG, DefaultHost, DefaultProducerPort) ++
      Map(
        KEY_SERIALIZER_CLASS_CONFIG -> properties.getString(KEY_SERIALIZER_CLASS_CONFIG, DefaultKafkaSerializer),
        VALUE_SERIALIZER_CLASS_CONFIG -> properties.getString(VALUE_SERIALIZER_CLASS_CONFIG, DefaultKafkaSerializer),
        ACKS_CONFIG -> properties.getString(ACKS_CONFIG, DefaultAck),
        BATCH_SIZE_CONFIG -> properties.getString(BATCH_SIZE_CONFIG, DefaultBatchNumMessages)
      )

  override def cleanUp(options: Map[String, String]): Unit = {
    log.info(s"Closing Kafka producer in Kafka Output: $name")
    KafkaOutput.closeProducers()
  }
}

object KafkaOutput {

  private val producers: mutable.Map[String, KafkaProducer[String, String]] = mutable.Map.empty

  def getProducer(producerKey: String, properties: Properties): KafkaProducer[String, String] = {
    getInstance(producerKey, properties)
  }

  def closeProducers(): Unit = {
    producers.values.foreach(producer => producer.close())
  }

  private[kafka] def getInstance(key: String, properties: Properties): KafkaProducer[String, String] = {
    producers.getOrElse(key, {
      val producer = new KafkaProducer[String, String](properties)
      producers.put(key, producer)
      producer
    })
  }
} 
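The producer cache in the KafkaOutput companion object is a plain mutable.Map that Spark executor threads can hit concurrently, so two partitions may race in getInstance and build duplicate producers. A hedged, stand-alone variant (not the project's code) that guards the cache with synchronized and getOrElseUpdate:

import java.util.Properties

import scala.collection.mutable

import org.apache.kafka.clients.producer.KafkaProducer

object CachedProducers {
  private val producers = mutable.Map.empty[String, KafkaProducer[String, String]]

  // synchronized + getOrElseUpdate guarantees at most one producer per connection key,
  // even when several partitions call this at the same time.
  def getProducer(key: String, props: Properties): KafkaProducer[String, String] =
    producers.synchronized {
      producers.getOrElseUpdate(key, new KafkaProducer[String, String](props))
    }

  def closeAll(): Unit = producers.synchronized {
    producers.values.foreach(_.close())
    producers.clear()
  }
}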
Example 34
Source File: ProcessingKafkaApplication.scala    From Akka-Cookbook   with MIT License 5 votes vote down vote up
package com.packt.chapter8

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerSettings, Subscriptions}
import akka.stream.{ActorMaterializer, ClosedShape}
import akka.stream.scaladsl.{Flow, GraphDSL, RunnableGraph, Sink, Source}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}

import scala.concurrent.duration._

object ProcessingKafkaApplication extends App {
  implicit val actorSystem = ActorSystem("SimpleStream")
  implicit val actorMaterializer = ActorMaterializer()

  val bootstrapServers = "localhost:9092"
  val kafkaTopic = "akka_streams_topic"
  val partition = 0
  val subscription = Subscriptions.assignment(new TopicPartition(kafkaTopic, partition))

  val consumerSettings = ConsumerSettings(actorSystem, new ByteArrayDeserializer, new StringDeserializer)
    .withBootstrapServers(bootstrapServers)
    .withGroupId("akka_streams_group")
    .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  val producerSettings = ProducerSettings(actorSystem, new ByteArraySerializer, new StringSerializer)
    .withBootstrapServers(bootstrapServers)

  val runnableGraph = RunnableGraph.fromGraph(GraphDSL.create() { implicit builder =>
    import GraphDSL.Implicits._

    val tickSource = Source.tick(0 seconds, 5 seconds, "Hello from Akka Streams using Kafka!")
    val kafkaSource = Consumer.plainSource(consumerSettings, subscription)
    val kafkaSink = Producer.plainSink(producerSettings)
    val printlnSink = Sink.foreach(println)

    val mapToProducerRecord = Flow[String].map(elem => new ProducerRecord[Array[Byte], String](kafkaTopic, elem))
    val mapFromConsumerRecord = Flow[ConsumerRecord[Array[Byte], String]].map(record => record.value())

    tickSource  ~> mapToProducerRecord   ~> kafkaSink
    kafkaSource ~> mapFromConsumerRecord ~> printlnSink

    ClosedShape
  })

  runnableGraph.run()
} 
Example 35
Source File: EventAggregationSpec.scala    From spark-summit-2018   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import java.util

import com.twilio.open.protocol.Calls.CallEvent
import com.twilio.open.protocol.Metrics
import com.twilio.open.streaming.trend.discovery.streams.EventAggregation
import org.apache.kafka.common.serialization.{Deserializer, Serializer, StringDeserializer, StringSerializer}
import org.apache.spark.sql.streaming.{OutputMode, Trigger}
import org.apache.spark.sql._
import org.apache.spark.sql.kafka010.KafkaTestUtils
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.{Logger, LoggerFactory}

class EventAggregationSpec extends KafkaBackedTest[String, CallEvent] {
  override val testUtils = new KafkaTestUtils[String, CallEvent] {
    override val keySerializer: Serializer[String] = new StringSerializer
    override val keyDeserializer: Deserializer[String] = new StringDeserializer
    override val valueSerializer: Serializer[CallEvent] = new CallEventSerializer
    override val valueDeserializer: Deserializer[CallEvent] = new CallEventDeserializer
  }
  override protected val kafkaTopic = "spark.summit.call.events"
  override protected val partitions = 8

  private val pathToTestScenarios = "src/test/resources/scenarios"

  val log: Logger = LoggerFactory.getLogger(classOf[EventAggregation])

  lazy val session: SparkSession = sparkSql

  override def conf: SparkConf = {
    new SparkConf()
      .setMaster("local[*]")
      .setAppName("aggregation-test-app")
      .set("spark.ui.enabled", "false")
      .set("spark.app.id", appID)
      .set("spark.driver.host", "localhost")
      .set("spark.sql.shuffle.partitions", "32")
      .set("spark.executor.cores", "4")
      .set("spark.executor.memory", "1g")
      .set("spark.ui.enabled", "false")
      .setJars(SparkContext.jarOfClass(classOf[EventAggregation]).toList)
  }

  test("Should aggregate call events") {
    import session.implicits._
    val appConfig = appConfigForTest()
    val scenario = TestHelper.loadScenario[CallEvent](s"$pathToTestScenarios/pdd_events.json")
    val scenarioIter = scenario.toIterator
    scenario.nonEmpty shouldBe true

    testUtils.createTopic(kafkaTopic, partitions, overwrite = true)
    sendNextMessages(scenarioIter, 30, _.getEventId, _.getLoggedEventTime)

    val trendDiscoveryApp = new TrendDiscoveryApp(appConfigForTest(), session)
    val eventAggregation = EventAggregation(appConfig)

    eventAggregation.process(trendDiscoveryApp.readKafkaStream())(session)
      .writeStream
      .queryName("calleventaggs")
      .format("memory")
      .outputMode(eventAggregation.outputMode)
      .start()
      .processAllAvailable()

    val df = session.sql("select * from calleventaggs")
    df.printSchema()
    df.show

    val res = session
      .sql("select avg(stats.p99) from calleventaggs")
      .collect()
      .map { r =>
        r.getAs[Double](0) }
      .head

    DiscoveryUtils.round(res) shouldEqual 7.13

  }


}

class CallEventSerializer extends Serializer[CallEvent] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
  override def serialize(topic: String, data: CallEvent): Array[Byte] = data.toByteArray
  override def close(): Unit = {}
}

class CallEventDeserializer extends Deserializer[CallEvent] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
  override def deserialize(topic: String, data: Array[Byte]): CallEvent = CallEvent.parseFrom(data)
  override def close(): Unit = {}
} 
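The CallEventSerializer/CallEventDeserializer pair above is handed to the test harness as instances; with a plain producer, a custom Serializer of this kind is usually wired in by class name through the standard config keys. A minimal sketch using a hypothetical UpperCaseSerializer and an assumed local broker:

import java.util
import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.{Serializer, StringSerializer}

// Hypothetical serializer, only to illustrate the wiring; it upper-cases values on the way out.
class UpperCaseSerializer extends Serializer[String] {
  private val inner = new StringSerializer
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
  override def serialize(topic: String, data: String): Array[Byte] =
    inner.serialize(topic, if (data == null) null else data.toUpperCase)
  override def close(): Unit = {}
}

object CustomSerializerSketch extends App {
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") // assumed broker
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[UpperCaseSerializer].getName)

  val producer = new KafkaProducer[String, String](props)
  producer.send(new ProducerRecord[String, String]("sketch-topic", "key", "value"))
  producer.close()
}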
Example 36
Source File: KafkaBean.scala    From estuary   with Apache License 2.0 5 votes vote down vote up
package com.neighborhood.aka.laplace.estuary.bean.datasink


import com.neighborhood.aka.laplace.estuary.bean.key._
import com.neighborhood.aka.laplace.estuary.core.sink.mysql.MysqlSinkFunc
import org.apache.kafka.common.serialization.StringSerializer


// NOTE: the original class definition is truncated in this listing; only two fields
// remain. KafkaBean.buildConfig below reads the rest of the settings (ack, lingerMs,
// kafkaRetries, bootstrapServers, serializers, partitionerClass, ...) from this bean.
class KafkaBean {

  var isSync = false

  var maxInFlightRequestsPerConnection = "1"

}

object KafkaBean {
  def buildConfig(kafkaBean: KafkaBean): java.util.HashMap[String, String] = {
    val config: java.util.HashMap[String, String] = new java.util.HashMap[String, String]()
    config.put("acks", kafkaBean.ack)
    config.put("linger.ms", kafkaBean.lingerMs)
    config.put("retries", kafkaBean.kafkaRetries)
    config.put("bootstrap.servers", kafkaBean.bootstrapServers)
    config.put("max.block.ms", kafkaBean.maxBlockMs)
    config.put("max.request.size", kafkaBean.maxRequestSize)
    config.put("request.timeout.ms", kafkaBean.requestTimeoutMs)
    config.put("key.serializer", kafkaBean.keySerializer)
    config.put("value.serializer", kafkaBean.valueSerializer)
    config.put("partitioner.class", kafkaBean.partitionerClass)
    config.put("compression.type", kafkaBean.compressionType)
    config.put("batch.size", kafkaBean.batchSize)
    config.put("retry.backoff.ms", kafkaBean.retryBackoffMs)
    config.put("max.in.flight.requests.per.connection", kafkaBean.maxInFlightRequestsPerConnection)
    config
  }
} 
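KafkaBean.buildConfig returns a java.util.HashMap[String, String]; a hedged sketch (not part of estuary) of copying that map into Properties before constructing a producer:

import java.util.Properties

import org.apache.kafka.clients.producer.KafkaProducer

object KafkaBeanProducerSketch {
  // Copy the bean-derived config entries into Properties and build the producer from them.
  def producerFrom(config: java.util.HashMap[String, String]): KafkaProducer[String, String] = {
    val props = new Properties()
    props.putAll(config)
    new KafkaProducer[String, String](props)
  }
}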
Example 37
Source File: Streams.scala    From haystack-trends   with Apache License 2.0 5 votes vote down vote up
package com.expedia.www.haystack.trends.kstream

import java.util.function.Supplier

import com.expedia.metrics.MetricData
import com.expedia.www.haystack.commons.kstreams.serde.metricdata.{MetricDataSerde, MetricTankSerde}
import com.expedia.www.haystack.trends.aggregation.TrendMetric
import com.expedia.www.haystack.trends.config.AppConfiguration
import com.expedia.www.haystack.trends.kstream.processor.{AdditionalTagsProcessorSupplier, ExternalKafkaProcessorSupplier, MetricAggProcessorSupplier}
import com.expedia.www.haystack.trends.kstream.store.HaystackStoreBuilder
import org.apache.kafka.common.serialization.{Serde, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.Topology
import org.apache.kafka.streams.state.{KeyValueStore, StoreBuilder}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters

class Streams(appConfiguration: AppConfiguration) extends Supplier[Topology] {

  private val LOGGER = LoggerFactory.getLogger(classOf[Streams])
  private val TOPOLOGY_SOURCE_NAME = "metricpoint-source"
  private val TOPOLOGY_EXTERNAL_SINK_NAME = "metricpoint-aggegated-sink-external"
  private val TOPOLOGY_INTERNAL_SINK_NAME = "metric-data-aggegated-sink-internal"
  private val TOPOLOGY_AGGREGATOR_PROCESSOR_NAME = "metricpoint-aggregator-process"
  private val TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME = "additional-tags-process"
  private val TOPOLOGY_AGGREGATOR_TREND_METRIC_STORE_NAME = "trend-metric-store"
  private val kafkaConfig = appConfiguration.kafkaConfig

  private def initialize(topology: Topology): Topology = {

    //add source - topic where the raw metricpoints are pushed by the span-timeseries-transformer
    topology.addSource(
      kafkaConfig.autoOffsetReset,
      TOPOLOGY_SOURCE_NAME,
      kafkaConfig.timestampExtractor,
      new StringDeserializer,
      new MetricTankSerde().deserializer(),
      kafkaConfig.consumeTopic)


    //The processor which performs aggregations on the metrics
    topology.addProcessor(
      TOPOLOGY_AGGREGATOR_PROCESSOR_NAME,
      new MetricAggProcessorSupplier(TOPOLOGY_AGGREGATOR_TREND_METRIC_STORE_NAME, appConfiguration.encoder),
      TOPOLOGY_SOURCE_NAME)


    //key-value, state store associated with each kstreams task(partition)
    // which keeps the trend-metrics which are currently being computed in memory
    topology.addStateStore(createTrendMetricStateStore(), TOPOLOGY_AGGREGATOR_PROCESSOR_NAME)

    // topology to add additional tags if any
    topology.addProcessor(TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME, new AdditionalTagsProcessorSupplier(appConfiguration.additionalTags), TOPOLOGY_AGGREGATOR_PROCESSOR_NAME)

    if (appConfiguration.kafkaConfig.producerConfig.enableExternalKafka) {
      topology.addProcessor(
        TOPOLOGY_EXTERNAL_SINK_NAME,
        new ExternalKafkaProcessorSupplier(appConfiguration.kafkaConfig.producerConfig),
        TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME
        )
    }

    // adding sinks
    appConfiguration.kafkaConfig.producerConfig.kafkaSinkTopics.foreach(sinkTopic => {
      if (sinkTopic.enabled) {
        val serde = Class.forName(sinkTopic.serdeClassName).newInstance().asInstanceOf[Serde[MetricData]]
        topology.addSink(
          s"${TOPOLOGY_INTERNAL_SINK_NAME}-${sinkTopic.topic}",
          sinkTopic.topic,
          new StringSerializer,
          serde.serializer(),
          TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME)
      }
    })

    topology
  }


  private def createTrendMetricStateStore(): StoreBuilder[KeyValueStore[String, TrendMetric]] = {

    val stateStoreConfiguration = appConfiguration.stateStoreConfig

    val storeBuilder = new HaystackStoreBuilder(TOPOLOGY_AGGREGATOR_TREND_METRIC_STORE_NAME, stateStoreConfiguration.stateStoreCacheSize)

    if (stateStoreConfiguration.enableChangeLogging) {
      storeBuilder
        .withLoggingEnabled(JavaConverters.mapAsJavaMap(stateStoreConfiguration.changeLogTopicConfiguration))

    } else {
      storeBuilder
        .withLoggingDisabled()
    }
  }


  override def get(): Topology = {
    val topology = new Topology
    initialize(topology)
  }
}
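Streams only supplies the Topology; a minimal sketch (outside the haystack-trends codebase) of starting it with KafkaStreams, where the application id and bootstrap servers are assumed values that would normally come from AppConfiguration:

import java.util.Properties

import org.apache.kafka.streams.{KafkaStreams, StreamsConfig, Topology}

object StreamsRunnerSketch {
  def start(topology: Topology): KafkaStreams = {
    val props = new Properties()
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "haystack-trends-sketch") // assumed
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")      // assumed
    val streams = new KafkaStreams(topology, props)
    streams.start()
    streams
  }
}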