org.apache.kafka.clients.producer.ProducerConfig Scala Examples

The following examples show how to use org.apache.kafka.clients.producer.ProducerConfig in Scala. Each example is taken from an open-source project; the source file, project name, and license are noted above each listing.
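For orientation, here is a minimal, self-contained sketch (not taken from any of the projects below) of the pattern most of these examples share: ProducerConfig supplies the well-known configuration keys, which are assembled into Properties and handed to a KafkaProducer. The broker address and topic name are assumptions for illustration.

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object MinimalProducerSketch extends App {
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") // assumed broker address
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)
  try producer.send(new ProducerRecord[String, String]("example-topic", "key", "value")).get() // blocks until acked
  finally producer.close()
}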
Example 1
Source File: IntegrationTest.scala    From kmq   with Apache License 2.0
package com.softwaremill.kmq.redelivery

import java.time.Duration
import java.util.Random

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerMessage, ProducerSettings, Subscriptions}
import akka.stream.ActorMaterializer
import akka.testkit.TestKit
import com.softwaremill.kmq._
import com.softwaremill.kmq.redelivery.infrastructure.KafkaSpec
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.scalatest.concurrent.Eventually
import org.scalatest.time.{Seconds, Span}
import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers}

import scala.collection.mutable.ArrayBuffer

class IntegrationTest extends TestKit(ActorSystem("test-system")) with FlatSpecLike with KafkaSpec with BeforeAndAfterAll with Eventually with Matchers {

  implicit val materializer = ActorMaterializer()
  import system.dispatcher

  "KMQ" should "resend message if not committed" in {
    val bootstrapServer = s"localhost:${testKafkaConfig.kafkaPort}"
    val kmqConfig = new KmqConfig("queue", "markers", "kmq_client", "kmq_redelivery",
      Duration.ofSeconds(1).toMillis, 1000)

    val consumerSettings = ConsumerSettings(system, new StringDeserializer, new StringDeserializer)
      .withBootstrapServers(bootstrapServer)
      .withGroupId(kmqConfig.getMsgConsumerGroupId)
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

    val markerProducerSettings = ProducerSettings(system,
      new MarkerKey.MarkerKeySerializer(), new MarkerValue.MarkerValueSerializer())
      .withBootstrapServers(bootstrapServer)
      .withProperty(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[ParititionFromMarkerKey].getName)
    val markerProducer = markerProducerSettings.createKafkaProducer()

    val random = new Random()

    lazy val processedMessages = ArrayBuffer[String]()
    lazy val receivedMessages = ArrayBuffer[String]()

    val control = Consumer.committableSource(consumerSettings, Subscriptions.topics(kmqConfig.getMsgTopic)) // 1. get messages from topic
      .map { msg =>
        ProducerMessage.Message(
          new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic, MarkerKey.fromRecord(msg.record), new StartMarker(kmqConfig.getMsgTimeoutMs)), msg)
      }
      .via(Producer.flow(markerProducerSettings, markerProducer)) // 2. write the "start" marker
      .map(_.message.passThrough)
      .mapAsync(1) { msg =>
        msg.committableOffset.commitScaladsl().map(_ => msg.record) // this should be batched
      }
      .map { msg =>
        receivedMessages += msg.value
        msg
      }
      .filter(_ => random.nextInt(5) != 0)
      .map { processedMessage =>
        processedMessages += processedMessage.value
        new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic, MarkerKey.fromRecord(processedMessage), EndMarker.INSTANCE)
      }
      .to(Producer.plainSink(markerProducerSettings, markerProducer)) // 5. write "end" markers
      .run()

    val redeliveryHook = RedeliveryTracker.start(new KafkaClients(bootstrapServer), kmqConfig)

    val messages = (0 to 20).map(_.toString)
    messages.foreach(msg => sendToKafka(kmqConfig.getMsgTopic, msg))

    eventually {
      receivedMessages.size should be > processedMessages.size
      processedMessages.sortBy(_.toInt).distinct shouldBe messages
    }(PatienceConfig(timeout = Span(15, Seconds)), implicitly)

    redeliveryHook.close()
    control.shutdown()
  }

  override def afterAll(): Unit = {
    super.afterAll()
    TestKit.shutdownActorSystem(system)
  }
} 
Example 2
Source File: BasicSimulation.scala    From gatling-kafka   with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class BasicSimulation extends Simulation {
  val kafkaConf = kafka
    // Kafka topic name
    .topic("test")
    // Kafka producer configs
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        // list of Kafka broker hostname and port pairs
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",

        // in most cases, StringSerializer or ByteArraySerializer
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG ->
          "org.apache.kafka.common.serialization.StringSerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG ->
          "org.apache.kafka.common.serialization.StringSerializer"))

  val scn = scenario("Kafka Test")
    .exec(
      kafka("request")
        // message to send
        .send[String]("foo"))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
} 
Example 3
Source File: Kafka.scala    From event-sourcing-kafka-streams   with MIT License
package org.amitayh.invoices.web

import java.time.Duration
import java.util.Collections.singletonList
import java.util.Properties

import cats.effect._
import cats.syntax.apply._
import cats.syntax.functor._
import fs2._
import org.amitayh.invoices.common.Config
import org.amitayh.invoices.common.Config.Topics.Topic
import org.apache.kafka.clients.consumer._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.log4s.{Logger, getLogger}

import scala.collection.JavaConverters._

object Kafka {

  trait Producer[F[_], K, V] {
    def send(key: K, value: V): F[RecordMetadata]
  }

  object Producer {
    def apply[F[_]: Async, K, V](producer: KafkaProducer[K, V], topic: Topic[K, V]): Producer[F, K, V] =
      (key: K, value: V) => Async[F].async { cb =>
        val record = new ProducerRecord(topic.name, key, value)
        producer.send(record, (metadata: RecordMetadata, exception: Exception) => {
          if (exception != null) cb(Left(exception))
          else cb(Right(metadata))
        })
      }
  }

  def producer[F[_]: Async, K, V](topic: Topic[K, V]): Resource[F, Producer[F, K, V]] = Resource {
    val create = Sync[F].delay {
      val props = new Properties
      props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers)
      new KafkaProducer[K, V](props, topic.keySerializer, topic.valueSerializer)
    }
    create.map(producer => (Producer(producer, topic), close(producer)))
  }

  def subscribe[F[_]: Sync, K, V](topic: Topic[K, V], groupId: String): Stream[F, (K, V)] = {
    val create = Sync[F].delay {
      val props = new Properties
      props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers)
      props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId)
      val consumer = new KafkaConsumer(props, topic.keyDeserializer, topic.valueDeserializer)
      consumer.subscribe(singletonList(topic.name))
      consumer
    }
    Stream.bracket(create)(close[F]).flatMap(consume[F, K, V])
  }

  private val logger: Logger = getLogger

  def log[F[_]: Sync](msg: String): F[Unit] = Sync[F].delay(logger.info(msg))

  private def consume[F[_]: Sync, K, V](consumer: KafkaConsumer[K, V]): Stream[F, (K, V)] = for {
    records <- Stream.repeatEval(Sync[F].delay(consumer.poll(Duration.ofSeconds(1))))
    record <- Stream.emits(records.iterator.asScala.toSeq)
  } yield record.key -> record.value

  private def close[F[_]: Sync](producer: KafkaProducer[_, _]): F[Unit] =
    Sync[F].delay(producer.close()) *> log(s"Producer closed")

  private def close[F[_]: Sync](consumer: KafkaConsumer[_, _]): F[Unit] =
    Sync[F].delay(consumer.close()) *> log("Consumer closed")

} 
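The Producer wrapper above adapts KafkaProducer's callback-based send to an effect type via Async. For comparison, the same adaptation onto a plain Scala Future looks roughly like this (a standalone sketch, not part of the project):

import scala.concurrent.{Future, Promise}

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord, RecordMetadata}

def sendAsFuture[K, V](producer: KafkaProducer[K, V], record: ProducerRecord[K, V]): Future[RecordMetadata] = {
  val promise = Promise[RecordMetadata]()
  // Kafka invokes the callback with either a non-null exception or the record metadata
  producer.send(record, (metadata: RecordMetadata, exception: Exception) => {
    if (exception != null) promise.failure(exception)
    else promise.success(metadata)
  })
  promise.future
}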
Example 4
Source File: KafkaProducerUtils.scala    From bigdata-examples   with Apache License 2.0
package com.timeyang.common.util

import java.util.Properties

import com.timeyang.common.config.BaseConf
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object KafkaProducerUtils {

  @volatile lazy private val producer: KafkaProducer[String, String] = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BaseConf.kafkaBrokerList)
    props.put("acks", "all")
    props.put("retries", 1: Integer)
    props.put("batch.size", 16384: Integer)
    props.put("linger.ms", 1: Integer)
    props.put("buffer.memory", 33554432: Integer)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])

    new KafkaProducer[String, String](props)
  }

  def sendJsonMessages(topic: String, event: Object, events: Object*): Unit = {
    for (event <- event +: events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, events: List[Object]): Unit = {
    for (event <- events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, event: Object): Unit = {
    val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
    producer.send(record)
  }

} 
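A hypothetical call site for the utility above (the topic name and event type are made up for illustration; JsonUtils serializes each event to JSON):

case class PageView(userId: String, url: String) // hypothetical event type

KafkaProducerUtils.sendJsonMessages("page-views", PageView("u1", "/home"), PageView("u2", "/cart"))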
Example 5
Source File: KafkaMessageSender.scala    From model-serving-tutorial   with Apache License 2.0
package com.lightbend.modelserving.client

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer


class MessageSender(val brokers: String) {

  import MessageSender._
  val producer = new KafkaProducer[Array[Byte], Array[Byte]](
    providerProperties(brokers, classOf[ByteArraySerializer].getName, classOf[ByteArraySerializer].getName))

  def writeKeyValue(topic: String, key: Array[Byte], value: Array[Byte]): Unit = {
    val result = producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, key, value)).get
    producer.flush()
  }

  def writeValue(topic: String, value: Array[Byte]): Unit = {
    val result = producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get
    producer.flush()
  }

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
} 
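A minimal usage sketch, assuming a broker is reachable at the address shown and using a made-up topic name:

val sender = new MessageSender("localhost:9092") // assumed broker address
sender.writeValue("model-events", "payload".getBytes("UTF-8")) // hypothetical topic
sender.close()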
Example 6
Source File: KafkaMessageSender.scala    From kafka-with-akka-streams-kafka-streams-tutorial   with Apache License 2.0
package com.lightbend.scala.kafka

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer

// Note: this listing is truncated at the source; the declaration below is
// reconstructed after the near-identical MessageSender in Example 5.
class MessageSender(val brokers: String) {

  private val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[ByteArraySerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[ByteArraySerializer].getName)

  val producer = new KafkaProducer[Array[Byte], Array[Byte]](props)
  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
} 
Example 7
Source File: package.scala    From Waves   with MIT License
package com.wavesplatform.events
import java.util

import com.wavesplatform.events.protobuf.PBEvents
import com.wavesplatform.events.settings.BlockchainUpdatesSettings
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.config.SaslConfigs
import org.apache.kafka.common.serialization.{IntegerSerializer, Serializer}

package object kafka {
  private object BlockchainUpdatedSerializer extends Serializer[BlockchainUpdated] {
    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
    override def close(): Unit                                                 = {}

    override def serialize(topic: String, data: BlockchainUpdated): Array[Byte] =
      PBEvents.protobuf(data).toByteArray
  }

  private object IntSerializer extends Serializer[Int] {
    val integerSerializer = new IntegerSerializer

    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = integerSerializer.configure(configs, isKey)
    override def close(): Unit                                                 = integerSerializer.close()

    override def serialize(topic: String, data: Int): Array[Byte] =
      integerSerializer.serialize(topic, data)
  }

  def createProperties(settings: BlockchainUpdatesSettings): util.Properties = {
    val props = new util.Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, settings.bootstrapServers)
    props.put(ProducerConfig.CLIENT_ID_CONFIG, settings.clientId)
    //  props.put(ProducerConfig.RETRIES_CONFIG, "0")

    // SASL_SSL
    if (settings.ssl.enabled) {
      props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SASL_SSL")
      props.put(SaslConfigs.SASL_MECHANISM, "PLAIN")
      props.put(
        SaslConfigs.SASL_JAAS_CONFIG,
        s"org.apache.kafka.common.security.plain.PlainLoginModule required username = '${settings.ssl.username}' password = '${settings.ssl.password}';"
      )
    }
    props
  }

  def createProducerProperties(settings: BlockchainUpdatesSettings): util.Properties = {
    val props = createProperties(settings)
    props.put(ProducerConfig.ACKS_CONFIG, "all")
    props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, "10485760") // 10MB
    props
  }

  def createProducer(settings: BlockchainUpdatesSettings): KafkaProducer[Int, BlockchainUpdated] =
    new KafkaProducer[Int, BlockchainUpdated](createProducerProperties(settings), IntSerializer, BlockchainUpdatedSerializer)

  def createProducerRecord(topic: String, event: BlockchainUpdated): ProducerRecord[Int, BlockchainUpdated] = {
    val h = event match {
      case ap: BlockAppended                      => ap.toHeight
      case MicroBlockAppended(_, height, _, _, _) => height
      case RollbackCompleted(_, height)           => height
      case MicroBlockRollbackCompleted(_, height) => height
    }
    new ProducerRecord[Int, BlockchainUpdated](topic, h, event)
  }
} 
Example 8
Source File: KafkaWordCount.scala    From BigDatalog   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord}

import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf


object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messages of space-separated random digits (0-9).
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer connection properties
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while(true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

}
// scalastyle:on println 
Example 9
Source File: ConfigurationLoaderSpec.scala    From haystack-traces   with Apache License 2.0
package com.expedia.www.haystack.trace.indexer.unit

import com.expedia.www.haystack.trace.commons.packer.PackerType
import com.expedia.www.haystack.trace.indexer.config.ProjectConfiguration
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.scalatest.{FunSpec, Matchers}

class ConfigurationLoaderSpec extends FunSpec with Matchers {

  val project = new ProjectConfiguration()
  describe("Configuration loader") {

    it("should load the health status config from base.conf") {
      project.healthStatusFilePath shouldEqual "/app/isHealthy"
    }

    it("should load the span buffer config only from base.conf") {
      val config = project.spanAccumulateConfig
      config.pollIntervalMillis shouldBe 2000L
      config.maxEntriesAllStores shouldBe 20000
      config.bufferingWindowMillis shouldBe 10000L
      config.packerType shouldEqual PackerType.SNAPPY
    }

    it("should load the kafka config from base.conf and one stream property from env variable") {
      val kafkaConfig = project.kafkaConfig
      kafkaConfig.produceTopic shouldBe "span-buffer"
      kafkaConfig.consumeTopic shouldBe "spans"
      kafkaConfig.numStreamThreads shouldBe 2
      kafkaConfig.commitOffsetRetries shouldBe 3
      kafkaConfig.commitBackoffInMillis shouldBe 200

      kafkaConfig.maxWakeups shouldBe 5
      kafkaConfig.wakeupTimeoutInMillis shouldBe 5000

      kafkaConfig.consumerProps.getProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG) shouldBe "kafkasvc:9092"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG) shouldBe "earliest"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.GROUP_ID_CONFIG) shouldBe "haystack-trace-indexer"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG) shouldBe "false"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG) shouldBe "org.apache.kafka.common.serialization.StringDeserializer"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG) shouldBe "com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer"

      kafkaConfig.consumerCloseTimeoutInMillis shouldBe 30000

      kafkaConfig.producerProps.getProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG) shouldBe "kafkasvc:9092"
      kafkaConfig.producerProps.getProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG) shouldBe "org.apache.kafka.common.serialization.ByteArraySerializer"
      kafkaConfig.producerProps.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG) shouldBe "org.apache.kafka.common.serialization.StringSerializer"
    }

    it("should load the service metadata config from base.conf") {
      val config = project.serviceMetadataWriteConfig
      config.flushIntervalInSec shouldBe 60
      config.flushOnMaxOperationCount shouldBe 10000
      config.esEndpoint shouldBe "http://elasticsearch:9200"
      config.maxInFlightBulkRequests shouldBe 10
      config.maxDocsInBulk shouldBe 100
      config.maxBulkDocSizeInBytes shouldBe 1000000
      config.indexTemplateJson shouldBe Some("some_template_json")
      config.consistencyLevel shouldBe "one"
      config.readTimeoutMillis shouldBe 5000
      config.connectionTimeoutMillis shouldBe 10000
      config.indexName shouldBe "service-metadata"
      config.indexType shouldBe "metadata"
      config.retryConfig.maxRetries shouldBe 10
      config.retryConfig.backOffInMillis shouldBe 100
      config.retryConfig.backoffFactor shouldBe 2
    }

    it("should load the trace backend config from base.conf and few properties overridden from env variable") {
      val backendConfiguration = project.backendConfig

      backendConfiguration.maxInFlightRequests shouldBe 100
    }

    it("should load the elastic search config from base.conf and one property overridden from env variable") {
      val elastic = project.elasticSearchConfig
      elastic.endpoint shouldBe "http://elasticsearch:9200"
      elastic.maxInFlightBulkRequests shouldBe 10
      elastic.maxDocsInBulk shouldBe 100
      elastic.maxBulkDocSizeInBytes shouldBe 1000000
      elastic.indexTemplateJson shouldBe Some("some_template_json")
      elastic.consistencyLevel shouldBe "one"
      elastic.readTimeoutMillis shouldBe 5000
      elastic.connectionTimeoutMillis shouldBe 10000
      elastic.indexNamePrefix shouldBe "haystack-test"
      elastic.indexType shouldBe "spans"
      elastic.retryConfig.maxRetries shouldBe 10
      elastic.retryConfig.backOffInMillis shouldBe 1000
      elastic.retryConfig.backoffFactor shouldBe 2
      elastic.indexHourBucket shouldBe 6
      elastic.maxConnectionsPerRoute shouldBe 10

      elastic.awsRequestSigningConfiguration.enabled shouldEqual false
      elastic.awsRequestSigningConfiguration.region shouldEqual "us-west-2"
      elastic.awsRequestSigningConfiguration.awsServiceName shouldEqual "es"
      elastic.awsRequestSigningConfiguration.accessKey shouldBe None
      elastic.awsRequestSigningConfiguration.secretKey shouldBe None
    }
  }
} 
Example 10
Source File: KafkaTestClient.scala    From haystack-traces   with Apache License 2.0
package com.expedia.www.haystack.trace.indexer.integration.clients

import java.util.Properties

import com.expedia.www.haystack.trace.indexer.config.entities.KafkaConfiguration
import com.expedia.www.haystack.trace.indexer.integration.serdes.{SnappyCompressedSpanBufferProtoDeserializer, SpanProtoSerializer}
import com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster

object KafkaTestClient {
  val KAFKA_CLUSTER = new EmbeddedKafkaCluster(1)
  KAFKA_CLUSTER.start()
}

class KafkaTestClient {
  import KafkaTestClient._

  val INPUT_TOPIC = "spans"
  val OUTPUT_TOPIC = "span-buffer"

  val APP_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[ByteArraySerializer])
    props
  }

  val APP_CONSUMER_CONFIG: Properties = new Properties()

  val TEST_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[SpanProtoSerializer])
    props
  }

  val RESULT_CONSUMER_CONFIG = new Properties()

  def buildConfig = KafkaConfiguration(numStreamThreads = 1,
    pollTimeoutMs = 100,
    APP_CONSUMER_CONFIG, APP_PRODUCER_CONFIG, OUTPUT_TOPIC, INPUT_TOPIC,
    consumerCloseTimeoutInMillis = 3000,
    commitOffsetRetries = 3,
    commitBackoffInMillis = 250,
    maxWakeups = 5, wakeupTimeoutInMillis = 3000)

  def prepare(appId: String): Unit = {
    APP_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    APP_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-app-consumer")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SpanDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")

    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-result-consumer")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SnappyCompressedSpanBufferProtoDeserializer])

    deleteTopics(INPUT_TOPIC, OUTPUT_TOPIC)
    KAFKA_CLUSTER.createTopic(INPUT_TOPIC, 2, 1)
    KAFKA_CLUSTER.createTopic(OUTPUT_TOPIC)
  }

  private def deleteTopics(topics: String*): Unit = KAFKA_CLUSTER.deleteTopicsAndWait(topics:_*)
} 
Example 11
Source File: KafkaWordCount.scala    From spark1.52   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka.KafkaUtils



    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = Array("localhost:2181","","topic1,topic2,topic3,topic4","1")//args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount").setMaster("local")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messages of space-separated random digits (0-9).
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer connection properties
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while(true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

}
// scalastyle:on println 
Example 12
Source File: KafkaWordCount.scala    From iolap   with Apache License 2.0
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord}

import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf


object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messages of space-separated random digits (0-9).
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer connection properties
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while(true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

} 
Example 13
Source File: EmbeddedKsqlEngine.scala    From ksql-jdbc-driver   with Apache License 2.0
package com.github.mmolimar.ksql.jdbc.embedded

import java.io.IOException

import com.github.mmolimar.ksql.jdbc.utils.TestUtils
import io.confluent.ksql.rest.server.{KsqlRestApplication, KsqlRestConfig}
import io.confluent.ksql.version.metrics.VersionCheckerAgent
import io.confluent.rest.RestConfig
import kafka.utils.Logging
import org.apache.kafka.clients.producer.ProducerConfig
import org.scalamock.scalatest.MockFactory
import io.confluent.ksql.util.KsqlConfig

import scala.collection.JavaConverters._

class EmbeddedKsqlEngine(port: Int = TestUtils.getAvailablePort, brokerList: String, connectUrl: String) extends Logging with MockFactory {

  private val config = new KsqlRestConfig(Map(
    RestConfig.LISTENERS_CONFIG -> s"http://localhost:$port",
    ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokerList,
    KsqlConfig.CONNECT_URL_PROPERTY -> connectUrl,
    "ksql.service.id" -> "ksql-jdbc",
    "ksql.streams.auto.offset.reset" -> "latest",
    "ksql.command.topic.suffix" -> "commands"
  ).asJava)

  lazy val ksqlEngine: KsqlRestApplication = {
    import io.confluent.ksql.rest.server.mock.ksqlRestApplication

    val versionCheckerAgent = mock[VersionCheckerAgent]
    (versionCheckerAgent.start _).expects(*, *).returns((): Unit).anyNumberOfTimes
    (versionCheckerAgent.updateLastRequestTime _).expects().returns((): Unit).anyNumberOfTimes
    ksqlRestApplication(config, versionCheckerAgent)
  }

  @throws[IOException]
  def startup(): Unit = {
    info("Starting up embedded KSQL engine")

    ksqlEngine.start()

    info("Started embedded Zookeeper: " + getConnection)
  }

  def shutdown(): Unit = {
    info("Shutting down embedded KSQL engine")

    TestUtils.swallow(ksqlEngine.stop())

    info("Stopped embedded KSQL engine")
  }

  def getPort: Int = port

  def getConnection: String = "localhost:" + getPort

  override def toString: String = {
    val sb: StringBuilder = new StringBuilder("KSQL{")
    sb.append("connection=").append(getConnection)
    sb.append('}')

    sb.toString
  }

} 
Example 14
Source File: KafkaWordCount.scala    From multi-tenancy-spark   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._


object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messages of space-separated random digits (0-9).
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer connection properties
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while(true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

}
// scalastyle:on println 
Example 15
Source File: ThrottledSimulation.scala    From gatling-kafka   with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class ThrottledSimulation extends Simulation {
  val kafkaConf = kafka
    // Kafka topic name
    .topic("test")
    // Kafka producer configs
    .properties(
    Map(
      ProducerConfig.ACKS_CONFIG -> "1",
      // list of Kafka broker hostname and port pairs
      ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",

      // in most cases, StringSerializer or ByteArraySerializer
      ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG ->
        "org.apache.kafka.common.serialization.StringSerializer",
      ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG ->
        "org.apache.kafka.common.serialization.StringSerializer"))

  val scn = scenario("Kafka Test")
    .forever(
      exec(
        kafka("request")
          // message to send
          .send[String]("foo"))
    )

  setUp(
    scn.inject(atOnceUsers(10)))
    .throttle(jumpToRps(10), holdFor(30 seconds))
    .protocols(kafkaConf)
} 
Example 16
Source File: FeederSimulation.scala    From gatling-kafka   with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class FeederSimulation extends Simulation {
  val kafkaConf = kafka
    .topic("test")
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG ->
          "org.apache.kafka.common.serialization.StringSerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG ->
          "org.apache.kafka.common.serialization.StringSerializer"))

  val scn = scenario("Kafka Test")
    // You can also use feeder
    .feed(csv("test.csv").circular)
    .exec(kafka("request").send[String]("${foo}"))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
} 
Example 17
Source File: ByteArraySimulation.scala    From gatling-kafka   with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class ByteArraySimulation extends Simulation {
  val kafkaConf = kafka
    .topic("test")
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG ->
          "org.apache.kafka.common.serialization.ByteArraySerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG ->
          "org.apache.kafka.common.serialization.ByteArraySerializer"))

  val scn = scenario("Kafka Test")
    .exec(kafka("request").send("foo".getBytes: Array[Byte]))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
} 
Example 18
Source File: FeederByteArraySimulation.scala    From gatling-kafka   with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class FeederByteArraySimulation extends Simulation {
  val kafkaConf = kafka
    .topic("test")
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG ->
          "org.apache.kafka.common.serialization.ByteArraySerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG ->
          "org.apache.kafka.common.serialization.ByteArraySerializer"))

  val scn = scenario("Kafka Test")
    .feed(csv("test.csv").circular)
    .exec(
      kafka("request")
        .send(session => session("foo").validate[String].map(s => s.getBytes)))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
} 
Example 19
Source File: FeederKeyValueSimulation.scala    From gatling-kafka   with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class FeederKeyValueSimulation extends Simulation {
  val kafkaConf = kafka
    .topic("test")
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG ->
          "org.apache.kafka.common.serialization.StringSerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG ->
          "org.apache.kafka.common.serialization.StringSerializer"))

  val scn = scenario("Kafka Test")
    .feed(csv("kv.csv").circular)
    // You can also set the key that will be included in the record.
    //
    // The content of the CSV file above would be like this:
    //   key,value
    //   k1,v1
    //   k2,v2
    //   k3,v3
    //   ...
    //
    // And each line corresponds to a record sent to Kafka.
    .exec(kafka("request").send[String, String]("${key}", "${value}"))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
} 
Example 20
Source File: KafkaWordCount.scala    From sparkoscope   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._


object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messages of space-separated random digits (0-9).
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer connection properties
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while(true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

}
// scalastyle:on println 
Example 21
Source File: EmbeddedKafkaCustomConfigSpec.scala    From embedded-kafka   with MIT License
package net.manub.embeddedkafka

import kafka.server.KafkaConfig
import net.manub.embeddedkafka.EmbeddedKafka._
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig

import scala.language.postfixOps
import scala.util.Random

class EmbeddedKafkaCustomConfigSpec extends EmbeddedKafkaSpecSupport {
  final val TwoMegabytes   = 2097152
  final val ThreeMegabytes = 3145728

  "the custom config" should {
    "allow pass additional producer parameters" in {
      val customBrokerConfig =
        Map(
          KafkaConfig.ReplicaFetchMaxBytesProp -> s"$ThreeMegabytes",
          KafkaConfig.MessageMaxBytesProp      -> s"$ThreeMegabytes"
        )

      val customProducerConfig =
        Map(ProducerConfig.MAX_REQUEST_SIZE_CONFIG -> s"$ThreeMegabytes")
      val customConsumerConfig =
        Map(
          ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> s"$ThreeMegabytes"
        )

      implicit val customKafkaConfig: EmbeddedKafkaConfig =
        EmbeddedKafkaConfig(
          customBrokerProperties = customBrokerConfig,
          customProducerProperties = customProducerConfig,
          customConsumerProperties = customConsumerConfig
        )

      val bigMessage = generateMessageOfLength(TwoMegabytes)
      val topic      = "big-message-topic"

      withRunningKafka {
        publishStringMessageToKafka(topic, bigMessage)
        consumeFirstStringMessageFrom(topic) shouldBe bigMessage
      }
    }
  }

  def generateMessageOfLength(length: Int): String =
    Iterator.continually(Random.nextPrintableChar) take length mkString
} 
Example 22
Source File: MultiDataStreamer.scala    From structured-streaming-application   with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.language.postfixOps
import scala.util.Random


object MultiDataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 3000 milliseconds) {
    (1 to Random.nextInt(100)).foreach { id =>
      producer.send(new ProducerRecord[String, String](topic,s"device$id", (Math.random * 2 + 1).toString))
    }
  }
} 
Example 23
Source File: StreamStaticDataGenerator.scala    From structured-streaming-application   with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import knolx.spark.Stock
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random


object StreamStaticDataGenerator extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay")
  val orderTypes = List("buy", "sell")
  val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9)

  implicit val formats = Serialization.formats(NoTypeHints)
  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 5 seconds) {
    companyNames.foreach { name =>
      val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head)
      producer.send(new ProducerRecord[String, String](topic, write(stock)))
    }
  }
} 
Example 24
Source File: DataStreamer.scala    From structured-streaming-application   with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random


object DataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val someWords = List("about", "above", "after", "again", "against")

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 200 milliseconds) {
    Random.shuffle(someWords).headOption.foreach { word =>
      producer.send(new ProducerRecord[String, String](topic, word))
    }
  }
} 
Example 25
Source File: StreamStreamDataGenerator.scala    From structured-streaming-application   with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config._
import knolx.KnolXLogger
import knolx.spark.Stock
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random


object StreamStreamDataGenerator extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay")
  val orderTypes = List("buy", "sell")
  val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9)
  val randomCompanyNames = Random.shuffle(companyNames).drop(Random.shuffle((1 to 3).toList).head)

  implicit val formats = Serialization.formats(NoTypeHints)

  info("Streaming companies listed into Kafka...")
  system.scheduler.schedule(0 seconds, 20 seconds) {
    randomCompanyNames.foreach { name =>
      producer.send(new ProducerRecord[String, String](companiesTopic, name))
    }
  }

  info("Streaming stocks data into Kafka...")
  system.scheduler.schedule(0 seconds, 5 seconds) {
    companyNames.foreach { name =>
      val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head)
      producer.send(new ProducerRecord[String, String](stocksTopic, write(stock)))
    }
  }
} 
Example 26
Source File: ProducerSettings.scala    From zio-kafka   with Apache License 2.0
package zio.kafka.producer

import org.apache.kafka.clients.producer.ProducerConfig
import zio.duration._

case class ProducerSettings(
  bootstrapServers: List[String],
  closeTimeout: Duration,
  properties: Map[String, AnyRef]
) {
  def driverSettings: Map[String, AnyRef] =
    Map(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> bootstrapServers.mkString(",")) ++
      properties

  def withBootstrapServers(servers: List[String]): ProducerSettings =
    copy(bootstrapServers = servers)

  def withCloseTimeout(duration: Duration): ProducerSettings =
    copy(closeTimeout = duration)

  def withProperty(key: String, value: AnyRef): ProducerSettings =
    copy(properties = properties + (key -> value))

  def withProperties(kvs: (String, AnyRef)*): ProducerSettings =
    withProperties(kvs.toMap)

  def withProperties(kvs: Map[String, AnyRef]): ProducerSettings =
    copy(properties = properties ++ kvs)
}

object ProducerSettings {
  def apply(bootstrapServers: List[String]): ProducerSettings =
    new ProducerSettings(bootstrapServers, 30.seconds, Map())
} 
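A sketch of how these builder methods compose, assuming a local broker address for illustration:

import org.apache.kafka.clients.producer.ProducerConfig
import zio.duration._
import zio.kafka.producer.ProducerSettings

val settings = ProducerSettings(List("localhost:9092")) // assumed broker address
  .withCloseTimeout(10.seconds)
  .withProperty(ProducerConfig.ACKS_CONFIG, "all")
// settings.driverSettings now contains bootstrap.servers plus acks=all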
Example 27
Source File: KafkaTransmitter.scala    From trucking-iot   with Apache License 2.0
package com.orendainx.trucking.simulator.transmitters

import java.util.Properties

import akka.actor.{ActorLogging, Props}
import com.orendainx.trucking.simulator.transmitters.DataTransmitter.Transmit
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

import scala.sys.SystemProperties
import com.typesafe.config.Config


object KafkaTransmitter {
  def props(topic: String)(implicit config: Config) = Props(new KafkaTransmitter(topic))
}

class KafkaTransmitter(topic: String)(implicit config: Config) extends DataTransmitter with ActorLogging {

  private val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.getString("transmitter.kafka.bootstrap-servers"))
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.key-serializer"))
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.value-serializer"))

  // Enable settings for a secure environment, if necessary.
  // See: http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.4/bk_secure-kafka-ambari/content/ch_secure-kafka-produce-events.html
  val systemProperties = new SystemProperties
  if (config.getBoolean("transmitter.kafka.security-enabled")) {
    props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, config.getString("transmitter.kafka.security-protocol"))
    systemProperties.put("java.security.auth.login.config", config.getString("transmitter.kafka.jaas-file"))
  }

  private val producer = new KafkaProducer[String, String](props)

  def receive = {
    case Transmit(data) => producer.send(new ProducerRecord(topic, data.toCSV))
  }

  override def postStop(): Unit = {
    producer.close()
    log.info("KafkaTransmitter closed its producer.")
  }
} 
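A hypothetical way to wire the transmitter into an actor system. The topic name and the truckingData value are assumptions; truckingData stands in for a simulator event exposing toCSV, and the implicit config is assumed to contain the transmitter.kafka.* keys:

implicit val simulatorConfig: Config = com.typesafe.config.ConfigFactory.load()
val transmitter = system.actorOf(KafkaTransmitter.props("truck-events")) // hypothetical topic
transmitter ! DataTransmitter.Transmit(truckingData)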
Example 28
Source File: EventProducer.scala    From rokku   with Apache License 2.0
package com.ing.wbaa.rokku.proxy.provider.kafka

import akka.Done
import akka.http.scaladsl.model.HttpMethod
import com.ing.wbaa.rokku.proxy.config.KafkaSettings
import com.ing.wbaa.rokku.proxy.data.RequestId
import com.ing.wbaa.rokku.proxy.handler.LoggerHandlerWithId
import com.ing.wbaa.rokku.proxy.metrics.MetricsFactory
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata }
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.{ ExecutionContext, Future }

trait EventProducer {

  private val logger = new LoggerHandlerWithId

  import scala.collection.JavaConverters._

  protected[this] implicit val kafkaSettings: KafkaSettings

  protected[this] implicit val executionContext: ExecutionContext

  private lazy val config: Map[String, Object] =
    Map[String, Object](
      "bootstrap.servers" -> kafkaSettings.bootstrapServers,
      ProducerConfig.RETRIES_CONFIG -> kafkaSettings.retries,
      ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG -> kafkaSettings.retriesBackOff,
      ProducerConfig.RECONNECT_BACKOFF_MAX_MS_CONFIG -> kafkaSettings.retriesBackOffMax,
      CommonClientConfigs.SECURITY_PROTOCOL_CONFIG -> kafkaSettings.protocol,
      ProducerConfig.MAX_BLOCK_MS_CONFIG -> kafkaSettings.maxblock,
      ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG -> kafkaSettings.requestTimeoutMs,
      "ssl.truststore.location" -> kafkaSettings.sslTruststoreLocation,
      "ssl.truststore.password" -> kafkaSettings.sslTruststorePassword,
      "ssl.keystore.location" -> kafkaSettings.sslKeystoreLocation,
      "ssl.keystore.password" -> kafkaSettings.sslKeystorePassword,
      "ssl.key.password" -> kafkaSettings.sslKeyPassword
    )

  private lazy val kafkaProducer: KafkaProducer[String, String] = new KafkaProducer(config.asJava, new StringSerializer, new StringSerializer)

  def sendSingleMessage(event: String, topic: String, httpMethod: Option[HttpMethod] = None)(implicit id: RequestId): Future[Done] = {
    kafkaProducer
      .send(new ProducerRecord[String, String](topic, event), (metadata: RecordMetadata, exception: Exception) => {
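        // Kafka invokes this callback with exception == null on success, so the case _ branch below is the success path.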
        exception match {
          case e: Exception =>
            MetricsFactory.incrementKafkaSendErrors
            logger.error("error in sending event {} to topic {}, error={}", event, topic, e)
            throw new Exception(e)
          case _ =>
            httpMethod.map { m => MetricsFactory.incrementKafkaNotificationsSent(m) }
            logger.debug("Message sent {} to kafka, offset {}", event, metadata.offset())
        }
      }) match {
        case _ => Future(Done)
      }
  }
} 
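One small hardening note on the config map above: the raw string keys ("bootstrap.servers", "ssl.truststore.location", and friends) all have constants in the Kafka client library, so a sketch like the following expresses the same entries typo-proof. The constants are real Kafka classes; the values shown are placeholders, not rokku defaults.

import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.common.config.SslConfigs

// The same keys, via Kafka's own constants (all values are illustrative):
val sslEntries: Map[String, Object] = Map(
  CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
  SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG -> "/etc/ssl/truststore.jks",
  SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG -> "changeit",
  SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG -> "/etc/ssl/keystore.jks",
  SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG -> "changeit",
  SslConfigs.SSL_KEY_PASSWORD_CONFIG -> "changeit"
)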
Example 29
Source File: KafkaWordCount.scala    From AI   with Apache License 2.0 (5 votes)
package com.bigchange.basic

import java.util

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}


object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount").
      set("spark.streaming.receiver.writeAheadLog.enable", "true").
      set("spark.streaming.kafka.maxRatePerPartition", "1000")
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Set up checkpointing: the windowed operations below generally require a checkpoint directory.
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap

    // createStream returns Tuple2 (key, value) pairs; only the value is of interest here.
    // Note that this is the receiver-based approach (a non-receiver mode is also available). With the
    // default configuration it loses data if the receiver dies, hence the write-ahead log enabled in
    // the SparkConf above; the storage level can then be relaxed accordingly.
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap, StorageLevel.MEMORY_AND_DISK_SER).map(_._2)
    val words = lines.flatMap(_.split(" "))

    // Count words over a 10-second window, recomputed every 2 seconds (matching the arguments below).
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Seconds(10), Seconds(2), 2).
      filter(x => x._2 > 0)

    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messages of random numbers between 0 and 99.
object KafkaWordCountProducer {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    // Note that this is a broker list, in host:port,host:port form.
    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer connection properties (no Zookeeper involved here)
    val props = new util.HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(100).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

} 
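The reduceByKeyAndWindow variant above takes both a reduce function (_ + _) and an inverse (_ - _): on each slide Spark adds the counts of batches entering the window and subtracts those of batches leaving it, instead of re-reducing the whole window. A minimal, Spark-free sketch of that incremental idea (the batch data and object name are invented for illustration; the final filter mirrors the example's filter(x => x._2 > 0)):

object SlidingWindowSketch extends App {
  // Three 2-second "batches" of word counts (invented data).
  val batches = Seq(
    Map("truck" -> 2L, "route" -> 1L),
    Map("truck" -> 1L),
    Map("route" -> 3L)
  )

  def add(a: Map[String, Long], b: Map[String, Long]): Map[String, Long] =
    (a.keySet ++ b.keySet).map(k => k -> (a.getOrElse(k, 0L) + b.getOrElse(k, 0L))).toMap

  def subtract(a: Map[String, Long], b: Map[String, Long]): Map[String, Long] =
    b.foldLeft(a) { case (acc, (k, v)) => acc.updated(k, acc(k) - v) }
      .filter(_._2 > 0) // drop keys whose count fell to zero, as in the example

  // Window of 2 batches, sliding by 1: add the entering batch, subtract the leaving one.
  var window = add(batches(0), batches(1))
  println(window) // counts for batches 0..1: Map(truck -> 3, route -> 1)
  window = subtract(add(window, batches(2)), batches(0))
  println(window) // counts for batches 1..2: Map(truck -> 1, route -> 3)
}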
Example 30
Source File: KafkaWordCount.scala    From drizzle-spark   with Apache License 2.0 (5 votes)
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._


object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messages of random numbers between 0 and 9.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer connection properties (no Zookeeper involved here)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while(true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

}
// scalastyle:on println 
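Both word-count producers above send records with a null key, which leaves partition assignment to the producer's default partitioner and gives no per-key ordering. A hedged one-line variation (reusing topic, str, and producer from the example; the key "word-batch" is purely illustrative) pins same-key records to one partition:

// Records sharing a key always land on the same partition, preserving their relative order:
producer.send(new ProducerRecord[String, String](topic, "word-batch", str))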
Example 31
Source File: EmbeddedKafkaCustomConfigSpec.scala    From scalatest-embedded-kafka   with MIT License (4 votes)
package net.manub.embeddedkafka

import kafka.server.KafkaConfig
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig

import scala.language.postfixOps
import scala.util.Random

class EmbeddedKafkaCustomConfigSpec
    extends EmbeddedKafkaSpecSupport
    with EmbeddedKafka {
  val TwoMegabytes = 2097152
  val ThreeMegabytes = 3145728

  "the custom config" should {
    "allow pass additional producer parameters" in {
      val customBrokerConfig =
        Map(KafkaConfig.ReplicaFetchMaxBytesProp -> s"$ThreeMegabytes",
            KafkaConfig.MessageMaxBytesProp -> s"$ThreeMegabytes")

      val customProducerConfig =
        Map(ProducerConfig.MAX_REQUEST_SIZE_CONFIG -> s"$ThreeMegabytes")
      val customConsumerConfig =
        Map(
          ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> s"$ThreeMegabytes")

      implicit val customKafkaConfig =
        EmbeddedKafkaConfig(customBrokerProperties = customBrokerConfig,
                            customProducerProperties = customProducerConfig,
                            customConsumerProperties = customConsumerConfig)

      val bigMessage = generateMessageOfLength(TwoMegabytes)
      val topic = "big-message-topic"

      withRunningKafka {
        publishStringMessageToKafka(topic, bigMessage)
        consumeFirstStringMessageFrom(topic) shouldBe bigMessage
      }
    }
  }

  def generateMessageOfLength(length: Int): String =
    Stream.continually(Random.nextPrintableChar) take length mkString
}
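A note on why the spec raises both broker and client settings: a 2 MB message must clear the broker's message.max.bytes and replica.fetch.max.bytes as well as the producer's max.request.size and the consumer's max.partition.fetch.bytes, each of which defaults to roughly 1 MB. Raising all of them to 3 MB lets the message pass every hop from publish to consume.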