org.apache.kafka.clients.producer.ProducerConfig Scala Examples
The following examples show how to use org.apache.kafka.clients.producer.ProducerConfig.
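Each ProducerConfig constant is simply the string key of a standard producer property, so configuring through these constants is equivalent to using raw keys such as "bootstrap.servers". As a minimal orientation sketch (the broker address and topic name are placeholders, not taken from any of the projects below):

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object MinimalProducerSketch extends App {
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") // same key as "bootstrap.servers"
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)
  producer.send(new ProducerRecord[String, String]("example-topic", "key", "value"))
  producer.close()
}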
Example 1
Source File: IntegrationTest.scala From kmq with Apache License 2.0
package com.softwaremill.kmq.redelivery

import java.time.Duration
import java.util.Random

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerMessage, ProducerSettings, Subscriptions}
import akka.stream.ActorMaterializer
import akka.testkit.TestKit
import com.softwaremill.kmq._
import com.softwaremill.kmq.redelivery.infrastructure.KafkaSpec
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.scalatest.concurrent.Eventually
import org.scalatest.time.{Seconds, Span}
import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers}

import scala.collection.mutable.ArrayBuffer

class IntegrationTest extends TestKit(ActorSystem("test-system")) with FlatSpecLike with KafkaSpec
  with BeforeAndAfterAll with Eventually with Matchers {

  implicit val materializer = ActorMaterializer()
  import system.dispatcher

  "KMQ" should "resend message if not committed" in {
    val bootstrapServer = s"localhost:${testKafkaConfig.kafkaPort}"
    val kmqConfig = new KmqConfig("queue", "markers", "kmq_client", "kmq_redelivery",
      Duration.ofSeconds(1).toMillis, 1000)

    val consumerSettings = ConsumerSettings(system, new StringDeserializer, new StringDeserializer)
      .withBootstrapServers(bootstrapServer)
      .withGroupId(kmqConfig.getMsgConsumerGroupId)
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

    val markerProducerSettings = ProducerSettings(system,
      new MarkerKey.MarkerKeySerializer(), new MarkerValue.MarkerValueSerializer())
      .withBootstrapServers(bootstrapServer)
      .withProperty(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[ParititionFromMarkerKey].getName)
    val markerProducer = markerProducerSettings.createKafkaProducer()

    val random = new Random()

    lazy val processedMessages = ArrayBuffer[String]()
    lazy val receivedMessages = ArrayBuffer[String]()

    val control = Consumer.committableSource(consumerSettings, Subscriptions.topics(kmqConfig.getMsgTopic)) // 1. get messages from topic
      .map { msg =>
        ProducerMessage.Message(
          new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic,
            MarkerKey.fromRecord(msg.record), new StartMarker(kmqConfig.getMsgTimeoutMs)), msg)
      }
      .via(Producer.flow(markerProducerSettings, markerProducer)) // 2. write the "start" marker
      .map(_.message.passThrough)
      .mapAsync(1) { msg =>
        msg.committableOffset.commitScaladsl().map(_ => msg.record) // this should be batched
      }
      .map { msg =>
        receivedMessages += msg.value
        msg
      }
      .filter(_ => random.nextInt(5) != 0)
      .map { processedMessage =>
        processedMessages += processedMessage.value
        new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic,
          MarkerKey.fromRecord(processedMessage), EndMarker.INSTANCE)
      }
      .to(Producer.plainSink(markerProducerSettings, markerProducer)) // 5. write "end" markers
      .run()

    val redeliveryHook = RedeliveryTracker.start(new KafkaClients(bootstrapServer), kmqConfig)

    val messages = (0 to 20).map(_.toString)
    messages.foreach(msg => sendToKafka(kmqConfig.getMsgTopic, msg))

    eventually {
      receivedMessages.size should be > processedMessages.size
      processedMessages.sortBy(_.toInt).distinct shouldBe messages
    }(PatienceConfig(timeout = Span(15, Seconds)), implicitly)

    redeliveryHook.close()
    control.shutdown()
  }

  override def afterAll(): Unit = {
    super.afterAll()
    TestKit.shutdownActorSystem(system)
  }
}
Example 2
Source File: BasicSimulation.scala From gatling-kafka with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class BasicSimulation extends Simulation {
  val kafkaConf = kafka
    // Kafka topic name
    .topic("test")
    // Kafka producer configs
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        // list of Kafka broker hostname and port pairs
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        // in most cases, StringSerializer or ByteArraySerializer
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer"))

  val scn = scenario("Kafka Test")
    .exec(
      kafka("request")
        // message to send
        .send[String]("foo"))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
}
Example 3
Source File: Kafka.scala From event-sourcing-kafka-streams with MIT License
package org.amitayh.invoices.web

import java.time.Duration
import java.util.Collections.singletonList
import java.util.Properties

import cats.effect._
import cats.syntax.apply._
import cats.syntax.functor._
import fs2._
import org.amitayh.invoices.common.Config
import org.amitayh.invoices.common.Config.Topics.Topic
import org.apache.kafka.clients.consumer._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.log4s.{Logger, getLogger}

import scala.collection.JavaConverters._

object Kafka {

  trait Producer[F[_], K, V] {
    def send(key: K, value: V): F[RecordMetadata]
  }

  object Producer {
    def apply[F[_]: Async, K, V](producer: KafkaProducer[K, V], topic: Topic[K, V]): Producer[F, K, V] =
      (key: K, value: V) => Async[F].async { cb =>
        val record = new ProducerRecord(topic.name, key, value)
        producer.send(record, (metadata: RecordMetadata, exception: Exception) => {
          if (exception != null) cb(Left(exception))
          else cb(Right(metadata))
        })
      }
  }

  def producer[F[_]: Async, K, V](topic: Topic[K, V]): Resource[F, Producer[F, K, V]] = Resource {
    val create = Sync[F].delay {
      val props = new Properties
      props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers)
      new KafkaProducer[K, V](props, topic.keySerializer, topic.valueSerializer)
    }
    create.map(producer => (Producer(producer, topic), close(producer)))
  }

  def subscribe[F[_]: Sync, K, V](topic: Topic[K, V], groupId: String): Stream[F, (K, V)] = {
    val create = Sync[F].delay {
      val props = new Properties
      props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers)
      props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId)
      val consumer = new KafkaConsumer(props, topic.keyDeserializer, topic.valueDeserializer)
      consumer.subscribe(singletonList(topic.name))
      consumer
    }
    Stream.bracket(create)(close[F]).flatMap(consume[F, K, V])
  }

  private val logger: Logger = getLogger

  def log[F[_]: Sync](msg: String): F[Unit] = Sync[F].delay(logger.info(msg))

  private def consume[F[_]: Sync, K, V](consumer: KafkaConsumer[K, V]): Stream[F, (K, V)] = for {
    records <- Stream.repeatEval(Sync[F].delay(consumer.poll(Duration.ofSeconds(1))))
    record <- Stream.emits(records.iterator.asScala.toSeq)
  } yield record.key -> record.value

  private def close[F[_]: Sync](producer: KafkaProducer[_, _]): F[Unit] =
    Sync[F].delay(producer.close()) *> log(s"Producer closed")

  private def close[F[_]: Sync](consumer: KafkaConsumer[_, _]): F[Unit] =
    Sync[F].delay(consumer.close()) *> log("Consumer closed")
}
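A short usage sketch of the wrapper above, assuming cats-effect IO and a hypothetical Topic[String, String] value (the real topics are defined in the project's Config object):

import cats.effect.IO
import org.amitayh.invoices.common.Config.Topics.Topic

// `invoiceEvents` is a placeholder Topic[String, String]; the project defines its own topics.
def sendOne(invoiceEvents: Topic[String, String]): IO[Unit] =
  Kafka.producer[IO, String, String](invoiceEvents).use { producer =>
    producer.send("invoice-1", "created").map(_ => ())
  }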
Example 4
Source File: KafkaProducerUtils.scala From bigdata-examples with Apache License 2.0
package com.timeyang.common.util

import java.util.Properties

import com.timeyang.common.config.BaseConf
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object KafkaProducerUtils {

  @volatile lazy private val producer: KafkaProducer[String, String] = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BaseConf.kafkaBrokerList)
    props.put("acks", "all")
    props.put("retries", 1: Integer)
    props.put("batch.size", 16384: Integer)
    props.put("linger.ms", 1: Integer)
    props.put("buffer.memory", 33554432: Integer)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    new KafkaProducer[String, String](props)
  }

  def sendJsonMessages(topic: String, event: Object, events: Object*): Unit = {
    for (event <- event +: events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, events: List[Object]): Unit = {
    for (event <- events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, event: Object): Unit = {
    val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
    producer.send(record)
  }
}
Example 5
Source File: KafkaMessageSender.scala From model-serving-tutorial with Apache License 2.0
package com.lightbend.modelserving.client

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer

class MessageSender(val brokers: String) {

  import MessageSender._

  val producer = new KafkaProducer[Array[Byte], Array[Byte]](
    providerProperties(brokers, classOf[ByteArraySerializer].getName, classOf[ByteArraySerializer].getName))

  def writeKeyValue(topic: String, key: Array[Byte], value: Array[Byte]): Unit = {
    val result = producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, key, value)).get
    producer.flush()
  }

  def writeValue(topic: String, value: Array[Byte]): Unit = {
    val result = producer.send(
      new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get
    producer.flush()
  }

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
}
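The constructor above calls a providerProperties helper from the MessageSender companion object, which this listing omits. A plausible sketch of such a helper (a hypothetical reconstruction, not the tutorial's exact code) would simply populate the usual ProducerConfig keys:

object MessageSender {
  import java.util.Properties
  import org.apache.kafka.clients.producer.ProducerConfig

  // Hypothetical reconstruction of the omitted helper: builds the Properties
  // passed to the KafkaProducer constructor above.
  def providerProperties(brokers: String, keySerializer: String, valueSerializer: String): Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializer)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializer)
    props
  }
}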
Example 6
Source File: KafkaMessageSender.scala From kafka-with-akka-streams-kafka-streams-tutorial with Apache License 2.0
package com.lightbend.scala.kafka

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
}
Example 7
Source File: package.scala From Waves with MIT License
package com.wavesplatform.events

import java.util

import com.wavesplatform.events.protobuf.PBEvents
import com.wavesplatform.events.settings.BlockchainUpdatesSettings
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.config.SaslConfigs
import org.apache.kafka.common.serialization.{IntegerSerializer, Serializer}

package object kafka {

  private object BlockchainUpdatedSerializer extends Serializer[BlockchainUpdated] {
    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
    override def close(): Unit = {}
    override def serialize(topic: String, data: BlockchainUpdated): Array[Byte] =
      PBEvents.protobuf(data).toByteArray
  }

  private object IntSerializer extends Serializer[Int] {
    val integerSerializer = new IntegerSerializer

    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit =
      integerSerializer.configure(configs, isKey)
    override def close(): Unit = integerSerializer.close()
    override def serialize(topic: String, data: Int): Array[Byte] =
      integerSerializer.serialize(topic, data)
  }

  def createProperties(settings: BlockchainUpdatesSettings): util.Properties = {
    val props = new util.Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, settings.bootstrapServers)
    props.put(ProducerConfig.CLIENT_ID_CONFIG, settings.clientId)
    // props.put(ProducerConfig.RETRIES_CONFIG, "0")

    // SASL_SSL
    if (settings.ssl.enabled) {
      props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SASL_SSL")
      props.put(SaslConfigs.SASL_MECHANISM, "PLAIN")
      props.put(
        SaslConfigs.SASL_JAAS_CONFIG,
        s"org.apache.kafka.common.security.plain.PlainLoginModule required username = '${settings.ssl.username}' password = '${settings.ssl.password}';"
      )
    }
    props
  }

  def createProducerProperties(settings: BlockchainUpdatesSettings): util.Properties = {
    val props = createProperties(settings)
    props.put(ProducerConfig.ACKS_CONFIG, "all")
    props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, "10485760") // 10MB
    props
  }

  def createProducer(settings: BlockchainUpdatesSettings): KafkaProducer[Int, BlockchainUpdated] =
    new KafkaProducer[Int, BlockchainUpdated](createProducerProperties(settings), IntSerializer, BlockchainUpdatedSerializer)

  def createProducerRecord(topic: String, event: BlockchainUpdated): ProducerRecord[Int, BlockchainUpdated] = {
    val h = event match {
      case ap: BlockAppended                      => ap.toHeight
      case MicroBlockAppended(_, height, _, _, _) => height
      case RollbackCompleted(_, height)           => height
      case MicroBlockRollbackCompleted(_, height) => height
    }
    new ProducerRecord[Int, BlockchainUpdated](topic, h, event)
  }
}
Example 8
Source File: KafkaWordCount.scala From BigDatalog with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord}

import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf

object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
// scalastyle:on println
Example 9
Source File: ConfigurationLoaderSpec.scala From haystack-traces with Apache License 2.0
package com.expedia.www.haystack.trace.indexer.unit

import com.expedia.www.haystack.trace.commons.packer.PackerType
import com.expedia.www.haystack.trace.indexer.config.ProjectConfiguration
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.scalatest.{FunSpec, Matchers}

class ConfigurationLoaderSpec extends FunSpec with Matchers {

  val project = new ProjectConfiguration()

  describe("Configuration loader") {
    it("should load the health status config from base.conf") {
      project.healthStatusFilePath shouldEqual "/app/isHealthy"
    }

    it("should load the span buffer config only from base.conf") {
      val config = project.spanAccumulateConfig
      config.pollIntervalMillis shouldBe 2000L
      config.maxEntriesAllStores shouldBe 20000
      config.bufferingWindowMillis shouldBe 10000L
      config.packerType shouldEqual PackerType.SNAPPY
    }

    it("should load the kafka config from base.conf and one stream property from env variable") {
      val kafkaConfig = project.kafkaConfig
      kafkaConfig.produceTopic shouldBe "span-buffer"
      kafkaConfig.consumeTopic shouldBe "spans"
      kafkaConfig.numStreamThreads shouldBe 2
      kafkaConfig.commitOffsetRetries shouldBe 3
      kafkaConfig.commitBackoffInMillis shouldBe 200
      kafkaConfig.maxWakeups shouldBe 5
      kafkaConfig.wakeupTimeoutInMillis shouldBe 5000

      kafkaConfig.consumerProps.getProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG) shouldBe "kafkasvc:9092"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG) shouldBe "earliest"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.GROUP_ID_CONFIG) shouldBe "haystack-trace-indexer"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG) shouldBe "false"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG) shouldBe "org.apache.kafka.common.serialization.StringDeserializer"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG) shouldBe "com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer"
      kafkaConfig.consumerCloseTimeoutInMillis shouldBe 30000

      kafkaConfig.producerProps.getProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG) shouldBe "kafkasvc:9092"
      kafkaConfig.producerProps.getProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG) shouldBe "org.apache.kafka.common.serialization.ByteArraySerializer"
      kafkaConfig.producerProps.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG) shouldBe "org.apache.kafka.common.serialization.StringSerializer"
    }

    it("should load the service metadata config from base.conf") {
      val config = project.serviceMetadataWriteConfig
      config.flushIntervalInSec shouldBe 60
      config.flushOnMaxOperationCount shouldBe 10000
      config.esEndpoint shouldBe "http://elasticsearch:9200"
      config.maxInFlightBulkRequests shouldBe 10
      config.maxDocsInBulk shouldBe 100
      config.maxBulkDocSizeInBytes shouldBe 1000000
      config.indexTemplateJson shouldBe Some("some_template_json")
      config.consistencyLevel shouldBe "one"
      config.readTimeoutMillis shouldBe 5000
      config.connectionTimeoutMillis shouldBe 10000
      config.indexName shouldBe "service-metadata"
      config.indexType shouldBe "metadata"
      config.retryConfig.maxRetries shouldBe 10
      config.retryConfig.backOffInMillis shouldBe 100
      config.retryConfig.backoffFactor shouldBe 2
    }

    it("should load the trace backend config from base.conf and few properties overridden from env variable") {
      val backendConfiguration = project.backendConfig
      backendConfiguration.maxInFlightRequests shouldBe 100
    }

    it("should load the elastic search config from base.conf and one property overridden from env variable") {
      val elastic = project.elasticSearchConfig
      elastic.endpoint shouldBe "http://elasticsearch:9200"
      elastic.maxInFlightBulkRequests shouldBe 10
      elastic.maxDocsInBulk shouldBe 100
      elastic.maxBulkDocSizeInBytes shouldBe 1000000
      elastic.indexTemplateJson shouldBe Some("some_template_json")
      elastic.consistencyLevel shouldBe "one"
      elastic.readTimeoutMillis shouldBe 5000
      elastic.connectionTimeoutMillis shouldBe 10000
      elastic.indexNamePrefix shouldBe "haystack-test"
      elastic.indexType shouldBe "spans"
      elastic.retryConfig.maxRetries shouldBe 10
      elastic.retryConfig.backOffInMillis shouldBe 1000
      elastic.retryConfig.backoffFactor shouldBe 2
      elastic.indexHourBucket shouldBe 6
      elastic.maxConnectionsPerRoute shouldBe 10
      elastic.awsRequestSigningConfiguration.enabled shouldEqual false
      elastic.awsRequestSigningConfiguration.region shouldEqual "us-west-2"
      elastic.awsRequestSigningConfiguration.awsServiceName shouldEqual "es"
      elastic.awsRequestSigningConfiguration.accessKey shouldBe None
      elastic.awsRequestSigningConfiguration.secretKey shouldBe None
    }
  }
}
Example 10
Source File: KafkaTestClient.scala From haystack-traces with Apache License 2.0
package com.expedia.www.haystack.trace.indexer.integration.clients

import java.util.Properties

import com.expedia.www.haystack.trace.indexer.config.entities.KafkaConfiguration
import com.expedia.www.haystack.trace.indexer.integration.serdes.{SnappyCompressedSpanBufferProtoDeserializer, SpanProtoSerializer}
import com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster

object KafkaTestClient {
  val KAFKA_CLUSTER = new EmbeddedKafkaCluster(1)
  KAFKA_CLUSTER.start()
}

class KafkaTestClient {

  import KafkaTestClient._

  val INPUT_TOPIC = "spans"
  val OUTPUT_TOPIC = "span-buffer"

  val APP_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[ByteArraySerializer])
    props
  }

  val APP_CONSUMER_CONFIG: Properties = new Properties()

  val TEST_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[SpanProtoSerializer])
    props
  }

  val RESULT_CONSUMER_CONFIG = new Properties()

  def buildConfig = KafkaConfiguration(numStreamThreads = 1,
    pollTimeoutMs = 100,
    APP_CONSUMER_CONFIG,
    APP_PRODUCER_CONFIG,
    OUTPUT_TOPIC,
    INPUT_TOPIC,
    consumerCloseTimeoutInMillis = 3000,
    commitOffsetRetries = 3,
    commitBackoffInMillis = 250,
    maxWakeups = 5,
    wakeupTimeoutInMillis = 3000)

  def prepare(appId: String): Unit = {
    APP_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    APP_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-app-consumer")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SpanDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")

    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-result-consumer")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SnappyCompressedSpanBufferProtoDeserializer])

    deleteTopics(INPUT_TOPIC, OUTPUT_TOPIC)
    KAFKA_CLUSTER.createTopic(INPUT_TOPIC, 2, 1)
    KAFKA_CLUSTER.createTopic(OUTPUT_TOPIC)
  }

  private def deleteTopics(topics: String*): Unit = KAFKA_CLUSTER.deleteTopicsAndWait(topics: _*)
}
Example 11
Source File: KafkaWordCount.scala From spark1.52 with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka.KafkaUtils

object KafkaWordCount {
  def main(args: Array[String]) {
    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) =
      Array("localhost:2181", "", "topic1,topic2,topic3,topic4", "1") // args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount").setMaster("local")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
// scalastyle:on println
Example 12
Source File: KafkaWordCount.scala From iolap with Apache License 2.0
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord}

import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf

object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
Example 13
Source File: EmbeddedKsqlEngine.scala From ksql-jdbc-driver with Apache License 2.0
package com.github.mmolimar.ksql.jdbc.embedded

import java.io.IOException

import com.github.mmolimar.ksql.jdbc.utils.TestUtils
import io.confluent.ksql.rest.server.{KsqlRestApplication, KsqlRestConfig}
import io.confluent.ksql.version.metrics.VersionCheckerAgent
import io.confluent.rest.RestConfig
import kafka.utils.Logging
import org.apache.kafka.clients.producer.ProducerConfig
import org.scalamock.scalatest.MockFactory
import io.confluent.ksql.util.KsqlConfig

import scala.collection.JavaConverters._

class EmbeddedKsqlEngine(port: Int = TestUtils.getAvailablePort, brokerList: String, connectUrl: String)
  extends Logging with MockFactory {

  private val config = new KsqlRestConfig(Map(
    RestConfig.LISTENERS_CONFIG -> s"http://localhost:$port",
    ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokerList,
    KsqlConfig.CONNECT_URL_PROPERTY -> connectUrl,
    "ksql.service.id" -> "ksql-jdbc",
    "ksql.streams.auto.offset.reset" -> "latest",
    "ksql.command.topic.suffix" -> "commands"
  ).asJava)

  lazy val ksqlEngine: KsqlRestApplication = {
    import io.confluent.ksql.rest.server.mock.ksqlRestApplication

    val versionCheckerAgent = mock[VersionCheckerAgent]
    (versionCheckerAgent.start _).expects(*, *).returns((): Unit).anyNumberOfTimes
    (versionCheckerAgent.updateLastRequestTime _).expects().returns((): Unit).anyNumberOfTimes

    ksqlRestApplication(config, versionCheckerAgent)
  }

  @throws[IOException]
  def startup(): Unit = {
    info("Starting up embedded KSQL engine")
    ksqlEngine.start()
    info("Started embedded KSQL engine: " + getConnection)
  }

  def shutdown(): Unit = {
    info("Shutting down embedded KSQL engine")
    TestUtils.swallow(ksqlEngine.stop())
    info("Stopped embedded KSQL engine")
  }

  def getPort: Int = port

  def getConnection: String = "localhost:" + getPort

  override def toString: String = {
    val sb: StringBuilder = new StringBuilder("KSQL{")
    sb.append("connection=").append(getConnection)
    sb.append('}')
    sb.toString
  }
}
Example 14
Source File: KafkaWordCount.scala From multi-tenancy-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._

object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
// scalastyle:on println
Example 15
Source File: ThrottledSimulation.scala From gatling-kafka with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class ThrottledSimulation extends Simulation {
  val kafkaConf = kafka
    // Kafka topic name
    .topic("test")
    // Kafka producer configs
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        // list of Kafka broker hostname and port pairs
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        // in most cases, StringSerializer or ByteArraySerializer
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer"))

  val scn = scenario("Kafka Test")
    .forever(
      exec(
        kafka("request")
          // message to send
          .send[String]("foo"))
    )

  setUp(
    scn.inject(atOnceUsers(10)))
    .throttle(jumpToRps(10), holdFor(30 seconds))
    .protocols(kafkaConf)
}
Example 16
Source File: FeederSimulation.scala From gatling-kafka with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class FeederSimulation extends Simulation {
  val kafkaConf = kafka
    .topic("test")
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer"))

  val scn = scenario("Kafka Test")
    // You can also use feeder
    .feed(csv("test.csv").circular)
    .exec(kafka("request").send[String]("${foo}"))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
}
Example 17
Source File: ByteArraySimulation.scala From gatling-kafka with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class ByteArraySimulation extends Simulation {
  val kafkaConf = kafka
    .topic("test")
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.ByteArraySerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.ByteArraySerializer"))

  val scn = scenario("Kafka Test")
    .exec(kafka("request").send("foo".getBytes: Array[Byte]))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
}
Example 18
Source File: FeederByteArraySimulation.scala From gatling-kafka with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class FeederByteArraySimulation extends Simulation {
  val kafkaConf = kafka
    .topic("test")
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.ByteArraySerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.ByteArraySerializer"))

  val scn = scenario("Kafka Test")
    .feed(csv("test.csv").circular)
    .exec(
      kafka("request")
        .send(session => session("foo").validate[String].map(s => s.getBytes)))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
}
Example 19
Source File: FeederKeyValueSimulation.scala From gatling-kafka with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class FeederKeyValueSimulation extends Simulation {
  val kafkaConf = kafka
    .topic("test")
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer"))

  val scn = scenario("Kafka Test")
    .feed(csv("kv.csv").circular)
    // You can also set the key that will be included in the record.
    //
    // The content of the CSV file above would be like this:
    // key,value
    // k1,v1
    // k2,v2
    // k3,v3
    // ...
    //
    // And each line corresponds to a record sent to Kafka.
    .exec(kafka("request").send[String, String]("${key}", "${value}"))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
}
Example 20
Source File: KafkaWordCount.scala From sparkoscope with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._

object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
// scalastyle:on println
Example 21
Source File: EmbeddedKafkaCustomConfigSpec.scala From embedded-kafka with MIT License
package net.manub.embeddedkafka

import kafka.server.KafkaConfig
import net.manub.embeddedkafka.EmbeddedKafka._
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig

import scala.language.postfixOps
import scala.util.Random

class EmbeddedKafkaCustomConfigSpec extends EmbeddedKafkaSpecSupport {
  final val TwoMegabytes = 2097152
  final val ThreeMegabytes = 3145728

  "the custom config" should {
    "allow pass additional producer parameters" in {
      val customBrokerConfig = Map(
        KafkaConfig.ReplicaFetchMaxBytesProp -> s"$ThreeMegabytes",
        KafkaConfig.MessageMaxBytesProp -> s"$ThreeMegabytes"
      )

      val customProducerConfig =
        Map(ProducerConfig.MAX_REQUEST_SIZE_CONFIG -> s"$ThreeMegabytes")
      val customConsumerConfig = Map(
        ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> s"$ThreeMegabytes"
      )

      implicit val customKafkaConfig: EmbeddedKafkaConfig = EmbeddedKafkaConfig(
        customBrokerProperties = customBrokerConfig,
        customProducerProperties = customProducerConfig,
        customConsumerProperties = customConsumerConfig
      )

      val bigMessage = generateMessageOfLength(TwoMegabytes)
      val topic = "big-message-topic"

      withRunningKafka {
        publishStringMessageToKafka(topic, bigMessage)
        consumeFirstStringMessageFrom(topic) shouldBe bigMessage
      }
    }
  }

  def generateMessageOfLength(length: Int): String =
    Iterator.continually(Random.nextPrintableChar) take length mkString
}
Example 22
Source File: MultiDataStreamer.scala From structured-streaming-application with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.language.postfixOps
import scala.util.Random

object MultiDataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 3000 milliseconds) {
    (1 to Random.nextInt(100)).foreach { id =>
      producer.send(new ProducerRecord[String, String](topic, s"device$id", (Math.random * 2 + 1).toString))
    }
  }
}
Example 23
Source File: StreamStaticDataGenerator.scala From structured-streaming-application with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import knolx.spark.Stock
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random

object StreamStaticDataGenerator extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay")
  val orderTypes = List("buy", "sell")
  val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9)

  implicit val formats = Serialization.formats(NoTypeHints)

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 5 seconds) {
    companyNames.foreach { name =>
      val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head)
      producer.send(new ProducerRecord[String, String](topic, write(stock)))
    }
  }
}
Example 24
Source File: DataStreamer.scala From structured-streaming-application with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random

object DataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val someWords = List("about", "above", "after", "again", "against")

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 200 milliseconds) {
    Random.shuffle(someWords).headOption.foreach { word =>
      producer.send(new ProducerRecord[String, String](topic, word))
    }
  }
}
Example 25
Source File: StreamStreamDataGenerator.scala From structured-streaming-application with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config._
import knolx.KnolXLogger
import knolx.spark.Stock
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random

object StreamStreamDataGenerator extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay")
  val orderTypes = List("buy", "sell")
  val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9)
  val randomCompanyNames = Random.shuffle(companyNames).drop(Random.shuffle((1 to 3).toList).head)

  implicit val formats = Serialization.formats(NoTypeHints)

  info("Streaming companies listed into Kafka...")
  system.scheduler.schedule(0 seconds, 20 seconds) {
    randomCompanyNames.foreach { name =>
      producer.send(new ProducerRecord[String, String](companiesTopic, name))
    }
  }

  info("Streaming stocks data into Kafka...")
  system.scheduler.schedule(0 seconds, 5 seconds) {
    companyNames.foreach { name =>
      val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head)
      producer.send(new ProducerRecord[String, String](stocksTopic, write(stock)))
    }
  }
}
Example 26
Source File: ProducerSettings.scala From zio-kafka with Apache License 2.0
package zio.kafka.producer

import org.apache.kafka.clients.producer.ProducerConfig
import zio.duration._

case class ProducerSettings(
  bootstrapServers: List[String],
  closeTimeout: Duration,
  properties: Map[String, AnyRef]
) {
  def driverSettings: Map[String, AnyRef] =
    Map(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> bootstrapServers.mkString(",")) ++ properties

  def withBootstrapServers(servers: List[String]): ProducerSettings =
    copy(bootstrapServers = servers)

  def withCloseTimeout(duration: Duration): ProducerSettings =
    copy(closeTimeout = duration)

  def withProperty(key: String, value: AnyRef): ProducerSettings =
    copy(properties = properties + (key -> value))

  def withProperties(kvs: (String, AnyRef)*): ProducerSettings =
    withProperties(kvs.toMap)

  def withProperties(kvs: Map[String, AnyRef]): ProducerSettings =
    copy(properties = properties ++ kvs)
}

object ProducerSettings {
  def apply(bootstrapServers: List[String]): ProducerSettings =
    new ProducerSettings(bootstrapServers, 30.seconds, Map())
}
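A short sketch of how these settings compose (the broker address is a placeholder):

import org.apache.kafka.clients.producer.ProducerConfig
import zio.duration._
import zio.kafka.producer.ProducerSettings

object ProducerSettingsExample {
  // "localhost:9092" is a placeholder broker address.
  val settings: ProducerSettings =
    ProducerSettings(List("localhost:9092")) // starts with the 30-second default close timeout
      .withCloseTimeout(10.seconds)
      .withProperty(ProducerConfig.ACKS_CONFIG, "all")

  // driverSettings now holds "bootstrap.servers" plus the extra "acks" entry,
  // ready to be handed to the underlying KafkaProducer.
  val driver: Map[String, AnyRef] = settings.driverSettings
}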
Example 27
Source File: KafkaTransmitter.scala From trucking-iot with Apache License 2.0
package com.orendainx.trucking.simulator.transmitters

import java.util.Properties

import akka.actor.{ActorLogging, Props}
import com.orendainx.trucking.simulator.transmitters.DataTransmitter.Transmit
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

import scala.sys.SystemProperties
import com.typesafe.config.Config

object KafkaTransmitter {
  def props(topic: String)(implicit config: Config) = Props(new KafkaTransmitter(topic))
}

class KafkaTransmitter(topic: String)(implicit config: Config) extends DataTransmitter with ActorLogging {

  private val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.getString("transmitter.kafka.bootstrap-servers"))
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.key-serializer"))
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.value-serializer"))

  // Enable settings for a secure environment, if necessary.
  // See: http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.4/bk_secure-kafka-ambari/content/ch_secure-kafka-produce-events.html
  val systemProperties = new SystemProperties
  if (config.getBoolean("transmitter.kafka.security-enabled")) {
    props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, config.getString("transmitter.kafka.security-protocol"))
    systemProperties.put("java.security.auth.login.config", config.getString("transmitter.kafka.jaas-file"))
  }

  private val producer = new KafkaProducer[String, String](props)

  def receive = {
    case Transmit(data) => producer.send(new ProducerRecord(topic, data.toCSV))
  }

  override def postStop(): Unit = {
    producer.close()
    log.info("KafkaTransmitter closed its producer.")
  }
}
Example 28
Source File: EventProducer.scala From rokku with Apache License 2.0
package com.ing.wbaa.rokku.proxy.provider.kafka

import akka.Done
import akka.http.scaladsl.model.HttpMethod
import com.ing.wbaa.rokku.proxy.config.KafkaSettings
import com.ing.wbaa.rokku.proxy.data.RequestId
import com.ing.wbaa.rokku.proxy.handler.LoggerHandlerWithId
import com.ing.wbaa.rokku.proxy.metrics.MetricsFactory
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata }
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.{ ExecutionContext, Future }

trait EventProducer {

  private val logger = new LoggerHandlerWithId

  import scala.collection.JavaConverters._

  protected[this] implicit val kafkaSettings: KafkaSettings
  protected[this] implicit val executionContext: ExecutionContext

  private lazy val config: Map[String, Object] = Map[String, Object](
    "bootstrap.servers" -> kafkaSettings.bootstrapServers,
    ProducerConfig.RETRIES_CONFIG -> kafkaSettings.retries,
    ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG -> kafkaSettings.retriesBackOff,
    ProducerConfig.RECONNECT_BACKOFF_MAX_MS_CONFIG -> kafkaSettings.retriesBackOffMax,
    CommonClientConfigs.SECURITY_PROTOCOL_CONFIG -> kafkaSettings.protocol,
    ProducerConfig.MAX_BLOCK_MS_CONFIG -> kafkaSettings.maxblock,
    ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG -> kafkaSettings.requestTimeoutMs,
    "ssl.truststore.location" -> kafkaSettings.sslTruststoreLocation,
    "ssl.truststore.password" -> kafkaSettings.sslTruststorePassword,
    "ssl.keystore.location" -> kafkaSettings.sslKeystoreLocation,
    "ssl.keystore.password" -> kafkaSettings.sslKeystorePassword,
    "ssl.key.password" -> kafkaSettings.sslKeyPassword
  )

  private lazy val kafkaProducer: KafkaProducer[String, String] =
    new KafkaProducer(config.asJava, new StringSerializer, new StringSerializer)

  def sendSingleMessage(event: String, topic: String, httpMethod: Option[HttpMethod] = None)(implicit id: RequestId): Future[Done] = {
    kafkaProducer
      .send(new ProducerRecord[String, String](topic, event), (metadata: RecordMetadata, exception: Exception) => {
        exception match {
          case e: Exception =>
            MetricsFactory.incrementKafkaSendErrors
            logger.error("error in sending event {} to topic {}, error={}", event, topic, e)
            throw new Exception(e)
          case _ =>
            httpMethod.map { m => MetricsFactory.incrementKafkaNotificationsSent(m) }
            logger.debug("Message sent {} to kafka, offset {}", event, metadata.offset())
        }
      }) match {
      case _ => Future(Done)
    }
  }
}
Example 29
Source File: KafkaWordCount.scala From AI with Apache License 2.0
package com.bigchange.basic

import java.util

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaWordCount {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount").
      set("spark.streaming.receiver.writeAheadLog.enable", "true").
      set("spark.streaming.kafka.maxRatePerPartition", "1000")
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Enable checkpointing: the windowed operations below require a checkpoint directory.
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap

    // createStream returns a Tuple2 of (key, value); only the value is used here.
    // Note: this is the receiver-based approach (a non-receiver mode is also provided). With the default
    // configuration it can lose data if the receiver dies, so the write-ahead log is enabled above and
    // the storage level can be adjusted accordingly.
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap,
      StorageLevel.MEMORY_AND_DISK_SER).map(_._2)
    val words = lines.flatMap(_.split(" "))

    // Count words over a 10-second window, computed every 2 seconds.
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Seconds(10), Seconds(2), 2).
      filter(x => x._2 > 0)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    // Note: this is the broker list, in host:port,host:port form.
    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new util.HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(100).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
Example 30
Source File: KafkaWordCount.scala From drizzle-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._

object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
// scalastyle:on println
Example 31
Source File: EmbeddedKafkaCustomConfigSpec.scala From scalatest-embedded-kafka with MIT License
package net.manub.embeddedkafka

import kafka.server.KafkaConfig
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig

import scala.language.postfixOps
import scala.util.Random

class EmbeddedKafkaCustomConfigSpec extends EmbeddedKafkaSpecSupport with EmbeddedKafka {
  val TwoMegabytes = 2097152
  val ThreeMegabytes = 3145728

  "the custom config" should {
    "allow pass additional producer parameters" in {
      val customBrokerConfig =
        Map(KafkaConfig.ReplicaFetchMaxBytesProp -> s"$ThreeMegabytes",
          KafkaConfig.MessageMaxBytesProp -> s"$ThreeMegabytes")

      val customProducerConfig =
        Map(ProducerConfig.MAX_REQUEST_SIZE_CONFIG -> s"$ThreeMegabytes")
      val customConsumerConfig =
        Map(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> s"$ThreeMegabytes")

      implicit val customKafkaConfig =
        EmbeddedKafkaConfig(customBrokerProperties = customBrokerConfig,
          customProducerProperties = customProducerConfig,
          customConsumerProperties = customConsumerConfig)

      val bigMessage = generateMessageOfLength(TwoMegabytes)
      val topic = "big-message-topic"

      withRunningKafka {
        publishStringMessageToKafka(topic, bigMessage)
        consumeFirstStringMessageFrom(topic) shouldBe bigMessage
      }
    }
  }

  def generateMessageOfLength(length: Int): String =
    Stream.continually(Random.nextPrintableChar) take length mkString
}