org.apache.kafka.clients.producer.ProducerRecord Scala Examples
The following examples show how to use org.apache.kafka.clients.producer.ProducerRecord.
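As a quick orientation, here is a minimal, self-contained sketch of the three ProducerRecord constructor shapes most of the examples below rely on. The topic name, broker address, and object name are illustrative placeholders, not taken from any of the listed projects, and the sketch assumes a broker reachable at localhost:9092.

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object ProducerRecordSketch extends App {
  val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092") // placeholder broker address
  props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
  val producer = new KafkaProducer[String, String](props)

  // Value only: the target partition is chosen by the producer's partitioner.
  producer.send(new ProducerRecord[String, String]("demo-topic", "value-only"))
  // Key and value: records with the same key always land on the same partition.
  producer.send(new ProducerRecord[String, String]("demo-topic", "some-key", "keyed value"))
  // Explicit partition, key and value: the record is pinned to partition 0.
  producer.send(new ProducerRecord[String, String]("demo-topic", 0, "some-key", "pinned value"))

  producer.close()
}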
Example 1
Source File: IntegrationTest.scala From kmq with Apache License 2.0 | 6 votes |
package com.softwaremill.kmq.redelivery

import java.time.Duration
import java.util.Random

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerMessage, ProducerSettings, Subscriptions}
import akka.stream.ActorMaterializer
import akka.testkit.TestKit
import com.softwaremill.kmq._
import com.softwaremill.kmq.redelivery.infrastructure.KafkaSpec
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.scalatest.concurrent.Eventually
import org.scalatest.time.{Seconds, Span}
import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers}

import scala.collection.mutable.ArrayBuffer

class IntegrationTest extends TestKit(ActorSystem("test-system")) with FlatSpecLike with KafkaSpec
  with BeforeAndAfterAll with Eventually with Matchers {

  implicit val materializer = ActorMaterializer()
  import system.dispatcher

  "KMQ" should "resend message if not committed" in {
    val bootstrapServer = s"localhost:${testKafkaConfig.kafkaPort}"
    val kmqConfig = new KmqConfig("queue", "markers", "kmq_client", "kmq_redelivery",
      Duration.ofSeconds(1).toMillis, 1000)

    val consumerSettings = ConsumerSettings(system, new StringDeserializer, new StringDeserializer)
      .withBootstrapServers(bootstrapServer)
      .withGroupId(kmqConfig.getMsgConsumerGroupId)
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

    val markerProducerSettings = ProducerSettings(system,
      new MarkerKey.MarkerKeySerializer(), new MarkerValue.MarkerValueSerializer())
      .withBootstrapServers(bootstrapServer)
      .withProperty(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[ParititionFromMarkerKey].getName)
    val markerProducer = markerProducerSettings.createKafkaProducer()

    val random = new Random()

    lazy val processedMessages = ArrayBuffer[String]()
    lazy val receivedMessages = ArrayBuffer[String]()

    val control = Consumer.committableSource(consumerSettings, Subscriptions.topics(kmqConfig.getMsgTopic)) // 1. get messages from topic
      .map { msg =>
        ProducerMessage.Message(
          new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic,
            MarkerKey.fromRecord(msg.record), new StartMarker(kmqConfig.getMsgTimeoutMs)), msg)
      }
      .via(Producer.flow(markerProducerSettings, markerProducer)) // 2. write the "start" marker
      .map(_.message.passThrough)
      .mapAsync(1) { msg =>
        msg.committableOffset.commitScaladsl().map(_ => msg.record) // this should be batched
      }
      .map { msg =>
        receivedMessages += msg.value
        msg
      }
      .filter(_ => random.nextInt(5) != 0)
      .map { processedMessage =>
        processedMessages += processedMessage.value
        new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic,
          MarkerKey.fromRecord(processedMessage), EndMarker.INSTANCE)
      }
      .to(Producer.plainSink(markerProducerSettings, markerProducer)) // 5. write "end" markers
      .run()

    val redeliveryHook = RedeliveryTracker.start(new KafkaClients(bootstrapServer), kmqConfig)

    val messages = (0 to 20).map(_.toString)
    messages.foreach(msg => sendToKafka(kmqConfig.getMsgTopic, msg))

    eventually {
      receivedMessages.size should be > processedMessages.size
      processedMessages.sortBy(_.toInt).distinct shouldBe messages
    }(PatienceConfig(timeout = Span(15, Seconds)), implicitly)

    redeliveryHook.close()
    control.shutdown()
  }

  override def afterAll(): Unit = {
    super.afterAll()
    TestKit.shutdownActorSystem(system)
  }
}
Example 2
Source File: KafkaMessageSender.scala From kafka-with-akka-streams-kafka-streams-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.scala.kafka

// Excerpt from the message-sender class defined in this file; the enclosing class
// declaration (which holds the `producer` used below) is omitted in the original snippet.

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
}
Example 3
Source File: ExampleApp.scala From kafka4s with Apache License 2.0 | 5 votes |
package example3

import cats.effect._
import cats.implicits._
import fs2.Stream
import com.banno.kafka._
import com.banno.kafka.admin._
import com.banno.kafka.consumer._
import com.banno.kafka.producer._
import org.apache.kafka.clients.admin.NewTopic
import org.apache.kafka.clients.producer.ProducerRecord
import scala.concurrent.duration._
import scala.util.Random

final class ExampleApp[F[_]: Concurrent: ContextShift: Timer] {

  // Change these for your environment as needed
  val topic = new NewTopic(s"example3", 1, 3.toShort)
  val kafkaBootstrapServers = "kafka.local:9092,kafka.local:9093"

  val example: F[Unit] =
    for {
      _ <- Sync[F].delay(println("Starting kafka4s example"))
      _ <- AdminApi.createTopicsIdempotent[F](kafkaBootstrapServers, topic)
      writeStream = Stream
        .resource(ProducerApi.resource[F, Int, Int](BootstrapServers(kafkaBootstrapServers)))
        .flatMap { producer =>
          Stream
            .awakeDelay[F](1 second)
            .evalMap { _ =>
              Sync[F].delay(Random.nextInt()).flatMap { i =>
                producer.sendAndForget(new ProducerRecord(topic.name, i, i))
              }
            }
        }
      readStream = Stream
        .resource(
          ConsumerApi
            .resource[F, Int, Int](
              BootstrapServers(kafkaBootstrapServers),
              GroupId("example3"),
              AutoOffsetReset.earliest,
              EnableAutoCommit(true)
            )
        )
        .evalTap(_.subscribe(topic.name))
        .flatMap(
          _.recordStream(1.second)
            .map(_.value)
            .filter(_ % 2 == 0)
            .evalMap(i => Sync[F].delay(println(i)))
        )
      _ <- writeStream
        .merge(readStream)
        .onFinalize(Sync[F].delay(println("Finished kafka4s example")))
        .compile
        .drain
    } yield ()
}

object ExampleApp {
  def apply[F[_]: Concurrent: ContextShift: Timer] = new ExampleApp[F]
}
Example 4
Source File: KafkaStreamingWriterRDD.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.writer

import com.github.benfradet.spark.kafka010.writer._
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.spark.sql.SparkSession

object KafkaStreamingWriterRDD extends App {

  val spark = SparkSession
    .builder
    .master("local")
    .appName("streaming-writer")
    .getOrCreate()

  setupLogging()

  val numbersRDD = spark.range(400, 420).rdd

  numbersRDD.writeToKafka(
    numbersProducerConfig,
    s => new ProducerRecord[String, String](topic, "key " + s, s.toString)
  )
}
Example 5
Source File: CommitConsumerToFlowProducer.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.integration

import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ProducerMessage, Subscriptions}
import akka.stream.scaladsl.Sink
import com.example._
import org.apache.kafka.clients.producer.ProducerRecord

object CommitConsumerToFlowProducer extends App {

  val done =
    Consumer.committableSource(consumerSettings, Subscriptions.topics(topic1))
      .map { msg =>
        println(s"topic1 -> topic2: $msg")
        ProducerMessage.Message(new ProducerRecord[Array[Byte], String](
          topic2,
          msg.record.value
        ), msg.committableOffset)
      }
      .via(Producer.flow(producerSettings))
      .mapAsync(producerSettings.parallelism) { result =>
        result.message.passThrough.commitScaladsl()
      }
      .runWith(Sink.ignore)
}
Example 6
Source File: CommitConsumerToSinkProducer.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.integration

import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ProducerMessage, Subscriptions}
import org.apache.kafka.clients.producer.ProducerRecord
import com.example._

object CommitConsumerToSinkProducer {

  val done =
    Consumer.committableSource(consumerSettings, Subscriptions.topics(topic1))
      .map { msg =>
        println(s"topic1 -> topic2: $msg")
        ProducerMessage.Message(new ProducerRecord[Array[Byte], String](
          topic2,
          msg.record.value
        ), msg.committableOffset)
      }
      .runWith(Producer.commitableSink(producerSettings))
}
Example 7
Source File: FlowProducer.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.producer

import akka.kafka.ProducerMessage
import akka.kafka.scaladsl.Producer
import akka.stream.scaladsl.{Sink, Source}
import org.apache.kafka.clients.producer.ProducerRecord
import com.example._

object FlowProducer extends App {

  val done = Source(100 to 111)
    .map { n =>
      val partition = 1
      ProducerMessage.Message(new ProducerRecord[Array[Byte], String](
        topic, partition, null, n.toString
      ), n)
    }
    .via(Producer.flow(producerSettings))
    .map { result =>
      val record = result.message.record
      println(s"${record.topic}/${record.partition} ${result.offset}: ${record.value}" +
        s"(${result.message.passThrough})")
      result
    }
    .runWith(Sink.ignore)
}
Example 8
Source File: SimpleProducer.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.producer

import java.util.Properties

import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer}

object SimpleProducer extends App {

  val topic = "sample_topic"

  private val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092")
  props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

  val producer = new KafkaProducer[String, String](props)

  try {
    for (i <- 0 to 10) {
      producer.send(new ProducerRecord[String, String](topic, "title " + i.toString, "data from topic"))
      println(s"Sent: $i")
    }
    println("Message sent successfully")
    producer.close()
  } catch {
    case ex: Exception => ex.printStackTrace()
  }
}
Example 9
Source File: AkkaActorKafkaProduder.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example

import akka.actor.ActorSystem
import cakesolutions.kafka.KafkaProducerRecord
import cakesolutions.kafka.akka.{KafkaProducerActor, ProducerRecords}
import org.apache.kafka.clients.producer.ProducerRecord

object AkkaActorKafkaProduder {

  def main(args: Array[String]): Unit = {
    actorProducer()
  }

  private def actorProducer() = {
    val system = ActorSystem()
    val producer = system.actorOf(KafkaProducerActor.props(kafkaProducerConf))

    val batch: Seq[ProducerRecord[String, String]] = Seq(
      KafkaProducerRecord(topic, "foo"),
      KafkaProducerRecord(topic, "key", "value"),
      KafkaProducerRecord(topic, "bar")
    )
    val message = ProducerRecords(batch)

    producer ! message
  }
}
Example 10
Source File: BasicTest.scala From kafka-testing with Apache License 2.0 | 5 votes |
package com.landoop.kafka.testing

import org.apache.avro.Schema
import org.apache.avro.generic.GenericData
import org.apache.kafka.clients.producer.ProducerRecord

class BasicTest extends ClusterTestingCapabilities {

  private val createAvroRecord = {
    val userSchema = "{\"namespace\": \"example.avro\", \"type\": \"record\", " +
      "\"name\": \"User\"," +
      "\"fields\": [{\"name\": \"name\", \"type\": \"string\"}]}"
    val parser = new Schema.Parser
    val schema = parser.parse(userSchema)
    val avroRecord = new GenericData.Record(schema)
    avroRecord.put("name", "testUser")
    avroRecord
  }

  "KCluster" should {
    "start up and be able to handle avro records being sent " in {
      val topic = "testAvro" + System.currentTimeMillis()
      val avroRecord = createAvroRecord
      val objects = Array[AnyRef](avroRecord)
      val producerProps = stringAvroProducerProps
      val producer = createProducer[String, Any](producerProps)

      for (o <- objects) {
        val message = new ProducerRecord[String, Any](topic, o)
        producer.send(message)
      }

      val consumerProps = stringAvroConsumerProps()
      val consumer = createStringAvroConsumer(consumerProps)
      val records = consumeStringAvro(consumer, topic, objects.length)
      objects.toSeq shouldBe records
    }

    "write and read avro records" in {
      val topic = "testAvro" + System.currentTimeMillis()
      val avroRecord = createAvroRecord
      val objects = Array[Any](avroRecord, true, 130, 345L, 1.23f, 2.34d, "abc", "def".getBytes)
      val producerProps = stringAvroProducerProps
      val producer = createProducer[String, Any](producerProps)

      for (o <- objects) {
        producer.send(new ProducerRecord[String, Any](topic, o))
      }

      val consumerProps = stringAvroConsumerProps("group" + System.currentTimeMillis())
      val consumer = createStringAvroConsumer(consumerProps)
      val records = consumeStringAvro(consumer, topic, objects.length)
      objects.deep shouldBe records.toArray.deep
    }
  }
}
Example 11
Source File: package.scala From Waves with MIT License | 5 votes |
package com.wavesplatform.events

import java.util

import com.wavesplatform.events.protobuf.PBEvents
import com.wavesplatform.events.settings.BlockchainUpdatesSettings
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.config.SaslConfigs
import org.apache.kafka.common.serialization.{IntegerSerializer, Serializer}

package object kafka {

  private object BlockchainUpdatedSerializer extends Serializer[BlockchainUpdated] {
    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
    override def close(): Unit = {}
    override def serialize(topic: String, data: BlockchainUpdated): Array[Byte] =
      PBEvents.protobuf(data).toByteArray
  }

  private object IntSerializer extends Serializer[Int] {
    val integerSerializer = new IntegerSerializer

    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit =
      integerSerializer.configure(configs, isKey)
    override def close(): Unit = integerSerializer.close()
    override def serialize(topic: String, data: Int): Array[Byte] =
      integerSerializer.serialize(topic, data)
  }

  def createProperties(settings: BlockchainUpdatesSettings): util.Properties = {
    val props = new util.Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, settings.bootstrapServers)
    props.put(ProducerConfig.CLIENT_ID_CONFIG, settings.clientId)
    // props.put(ProducerConfig.RETRIES_CONFIG, "0")

    // SASL_SSL
    if (settings.ssl.enabled) {
      props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SASL_SSL")
      props.put(SaslConfigs.SASL_MECHANISM, "PLAIN")
      props.put(
        SaslConfigs.SASL_JAAS_CONFIG,
        s"org.apache.kafka.common.security.plain.PlainLoginModule required username = '${settings.ssl.username}' password = '${settings.ssl.password}';"
      )
    }
    props
  }

  def createProducerProperties(settings: BlockchainUpdatesSettings): util.Properties = {
    val props = createProperties(settings)
    props.put(ProducerConfig.ACKS_CONFIG, "all")
    props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, "10485760") // 10MB
    props
  }

  def createProducer(settings: BlockchainUpdatesSettings): KafkaProducer[Int, BlockchainUpdated] =
    new KafkaProducer[Int, BlockchainUpdated](createProducerProperties(settings), IntSerializer, BlockchainUpdatedSerializer)

  def createProducerRecord(topic: String, event: BlockchainUpdated): ProducerRecord[Int, BlockchainUpdated] = {
    val h = event match {
      case ap: BlockAppended                      => ap.toHeight
      case MicroBlockAppended(_, height, _, _, _) => height
      case RollbackCompleted(_, height)           => height
      case MicroBlockRollbackCompleted(_, height) => height
    }
    new ProducerRecord[Int, BlockchainUpdated](topic, h, event)
  }
}
Example 12
Source File: HiveOrcBenchmark.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.hive.it

import org.apache.kafka.clients.producer.ProducerRecord

import scala.concurrent.Future
import scala.io.Source
import scala.util.Try

object HiveOrcBenchmark extends App with PersonTestData with HiveTests {

  import scala.concurrent.ExecutionContext.Implicits.global

  val start = System.currentTimeMillis()

  val topic = createTopic()

  val taskDef = Source.fromInputStream(getClass.getResourceAsStream("/hive_sink_task_no_partitions-orc.json")).getLines().mkString("\n")
    .replace("{{TOPIC}}", topic)
    .replace("{{TABLE}}", topic)
    .replace("{{NAME}}", topic)

  postTask(taskDef)

  Future {
    val producer = stringStringProducer()
    val count = 10000000 // 10mil
    for (k <- 0 until count) {
      producer.send(new ProducerRecord(topic, JacksonSupport.mapper.writeValueAsString(person)))
      if (k % 100000 == 0) {
        println(s"Flushing records [total=$count]")
        producer.flush()
      }
    }
    producer.flush()
    producer.close()
  }

  Future {
    while (true) {
      Try {
        Thread.sleep(2000)
        withConn { conn =>
          val stmt = conn.createStatement
          val rs = stmt.executeQuery(s"select count(*) from $topic")
          rs.next()
          val total = rs.getLong(1)
          val time = System.currentTimeMillis() - start
          println(s"Total $total in ${time}ms which is ${total / (time / 1000)} records per second")
        }
      }
    }
    stopTask(topic)
  }
}
Example 13
Source File: Producer.scala From fusion-data with Apache License 2.0 | 5 votes |
package kafkasample.demo

import java.util.Properties
import java.util.concurrent.TimeUnit

import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerRecord, RecordMetadata }

object Producer {

  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    val producer = new KafkaProducer[String, String](props)
    try {
      run(producer)
    } finally {
      TimeUnit.SECONDS.sleep(5)
      producer.close()
    }
  }

  private def run[K, V](producer: KafkaProducer[String, String]) {
    val record = new ProducerRecord[String, String]("customerCountries", "羊八井222")
    producer.send(record, (metadata: RecordMetadata, e: Exception) => {
      if (e ne null) {
        e.printStackTrace()
      }
      println(s"metadata: $metadata")
    })
  }
}
Example 14
Source File: KafkaBatchProducer.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.kafka2.writer

import java.util.Properties

import scala.collection.JavaConverters._
import scala.collection.immutable.Map
import scala.language.implicitConversions
import scala.reflect.runtime.universe._

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame

import com.paypal.gimel.kafka2.conf.{KafkaClientConfiguration, KafkaConstants}
import com.paypal.gimel.kafka2.utilities.{KafkaOptionsLoaderUtils, KafkaUtilitiesException}

  // Excerpt: produceToKafka belongs to the enclosing writer object defined in this file
  // (which also provides the `logger` used below); its declaration is omitted in the
  // original snippet.
  def produceToKafka(conf: KafkaClientConfiguration, dataFrame: DataFrame): Unit = {
    def MethodName: String = new Exception().getStackTrace().apply(1).getMethodName()
    logger.info(" @Begin --> " + MethodName)

    val kafkaProps: Properties = conf.kafkaProducerProps
    logger.info(s"Kafka Props for Producer -> ${kafkaProps.asScala.mkString("\n")}")
    val kafkaTopic = conf.kafkaTopics
    val kafkaTopicsOptionsMap: Map[String, Map[String, String]] =
      KafkaOptionsLoaderUtils.getAllKafkaTopicsOptions(conf)
    logger.info("Kafka options loaded -> " + kafkaTopicsOptionsMap)
    val eachKafkaTopicToOptionsMap = KafkaOptionsLoaderUtils.getEachKafkaTopicToOptionsMap(kafkaTopicsOptionsMap)
    logger.info("Begin Publishing to Kafka....")
    try {
      val kafkaTopicOptions = eachKafkaTopicToOptionsMap.get(kafkaTopic)
      kafkaTopicOptions match {
        case None =>
          throw new IllegalStateException(s"""Could not load options for the kafka topic -> $kafkaTopic""")
        case Some(kafkaOptions) =>
          dataFrame
            .write
            .format(KafkaConstants.KAFKA_FORMAT)
            .option(KafkaConstants.KAFKA_TOPIC, kafkaTopic)
            .options(kafkaOptions)
            .save()
      }
    } catch {
      case ex: Throwable => {
        ex.printStackTrace()
        val msg =
          s"""
             |kafkaTopic -> ${kafkaTopic}
             |kafkaParams --> ${kafkaProps.asScala.mkString("\n")}}
          """.stripMargin
        throw new KafkaUtilitiesException(s"Failed While Pushing Data Into Kafka \n ${msg}")
      }
    }
    logger.info("Publish to Kafka - Completed !")
  }
}
Example 15
Source File: FlinkKafkaCodecSerde.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.flink

import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.streaming.connectors.kafka._
import cloudflow.streamlets.{ CodecInlet, CodecOutlet }

private[flink] class FlinkKafkaCodecSerializationSchema[T: TypeInformation](outlet: CodecOutlet[T], topic: String)
    extends KafkaSerializationSchema[T] {
  override def serialize(value: T, timestamp: java.lang.Long): ProducerRecord[Array[Byte], Array[Byte]] =
    new ProducerRecord(topic, outlet.codec.encode(value))
}

private[flink] class FlinkKafkaCodecDeserializationSchema[T: TypeInformation](inlet: CodecInlet[T])
    extends KafkaDeserializationSchema[T] {
  override def deserialize(record: ConsumerRecord[Array[Byte], Array[Byte]]): T = inlet.codec.decode(record.value)
  override def isEndOfStream(value: T): Boolean = false
  override def getProducedType: TypeInformation[T] = implicitly[TypeInformation[T]]
}
Example 16
Source File: KafkaSinkRef.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.akkastream

import scala.concurrent._
import scala.util._

import akka._
import akka.actor.ActorSystem
import akka.kafka._
import akka.kafka.ConsumerMessage._
import akka.kafka.scaladsl._
import akka.stream._
import akka.stream.scaladsl._

import org.apache.kafka.clients.producer.{ Callback, ProducerRecord, RecordMetadata }
import org.apache.kafka.common.serialization._

import cloudflow.streamlets._

final class KafkaSinkRef[T](
    system: ActorSystem,
    outlet: CodecOutlet[T],
    internalKafkaBootstrapServers: String,
    topic: Topic,
    killSwitch: SharedKillSwitch,
    completionPromise: Promise[Dun]
) extends WritableSinkRef[T] {

  private val producerSettings = ProducerSettings(system, new ByteArraySerializer, new ByteArraySerializer)
    .withBootstrapServers(topic.bootstrapServers.getOrElse(internalKafkaBootstrapServers))
    .withProperties(topic.kafkaProducerProperties)

  private val producer = producerSettings.createKafkaProducer()

  def sink: Sink[(T, Committable), NotUsed] = {
    system.log.info(s"Creating sink for topic: $topic")

    Flow[(T, Committable)]
      .map {
        case (value, offset) ⇒
          val key = outlet.partitioner(value)
          val bytesValue = outlet.codec.encode(value)
          ProducerMessage.Message[Array[Byte], Array[Byte], Committable](
            new ProducerRecord(topic.name, key.getBytes("UTF8"), bytesValue),
            offset
          )
      }
      .via(Producer.flexiFlow(producerSettings.withProducer(producer)))
      .via(handleTermination)
      .to(Sink.ignore)
      .mapMaterializedValue(_ ⇒ NotUsed)
  }

  private def handleTermination[I]: Flow[I, I, NotUsed] =
    Flow[I]
      .via(killSwitch.flow)
      .alsoTo(
        Sink.onComplete {
          case Success(_) ⇒
            system.log.error(s"Stream has completed. Shutting down streamlet...")
            completionPromise.success(Dun)
          case Failure(e) ⇒
            system.log.error(e, "Stream has failed. Shutting down streamlet...")
            completionPromise.failure(e)
        }
      )

  def write(value: T): Future[T] = {
    val key = outlet.partitioner(value)
    val bytesKey = keyBytes(key)
    val bytesValue = outlet.codec.encode(value)
    val record = new ProducerRecord(topic.name, bytesKey, bytesValue)
    val promise = Promise[T]()

    producer.send(
      record,
      new Callback() {
        def onCompletion(metadata: RecordMetadata, exception: Exception) {
          if (exception == null) promise.success(value)
          else promise.failure(exception)
        }
      }
    )

    promise.future
  }

  private def keyBytes(key: String) = if (key != null) key.getBytes("UTF8") else null
}
Example 17
Source File: TestProducer.scala From asura with MIT License | 5 votes |
package asura.kafka.producer

import akka.Done
import akka.actor.ActorSystem
import akka.kafka.ProducerSettings
import akka.kafka.scaladsl.Producer
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Source
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.Future

object TestProducer extends StrictLogging {

  def main(args: Array[String]): Unit = {
    logger.info("Start producer")

    implicit val system = ActorSystem("producer")
    implicit val materializer = ActorMaterializer()
    implicit val ec = system.dispatcher

    val producerSettings = ProducerSettings(system, new StringSerializer, new StringSerializer)
    val done: Future[Done] = Source(1 to 100)
      .map(value => new ProducerRecord[String, String]("test-topic", s"msg ${value}"))
      .runWith(Producer.plainSink(producerSettings))

    done onComplete {
      case scala.util.Success(_) => logger.info("Done"); system.terminate()
      case scala.util.Failure(err) => logger.error(err.toString); system.terminate()
    }
  }
}
Example 18
Source File: TestAvroProducer.scala From asura with MIT License | 5 votes |
package asura.kafka.producer

import akka.actor.ActorSystem
import akka.kafka.ProducerSettings
import akka.kafka.scaladsl.Producer
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Source
import asura.kafka.avro.SampleAvroClass
import com.typesafe.scalalogging.StrictLogging
import io.confluent.kafka.serializers.{AbstractKafkaAvroSerDeConfig, KafkaAvroDeserializerConfig, KafkaAvroSerializer}
import org.apache.avro.specific.SpecificRecord
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization._

import scala.collection.JavaConverters._

// https://doc.akka.io/docs/alpakka-kafka/current/serialization.html
object TestAvroProducer extends StrictLogging {

  def main(args: Array[String]): Unit = {
    implicit val system = ActorSystem("producer")
    implicit val materializer = ActorMaterializer()
    implicit val ec = system.dispatcher

    val schemaRegistryUrl = ""
    val bootstrapServers = ""
    val topic = ""

    val kafkaAvroSerDeConfig = Map[String, Any](
      AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG -> schemaRegistryUrl,
      KafkaAvroDeserializerConfig.SPECIFIC_AVRO_READER_CONFIG -> true.toString
    )
    val producerSettings: ProducerSettings[String, SpecificRecord] = {
      val kafkaAvroSerializer = new KafkaAvroSerializer()
      kafkaAvroSerializer.configure(kafkaAvroSerDeConfig.asJava, false)
      val serializer = kafkaAvroSerializer.asInstanceOf[Serializer[SpecificRecord]]
      ProducerSettings(system, new StringSerializer, serializer)
        .withBootstrapServers(bootstrapServers)
    }

    val samples = (1 to 3).map(i => SampleAvroClass(s"key_$i", s"name_$i"))
    val done = Source(samples)
      .map(n => new ProducerRecord[String, SpecificRecord](topic, n.key, n))
      .runWith(Producer.plainSink(producerSettings))

    done onComplete {
      case scala.util.Success(_) => logger.info("Done"); system.terminate()
      case scala.util.Failure(err) => logger.error(err.toString); system.terminate()
    }
  }
}
Example 19
Source File: KafkaWordCount.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord}

import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf

object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Zookeeper connection properties
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
// scalastyle:on println
Example 20
Source File: package.scala From kafka4s with Apache License 2.0 | 5 votes |
package com.banno.kafka

import org.scalacheck.{Arbitrary, Cogen, Gen}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

package object test {

  implicit def arbitraryProducerRecord[K: Arbitrary, V: Arbitrary]: Arbitrary[ProducerRecord[K, V]] =
    Arbitrary {
      for {
        t <- Gen.identifier
        k <- Arbitrary.arbitrary[K]
        v <- Arbitrary.arbitrary[V]
      } yield new ProducerRecord(t, k, v)
    }

  implicit def arbitraryConsumerRecord[K: Arbitrary, V: Arbitrary]: Arbitrary[ConsumerRecord[K, V]] =
    Arbitrary {
      for {
        t <- Gen.identifier
        p <- Gen.posNum[Int]
        o <- Gen.posNum[Long]
        k <- Arbitrary.arbitrary[K]
        v <- Arbitrary.arbitrary[V]
      } yield new ConsumerRecord(t, p, o, k, v)
    }

  //these things are necessary for EqSpec
  implicit def producerRecordCogen[K, V]: Cogen[ProducerRecord[K, V]] =
    Cogen(pr => pr.key.toString.length.toLong + pr.value.toString.length.toLong) // ¯\_(ツ)_/¯

  implicit def consumerRecordCogen[K, V]: Cogen[ConsumerRecord[K, V]] =
    Cogen(cr => cr.key.toString.length.toLong + cr.value.toString.length.toLong) // ¯\_(ツ)_/¯
}
Example 21
Source File: PrometheusMetricsReporterApiSpec.scala From kafka4s with Apache License 2.0 | 5 votes |
package com.banno.kafka.metrics.prometheus

import scala.collection.compat._
import cats.implicits._
import cats.effect.IO
import com.banno.kafka._
import com.banno.kafka.producer._
import com.banno.kafka.consumer._
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import io.prometheus.client.CollectorRegistry
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

import scala.jdk.CollectionConverters._
import scala.concurrent.ExecutionContext
import scala.concurrent.duration._

class PrometheusMetricsReporterApiSpec extends AnyFlatSpec with Matchers with InMemoryKafka {

  implicit val defaultContextShift = IO.contextShift(ExecutionContext.global)
  implicit val defaultConcurrent = IO.ioConcurrentEffect(defaultContextShift)
  implicit val defaultTimer = IO.timer(ExecutionContext.global)

  //when kafka clients change their metrics, this test will help identify the changes we need to make
  "Prometheus reporter" should "register Prometheus collectors for all known Kafka metrics" in {
    val topic = createTopic(2)
    val records =
      List(new ProducerRecord(topic, 0, "a", "a"), new ProducerRecord(topic, 1, "b", "b"))
    ProducerApi
      .resource[IO, String, String](
        BootstrapServers(bootstrapServer),
        MetricReporters[ProducerPrometheusReporter]
      )
      .use(
        p =>
          ConsumerApi
            .resource[IO, String, String](
              BootstrapServers(bootstrapServer),
              ClientId("c1"),
              MetricReporters[ConsumerPrometheusReporter]
            )
            .use(
              c1 =>
                ConsumerApi
                  .resource[IO, String, String](
                    BootstrapServers(bootstrapServer),
                    ClientId("c2"),
                    MetricReporters[ConsumerPrometheusReporter]
                  )
                  .use(
                    c2 =>
                      for {
                        _ <- p.sendSyncBatch(records)

                        _ <- c1.assign(topic, Map.empty[TopicPartition, Long])
                        _ <- c1.poll(1 second)
                        _ <- c1.poll(1 second)

                        _ <- c2.assign(topic, Map.empty[TopicPartition, Long])
                        _ <- c2.poll(1 second)
                        _ <- c2.poll(1 second)

                        _ <- IO.sleep(PrometheusMetricsReporterApi.defaultUpdatePeriod + (1 second))
                        _ <- p.close
                        _ <- c1.close
                        _ <- c2.close
                      } yield {
                        val registry = CollectorRegistry.defaultRegistry
                        registry.metricFamilySamples.asScala
                          .count(_.name.startsWith("kafka_producer")) should ===(56)
                        registry.metricFamilySamples.asScala
                          .find(_.name == "kafka_producer_record_send_total")
                          .map(_.samples.asScala.map(_.value)) should ===(Some(List(2)))

                        registry.metricFamilySamples.asScala
                          .count(_.name.startsWith("kafka_consumer")) should ===(50)
                        registry.metricFamilySamples.asScala
                          .find(_.name == "kafka_consumer_records_consumed_total")
                          .map(_.samples.asScala.map(_.value)) should ===(Some(List(2, 2)))
                        registry.metricFamilySamples.asScala
                          .find(_.name == "kafka_consumer_topic_records_consumed_total")
                          .map(_.samples.asScala.map(_.value)) should ===(Some(List(2, 2)))
                      }
                  )
            )
      )
      .unsafeRunSync()
  }
}
Example 22
Source File: ExampleApp.scala From kafka4s with Apache License 2.0 | 5 votes |
package example1

import cats.effect._
import cats.implicits._
import com.banno.kafka._
import com.banno.kafka.admin._
import com.banno.kafka.schemaregistry._
import com.banno.kafka.consumer._
import com.banno.kafka.producer._
import com.sksamuel.avro4s.RecordFormat
import org.apache.kafka.clients.admin.NewTopic
import org.apache.kafka.clients.producer.ProducerRecord
import scala.concurrent.duration._
import org.apache.kafka.common.TopicPartition

final class ExampleApp[F[_]: Async: ContextShift] {
  import ExampleApp._

  // Change these for your environment as needed
  val topic = new NewTopic(s"example1.customers.v1", 1, 3.toShort)
  val kafkaBootstrapServers = "kafka.local:9092,kafka.local:9093"
  val schemaRegistryUri = "http://kafka.local:8081"

  val producerRecords: Vector[ProducerRecord[CustomerId, Customer]] = (1 to 10)
    .map(
      a =>
        new ProducerRecord(
          topic.name,
          CustomerId(a.toString),
          Customer(s"name-${a}", s"address-${a}")
        )
    )
    .toVector

  val producerResource: Resource[F, ProducerApi[F, CustomerId, Customer]] =
    ProducerApi.Avro4s.resource[F, CustomerId, Customer](
      BootstrapServers(kafkaBootstrapServers),
      SchemaRegistryUrl(schemaRegistryUri),
      ClientId("producer-example")
    )

  val consumerResource =
    ConsumerApi.Avro4s.resource[F, CustomerId, Customer](
      BootstrapServers(kafkaBootstrapServers),
      SchemaRegistryUrl(schemaRegistryUri),
      ClientId("consumer-example"),
      GroupId("consumer-example-group"),
      EnableAutoCommit(false)
    )

  val example: F[Unit] =
    for {
      _ <- Sync[F].delay(println("Starting kafka4s example"))
      _ <- AdminApi.createTopicsIdempotent[F](kafkaBootstrapServers, topic)
      _ <- Sync[F].delay(println(s"Created topic ${topic.name}"))
      schemaRegistry <- SchemaRegistryApi(schemaRegistryUri)
      _ <- schemaRegistry.registerKey[CustomerId](topic.name)
      _ <- Sync[F].delay(println(s"Registered key schema for topic ${topic.name}"))
      _ <- schemaRegistry.registerValue[Customer](topic.name)
      _ <- Sync[F].delay(println(s"Registered value schema for topic ${topic.name}"))
      _ <- producerResource.use(
        producer =>
          producerRecords.traverse_(
            pr =>
              producer.sendSync(pr) *> Sync[F]
                .delay(println(s"Wrote producer record: key ${pr.key} and value ${pr.value}"))
          )
      )
      _ <- consumerResource.use(
        consumer =>
          consumer.assign(topic.name, Map.empty[TopicPartition, Long]) *> consumer
            .recordStream(1.second)
            .take(producerRecords.size.toLong)
            .evalMap(
              cr =>
                Sync[F]
                  .delay(println(s"Read consumer record: key ${cr.key} and value ${cr.value}"))
            )
            .compile
            .drain
      )
      _ <- Sync[F].delay(println("Finished kafka4s example"))
    } yield ()
}

object ExampleApp {
  case class CustomerId(id: String)
  case class Customer(name: String, address: String)
  implicit def customerIdRecordFormat = RecordFormat[CustomerId]
  implicit def customerRecordFormat = RecordFormat[Customer]
  def apply[F[_]: Async: ContextShift] = new ExampleApp[F]
}
Example 23
Source File: MonixKafkaTopicRegexTest.scala From monix-kafka with Apache License 2.0 | 5 votes |
package monix.kafka

import monix.eval.Task
import monix.execution.Scheduler.Implicits.global
import monix.kafka.config.AutoOffsetReset
import monix.reactive.Observable
import org.apache.kafka.clients.producer.ProducerRecord
import org.scalatest.FunSuite

import scala.collection.JavaConverters._
import scala.concurrent.Await
import scala.concurrent.duration._

class MonixKafkaTopicRegexTest extends FunSuite with KafkaTestKit {
  val topicsRegex = "monix-kafka-tests-.*".r
  val topicMatchingRegex = "monix-kafka-tests-anything"

  val producerCfg = KafkaProducerConfig.default.copy(
    bootstrapServers = List("127.0.0.1:6001"),
    clientId = "monix-kafka-1-0-producer-test"
  )

  val consumerCfg = KafkaConsumerConfig.default.copy(
    bootstrapServers = List("127.0.0.1:6001"),
    groupId = "kafka-tests",
    clientId = "monix-kafka-1-0-consumer-test",
    autoOffsetReset = AutoOffsetReset.Earliest
  )

  test("publish one message when subscribed to topics regex") {
    withRunningKafka {
      val producer = KafkaProducer[String, String](producerCfg, io)
      val consumerTask = KafkaConsumerObservable.createConsumer[String, String](consumerCfg, topicsRegex).executeOn(io)
      val consumer = Await.result(consumerTask.runToFuture, 60.seconds)

      try {
        // Publishing one message
        val send = producer.send(topicMatchingRegex, "my-message")
        Await.result(send.runToFuture, 30.seconds)
        val records = consumer.poll(10.seconds.toMillis).asScala.map(_.value()).toList
        assert(records === List("my-message"))
      } finally {
        Await.result(producer.close().runToFuture, Duration.Inf)
        consumer.close()
      }
    }
  }

  test("listen for one message when subscribed to topics regex") {
    withRunningKafka {
      val producer = KafkaProducer[String, String](producerCfg, io)
      val consumer = KafkaConsumerObservable[String, String](consumerCfg, topicsRegex).executeOn(io)
      try {
        // Publishing one message
        val send = producer.send(topicMatchingRegex, "test-message")
        Await.result(send.runToFuture, 30.seconds)
        val first = consumer.take(1).map(_.value()).firstL
        val result = Await.result(first.runToFuture, 30.seconds)
        assert(result === "test-message")
      } finally {
        Await.result(producer.close().runToFuture, Duration.Inf)
      }
    }
  }

  test("full producer/consumer test when subscribed to topics regex") {
    withRunningKafka {
      val count = 10000

      val producer = KafkaProducerSink[String, String](producerCfg, io)
      val consumer = KafkaConsumerObservable[String, String](consumerCfg, topicsRegex).executeOn(io).take(count)

      val pushT = Observable
        .range(0, count)
        .map(msg => new ProducerRecord(topicMatchingRegex, "obs", msg.toString))
        .bufferIntrospective(1024)
        .consumeWith(producer)

      val listT = consumer
        .map(_.value())
        .toListL

      val (result, _) = Await.result(Task.parZip2(listT, pushT).runToFuture, 60.seconds)
      assert(result.map(_.toInt).sum === (0 until count).sum)
    }
  }
}
Example 24
Source File: KafkaOffsetRevertTest.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.kafka

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject

// Excerpt: the declaration of the enclosing object is omitted in the original snippet.
  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp(true))
    var i = 0;
    while (true) {
      // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis()))
      val map = Map("id" -> i,
        "createTime" -> sdf.format(System.currentTimeMillis()),
        "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("kafka_offset", jsonObject.toString())
      producer.send(msg)
      producer.flush()

      Thread.sleep(1000)
      i = i + 1
      // System.exit(-1)
    }
  }
}
Example 25
Source File: StreamingProducerApp.scala From Scala-Programming-Projects with MIT License | 5 votes |
package coinyser

import cats.effect.{ExitCode, IO, IOApp}
import com.pusher.client.Pusher
import StreamingProducer._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.collection.JavaConversions._

object StreamingProducerApp extends IOApp {
  val topic = "transactions"

  val pusher = new Pusher("de504dc5763aeef9ff52")

  val props = Map(
    "bootstrap.servers" -> "localhost:9092",
    "key.serializer" -> "org.apache.kafka.common.serialization.IntegerSerializer",
    "value.serializer" -> "org.apache.kafka.common.serialization.StringSerializer")

  def run(args: List[String]): IO[ExitCode] = {
    val kafkaProducer = new KafkaProducer[Int, String](props)

    subscribe(pusher) { wsTx =>
      val tx = convertWsTransaction(deserializeWebsocketTransaction(wsTx))
      val jsonTx = serializeTransaction(tx)
      kafkaProducer.send(new ProducerRecord(topic, tx.tid, jsonTx))
    }.flatMap(_ => IO.never)
  }
}
Example 26
Source File: MergeByCommitCallbackTest.scala From monix-kafka with Apache License 2.0 | 5 votes |
package monix.kafka

import monix.eval.Task
import monix.kafka.config.AutoOffsetReset
import monix.reactive.Observable
import org.apache.kafka.clients.producer.ProducerRecord
import org.scalatest.{FunSuite, Matchers}

import scala.concurrent.duration._
import scala.concurrent.Await
import monix.execution.Scheduler.Implicits.global
import org.apache.kafka.clients.consumer.OffsetCommitCallback
import org.apache.kafka.common.TopicPartition
import org.scalacheck.Gen
import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks

class MergeByCommitCallbackTest extends FunSuite with KafkaTestKit with ScalaCheckDrivenPropertyChecks with Matchers {

  val commitCallbacks: List[Commit] = List.fill(4)(new Commit {
    override def commitBatchSync(batch: Map[TopicPartition, Long]): Task[Unit] = Task.unit
    override def commitBatchAsync(batch: Map[TopicPartition, Long], callback: OffsetCommitCallback): Task[Unit] =
      Task.unit
  })

  val committableOffsetsGen: Gen[CommittableOffset] = for {
    partition <- Gen.posNum[Int]
    offset <- Gen.posNum[Long]
    commit <- Gen.oneOf(commitCallbacks)
  } yield CommittableOffset(new TopicPartition("topic", partition), offset, commit)

  test("merge by commit callback works") {
    forAll(Gen.nonEmptyListOf(committableOffsetsGen)) { offsets =>
      val partitions = offsets.map(_.topicPartition)
      val received: List[CommittableOffsetBatch] = CommittableOffsetBatch.mergeByCommitCallback(offsets)

      received.foreach { batch =>
        partitions should contain allElementsOf batch.offsets.keys
      }

      received.size should be <= 4
    }
  }

  test("merge by commit callback for multiple consumers") {
    withRunningKafka {
      val count = 10000
      val topicName = "monix-kafka-merge-by-commit"

      val producerCfg = KafkaProducerConfig.default.copy(
        bootstrapServers = List("127.0.0.1:6001"),
        clientId = "monix-kafka-1-0-producer-test"
      )

      val producer = KafkaProducerSink[String, String](producerCfg, io)

      val pushT = Observable
        .range(0, count)
        .map(msg => new ProducerRecord(topicName, "obs", msg.toString))
        .bufferIntrospective(1024)
        .consumeWith(producer)

      val listT = Observable
        .range(0, 4)
        .mergeMap(i => createConsumer(i.toInt, topicName).take(500))
        .bufferTumbling(2000)
        .map(CommittableOffsetBatch.mergeByCommitCallback)
        .map { offsetBatches => assert(offsetBatches.length == 4) }
        .completedL

      Await.result(Task.parZip2(listT, pushT).runToFuture, 60.seconds)
    }
  }

  private def createConsumer(i: Int, topicName: String): Observable[CommittableOffset] = {
    val cfg = KafkaConsumerConfig.default.copy(
      bootstrapServers = List("127.0.0.1:6001"),
      groupId = s"kafka-tests-$i",
      autoOffsetReset = AutoOffsetReset.Earliest
    )

    KafkaConsumerObservable
      .manualCommit[String, String](cfg, List(topicName))
      .executeOn(io)
      .map(_.committableOffset)
  }
}
Example 27
Source File: SimpleProducer.scala From Fast-Data-Processing-Systems-with-SMACK-Stack with MIT License | 5 votes |
package packt.ch05

import java.util.{Date, Properties}

import packt.ch05.SimpleProducer._

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object SimpleProducer {

  private var producer: KafkaProducer[String, String] = _

  def main(args: Array[String]) {
    val argsCount = args.length
    if (argsCount == 0 || argsCount == 1)
      throw new IllegalArgumentException(
        "Provide topic name and Message count as arguments")

    // Topic name and the message count to be published is passed from the
    // command line
    val topic = args(0)
    val count = args(1)
    val messageCount = java.lang.Integer.parseInt(count)
    println("Topic Name - " + topic)
    println("Message Count - " + messageCount)
    val simpleProducer = new SimpleProducer()
    simpleProducer.publishMessage(topic, messageCount)
  }
}

class SimpleProducer {

  val props = new Properties()

  // Set the broker list for requesting metadata to find the lead broker
  props.put("metadata.broker.list",
    "192.168.146.132:9092, 192.168.146.132:9093, 192.168.146.132:9094")

  //This specifies the serializer class for keys
  props.put("serializer.class", "kafka.serializer.StringEncoder")

  // 1 means the producer receives an acknowledgment once the lead replica
  // has received the data. This option provides better durability as the
  // client waits until the server acknowledges the request as successful.
  props.put("request.required.acks", "1")

  producer = new KafkaProducer(props)

  private def publishMessage(topic: String, messageCount: Int) {
    for (mCount <- 0 until messageCount) {
      val runtime = new Date().toString
      val msg = "Message Publishing Time - " + runtime
      println(msg)
      // Create a message
      val data = new ProducerRecord[String, String](topic, msg)
      // Publish the message
      producer.send(data)
    }
    // Close producer connection with broker.
    producer.close()
  }
}
Example 28
Source File: CsvKafkaPublisher.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.cloudera.sa.taxi360.common

import java.io.File
import java.util.Random

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.io.Source

object CsvKafkaPublisher {

  var counter = 0
  var salts = 0

  def main(args: Array[String]): Unit = {
    if (args.length == 0) {
      println("<brokerList> " +
        "<topicName> " +
        "<dataFolderOrFile> " +
        "<sleepPerRecord> " +
        "<acks> " +
        "<linger.ms> " +
        "<producer.type> " +
        "<batch.size> " +
        "<salts>")
      return
    }

    val kafkaBrokerList = args(0)
    val kafkaTopicName = args(1)
    val nyTaxiDataFolder = args(2)
    val sleepPerRecord = args(3).toInt
    val acks = args(4).toInt
    val lingerMs = args(5).toInt
    val producerType = args(6) //"async"
    val batchSize = args(7).toInt
    salts = args(8).toInt

    val kafkaProducer = KafkaProducerUntil.getNewProducer(kafkaBrokerList, acks, lingerMs, producerType, batchSize)

    println("--Input:" + nyTaxiDataFolder)

    val dataFolder = new File(nyTaxiDataFolder)
    if (dataFolder.isDirectory) {
      val files = dataFolder.listFiles().iterator
      files.foreach(f => {
        println("--Input:" + f)
        processFile(f, kafkaTopicName, kafkaProducer, sleepPerRecord)
      })
    } else {
      println("--Input:" + dataFolder)
      processFile(dataFolder, kafkaTopicName, kafkaProducer, sleepPerRecord)
    }
    println("---Done")
  }

  def processFile(file: File, kafkaTopicName: String,
                  kafkaProducer: KafkaProducer[String, String], sleepPerRecord: Int): Unit = {
    var counter = 0
    val r = new Random()

    println("-Starting Reading")
    Source.fromFile(file).getLines().foreach(l => {
      counter += 1
      if (counter % 10000 == 0) {
        println("{Sent:" + counter + "}")
      }
      if (counter % 100 == 0) {
        print(".")
      }
      Thread.sleep(sleepPerRecord)

      val saltedVender = r.nextInt(salts) + l
      if (counter > 2) {
        publishTaxiRecord(saltedVender, kafkaTopicName, kafkaProducer)
      }
    })
  }

  def publishTaxiRecord(line: String, kafkaTopicName: String, kafkaProducer: KafkaProducer[String, String]): Unit = {
    if (line.startsWith("vendor_name") || line.length < 10) {
      println("skip")
    } else {
      val message = new ProducerRecord[String, String](kafkaTopicName, line.hashCode.toString, line)
      kafkaProducer.send(message)
    }
  }
}
Example 33
Source File: KafkaProducerActorSpec.scala From scala-kafka-client with MIT License | 5 votes |
package cakesolutions.kafka.akka import akka.actor.ActorSystem import akka.testkit.TestProbe import cakesolutions.kafka.{KafkaConsumer, KafkaProducer, KafkaProducerRecord} import org.apache.kafka.clients.consumer.OffsetResetStrategy import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} import scala.util.Random class KafkaProducerActorSpec(system_ : ActorSystem) extends KafkaIntSpec(system_) { def this() = this(ActorSystem("KafkaProducerActorSpec")) private def randomString: String = Random.alphanumeric.take(5).mkString("") val deserializer = new StringDeserializer val consumerConf = KafkaConsumer.Conf( deserializer, deserializer, bootstrapServers = s"localhost:$kafkaPort", groupId = "test", enableAutoCommit = false, autoOffsetReset = OffsetResetStrategy.EARLIEST ) val serializer = new StringSerializer val producerConf = KafkaProducer.Conf(serializer, serializer, bootstrapServers = s"localhost:$kafkaPort") "KafkaProducerActor" should "write a given batch to Kafka" in { val topic = randomString val probe = TestProbe() val producer = system.actorOf(KafkaProducerActor.props(producerConf)) val batch: Seq[ProducerRecord[String, String]] = Seq( KafkaProducerRecord(topic, "foo"), KafkaProducerRecord(topic, "key", "value"), KafkaProducerRecord(topic, "bar")) val message = ProducerRecords(batch, Some('response)) probe.send(producer, message) probe.expectMsg('response) val results = consumeFromTopic(topic, 3, 10000) results(0) shouldEqual ((None, "foo")) results(1) shouldEqual ((Some("key"), "value")) results(2) shouldEqual ((None, "bar")) } "KafkaProducerActor" should "write a given batch to Kafka, requiring no response" in { import scala.concurrent.duration._ val topic = randomString val probe = TestProbe() val producer = system.actorOf(KafkaProducerActor.props(producerConf)) val batch: Seq[ProducerRecord[String, String]] = Seq( KafkaProducerRecord(topic, "foo"), KafkaProducerRecord(topic, "key", "value"), KafkaProducerRecord(topic, "bar") ) val message = ProducerRecords(batch) probe.send(producer, message) probe.expectNoMessage(3.seconds) val results = consumeFromTopic(topic, 3, 10000) results(0) shouldEqual ((None, "foo")) results(1) shouldEqual ((Some("key"), "value")) results(2) shouldEqual ((None, "bar")) } private def consumeFromTopic(topic: String, expectedNumOfMessages: Int, timeout: Long) = kafkaServer.consume(topic, expectedNumOfMessages, timeout, deserializer, deserializer) }
Example 34
Source File: Kafka.scala From event-sourcing-kafka-streams with MIT License | 5 votes |
package org.amitayh.invoices.web import java.time.Duration import java.util.Collections.singletonList import java.util.Properties import cats.effect._ import cats.syntax.apply._ import cats.syntax.functor._ import fs2._ import org.amitayh.invoices.common.Config import org.amitayh.invoices.common.Config.Topics.Topic import org.apache.kafka.clients.consumer._ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata} import org.log4s.{Logger, getLogger} import scala.collection.JavaConverters._ object Kafka { trait Producer[F[_], K, V] { def send(key: K, value: V): F[RecordMetadata] } object Producer { def apply[F[_]: Async, K, V](producer: KafkaProducer[K, V], topic: Topic[K, V]): Producer[F, K, V] = (key: K, value: V) => Async[F].async { cb => val record = new ProducerRecord(topic.name, key, value) producer.send(record, (metadata: RecordMetadata, exception: Exception) => { if (exception != null) cb(Left(exception)) else cb(Right(metadata)) }) } } def producer[F[_]: Async, K, V](topic: Topic[K, V]): Resource[F, Producer[F, K, V]] = Resource { val create = Sync[F].delay { val props = new Properties props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers) new KafkaProducer[K, V](props, topic.keySerializer, topic.valueSerializer) } create.map(producer => (Producer(producer, topic), close(producer))) } def subscribe[F[_]: Sync, K, V](topic: Topic[K, V], groupId: String): Stream[F, (K, V)] = { val create = Sync[F].delay { val props = new Properties props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers) props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId) val consumer = new KafkaConsumer(props, topic.keyDeserializer, topic.valueDeserializer) consumer.subscribe(singletonList(topic.name)) consumer } Stream.bracket(create)(close[F]).flatMap(consume[F, K, V]) } private val logger: Logger = getLogger def log[F[_]: Sync](msg: String): F[Unit] = Sync[F].delay(logger.info(msg)) private def consume[F[_]: Sync, K, V](consumer: KafkaConsumer[K, V]): Stream[F, (K, V)] = for { records <- Stream.repeatEval(Sync[F].delay(consumer.poll(Duration.ofSeconds(1)))) record <- Stream.emits(records.iterator.asScala.toSeq) } yield record.key -> record.value private def close[F[_]: Sync](producer: KafkaProducer[_, _]): F[Unit] = Sync[F].delay(producer.close()) *> log(s"Producer closed") private def close[F[_]: Sync](consumer: KafkaConsumer[_, _]): F[Unit] = Sync[F].delay(consumer.close()) *> log("Consumer closed") }
Example 35
Source File: KafkaMessageSender.scala From model-serving-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.modelserving.client

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer

object MessageSender {
  // Builds the producer configuration from the broker list and serializer class names.
  def providerProperties(brokers: String, keySerializer: String, valueSerializer: String): Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializer)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializer)
    props
  }
}

class MessageSender(val brokers: String) {

  import MessageSender._

  val producer = new KafkaProducer[Array[Byte], Array[Byte]](
    providerProperties(brokers, classOf[ByteArraySerializer].getName, classOf[ByteArraySerializer].getName))

  def writeKeyValue(topic: String, key: Array[Byte], value: Array[Byte]): Unit = {
    producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, key, value)).get
    producer.flush()
  }

  def writeValue(topic: String, value: Array[Byte]): Unit = {
    producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get
    producer.flush()
  }

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
}
Example 36
Source File: SlotPartitionMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.demo

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject

object SlotPartitionMaker {

  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
    calendar.add(Calendar.MILLISECOND, 10)
    sdf.format(calendar.getTime)
  }

  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val prop = Common.getProp
    prop.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    prop.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    // Use the properties configured above so the serializer settings take effect
    val producer = new KafkaProducer[String, String](prop)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i = 0
    while (true) {
      val map = Map(
        "id" -> i,
        "createTime" -> getCreateTime(),
        "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic: slot_partition
      val msg = new ProducerRecord[String, String]("slot_partition", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      if (MathUtil.random.nextBoolean()) {
        Thread.sleep(1500)
      } else {
        Thread.sleep(500)
      }
      i = i + 1
    }
  }
}
Example 37
Source File: IntervalJoinKafkaKeyMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.intervalJoin import java.text.SimpleDateFormat import com.venn.common.Common import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject object IntervalJoinKafkaKeyMaker { val topic = "async" def main(args: Array[String]): Unit = { while (true) { left("topic_left") right("topic_right") Thread.sleep(500) } } val sdf = new SimpleDateFormat("yyyyMMddHHmmss") var idLeft = 0 def left(topic: String) = { val producer = new KafkaProducer[String, String](Common.getProp) idLeft = idLeft + 1 val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), "date" -> sdf.format(System.currentTimeMillis())) val jsonObject: JSONObject = new JSONObject(map) println("left : " + jsonObject.toString()) val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) producer.send(msg) producer.flush() } var idRight = 0 def right(topic: String) = { val producer = new KafkaProducer[String, String](Common.getProp) idRight = idRight + 1 val map = Map("id" -> idRight, "phone" -> ("17713333333" + idRight), "date" -> sdf.format(System.currentTimeMillis())) val jsonObject: JSONObject = new JSONObject(map) println("right : \t\t\t\t\t\t\t\t" + jsonObject.toString()) val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) producer.send(msg) producer.flush() } }
Example 38
Source File: FileSinkMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.filesink import java.text.SimpleDateFormat import java.util.Calendar import com.venn.common.Common import com.venn.stream.api.dayWindow.CurrentDayMaker.{calendar, getCreateTime, sdf} import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject object FileSinkMaker { val topic = "async" def main(args: Array[String]): Unit = { while (true) { left("roll_file_sink") Thread.sleep(100) } } val sdf = new SimpleDateFormat("yyyyMMddHHmmss") var idLeft = 0 def left(topic: String) = { val producer = new KafkaProducer[String, String](Common.getProp) idLeft = idLeft + 1 val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), "date" -> getCreateTime) val jsonObject: JSONObject = new JSONObject(map) println("left : " + jsonObject.toString()) val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) // producer.send(msg) // producer.flush() } var minute : Int = 1 val calendar: Calendar = Calendar.getInstance() def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.MINUTE, 10) sdf.format(calendar.getTime) } }
Example 39
Source File: WindowDemoMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.trigger import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.MILLISECOND, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i = 0; while (true) { val map = Map("id" -> i, "createTime" -> getCreateTime(), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString()) producer.send(msg) producer.flush() if (MathUtil.random.nextBoolean()) { Thread.sleep(1500) } else { Thread.sleep(500) } i = i + 1 // System.exit(-1) } } }
Example 40
Source File: LateDataMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.sideoutput.lateDataProcess import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.SECOND, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i =74540; while (true) { // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis())) val map = Map("id"-> i, "createTime"-> getCreateTime(), "amt"-> (MathUtil.random.nextInt(10) +"." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("late_data", jsonObject.toString()) producer.send(msg) producer.flush() Thread.sleep(200) i = i + 1 // System.exit(-1) } } }
Example 41
Source File: CurrentDayMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.dayWindow import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.MINUTE, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i =0; while (true) { // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis())) val map = Map("id"-> i, "createTime"-> getCreateTime(), "amt"-> (MathUtil.random.nextInt(10) +"." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString()) producer.send(msg) producer.flush() Thread.sleep(1000) i = i + 1 // System.exit(-1) } } }
Example 42
Source File: KafkaProducerUtils.scala From bigdata-examples with Apache License 2.0 | 5 votes |
package com.timeyang.common.util import java.util.Properties import com.timeyang.common.config.BaseConf import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer object KafkaProducerUtils { @volatile lazy private val producer: KafkaProducer[String, String] = { val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BaseConf.kafkaBrokerList) props.put("acks", "all") props.put("retries", 1: Integer) props.put("batch.size", 16384: Integer) props.put("linger.ms", 1: Integer) props.put("buffer.memory", 33554432: Integer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) new KafkaProducer[String, String](props) } def sendJsonMessages(topic: String, event: Object, events: Object*): Unit = { for (event <- event +: events) { val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event)) producer.send(record) } } def send(topic: String, events: List[Object]): Unit = { for (event <- events) { val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event)) producer.send(record) } } def send(topic: String, event: Object): Unit = { val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event)) producer.send(record) } }
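A possible usage sketch for the helper above, assuming the object and its JsonUtils and BaseConf dependencies are on the classpath; the Order case class and the topic name are hypothetical.

import com.timeyang.common.util.KafkaProducerUtils

object KafkaProducerUtilsUsage {

  // Hypothetical event type; JsonUtils.toJson is expected to serialize it.
  case class Order(id: Long, product: String, amount: Double)

  def main(args: Array[String]): Unit = {
    val order = Order(1L, "book", 9.99)

    // Single event, serialized to JSON and sent with a null key
    KafkaProducerUtils.send("orders", order)

    // Varargs overload for several events at once
    KafkaProducerUtils.sendJsonMessages("orders", order, Order(2L, "pen", 1.5))
  }
}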
Example 43
Source File: KafkaReporter.scala From Swallow with Apache License 2.0 | 5 votes |
package com.intel.hibench.common.streaming.metrics import java.util.Properties import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer} import org.apache.kafka.common.serialization.StringSerializer class KafkaReporter(topic: String, bootstrapServers: String) extends LatencyReporter { private val producer = ProducerSingleton.getInstance(bootstrapServers) override def report(startTime: Long, endTime: Long): Unit = { producer.send(new ProducerRecord[String, String](topic, null, s"$startTime:$endTime")) } } object ProducerSingleton { @volatile private var instance : Option[KafkaProducer[String, String]] = None def getInstance(bootstrapServers: String): KafkaProducer[String, String] = synchronized { if (!instance.isDefined) { synchronized { if(!instance.isDefined) { val props = new Properties() props.put("bootstrap.servers", bootstrapServers) instance = Some(new KafkaProducer(props, new StringSerializer, new StringSerializer)) } } } instance.get } }
Example 44
Source File: BasicTest.scala From ksql-streams with Apache License 2.0 | 5 votes |
package com.landoop.kstreams.sql.cluster import org.apache.avro.Schema import org.apache.avro.generic.GenericData import org.apache.kafka.clients.producer.ProducerRecord class BasicTest extends ClusterTestingCapabilities { private def createAvroRecord = { val userSchema = "{\"namespace\": \"example.avro\", \"type\": \"record\", " + "\"name\": \"User\"," + "\"fields\": [{\"name\": \"name\", \"type\": \"string\"}]}" val parser = new Schema.Parser val schema = parser.parse(userSchema) val avroRecord = new GenericData.Record(schema) avroRecord.put("name", "testUser") avroRecord } "KCluster" should { "start up and be able to handle avro records being sent " in { val topic = "testAvro" val avroRecord = createAvroRecord val objects = Array[AnyRef](avroRecord) val producerProps = stringAvroProducerProps val producer = createProducer(producerProps) for (o <- objects) { val message = new ProducerRecord[String, Any](topic, o) producer.send(message) } val consumerProps = stringAvroConsumerProps() val consumer = createStringAvroConsumer(consumerProps) val records = consumeStringAvro(consumer, topic, objects.length) objects.toSeq shouldBe records } "handle the avro new producer" in { val topic = "testAvro" val avroRecord = createAvroRecord val objects = Array[Any](avroRecord, true, 130, 345L, 1.23f, 2.34d, "abc", "def".getBytes) val producerProps = stringAvroProducerProps val producer = createProducer(producerProps) for (o <- objects) { producer.send(new ProducerRecord[String, Any](topic, o)) } val consumerProps = stringAvroConsumerProps() val consumer = createStringAvroConsumer(consumerProps) val records = consumeStringAvro(consumer, topic, objects.length) objects.deep shouldBe records.toArray.deep } } }
Example 45
Source File: KafkaMetrics.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.kafka.transport import akka.actor.ActorSystem import com.typesafe.config.Config import hydra.common.config.ConfigSupport import hydra.kafka.producer.KafkaRecordMetadata import hydra.kafka.util.KafkaUtils import org.apache.kafka.clients.producer.ProducerRecord import spray.json.DefaultJsonProtocol trait KafkaMetrics { def saveMetrics(record: KafkaRecordMetadata): Unit def close(): Unit = {} } // $COVERAGE-OFF$ object NoOpMetrics extends KafkaMetrics { def saveMetrics(record: KafkaRecordMetadata): Unit = {} } // $COVERAGE-ON$ class PublishMetrics(topic: String)(implicit system: ActorSystem) extends KafkaMetrics with DefaultJsonProtocol with ConfigSupport { import spray.json._ import KafkaRecordMetadata._ private val producer = KafkaUtils .producerSettings[String, String]("string", rootConfig) .withProperty("client.id", "hydra.kafka.metrics") .createKafkaProducer() def saveMetrics(record: KafkaRecordMetadata) = { val payload = record.toJson.compactPrint producer.send(new ProducerRecord(topic, record.destination, payload)) } override def close(): Unit = { producer.close() } } object KafkaMetrics { import ConfigSupport._ def apply(config: Config)(implicit system: ActorSystem): KafkaMetrics = { val metricsEnabled = config.getBooleanOpt("transports.kafka.metrics.enabled").getOrElse(false) val metricsTopic = config .getStringOpt("transports.kafka.metrics.topic") .getOrElse("HydraKafkaError") if (metricsEnabled) new PublishMetrics(metricsTopic) else NoOpMetrics } }
Example 46
Source File: DataStreamer.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random

object DataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val someWords = List("about", "above", "after", "again", "against")

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 200 milliseconds) {
    Random.shuffle(someWords).headOption.foreach { word =>
      producer.send(new ProducerRecord[String, String](topic, word))
    }
  }
}
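The streamer above never closes its producer or terminates its actor system. One possible addition, assuming the producer and system values defined in the object above, is a JVM shutdown hook:

// Appended inside the DataStreamer object: release resources when the JVM shuts down.
sys.addShutdownHook {
  producer.flush()   // push any buffered records out to the brokers
  producer.close()   // close network connections to the brokers
  system.terminate() // stop the actor system backing the scheduler
}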
Example 47
Source File: StreamStreamDataGenerator.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config._ import knolx.KnolXLogger import knolx.spark.Stock import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.json4s.jackson.Serialization.write import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.util.Random object StreamStreamDataGenerator extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay") val orderTypes = List("buy", "sell") val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9) val randomCompanyNames = Random.shuffle(companyNames).drop(Random.shuffle((1 to 3).toList).head) implicit val formats = Serialization.formats(NoTypeHints) info("Streaming companies listed into Kafka...") system.scheduler.schedule(0 seconds, 20 seconds) { randomCompanyNames.foreach { name => producer.send(new ProducerRecord[String, String](companiesTopic, name)) } } info("Streaming stocks data into Kafka...") system.scheduler.schedule(0 seconds, 5 seconds) { companyNames.foreach { name => val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head) producer.send(new ProducerRecord[String, String](stocksTopic, write(stock))) } } }
Example 48
Source File: ProducerSpec.scala From zio-kafka with Apache License 2.0 | 5 votes |
package zio.kafka.producer import org.apache.kafka.clients.producer.ProducerRecord import zio._ import zio.clock.Clock import zio.kafka.KafkaTestUtils._ import zio.kafka.consumer.{ Consumer, ConsumerSettings, Subscription } import zio.kafka.embedded.Kafka import zio.kafka.serde.Serde import zio.test.Assertion._ import zio.test._ import zio.test.environment.TestEnvironment object ProducerSpec extends DefaultRunnableSpec { override def spec = suite("producer test suite")( testM("one record") { for { _ <- Producer.produce[Any, String, String](new ProducerRecord("topic", "boo", "baa")) } yield assertCompletes }, testM("a non-empty chunk of records") { import Subscription._ val (topic1, key1, value1) = ("topic1", "boo", "baa") val (topic2, key2, value2) = ("topic2", "baa", "boo") val chunks = Chunk.fromIterable( List(new ProducerRecord(topic1, key1, value1), new ProducerRecord(topic2, key2, value2)) ) def withConsumer(subscription: Subscription, settings: ConsumerSettings) = Consumer.make(settings).flatMap { c => (c.subscribe(subscription).toManaged_ *> c.plainStream(Serde.string, Serde.string).toQueue()) } for { outcome <- Producer.produceChunk[Any, String, String](chunks) settings <- consumerSettings("testGroup", "testClient") record1 <- withConsumer(Topics(Set(topic1)), settings).use { consumer => for { messages <- consumer.take.flatMap(_.done).mapError(_.getOrElse(new NoSuchElementException)) record = messages .filter(rec => rec.record.key == key1 && rec.record.value == value1) .toSeq } yield record } record2 <- withConsumer(Topics(Set(topic2)), settings).use { consumer => for { messages <- consumer.take.flatMap(_.done).mapError(_.getOrElse(new NoSuchElementException)) record = messages.filter(rec => rec.record.key == key2 && rec.record.value == value2) } yield record } } yield { assert(outcome.length)(equalTo(2)) && assert(record1)(isNonEmpty) && assert(record2.length)(isGreaterThan(0)) } }, testM("an empty chunk of records") { val chunks = Chunk.fromIterable(List.empty) for { outcome <- Producer.produceChunk[Any, String, String](chunks) } yield assert(outcome.length)(equalTo(0)) } ).provideSomeLayerShared[TestEnvironment]( ((Kafka.embedded >>> stringProducer) ++ Kafka.embedded).mapError(TestFailure.fail) ++ Clock.live ) }
Example 49
Source File: KafkaEventProducer.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database.cosmosdb.cache import akka.Done import akka.actor.ActorSystem import akka.kafka.scaladsl.Producer import akka.kafka.{ProducerMessage, ProducerSettings} import akka.stream.scaladsl.{Keep, Sink, Source} import akka.stream.{ActorMaterializer, OverflowStrategy, QueueOfferResult} import org.apache.kafka.clients.consumer.ConsumerConfig import org.apache.kafka.clients.producer.ProducerRecord import org.apache.openwhisk.connector.kafka.KamonMetricsReporter import scala.collection.immutable.Seq import scala.concurrent.{ExecutionContext, Future, Promise} case class KafkaEventProducer( settings: ProducerSettings[String, String], topic: String, eventProducerConfig: EventProducerConfig)(implicit system: ActorSystem, materializer: ActorMaterializer) extends EventProducer { private implicit val executionContext: ExecutionContext = system.dispatcher private val queue = Source .queue[(Seq[String], Promise[Done])](eventProducerConfig.bufferSize, OverflowStrategy.dropNew) //TODO Use backpressure .map { case (msgs, p) => ProducerMessage.multi(msgs.map(newRecord), p) } .via(Producer.flexiFlow(producerSettings)) .map { case ProducerMessage.MultiResult(_, passThrough) => passThrough.success(Done) case _ => //As we use multi mode only other modes need not be handled } .toMat(Sink.ignore)(Keep.left) .run override def send(msg: Seq[String]): Future[Done] = { val promise = Promise[Done] queue.offer(msg -> promise).flatMap { case QueueOfferResult.Enqueued => promise.future case QueueOfferResult.Dropped => Future.failed(new Exception("Kafka request queue is full.")) case QueueOfferResult.QueueClosed => Future.failed(new Exception("Kafka request queue was closed.")) case QueueOfferResult.Failure(f) => Future.failed(f) } } def close(): Future[Done] = { queue.complete() queue.watchCompletion() } private def newRecord(msg: String) = new ProducerRecord[String, String](topic, "messages", msg) private def producerSettings = settings.withProperty(ConsumerConfig.METRIC_REPORTER_CLASSES_CONFIG, KamonMetricsReporter.name) }
Example 50
Source File: KafkaAsReceiver.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.sql.execution.streaming.http import java.util.Properties import org.apache.kafka.clients.producer.Callback import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.clients.producer.RecordMetadata import org.apache.spark.internal.Logging class KafkaAsReceiver(bootstrapServers: String) extends AbstractActionsHandler with SendStreamActionSupport with Logging { val props = new Properties(); props.put("bootstrap.servers", bootstrapServers); props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); val producer = new KafkaProducer[String, String](props); override def listActionHandlerEntries(requestBody: Map[String, Any]): PartialFunction[String, Map[String, Any]] = { case "actionSendStream" ⇒ handleSendStream(requestBody); } override def destroy() { producer.close(); } override def onReceiveStream(topic: String, rows: Array[RowEx]) = { var index = -1; for (row ← rows) { index += 1; val key = "" + row.batchId + "-" + row.offsetInBatch; //TODO: send an array instead of a string value? val value = row.originalRow(0).toString(); val record = new ProducerRecord[String, String](topic, key, value); producer.send(record, new Callback() { def onCompletion(metadata: RecordMetadata, e: Exception) = { if (e != null) { e.printStackTrace(); logError(e.getMessage); } else { val offset = metadata.offset(); val partition = metadata.partition(); logDebug(s"record is sent to kafka:key=$key, value=$value, partition=$partition, offset=$offset"); } } }); } } } class KafkaAsReceiverFactory extends ActionsHandlerFactory { def createInstance(params: Params) = new KafkaAsReceiver(params.getRequiredString("bootstrapServers")); }
Example 51
Source File: ActionsHandler.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.sql.execution.streaming.http import java.util.Properties import scala.collection.mutable.ArrayBuffer import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.clients.producer.ProducerRecord import org.apache.spark.internal.Logging import org.apache.spark.sql.Row import java.sql.Timestamp import org.apache.spark.sql.types.StructType import java.util.concurrent.atomic.AtomicInteger def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries; def destroy(); } trait ActionsHandlerFactory { def createInstance(params: Params): ActionsHandler; } abstract class AbstractActionsHandler extends ActionsHandler { def getRequiredParam(requestBody: Map[String, Any], key: String): Any = { val opt = requestBody.get(key); if (opt.isEmpty) { throw new MissingRequiredRequestParameterException(key); } opt.get; } override def destroy() = { } } class NullActionsHandler extends AbstractActionsHandler { override def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries = new ActionHandlerEntries() { def apply(action: String) = Map[String, Any](); //yes, do nothing def isDefinedAt(action: String) = false; }; } //rich row with extra info: id, time stamp, ... case class RowEx(originalRow: Row, batchId: Long, offsetInBatch: Long, timestamp: Timestamp) { def withTimestamp(): Row = Row.fromSeq(originalRow.toSeq :+ timestamp); def withId(): Row = Row.fromSeq(originalRow.toSeq :+ s"$batchId-$offsetInBatch"); def extra: (Long, Long, Timestamp) = { (batchId, offsetInBatch, timestamp) }; } trait SendStreamActionSupport { def onReceiveStream(topic: String, rows: Array[RowEx]); def getRequiredParam(requestBody: Map[String, Any], key: String): Any; val listeners = ArrayBuffer[StreamListener](); def addListener(listener: StreamListener): this.type = { listeners += listener; this; } protected def notifyListeners(topic: String, data: Array[RowEx]) { listeners.foreach { _.onArrive(topic, data); } } def handleSendStream(requestBody: Map[String, Any]): Map[String, Any] = { val topic = getRequiredParam(requestBody, "topic").asInstanceOf[String]; val batchId = getRequiredParam(requestBody, "batchId").asInstanceOf[Long]; val rows = getRequiredParam(requestBody, "rows").asInstanceOf[Array[Row]]; val ts = new Timestamp(System.currentTimeMillis()); var index = -1; val rows2 = rows.map { row ⇒ index += 1; RowEx(Row.fromSeq(row.toSeq), batchId, index, ts) } onReceiveStream(topic, rows2); notifyListeners(topic, rows2); Map("rowsCount" -> rows.size); } }
Example 52
Source File: ProducerStreamSpec.scala From reactive-kafka-microservice-template with Apache License 2.0 | 5 votes |
package akka.kafka import akka.actor.ActorSystem import akka.stream.scaladsl.{Sink, Source} import akka.testkit.{DefaultTimeout, ImplicitSender, TestKit, TestProbe} import com.omearac.consumers.ConsumerStream import com.omearac.producers.ProducerStream import com.omearac.settings.Settings import com.omearac.shared.JsonMessageConversion.Conversion import com.omearac.shared.KafkaMessages.{ExampleAppEvent, KafkaMessage} import org.apache.kafka.clients.producer.ProducerRecord import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike} class ProducerStreamSpec extends TestKit(ActorSystem("ProducerStreamSpec")) with DefaultTimeout with ImplicitSender with WordSpecLike with Matchers with BeforeAndAfterAll with ConsumerStream with ProducerStream { val settings = Settings(system).KafkaProducers val probe = TestProbe() override def afterAll: Unit = { shutdown() } "Sending KafkaMessages to the KafkaMessage producerStream" should { "be converted to JSON and obtained by the Stream Sink " in { //Creating Producer Stream Components for publishing KafkaMessages val producerProps = settings.KafkaProducerInfo("KafkaMessage") val numOfMessages = 50 val kafkaMsgs = for { i <- 0 to numOfMessages} yield KafkaMessage("sometime", "somestuff", i) val producerSource= Source(kafkaMsgs) val producerFlow = createStreamFlow[KafkaMessage](producerProps) val producerSink = Sink.actorRef(probe.ref, "complete") val jsonKafkaMsgs = for { msg <- kafkaMsgs} yield Conversion[KafkaMessage].convertToJson(msg) producerSource.via(producerFlow).runWith(producerSink) for (i <- 0 to jsonKafkaMsgs.length) { probe.expectMsgPF(){ case m: ProducerRecord[_,_] => if (jsonKafkaMsgs.contains(m.value())) () else fail() case "complete" => () } } } } "Sending ExampleAppEvent messages to the EventMessage producerStream" should { "be converted to JSON and obtained by the Stream Sink " in { //Creating Producer Stream Components for publishing ExampleAppEvent messages val producerProps = settings.KafkaProducerInfo("ExampleAppEvent") val numOfMessages = 50 val eventMsgs = for { i <- 0 to 50} yield ExampleAppEvent("sometime", "senderID", s"Event number $i occured") val producerSource= Source(eventMsgs) val producerFlow = createStreamFlow[ExampleAppEvent](producerProps) val producerSink = Sink.actorRef(probe.ref, "complete") val jsonAppEventMsgs = for{ msg <- eventMsgs} yield Conversion[ExampleAppEvent].convertToJson(msg) producerSource.via(producerFlow).runWith(producerSink) for (i <- 0 to jsonAppEventMsgs.length){ probe.expectMsgPF(){ case m: ProducerRecord[_,_] => if (jsonAppEventMsgs.contains(m.value())) () else fail() case "complete" => () } } } } }
Example 53
Source File: ProducerStream.scala From reactive-kafka-microservice-template with Apache License 2.0 | 5 votes |
package com.omearac.producers import akka.actor.{ActorRef, ActorSystem} import akka.kafka.ProducerSettings import akka.kafka.scaladsl.Producer import akka.stream.OverflowStrategy import akka.stream.scaladsl.{Flow, Source} import com.omearac.shared.JsonMessageConversion.Conversion import com.omearac.shared.{AkkaStreams, EventSourcing} import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.serialization.{ByteArraySerializer, StringSerializer} trait ProducerStream extends AkkaStreams with EventSourcing { implicit val system: ActorSystem def self: ActorRef def createStreamSource[msgType] = { Source.queue[msgType](Int.MaxValue,OverflowStrategy.backpressure) } def createStreamSink(producerProperties: Map[String, String]) = { val kafkaMBAddress = producerProperties("bootstrap-servers") val producerSettings = ProducerSettings(system, new ByteArraySerializer, new StringSerializer).withBootstrapServers(kafkaMBAddress) Producer.plainSink(producerSettings) } def createStreamFlow[msgType: Conversion](producerProperties: Map[String, String]) = { val numberOfPartitions = producerProperties("num.partitions").toInt -1 val topicToPublish = producerProperties("publish-topic") val rand = new scala.util.Random val range = 0 to numberOfPartitions Flow[msgType].map { msg => val partition = range(rand.nextInt(range.length)) val stringJSONMessage = Conversion[msgType].convertToJson(msg) new ProducerRecord[Array[Byte], String](topicToPublish, partition, null, stringJSONMessage) } } }
Example 54
Source File: KafkaTransmitter.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.simulator.transmitters import java.util.Properties import akka.actor.{ActorLogging, Props} import com.orendainx.trucking.simulator.transmitters.DataTransmitter.Transmit import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import scala.sys.SystemProperties import com.typesafe.config.Config object KafkaTransmitter { def props(topic: String)(implicit config: Config) = Props(new KafkaTransmitter(topic)) } class KafkaTransmitter(topic: String)(implicit config: Config) extends DataTransmitter with ActorLogging { private val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.getString("transmitter.kafka.bootstrap-servers")) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.key-serializer")) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.value-serializer")) // Enable settings for a secure environment, if necessary. // See: http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.4/bk_secure-kafka-ambari/content/ch_secure-kafka-produce-events.html val systemProperties = new SystemProperties if (config.getBoolean("transmitter.kafka.security-enabled")) { props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, config.getString("transmitter.kafka.security-protocol")) systemProperties.put("java.security.auth.login.config", config.getString("transmitter.kafka.jaas-file")) } private val producer = new KafkaProducer[String, String](props) def receive = { case Transmit(data) => producer.send(new ProducerRecord(topic, data.toCSV)) } override def postStop(): Unit = { producer.close() log.info("KafkaTransmitter closed its producer.") } }
Example 55
Source File: EventProducer.scala From rokku with Apache License 2.0 | 5 votes |
package com.ing.wbaa.rokku.proxy.provider.kafka import akka.Done import akka.http.scaladsl.model.HttpMethod import com.ing.wbaa.rokku.proxy.config.KafkaSettings import com.ing.wbaa.rokku.proxy.data.RequestId import com.ing.wbaa.rokku.proxy.handler.LoggerHandlerWithId import com.ing.wbaa.rokku.proxy.metrics.MetricsFactory import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata } import org.apache.kafka.common.serialization.StringSerializer import scala.concurrent.{ ExecutionContext, Future } trait EventProducer { private val logger = new LoggerHandlerWithId import scala.collection.JavaConverters._ protected[this] implicit val kafkaSettings: KafkaSettings protected[this] implicit val executionContext: ExecutionContext private lazy val config: Map[String, Object] = Map[String, Object]( "bootstrap.servers" -> kafkaSettings.bootstrapServers, ProducerConfig.RETRIES_CONFIG -> kafkaSettings.retries, ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG -> kafkaSettings.retriesBackOff, ProducerConfig.RECONNECT_BACKOFF_MAX_MS_CONFIG -> kafkaSettings.retriesBackOffMax, CommonClientConfigs.SECURITY_PROTOCOL_CONFIG -> kafkaSettings.protocol, ProducerConfig.MAX_BLOCK_MS_CONFIG -> kafkaSettings.maxblock, ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG -> kafkaSettings.requestTimeoutMs, "ssl.truststore.location" -> kafkaSettings.sslTruststoreLocation, "ssl.truststore.password" -> kafkaSettings.sslTruststorePassword, "ssl.keystore.location" -> kafkaSettings.sslKeystoreLocation, "ssl.keystore.password" -> kafkaSettings.sslKeystorePassword, "ssl.key.password" -> kafkaSettings.sslKeyPassword ) private lazy val kafkaProducer: KafkaProducer[String, String] = new KafkaProducer(config.asJava, new StringSerializer, new StringSerializer) def sendSingleMessage(event: String, topic: String, httpMethod: Option[HttpMethod] = None)(implicit id: RequestId): Future[Done] = { kafkaProducer .send(new ProducerRecord[String, String](topic, event), (metadata: RecordMetadata, exception: Exception) => { exception match { case e: Exception => MetricsFactory.incrementKafkaSendErrors logger.error("error in sending event {} to topic {}, error={}", event, topic, e) throw new Exception(e) case _ => httpMethod.map { m => MetricsFactory.incrementKafkaNotificationsSent(m) } logger.debug("Message sent {} to kafka, offset {}", event, metadata.offset()) } }) match { case _ => Future(Done) } } }
Example 56
Source File: StreamStaticDataGenerator.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config.{bootstrapServer, topic} import knolx.KnolXLogger import knolx.spark.Stock import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.json4s.jackson.Serialization.write import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.util.Random object StreamStaticDataGenerator extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay") val orderTypes = List("buy", "sell") val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9) implicit val formats = Serialization.formats(NoTypeHints) info("Streaming data into Kafka...") system.scheduler.schedule(0 seconds, 5 seconds) { companyNames.foreach { name => val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head) producer.send(new ProducerRecord[String, String](topic, write(stock))) } } }
Example 57
Source File: KafkaRecord.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.kafka.producer

import hydra.core.transport.HydraRecord
import org.apache.commons.lang3.ClassUtils
import org.apache.kafka.clients.producer.ProducerRecord

trait KafkaRecord[K, V] extends HydraRecord[K, V] {

  val formatName: String = {
    val cname = ClassUtils.getSimpleName(getClass)
    val idx = cname.indexOf("Record")
    if (idx != -1) {
      cname.take(idx).toLowerCase
    } else {
      getClass.getName
    }
  }
}

object KafkaRecord {

  implicit def toProducerRecord[K, V](record: KafkaRecord[K, V]): ProducerRecord[K, V] = {
    new ProducerRecord[K, V](
      record.destination,
      // A missing partition is passed to Kafka as null, letting the partitioner decide
      record.partition.map(Integer.valueOf).orNull,
      record.timestamp,
      record.key,
      record.payload
    )
  }
}
Example 58
Source File: KafkaMessagingSystem.scala From amadou with Apache License 2.0 | 5 votes |
package com.mediative.amadou package monitoring import java.util.Properties import com.typesafe.config.Config import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata} class KafkaMessagingSystem(config: Config) extends MessagingSystem with Logging { private val properties = KafkaMessagingSystem.readProperties(config) private val producer = new KafkaProducer[String, String](properties) private val topicPrefix = properties.getProperty("topic.prefix") override def publish(topic: String, message: String): Unit = { val topicName = s"$topicPrefix-$topic" logger.info(s"Publishing to $topicName :\n$message\n") producer.send(new ProducerRecord[String, String](topicName, message), new Callback { override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = if (exception != null) { logger .error(s"Cannot publish to $topicName. Caused by: ${exception.getMessage}", exception) } }) () } override def stop(): Unit = producer.close() } object KafkaMessagingSystem { def readProperties(config: Config): Properties = { val propertiesKeys = Seq( "bootstrap.servers", "acks", "retries", "batch.size", "linger.ms", "buffer.memory", "key.serializer", "value.serializer", "topic.prefix") val properties = new Properties() propertiesKeys.foreach(key => properties.setProperty(key, config.getString(key))) properties } }
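A possible usage sketch for the class above; the configuration values are placeholders chosen to match the keys that readProperties expects.

import com.mediative.amadou.monitoring.KafkaMessagingSystem
import com.typesafe.config.ConfigFactory

object MessagingUsageSketch {

  def main(args: Array[String]): Unit = {
    // Placeholder settings covering every key read by KafkaMessagingSystem.readProperties.
    val config = ConfigFactory.parseString(
      """
        |bootstrap.servers = "localhost:9092"
        |acks = "all"
        |retries = "1"
        |batch.size = "16384"
        |linger.ms = "1"
        |buffer.memory = "33554432"
        |key.serializer = "org.apache.kafka.common.serialization.StringSerializer"
        |value.serializer = "org.apache.kafka.common.serialization.StringSerializer"
        |topic.prefix = "amadou"
        |""".stripMargin)

    val messaging = new KafkaMessagingSystem(config)
    messaging.publish("jobs", """{"state": "running"}""") // lands on topic "amadou-jobs"
    messaging.stop()
  }
}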
Example 59
Source File: DStreamKafkaWriter.scala From spark-kafka-writer with Apache License 2.0 | 5 votes |
package com.github.benfradet.spark.kafka.writer

import org.apache.kafka.clients.producer.{Callback, ProducerRecord}
import org.apache.spark.streaming.dstream.DStream

import scala.reflect.ClassTag

class DStreamKafkaWriter[T: ClassTag](@transient private val dStream: DStream[T])
  extends KafkaWriter[T] with Serializable {

  override def writeToKafka[K, V](
    producerConfig: Map[String, Object],
    transformFunc: T => ProducerRecord[K, V],
    callback: Option[Callback] = None
  ): Unit =
    dStream.foreachRDD { rdd =>
      val rddWriter = new RDDKafkaWriter[T](rdd)
      rddWriter.writeToKafka(producerConfig, transformFunc, callback)
    }
}
Example 60
Source File: RDDKafkaWriter.scala From spark-kafka-writer with Apache License 2.0 | 5 votes |
package com.github.benfradet.spark.kafka.writer

import org.apache.kafka.clients.producer.{Callback, ProducerRecord}
import org.apache.spark.rdd.RDD

import scala.reflect.ClassTag

class RDDKafkaWriter[T: ClassTag](@transient private val rdd: RDD[T])
  extends KafkaWriter[T] with Serializable {

  override def writeToKafka[K, V](
    producerConfig: Map[String, Object],
    transformFunc: T => ProducerRecord[K, V],
    callback: Option[Callback] = None
  ): Unit =
    rdd.foreachPartition { partition =>
      val producer = KafkaProducerCache.getProducer[K, V](producerConfig)
      partition
        .map(transformFunc)
        .foreach(record => producer.send(record, callback.orNull))
    }
}
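A possible usage sketch for the RDD writer above, run against a local SparkContext; the broker address and topic name are placeholders, and the writer is instantiated directly rather than through the library's implicit syntax.

import com.github.benfradet.spark.kafka.writer.RDDKafkaWriter
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.spark.{SparkConf, SparkContext}

object RDDKafkaWriterUsage {

  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("kafka-writer-sketch"))

    // Standard Kafka client settings; the broker address is a placeholder.
    val producerConfig = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.serializer" -> "org.apache.kafka.common.serialization.StringSerializer",
      "value.serializer" -> "org.apache.kafka.common.serialization.StringSerializer"
    )

    val rdd = sc.parallelize(Seq("foo", "bar", "baz"))

    // Each partition reuses a cached producer and sends one record per element.
    new RDDKafkaWriter[String](rdd)
      .writeToKafka(producerConfig, s => new ProducerRecord[String, String]("example-topic", s))

    sc.stop()
  }
}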
Example 61
Source File: KafkaSink.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.kafka import com.sksamuel.exts.Logging import io.eels.schema.StructType import io.eels.{Row, SinkWriter, Sink} import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} trait KafkaRowConverter[V] { def convert(row: Row): V } object KafkaRowConverter { implicit object NoopRowConverter extends KafkaRowConverter[Row] { override def convert(row: Row): Row = row } } case class KafkaSink[K, V](topic: String, producer: KafkaProducer[K, V]) (implicit partitioner: KafkaPartitioner[V], converter: KafkaRowConverter[V], keygen: KafkaKeyGen[K]) extends Sink with Logging { def open(schema: StructType): SinkWriter = { new SinkWriter { override def write(row: Row): Unit = { val key = keygen.gen(row) val value = converter.convert(row) val record = partitioner.partition(row) match { case Some(part) => new ProducerRecord[K, V](topic, part, key, value) case _ => new ProducerRecord[K, V](topic, key, value) } logger.debug(s"Sending record $record") producer.send(record) producer.flush() } override def close(): Unit = producer.close() } } }
Example 62
Source File: WordCountProducer.scala From akka_streams_tutorial with MIT License | 5 votes |
package alpakka.kafka import java.util import java.util.concurrent.ThreadLocalRandom import akka.actor.ActorSystem import akka.kafka.ProducerMessage.Message import akka.kafka.ProducerSettings import akka.kafka.scaladsl.Producer import akka.stream.ThrottleMode import akka.stream.scaladsl.{Keep, Sink, Source} import akka.{Done, NotUsed} import org.apache.kafka.clients.producer.{Partitioner, ProducerRecord} import org.apache.kafka.common.errors.{NetworkException, UnknownTopicOrPartitionException} import org.apache.kafka.common.serialization.StringSerializer import org.apache.kafka.common.{Cluster, PartitionInfo} import scala.concurrent.Future import scala.concurrent.duration._ class CustomPartitioner extends Partitioner { override def partition(topic: String, key: Any, keyBytes: Array[Byte], value: Any, valueBytes: Array[Byte], cluster: Cluster): Int = { val partitionInfoList: util.List[PartitionInfo] = cluster.availablePartitionsForTopic(topic) val partitionCount = partitionInfoList.size val fakeNewsPartition = 0 //println("CustomPartitioner received key: " + key + " and value: " + value) if (value.toString.contains(WordCountProducer.fakeNewsKeyword)) { //println("CustomPartitioner send message: " + value + " to fakeNewsPartition") fakeNewsPartition } else ThreadLocalRandom.current.nextInt(1, partitionCount) //round robin } override def close(): Unit = { println("CustomPartitioner: " + Thread.currentThread + " received close") } override def configure(configs: util.Map[String, _]): Unit = { println("CustomPartitioner received configure with configuration: " + configs) } } object CustomPartitioner { private def deserialize[V](objectData: Array[Byte]): V = org.apache.commons.lang3.SerializationUtils.deserialize(objectData).asInstanceOf[V] }
Example 63
Source File: CsvKafkaPublisher.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.common import java.io.File import java.util.Random import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.io.Source object CsvKafkaPublisher { var counter = 0 var salts = 0 def main(args:Array[String]): Unit = { if (args.length == 0) { println("<brokerList> " + "<topicName> " + "<dataFolderOrFile> " + "<sleepPerRecord> " + "<acks> " + "<linger.ms> " + "<producer.type> " + "<batch.size> " + "<salts>") return } val kafkaBrokerList = args(0) val kafkaTopicName = args(1) val nyTaxiDataFolder = args(2) val sleepPerRecord = args(3).toInt val acks = args(4).toInt val lingerMs = args(5).toInt val producerType = args(6) //"async" val batchSize = args(7).toInt salts = args(8).toInt val kafkaProducer = KafkaProducerUntil.getNewProducer(kafkaBrokerList, acks, lingerMs, producerType, batchSize) println("--Input:" + nyTaxiDataFolder) val dataFolder = new File(nyTaxiDataFolder) if (dataFolder.isDirectory) { val files = dataFolder.listFiles().iterator files.foreach(f => { println("--Input:" + f) processFile(f, kafkaTopicName, kafkaProducer, sleepPerRecord) }) } else { println("--Input:" + dataFolder) processFile(dataFolder, kafkaTopicName, kafkaProducer, sleepPerRecord) } println("---Done") } def processFile(file:File, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String], sleepPerRecord:Int): Unit = { var counter = 0 val r = new Random() println("-Starting Reading") Source.fromFile(file).getLines().foreach(l => { counter += 1 if (counter % 10000 == 0) { println("{Sent:" + counter + "}") } if (counter % 100 == 0) { print(".") } Thread.sleep(sleepPerRecord) val saltedVender = r.nextInt(salts) + l if (counter > 2) { publishTaxiRecord(saltedVender, kafkaTopicName, kafkaProducer) } }) } def publishTaxiRecord(line:String, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String]): Unit = { if (line.startsWith("vendor_name") || line.length < 10) { println("skip") } else { val message = new ProducerRecord[String, String](kafkaTopicName, line.hashCode.toString, line) kafkaProducer.send(message) } } }
Example 64
Source File: KafkaWordCount.scala From AI with Apache License 2.0 | 5 votes |
package com.bigchange.basic import java.util import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.kafka.KafkaUtils import org.apache.spark.streaming.{Seconds, StreamingContext} object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount"). set("spark.streaming.receiver.writeAheadLog.enable", "true"). set("spark.streaming.kafka.maxRatePerPartition", "1000") val ssc = new StreamingContext(sparkConf, Seconds(2)) // 设置 checkpoint,这是考虑到了有 window 操作,window 操作一般是需要进行 checkpoint ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap // createStream 返回的是一个 Tuple2,具有 key,value,这里只关注 value. // 注意这里是 Receiver-based 方式(还提供了 non-receiver 模式),默认配置下,这种方式是会在 receiver 挂掉 // 丢失数据的,需要设置 Write Ahead, 上面我们已经配置了, 那么存储 level 也可以进行相应调整. val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap, StorageLevel.MEMORY_AND_DISK_SER).map(_._2) val words = lines.flatMap(_.split(" ")) // 统计的是 10 分钟内的单词数量,每隔 10 秒统计 1 次 val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Seconds(10), Seconds(2), 2). filter(x => x._2 > 0) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } // 需要注意的是这里是 broker list,为 host:port,host:port 形式 val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new util.HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while (true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(100).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } }
Example 65
Source File: WriteToKafka.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.bundle.kafka import java.util import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext} import cn.piflow.conf._ import cn.piflow.conf.bean.PropertyDescriptor import cn.piflow.conf.util.{ImageUtil, MapUtil} import java.util.Properties import org.apache.spark.sql.SparkSession import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.clients.producer.Producer import org.apache.kafka.clients.producer.ProducerRecord import scala.collection.mutable class WriteToKafka extends ConfigurableStop{ val description: String = "Write data to kafka" val inportList: List[String] = List(Port.DefaultPort) val outportList: List[String] = List(Port.DefaultPort) var kafka_host:String =_ var topic:String=_ def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = { val spark = pec.get[SparkSession]() val df = in.read() val properties:Properties = new Properties() properties.put("bootstrap.servers", kafka_host) properties.put("acks", "all") //properties.put("retries", 0) //properties.put("batch.size", 16384) //properties.put("linger.ms", 1) //properties.put("buffer.memory", 33554432) properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") var producer:Producer[String,String] = new KafkaProducer[String,String](properties) df.collect().foreach(row=>{ //var hm:util.HashMap[String,String]=new util.HashMap() //row.schema.fields.foreach(f=>(if(!f.name.equals(column_name)&&row.getAs(f.name)!=null)hm.put(f.name,row.getAs(f.name).asInstanceOf[String]))) var res:List[String]=List() row.schema.fields.foreach(f=>{ if(row.getAs(f.name)==null)res="None"::res else{ res=row.getAs(f.name).asInstanceOf[String]::res } }) val s:String=res.reverse.mkString(",") val record=new ProducerRecord[String,String](topic,s) producer.send(record) }) producer.close() } def initialize(ctx: ProcessContext): Unit = { } def setProperties(map: Map[String, Any]): Unit = { kafka_host=MapUtil.get(map,key="kafka_host").asInstanceOf[String] //port=Integer.parseInt(MapUtil.get(map,key="port").toString) topic=MapUtil.get(map,key="topic").asInstanceOf[String] } override def getPropertyDescriptor(): List[PropertyDescriptor] = { var descriptor : List[PropertyDescriptor] = List() val kafka_host = new PropertyDescriptor().name("kafka_host").displayName("KAFKA_HOST").defaultValue("").required(true) val topic = new PropertyDescriptor().name("topic").displayName("TOPIC").defaultValue("").required(true) descriptor = kafka_host :: descriptor descriptor = topic :: descriptor descriptor } override def getIcon(): Array[Byte] = { ImageUtil.getImage("icon/kafka/WriteToKafka.png") } override def getGroup(): List[String] = { List(StopGroup.KafkaGroup.toString) } override val authorEmail: String = "[email protected]" }
Example 66
Source File: KafkaWordCount.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } } // scalastyle:on println
Example 67
Source File: KafkaPublisher.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} class KafkaPublisher { val props = new Properties() props.put("bootstrap.servers", "localhost:9092") props.put("partition.assignment.strategy", "range") props.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer") props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer") val producer = new KafkaProducer[Array[Byte], Array[Byte]](props) def send(topic: String, event: Array[Byte]): Unit = { producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, event)) } def send(topic: String, events: List[Array[Byte]]): Unit = { for (event <- events) { producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, event)) } } }
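Note: partition.assignment.strategy and the two *.deserializer properties set above are consumer-side settings; the producer simply ignores them. A minimal byte-array producer only needs the bootstrap servers and the two serializers. A sketch, assuming a local broker and a placeholder topic:

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object MinimalBytesProducer extends App {
  // Minimal producer configuration; the remaining properties in the example above are optional or consumer-only.
  val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092")
  props.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer")

  val producer = new KafkaProducer[Array[Byte], Array[Byte]](props)
  producer.send(new ProducerRecord[Array[Byte], Array[Byte]]("some-topic", "payload".getBytes("UTF-8")))
  producer.close()
}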
Example 68
Source File: SessionKafkaProducer.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.session import java.util.{Properties, Random} import net.liftweb.json.DefaultFormats import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import net.liftweb.json.Serialization.write object SessionKafkaProducer { def main(args:Array[String]): Unit = { implicit val formats = DefaultFormats val kafkaServerURL = args(0) val kafkaServerPort = args(1) val topic = args(2) val numberOfEntities = args(3).toInt val numberOfMessagesPerEntity = args(4).toInt val waitTimeBetweenMessageBatch = args(5).toInt val chancesOfMissing = args(6).toInt val props = new Properties() props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) props.put("acks", "all") props.put("retries", "0") props.put("batch.size", "16384") props.put("linger.ms", "1") props.put("buffer.memory", "33554432") props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) val r = new Random() var sentCount = 0 println("About to send to " + topic) for (j <- 0 to numberOfMessagesPerEntity) { for (i <- 0 to numberOfEntities) { if (r.nextInt(chancesOfMissing) != 0) { val message = write(HeartBeat(i.toString, System.currentTimeMillis())) val producerRecord = new ProducerRecord[String,String](topic, message) producer.send(producerRecord) sentCount += 1 } } println("Sent Count:" + sentCount) Thread.sleep(waitTimeBetweenMessageBatch) } producer.close() } }
Example 69
Source File: KafkaWordCount.scala From spark1.52 with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka.KafkaUtils object KafkaWordCount { def main(args: Array[String]) { StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = Array("localhost:2181","","topic1,topic2,topic3,topic4","1")//args val sparkConf = new SparkConf().setAppName("KafkaWordCount").setMaster("local[2]") // at least two local threads: one for the receiver, one for processing val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } } // scalastyle:on println
Example 70
Source File: FailingKafkaStorage.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.kafka import java.util.concurrent.Future import io.amient.affinity.core.storage.{LogStorageConf, Record} import io.amient.affinity.core.util.MappedJavaFuture import org.apache.kafka.clients.producer.{ProducerRecord, RecordMetadata} class FailingKafkaStorage(conf: LogStorageConf) extends KafkaLogStorage(conf) { override def append(record: Record[Array[Byte], Array[Byte]]): Future[java.lang.Long] = { val producerRecord = new ProducerRecord(topic, null, record.timestamp, record.key, record.value) new MappedJavaFuture[RecordMetadata, java.lang.Long](producer.send(producerRecord)) { override def map(result: RecordMetadata): java.lang.Long = { if (System.currentTimeMillis() % 3 == 0) throw new RuntimeException("simulated kafka producer error") result.offset() } } } }
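Note: the five-argument constructor used above is ProducerRecord(topic, partition, timestamp, key, value); passing null for the partition leaves the choice to the configured partitioner. For reference, a sketch of the commonly used overloads (topic name and values are placeholders):

import org.apache.kafka.clients.producer.ProducerRecord

object ProducerRecordOverloads {
  val valueOnly = new ProducerRecord[String, String]("topic", "value")                  // no key; partitioner distributes records
  val keyed     = new ProducerRecord[String, String]("topic", "key", "value")           // partition derived from the key hash
  val pinned    = new ProducerRecord[String, String]("topic", Integer.valueOf(0), "key", "value") // explicit partition 0
  val timed     = new ProducerRecord[String, String]("topic", null, java.lang.Long.valueOf(1234567890L), "key", "value") // explicit timestamp, partitioner picks the partition
}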
Example 71
Source File: TransactionalProducer.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.kafka import java.util.Properties import akka.actor.Actor import akka.actor.Status.{Failure, Success} import akka.event.Logging import com.typesafe.config.Config import io.amient.affinity.Conf import io.amient.affinity.core.actor.{TransactionAbort, TransactionBegin, TransactionCommit, TransactionalRecord} import io.amient.affinity.core.config.CfgStruct import io.amient.affinity.core.storage.StorageConf import io.amient.affinity.kafka.KafkaStorage.{KafkaConsumerConf, KafkaProducerConf} import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata} import org.apache.kafka.common.serialization.ByteArraySerializer import scala.collection.JavaConverters._ object KafkaConf extends KafkaConf { override def apply(config: Config): KafkaConf = new KafkaConf().apply(config) } class KafkaConf extends CfgStruct[KafkaConf](classOf[StorageConf]) { val BootstrapServers = string("kafka.bootstrap.servers", true).doc("kafka connection string used for consumer and/or producer") val Producer = struct("kafka.producer", new KafkaProducerConf, false).doc("any settings that the underlying version of kafka producer client supports") val Consumer = struct("kafka.consumer", new KafkaConsumerConf, false).doc("any settings that the underlying version of kafka consumer client supports") } class TransactionalProducer extends Actor { val logger = Logging.getLogger(context.system, this) private[this] var producer: KafkaProducer[Array[Byte], Array[Byte]] = null val kafkaConf = KafkaConf(Conf(context.system.settings.config).Affi.Storage) val producerConfig = new Properties() { if (kafkaConf.Producer.isDefined) { val producerConfig = kafkaConf.Producer.toMap() if (producerConfig.containsKey("bootstrap.servers")) throw new IllegalArgumentException("bootstrap.servers cannot be overridden for KafkaStorage producer") if (producerConfig.containsKey("key.serializer")) throw new IllegalArgumentException("Binary kafka stream cannot use custom key.serializer") if (producerConfig.containsKey("value.serializer")) throw new IllegalArgumentException("Binary kafka stream cannot use custom value.serializer") producerConfig.entrySet.asScala.filter(_.getValue.isDefined).foreach { case (entry) => put(entry.getKey, entry.getValue.apply.toString) } } put("bootstrap.servers", kafkaConf.BootstrapServers()) put("value.serializer", classOf[ByteArraySerializer].getName) put("key.serializer", classOf[ByteArraySerializer].getName) } override def receive: Receive = { case req@TransactionBegin(transactionalId) => req(sender) ! { if (producer == null) { producerConfig.put("transactional.id", transactionalId) producer = new KafkaProducer[Array[Byte], Array[Byte]](producerConfig) logger.debug(s"Transactions.Init(transactional.id = $transactionalId)") producer.initTransactions() } logger.debug("Transactions.Begin()") producer.beginTransaction() } case TransactionalRecord(topic, key, value, timestamp, partition) => val replyto = sender val producerRecord = new ProducerRecord( topic, partition.map(new Integer(_)).getOrElse(null), timestamp.map(new java.lang.Long(_)).getOrElse(null), key, value) logger.debug(s"Transactions.Append(topic=$topic)") producer.send(producerRecord, new Callback { override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = { if (exception != null) { replyto ! Failure(exception) } else { replyto ! Success(metadata.offset()) } } }) case req@TransactionCommit() => req(sender) ! { logger.debug("Transactions.Commit()") producer.commitTransaction() } case req@TransactionAbort() => req(sender) ! { logger.debug("Transactions.Abort()") producer.abortTransaction() } } }
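Note: the actor above drives the standard transactional-producer lifecycle (initTransactions, beginTransaction, commitTransaction/abortTransaction). Stripped of the Akka plumbing, the underlying client calls look roughly like this sketch; the broker address, transactional id and topic are placeholders:

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.ByteArraySerializer

object TransactionalProducerSketch extends App {
  val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092")
  props.put("transactional.id", "example-transactional-id") // must be set before initTransactions()
  props.put("key.serializer", classOf[ByteArraySerializer].getName)
  props.put("value.serializer", classOf[ByteArraySerializer].getName)

  val producer = new KafkaProducer[Array[Byte], Array[Byte]](props)
  producer.initTransactions() // once per producer instance
  try {
    producer.beginTransaction()
    producer.send(new ProducerRecord("some-topic", "key".getBytes("UTF-8"), "value".getBytes("UTF-8")))
    producer.commitTransaction() // makes everything sent since beginTransaction() visible atomically
  } catch {
    case e: Exception =>
      producer.abortTransaction() // discards the records sent in the open transaction
      throw e
  } finally {
    producer.close()
  }
}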
Example 72
Source File: ExampleExternalStateSpec.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.example import java.util.Properties import com.typesafe.config.ConfigFactory import io.amient.affinity.core.cluster.Node import io.amient.affinity.core.util.AffinityTestBase import io.amient.affinity.kafka.EmbeddedKafka import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.scalatest.concurrent.TimeLimitedTests import org.scalatest.time.{Millis, Span} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import scala.collection.JavaConverters._ class ExampleExternalStateSpec extends FlatSpec with AffinityTestBase with EmbeddedKafka with Matchers with BeforeAndAfterAll with TimeLimitedTests { override def numPartitions = 2 val config = configure(ConfigFactory.load("example-external-state")) val topic = config.getString("affinity.keyspace.external.state.news.storage.kafka.topic") val node = new Node(configure(config, Some(zkConnect), Some(kafkaBootstrap))) override def beforeAll: Unit = try { createTopic(topic) val externalProducer = createKafkaAvroProducer[String, String]() try { externalProducer.send(new ProducerRecord(topic, "10:30", "the universe is expanding")) externalProducer.send(new ProducerRecord(topic, "11:00", "the universe is still expanding")) externalProducer.send(new ProducerRecord(topic, "11:30", "the universe briefly contracted but is expanding again")) externalProducer.flush() } finally { externalProducer.close() } //the external fixture is produced and the externalProducer is flushed() before the node is started node.start() node.awaitClusterReady() //at this point all stores have loaded everything available in the external topic so the test will be deterministic } finally { super.beforeAll() } override def afterAll: Unit = try { node.shutdown() } finally { super.afterAll() } behavior of "External State" val timeLimit = Span(5000, Millis) //it should be much faster but sometimes many tests are run at the same time it should "start automatically tailing state partitions on startup even when master" in { //we don't need an arbitrary sleep to ensure the tailing state catches up with the writes above //before we fetch the latest news because the watermark is built into the request to make the test fast and deterministic val response = node.get_text(node.http_get(s"/news/latest")) response should include("10:30\tthe universe is expanding") response should include("11:00\tthe universe is still expanding") response should include("11:30\tthe universe briefly contracted but is expanding again") } private def createKafkaAvroProducer[K, V]() = new KafkaProducer[K, V](new Properties { put("bootstrap.servers", kafkaBootstrap) put("acks", "1") put("key.serializer", "io.amient.affinity.kafka.KafkaAvroSerializer") put("value.serializer", "io.amient.affinity.kafka.KafkaAvroSerializer") //this simply adds all configs required by KafkaAvroSerializer config.getConfig("affinity.avro").entrySet().asScala.foreach { case (entry) => put(entry.getKey, entry.getValue.unwrapped()) } }) }
Example 73
Source File: KafkaWordCount.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord} import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ import org.apache.spark.SparkConf object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } }
Example 74
Source File: KafkaWordCount.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } } // scalastyle:on println
Example 75
Source File: WordCountTestableSpec.scala From kafka-streams with Apache License 2.0 | 5 votes |
package com.supergloo.examples import com.supergloo.WordCountTestable import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} import org.apache.kafka.streams.TopologyTestDriver import org.apache.kafka.streams.state.KeyValueStore import org.apache.kafka.streams.test.ConsumerRecordFactory import org.scalatest.{FlatSpec, Matchers} class WordCountTestableSpec extends FlatSpec with Matchers with KafkaTestSetup { val wordCountApplication = new WordCountTestable "Convert streaming data into lowercase and publish into output topic" should "push lower text to kafka" in { val driver = new TopologyTestDriver(wordCountApplication.toLowerCaseStream("input-topic", "output-topic"), config) val recordFactory = new ConsumerRecordFactory("input-topic", new StringSerializer(), new StringSerializer()) val words = "Hello, WORLDY, World worlD Test" driver.pipeInput(recordFactory.create(words)) val record: ProducerRecord[String, String] = driver.readOutput("output-topic", new StringDeserializer(), new StringDeserializer()) record.value() shouldBe words.toLowerCase driver.close() } "WordCountTestable" should "count number of words" in { val driver = new TopologyTestDriver(wordCountApplication.countNumberOfWords("input-topic", "output-topic", "counts-store"), config) val recordFactory = new ConsumerRecordFactory("input-topic", new StringSerializer(), new StringSerializer()) val words = "Hello Kafka Streams, All streams lead to Kafka" driver.pipeInput(recordFactory.create(words)) val store: KeyValueStore[String, java.lang.Long] = driver.getKeyValueStore("counts-store") store.get("hello") shouldBe 1 store.get("kafka") shouldBe 2 store.get("streams") shouldBe 2 store.get("lead") shouldBe 1 store.get("to") shouldBe 1 driver.close() } }
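Note: ConsumerRecordFactory and TopologyTestDriver.readOutput come from the older kafka-streams-test-utils API. From Kafka 2.4 on, the same check is usually written with TestInputTopic/TestOutputTopic. A sketch of the first assertion in that style, meant to sit inside the spec above (it reuses its wordCountApplication, config and Matchers; the expected string is just the lower-cased input):

import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.apache.kafka.streams.TopologyTestDriver

val driver = new TopologyTestDriver(wordCountApplication.toLowerCaseStream("input-topic", "output-topic"), config)
val input  = driver.createInputTopic("input-topic", new StringSerializer(), new StringSerializer())
val output = driver.createOutputTopic("output-topic", new StringDeserializer(), new StringDeserializer())

input.pipeInput("Hello, WORLDY, World worlD Test")
output.readValue() shouldBe "hello, worldy, world world test" // readValue() returns the value of the next output record
driver.close()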
Example 76
Source File: PredictionLogger.scala From ForestFlow with Apache License 2.0 | 5 votes |
package ai.forestflow.event.subscribers import java.nio.ByteOrder import ai.forestflow.domain.{PredictionEvent, PredictionEventGP} import ai.forestflow.serving.config.ApplicationEnvironment import akka.actor.{Actor, ActorLogging, Props} import akka.kafka.ProducerSettings import ai.forestflow.domain.{PredictionEvent, PredictionEventGP} import graphpipe.InferRequest import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.serialization.{ByteArraySerializer, StringSerializer} //import scalapb.json4s.JsonFormat import scala.util.{Success, Try} object PredictionLogger { private lazy val binaryProducerSettings = ProducerSettings(producerConfig, new StringSerializer, new ByteArraySerializer) private lazy val binaryProducer = binaryProducerSettings.createKafkaProducer() override def preStart(): Unit = { if (basic_topic.isDefined) context.system.eventStream.subscribe(self, classOf[PredictionEvent]) if (gp_topic.isDefined) context.system.eventStream.subscribe(self, classOf[PredictionEventGP]) super.preStart() } override def receive: Receive = { case event@PredictionEvent(prediction, servedRequest, inferenceRequest, loggingSettings) => val key = loggingSettings .keyFeatures .flatMap(inferenceRequest.configs.get) .mkString(loggingSettings.getKeyFeaturesSeparator) if (key.length > 0 ) binaryProducer.send(new ProducerRecord(basic_topic.get, key, event.toByteArray)) else binaryProducer.send(new ProducerRecord(basic_topic.get, event.toByteArray)) case event@PredictionEventGP(prediction, servedRequest, inferBytes, loggingSettings) => Try { val req = graphpipe.Request.getRootAsRequest(inferBytes.asReadOnlyByteBuffer().order(ByteOrder.LITTLE_ENDIAN)) val inferRequest = req.req(new InferRequest()).asInstanceOf[InferRequest] val inferConfigs = inferRequest.config() .split(",") .map(_.split(":")) .flatMap{ case Array(k, v) => Some((k, v)) case _ => None}.toMap loggingSettings .keyFeatures .flatMap(inferConfigs.get) .mkString(loggingSettings.getKeyFeaturesSeparator) } match { case Success(key) => binaryProducer.send(new ProducerRecord(gp_topic.get, key, event.toByteArray)) case _ => binaryProducer.send(new ProducerRecord(gp_topic.get, event.toByteArray)) } case _ => // ignore } }
Example 77
Source File: ExternalKafkaProcessorSupplier.scala From haystack-trends with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trends.kstream.processor import com.expedia.metrics.MetricData import com.expedia.www.haystack.trends.config.entities.KafkaProduceConfiguration import com.expedia.www.haystack.trends.kstream.serde.TrendMetricSerde.metricRegistry import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata} import org.apache.kafka.streams.processor.{AbstractProcessor, Processor, ProcessorContext, ProcessorSupplier} import org.slf4j.LoggerFactory class ExternalKafkaProcessorSupplier(kafkaProduceConfig: KafkaProduceConfiguration) extends ProcessorSupplier[String, MetricData] { private val LOGGER = LoggerFactory.getLogger(this.getClass) private val metricPointExternalKafkaSuccessMeter = metricRegistry.meter("metricpoint.kafka-external.success") private val metricPointExternalKafkaFailureMeter = metricRegistry.meter("metricpoint.kafka-external.failure") def get: Processor[String, MetricData] = { new ExternalKafkaProcessor(kafkaProduceConfig: KafkaProduceConfiguration) } def process(key: String, value: MetricData): Unit = { val kafkaMessage = new ProducerRecord(kafkaProduceTopic, key, value) kafkaProducer.send(kafkaMessage, new Callback { override def onCompletion(recordMetadata: RecordMetadata, e: Exception): Unit = { if (e != null) { LOGGER.error(s"Failed to produce the message to kafka for topic=$kafkaProduceTopic, with reason=", e) metricPointExternalKafkaFailureMeter.mark() } else { metricPointExternalKafkaSuccessMeter.mark() } } }) } } }
Example 78
Source File: KafkaClient.scala From mist with Apache License 2.0 | 5 votes |
package io.hydrosphere.mist.master.interfaces.async.kafka import java.util.UUID import java.util.concurrent.atomic.AtomicBoolean import org.apache.kafka.clients.consumer.KafkaConsumer import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} import scala.collection.JavaConverters._ import scala.concurrent.{Future, Promise} class TopicProducer[K, V]( producer: KafkaProducer[K, V], topic: String ) { def send(key:K, value: V): Unit = { val record = new ProducerRecord(topic, key, value) producer.send(record) } def close(): Unit = { producer.close() } } object TopicProducer { def apply( host: String, port: Int, topic: String): TopicProducer[String, String] = { val props = new java.util.Properties() props.put("bootstrap.servers", s"$host:$port") val producer = new KafkaProducer(props, new StringSerializer, new StringSerializer) new TopicProducer(producer, topic) } } class TopicConsumer[K, V]( consumer: KafkaConsumer[K, V], topic: String, timeout: Long = 100 ) { private val promise = Promise[Unit] private val stopped = new AtomicBoolean(false) def subscribe(f: (K, V) => Unit): Future[Unit] = { run(f) promise.future } private def run(f: (K, V) => Unit): Unit = { consumer.subscribe(Seq(topic).asJava) val thread = new Thread(new Runnable { override def run(): Unit = { while (!stopped.get()) { val records = consumer.poll(timeout).asScala records.foreach(r => f(r.key(), r.value())) } promise.success(()) } }) thread.setName(s"kafka-topic-consumer-$topic") thread.start() } def close(): Future[Unit] = { stopped.set(true) promise.future } } object TopicConsumer { def apply( host: String, port: Int, topic: String): TopicConsumer[String, String] = { val props = new java.util.Properties() props.put("bootstrap.servers", s"$host:$port") props.put("group.id", "mist-" + UUID.randomUUID().toString) props.put("enable.auto.commit", "true") props.put("auto.commit.interval.ms", "1000") props.put("session.timeout.ms", "30000") val consumer = new KafkaConsumer(props, new StringDeserializer, new StringDeserializer) new TopicConsumer(consumer, topic) } }
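Note: consumer.poll(timeout: Long), used in TopicConsumer.run above, is the old overload; since Kafka 2.0 the client also offers poll(java.time.Duration), and newer clients deprecate the Long variant. The polling loop could then look like this sketch (the helper name is illustrative, not part of mist):

import java.time.Duration
import org.apache.kafka.clients.consumer.KafkaConsumer
import scala.collection.JavaConverters._

object PollLoopSketch {
  // Assumes the consumer is already subscribed, as in TopicConsumer.run above.
  def pollLoop[K, V](consumer: KafkaConsumer[K, V], isStopped: () => Boolean)(f: (K, V) => Unit): Unit = {
    while (!isStopped()) {
      val records = consumer.poll(Duration.ofMillis(100)).asScala // Duration overload, Kafka 2.0+
      records.foreach(r => f(r.key(), r.value()))
    }
  }
}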
Example 79
Source File: KafkaWordCount.scala From sparkoscope with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } } // scalastyle:on println
Example 80
Source File: ProcessingKafkaApplication.scala From Akka-Cookbook with MIT License | 5 votes |
package com.packt.chapter8 import akka.actor.ActorSystem import akka.kafka.scaladsl.{Consumer, Producer} import akka.kafka.{ConsumerSettings, ProducerSettings, Subscriptions} import akka.stream.{ActorMaterializer, ClosedShape} import akka.stream.scaladsl.{Flow, GraphDSL, RunnableGraph, Sink, Source} import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord} import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer} import scala.concurrent.duration._ object ProcessingKafkaApplication extends App { implicit val actorSystem = ActorSystem("SimpleStream") implicit val actorMaterializer = ActorMaterializer() val bootstrapServers = "localhost:9092" val kafkaTopic = "akka_streams_topic" val partition = 0 val subscription = Subscriptions.assignment(new TopicPartition(kafkaTopic, partition)) val consumerSettings = ConsumerSettings(actorSystem, new ByteArrayDeserializer, new StringDeserializer) .withBootstrapServers(bootstrapServers) .withGroupId("akka_streams_group") .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") val producerSettings = ProducerSettings(actorSystem, new ByteArraySerializer, new StringSerializer) .withBootstrapServers(bootstrapServers) val runnableGraph = RunnableGraph.fromGraph(GraphDSL.create() { implicit builder => import GraphDSL.Implicits._ val tickSource = Source.tick(0 seconds, 5 seconds, "Hello from Akka Streams using Kafka!") val kafkaSource = Consumer.plainSource(consumerSettings, subscription) val kafkaSink = Producer.plainSink(producerSettings) val printlnSink = Sink.foreach(println) val mapToProducerRecord = Flow[String].map(elem => new ProducerRecord[Array[Byte], String](kafkaTopic, elem)) val mapFromConsumerRecord = Flow[ConsumerRecord[Array[Byte], String]].map(record => record.value()) tickSource ~> mapToProducerRecord ~> kafkaSink kafkaSource ~> mapFromConsumerRecord ~> printlnSink ClosedShape }) runnableGraph.run() }
Example 81
Source File: KafkaSinkSpec.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka import java.util.Properties import com.twitter.bijection.Injection import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink.KafkaProducerFactory import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.mockito.Mockito._ import org.scalacheck.Gen import org.scalatest.mock.MockitoSugar import org.scalatest.prop.PropertyChecks import org.scalatest.{Matchers, PropSpec} import org.apache.gearpump.Message import org.apache.gearpump.streaming.MockUtil class KafkaSinkSpec extends PropSpec with PropertyChecks with Matchers with MockitoSugar { val dataGen = for { topic <- Gen.alphaStr key <- Gen.alphaStr msg <- Gen.alphaStr } yield (topic, Injection[String, Array[Byte]](key), Injection[String, Array[Byte]](msg)) property("KafkaSink write should send producer record") { forAll(dataGen) { (data: (String, Array[Byte], Array[Byte])) => val props = mock[Properties] val producer = mock[KafkaProducer[Array[Byte], Array[Byte]]] val producerFactory = mock[KafkaProducerFactory] val configFactory = mock[KafkaConfigFactory] val config = mock[KafkaConfig] when(configFactory.getKafkaConfig(props)).thenReturn(config) when(producerFactory.getKafkaProducer(config)).thenReturn(producer) val (topic, key, msg) = data val kafkaSink = new KafkaSink(topic, props, configFactory, producerFactory) kafkaSink.write(Message((key, msg))) verify(producer).send(MockUtil.argMatch[ProducerRecord[Array[Byte], Array[Byte]]]( r => r.topic == topic && (r.key sameElements key) && (r.value sameElements msg))) kafkaSink.write(Message(msg)) verify(producer).send(MockUtil.argMatch[ProducerRecord[Array[Byte], Array[Byte]]]( r => r.topic() == topic && (r.key == null) && (r.value() sameElements msg) )) kafkaSink.close() } } property("KafkaSink close should close kafka producer") { val props = mock[Properties] val producer = mock[KafkaProducer[Array[Byte], Array[Byte]]] val producerFactory = mock[KafkaProducerFactory] val configFactory = mock[KafkaConfigFactory] val config = mock[KafkaConfig] when(configFactory.getKafkaConfig(props)).thenReturn(config) when(producerFactory.getKafkaProducer(config)).thenReturn(producer) val kafkaSink = new KafkaSink("topic", props, configFactory, producerFactory) kafkaSink.close() verify(producer).close() } }
Example 82
Source File: KafkaStore.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.lib.store import java.util.Properties import com.twitter.bijection.Injection import kafka.api.OffsetRequest import org.apache.gearpump.Time.MilliSeconds import org.apache.gearpump.streaming.kafka.lib.source.consumer.KafkaConsumer import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.gearpump.streaming.transaction.api.{CheckpointStore, CheckpointStoreFactory} import org.apache.gearpump.util.LogUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer class KafkaStore private[kafka]( val topic: String, val producer: KafkaProducer[Array[Byte], Array[Byte]], val optConsumer: Option[KafkaConsumer]) extends CheckpointStore { import org.apache.gearpump.streaming.kafka.lib.store.KafkaStore._ private var maxTime: MilliSeconds = 0L override def persist(time: MilliSeconds, checkpoint: Array[Byte]): Unit = { // make sure checkpointed timestamp is monotonically increasing // hence (1, 1), (3, 2), (2, 3) is checkpointed as (1, 1), (3, 2), (3, 3) if (time > maxTime) { maxTime = time } val key = maxTime val value = checkpoint val message = new ProducerRecord[Array[Byte], Array[Byte]]( topic, 0, Injection[Long, Array[Byte]](key), value) producer.send(message) LOG.debug("KafkaStore persisted state ({}, {})", key, value) } override def recover(time: MilliSeconds): Option[Array[Byte]] = { var checkpoint: Option[Array[Byte]] = None optConsumer.foreach { consumer => while (consumer.hasNext && checkpoint.isEmpty) { val kafkaMsg = consumer.next() checkpoint = for { k <- kafkaMsg.key t <- Injection.invert[MilliSeconds, Array[Byte]](k).toOption c = kafkaMsg.msg if t >= time } yield c } consumer.close() } checkpoint match { case Some(c) => LOG.info(s"KafkaStore recovered checkpoint ($time, $c)") case None => LOG.info(s"no checkpoint existing for $time") } checkpoint } override def close(): Unit = { producer.close() LOG.info("KafkaStore closed") } }
Example 83
Source File: AbstractKafkaSink.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.lib.sink import java.util.Properties import org.apache.gearpump.Message import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink.KafkaProducerFactory import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.gearpump.streaming.sink.DataSink import org.apache.gearpump.streaming.task.TaskContext import org.apache.gearpump.util.LogUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer object AbstractKafkaSink { private val LOG = LogUtil.getLogger(classOf[AbstractKafkaSink]) val producerFactory = new KafkaProducerFactory { override def getKafkaProducer(config: KafkaConfig): KafkaProducer[Array[Byte], Array[Byte]] = { new KafkaProducer[Array[Byte], Array[Byte]](config.getProducerConfig, new ByteArraySerializer, new ByteArraySerializer) } } trait KafkaProducerFactory extends java.io.Serializable { def getKafkaProducer(config: KafkaConfig): KafkaProducer[Array[Byte], Array[Byte]] } } abstract class AbstractKafkaSink private[kafka]( topic: String, props: Properties, kafkaConfigFactory: KafkaConfigFactory, factory: KafkaProducerFactory) extends DataSink { import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink._ def this(topic: String, props: Properties) = { this(topic, props, new KafkaConfigFactory, AbstractKafkaSink.producerFactory) } private lazy val config = kafkaConfigFactory.getKafkaConfig(props) // Lazily construct producer since KafkaProducer is not serializable private lazy val producer = factory.getKafkaProducer(config) override def open(context: TaskContext): Unit = { LOG.info("KafkaSink opened") } override def write(message: Message): Unit = { message.value match { case (k: Array[Byte], v: Array[Byte]) => val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, k, v) producer.send(record) LOG.debug("KafkaSink sent record {} to Kafka", record) case v: Array[Byte] => val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, v) producer.send(record) LOG.debug("KafkaSink sent record {} to Kafka", record) case m => val errorMsg = s"unexpected message type ${m.getClass}; " + s"Array[Byte] or (Array[Byte], Array[Byte]) required" LOG.error(errorMsg) } } override def close(): Unit = { producer.close() LOG.info("KafkaSink closed") } }
Example 84
Source File: NumericalDataProducer.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.integrationtest.kafka import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer import org.apache.log4j.Logger import org.apache.gearpump.streaming.serializer.ChillSerializer class NumericalDataProducer(topic: String, bootstrapServers: String) { private val LOG = Logger.getLogger(getClass) private val producer = createProducer private val WRITE_SLEEP_NANOS = 10 private val serializer = new ChillSerializer[Int] var lastWriteNum = 0 def start(): Unit = { produceThread.start() } def stop(): Unit = { if (produceThread.isAlive) { produceThread.interrupt() produceThread.join() } producer.close() } def producedNumbers: Range = { Range(1, lastWriteNum + 1) } private def createProducer: KafkaProducer[Array[Byte], Array[Byte]] = { val properties = new Properties() properties.setProperty("bootstrap.servers", bootstrapServers) new KafkaProducer[Array[Byte], Array[Byte]](properties, new ByteArraySerializer, new ByteArraySerializer) } private val produceThread = new Thread(new Runnable { override def run(): Unit = { try { while (!Thread.currentThread.isInterrupted) { lastWriteNum += 1 val msg = serializer.serialize(lastWriteNum) val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, msg) producer.send(record) Thread.sleep(0, WRITE_SLEEP_NANOS) } } catch { case ex: InterruptedException => LOG.error("message producing is stopped by an interrupt") } } }) }
Example 85
Source File: KafkaProducerInjector.scala From SparkOnKudu with Apache License 2.0 | 5 votes |
package org.kududb.spark.demo.gamer.aggregates import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} object KafkaProducerInjector { def main(args:Array[String]): Unit = { if (args.length == 0) { println("{brokerList} {topic} {#OfRecords} {sleepTimeEvery10Records} {#OfGamers}") return } val brokerList = args(0) val topic = args(1) val numOfRecords = args(2).toInt val sleepTimeEvery10Records = args(3).toInt val numOfGamers = args(4).toInt val producer = getNewProducer(brokerList) for (i <- 0 until numOfRecords) { val gamerRecord = GamerDataGenerator.makeNewGamerRecord(numOfGamers) val message = new ProducerRecord[String, String](topic, gamerRecord.gamerId.toString, gamerRecord.toString()) producer.send(message) if (i % 10 == 0) { Thread.sleep(sleepTimeEvery10Records) print(".") } if (i % 2000 == 0) { println() println("Records Sent:" + i) println() } } } def getNewProducer(brokerList:String): KafkaProducer[String, String] = { val kafkaProps = new Properties kafkaProps.put("bootstrap.servers", brokerList) kafkaProps.put("metadata.broker.list", brokerList) // This is mandatory, even though we don't send keys kafkaProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") kafkaProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") kafkaProps.put("acks", "0") // how many times to retry when produce request fails? kafkaProps.put("retries", "3") kafkaProps.put("linger.ms", "2") kafkaProps.put("batch.size", "1000") kafkaProps.put("queue.time", "2") new KafkaProducer[String, String](kafkaProps) } }
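Note: "metadata.broker.list" and "queue.time" above come from the old Scala producer; the new KafkaProducer ignores them and logs an "unknown configuration" warning. The same setup, restricted to keys the new client recognizes and written with the ProducerConfig constants, would look roughly like this sketch:

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig}

object NewProducerConfigSketch {
  def getNewProducer(brokerList: String): KafkaProducer[String, String] = {
    val kafkaProps = new Properties
    kafkaProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList)
    kafkaProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put(ProducerConfig.ACKS_CONFIG, "0")    // fire-and-forget, as in the original
    kafkaProps.put(ProducerConfig.RETRIES_CONFIG, "3") // note: retries have no visible effect with acks=0
    kafkaProps.put(ProducerConfig.LINGER_MS_CONFIG, "2")
    kafkaProps.put(ProducerConfig.BATCH_SIZE_CONFIG, "1000")
    new KafkaProducer[String, String](kafkaProps)
  }
}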
Example 86
Source File: KafkaOutput.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.plugin.output.kafka import java.io.{Serializable => JSerializable} import java.util.Properties import com.stratio.sparta.plugin.input.kafka.KafkaBase import com.stratio.sparta.sdk.pipeline.output.Output._ import com.stratio.sparta.sdk.pipeline.output.{Output, OutputFormatEnum, SaveModeEnum} import com.stratio.sparta.sdk.properties.CustomProperties import com.stratio.sparta.sdk.properties.ValidatingPropertyMap._ import org.apache.kafka.clients.producer.ProducerConfig._ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import org.apache.spark.sql._ import scala.collection.mutable class KafkaOutput(name: String, properties: Map[String, JSerializable]) extends Output(name, properties) with KafkaBase with CustomProperties { val DefaultKafkaSerializer = classOf[StringSerializer].getName val DefaultAck = "0" val DefaultBatchNumMessages = "200" val DefaultProducerPort = "9092" override val customKey = "KafkaProperties" override val customPropertyKey = "kafkaPropertyKey" override val customPropertyValue = "kafkaPropertyValue" val outputFormat = OutputFormatEnum.withName(properties.getString("format", "json").toUpperCase) val rowSeparator = properties.getString("rowSeparator", ",") override def supportedSaveModes: Seq[SaveModeEnum.Value] = Seq(SaveModeEnum.Append) override def save(dataFrame: DataFrame, saveMode: SaveModeEnum.Value, options: Map[String, String]): Unit = { val tableName = getTableNameFromOptions(options) validateSaveMode(saveMode) outputFormat match { case OutputFormatEnum.ROW => dataFrame.rdd.foreachPartition(messages => messages.foreach(message => send(tableName, message.mkString(rowSeparator)))) case _ => dataFrame.toJSON.foreachPartition { messages => messages.foreach(message => send(tableName, message)) } } } def send(topic: String, message: String): Unit = { val record = new ProducerRecord[String, String](topic, message) KafkaOutput.getProducer(getProducerConnectionKey, createProducerProps).send(record) } private[kafka] def getProducerConnectionKey: String = getHostPort(BOOTSTRAP_SERVERS_CONFIG, DefaultHost, DefaultProducerPort) .getOrElse(BOOTSTRAP_SERVERS_CONFIG, throw new Exception("Invalid metadata broker list")) private[kafka] def createProducerProps: Properties = { val props = new Properties() properties.filter(_._1 != customKey).foreach { case (key, value) => props.put(key, value.toString) } mandatoryOptions.foreach { case (key, value) => props.put(key, value) } getCustomProperties.foreach { case (key, value) => props.put(key, value) } props } private[kafka] def mandatoryOptions: Map[String, String] = getHostPort(BOOTSTRAP_SERVERS_CONFIG, DefaultHost, DefaultProducerPort) ++ Map( KEY_SERIALIZER_CLASS_CONFIG -> properties.getString(KEY_SERIALIZER_CLASS_CONFIG, DefaultKafkaSerializer), VALUE_SERIALIZER_CLASS_CONFIG -> properties.getString(VALUE_SERIALIZER_CLASS_CONFIG, DefaultKafkaSerializer), ACKS_CONFIG -> properties.getString(ACKS_CONFIG, DefaultAck), BATCH_SIZE_CONFIG -> properties.getString(BATCH_SIZE_CONFIG, DefaultBatchNumMessages) ) override def cleanUp(options: Map[String, String]): Unit = { log.info(s"Closing Kafka producer in Kafka Output: $name") KafkaOutput.closeProducers() } } object KafkaOutput { private val producers: mutable.Map[String, KafkaProducer[String, String]] = mutable.Map.empty def getProducer(producerKey: String, properties: Properties): KafkaProducer[String, String] = { getInstance(producerKey, properties) } def closeProducers(): Unit = { producers.values.foreach(producer => producer.close()) } private[kafka] def getInstance(key: String, properties: Properties): KafkaProducer[String, String] = { producers.getOrElse(key, { val producer = new KafkaProducer[String, String](properties) producers.put(key, producer) producer }) } }
Example 87
Source File: MultiDataStreamer.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config.{bootstrapServer, topic} import knolx.KnolXLogger import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.language.postfixOps import scala.util.Random object MultiDataStreamer extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) info("Streaming data into Kafka...") system.scheduler.schedule(0 seconds, 3000 milliseconds) { (1 to Random.nextInt(100)).foreach { id => producer.send(new ProducerRecord[String, String](topic,s"device$id", (Math.random * 2 + 1).toString)) } } }