org.apache.kafka.clients.producer.ProducerRecord Scala Examples
The following examples show how to use org.apache.kafka.clients.producer.ProducerRecord.
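As a quick orientation, here is a minimal, self-contained sketch of the three ProducerRecord constructor shapes most of the examples below rely on. The topic name, broker address, and object name are illustrative placeholders, not taken from any of the listed projects, and the sketch assumes a broker reachable at localhost:9092.

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object ProducerRecordSketch extends App {
  val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092") // placeholder broker address
  props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
  val producer = new KafkaProducer[String, String](props)

  // Value only: the target partition is chosen by the producer's partitioner.
  producer.send(new ProducerRecord[String, String]("demo-topic", "value-only"))
  // Key and value: records with the same key always land on the same partition.
  producer.send(new ProducerRecord[String, String]("demo-topic", "some-key", "keyed value"))
  // Explicit partition, key and value: the record is pinned to partition 0.
  producer.send(new ProducerRecord[String, String]("demo-topic", 0, "some-key", "pinned value"))

  producer.close()
}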
Example 1
Source File: IntegrationTest.scala From kmq with Apache License 2.0 | 6 votes |
package com.softwaremill.kmq.redelivery

import java.time.Duration
import java.util.Random

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerMessage, ProducerSettings, Subscriptions}
import akka.stream.ActorMaterializer
import akka.testkit.TestKit
import com.softwaremill.kmq._
import com.softwaremill.kmq.redelivery.infrastructure.KafkaSpec
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.scalatest.concurrent.Eventually
import org.scalatest.time.{Seconds, Span}
import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers}

import scala.collection.mutable.ArrayBuffer

class IntegrationTest extends TestKit(ActorSystem("test-system")) with FlatSpecLike with KafkaSpec
  with BeforeAndAfterAll with Eventually with Matchers {

  implicit val materializer = ActorMaterializer()
  import system.dispatcher

  "KMQ" should "resend message if not committed" in {
    val bootstrapServer = s"localhost:${testKafkaConfig.kafkaPort}"
    val kmqConfig = new KmqConfig("queue", "markers", "kmq_client", "kmq_redelivery",
      Duration.ofSeconds(1).toMillis, 1000)

    val consumerSettings = ConsumerSettings(system, new StringDeserializer, new StringDeserializer)
      .withBootstrapServers(bootstrapServer)
      .withGroupId(kmqConfig.getMsgConsumerGroupId)
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

    val markerProducerSettings = ProducerSettings(system,
      new MarkerKey.MarkerKeySerializer(), new MarkerValue.MarkerValueSerializer())
      .withBootstrapServers(bootstrapServer)
      .withProperty(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[ParititionFromMarkerKey].getName)
    val markerProducer = markerProducerSettings.createKafkaProducer()

    val random = new Random()

    lazy val processedMessages = ArrayBuffer[String]()
    lazy val receivedMessages = ArrayBuffer[String]()

    val control = Consumer.committableSource(consumerSettings, Subscriptions.topics(kmqConfig.getMsgTopic)) // 1. get messages from topic
      .map { msg =>
        ProducerMessage.Message(
          new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic,
            MarkerKey.fromRecord(msg.record), new StartMarker(kmqConfig.getMsgTimeoutMs)), msg)
      }
      .via(Producer.flow(markerProducerSettings, markerProducer)) // 2. write the "start" marker
      .map(_.message.passThrough)
      .mapAsync(1) { msg =>
        msg.committableOffset.commitScaladsl().map(_ => msg.record) // this should be batched
      }
      .map { msg =>
        receivedMessages += msg.value
        msg
      }
      .filter(_ => random.nextInt(5) != 0)
      .map { processedMessage =>
        processedMessages += processedMessage.value
        new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic,
          MarkerKey.fromRecord(processedMessage), EndMarker.INSTANCE)
      }
      .to(Producer.plainSink(markerProducerSettings, markerProducer)) // 5. write "end" markers
      .run()

    val redeliveryHook = RedeliveryTracker.start(new KafkaClients(bootstrapServer), kmqConfig)

    val messages = (0 to 20).map(_.toString)
    messages.foreach(msg => sendToKafka(kmqConfig.getMsgTopic, msg))

    eventually {
      receivedMessages.size should be > processedMessages.size
      processedMessages.sortBy(_.toInt).distinct shouldBe messages
    }(PatienceConfig(timeout = Span(15, Seconds)), implicitly)

    redeliveryHook.close()
    control.shutdown()
  }

  override def afterAll(): Unit = {
    super.afterAll()
    TestKit.shutdownActorSystem(system)
  }
}
Example 2
Source File: KafkaMessageSender.scala From kafka-with-akka-streams-kafka-streams-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.scala.kafka

// Excerpt from the message-sender class defined in this file; the enclosing class
// declaration (which holds the `producer` used below) is omitted in the original snippet.

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
}
Example 3
Source File: ExampleApp.scala From kafka4s with Apache License 2.0 | 5 votes |
package example3

import cats.effect._
import cats.implicits._
import fs2.Stream
import com.banno.kafka._
import com.banno.kafka.admin._
import com.banno.kafka.consumer._
import com.banno.kafka.producer._
import org.apache.kafka.clients.admin.NewTopic
import org.apache.kafka.clients.producer.ProducerRecord
import scala.concurrent.duration._
import scala.util.Random

final class ExampleApp[F[_]: Concurrent: ContextShift: Timer] {

  // Change these for your environment as needed
  val topic = new NewTopic(s"example3", 1, 3.toShort)
  val kafkaBootstrapServers = "kafka.local:9092,kafka.local:9093"

  val example: F[Unit] =
    for {
      _ <- Sync[F].delay(println("Starting kafka4s example"))
      _ <- AdminApi.createTopicsIdempotent[F](kafkaBootstrapServers, topic)
      writeStream = Stream
        .resource(ProducerApi.resource[F, Int, Int](BootstrapServers(kafkaBootstrapServers)))
        .flatMap { producer =>
          Stream
            .awakeDelay[F](1 second)
            .evalMap { _ =>
              Sync[F].delay(Random.nextInt()).flatMap { i =>
                producer.sendAndForget(new ProducerRecord(topic.name, i, i))
              }
            }
        }
      readStream = Stream
        .resource(
          ConsumerApi
            .resource[F, Int, Int](
              BootstrapServers(kafkaBootstrapServers),
              GroupId("example3"),
              AutoOffsetReset.earliest,
              EnableAutoCommit(true)
            )
        )
        .evalTap(_.subscribe(topic.name))
        .flatMap(
          _.recordStream(1.second)
            .map(_.value)
            .filter(_ % 2 == 0)
            .evalMap(i => Sync[F].delay(println(i)))
        )
      _ <- writeStream
        .merge(readStream)
        .onFinalize(Sync[F].delay(println("Finished kafka4s example")))
        .compile
        .drain
    } yield ()
}

object ExampleApp {
  def apply[F[_]: Concurrent: ContextShift: Timer] = new ExampleApp[F]
}
Example 4
Source File: KafkaStreamingWriterRDD.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.writer

import com.github.benfradet.spark.kafka010.writer._
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.spark.sql.SparkSession

object KafkaStreamingWriterRDD extends App {

  val spark = SparkSession
    .builder
    .master("local")
    .appName("streaming-writer")
    .getOrCreate()

  setupLogging()

  val numbersRDD = spark.range(400, 420).rdd

  numbersRDD.writeToKafka(
    numbersProducerConfig,
    s => new ProducerRecord[String, String](topic, "key " + s, s.toString)
  )
}
Example 5
Source File: CommitConsumerToFlowProducer.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.integration

import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ProducerMessage, Subscriptions}
import akka.stream.scaladsl.Sink
import com.example._
import org.apache.kafka.clients.producer.ProducerRecord

object CommitConsumerToFlowProducer extends App {

  val done =
    Consumer.committableSource(consumerSettings, Subscriptions.topics(topic1))
      .map { msg =>
        println(s"topic1 -> topic2: $msg")
        ProducerMessage.Message(new ProducerRecord[Array[Byte], String](
          topic2,
          msg.record.value
        ), msg.committableOffset)
      }
      .via(Producer.flow(producerSettings))
      .mapAsync(producerSettings.parallelism) { result =>
        result.message.passThrough.commitScaladsl()
      }
      .runWith(Sink.ignore)
}
Example 6
Source File: CommitConsumerToSinkProducer.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.integration

import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ProducerMessage, Subscriptions}
import org.apache.kafka.clients.producer.ProducerRecord
import com.example._

object CommitConsumerToSinkProducer {

  val done =
    Consumer.committableSource(consumerSettings, Subscriptions.topics(topic1))
      .map { msg =>
        println(s"topic1 -> topic2: $msg")
        ProducerMessage.Message(new ProducerRecord[Array[Byte], String](
          topic2,
          msg.record.value
        ), msg.committableOffset)
      }
      .runWith(Producer.commitableSink(producerSettings))
}
Example 7
Source File: FlowProducer.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.producer

import akka.kafka.ProducerMessage
import akka.kafka.scaladsl.Producer
import akka.stream.scaladsl.{Sink, Source}
import org.apache.kafka.clients.producer.ProducerRecord
import com.example._

object FlowProducer extends App {

  val done = Source(100 to 111)
    .map { n =>
      val partition = 1
      ProducerMessage.Message(new ProducerRecord[Array[Byte], String](
        topic, partition, null, n.toString
      ), n)
    }
    .via(Producer.flow(producerSettings))
    .map { result =>
      val record = result.message.record
      println(s"${record.topic}/${record.partition} ${result.offset}: ${record.value}" +
        s"(${result.message.passThrough})")
      result
    }
    .runWith(Sink.ignore)
}
Example 8
Source File: SimpleProducer.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.producer

import java.util.Properties

import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer}

object SimpleProducer extends App {

  val topic = "sample_topic"

  private val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092")
  props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

  val producer = new KafkaProducer[String, String](props)

  try {
    for (i <- 0 to 10) {
      producer.send(new ProducerRecord[String, String](topic, "title " + i.toString, "data from topic"))
      println(s"Sent: $i")
    }
    println("Message sent successfully")
    producer.close()
  } catch {
    case ex: Exception => ex.printStackTrace()
  }
}
Example 9
Source File: AkkaActorKafkaProduder.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example

import akka.actor.ActorSystem
import cakesolutions.kafka.KafkaProducerRecord
import cakesolutions.kafka.akka.{KafkaProducerActor, ProducerRecords}
import org.apache.kafka.clients.producer.ProducerRecord

object AkkaActorKafkaProduder {

  def main(args: Array[String]): Unit = {
    actorProducer()
  }

  private def actorProducer() = {
    val system = ActorSystem()
    val producer = system.actorOf(KafkaProducerActor.props(kafkaProducerConf))

    val batch: Seq[ProducerRecord[String, String]] = Seq(
      KafkaProducerRecord(topic, "foo"),
      KafkaProducerRecord(topic, "key", "value"),
      KafkaProducerRecord(topic, "bar")
    )
    val message = ProducerRecords(batch)

    producer ! message
  }
}
Example 10
Source File: BasicTest.scala From kafka-testing with Apache License 2.0 | 5 votes |
package com.landoop.kafka.testing

import org.apache.avro.Schema
import org.apache.avro.generic.GenericData
import org.apache.kafka.clients.producer.ProducerRecord

class BasicTest extends ClusterTestingCapabilities {

  private val createAvroRecord = {
    val userSchema = "{\"namespace\": \"example.avro\", \"type\": \"record\", " +
      "\"name\": \"User\"," +
      "\"fields\": [{\"name\": \"name\", \"type\": \"string\"}]}"
    val parser = new Schema.Parser
    val schema = parser.parse(userSchema)
    val avroRecord = new GenericData.Record(schema)
    avroRecord.put("name", "testUser")
    avroRecord
  }

  "KCluster" should {
    "start up and be able to handle avro records being sent " in {
      val topic = "testAvro" + System.currentTimeMillis()
      val avroRecord = createAvroRecord
      val objects = Array[AnyRef](avroRecord)
      val producerProps = stringAvroProducerProps
      val producer = createProducer[String, Any](producerProps)

      for (o <- objects) {
        val message = new ProducerRecord[String, Any](topic, o)
        producer.send(message)
      }

      val consumerProps = stringAvroConsumerProps()
      val consumer = createStringAvroConsumer(consumerProps)
      val records = consumeStringAvro(consumer, topic, objects.length)
      objects.toSeq shouldBe records
    }

    "write and read avro records" in {
      val topic = "testAvro" + System.currentTimeMillis()
      val avroRecord = createAvroRecord
      val objects = Array[Any](avroRecord, true, 130, 345L, 1.23f, 2.34d, "abc", "def".getBytes)
      val producerProps = stringAvroProducerProps
      val producer = createProducer[String, Any](producerProps)

      for (o <- objects) {
        producer.send(new ProducerRecord[String, Any](topic, o))
      }

      val consumerProps = stringAvroConsumerProps("group" + System.currentTimeMillis())
      val consumer = createStringAvroConsumer(consumerProps)
      val records = consumeStringAvro(consumer, topic, objects.length)
      objects.deep shouldBe records.toArray.deep
    }
  }
}
Example 11
Source File: package.scala From Waves with MIT License | 5 votes |
package com.wavesplatform.events

import java.util

import com.wavesplatform.events.protobuf.PBEvents
import com.wavesplatform.events.settings.BlockchainUpdatesSettings
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.config.SaslConfigs
import org.apache.kafka.common.serialization.{IntegerSerializer, Serializer}

package object kafka {

  private object BlockchainUpdatedSerializer extends Serializer[BlockchainUpdated] {
    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
    override def close(): Unit = {}
    override def serialize(topic: String, data: BlockchainUpdated): Array[Byte] =
      PBEvents.protobuf(data).toByteArray
  }

  private object IntSerializer extends Serializer[Int] {
    val integerSerializer = new IntegerSerializer

    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit =
      integerSerializer.configure(configs, isKey)
    override def close(): Unit = integerSerializer.close()
    override def serialize(topic: String, data: Int): Array[Byte] =
      integerSerializer.serialize(topic, data)
  }

  def createProperties(settings: BlockchainUpdatesSettings): util.Properties = {
    val props = new util.Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, settings.bootstrapServers)
    props.put(ProducerConfig.CLIENT_ID_CONFIG, settings.clientId)
    // props.put(ProducerConfig.RETRIES_CONFIG, "0")

    // SASL_SSL
    if (settings.ssl.enabled) {
      props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SASL_SSL")
      props.put(SaslConfigs.SASL_MECHANISM, "PLAIN")
      props.put(
        SaslConfigs.SASL_JAAS_CONFIG,
        s"org.apache.kafka.common.security.plain.PlainLoginModule required username = '${settings.ssl.username}' password = '${settings.ssl.password}';"
      )
    }
    props
  }

  def createProducerProperties(settings: BlockchainUpdatesSettings): util.Properties = {
    val props = createProperties(settings)
    props.put(ProducerConfig.ACKS_CONFIG, "all")
    props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, "10485760") // 10MB
    props
  }

  def createProducer(settings: BlockchainUpdatesSettings): KafkaProducer[Int, BlockchainUpdated] =
    new KafkaProducer[Int, BlockchainUpdated](createProducerProperties(settings), IntSerializer, BlockchainUpdatedSerializer)

  def createProducerRecord(topic: String, event: BlockchainUpdated): ProducerRecord[Int, BlockchainUpdated] = {
    val h = event match {
      case ap: BlockAppended                      => ap.toHeight
      case MicroBlockAppended(_, height, _, _, _) => height
      case RollbackCompleted(_, height)           => height
      case MicroBlockRollbackCompleted(_, height) => height
    }
    new ProducerRecord[Int, BlockchainUpdated](topic, h, event)
  }
}
Example 12
Source File: HiveOrcBenchmark.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.hive.it

import org.apache.kafka.clients.producer.ProducerRecord

import scala.concurrent.Future
import scala.io.Source
import scala.util.Try

object HiveOrcBenchmark extends App with PersonTestData with HiveTests {

  import scala.concurrent.ExecutionContext.Implicits.global

  val start = System.currentTimeMillis()

  val topic = createTopic()

  val taskDef = Source.fromInputStream(getClass.getResourceAsStream("/hive_sink_task_no_partitions-orc.json")).getLines().mkString("\n")
    .replace("{{TOPIC}}", topic)
    .replace("{{TABLE}}", topic)
    .replace("{{NAME}}", topic)

  postTask(taskDef)

  Future {
    val producer = stringStringProducer()
    val count = 10000000 // 10mil
    for (k <- 0 until count) {
      producer.send(new ProducerRecord(topic, JacksonSupport.mapper.writeValueAsString(person)))
      if (k % 100000 == 0) {
        println(s"Flushing records [total=$count]")
        producer.flush()
      }
    }
    producer.flush()
    producer.close()
  }

  Future {
    while (true) {
      Try {
        Thread.sleep(2000)
        withConn { conn =>
          val stmt = conn.createStatement
          val rs = stmt.executeQuery(s"select count(*) from $topic")
          rs.next()
          val total = rs.getLong(1)
          val time = System.currentTimeMillis() - start
          println(s"Total $total in ${time}ms which is ${total / (time / 1000)} records per second")
        }
      }
    }
    stopTask(topic)
  }
}
Example 13
Source File: Producer.scala From fusion-data with Apache License 2.0 | 5 votes |
package kafkasample.demo

import java.util.Properties
import java.util.concurrent.TimeUnit

import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerRecord, RecordMetadata }

object Producer {

  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    val producer = new KafkaProducer[String, String](props)
    try {
      run(producer)
    } finally {
      TimeUnit.SECONDS.sleep(5)
      producer.close()
    }
  }

  private def run[K, V](producer: KafkaProducer[String, String]) {
    val record = new ProducerRecord[String, String]("customerCountries", "羊八井222")
    producer.send(record, (metadata: RecordMetadata, e: Exception) => {
      if (e ne null) {
        e.printStackTrace()
      }
      println(s"metadata: $metadata")
    })
  }
}
Example 14
Source File: KafkaBatchProducer.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.kafka2.writer

import java.util.Properties

import scala.collection.JavaConverters._
import scala.collection.immutable.Map
import scala.language.implicitConversions
import scala.reflect.runtime.universe._

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame

import com.paypal.gimel.kafka2.conf.{KafkaClientConfiguration, KafkaConstants}
import com.paypal.gimel.kafka2.utilities.{KafkaOptionsLoaderUtils, KafkaUtilitiesException}

  // Excerpt: produceToKafka belongs to the enclosing writer object defined in this file
  // (which also provides the `logger` used below); its declaration is omitted in the
  // original snippet.
  def produceToKafka(conf: KafkaClientConfiguration, dataFrame: DataFrame): Unit = {
    def MethodName: String = new Exception().getStackTrace().apply(1).getMethodName()
    logger.info(" @Begin --> " + MethodName)

    val kafkaProps: Properties = conf.kafkaProducerProps
    logger.info(s"Kafka Props for Producer -> ${kafkaProps.asScala.mkString("\n")}")
    val kafkaTopic = conf.kafkaTopics
    val kafkaTopicsOptionsMap: Map[String, Map[String, String]] =
      KafkaOptionsLoaderUtils.getAllKafkaTopicsOptions(conf)
    logger.info("Kafka options loaded -> " + kafkaTopicsOptionsMap)
    val eachKafkaTopicToOptionsMap = KafkaOptionsLoaderUtils.getEachKafkaTopicToOptionsMap(kafkaTopicsOptionsMap)
    logger.info("Begin Publishing to Kafka....")
    try {
      val kafkaTopicOptions = eachKafkaTopicToOptionsMap.get(kafkaTopic)
      kafkaTopicOptions match {
        case None =>
          throw new IllegalStateException(s"""Could not load options for the kafka topic -> $kafkaTopic""")
        case Some(kafkaOptions) =>
          dataFrame
            .write
            .format(KafkaConstants.KAFKA_FORMAT)
            .option(KafkaConstants.KAFKA_TOPIC, kafkaTopic)
            .options(kafkaOptions)
            .save()
      }
    } catch {
      case ex: Throwable => {
        ex.printStackTrace()
        val msg =
          s"""
             |kafkaTopic -> ${kafkaTopic}
             |kafkaParams --> ${kafkaProps.asScala.mkString("\n")}}
          """.stripMargin
        throw new KafkaUtilitiesException(s"Failed While Pushing Data Into Kafka \n ${msg}")
      }
    }
    logger.info("Publish to Kafka - Completed !")
  }
}
Example 15
Source File: FlinkKafkaCodecSerde.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.flink

import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.streaming.connectors.kafka._
import cloudflow.streamlets.{ CodecInlet, CodecOutlet }

private[flink] class FlinkKafkaCodecSerializationSchema[T: TypeInformation](outlet: CodecOutlet[T], topic: String)
    extends KafkaSerializationSchema[T] {
  override def serialize(value: T, timestamp: java.lang.Long): ProducerRecord[Array[Byte], Array[Byte]] =
    new ProducerRecord(topic, outlet.codec.encode(value))
}

private[flink] class FlinkKafkaCodecDeserializationSchema[T: TypeInformation](inlet: CodecInlet[T])
    extends KafkaDeserializationSchema[T] {
  override def deserialize(record: ConsumerRecord[Array[Byte], Array[Byte]]): T = inlet.codec.decode(record.value)
  override def isEndOfStream(value: T): Boolean = false
  override def getProducedType: TypeInformation[T] = implicitly[TypeInformation[T]]
}
Example 16
Source File: KafkaSinkRef.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.akkastream

import scala.concurrent._
import scala.util._

import akka._
import akka.actor.ActorSystem
import akka.kafka._
import akka.kafka.ConsumerMessage._
import akka.kafka.scaladsl._
import akka.stream._
import akka.stream.scaladsl._

import org.apache.kafka.clients.producer.{ Callback, ProducerRecord, RecordMetadata }
import org.apache.kafka.common.serialization._

import cloudflow.streamlets._

final class KafkaSinkRef[T](
    system: ActorSystem,
    outlet: CodecOutlet[T],
    internalKafkaBootstrapServers: String,
    topic: Topic,
    killSwitch: SharedKillSwitch,
    completionPromise: Promise[Dun]
) extends WritableSinkRef[T] {

  private val producerSettings = ProducerSettings(system, new ByteArraySerializer, new ByteArraySerializer)
    .withBootstrapServers(topic.bootstrapServers.getOrElse(internalKafkaBootstrapServers))
    .withProperties(topic.kafkaProducerProperties)

  private val producer = producerSettings.createKafkaProducer()

  def sink: Sink[(T, Committable), NotUsed] = {
    system.log.info(s"Creating sink for topic: $topic")

    Flow[(T, Committable)]
      .map {
        case (value, offset) ⇒
          val key = outlet.partitioner(value)
          val bytesValue = outlet.codec.encode(value)
          ProducerMessage.Message[Array[Byte], Array[Byte], Committable](
            new ProducerRecord(topic.name, key.getBytes("UTF8"), bytesValue),
            offset
          )
      }
      .via(Producer.flexiFlow(producerSettings.withProducer(producer)))
      .via(handleTermination)
      .to(Sink.ignore)
      .mapMaterializedValue(_ ⇒ NotUsed)
  }

  private def handleTermination[I]: Flow[I, I, NotUsed] =
    Flow[I]
      .via(killSwitch.flow)
      .alsoTo(
        Sink.onComplete {
          case Success(_) ⇒
            system.log.error(s"Stream has completed. Shutting down streamlet...")
            completionPromise.success(Dun)
          case Failure(e) ⇒
            system.log.error(e, "Stream has failed. Shutting down streamlet...")
            completionPromise.failure(e)
        }
      )

  def write(value: T): Future[T] = {
    val key = outlet.partitioner(value)
    val bytesKey = keyBytes(key)
    val bytesValue = outlet.codec.encode(value)
    val record = new ProducerRecord(topic.name, bytesKey, bytesValue)
    val promise = Promise[T]()

    producer.send(
      record,
      new Callback() {
        def onCompletion(metadata: RecordMetadata, exception: Exception) {
          if (exception == null) promise.success(value)
          else promise.failure(exception)
        }
      }
    )

    promise.future
  }

  private def keyBytes(key: String) = if (key != null) key.getBytes("UTF8") else null
}
Example 17
Source File: TestProducer.scala From asura with MIT License | 5 votes |
package asura.kafka.producer

import akka.Done
import akka.actor.ActorSystem
import akka.kafka.ProducerSettings
import akka.kafka.scaladsl.Producer
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Source
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.Future

object TestProducer extends StrictLogging {

  def main(args: Array[String]): Unit = {
    logger.info("Start producer")

    implicit val system = ActorSystem("producer")
    implicit val materializer = ActorMaterializer()
    implicit val ec = system.dispatcher

    val producerSettings = ProducerSettings(system, new StringSerializer, new StringSerializer)
    val done: Future[Done] = Source(1 to 100)
      .map(value => new ProducerRecord[String, String]("test-topic", s"msg ${value}"))
      .runWith(Producer.plainSink(producerSettings))

    done onComplete {
      case scala.util.Success(_) => logger.info("Done"); system.terminate()
      case scala.util.Failure(err) => logger.error(err.toString); system.terminate()
    }
  }
}
Example 18
Source File: TestAvroProducer.scala From asura with MIT License | 5 votes |
package asura.kafka.producer

import akka.actor.ActorSystem
import akka.kafka.ProducerSettings
import akka.kafka.scaladsl.Producer
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Source
import asura.kafka.avro.SampleAvroClass
import com.typesafe.scalalogging.StrictLogging
import io.confluent.kafka.serializers.{AbstractKafkaAvroSerDeConfig, KafkaAvroDeserializerConfig, KafkaAvroSerializer}
import org.apache.avro.specific.SpecificRecord
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization._

import scala.collection.JavaConverters._

// https://doc.akka.io/docs/alpakka-kafka/current/serialization.html
object TestAvroProducer extends StrictLogging {

  def main(args: Array[String]): Unit = {
    implicit val system = ActorSystem("producer")
    implicit val materializer = ActorMaterializer()
    implicit val ec = system.dispatcher

    val schemaRegistryUrl = ""
    val bootstrapServers = ""
    val topic = ""

    val kafkaAvroSerDeConfig = Map[String, Any](
      AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG -> schemaRegistryUrl,
      KafkaAvroDeserializerConfig.SPECIFIC_AVRO_READER_CONFIG -> true.toString
    )
    val producerSettings: ProducerSettings[String, SpecificRecord] = {
      val kafkaAvroSerializer = new KafkaAvroSerializer()
      kafkaAvroSerializer.configure(kafkaAvroSerDeConfig.asJava, false)
      val serializer = kafkaAvroSerializer.asInstanceOf[Serializer[SpecificRecord]]
      ProducerSettings(system, new StringSerializer, serializer)
        .withBootstrapServers(bootstrapServers)
    }

    val samples = (1 to 3).map(i => SampleAvroClass(s"key_$i", s"name_$i"))
    val done = Source(samples)
      .map(n => new ProducerRecord[String, SpecificRecord](topic, n.key, n))
      .runWith(Producer.plainSink(producerSettings))

    done onComplete {
      case scala.util.Success(_) => logger.info("Done"); system.terminate()
      case scala.util.Failure(err) => logger.error(err.toString); system.terminate()
    }
  }
}
Example 19
Source File: KafkaWordCount.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord}

import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf

object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Zookeeper connection properties
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
// scalastyle:on println
Example 20
Source File: package.scala From kafka4s with Apache License 2.0 | 5 votes |
package com.banno.kafka

import org.scalacheck.{Arbitrary, Cogen, Gen}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

package object test {

  implicit def arbitraryProducerRecord[K: Arbitrary, V: Arbitrary]: Arbitrary[ProducerRecord[K, V]] =
    Arbitrary {
      for {
        t <- Gen.identifier
        k <- Arbitrary.arbitrary[K]
        v <- Arbitrary.arbitrary[V]
      } yield new ProducerRecord(t, k, v)
    }

  implicit def arbitraryConsumerRecord[K: Arbitrary, V: Arbitrary]: Arbitrary[ConsumerRecord[K, V]] =
    Arbitrary {
      for {
        t <- Gen.identifier
        p <- Gen.posNum[Int]
        o <- Gen.posNum[Long]
        k <- Arbitrary.arbitrary[K]
        v <- Arbitrary.arbitrary[V]
      } yield new ConsumerRecord(t, p, o, k, v)
    }

  //these things are necessary for EqSpec
  implicit def producerRecordCogen[K, V]: Cogen[ProducerRecord[K, V]] =
    Cogen(pr => pr.key.toString.length.toLong + pr.value.toString.length.toLong) // ¯\_(ツ)_/¯

  implicit def consumerRecordCogen[K, V]: Cogen[ConsumerRecord[K, V]] =
    Cogen(cr => cr.key.toString.length.toLong + cr.value.toString.length.toLong) // ¯\_(ツ)_/¯
}
Example 21
Source File: PrometheusMetricsReporterApiSpec.scala From kafka4s with Apache License 2.0 | 5 votes |
package com.banno.kafka.metrics.prometheus

import scala.collection.compat._
import cats.implicits._
import cats.effect.IO
import com.banno.kafka._
import com.banno.kafka.producer._
import com.banno.kafka.consumer._
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import io.prometheus.client.CollectorRegistry
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

import scala.jdk.CollectionConverters._
import scala.concurrent.ExecutionContext
import scala.concurrent.duration._

class PrometheusMetricsReporterApiSpec extends AnyFlatSpec with Matchers with InMemoryKafka {

  implicit val defaultContextShift = IO.contextShift(ExecutionContext.global)
  implicit val defaultConcurrent = IO.ioConcurrentEffect(defaultContextShift)
  implicit val defaultTimer = IO.timer(ExecutionContext.global)

  //when kafka clients change their metrics, this test will help identify the changes we need to make
  "Prometheus reporter" should "register Prometheus collectors for all known Kafka metrics" in {
    val topic = createTopic(2)
    val records =
      List(new ProducerRecord(topic, 0, "a", "a"), new ProducerRecord(topic, 1, "b", "b"))
    ProducerApi
      .resource[IO, String, String](
        BootstrapServers(bootstrapServer),
        MetricReporters[ProducerPrometheusReporter]
      )
      .use(
        p =>
          ConsumerApi
            .resource[IO, String, String](
              BootstrapServers(bootstrapServer),
              ClientId("c1"),
              MetricReporters[ConsumerPrometheusReporter]
            )
            .use(
              c1 =>
                ConsumerApi
                  .resource[IO, String, String](
                    BootstrapServers(bootstrapServer),
                    ClientId("c2"),
                    MetricReporters[ConsumerPrometheusReporter]
                  )
                  .use(
                    c2 =>
                      for {
                        _ <- p.sendSyncBatch(records)

                        _ <- c1.assign(topic, Map.empty[TopicPartition, Long])
                        _ <- c1.poll(1 second)
                        _ <- c1.poll(1 second)

                        _ <- c2.assign(topic, Map.empty[TopicPartition, Long])
                        _ <- c2.poll(1 second)
                        _ <- c2.poll(1 second)

                        _ <- IO.sleep(PrometheusMetricsReporterApi.defaultUpdatePeriod + (1 second))
                        _ <- p.close
                        _ <- c1.close
                        _ <- c2.close
                      } yield {
                        val registry = CollectorRegistry.defaultRegistry
                        registry.metricFamilySamples.asScala
                          .count(_.name.startsWith("kafka_producer")) should ===(56)
                        registry.metricFamilySamples.asScala
                          .find(_.name == "kafka_producer_record_send_total")
                          .map(_.samples.asScala.map(_.value)) should ===(Some(List(2)))

                        registry.metricFamilySamples.asScala
                          .count(_.name.startsWith("kafka_consumer")) should ===(50)
                        registry.metricFamilySamples.asScala
                          .find(_.name == "kafka_consumer_records_consumed_total")
                          .map(_.samples.asScala.map(_.value)) should ===(Some(List(2, 2)))
                        registry.metricFamilySamples.asScala
                          .find(_.name == "kafka_consumer_topic_records_consumed_total")
                          .map(_.samples.asScala.map(_.value)) should ===(Some(List(2, 2)))
                      }
                  )
            )
      )
      .unsafeRunSync()
  }
}
Example 22
Source File: ExampleApp.scala From kafka4s with Apache License 2.0 | 5 votes |
package example1

import cats.effect._
import cats.implicits._
import com.banno.kafka._
import com.banno.kafka.admin._
import com.banno.kafka.schemaregistry._
import com.banno.kafka.consumer._
import com.banno.kafka.producer._
import com.sksamuel.avro4s.RecordFormat
import org.apache.kafka.clients.admin.NewTopic
import org.apache.kafka.clients.producer.ProducerRecord
import scala.concurrent.duration._
import org.apache.kafka.common.TopicPartition

final class ExampleApp[F[_]: Async: ContextShift] {
  import ExampleApp._

  // Change these for your environment as needed
  val topic = new NewTopic(s"example1.customers.v1", 1, 3.toShort)
  val kafkaBootstrapServers = "kafka.local:9092,kafka.local:9093"
  val schemaRegistryUri = "http://kafka.local:8081"

  val producerRecords: Vector[ProducerRecord[CustomerId, Customer]] = (1 to 10)
    .map(
      a =>
        new ProducerRecord(
          topic.name,
          CustomerId(a.toString),
          Customer(s"name-${a}", s"address-${a}")
        )
    )
    .toVector

  val producerResource: Resource[F, ProducerApi[F, CustomerId, Customer]] =
    ProducerApi.Avro4s.resource[F, CustomerId, Customer](
      BootstrapServers(kafkaBootstrapServers),
      SchemaRegistryUrl(schemaRegistryUri),
      ClientId("producer-example")
    )

  val consumerResource =
    ConsumerApi.Avro4s.resource[F, CustomerId, Customer](
      BootstrapServers(kafkaBootstrapServers),
      SchemaRegistryUrl(schemaRegistryUri),
      ClientId("consumer-example"),
      GroupId("consumer-example-group"),
      EnableAutoCommit(false)
    )

  val example: F[Unit] =
    for {
      _ <- Sync[F].delay(println("Starting kafka4s example"))
      _ <- AdminApi.createTopicsIdempotent[F](kafkaBootstrapServers, topic)
      _ <- Sync[F].delay(println(s"Created topic ${topic.name}"))
      schemaRegistry <- SchemaRegistryApi(schemaRegistryUri)
      _ <- schemaRegistry.registerKey[CustomerId](topic.name)
      _ <- Sync[F].delay(println(s"Registered key schema for topic ${topic.name}"))
      _ <- schemaRegistry.registerValue[Customer](topic.name)
      _ <- Sync[F].delay(println(s"Registered value schema for topic ${topic.name}"))
      _ <- producerResource.use(
        producer =>
          producerRecords.traverse_(
            pr =>
              producer.sendSync(pr) *> Sync[F]
                .delay(println(s"Wrote producer record: key ${pr.key} and value ${pr.value}"))
          )
      )
      _ <- consumerResource.use(
        consumer =>
          consumer.assign(topic.name, Map.empty[TopicPartition, Long]) *> consumer
            .recordStream(1.second)
            .take(producerRecords.size.toLong)
            .evalMap(
              cr =>
                Sync[F]
                  .delay(println(s"Read consumer record: key ${cr.key} and value ${cr.value}"))
            )
            .compile
            .drain
      )
      _ <- Sync[F].delay(println("Finished kafka4s example"))
    } yield ()
}

object ExampleApp {
  case class CustomerId(id: String)
  case class Customer(name: String, address: String)
  implicit def customerIdRecordFormat = RecordFormat[CustomerId]
  implicit def customerRecordFormat = RecordFormat[Customer]
  def apply[F[_]: Async: ContextShift] = new ExampleApp[F]
}
Example 23
Source File: MonixKafkaTopicRegexTest.scala From monix-kafka with Apache License 2.0 | 5 votes |
package monix.kafka

import monix.eval.Task
import monix.execution.Scheduler.Implicits.global
import monix.kafka.config.AutoOffsetReset
import monix.reactive.Observable
import org.apache.kafka.clients.producer.ProducerRecord
import org.scalatest.FunSuite

import scala.collection.JavaConverters._
import scala.concurrent.Await
import scala.concurrent.duration._

class MonixKafkaTopicRegexTest extends FunSuite with KafkaTestKit {
  val topicsRegex = "monix-kafka-tests-.*".r
  val topicMatchingRegex = "monix-kafka-tests-anything"

  val producerCfg = KafkaProducerConfig.default.copy(
    bootstrapServers = List("127.0.0.1:6001"),
    clientId = "monix-kafka-1-0-producer-test"
  )

  val consumerCfg = KafkaConsumerConfig.default.copy(
    bootstrapServers = List("127.0.0.1:6001"),
    groupId = "kafka-tests",
    clientId = "monix-kafka-1-0-consumer-test",
    autoOffsetReset = AutoOffsetReset.Earliest
  )

  test("publish one message when subscribed to topics regex") {
    withRunningKafka {
      val producer = KafkaProducer[String, String](producerCfg, io)
      val consumerTask = KafkaConsumerObservable.createConsumer[String, String](consumerCfg, topicsRegex).executeOn(io)
      val consumer = Await.result(consumerTask.runToFuture, 60.seconds)

      try {
        // Publishing one message
        val send = producer.send(topicMatchingRegex, "my-message")
        Await.result(send.runToFuture, 30.seconds)
        val records = consumer.poll(10.seconds.toMillis).asScala.map(_.value()).toList
        assert(records === List("my-message"))
      } finally {
        Await.result(producer.close().runToFuture, Duration.Inf)
        consumer.close()
      }
    }
  }

  test("listen for one message when subscribed to topics regex") {
    withRunningKafka {
      val producer = KafkaProducer[String, String](producerCfg, io)
      val consumer = KafkaConsumerObservable[String, String](consumerCfg, topicsRegex).executeOn(io)
      try {
        // Publishing one message
        val send = producer.send(topicMatchingRegex, "test-message")
        Await.result(send.runToFuture, 30.seconds)
        val first = consumer.take(1).map(_.value()).firstL
        val result = Await.result(first.runToFuture, 30.seconds)
        assert(result === "test-message")
      } finally {
        Await.result(producer.close().runToFuture, Duration.Inf)
      }
    }
  }

  test("full producer/consumer test when subscribed to topics regex") {
    withRunningKafka {
      val count = 10000

      val producer = KafkaProducerSink[String, String](producerCfg, io)
      val consumer = KafkaConsumerObservable[String, String](consumerCfg, topicsRegex).executeOn(io).take(count)

      val pushT = Observable
        .range(0, count)
        .map(msg => new ProducerRecord(topicMatchingRegex, "obs", msg.toString))
        .bufferIntrospective(1024)
        .consumeWith(producer)

      val listT = consumer
        .map(_.value())
        .toListL

      val (result, _) = Await.result(Task.parZip2(listT, pushT).runToFuture, 60.seconds)
      assert(result.map(_.toInt).sum === (0 until count).sum)
    }
  }
}
Example 24
Source File: KafkaOffsetRevertTest.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.kafka

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject

// Excerpt: the declaration of the enclosing object is omitted in the original snippet.
  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp(true))
    var i = 0;
    while (true) {
      // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis()))
      val map = Map("id" -> i,
        "createTime" -> sdf.format(System.currentTimeMillis()),
        "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("kafka_offset", jsonObject.toString())
      producer.send(msg)
      producer.flush()

      Thread.sleep(1000)
      i = i + 1
      // System.exit(-1)
    }
  }
}
Example 25
Source File: StreamingProducerApp.scala From Scala-Programming-Projects with MIT License | 5 votes |
package coinyser

import cats.effect.{ExitCode, IO, IOApp}
import com.pusher.client.Pusher
import StreamingProducer._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.collection.JavaConversions._

object StreamingProducerApp extends IOApp {
  val topic = "transactions"

  val pusher = new Pusher("de504dc5763aeef9ff52")

  val props = Map(
    "bootstrap.servers" -> "localhost:9092",
    "key.serializer" -> "org.apache.kafka.common.serialization.IntegerSerializer",
    "value.serializer" -> "org.apache.kafka.common.serialization.StringSerializer")

  def run(args: List[String]): IO[ExitCode] = {
    val kafkaProducer = new KafkaProducer[Int, String](props)

    subscribe(pusher) { wsTx =>
      val tx = convertWsTransaction(deserializeWebsocketTransaction(wsTx))
      val jsonTx = serializeTransaction(tx)
      kafkaProducer.send(new ProducerRecord(topic, tx.tid, jsonTx))
    }.flatMap(_ => IO.never)
  }
}
Example 26
Source File: MergeByCommitCallbackTest.scala From monix-kafka with Apache License 2.0 | 5 votes |
package monix.kafka

import monix.eval.Task
import monix.kafka.config.AutoOffsetReset
import monix.reactive.Observable
import org.apache.kafka.clients.producer.ProducerRecord
import org.scalatest.{FunSuite, Matchers}

import scala.concurrent.duration._
import scala.concurrent.Await
import monix.execution.Scheduler.Implicits.global
import org.apache.kafka.clients.consumer.OffsetCommitCallback
import org.apache.kafka.common.TopicPartition
import org.scalacheck.Gen
import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks

class MergeByCommitCallbackTest extends FunSuite with KafkaTestKit with ScalaCheckDrivenPropertyChecks with Matchers {

  val commitCallbacks: List[Commit] = List.fill(4)(new Commit {
    override def commitBatchSync(batch: Map[TopicPartition, Long]): Task[Unit] = Task.unit
    override def commitBatchAsync(batch: Map[TopicPartition, Long], callback: OffsetCommitCallback): Task[Unit] =
      Task.unit
  })

  val committableOffsetsGen: Gen[CommittableOffset] = for {
    partition <- Gen.posNum[Int]
    offset <- Gen.posNum[Long]
    commit <- Gen.oneOf(commitCallbacks)
  } yield CommittableOffset(new TopicPartition("topic", partition), offset, commit)

  test("merge by commit callback works") {
    forAll(Gen.nonEmptyListOf(committableOffsetsGen)) { offsets =>
      val partitions = offsets.map(_.topicPartition)
      val received: List[CommittableOffsetBatch] = CommittableOffsetBatch.mergeByCommitCallback(offsets)

      received.foreach { batch =>
        partitions should contain allElementsOf batch.offsets.keys
      }

      received.size should be <= 4
    }
  }

  test("merge by commit callback for multiple consumers") {
    withRunningKafka {
      val count = 10000
      val topicName = "monix-kafka-merge-by-commit"

      val producerCfg = KafkaProducerConfig.default.copy(
        bootstrapServers = List("127.0.0.1:6001"),
        clientId = "monix-kafka-1-0-producer-test"
      )

      val producer = KafkaProducerSink[String, String](producerCfg, io)

      val pushT = Observable
        .range(0, count)
        .map(msg => new ProducerRecord(topicName, "obs", msg.toString))
        .bufferIntrospective(1024)
        .consumeWith(producer)

      val listT = Observable
        .range(0, 4)
        .mergeMap(i => createConsumer(i.toInt, topicName).take(500))
        .bufferTumbling(2000)
        .map(CommittableOffsetBatch.mergeByCommitCallback)
        .map { offsetBatches => assert(offsetBatches.length == 4) }
        .completedL

      Await.result(Task.parZip2(listT, pushT).runToFuture, 60.seconds)
    }
  }

  private def createConsumer(i: Int, topicName: String): Observable[CommittableOffset] = {
    val cfg = KafkaConsumerConfig.default.copy(
      bootstrapServers = List("127.0.0.1:6001"),
      groupId = s"kafka-tests-$i",
      autoOffsetReset = AutoOffsetReset.Earliest
    )

    KafkaConsumerObservable
      .manualCommit[String, String](cfg, List(topicName))
      .executeOn(io)
      .map(_.committableOffset)
  }
}
Example 27
Source File: SimpleProducer.scala From Fast-Data-Processing-Systems-with-SMACK-Stack with MIT License | 5 votes |
package packt.ch05

import java.util.{Date, Properties}

import packt.ch05.SimpleProducer._

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object SimpleProducer {

  private var producer: KafkaProducer[String, String] = _

  def main(args: Array[String]) {
    val argsCount = args.length
    if (argsCount == 0 || argsCount == 1)
      throw new IllegalArgumentException(
        "Provide topic name and Message count as arguments")

    // Topic name and the message count to be published is passed from the
    // command line
    val topic = args(0)
    val count = args(1)
    val messageCount = java.lang.Integer.parseInt(count)
    println("Topic Name - " + topic)
    println("Message Count - " + messageCount)
    val simpleProducer = new SimpleProducer()
    simpleProducer.publishMessage(topic, messageCount)
  }
}

class SimpleProducer {

  val props = new Properties()

  // Set the broker list for requesting metadata to find the lead broker
  props.put("metadata.broker.list",
    "192.168.146.132:9092, 192.168.146.132:9093, 192.168.146.132:9094")

  //This specifies the serializer class for keys
  props.put("serializer.class", "kafka.serializer.StringEncoder")

  // 1 means the producer receives an acknowledgment once the lead replica
  // has received the data. This option provides better durability as the
  // client waits until the server acknowledges the request as successful.
  props.put("request.required.acks", "1")

  producer = new KafkaProducer(props)

  private def publishMessage(topic: String, messageCount: Int) {
    for (mCount <- 0 until messageCount) {
      val runtime = new Date().toString
      val msg = "Message Publishing Time - " + runtime
      println(msg)
      // Create a message
      val data = new ProducerRecord[String, String](topic, msg)
      // Publish the message
      producer.send(data)
    }
    // Close producer connection with broker.
    producer.close()
  }
}
Example 28
Source File: CsvKafkaPublisher.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.cloudera.sa.taxi360.common

import java.io.File
import java.util.Random

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.io.Source

object CsvKafkaPublisher {

  var counter = 0
  var salts = 0

  def main(args: Array[String]): Unit = {
    if (args.length == 0) {
      println("<brokerList> " +
        "<topicName> " +
        "<dataFolderOrFile> " +
        "<sleepPerRecord> " +
        "<acks> " +
        "<linger.ms> " +
        "<producer.type> " +
        "<batch.size> " +
        "<salts>")
      return
    }

    val kafkaBrokerList = args(0)
    val kafkaTopicName = args(1)
    val nyTaxiDataFolder = args(2)
    val sleepPerRecord = args(3).toInt
    val acks = args(4).toInt
    val lingerMs = args(5).toInt
    val producerType = args(6) //"async"
    val batchSize = args(7).toInt
    salts = args(8).toInt

    val kafkaProducer = KafkaProducerUntil.getNewProducer(kafkaBrokerList, acks, lingerMs, producerType, batchSize)

    println("--Input:" + nyTaxiDataFolder)

    val dataFolder = new File(nyTaxiDataFolder)
    if (dataFolder.isDirectory) {
      val files = dataFolder.listFiles().iterator
      files.foreach(f => {
        println("--Input:" + f)
        processFile(f, kafkaTopicName, kafkaProducer, sleepPerRecord)
      })
    } else {
      println("--Input:" + dataFolder)
      processFile(dataFolder, kafkaTopicName, kafkaProducer, sleepPerRecord)
    }
    println("---Done")
  }

  def processFile(file: File, kafkaTopicName: String,
                  kafkaProducer: KafkaProducer[String, String], sleepPerRecord: Int): Unit = {
    var counter = 0
    val r = new Random()

    println("-Starting Reading")
    Source.fromFile(file).getLines().foreach(l => {
      counter += 1
      if (counter % 10000 == 0) {
        println("{Sent:" + counter + "}")
      }
      if (counter % 100 == 0) {
        print(".")
      }
      Thread.sleep(sleepPerRecord)

      val saltedVender = r.nextInt(salts) + l
      if (counter > 2) {
        publishTaxiRecord(saltedVender, kafkaTopicName, kafkaProducer)
      }
    })
  }

  def publishTaxiRecord(line: String, kafkaTopicName: String, kafkaProducer: KafkaProducer[String, String]): Unit = {
    if (line.startsWith("vendor_name") || line.length < 10) {
      println("skip")
    } else {
      val message = new ProducerRecord[String, String](kafkaTopicName, line.hashCode.toString, line)
      kafkaProducer.send(message)
    }
  }
}
Example 33
Source File: KafkaProducerActorSpec.scala From scala-kafka-client with MIT License | 5 votes |
package cakesolutions.kafka.akka import akka.actor.ActorSystem import akka.testkit.TestProbe import cakesolutions.kafka.{KafkaConsumer, KafkaProducer, KafkaProducerRecord} import org.apache.kafka.clients.consumer.OffsetResetStrategy import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} import scala.util.Random class KafkaProducerActorSpec(system_ : ActorSystem) extends KafkaIntSpec(system_) { def this() = this(ActorSystem("KafkaProducerActorSpec")) private def randomString: String = Random.alphanumeric.take(5).mkString("") val deserializer = new StringDeserializer val consumerConf = KafkaConsumer.Conf( deserializer, deserializer, bootstrapServers = s"localhost:$kafkaPort", groupId = "test", enableAutoCommit = false, autoOffsetReset = OffsetResetStrategy.EARLIEST ) val serializer = new StringSerializer val producerConf = KafkaProducer.Conf(serializer, serializer, bootstrapServers = s"localhost:$kafkaPort") "KafkaProducerActor" should "write a given batch to Kafka" in { val topic = randomString val probe = TestProbe() val producer = system.actorOf(KafkaProducerActor.props(producerConf)) val batch: Seq[ProducerRecord[String, String]] = Seq( KafkaProducerRecord(topic, "foo"), KafkaProducerRecord(topic, "key", "value"), KafkaProducerRecord(topic, "bar")) val message = ProducerRecords(batch, Some('response)) probe.send(producer, message) probe.expectMsg('response) val results = consumeFromTopic(topic, 3, 10000) results(0) shouldEqual ((None, "foo")) results(1) shouldEqual ((Some("key"), "value")) results(2) shouldEqual ((None, "bar")) } "KafkaProducerActor" should "write a given batch to Kafka, requiring no response" in { import scala.concurrent.duration._ val topic = randomString val probe = TestProbe() val producer = system.actorOf(KafkaProducerActor.props(producerConf)) val batch: Seq[ProducerRecord[String, String]] = Seq( KafkaProducerRecord(topic, "foo"), KafkaProducerRecord(topic, "key", "value"), KafkaProducerRecord(topic, "bar") ) val message = ProducerRecords(batch) probe.send(producer, message) probe.expectNoMessage(3.seconds) val results = consumeFromTopic(topic, 3, 10000) results(0) shouldEqual ((None, "foo")) results(1) shouldEqual ((Some("key"), "value")) results(2) shouldEqual ((None, "bar")) } private def consumeFromTopic(topic: String, expectedNumOfMessages: Int, timeout: Long) = kafkaServer.consume(topic, expectedNumOfMessages, timeout, deserializer, deserializer) }
Example 34
Source File: Kafka.scala From event-sourcing-kafka-streams with MIT License | 5 votes |
package org.amitayh.invoices.web import java.time.Duration import java.util.Collections.singletonList import java.util.Properties import cats.effect._ import cats.syntax.apply._ import cats.syntax.functor._ import fs2._ import org.amitayh.invoices.common.Config import org.amitayh.invoices.common.Config.Topics.Topic import org.apache.kafka.clients.consumer._ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata} import org.log4s.{Logger, getLogger} import scala.collection.JavaConverters._ object Kafka { trait Producer[F[_], K, V] { def send(key: K, value: V): F[RecordMetadata] } object Producer { def apply[F[_]: Async, K, V](producer: KafkaProducer[K, V], topic: Topic[K, V]): Producer[F, K, V] = (key: K, value: V) => Async[F].async { cb => val record = new ProducerRecord(topic.name, key, value) producer.send(record, (metadata: RecordMetadata, exception: Exception) => { if (exception != null) cb(Left(exception)) else cb(Right(metadata)) }) } } def producer[F[_]: Async, K, V](topic: Topic[K, V]): Resource[F, Producer[F, K, V]] = Resource { val create = Sync[F].delay { val props = new Properties props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers) new KafkaProducer[K, V](props, topic.keySerializer, topic.valueSerializer) } create.map(producer => (Producer(producer, topic), close(producer))) } def subscribe[F[_]: Sync, K, V](topic: Topic[K, V], groupId: String): Stream[F, (K, V)] = { val create = Sync[F].delay { val props = new Properties props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers) props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId) val consumer = new KafkaConsumer(props, topic.keyDeserializer, topic.valueDeserializer) consumer.subscribe(singletonList(topic.name)) consumer } Stream.bracket(create)(close[F]).flatMap(consume[F, K, V]) } private val logger: Logger = getLogger def log[F[_]: Sync](msg: String): F[Unit] = Sync[F].delay(logger.info(msg)) private def consume[F[_]: Sync, K, V](consumer: KafkaConsumer[K, V]): Stream[F, (K, V)] = for { records <- Stream.repeatEval(Sync[F].delay(consumer.poll(Duration.ofSeconds(1)))) record <- Stream.emits(records.iterator.asScala.toSeq) } yield record.key -> record.value private def close[F[_]: Sync](producer: KafkaProducer[_, _]): F[Unit] = Sync[F].delay(producer.close()) *> log(s"Producer closed") private def close[F[_]: Sync](consumer: KafkaConsumer[_, _]): F[Unit] = Sync[F].delay(consumer.close()) *> log("Consumer closed") }
Example 35
Source File: KafkaMessageSender.scala From model-serving-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.modelserving.client

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer

object MessageSender {
  // Builds the producer configuration from the broker list and serializer class names.
  def providerProperties(brokers: String, keySerializer: String, valueSerializer: String): Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializer)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializer)
    props
  }
}

class MessageSender(val brokers: String) {

  import MessageSender._

  val producer = new KafkaProducer[Array[Byte], Array[Byte]](
    providerProperties(brokers, classOf[ByteArraySerializer].getName, classOf[ByteArraySerializer].getName))

  def writeKeyValue(topic: String, key: Array[Byte], value: Array[Byte]): Unit = {
    producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, key, value)).get
    producer.flush()
  }

  def writeValue(topic: String, value: Array[Byte]): Unit = {
    producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get
    producer.flush()
  }

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
}
Example 36
Source File: SlotPartitionMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.demo

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject

object SlotPartitionMaker {

  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
    calendar.add(Calendar.MILLISECOND, 10)
    sdf.format(calendar.getTime)
  }

  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val prop = Common.getProp
    prop.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    prop.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    // Use the properties configured above so the serializer settings take effect
    val producer = new KafkaProducer[String, String](prop)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i = 0
    while (true) {
      val map = Map(
        "id" -> i,
        "createTime" -> getCreateTime(),
        "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic: slot_partition
      val msg = new ProducerRecord[String, String]("slot_partition", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      if (MathUtil.random.nextBoolean()) {
        Thread.sleep(1500)
      } else {
        Thread.sleep(500)
      }
      i = i + 1
    }
  }
}
Example 37
Source File: IntervalJoinKafkaKeyMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.intervalJoin import java.text.SimpleDateFormat import com.venn.common.Common import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject object IntervalJoinKafkaKeyMaker { val topic = "async" def main(args: Array[String]): Unit = { while (true) { left("topic_left") right("topic_right") Thread.sleep(500) } } val sdf = new SimpleDateFormat("yyyyMMddHHmmss") var idLeft = 0 def left(topic: String) = { val producer = new KafkaProducer[String, String](Common.getProp) idLeft = idLeft + 1 val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), "date" -> sdf.format(System.currentTimeMillis())) val jsonObject: JSONObject = new JSONObject(map) println("left : " + jsonObject.toString()) val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) producer.send(msg) producer.flush() } var idRight = 0 def right(topic: String) = { val producer = new KafkaProducer[String, String](Common.getProp) idRight = idRight + 1 val map = Map("id" -> idRight, "phone" -> ("17713333333" + idRight), "date" -> sdf.format(System.currentTimeMillis())) val jsonObject: JSONObject = new JSONObject(map) println("right : \t\t\t\t\t\t\t\t" + jsonObject.toString()) val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) producer.send(msg) producer.flush() } }
Example 38
Source File: FileSinkMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.filesink import java.text.SimpleDateFormat import java.util.Calendar import com.venn.common.Common import com.venn.stream.api.dayWindow.CurrentDayMaker.{calendar, getCreateTime, sdf} import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject object FileSinkMaker { val topic = "async" def main(args: Array[String]): Unit = { while (true) { left("roll_file_sink") Thread.sleep(100) } } val sdf = new SimpleDateFormat("yyyyMMddHHmmss") var idLeft = 0 def left(topic: String) = { val producer = new KafkaProducer[String, String](Common.getProp) idLeft = idLeft + 1 val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), "date" -> getCreateTime) val jsonObject: JSONObject = new JSONObject(map) println("left : " + jsonObject.toString()) val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) // producer.send(msg) // producer.flush() } var minute : Int = 1 val calendar: Calendar = Calendar.getInstance() def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.MINUTE, 10) sdf.format(calendar.getTime) } }
Example 39
Source File: WindowDemoMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.trigger import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.MILLISECOND, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i = 0; while (true) { val map = Map("id" -> i, "createTime" -> getCreateTime(), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString()) producer.send(msg) producer.flush() if (MathUtil.random.nextBoolean()) { Thread.sleep(1500) } else { Thread.sleep(500) } i = i + 1 // System.exit(-1) } } }
Example 40
Source File: LateDataMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.sideoutput.lateDataProcess import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.SECOND, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i =74540; while (true) { // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis())) val map = Map("id"-> i, "createTime"-> getCreateTime(), "amt"-> (MathUtil.random.nextInt(10) +"." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("late_data", jsonObject.toString()) producer.send(msg) producer.flush() Thread.sleep(200) i = i + 1 // System.exit(-1) } } }
Example 41
Source File: CurrentDayMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.dayWindow import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.MINUTE, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i =0; while (true) { // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis())) val map = Map("id"-> i, "createTime"-> getCreateTime(), "amt"-> (MathUtil.random.nextInt(10) +"." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString()) producer.send(msg) producer.flush() Thread.sleep(1000) i = i + 1 // System.exit(-1) } } }
Example 42
Source File: KafkaProducerUtils.scala From bigdata-examples with Apache License 2.0 | 5 votes |
package com.timeyang.common.util import java.util.Properties import com.timeyang.common.config.BaseConf import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer object KafkaProducerUtils { @volatile lazy private val producer: KafkaProducer[String, String] = { val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BaseConf.kafkaBrokerList) props.put("acks", "all") props.put("retries", 1: Integer) props.put("batch.size", 16384: Integer) props.put("linger.ms", 1: Integer) props.put("buffer.memory", 33554432: Integer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) new KafkaProducer[String, String](props) } def sendJsonMessages(topic: String, event: Object, events: Object*): Unit = { for (event <- event +: events) { val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event)) producer.send(record) } } def send(topic: String, events: List[Object]): Unit = { for (event <- events) { val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event)) producer.send(record) } } def send(topic: String, event: Object): Unit = { val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event)) producer.send(record) } }
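A possible usage sketch for the helper above, assuming the object and its JsonUtils and BaseConf dependencies are on the classpath; the Order case class and the topic name are hypothetical.

import com.timeyang.common.util.KafkaProducerUtils

object KafkaProducerUtilsUsage {

  // Hypothetical event type; JsonUtils.toJson is expected to serialize it.
  case class Order(id: Long, product: String, amount: Double)

  def main(args: Array[String]): Unit = {
    val order = Order(1L, "book", 9.99)

    // Single event, serialized to JSON and sent with a null key
    KafkaProducerUtils.send("orders", order)

    // Varargs overload for several events at once
    KafkaProducerUtils.sendJsonMessages("orders", order, Order(2L, "pen", 1.5))
  }
}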
Example 43
Source File: KafkaReporter.scala From Swallow with Apache License 2.0 | 5 votes |
package com.intel.hibench.common.streaming.metrics import java.util.Properties import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer} import org.apache.kafka.common.serialization.StringSerializer class KafkaReporter(topic: String, bootstrapServers: String) extends LatencyReporter { private val producer = ProducerSingleton.getInstance(bootstrapServers) override def report(startTime: Long, endTime: Long): Unit = { producer.send(new ProducerRecord[String, String](topic, null, s"$startTime:$endTime")) } } object ProducerSingleton { @volatile private var instance : Option[KafkaProducer[String, String]] = None def getInstance(bootstrapServers: String): KafkaProducer[String, String] = synchronized { if (!instance.isDefined) { synchronized { if(!instance.isDefined) { val props = new Properties() props.put("bootstrap.servers", bootstrapServers) instance = Some(new KafkaProducer(props, new StringSerializer, new StringSerializer)) } } } instance.get } }
Example 44
Source File: BasicTest.scala From ksql-streams with Apache License 2.0 | 5 votes |
package com.landoop.kstreams.sql.cluster import org.apache.avro.Schema import org.apache.avro.generic.GenericData import org.apache.kafka.clients.producer.ProducerRecord class BasicTest extends ClusterTestingCapabilities { private def createAvroRecord = { val userSchema = "{\"namespace\": \"example.avro\", \"type\": \"record\", " + "\"name\": \"User\"," + "\"fields\": [{\"name\": \"name\", \"type\": \"string\"}]}" val parser = new Schema.Parser val schema = parser.parse(userSchema) val avroRecord = new GenericData.Record(schema) avroRecord.put("name", "testUser") avroRecord } "KCluster" should { "start up and be able to handle avro records being sent " in { val topic = "testAvro" val avroRecord = createAvroRecord val objects = Array[AnyRef](avroRecord) val producerProps = stringAvroProducerProps val producer = createProducer(producerProps) for (o <- objects) { val message = new ProducerRecord[String, Any](topic, o) producer.send(message) } val consumerProps = stringAvroConsumerProps() val consumer = createStringAvroConsumer(consumerProps) val records = consumeStringAvro(consumer, topic, objects.length) objects.toSeq shouldBe records } "handle the avro new producer" in { val topic = "testAvro" val avroRecord = createAvroRecord val objects = Array[Any](avroRecord, true, 130, 345L, 1.23f, 2.34d, "abc", "def".getBytes) val producerProps = stringAvroProducerProps val producer = createProducer(producerProps) for (o <- objects) { producer.send(new ProducerRecord[String, Any](topic, o)) } val consumerProps = stringAvroConsumerProps() val consumer = createStringAvroConsumer(consumerProps) val records = consumeStringAvro(consumer, topic, objects.length) objects.deep shouldBe records.toArray.deep } } }
Example 45
Source File: KafkaMetrics.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.kafka.transport import akka.actor.ActorSystem import com.typesafe.config.Config import hydra.common.config.ConfigSupport import hydra.kafka.producer.KafkaRecordMetadata import hydra.kafka.util.KafkaUtils import org.apache.kafka.clients.producer.ProducerRecord import spray.json.DefaultJsonProtocol trait KafkaMetrics { def saveMetrics(record: KafkaRecordMetadata): Unit def close(): Unit = {} } // $COVERAGE-OFF$ object NoOpMetrics extends KafkaMetrics { def saveMetrics(record: KafkaRecordMetadata): Unit = {} } // $COVERAGE-ON$ class PublishMetrics(topic: String)(implicit system: ActorSystem) extends KafkaMetrics with DefaultJsonProtocol with ConfigSupport { import spray.json._ import KafkaRecordMetadata._ private val producer = KafkaUtils .producerSettings[String, String]("string", rootConfig) .withProperty("client.id", "hydra.kafka.metrics") .createKafkaProducer() def saveMetrics(record: KafkaRecordMetadata) = { val payload = record.toJson.compactPrint producer.send(new ProducerRecord(topic, record.destination, payload)) } override def close(): Unit = { producer.close() } } object KafkaMetrics { import ConfigSupport._ def apply(config: Config)(implicit system: ActorSystem): KafkaMetrics = { val metricsEnabled = config.getBooleanOpt("transports.kafka.metrics.enabled").getOrElse(false) val metricsTopic = config .getStringOpt("transports.kafka.metrics.topic") .getOrElse("HydraKafkaError") if (metricsEnabled) new PublishMetrics(metricsTopic) else NoOpMetrics } }
Example 46
Source File: DataStreamer.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random

object DataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val someWords = List("about", "above", "after", "again", "against")

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 200 milliseconds) {
    Random.shuffle(someWords).headOption.foreach { word =>
      producer.send(new ProducerRecord[String, String](topic, word))
    }
  }
}
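The streamer above never closes its producer or terminates its actor system. One possible addition, assuming the producer and system values defined in the object above, is a JVM shutdown hook:

// Appended inside the DataStreamer object: release resources when the JVM shuts down.
sys.addShutdownHook {
  producer.flush()   // push any buffered records out to the brokers
  producer.close()   // close network connections to the brokers
  system.terminate() // stop the actor system backing the scheduler
}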
Example 47
Source File: StreamStreamDataGenerator.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config._ import knolx.KnolXLogger import knolx.spark.Stock import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.json4s.jackson.Serialization.write import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.util.Random object StreamStreamDataGenerator extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay") val orderTypes = List("buy", "sell") val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9) val randomCompanyNames = Random.shuffle(companyNames).drop(Random.shuffle((1 to 3).toList).head) implicit val formats = Serialization.formats(NoTypeHints) info("Streaming companies listed into Kafka...") system.scheduler.schedule(0 seconds, 20 seconds) { randomCompanyNames.foreach { name => producer.send(new ProducerRecord[String, String](companiesTopic, name)) } } info("Streaming stocks data into Kafka...") system.scheduler.schedule(0 seconds, 5 seconds) { companyNames.foreach { name => val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head) producer.send(new ProducerRecord[String, String](stocksTopic, write(stock))) } } }
Example 48
Source File: ProducerSpec.scala From zio-kafka with Apache License 2.0 | 5 votes |
package zio.kafka.producer import org.apache.kafka.clients.producer.ProducerRecord import zio._ import zio.clock.Clock import zio.kafka.KafkaTestUtils._ import zio.kafka.consumer.{ Consumer, ConsumerSettings, Subscription } import zio.kafka.embedded.Kafka import zio.kafka.serde.Serde import zio.test.Assertion._ import zio.test._ import zio.test.environment.TestEnvironment object ProducerSpec extends DefaultRunnableSpec { override def spec = suite("producer test suite")( testM("one record") { for { _ <- Producer.produce[Any, String, String](new ProducerRecord("topic", "boo", "baa")) } yield assertCompletes }, testM("a non-empty chunk of records") { import Subscription._ val (topic1, key1, value1) = ("topic1", "boo", "baa") val (topic2, key2, value2) = ("topic2", "baa", "boo") val chunks = Chunk.fromIterable( List(new ProducerRecord(topic1, key1, value1), new ProducerRecord(topic2, key2, value2)) ) def withConsumer(subscription: Subscription, settings: ConsumerSettings) = Consumer.make(settings).flatMap { c => (c.subscribe(subscription).toManaged_ *> c.plainStream(Serde.string, Serde.string).toQueue()) } for { outcome <- Producer.produceChunk[Any, String, String](chunks) settings <- consumerSettings("testGroup", "testClient") record1 <- withConsumer(Topics(Set(topic1)), settings).use { consumer => for { messages <- consumer.take.flatMap(_.done).mapError(_.getOrElse(new NoSuchElementException)) record = messages .filter(rec => rec.record.key == key1 && rec.record.value == value1) .toSeq } yield record } record2 <- withConsumer(Topics(Set(topic2)), settings).use { consumer => for { messages <- consumer.take.flatMap(_.done).mapError(_.getOrElse(new NoSuchElementException)) record = messages.filter(rec => rec.record.key == key2 && rec.record.value == value2) } yield record } } yield { assert(outcome.length)(equalTo(2)) && assert(record1)(isNonEmpty) && assert(record2.length)(isGreaterThan(0)) } }, testM("an empty chunk of records") { val chunks = Chunk.fromIterable(List.empty) for { outcome <- Producer.produceChunk[Any, String, String](chunks) } yield assert(outcome.length)(equalTo(0)) } ).provideSomeLayerShared[TestEnvironment]( ((Kafka.embedded >>> stringProducer) ++ Kafka.embedded).mapError(TestFailure.fail) ++ Clock.live ) }
Example 49
Source File: KafkaEventProducer.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database.cosmosdb.cache import akka.Done import akka.actor.ActorSystem import akka.kafka.scaladsl.Producer import akka.kafka.{ProducerMessage, ProducerSettings} import akka.stream.scaladsl.{Keep, Sink, Source} import akka.stream.{ActorMaterializer, OverflowStrategy, QueueOfferResult} import org.apache.kafka.clients.consumer.ConsumerConfig import org.apache.kafka.clients.producer.ProducerRecord import org.apache.openwhisk.connector.kafka.KamonMetricsReporter import scala.collection.immutable.Seq import scala.concurrent.{ExecutionContext, Future, Promise} case class KafkaEventProducer( settings: ProducerSettings[String, String], topic: String, eventProducerConfig: EventProducerConfig)(implicit system: ActorSystem, materializer: ActorMaterializer) extends EventProducer { private implicit val executionContext: ExecutionContext = system.dispatcher private val queue = Source .queue[(Seq[String], Promise[Done])](eventProducerConfig.bufferSize, OverflowStrategy.dropNew) //TODO Use backpressure .map { case (msgs, p) => ProducerMessage.multi(msgs.map(newRecord), p) } .via(Producer.flexiFlow(producerSettings)) .map { case ProducerMessage.MultiResult(_, passThrough) => passThrough.success(Done) case _ => //As we use multi mode only other modes need not be handled } .toMat(Sink.ignore)(Keep.left) .run override def send(msg: Seq[String]): Future[Done] = { val promise = Promise[Done] queue.offer(msg -> promise).flatMap { case QueueOfferResult.Enqueued => promise.future case QueueOfferResult.Dropped => Future.failed(new Exception("Kafka request queue is full.")) case QueueOfferResult.QueueClosed => Future.failed(new Exception("Kafka request queue was closed.")) case QueueOfferResult.Failure(f) => Future.failed(f) } } def close(): Future[Done] = { queue.complete() queue.watchCompletion() } private def newRecord(msg: String) = new ProducerRecord[String, String](topic, "messages", msg) private def producerSettings = settings.withProperty(ConsumerConfig.METRIC_REPORTER_CLASSES_CONFIG, KamonMetricsReporter.name) }
Example 50
Source File: KafkaAsReceiver.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.sql.execution.streaming.http import java.util.Properties import org.apache.kafka.clients.producer.Callback import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.clients.producer.RecordMetadata import org.apache.spark.internal.Logging class KafkaAsReceiver(bootstrapServers: String) extends AbstractActionsHandler with SendStreamActionSupport with Logging { val props = new Properties(); props.put("bootstrap.servers", bootstrapServers); props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); val producer = new KafkaProducer[String, String](props); override def listActionHandlerEntries(requestBody: Map[String, Any]): PartialFunction[String, Map[String, Any]] = { case "actionSendStream" ⇒ handleSendStream(requestBody); } override def destroy() { producer.close(); } override def onReceiveStream(topic: String, rows: Array[RowEx]) = { var index = -1; for (row ← rows) { index += 1; val key = "" + row.batchId + "-" + row.offsetInBatch; //TODO: send an array instead of a string value? val value = row.originalRow(0).toString(); val record = new ProducerRecord[String, String](topic, key, value); producer.send(record, new Callback() { def onCompletion(metadata: RecordMetadata, e: Exception) = { if (e != null) { e.printStackTrace(); logError(e.getMessage); } else { val offset = metadata.offset(); val partition = metadata.partition(); logDebug(s"record is sent to kafka:key=$key, value=$value, partition=$partition, offset=$offset"); } } }); } } } class KafkaAsReceiverFactory extends ActionsHandlerFactory { def createInstance(params: Params) = new KafkaAsReceiver(params.getRequiredString("bootstrapServers")); }
Example 51
Source File: ActionsHandler.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.sql.execution.streaming.http import java.util.Properties import scala.collection.mutable.ArrayBuffer import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.clients.producer.ProducerRecord import org.apache.spark.internal.Logging import org.apache.spark.sql.Row import java.sql.Timestamp import org.apache.spark.sql.types.StructType import java.util.concurrent.atomic.AtomicInteger def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries; def destroy(); } trait ActionsHandlerFactory { def createInstance(params: Params): ActionsHandler; } abstract class AbstractActionsHandler extends ActionsHandler { def getRequiredParam(requestBody: Map[String, Any], key: String): Any = { val opt = requestBody.get(key); if (opt.isEmpty) { throw new MissingRequiredRequestParameterException(key); } opt.get; } override def destroy() = { } } class NullActionsHandler extends AbstractActionsHandler { override def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries = new ActionHandlerEntries() { def apply(action: String) = Map[String, Any](); //yes, do nothing def isDefinedAt(action: String) = false; }; } //rich row with extra info: id, time stamp, ... case class RowEx(originalRow: Row, batchId: Long, offsetInBatch: Long, timestamp: Timestamp) { def withTimestamp(): Row = Row.fromSeq(originalRow.toSeq :+ timestamp); def withId(): Row = Row.fromSeq(originalRow.toSeq :+ s"$batchId-$offsetInBatch"); def extra: (Long, Long, Timestamp) = { (batchId, offsetInBatch, timestamp) }; } trait SendStreamActionSupport { def onReceiveStream(topic: String, rows: Array[RowEx]); def getRequiredParam(requestBody: Map[String, Any], key: String): Any; val listeners = ArrayBuffer[StreamListener](); def addListener(listener: StreamListener): this.type = { listeners += listener; this; } protected def notifyListeners(topic: String, data: Array[RowEx]) { listeners.foreach { _.onArrive(topic, data); } } def handleSendStream(requestBody: Map[String, Any]): Map[String, Any] = { val topic = getRequiredParam(requestBody, "topic").asInstanceOf[String]; val batchId = getRequiredParam(requestBody, "batchId").asInstanceOf[Long]; val rows = getRequiredParam(requestBody, "rows").asInstanceOf[Array[Row]]; val ts = new Timestamp(System.currentTimeMillis()); var index = -1; val rows2 = rows.map { row ⇒ index += 1; RowEx(Row.fromSeq(row.toSeq), batchId, index, ts) } onReceiveStream(topic, rows2); notifyListeners(topic, rows2); Map("rowsCount" -> rows.size); } }
Example 52
Source File: ProducerStreamSpec.scala From reactive-kafka-microservice-template with Apache License 2.0 | 5 votes |
package akka.kafka import akka.actor.ActorSystem import akka.stream.scaladsl.{Sink, Source} import akka.testkit.{DefaultTimeout, ImplicitSender, TestKit, TestProbe} import com.omearac.consumers.ConsumerStream import com.omearac.producers.ProducerStream import com.omearac.settings.Settings import com.omearac.shared.JsonMessageConversion.Conversion import com.omearac.shared.KafkaMessages.{ExampleAppEvent, KafkaMessage} import org.apache.kafka.clients.producer.ProducerRecord import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike} class ProducerStreamSpec extends TestKit(ActorSystem("ProducerStreamSpec")) with DefaultTimeout with ImplicitSender with WordSpecLike with Matchers with BeforeAndAfterAll with ConsumerStream with ProducerStream { val settings = Settings(system).KafkaProducers val probe = TestProbe() override def afterAll: Unit = { shutdown() } "Sending KafkaMessages to the KafkaMessage producerStream" should { "be converted to JSON and obtained by the Stream Sink " in { //Creating Producer Stream Components for publishing KafkaMessages val producerProps = settings.KafkaProducerInfo("KafkaMessage") val numOfMessages = 50 val kafkaMsgs = for { i <- 0 to numOfMessages} yield KafkaMessage("sometime", "somestuff", i) val producerSource= Source(kafkaMsgs) val producerFlow = createStreamFlow[KafkaMessage](producerProps) val producerSink = Sink.actorRef(probe.ref, "complete") val jsonKafkaMsgs = for { msg <- kafkaMsgs} yield Conversion[KafkaMessage].convertToJson(msg) producerSource.via(producerFlow).runWith(producerSink) for (i <- 0 to jsonKafkaMsgs.length) { probe.expectMsgPF(){ case m: ProducerRecord[_,_] => if (jsonKafkaMsgs.contains(m.value())) () else fail() case "complete" => () } } } } "Sending ExampleAppEvent messages to the EventMessage producerStream" should { "be converted to JSON and obtained by the Stream Sink " in { //Creating Producer Stream Components for publishing ExampleAppEvent messages val producerProps = settings.KafkaProducerInfo("ExampleAppEvent") val numOfMessages = 50 val eventMsgs = for { i <- 0 to 50} yield ExampleAppEvent("sometime", "senderID", s"Event number $i occured") val producerSource= Source(eventMsgs) val producerFlow = createStreamFlow[ExampleAppEvent](producerProps) val producerSink = Sink.actorRef(probe.ref, "complete") val jsonAppEventMsgs = for{ msg <- eventMsgs} yield Conversion[ExampleAppEvent].convertToJson(msg) producerSource.via(producerFlow).runWith(producerSink) for (i <- 0 to jsonAppEventMsgs.length){ probe.expectMsgPF(){ case m: ProducerRecord[_,_] => if (jsonAppEventMsgs.contains(m.value())) () else fail() case "complete" => () } } } } }
Example 53
Source File: ProducerStream.scala From reactive-kafka-microservice-template with Apache License 2.0 | 5 votes |
package com.omearac.producers import akka.actor.{ActorRef, ActorSystem} import akka.kafka.ProducerSettings import akka.kafka.scaladsl.Producer import akka.stream.OverflowStrategy import akka.stream.scaladsl.{Flow, Source} import com.omearac.shared.JsonMessageConversion.Conversion import com.omearac.shared.{AkkaStreams, EventSourcing} import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.serialization.{ByteArraySerializer, StringSerializer} trait ProducerStream extends AkkaStreams with EventSourcing { implicit val system: ActorSystem def self: ActorRef def createStreamSource[msgType] = { Source.queue[msgType](Int.MaxValue,OverflowStrategy.backpressure) } def createStreamSink(producerProperties: Map[String, String]) = { val kafkaMBAddress = producerProperties("bootstrap-servers") val producerSettings = ProducerSettings(system, new ByteArraySerializer, new StringSerializer).withBootstrapServers(kafkaMBAddress) Producer.plainSink(producerSettings) } def createStreamFlow[msgType: Conversion](producerProperties: Map[String, String]) = { val numberOfPartitions = producerProperties("num.partitions").toInt -1 val topicToPublish = producerProperties("publish-topic") val rand = new scala.util.Random val range = 0 to numberOfPartitions Flow[msgType].map { msg => val partition = range(rand.nextInt(range.length)) val stringJSONMessage = Conversion[msgType].convertToJson(msg) new ProducerRecord[Array[Byte], String](topicToPublish, partition, null, stringJSONMessage) } } }
Example 54
Source File: KafkaTransmitter.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.simulator.transmitters import java.util.Properties import akka.actor.{ActorLogging, Props} import com.orendainx.trucking.simulator.transmitters.DataTransmitter.Transmit import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import scala.sys.SystemProperties import com.typesafe.config.Config object KafkaTransmitter { def props(topic: String)(implicit config: Config) = Props(new KafkaTransmitter(topic)) } class KafkaTransmitter(topic: String)(implicit config: Config) extends DataTransmitter with ActorLogging { private val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.getString("transmitter.kafka.bootstrap-servers")) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.key-serializer")) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.value-serializer")) // Enable settings for a secure environment, if necessary. // See: http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.4/bk_secure-kafka-ambari/content/ch_secure-kafka-produce-events.html val systemProperties = new SystemProperties if (config.getBoolean("transmitter.kafka.security-enabled")) { props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, config.getString("transmitter.kafka.security-protocol")) systemProperties.put("java.security.auth.login.config", config.getString("transmitter.kafka.jaas-file")) } private val producer = new KafkaProducer[String, String](props) def receive = { case Transmit(data) => producer.send(new ProducerRecord(topic, data.toCSV)) } override def postStop(): Unit = { producer.close() log.info("KafkaTransmitter closed its producer.") } }
Example 55
Source File: EventProducer.scala From rokku with Apache License 2.0 | 5 votes |
package com.ing.wbaa.rokku.proxy.provider.kafka import akka.Done import akka.http.scaladsl.model.HttpMethod import com.ing.wbaa.rokku.proxy.config.KafkaSettings import com.ing.wbaa.rokku.proxy.data.RequestId import com.ing.wbaa.rokku.proxy.handler.LoggerHandlerWithId import com.ing.wbaa.rokku.proxy.metrics.MetricsFactory import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata } import org.apache.kafka.common.serialization.StringSerializer import scala.concurrent.{ ExecutionContext, Future } trait EventProducer { private val logger = new LoggerHandlerWithId import scala.collection.JavaConverters._ protected[this] implicit val kafkaSettings: KafkaSettings protected[this] implicit val executionContext: ExecutionContext private lazy val config: Map[String, Object] = Map[String, Object]( "bootstrap.servers" -> kafkaSettings.bootstrapServers, ProducerConfig.RETRIES_CONFIG -> kafkaSettings.retries, ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG -> kafkaSettings.retriesBackOff, ProducerConfig.RECONNECT_BACKOFF_MAX_MS_CONFIG -> kafkaSettings.retriesBackOffMax, CommonClientConfigs.SECURITY_PROTOCOL_CONFIG -> kafkaSettings.protocol, ProducerConfig.MAX_BLOCK_MS_CONFIG -> kafkaSettings.maxblock, ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG -> kafkaSettings.requestTimeoutMs, "ssl.truststore.location" -> kafkaSettings.sslTruststoreLocation, "ssl.truststore.password" -> kafkaSettings.sslTruststorePassword, "ssl.keystore.location" -> kafkaSettings.sslKeystoreLocation, "ssl.keystore.password" -> kafkaSettings.sslKeystorePassword, "ssl.key.password" -> kafkaSettings.sslKeyPassword ) private lazy val kafkaProducer: KafkaProducer[String, String] = new KafkaProducer(config.asJava, new StringSerializer, new StringSerializer) def sendSingleMessage(event: String, topic: String, httpMethod: Option[HttpMethod] = None)(implicit id: RequestId): Future[Done] = { kafkaProducer .send(new ProducerRecord[String, String](topic, event), (metadata: RecordMetadata, exception: Exception) => { exception match { case e: Exception => MetricsFactory.incrementKafkaSendErrors logger.error("error in sending event {} to topic {}, error={}", event, topic, e) throw new Exception(e) case _ => httpMethod.map { m => MetricsFactory.incrementKafkaNotificationsSent(m) } logger.debug("Message sent {} to kafka, offset {}", event, metadata.offset()) } }) match { case _ => Future(Done) } } }
Example 56
Source File: StreamStaticDataGenerator.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config.{bootstrapServer, topic} import knolx.KnolXLogger import knolx.spark.Stock import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.json4s.jackson.Serialization.write import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.util.Random object StreamStaticDataGenerator extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay") val orderTypes = List("buy", "sell") val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9) implicit val formats = Serialization.formats(NoTypeHints) info("Streaming data into Kafka...") system.scheduler.schedule(0 seconds, 5 seconds) { companyNames.foreach { name => val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head) producer.send(new ProducerRecord[String, String](topic, write(stock))) } } }
Example 57
Source File: KafkaRecord.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.kafka.producer

import hydra.core.transport.HydraRecord
import org.apache.commons.lang3.ClassUtils
import org.apache.kafka.clients.producer.ProducerRecord

trait KafkaRecord[K, V] extends HydraRecord[K, V] {

  val formatName: String = {
    val cname = ClassUtils.getSimpleName(getClass)
    val idx = cname.indexOf("Record")
    if (idx != -1) {
      cname.take(idx).toLowerCase
    } else {
      getClass.getName
    }
  }
}

object KafkaRecord {

  implicit def toProducerRecord[K, V](record: KafkaRecord[K, V]): ProducerRecord[K, V] = {
    new ProducerRecord[K, V](
      record.destination,
      // A missing partition is passed to Kafka as null, letting the partitioner decide
      record.partition.map(Integer.valueOf).orNull,
      record.timestamp,
      record.key,
      record.payload
    )
  }
}
Example 58
Source File: KafkaMessagingSystem.scala From amadou with Apache License 2.0 | 5 votes |
package com.mediative.amadou package monitoring import java.util.Properties import com.typesafe.config.Config import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata} class KafkaMessagingSystem(config: Config) extends MessagingSystem with Logging { private val properties = KafkaMessagingSystem.readProperties(config) private val producer = new KafkaProducer[String, String](properties) private val topicPrefix = properties.getProperty("topic.prefix") override def publish(topic: String, message: String): Unit = { val topicName = s"$topicPrefix-$topic" logger.info(s"Publishing to $topicName :\n$message\n") producer.send(new ProducerRecord[String, String](topicName, message), new Callback { override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = if (exception != null) { logger .error(s"Cannot publish to $topicName. Caused by: ${exception.getMessage}", exception) } }) () } override def stop(): Unit = producer.close() } object KafkaMessagingSystem { def readProperties(config: Config): Properties = { val propertiesKeys = Seq( "bootstrap.servers", "acks", "retries", "batch.size", "linger.ms", "buffer.memory", "key.serializer", "value.serializer", "topic.prefix") val properties = new Properties() propertiesKeys.foreach(key => properties.setProperty(key, config.getString(key))) properties } }
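A possible usage sketch for the class above; the configuration values are placeholders chosen to match the keys that readProperties expects.

import com.mediative.amadou.monitoring.KafkaMessagingSystem
import com.typesafe.config.ConfigFactory

object MessagingUsageSketch {

  def main(args: Array[String]): Unit = {
    // Placeholder settings covering every key read by KafkaMessagingSystem.readProperties.
    val config = ConfigFactory.parseString(
      """
        |bootstrap.servers = "localhost:9092"
        |acks = "all"
        |retries = "1"
        |batch.size = "16384"
        |linger.ms = "1"
        |buffer.memory = "33554432"
        |key.serializer = "org.apache.kafka.common.serialization.StringSerializer"
        |value.serializer = "org.apache.kafka.common.serialization.StringSerializer"
        |topic.prefix = "amadou"
        |""".stripMargin)

    val messaging = new KafkaMessagingSystem(config)
    messaging.publish("jobs", """{"state": "running"}""") // lands on topic "amadou-jobs"
    messaging.stop()
  }
}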
Example 59
Source File: DStreamKafkaWriter.scala From spark-kafka-writer with Apache License 2.0 | 5 votes |
package com.github.benfradet.spark.kafka.writer

import org.apache.kafka.clients.producer.{Callback, ProducerRecord}
import org.apache.spark.streaming.dstream.DStream

import scala.reflect.ClassTag

class DStreamKafkaWriter[T: ClassTag](@transient private val dStream: DStream[T])
  extends KafkaWriter[T] with Serializable {

  override def writeToKafka[K, V](
    producerConfig: Map[String, Object],
    transformFunc: T => ProducerRecord[K, V],
    callback: Option[Callback] = None
  ): Unit =
    dStream.foreachRDD { rdd =>
      val rddWriter = new RDDKafkaWriter[T](rdd)
      rddWriter.writeToKafka(producerConfig, transformFunc, callback)
    }
}
Example 60
Source File: RDDKafkaWriter.scala From spark-kafka-writer with Apache License 2.0 | 5 votes |
package com.github.benfradet.spark.kafka.writer

import org.apache.kafka.clients.producer.{Callback, ProducerRecord}
import org.apache.spark.rdd.RDD

import scala.reflect.ClassTag

class RDDKafkaWriter[T: ClassTag](@transient private val rdd: RDD[T])
  extends KafkaWriter[T] with Serializable {

  override def writeToKafka[K, V](
    producerConfig: Map[String, Object],
    transformFunc: T => ProducerRecord[K, V],
    callback: Option[Callback] = None
  ): Unit =
    rdd.foreachPartition { partition =>
      val producer = KafkaProducerCache.getProducer[K, V](producerConfig)
      partition
        .map(transformFunc)
        .foreach(record => producer.send(record, callback.orNull))
    }
}
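A possible usage sketch for the RDD writer above, run against a local SparkContext; the broker address and topic name are placeholders, and the writer is instantiated directly rather than through the library's implicit syntax.

import com.github.benfradet.spark.kafka.writer.RDDKafkaWriter
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.spark.{SparkConf, SparkContext}

object RDDKafkaWriterUsage {

  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("kafka-writer-sketch"))

    // Standard Kafka client settings; the broker address is a placeholder.
    val producerConfig = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.serializer" -> "org.apache.kafka.common.serialization.StringSerializer",
      "value.serializer" -> "org.apache.kafka.common.serialization.StringSerializer"
    )

    val rdd = sc.parallelize(Seq("foo", "bar", "baz"))

    // Each partition reuses a cached producer and sends one record per element.
    new RDDKafkaWriter[String](rdd)
      .writeToKafka(producerConfig, s => new ProducerRecord[String, String]("example-topic", s))

    sc.stop()
  }
}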
Example 61
Source File: KafkaSink.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.kafka import com.sksamuel.exts.Logging import io.eels.schema.StructType import io.eels.{Row, SinkWriter, Sink} import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} trait KafkaRowConverter[V] { def convert(row: Row): V } object KafkaRowConverter { implicit object NoopRowConverter extends KafkaRowConverter[Row] { override def convert(row: Row): Row = row } } case class KafkaSink[K, V](topic: String, producer: KafkaProducer[K, V]) (implicit partitioner: KafkaPartitioner[V], converter: KafkaRowConverter[V], keygen: KafkaKeyGen[K]) extends Sink with Logging { def open(schema: StructType): SinkWriter = { new SinkWriter { override def write(row: Row): Unit = { val key = keygen.gen(row) val value = converter.convert(row) val record = partitioner.partition(row) match { case Some(part) => new ProducerRecord[K, V](topic, part, key, value) case _ => new ProducerRecord[K, V](topic, key, value) } logger.debug(s"Sending record $record") producer.send(record) producer.flush() } override def close(): Unit = producer.close() } } }
Example 62
Source File: WordCountProducer.scala From akka_streams_tutorial with MIT License | 5 votes |
package alpakka.kafka import java.util import java.util.concurrent.ThreadLocalRandom import akka.actor.ActorSystem import akka.kafka.ProducerMessage.Message import akka.kafka.ProducerSettings import akka.kafka.scaladsl.Producer import akka.stream.ThrottleMode import akka.stream.scaladsl.{Keep, Sink, Source} import akka.{Done, NotUsed} import org.apache.kafka.clients.producer.{Partitioner, ProducerRecord} import org.apache.kafka.common.errors.{NetworkException, UnknownTopicOrPartitionException} import org.apache.kafka.common.serialization.StringSerializer import org.apache.kafka.common.{Cluster, PartitionInfo} import scala.concurrent.Future import scala.concurrent.duration._ class CustomPartitioner extends Partitioner { override def partition(topic: String, key: Any, keyBytes: Array[Byte], value: Any, valueBytes: Array[Byte], cluster: Cluster): Int = { val partitionInfoList: util.List[PartitionInfo] = cluster.availablePartitionsForTopic(topic) val partitionCount = partitionInfoList.size val fakeNewsPartition = 0 //println("CustomPartitioner received key: " + key + " and value: " + value) if (value.toString.contains(WordCountProducer.fakeNewsKeyword)) { //println("CustomPartitioner send message: " + value + " to fakeNewsPartition") fakeNewsPartition } else ThreadLocalRandom.current.nextInt(1, partitionCount) //round robin } override def close(): Unit = { println("CustomPartitioner: " + Thread.currentThread + " received close") } override def configure(configs: util.Map[String, _]): Unit = { println("CustomPartitioner received configure with configuration: " + configs) } } object CustomPartitioner { private def deserialize[V](objectData: Array[Byte]): V = org.apache.commons.lang3.SerializationUtils.deserialize(objectData).asInstanceOf[V] }
Example 63
Source File: CsvKafkaPublisher.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.common import java.io.File import java.util.Random import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.io.Source object CsvKafkaPublisher { var counter = 0 var salts = 0 def main(args:Array[String]): Unit = { if (args.length == 0) { println("<brokerList> " + "<topicName> " + "<dataFolderOrFile> " + "<sleepPerRecord> " + "<acks> " + "<linger.ms> " + "<producer.type> " + "<batch.size> " + "<salts>") return } val kafkaBrokerList = args(0) val kafkaTopicName = args(1) val nyTaxiDataFolder = args(2) val sleepPerRecord = args(3).toInt val acks = args(4).toInt val lingerMs = args(5).toInt val producerType = args(6) //"async" val batchSize = args(7).toInt salts = args(8).toInt val kafkaProducer = KafkaProducerUntil.getNewProducer(kafkaBrokerList, acks, lingerMs, producerType, batchSize) println("--Input:" + nyTaxiDataFolder) val dataFolder = new File(nyTaxiDataFolder) if (dataFolder.isDirectory) { val files = dataFolder.listFiles().iterator files.foreach(f => { println("--Input:" + f) processFile(f, kafkaTopicName, kafkaProducer, sleepPerRecord) }) } else { println("--Input:" + dataFolder) processFile(dataFolder, kafkaTopicName, kafkaProducer, sleepPerRecord) } println("---Done") } def processFile(file:File, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String], sleepPerRecord:Int): Unit = { var counter = 0 val r = new Random() println("-Starting Reading") Source.fromFile(file).getLines().foreach(l => { counter += 1 if (counter % 10000 == 0) { println("{Sent:" + counter + "}") } if (counter % 100 == 0) { print(".") } Thread.sleep(sleepPerRecord) val saltedVender = r.nextInt(salts) + l if (counter > 2) { publishTaxiRecord(saltedVender, kafkaTopicName, kafkaProducer) } }) } def publishTaxiRecord(line:String, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String]): Unit = { if (line.startsWith("vendor_name") || line.length < 10) { println("skip") } else { val message = new ProducerRecord[String, String](kafkaTopicName, line.hashCode.toString, line) kafkaProducer.send(message) } } }
Example 64
Source File: KafkaWordCount.scala From AI with Apache License 2.0 | 5 votes |
package com.bigchange.basic import java.util import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.kafka.KafkaUtils import org.apache.spark.streaming.{Seconds, StreamingContext} object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount"). set("spark.streaming.receiver.writeAheadLog.enable", "true"). set("spark.streaming.kafka.maxRatePerPartition", "1000") val ssc = new StreamingContext(sparkConf, Seconds(2)) // 设置 checkpoint,这是考虑到了有 window 操作,window 操作一般是需要进行 checkpoint ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap // createStream 返回的是一个 Tuple2,具有 key,value,这里只关注 value. // 注意这里是 Receiver-based 方式(还提供了 non-receiver 模式),默认配置下,这种方式是会在 receiver 挂掉 // 丢失数据的,需要设置 Write Ahead, 上面我们已经配置了, 那么存储 level 也可以进行相应调整. val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap, StorageLevel.MEMORY_AND_DISK_SER).map(_._2) val words = lines.flatMap(_.split(" ")) // 统计的是 10 分钟内的单词数量,每隔 10 秒统计 1 次 val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Seconds(10), Seconds(2), 2). filter(x => x._2 > 0) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } // 需要注意的是这里是 broker list,为 host:port,host:port 形式 val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new util.HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while (true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(100).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } }
Example 65
Source File: WriteToKafka.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.bundle.kafka import java.util import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext} import cn.piflow.conf._ import cn.piflow.conf.bean.PropertyDescriptor import cn.piflow.conf.util.{ImageUtil, MapUtil} import java.util.Properties import org.apache.spark.sql.SparkSession import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.clients.producer.Producer import org.apache.kafka.clients.producer.ProducerRecord import scala.collection.mutable class WriteToKafka extends ConfigurableStop{ val description: String = "Write data to kafka" val inportList: List[String] = List(Port.DefaultPort) val outportList: List[String] = List(Port.DefaultPort) var kafka_host:String =_ var topic:String=_ def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = { val spark = pec.get[SparkSession]() val df = in.read() val properties:Properties = new Properties() properties.put("bootstrap.servers", kafka_host) properties.put("acks", "all") //properties.put("retries", 0) //properties.put("batch.size", 16384) //properties.put("linger.ms", 1) //properties.put("buffer.memory", 33554432) properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") var producer:Producer[String,String] = new KafkaProducer[String,String](properties) df.collect().foreach(row=>{ //var hm:util.HashMap[String,String]=new util.HashMap() //row.schema.fields.foreach(f=>(if(!f.name.equals(column_name)&&row.getAs(f.name)!=null)hm.put(f.name,row.getAs(f.name).asInstanceOf[String]))) var res:List[String]=List() row.schema.fields.foreach(f=>{ if(row.getAs(f.name)==null)res="None"::res else{ res=row.getAs(f.name).asInstanceOf[String]::res } }) val s:String=res.reverse.mkString(",") val record=new ProducerRecord[String,String](topic,s) producer.send(record) }) producer.close() } def initialize(ctx: ProcessContext): Unit = { } def setProperties(map: Map[String, Any]): Unit = { kafka_host=MapUtil.get(map,key="kafka_host").asInstanceOf[String] //port=Integer.parseInt(MapUtil.get(map,key="port").toString) topic=MapUtil.get(map,key="topic").asInstanceOf[String] } override def getPropertyDescriptor(): List[PropertyDescriptor] = { var descriptor : List[PropertyDescriptor] = List() val kafka_host = new PropertyDescriptor().name("kafka_host").displayName("KAFKA_HOST").defaultValue("").required(true) val topic = new PropertyDescriptor().name("topic").displayName("TOPIC").defaultValue("").required(true) descriptor = kafka_host :: descriptor descriptor = topic :: descriptor descriptor } override def getIcon(): Array[Byte] = { ImageUtil.getImage("icon/kafka/WriteToKafka.png") } override def getGroup(): List[String] = { List(StopGroup.KafkaGroup.toString) } override val authorEmail: String = "[email protected]" }
Example 66
Source File: KafkaWordCount.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } } // scalastyle:on println
Example 67
Source File: KafkaPublisher.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} class KafkaPublisher { val props = new Properties() props.put("bootstrap.servers", "localhost:9092") props.put("partition.assignment.strategy", "range") props.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer") props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer") val producer = new KafkaProducer[Array[Byte], Array[Byte]](props) def send(topic: String, event: Array[Byte]): Unit = { producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, event)) } def send(topic: String, events: List[Array[Byte]]): Unit = { for (event <- events) { producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, event)) } } }
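Note: partition.assignment.strategy and the two *.deserializer properties set above are consumer-side settings; the producer simply ignores them. A minimal byte-array producer only needs the bootstrap servers and the two serializers. A sketch, assuming a local broker and a placeholder topic:

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object MinimalBytesProducer extends App {
  // Minimal producer configuration; the remaining properties in the example above are optional or consumer-only.
  val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092")
  props.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer")

  val producer = new KafkaProducer[Array[Byte], Array[Byte]](props)
  producer.send(new ProducerRecord[Array[Byte], Array[Byte]]("some-topic", "payload".getBytes("UTF-8")))
  producer.close()
}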
Example 68
Source File: SessionKafkaProducer.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.session import java.util.{Properties, Random} import net.liftweb.json.DefaultFormats import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import net.liftweb.json.Serialization.write object SessionKafkaProducer { def main(args:Array[String]): Unit = { implicit val formats = DefaultFormats val kafkaServerURL = args(0) val kafkaServerPort = args(1) val topic = args(2) val numberOfEntities = args(3).toInt val numberOfMessagesPerEntity = args(4).toInt val waitTimeBetweenMessageBatch = args(5).toInt val chancesOfMissing = args(6).toInt val props = new Properties() props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) props.put("acks", "all") props.put("retries", "0") props.put("batch.size", "16384") props.put("linger.ms", "1") props.put("buffer.memory", "33554432") props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) val r = new Random() var sentCount = 0 println("About to send to " + topic) for (j <- 0 to numberOfMessagesPerEntity) { for (i <- 0 to numberOfEntities) { if (r.nextInt(chancesOfMissing) != 0) { val message = write(HeartBeat(i.toString, System.currentTimeMillis())) val producerRecord = new ProducerRecord[String,String](topic, message) producer.send(producerRecord) sentCount += 1 } } println("Sent Count:" + sentCount) Thread.sleep(waitTimeBetweenMessageBatch) } producer.close() } }
Example 69
Source File: KafkaWordCount.scala From spark1.52 with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka.KafkaUtils object KafkaWordCount { def main(args: Array[String]) { StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = Array("localhost:2181","","topic1,topic2,topic3,topic4","1")//args val sparkConf = new SparkConf().setAppName("KafkaWordCount").setMaster("local[2]") // at least two local threads: one for the receiver, one for processing val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } } // scalastyle:on println
Example 70
Source File: FailingKafkaStorage.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.kafka import java.util.concurrent.Future import io.amient.affinity.core.storage.{LogStorageConf, Record} import io.amient.affinity.core.util.MappedJavaFuture import org.apache.kafka.clients.producer.{ProducerRecord, RecordMetadata} class FailingKafkaStorage(conf: LogStorageConf) extends KafkaLogStorage(conf) { override def append(record: Record[Array[Byte], Array[Byte]]): Future[java.lang.Long] = { val producerRecord = new ProducerRecord(topic, null, record.timestamp, record.key, record.value) new MappedJavaFuture[RecordMetadata, java.lang.Long](producer.send(producerRecord)) { override def map(result: RecordMetadata): java.lang.Long = { if (System.currentTimeMillis() % 3 == 0) throw new RuntimeException("simulated kafka producer error") result.offset() } } } }
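Note: the five-argument constructor used above is ProducerRecord(topic, partition, timestamp, key, value); passing null for the partition leaves the choice to the configured partitioner. For reference, a sketch of the commonly used overloads (topic name and values are placeholders):

import org.apache.kafka.clients.producer.ProducerRecord

object ProducerRecordOverloads {
  val valueOnly = new ProducerRecord[String, String]("topic", "value")                  // no key; partitioner distributes records
  val keyed     = new ProducerRecord[String, String]("topic", "key", "value")           // partition derived from the key hash
  val pinned    = new ProducerRecord[String, String]("topic", Integer.valueOf(0), "key", "value") // explicit partition 0
  val timed     = new ProducerRecord[String, String]("topic", null, java.lang.Long.valueOf(1234567890L), "key", "value") // explicit timestamp, partitioner picks the partition
}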
Example 71
Source File: TransactionalProducer.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.kafka import java.util.Properties import akka.actor.Actor import akka.actor.Status.{Failure, Success} import akka.event.Logging import com.typesafe.config.Config import io.amient.affinity.Conf import io.amient.affinity.core.actor.{TransactionAbort, TransactionBegin, TransactionCommit, TransactionalRecord} import io.amient.affinity.core.config.CfgStruct import io.amient.affinity.core.storage.StorageConf import io.amient.affinity.kafka.KafkaStorage.{KafkaConsumerConf, KafkaProducerConf} import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata} import org.apache.kafka.common.serialization.ByteArraySerializer import scala.collection.JavaConverters._ object KafkaConf extends KafkaConf { override def apply(config: Config): KafkaConf = new KafkaConf().apply(config) } class KafkaConf extends CfgStruct[KafkaConf](classOf[StorageConf]) { val BootstrapServers = string("kafka.bootstrap.servers", true).doc("kafka connection string used for consumer and/or producer") val Producer = struct("kafka.producer", new KafkaProducerConf, false).doc("any settings that the underlying version of kafka producer client supports") val Consumer = struct("kafka.consumer", new KafkaConsumerConf, false).doc("any settings that the underlying version of kafka consumer client supports") } class TransactionalProducer extends Actor { val logger = Logging.getLogger(context.system, this) private[this] var producer: KafkaProducer[Array[Byte], Array[Byte]] = null val kafkaConf = KafkaConf(Conf(context.system.settings.config).Affi.Storage) val producerConfig = new Properties() { if (kafkaConf.Producer.isDefined) { val producerConfig = kafkaConf.Producer.toMap() if (producerConfig.containsKey("bootstrap.servers")) throw new IllegalArgumentException("bootstrap.servers cannot be overridden for KafkaStorage producer") if (producerConfig.containsKey("key.serializer")) throw new IllegalArgumentException("Binary kafka stream cannot use custom key.serializer") if (producerConfig.containsKey("value.serializer")) throw new IllegalArgumentException("Binary kafka stream cannot use custom value.serializer") producerConfig.entrySet.asScala.filter(_.getValue.isDefined).foreach { case (entry) => put(entry.getKey, entry.getValue.apply.toString) } } put("bootstrap.servers", kafkaConf.BootstrapServers()) put("value.serializer", classOf[ByteArraySerializer].getName) put("key.serializer", classOf[ByteArraySerializer].getName) } override def receive: Receive = { case req@TransactionBegin(transactionalId) => req(sender) ! { if (producer == null) { producerConfig.put("transactional.id", transactionalId) producer = new KafkaProducer[Array[Byte], Array[Byte]](producerConfig) logger.debug(s"Transactions.Init(transactional.id = $transactionalId)") producer.initTransactions() } logger.debug("Transactions.Begin()") producer.beginTransaction() } case TransactionalRecord(topic, key, value, timestamp, partition) => val replyto = sender val producerRecord = new ProducerRecord( topic, partition.map(new Integer(_)).getOrElse(null), timestamp.map(new java.lang.Long(_)).getOrElse(null), key, value) logger.debug(s"Transactions.Append(topic=$topic)") producer.send(producerRecord, new Callback { override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = { if (exception != null) { replyto ! Failure(exception) } else { replyto ! Success(metadata.offset()) } } }) case req@TransactionCommit() => req(sender) ! { logger.debug("Transactions.Commit()") producer.commitTransaction() } case req@TransactionAbort() => req(sender) ! { logger.debug("Transactions.Abort()") producer.abortTransaction() } } }
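Note: the actor above drives the standard transactional-producer lifecycle (initTransactions, beginTransaction, commitTransaction/abortTransaction). Stripped of the Akka plumbing, the underlying client calls look roughly like this sketch; the broker address, transactional id and topic are placeholders:

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.ByteArraySerializer

object TransactionalProducerSketch extends App {
  val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092")
  props.put("transactional.id", "example-transactional-id") // must be set before initTransactions()
  props.put("key.serializer", classOf[ByteArraySerializer].getName)
  props.put("value.serializer", classOf[ByteArraySerializer].getName)

  val producer = new KafkaProducer[Array[Byte], Array[Byte]](props)
  producer.initTransactions() // once per producer instance
  try {
    producer.beginTransaction()
    producer.send(new ProducerRecord("some-topic", "key".getBytes("UTF-8"), "value".getBytes("UTF-8")))
    producer.commitTransaction() // makes everything sent since beginTransaction() visible atomically
  } catch {
    case e: Exception =>
      producer.abortTransaction() // discards the records sent in the open transaction
      throw e
  } finally {
    producer.close()
  }
}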
Example 72
Source File: ExampleExternalStateSpec.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.example import java.util.Properties import com.typesafe.config.ConfigFactory import io.amient.affinity.core.cluster.Node import io.amient.affinity.core.util.AffinityTestBase import io.amient.affinity.kafka.EmbeddedKafka import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.scalatest.concurrent.TimeLimitedTests import org.scalatest.time.{Millis, Span} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import scala.collection.JavaConverters._ class ExampleExternalStateSpec extends FlatSpec with AffinityTestBase with EmbeddedKafka with Matchers with BeforeAndAfterAll with TimeLimitedTests { override def numPartitions = 2 val config = configure(ConfigFactory.load("example-external-state")) val topic = config.getString("affinity.keyspace.external.state.news.storage.kafka.topic") val node = new Node(configure(config, Some(zkConnect), Some(kafkaBootstrap))) override def beforeAll: Unit = try { createTopic(topic) val externalProducer = createKafkaAvroProducer[String, String]() try { externalProducer.send(new ProducerRecord(topic, "10:30", "the universe is expanding")) externalProducer.send(new ProducerRecord(topic, "11:00", "the universe is still expanding")) externalProducer.send(new ProducerRecord(topic, "11:30", "the universe briefly contracted but is expanding again")) externalProducer.flush() } finally { externalProducer.close() } //the external fixture is produced and the externalProducer is flushed() before the node is started node.start() node.awaitClusterReady() //at this point all stores have loaded everything available in the external topic so the test will be deterministic } finally { super.beforeAll() } override def afterAll: Unit = try { node.shutdown() } finally { super.afterAll() } behavior of "External State" val timeLimit = Span(5000, Millis) //it should be much faster but sometimes many tests are run at the same time it should "start automatically tailing state partitions on startup even when master" in { //we don't need an arbitrary sleep to ensure the tailing state catches up with the writes above //before we fetch the latest news because the watermark is built into the request to make the test fast and deterministic val response = node.get_text(node.http_get(s"/news/latest")) response should include("10:30\tthe universe is expanding") response should include("11:00\tthe universe is still expanding") response should include("11:30\tthe universe briefly contracted but is expanding again") } private def createKafkaAvroProducer[K, V]() = new KafkaProducer[K, V](new Properties { put("bootstrap.servers", kafkaBootstrap) put("acks", "1") put("key.serializer", "io.amient.affinity.kafka.KafkaAvroSerializer") put("value.serializer", "io.amient.affinity.kafka.KafkaAvroSerializer") //this simply adds all configs required by KafkaAvroSerializer config.getConfig("affinity.avro").entrySet().asScala.foreach { case (entry) => put(entry.getKey, entry.getValue.unwrapped()) } }) }
Example 73
Source File: KafkaWordCount.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord} import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ import org.apache.spark.SparkConf object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } }
Example 74
Source File: KafkaWordCount.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } } // scalastyle:on println
Example 75
Source File: WordCountTestableSpec.scala From kafka-streams with Apache License 2.0 | 5 votes |
package com.supergloo.examples import com.supergloo.WordCountTestable import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} import org.apache.kafka.streams.TopologyTestDriver import org.apache.kafka.streams.state.KeyValueStore import org.apache.kafka.streams.test.ConsumerRecordFactory import org.scalatest.{FlatSpec, Matchers} class WordCountTestableSpec extends FlatSpec with Matchers with KafkaTestSetup { val wordCountApplication = new WordCountTestable "Convert streaming data into lowercase and publish into output topic" should "push lower text to kafka" in { val driver = new TopologyTestDriver(wordCountApplication.toLowerCaseStream("input-topic", "output-topic"), config) val recordFactory = new ConsumerRecordFactory("input-topic", new StringSerializer(), new StringSerializer()) val words = "Hello, WORLDY, World worlD Test" driver.pipeInput(recordFactory.create(words)) val record: ProducerRecord[String, String] = driver.readOutput("output-topic", new StringDeserializer(), new StringDeserializer()) record.value() shouldBe words.toLowerCase driver.close() } "WordCountTestable" should "count number of words" in { val driver = new TopologyTestDriver(wordCountApplication.countNumberOfWords("input-topic", "output-topic", "counts-store"), config) val recordFactory = new ConsumerRecordFactory("input-topic", new StringSerializer(), new StringSerializer()) val words = "Hello Kafka Streams, All streams lead to Kafka" driver.pipeInput(recordFactory.create(words)) val store: KeyValueStore[String, java.lang.Long] = driver.getKeyValueStore("counts-store") store.get("hello") shouldBe 1 store.get("kafka") shouldBe 2 store.get("streams") shouldBe 2 store.get("lead") shouldBe 1 store.get("to") shouldBe 1 driver.close() } }
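Note: ConsumerRecordFactory and TopologyTestDriver.readOutput come from the older kafka-streams-test-utils API. From Kafka 2.4 on, the same check is usually written with TestInputTopic/TestOutputTopic. A sketch of the first assertion in that style, meant to sit inside the spec above (it reuses its wordCountApplication, config and Matchers; the expected string is just the lower-cased input):

import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.apache.kafka.streams.TopologyTestDriver

val driver = new TopologyTestDriver(wordCountApplication.toLowerCaseStream("input-topic", "output-topic"), config)
val input  = driver.createInputTopic("input-topic", new StringSerializer(), new StringSerializer())
val output = driver.createOutputTopic("output-topic", new StringDeserializer(), new StringDeserializer())

input.pipeInput("Hello, WORLDY, World worlD Test")
output.readValue() shouldBe "hello, worldy, world world test" // readValue() returns the value of the next output record
driver.close()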
Example 76
Source File: PredictionLogger.scala From ForestFlow with Apache License 2.0 | 5 votes |
package ai.forestflow.event.subscribers import java.nio.ByteOrder import ai.forestflow.domain.{PredictionEvent, PredictionEventGP} import ai.forestflow.serving.config.ApplicationEnvironment import akka.actor.{Actor, ActorLogging, Props} import akka.kafka.ProducerSettings import ai.forestflow.domain.{PredictionEvent, PredictionEventGP} import graphpipe.InferRequest import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.serialization.{ByteArraySerializer, StringSerializer} //import scalapb.json4s.JsonFormat import scala.util.{Success, Try} object PredictionLogger { private lazy val binaryProducerSettings = ProducerSettings(producerConfig, new StringSerializer, new ByteArraySerializer) private lazy val binaryProducer = binaryProducerSettings.createKafkaProducer() override def preStart(): Unit = { if (basic_topic.isDefined) context.system.eventStream.subscribe(self, classOf[PredictionEvent]) if (gp_topic.isDefined) context.system.eventStream.subscribe(self, classOf[PredictionEventGP]) super.preStart() } override def receive: Receive = { case event@PredictionEvent(prediction, servedRequest, inferenceRequest, loggingSettings) => val key = loggingSettings .keyFeatures .flatMap(inferenceRequest.configs.get) .mkString(loggingSettings.getKeyFeaturesSeparator) if (key.length > 0 ) binaryProducer.send(new ProducerRecord(basic_topic.get, key, event.toByteArray)) else binaryProducer.send(new ProducerRecord(basic_topic.get, event.toByteArray)) case event@PredictionEventGP(prediction, servedRequest, inferBytes, loggingSettings) => Try { val req = graphpipe.Request.getRootAsRequest(inferBytes.asReadOnlyByteBuffer().order(ByteOrder.LITTLE_ENDIAN)) val inferRequest = req.req(new InferRequest()).asInstanceOf[InferRequest] val inferConfigs = inferRequest.config() .split(",") .map(_.split(":")) .flatMap{ case Array(k, v) => Some((k, v)) case _ => None}.toMap loggingSettings .keyFeatures .flatMap(inferConfigs.get) .mkString(loggingSettings.getKeyFeaturesSeparator) } match { case Success(key) => binaryProducer.send(new ProducerRecord(gp_topic.get, key, event.toByteArray)) case _ => binaryProducer.send(new ProducerRecord(gp_topic.get, event.toByteArray)) } case _ => // ignore } }
Example 77
Source File: ExternalKafkaProcessorSupplier.scala From haystack-trends with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trends.kstream.processor import com.expedia.metrics.MetricData import com.expedia.www.haystack.trends.config.entities.KafkaProduceConfiguration import com.expedia.www.haystack.trends.kstream.serde.TrendMetricSerde.metricRegistry import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata} import org.apache.kafka.streams.processor.{AbstractProcessor, Processor, ProcessorContext, ProcessorSupplier} import org.slf4j.LoggerFactory class ExternalKafkaProcessorSupplier(kafkaProduceConfig: KafkaProduceConfiguration) extends ProcessorSupplier[String, MetricData] { private val LOGGER = LoggerFactory.getLogger(this.getClass) private val metricPointExternalKafkaSuccessMeter = metricRegistry.meter("metricpoint.kafka-external.success") private val metricPointExternalKafkaFailureMeter = metricRegistry.meter("metricpoint.kafka-external.failure") def get: Processor[String, MetricData] = { new ExternalKafkaProcessor(kafkaProduceConfig: KafkaProduceConfiguration) } def process(key: String, value: MetricData): Unit = { val kafkaMessage = new ProducerRecord(kafkaProduceTopic, key, value) kafkaProducer.send(kafkaMessage, new Callback { override def onCompletion(recordMetadata: RecordMetadata, e: Exception): Unit = { if (e != null) { LOGGER.error(s"Failed to produce the message to kafka for topic=$kafkaProduceTopic, with reason=", e) metricPointExternalKafkaFailureMeter.mark() } else { metricPointExternalKafkaSuccessMeter.mark() } } }) } } }
Example 78
Source File: KafkaClient.scala From mist with Apache License 2.0 | 5 votes |
package io.hydrosphere.mist.master.interfaces.async.kafka import java.util.UUID import java.util.concurrent.atomic.AtomicBoolean import org.apache.kafka.clients.consumer.KafkaConsumer import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} import scala.collection.JavaConverters._ import scala.concurrent.{Future, Promise} class TopicProducer[K, V]( producer: KafkaProducer[K, V], topic: String ) { def send(key:K, value: V): Unit = { val record = new ProducerRecord(topic, key, value) producer.send(record) } def close(): Unit = { producer.close() } } object TopicProducer { def apply( host: String, port: Int, topic: String): TopicProducer[String, String] = { val props = new java.util.Properties() props.put("bootstrap.servers", s"$host:$port") val producer = new KafkaProducer(props, new StringSerializer, new StringSerializer) new TopicProducer(producer, topic) } } class TopicConsumer[K, V]( consumer: KafkaConsumer[K, V], topic: String, timeout: Long = 100 ) { private val promise = Promise[Unit] private val stopped = new AtomicBoolean(false) def subscribe(f: (K, V) => Unit): Future[Unit] = { run(f) promise.future } private def run(f: (K, V) => Unit): Unit = { consumer.subscribe(Seq(topic).asJava) val thread = new Thread(new Runnable { override def run(): Unit = { while (!stopped.get()) { val records = consumer.poll(timeout).asScala records.foreach(r => f(r.key(), r.value())) } promise.success(()) } }) thread.setName(s"kafka-topic-consumer-$topic") thread.start() } def close(): Future[Unit] = { stopped.set(true) promise.future } } object TopicConsumer { def apply( host: String, port: Int, topic: String): TopicConsumer[String, String] = { val props = new java.util.Properties() props.put("bootstrap.servers", s"$host:$port") props.put("group.id", "mist-" + UUID.randomUUID().toString) props.put("enable.auto.commit", "true") props.put("auto.commit.interval.ms", "1000") props.put("session.timeout.ms", "30000") val consumer = new KafkaConsumer(props, new StringDeserializer, new StringDeserializer) new TopicConsumer(consumer, topic) } }
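Note: consumer.poll(timeout: Long), used in TopicConsumer.run above, is the old overload; since Kafka 2.0 the client also offers poll(java.time.Duration), and newer clients deprecate the Long variant. The polling loop could then look like this sketch (the helper name is illustrative, not part of mist):

import java.time.Duration
import org.apache.kafka.clients.consumer.KafkaConsumer
import scala.collection.JavaConverters._

object PollLoopSketch {
  // Assumes the consumer is already subscribed, as in TopicConsumer.run above.
  def pollLoop[K, V](consumer: KafkaConsumer[K, V], isStopped: () => Boolean)(f: (K, V) => Unit): Unit = {
    while (!isStopped()) {
      val records = consumer.poll(Duration.ofMillis(100)).asScala // Duration overload, Kafka 2.0+
      records.foreach(r => f(r.key(), r.value()))
    }
  }
}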
Example 79
Source File: KafkaWordCount.scala From sparkoscope with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } } // scalastyle:on println
Example 80
Source File: ProcessingKafkaApplication.scala From Akka-Cookbook with MIT License | 5 votes |
package com.packt.chapter8 import akka.actor.ActorSystem import akka.kafka.scaladsl.{Consumer, Producer} import akka.kafka.{ConsumerSettings, ProducerSettings, Subscriptions} import akka.stream.{ActorMaterializer, ClosedShape} import akka.stream.scaladsl.{Flow, GraphDSL, RunnableGraph, Sink, Source} import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord} import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer} import scala.concurrent.duration._ object ProcessingKafkaApplication extends App { implicit val actorSystem = ActorSystem("SimpleStream") implicit val actorMaterializer = ActorMaterializer() val bootstrapServers = "localhost:9092" val kafkaTopic = "akka_streams_topic" val partition = 0 val subscription = Subscriptions.assignment(new TopicPartition(kafkaTopic, partition)) val consumerSettings = ConsumerSettings(actorSystem, new ByteArrayDeserializer, new StringDeserializer) .withBootstrapServers(bootstrapServers) .withGroupId("akka_streams_group") .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") val producerSettings = ProducerSettings(actorSystem, new ByteArraySerializer, new StringSerializer) .withBootstrapServers(bootstrapServers) val runnableGraph = RunnableGraph.fromGraph(GraphDSL.create() { implicit builder => import GraphDSL.Implicits._ val tickSource = Source.tick(0 seconds, 5 seconds, "Hello from Akka Streams using Kafka!") val kafkaSource = Consumer.plainSource(consumerSettings, subscription) val kafkaSink = Producer.plainSink(producerSettings) val printlnSink = Sink.foreach(println) val mapToProducerRecord = Flow[String].map(elem => new ProducerRecord[Array[Byte], String](kafkaTopic, elem)) val mapFromConsumerRecord = Flow[ConsumerRecord[Array[Byte], String]].map(record => record.value()) tickSource ~> mapToProducerRecord ~> kafkaSink kafkaSource ~> mapFromConsumerRecord ~> printlnSink ClosedShape }) runnableGraph.run() }
Example 81
Source File: KafkaSinkSpec.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka import java.util.Properties import com.twitter.bijection.Injection import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink.KafkaProducerFactory import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.mockito.Mockito._ import org.scalacheck.Gen import org.scalatest.mock.MockitoSugar import org.scalatest.prop.PropertyChecks import org.scalatest.{Matchers, PropSpec} import org.apache.gearpump.Message import org.apache.gearpump.streaming.MockUtil class KafkaSinkSpec extends PropSpec with PropertyChecks with Matchers with MockitoSugar { val dataGen = for { topic <- Gen.alphaStr key <- Gen.alphaStr msg <- Gen.alphaStr } yield (topic, Injection[String, Array[Byte]](key), Injection[String, Array[Byte]](msg)) property("KafkaSink write should send producer record") { forAll(dataGen) { (data: (String, Array[Byte], Array[Byte])) => val props = mock[Properties] val producer = mock[KafkaProducer[Array[Byte], Array[Byte]]] val producerFactory = mock[KafkaProducerFactory] val configFactory = mock[KafkaConfigFactory] val config = mock[KafkaConfig] when(configFactory.getKafkaConfig(props)).thenReturn(config) when(producerFactory.getKafkaProducer(config)).thenReturn(producer) val (topic, key, msg) = data val kafkaSink = new KafkaSink(topic, props, configFactory, producerFactory) kafkaSink.write(Message((key, msg))) verify(producer).send(MockUtil.argMatch[ProducerRecord[Array[Byte], Array[Byte]]]( r => r.topic == topic && (r.key sameElements key) && (r.value sameElements msg))) kafkaSink.write(Message(msg)) verify(producer).send(MockUtil.argMatch[ProducerRecord[Array[Byte], Array[Byte]]]( r => r.topic() == topic && (r.key == null) && (r.value() sameElements msg) )) kafkaSink.close() } } property("KafkaSink close should close kafka producer") { val props = mock[Properties] val producer = mock[KafkaProducer[Array[Byte], Array[Byte]]] val producerFactory = mock[KafkaProducerFactory] val configFactory = mock[KafkaConfigFactory] val config = mock[KafkaConfig] when(configFactory.getKafkaConfig(props)).thenReturn(config) when(producerFactory.getKafkaProducer(config)).thenReturn(producer) val kafkaSink = new KafkaSink("topic", props, configFactory, producerFactory) kafkaSink.close() verify(producer).close() } }
Example 82
Source File: KafkaStore.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.lib.store import java.util.Properties import com.twitter.bijection.Injection import kafka.api.OffsetRequest import org.apache.gearpump.Time.MilliSeconds import org.apache.gearpump.streaming.kafka.lib.source.consumer.KafkaConsumer import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.gearpump.streaming.transaction.api.{CheckpointStore, CheckpointStoreFactory} import org.apache.gearpump.util.LogUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer class KafkaStore private[kafka]( val topic: String, val producer: KafkaProducer[Array[Byte], Array[Byte]], val optConsumer: Option[KafkaConsumer]) extends CheckpointStore { import org.apache.gearpump.streaming.kafka.lib.store.KafkaStore._ private var maxTime: MilliSeconds = 0L override def persist(time: MilliSeconds, checkpoint: Array[Byte]): Unit = { // make sure checkpointed timestamp is monotonically increasing // hence (1, 1), (3, 2), (2, 3) is checkpointed as (1, 1), (3, 2), (3, 3) if (time > maxTime) { maxTime = time } val key = maxTime val value = checkpoint val message = new ProducerRecord[Array[Byte], Array[Byte]]( topic, 0, Injection[Long, Array[Byte]](key), value) producer.send(message) LOG.debug("KafkaStore persisted state ({}, {})", key, value) } override def recover(time: MilliSeconds): Option[Array[Byte]] = { var checkpoint: Option[Array[Byte]] = None optConsumer.foreach { consumer => while (consumer.hasNext && checkpoint.isEmpty) { val kafkaMsg = consumer.next() checkpoint = for { k <- kafkaMsg.key t <- Injection.invert[MilliSeconds, Array[Byte]](k).toOption c = kafkaMsg.msg if t >= time } yield c } consumer.close() } checkpoint match { case Some(c) => LOG.info(s"KafkaStore recovered checkpoint ($time, $c)") case None => LOG.info(s"no checkpoint existing for $time") } checkpoint } override def close(): Unit = { producer.close() LOG.info("KafkaStore closed") } }
Example 83
Source File: AbstractKafkaSink.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.lib.sink import java.util.Properties import org.apache.gearpump.Message import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink.KafkaProducerFactory import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.gearpump.streaming.sink.DataSink import org.apache.gearpump.streaming.task.TaskContext import org.apache.gearpump.util.LogUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer object AbstractKafkaSink { private val LOG = LogUtil.getLogger(classOf[AbstractKafkaSink]) val producerFactory = new KafkaProducerFactory { override def getKafkaProducer(config: KafkaConfig): KafkaProducer[Array[Byte], Array[Byte]] = { new KafkaProducer[Array[Byte], Array[Byte]](config.getProducerConfig, new ByteArraySerializer, new ByteArraySerializer) } } trait KafkaProducerFactory extends java.io.Serializable { def getKafkaProducer(config: KafkaConfig): KafkaProducer[Array[Byte], Array[Byte]] } } abstract class AbstractKafkaSink private[kafka]( topic: String, props: Properties, kafkaConfigFactory: KafkaConfigFactory, factory: KafkaProducerFactory) extends DataSink { import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink._ def this(topic: String, props: Properties) = { this(topic, props, new KafkaConfigFactory, AbstractKafkaSink.producerFactory) } private lazy val config = kafkaConfigFactory.getKafkaConfig(props) // Lazily construct producer since KafkaProducer is not serializable private lazy val producer = factory.getKafkaProducer(config) override def open(context: TaskContext): Unit = { LOG.info("KafkaSink opened") } override def write(message: Message): Unit = { message.value match { case (k: Array[Byte], v: Array[Byte]) => val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, k, v) producer.send(record) LOG.debug("KafkaSink sent record {} to Kafka", record) case v: Array[Byte] => val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, v) producer.send(record) LOG.debug("KafkaSink sent record {} to Kafka", record) case m => val errorMsg = s"unexpected message type ${m.getClass}; " + s"Array[Byte] or (Array[Byte], Array[Byte]) required" LOG.error(errorMsg) } } override def close(): Unit = { producer.close() LOG.info("KafkaSink closed") } }
Example 84
Source File: NumericalDataProducer.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.integrationtest.kafka import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer import org.apache.log4j.Logger import org.apache.gearpump.streaming.serializer.ChillSerializer class NumericalDataProducer(topic: String, bootstrapServers: String) { private val LOG = Logger.getLogger(getClass) private val producer = createProducer private val WRITE_SLEEP_NANOS = 10 private val serializer = new ChillSerializer[Int] var lastWriteNum = 0 def start(): Unit = { produceThread.start() } def stop(): Unit = { if (produceThread.isAlive) { produceThread.interrupt() produceThread.join() } producer.close() } def producedNumbers: Range = { Range(1, lastWriteNum + 1) } private def createProducer: KafkaProducer[Array[Byte], Array[Byte]] = { val properties = new Properties() properties.setProperty("bootstrap.servers", bootstrapServers) new KafkaProducer[Array[Byte], Array[Byte]](properties, new ByteArraySerializer, new ByteArraySerializer) } private val produceThread = new Thread(new Runnable { override def run(): Unit = { try { while (!Thread.currentThread.isInterrupted) { lastWriteNum += 1 val msg = serializer.serialize(lastWriteNum) val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, msg) producer.send(record) Thread.sleep(0, WRITE_SLEEP_NANOS) } } catch { case ex: InterruptedException => LOG.error("message producing is stopped by an interrupt") } } }) }
Example 85
Source File: KafkaProducerInjector.scala From SparkOnKudu with Apache License 2.0 | 5 votes |
package org.kududb.spark.demo.gamer.aggregates import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} object KafkaProducerInjector { def main(args:Array[String]): Unit = { if (args.length == 0) { println("{brokerList} {topic} {#OfRecords} {sleepTimeEvery10Records} {#OfGamers}") return } val brokerList = args(0) val topic = args(1) val numOfRecords = args(2).toInt val sleepTimeEvery10Records = args(3).toInt val numOfGamers = args(4).toInt val producer = getNewProducer(brokerList) for (i <- 0 until numOfRecords) { val gamerRecord = GamerDataGenerator.makeNewGamerRecord(numOfGamers) val message = new ProducerRecord[String, String](topic, gamerRecord.gamerId.toString, gamerRecord.toString()) producer.send(message) if (i % 10 == 0) { Thread.sleep(sleepTimeEvery10Records) print(".") } if (i % 2000 == 0) { println() println("Records Sent:" + i) println() } } } def getNewProducer(brokerList:String): KafkaProducer[String, String] = { val kafkaProps = new Properties kafkaProps.put("bootstrap.servers", brokerList) kafkaProps.put("metadata.broker.list", brokerList) // This is mandatory, even though we don't send keys kafkaProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") kafkaProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") kafkaProps.put("acks", "0") // how many times to retry when produce request fails? kafkaProps.put("retries", "3") kafkaProps.put("linger.ms", "2") kafkaProps.put("batch.size", "1000") kafkaProps.put("queue.time", "2") new KafkaProducer[String, String](kafkaProps) } }
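Note: "metadata.broker.list" and "queue.time" above come from the old Scala producer; the new KafkaProducer ignores them and logs an "unknown configuration" warning. The same setup, restricted to keys the new client recognizes and written with the ProducerConfig constants, would look roughly like this sketch:

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig}

object NewProducerConfigSketch {
  def getNewProducer(brokerList: String): KafkaProducer[String, String] = {
    val kafkaProps = new Properties
    kafkaProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList)
    kafkaProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put(ProducerConfig.ACKS_CONFIG, "0")    // fire-and-forget, as in the original
    kafkaProps.put(ProducerConfig.RETRIES_CONFIG, "3") // note: retries have no visible effect with acks=0
    kafkaProps.put(ProducerConfig.LINGER_MS_CONFIG, "2")
    kafkaProps.put(ProducerConfig.BATCH_SIZE_CONFIG, "1000")
    new KafkaProducer[String, String](kafkaProps)
  }
}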
Example 86
Source File: KafkaOutput.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.plugin.output.kafka import java.io.{Serializable => JSerializable} import java.util.Properties import com.stratio.sparta.plugin.input.kafka.KafkaBase import com.stratio.sparta.sdk.pipeline.output.Output._ import com.stratio.sparta.sdk.pipeline.output.{Output, OutputFormatEnum, SaveModeEnum} import com.stratio.sparta.sdk.properties.CustomProperties import com.stratio.sparta.sdk.properties.ValidatingPropertyMap._ import org.apache.kafka.clients.producer.ProducerConfig._ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import org.apache.spark.sql._ import scala.collection.mutable class KafkaOutput(name: String, properties: Map[String, JSerializable]) extends Output(name, properties) with KafkaBase with CustomProperties { val DefaultKafkaSerializer = classOf[StringSerializer].getName val DefaultAck = "0" val DefaultBatchNumMessages = "200" val DefaultProducerPort = "9092" override val customKey = "KafkaProperties" override val customPropertyKey = "kafkaPropertyKey" override val customPropertyValue = "kafkaPropertyValue" val outputFormat = OutputFormatEnum.withName(properties.getString("format", "json").toUpperCase) val rowSeparator = properties.getString("rowSeparator", ",") override def supportedSaveModes: Seq[SaveModeEnum.Value] = Seq(SaveModeEnum.Append) override def save(dataFrame: DataFrame, saveMode: SaveModeEnum.Value, options: Map[String, String]): Unit = { val tableName = getTableNameFromOptions(options) validateSaveMode(saveMode) outputFormat match { case OutputFormatEnum.ROW => dataFrame.rdd.foreachPartition(messages => messages.foreach(message => send(tableName, message.mkString(rowSeparator)))) case _ => dataFrame.toJSON.foreachPartition { messages => messages.foreach(message => send(tableName, message)) } } } def send(topic: String, message: String): Unit = { val record = new ProducerRecord[String, String](topic, message) KafkaOutput.getProducer(getProducerConnectionKey, createProducerProps).send(record) } private[kafka] def getProducerConnectionKey: String = getHostPort(BOOTSTRAP_SERVERS_CONFIG, DefaultHost, DefaultProducerPort) .getOrElse(BOOTSTRAP_SERVERS_CONFIG, throw new Exception("Invalid metadata broker list")) private[kafka] def createProducerProps: Properties = { val props = new Properties() properties.filter(_._1 != customKey).foreach { case (key, value) => props.put(key, value.toString) } mandatoryOptions.foreach { case (key, value) => props.put(key, value) } getCustomProperties.foreach { case (key, value) => props.put(key, value) } props } private[kafka] def mandatoryOptions: Map[String, String] = getHostPort(BOOTSTRAP_SERVERS_CONFIG, DefaultHost, DefaultProducerPort) ++ Map( KEY_SERIALIZER_CLASS_CONFIG -> properties.getString(KEY_SERIALIZER_CLASS_CONFIG, DefaultKafkaSerializer), VALUE_SERIALIZER_CLASS_CONFIG -> properties.getString(VALUE_SERIALIZER_CLASS_CONFIG, DefaultKafkaSerializer), ACKS_CONFIG -> properties.getString(ACKS_CONFIG, DefaultAck), BATCH_SIZE_CONFIG -> properties.getString(BATCH_SIZE_CONFIG, DefaultBatchNumMessages) ) override def cleanUp(options: Map[String, String]): Unit = { log.info(s"Closing Kafka producer in Kafka Output: $name") KafkaOutput.closeProducers() } } object KafkaOutput { private val producers: mutable.Map[String, KafkaProducer[String, String]] = mutable.Map.empty def getProducer(producerKey: String, properties: Properties): KafkaProducer[String, String] = { getInstance(producerKey, properties) } def closeProducers(): Unit = { producers.values.foreach(producer => producer.close()) } private[kafka] def getInstance(key: String, properties: Properties): KafkaProducer[String, String] = { producers.getOrElse(key, { val producer = new KafkaProducer[String, String](properties) producers.put(key, producer) producer }) } }
Example 87
Source File: MultiDataStreamer.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config.{bootstrapServer, topic} import knolx.KnolXLogger import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.language.postfixOps import scala.util.Random object MultiDataStreamer extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) info("Streaming data into Kafka...") system.scheduler.schedule(0 seconds, 3000 milliseconds) { (1 to Random.nextInt(100)).foreach { id => producer.send(new ProducerRecord[String, String](topic,s"device$id", (Math.random * 2 + 1).toString)) } } }