org.apache.kafka.clients.producer.ProducerConfig Scala Examples
The following examples show how to use org.apache.kafka.clients.producer.ProducerConfig.
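Each ProducerConfig constant is simply the string key of a standard producer property, so configuring through these constants is equivalent to using raw keys such as "bootstrap.servers". As a minimal orientation sketch (the broker address and topic name are placeholders, not taken from any of the projects below):

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object MinimalProducerSketch extends App {
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") // same key as "bootstrap.servers"
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)
  producer.send(new ProducerRecord[String, String]("example-topic", "key", "value"))
  producer.close()
}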
Example 1
Source File: IntegrationTest.scala From kmq with Apache License 2.0
package com.softwaremill.kmq.redelivery

import java.time.Duration
import java.util.Random

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerMessage, ProducerSettings, Subscriptions}
import akka.stream.ActorMaterializer
import akka.testkit.TestKit
import com.softwaremill.kmq._
import com.softwaremill.kmq.redelivery.infrastructure.KafkaSpec
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.scalatest.concurrent.Eventually
import org.scalatest.time.{Seconds, Span}
import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers}

import scala.collection.mutable.ArrayBuffer

class IntegrationTest extends TestKit(ActorSystem("test-system")) with FlatSpecLike with KafkaSpec
  with BeforeAndAfterAll with Eventually with Matchers {

  implicit val materializer = ActorMaterializer()
  import system.dispatcher

  "KMQ" should "resend message if not committed" in {
    val bootstrapServer = s"localhost:${testKafkaConfig.kafkaPort}"
    val kmqConfig = new KmqConfig("queue", "markers", "kmq_client", "kmq_redelivery",
      Duration.ofSeconds(1).toMillis, 1000)

    val consumerSettings = ConsumerSettings(system, new StringDeserializer, new StringDeserializer)
      .withBootstrapServers(bootstrapServer)
      .withGroupId(kmqConfig.getMsgConsumerGroupId)
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

    val markerProducerSettings = ProducerSettings(system,
      new MarkerKey.MarkerKeySerializer(), new MarkerValue.MarkerValueSerializer())
      .withBootstrapServers(bootstrapServer)
      .withProperty(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[ParititionFromMarkerKey].getName)
    val markerProducer = markerProducerSettings.createKafkaProducer()

    val random = new Random()

    lazy val processedMessages = ArrayBuffer[String]()
    lazy val receivedMessages = ArrayBuffer[String]()

    val control = Consumer.committableSource(consumerSettings, Subscriptions.topics(kmqConfig.getMsgTopic)) // 1. get messages from topic
      .map { msg =>
        ProducerMessage.Message(
          new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic,
            MarkerKey.fromRecord(msg.record), new StartMarker(kmqConfig.getMsgTimeoutMs)), msg)
      }
      .via(Producer.flow(markerProducerSettings, markerProducer)) // 2. write the "start" marker
      .map(_.message.passThrough)
      .mapAsync(1) { msg =>
        msg.committableOffset.commitScaladsl().map(_ => msg.record) // this should be batched
      }
      .map { msg =>
        receivedMessages += msg.value
        msg
      }
      .filter(_ => random.nextInt(5) != 0)
      .map { processedMessage =>
        processedMessages += processedMessage.value
        new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic,
          MarkerKey.fromRecord(processedMessage), EndMarker.INSTANCE)
      }
      .to(Producer.plainSink(markerProducerSettings, markerProducer)) // 5. write "end" markers
      .run()

    val redeliveryHook = RedeliveryTracker.start(new KafkaClients(bootstrapServer), kmqConfig)

    val messages = (0 to 20).map(_.toString)
    messages.foreach(msg => sendToKafka(kmqConfig.getMsgTopic, msg))

    eventually {
      receivedMessages.size should be > processedMessages.size
      processedMessages.sortBy(_.toInt).distinct shouldBe messages
    }(PatienceConfig(timeout = Span(15, Seconds)), implicitly)

    redeliveryHook.close()
    control.shutdown()
  }

  override def afterAll(): Unit = {
    super.afterAll()
    TestKit.shutdownActorSystem(system)
  }
}
Example 2
Source File: BasicSimulation.scala From gatling-kafka with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class BasicSimulation extends Simulation {
  val kafkaConf = kafka
    // Kafka topic name
    .topic("test")
    // Kafka producer configs
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        // list of Kafka broker hostname and port pairs
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        // in most cases, StringSerializer or ByteArraySerializer
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer"))

  val scn = scenario("Kafka Test")
    .exec(
      kafka("request")
        // message to send
        .send[String]("foo"))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
}
Example 3
Source File: Kafka.scala From event-sourcing-kafka-streams with MIT License
package org.amitayh.invoices.web

import java.time.Duration
import java.util.Collections.singletonList
import java.util.Properties

import cats.effect._
import cats.syntax.apply._
import cats.syntax.functor._
import fs2._
import org.amitayh.invoices.common.Config
import org.amitayh.invoices.common.Config.Topics.Topic
import org.apache.kafka.clients.consumer._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.log4s.{Logger, getLogger}

import scala.collection.JavaConverters._

object Kafka {

  trait Producer[F[_], K, V] {
    def send(key: K, value: V): F[RecordMetadata]
  }

  object Producer {
    def apply[F[_]: Async, K, V](producer: KafkaProducer[K, V], topic: Topic[K, V]): Producer[F, K, V] =
      (key: K, value: V) => Async[F].async { cb =>
        val record = new ProducerRecord(topic.name, key, value)
        producer.send(record, (metadata: RecordMetadata, exception: Exception) => {
          if (exception != null) cb(Left(exception))
          else cb(Right(metadata))
        })
      }
  }

  def producer[F[_]: Async, K, V](topic: Topic[K, V]): Resource[F, Producer[F, K, V]] = Resource {
    val create = Sync[F].delay {
      val props = new Properties
      props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers)
      new KafkaProducer[K, V](props, topic.keySerializer, topic.valueSerializer)
    }
    create.map(producer => (Producer(producer, topic), close(producer)))
  }

  def subscribe[F[_]: Sync, K, V](topic: Topic[K, V], groupId: String): Stream[F, (K, V)] = {
    val create = Sync[F].delay {
      val props = new Properties
      props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers)
      props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId)
      val consumer = new KafkaConsumer(props, topic.keyDeserializer, topic.valueDeserializer)
      consumer.subscribe(singletonList(topic.name))
      consumer
    }
    Stream.bracket(create)(close[F]).flatMap(consume[F, K, V])
  }

  private val logger: Logger = getLogger

  def log[F[_]: Sync](msg: String): F[Unit] = Sync[F].delay(logger.info(msg))

  private def consume[F[_]: Sync, K, V](consumer: KafkaConsumer[K, V]): Stream[F, (K, V)] = for {
    records <- Stream.repeatEval(Sync[F].delay(consumer.poll(Duration.ofSeconds(1))))
    record <- Stream.emits(records.iterator.asScala.toSeq)
  } yield record.key -> record.value

  private def close[F[_]: Sync](producer: KafkaProducer[_, _]): F[Unit] =
    Sync[F].delay(producer.close()) *> log(s"Producer closed")

  private def close[F[_]: Sync](consumer: KafkaConsumer[_, _]): F[Unit] =
    Sync[F].delay(consumer.close()) *> log("Consumer closed")
}
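A short usage sketch of the wrapper above, assuming cats-effect IO and a hypothetical Topic[String, String] value (the real topics are defined in the project's Config object):

import cats.effect.IO
import org.amitayh.invoices.common.Config.Topics.Topic

// `invoiceEvents` is a placeholder Topic[String, String]; the project defines its own topics.
def sendOne(invoiceEvents: Topic[String, String]): IO[Unit] =
  Kafka.producer[IO, String, String](invoiceEvents).use { producer =>
    producer.send("invoice-1", "created").map(_ => ())
  }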
Example 4
Source File: KafkaProducerUtils.scala From bigdata-examples with Apache License 2.0
package com.timeyang.common.util

import java.util.Properties

import com.timeyang.common.config.BaseConf
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object KafkaProducerUtils {

  @volatile lazy private val producer: KafkaProducer[String, String] = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BaseConf.kafkaBrokerList)
    props.put("acks", "all")
    props.put("retries", 1: Integer)
    props.put("batch.size", 16384: Integer)
    props.put("linger.ms", 1: Integer)
    props.put("buffer.memory", 33554432: Integer)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    new KafkaProducer[String, String](props)
  }

  def sendJsonMessages(topic: String, event: Object, events: Object*): Unit = {
    for (event <- event +: events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, events: List[Object]): Unit = {
    for (event <- events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, event: Object): Unit = {
    val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
    producer.send(record)
  }
}
Example 5
Source File: KafkaMessageSender.scala From model-serving-tutorial with Apache License 2.0
package com.lightbend.modelserving.client

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer

class MessageSender(val brokers: String) {

  import MessageSender._

  val producer = new KafkaProducer[Array[Byte], Array[Byte]](
    providerProperties(brokers, classOf[ByteArraySerializer].getName, classOf[ByteArraySerializer].getName))

  def writeKeyValue(topic: String, key: Array[Byte], value: Array[Byte]): Unit = {
    val result = producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, key, value)).get
    producer.flush()
  }

  def writeValue(topic: String, value: Array[Byte]): Unit = {
    val result = producer.send(
      new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get
    producer.flush()
  }

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
}
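The constructor above calls a providerProperties helper from the MessageSender companion object, which this listing omits. A plausible sketch of such a helper (a hypothetical reconstruction, not the tutorial's exact code) would simply populate the usual ProducerConfig keys:

object MessageSender {
  import java.util.Properties
  import org.apache.kafka.clients.producer.ProducerConfig

  // Hypothetical reconstruction of the omitted helper: builds the Properties
  // passed to the KafkaProducer constructor above.
  def providerProperties(brokers: String, keySerializer: String, valueSerializer: String): Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializer)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializer)
    props
  }
}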
Example 6
Source File: KafkaMessageSender.scala From kafka-with-akka-streams-kafka-streams-tutorial with Apache License 2.0
package com.lightbend.scala.kafka

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
}
Example 7
Source File: package.scala From Waves with MIT License
package com.wavesplatform.events

import java.util

import com.wavesplatform.events.protobuf.PBEvents
import com.wavesplatform.events.settings.BlockchainUpdatesSettings
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.config.SaslConfigs
import org.apache.kafka.common.serialization.{IntegerSerializer, Serializer}

package object kafka {

  private object BlockchainUpdatedSerializer extends Serializer[BlockchainUpdated] {
    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
    override def close(): Unit = {}
    override def serialize(topic: String, data: BlockchainUpdated): Array[Byte] =
      PBEvents.protobuf(data).toByteArray
  }

  private object IntSerializer extends Serializer[Int] {
    val integerSerializer = new IntegerSerializer

    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit =
      integerSerializer.configure(configs, isKey)
    override def close(): Unit = integerSerializer.close()
    override def serialize(topic: String, data: Int): Array[Byte] =
      integerSerializer.serialize(topic, data)
  }

  def createProperties(settings: BlockchainUpdatesSettings): util.Properties = {
    val props = new util.Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, settings.bootstrapServers)
    props.put(ProducerConfig.CLIENT_ID_CONFIG, settings.clientId)
    // props.put(ProducerConfig.RETRIES_CONFIG, "0")

    // SASL_SSL
    if (settings.ssl.enabled) {
      props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SASL_SSL")
      props.put(SaslConfigs.SASL_MECHANISM, "PLAIN")
      props.put(
        SaslConfigs.SASL_JAAS_CONFIG,
        s"org.apache.kafka.common.security.plain.PlainLoginModule required username = '${settings.ssl.username}' password = '${settings.ssl.password}';"
      )
    }
    props
  }

  def createProducerProperties(settings: BlockchainUpdatesSettings): util.Properties = {
    val props = createProperties(settings)
    props.put(ProducerConfig.ACKS_CONFIG, "all")
    props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, "10485760") // 10MB
    props
  }

  def createProducer(settings: BlockchainUpdatesSettings): KafkaProducer[Int, BlockchainUpdated] =
    new KafkaProducer[Int, BlockchainUpdated](createProducerProperties(settings), IntSerializer, BlockchainUpdatedSerializer)

  def createProducerRecord(topic: String, event: BlockchainUpdated): ProducerRecord[Int, BlockchainUpdated] = {
    val h = event match {
      case ap: BlockAppended                      => ap.toHeight
      case MicroBlockAppended(_, height, _, _, _) => height
      case RollbackCompleted(_, height)           => height
      case MicroBlockRollbackCompleted(_, height) => height
    }
    new ProducerRecord[Int, BlockchainUpdated](topic, h, event)
  }
}
Example 8
Source File: KafkaWordCount.scala From BigDatalog with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord}

import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf

object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
// scalastyle:on println
Example 9
Source File: ConfigurationLoaderSpec.scala From haystack-traces with Apache License 2.0
package com.expedia.www.haystack.trace.indexer.unit

import com.expedia.www.haystack.trace.commons.packer.PackerType
import com.expedia.www.haystack.trace.indexer.config.ProjectConfiguration
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.scalatest.{FunSpec, Matchers}

class ConfigurationLoaderSpec extends FunSpec with Matchers {

  val project = new ProjectConfiguration()

  describe("Configuration loader") {
    it("should load the health status config from base.conf") {
      project.healthStatusFilePath shouldEqual "/app/isHealthy"
    }

    it("should load the span buffer config only from base.conf") {
      val config = project.spanAccumulateConfig
      config.pollIntervalMillis shouldBe 2000L
      config.maxEntriesAllStores shouldBe 20000
      config.bufferingWindowMillis shouldBe 10000L
      config.packerType shouldEqual PackerType.SNAPPY
    }

    it("should load the kafka config from base.conf and one stream property from env variable") {
      val kafkaConfig = project.kafkaConfig
      kafkaConfig.produceTopic shouldBe "span-buffer"
      kafkaConfig.consumeTopic shouldBe "spans"
      kafkaConfig.numStreamThreads shouldBe 2
      kafkaConfig.commitOffsetRetries shouldBe 3
      kafkaConfig.commitBackoffInMillis shouldBe 200
      kafkaConfig.maxWakeups shouldBe 5
      kafkaConfig.wakeupTimeoutInMillis shouldBe 5000

      kafkaConfig.consumerProps.getProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG) shouldBe "kafkasvc:9092"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG) shouldBe "earliest"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.GROUP_ID_CONFIG) shouldBe "haystack-trace-indexer"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG) shouldBe "false"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG) shouldBe "org.apache.kafka.common.serialization.StringDeserializer"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG) shouldBe "com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer"
      kafkaConfig.consumerCloseTimeoutInMillis shouldBe 30000

      kafkaConfig.producerProps.getProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG) shouldBe "kafkasvc:9092"
      kafkaConfig.producerProps.getProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG) shouldBe "org.apache.kafka.common.serialization.ByteArraySerializer"
      kafkaConfig.producerProps.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG) shouldBe "org.apache.kafka.common.serialization.StringSerializer"
    }

    it("should load the service metadata config from base.conf") {
      val config = project.serviceMetadataWriteConfig
      config.flushIntervalInSec shouldBe 60
      config.flushOnMaxOperationCount shouldBe 10000
      config.esEndpoint shouldBe "http://elasticsearch:9200"
      config.maxInFlightBulkRequests shouldBe 10
      config.maxDocsInBulk shouldBe 100
      config.maxBulkDocSizeInBytes shouldBe 1000000
      config.indexTemplateJson shouldBe Some("some_template_json")
      config.consistencyLevel shouldBe "one"
      config.readTimeoutMillis shouldBe 5000
      config.connectionTimeoutMillis shouldBe 10000
      config.indexName shouldBe "service-metadata"
      config.indexType shouldBe "metadata"
      config.retryConfig.maxRetries shouldBe 10
      config.retryConfig.backOffInMillis shouldBe 100
      config.retryConfig.backoffFactor shouldBe 2
    }

    it("should load the trace backend config from base.conf and few properties overridden from env variable") {
      val backendConfiguration = project.backendConfig
      backendConfiguration.maxInFlightRequests shouldBe 100
    }

    it("should load the elastic search config from base.conf and one property overridden from env variable") {
      val elastic = project.elasticSearchConfig
      elastic.endpoint shouldBe "http://elasticsearch:9200"
      elastic.maxInFlightBulkRequests shouldBe 10
      elastic.maxDocsInBulk shouldBe 100
      elastic.maxBulkDocSizeInBytes shouldBe 1000000
      elastic.indexTemplateJson shouldBe Some("some_template_json")
      elastic.consistencyLevel shouldBe "one"
      elastic.readTimeoutMillis shouldBe 5000
      elastic.connectionTimeoutMillis shouldBe 10000
      elastic.indexNamePrefix shouldBe "haystack-test"
      elastic.indexType shouldBe "spans"
      elastic.retryConfig.maxRetries shouldBe 10
      elastic.retryConfig.backOffInMillis shouldBe 1000
      elastic.retryConfig.backoffFactor shouldBe 2
      elastic.indexHourBucket shouldBe 6
      elastic.maxConnectionsPerRoute shouldBe 10
      elastic.awsRequestSigningConfiguration.enabled shouldEqual false
      elastic.awsRequestSigningConfiguration.region shouldEqual "us-west-2"
      elastic.awsRequestSigningConfiguration.awsServiceName shouldEqual "es"
      elastic.awsRequestSigningConfiguration.accessKey shouldBe None
      elastic.awsRequestSigningConfiguration.secretKey shouldBe None
    }
  }
}
Example 10
Source File: KafkaTestClient.scala From haystack-traces with Apache License 2.0
package com.expedia.www.haystack.trace.indexer.integration.clients

import java.util.Properties

import com.expedia.www.haystack.trace.indexer.config.entities.KafkaConfiguration
import com.expedia.www.haystack.trace.indexer.integration.serdes.{SnappyCompressedSpanBufferProtoDeserializer, SpanProtoSerializer}
import com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster

object KafkaTestClient {
  val KAFKA_CLUSTER = new EmbeddedKafkaCluster(1)
  KAFKA_CLUSTER.start()
}

class KafkaTestClient {

  import KafkaTestClient._

  val INPUT_TOPIC = "spans"
  val OUTPUT_TOPIC = "span-buffer"

  val APP_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[ByteArraySerializer])
    props
  }

  val APP_CONSUMER_CONFIG: Properties = new Properties()

  val TEST_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[SpanProtoSerializer])
    props
  }

  val RESULT_CONSUMER_CONFIG = new Properties()

  def buildConfig = KafkaConfiguration(numStreamThreads = 1,
    pollTimeoutMs = 100,
    APP_CONSUMER_CONFIG,
    APP_PRODUCER_CONFIG,
    OUTPUT_TOPIC,
    INPUT_TOPIC,
    consumerCloseTimeoutInMillis = 3000,
    commitOffsetRetries = 3,
    commitBackoffInMillis = 250,
    maxWakeups = 5,
    wakeupTimeoutInMillis = 3000)

  def prepare(appId: String): Unit = {
    APP_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    APP_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-app-consumer")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SpanDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")

    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-result-consumer")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SnappyCompressedSpanBufferProtoDeserializer])

    deleteTopics(INPUT_TOPIC, OUTPUT_TOPIC)
    KAFKA_CLUSTER.createTopic(INPUT_TOPIC, 2, 1)
    KAFKA_CLUSTER.createTopic(OUTPUT_TOPIC)
  }

  private def deleteTopics(topics: String*): Unit = KAFKA_CLUSTER.deleteTopicsAndWait(topics: _*)
}
Example 11
Source File: KafkaWordCount.scala From spark1.52 with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka.KafkaUtils

object KafkaWordCount {
  def main(args: Array[String]) {
    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) =
      Array("localhost:2181", "", "topic1,topic2,topic3,topic4", "1") // args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount").setMaster("local")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
// scalastyle:on println
Example 12
Source File: KafkaWordCount.scala From iolap with Apache License 2.0
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord}

import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf

object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
Example 13
Source File: EmbeddedKsqlEngine.scala From ksql-jdbc-driver with Apache License 2.0
package com.github.mmolimar.ksql.jdbc.embedded

import java.io.IOException

import com.github.mmolimar.ksql.jdbc.utils.TestUtils
import io.confluent.ksql.rest.server.{KsqlRestApplication, KsqlRestConfig}
import io.confluent.ksql.version.metrics.VersionCheckerAgent
import io.confluent.rest.RestConfig
import kafka.utils.Logging
import org.apache.kafka.clients.producer.ProducerConfig
import org.scalamock.scalatest.MockFactory
import io.confluent.ksql.util.KsqlConfig

import scala.collection.JavaConverters._

class EmbeddedKsqlEngine(port: Int = TestUtils.getAvailablePort, brokerList: String, connectUrl: String)
  extends Logging with MockFactory {

  private val config = new KsqlRestConfig(Map(
    RestConfig.LISTENERS_CONFIG -> s"http://localhost:$port",
    ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokerList,
    KsqlConfig.CONNECT_URL_PROPERTY -> connectUrl,
    "ksql.service.id" -> "ksql-jdbc",
    "ksql.streams.auto.offset.reset" -> "latest",
    "ksql.command.topic.suffix" -> "commands"
  ).asJava)

  lazy val ksqlEngine: KsqlRestApplication = {
    import io.confluent.ksql.rest.server.mock.ksqlRestApplication

    val versionCheckerAgent = mock[VersionCheckerAgent]
    (versionCheckerAgent.start _).expects(*, *).returns((): Unit).anyNumberOfTimes
    (versionCheckerAgent.updateLastRequestTime _).expects().returns((): Unit).anyNumberOfTimes

    ksqlRestApplication(config, versionCheckerAgent)
  }

  @throws[IOException]
  def startup(): Unit = {
    info("Starting up embedded KSQL engine")
    ksqlEngine.start()
    info("Started embedded KSQL engine: " + getConnection)
  }

  def shutdown(): Unit = {
    info("Shutting down embedded KSQL engine")
    TestUtils.swallow(ksqlEngine.stop())
    info("Stopped embedded KSQL engine")
  }

  def getPort: Int = port

  def getConnection: String = "localhost:" + getPort

  override def toString: String = {
    val sb: StringBuilder = new StringBuilder("KSQL{")
    sb.append("connection=").append(getConnection)
    sb.append('}')
    sb.toString
  }
}
Example 14
Source File: KafkaWordCount.scala From multi-tenancy-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._

object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
// scalastyle:on println
Example 15
Source File: ThrottledSimulation.scala From gatling-kafka with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class ThrottledSimulation extends Simulation {
  val kafkaConf = kafka
    // Kafka topic name
    .topic("test")
    // Kafka producer configs
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        // list of Kafka broker hostname and port pairs
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        // in most cases, StringSerializer or ByteArraySerializer
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer"))

  val scn = scenario("Kafka Test")
    .forever(
      exec(
        kafka("request")
          // message to send
          .send[String]("foo"))
    )

  setUp(
    scn.inject(atOnceUsers(10)))
    .throttle(jumpToRps(10), holdFor(30 seconds))
    .protocols(kafkaConf)
}
Example 16
Source File: FeederSimulation.scala From gatling-kafka with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class FeederSimulation extends Simulation {
  val kafkaConf = kafka
    .topic("test")
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer"))

  val scn = scenario("Kafka Test")
    // You can also use feeder
    .feed(csv("test.csv").circular)
    .exec(kafka("request").send[String]("${foo}"))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
}
Example 17
Source File: ByteArraySimulation.scala From gatling-kafka with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class ByteArraySimulation extends Simulation {
  val kafkaConf = kafka
    .topic("test")
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.ByteArraySerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.ByteArraySerializer"))

  val scn = scenario("Kafka Test")
    .exec(kafka("request").send("foo".getBytes: Array[Byte]))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
}
Example 18
Source File: FeederByteArraySimulation.scala From gatling-kafka with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class FeederByteArraySimulation extends Simulation {
  val kafkaConf = kafka
    .topic("test")
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.ByteArraySerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.ByteArraySerializer"))

  val scn = scenario("Kafka Test")
    .feed(csv("test.csv").circular)
    .exec(
      kafka("request")
        .send(session => session("foo").validate[String].map(s => s.getBytes)))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
}
Example 19
Source File: FeederKeyValueSimulation.scala From gatling-kafka with Apache License 2.0
package com.github.mnogu.gatling.kafka.test

import io.gatling.core.Predef._
import org.apache.kafka.clients.producer.ProducerConfig
import scala.concurrent.duration._

import com.github.mnogu.gatling.kafka.Predef._

class FeederKeyValueSimulation extends Simulation {
  val kafkaConf = kafka
    .topic("test")
    .properties(
      Map(
        ProducerConfig.ACKS_CONFIG -> "1",
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
        ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer",
        ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringSerializer"))

  val scn = scenario("Kafka Test")
    .feed(csv("kv.csv").circular)
    // You can also set the key that will be included in the record.
    //
    // The content of the CSV file above would be like this:
    // key,value
    // k1,v1
    // k2,v2
    // k3,v3
    // ...
    //
    // And each line corresponds to a record sent to Kafka.
    .exec(kafka("request").send[String, String]("${key}", "${value}"))

  setUp(
    scn
      .inject(constantUsersPerSec(10) during(90 seconds)))
    .protocols(kafkaConf)
}
Example 20
Source File: KafkaWordCount.scala From sparkoscope with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._

object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
// scalastyle:on println
Example 21
Source File: EmbeddedKafkaCustomConfigSpec.scala From embedded-kafka with MIT License
package net.manub.embeddedkafka

import kafka.server.KafkaConfig
import net.manub.embeddedkafka.EmbeddedKafka._
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig

import scala.language.postfixOps
import scala.util.Random

class EmbeddedKafkaCustomConfigSpec extends EmbeddedKafkaSpecSupport {
  final val TwoMegabytes = 2097152
  final val ThreeMegabytes = 3145728

  "the custom config" should {
    "allow pass additional producer parameters" in {
      val customBrokerConfig = Map(
        KafkaConfig.ReplicaFetchMaxBytesProp -> s"$ThreeMegabytes",
        KafkaConfig.MessageMaxBytesProp -> s"$ThreeMegabytes"
      )

      val customProducerConfig =
        Map(ProducerConfig.MAX_REQUEST_SIZE_CONFIG -> s"$ThreeMegabytes")
      val customConsumerConfig = Map(
        ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> s"$ThreeMegabytes"
      )

      implicit val customKafkaConfig: EmbeddedKafkaConfig = EmbeddedKafkaConfig(
        customBrokerProperties = customBrokerConfig,
        customProducerProperties = customProducerConfig,
        customConsumerProperties = customConsumerConfig
      )

      val bigMessage = generateMessageOfLength(TwoMegabytes)
      val topic = "big-message-topic"

      withRunningKafka {
        publishStringMessageToKafka(topic, bigMessage)
        consumeFirstStringMessageFrom(topic) shouldBe bigMessage
      }
    }
  }

  def generateMessageOfLength(length: Int): String =
    Iterator.continually(Random.nextPrintableChar) take length mkString
}
Example 22
Source File: MultiDataStreamer.scala From structured-streaming-application with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.language.postfixOps
import scala.util.Random

object MultiDataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 3000 milliseconds) {
    (1 to Random.nextInt(100)).foreach { id =>
      producer.send(new ProducerRecord[String, String](topic, s"device$id", (Math.random * 2 + 1).toString))
    }
  }
}
Example 23
Source File: StreamStaticDataGenerator.scala From structured-streaming-application with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import knolx.spark.Stock
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random

object StreamStaticDataGenerator extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay")
  val orderTypes = List("buy", "sell")
  val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9)

  implicit val formats = Serialization.formats(NoTypeHints)

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 5 seconds) {
    companyNames.foreach { name =>
      val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head)
      producer.send(new ProducerRecord[String, String](topic, write(stock)))
    }
  }
}
Example 24
Source File: DataStreamer.scala From structured-streaming-application with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random

object DataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val someWords = List("about", "above", "after", "again", "against")

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 200 milliseconds) {
    Random.shuffle(someWords).headOption.foreach { word =>
      producer.send(new ProducerRecord[String, String](topic, word))
    }
  }
}
Example 25
Source File: StreamStreamDataGenerator.scala From structured-streaming-application with Apache License 2.0
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config._
import knolx.KnolXLogger
import knolx.spark.Stock
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random

object StreamStreamDataGenerator extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay")
  val orderTypes = List("buy", "sell")
  val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9)
  val randomCompanyNames = Random.shuffle(companyNames).drop(Random.shuffle((1 to 3).toList).head)

  implicit val formats = Serialization.formats(NoTypeHints)

  info("Streaming companies listed into Kafka...")
  system.scheduler.schedule(0 seconds, 20 seconds) {
    randomCompanyNames.foreach { name =>
      producer.send(new ProducerRecord[String, String](companiesTopic, name))
    }
  }

  info("Streaming stocks data into Kafka...")
  system.scheduler.schedule(0 seconds, 5 seconds) {
    companyNames.foreach { name =>
      val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head)
      producer.send(new ProducerRecord[String, String](stocksTopic, write(stock)))
    }
  }
}
Example 26
Source File: ProducerSettings.scala From zio-kafka with Apache License 2.0
package zio.kafka.producer

import org.apache.kafka.clients.producer.ProducerConfig
import zio.duration._

case class ProducerSettings(
  bootstrapServers: List[String],
  closeTimeout: Duration,
  properties: Map[String, AnyRef]
) {
  def driverSettings: Map[String, AnyRef] =
    Map(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> bootstrapServers.mkString(",")) ++ properties

  def withBootstrapServers(servers: List[String]): ProducerSettings =
    copy(bootstrapServers = servers)

  def withCloseTimeout(duration: Duration): ProducerSettings =
    copy(closeTimeout = duration)

  def withProperty(key: String, value: AnyRef): ProducerSettings =
    copy(properties = properties + (key -> value))

  def withProperties(kvs: (String, AnyRef)*): ProducerSettings =
    withProperties(kvs.toMap)

  def withProperties(kvs: Map[String, AnyRef]): ProducerSettings =
    copy(properties = properties ++ kvs)
}

object ProducerSettings {
  def apply(bootstrapServers: List[String]): ProducerSettings =
    new ProducerSettings(bootstrapServers, 30.seconds, Map())
}
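A short sketch of how these settings compose (the broker address is a placeholder):

import org.apache.kafka.clients.producer.ProducerConfig
import zio.duration._
import zio.kafka.producer.ProducerSettings

object ProducerSettingsExample {
  // "localhost:9092" is a placeholder broker address.
  val settings: ProducerSettings =
    ProducerSettings(List("localhost:9092")) // starts with the 30-second default close timeout
      .withCloseTimeout(10.seconds)
      .withProperty(ProducerConfig.ACKS_CONFIG, "all")

  // driverSettings now holds "bootstrap.servers" plus the extra "acks" entry,
  // ready to be handed to the underlying KafkaProducer.
  val driver: Map[String, AnyRef] = settings.driverSettings
}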
Example 27
Source File: KafkaTransmitter.scala From trucking-iot with Apache License 2.0
package com.orendainx.trucking.simulator.transmitters

import java.util.Properties

import akka.actor.{ActorLogging, Props}
import com.orendainx.trucking.simulator.transmitters.DataTransmitter.Transmit
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

import scala.sys.SystemProperties
import com.typesafe.config.Config

object KafkaTransmitter {
  def props(topic: String)(implicit config: Config) = Props(new KafkaTransmitter(topic))
}

class KafkaTransmitter(topic: String)(implicit config: Config) extends DataTransmitter with ActorLogging {

  private val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.getString("transmitter.kafka.bootstrap-servers"))
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.key-serializer"))
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.value-serializer"))

  // Enable settings for a secure environment, if necessary.
  // See: http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.4/bk_secure-kafka-ambari/content/ch_secure-kafka-produce-events.html
  val systemProperties = new SystemProperties
  if (config.getBoolean("transmitter.kafka.security-enabled")) {
    props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, config.getString("transmitter.kafka.security-protocol"))
    systemProperties.put("java.security.auth.login.config", config.getString("transmitter.kafka.jaas-file"))
  }

  private val producer = new KafkaProducer[String, String](props)

  def receive = {
    case Transmit(data) => producer.send(new ProducerRecord(topic, data.toCSV))
  }

  override def postStop(): Unit = {
    producer.close()
    log.info("KafkaTransmitter closed its producer.")
  }
}
Example 28
Source File: EventProducer.scala From rokku with Apache License 2.0
package com.ing.wbaa.rokku.proxy.provider.kafka

import akka.Done
import akka.http.scaladsl.model.HttpMethod
import com.ing.wbaa.rokku.proxy.config.KafkaSettings
import com.ing.wbaa.rokku.proxy.data.RequestId
import com.ing.wbaa.rokku.proxy.handler.LoggerHandlerWithId
import com.ing.wbaa.rokku.proxy.metrics.MetricsFactory
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata }
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.{ ExecutionContext, Future }

trait EventProducer {

  private val logger = new LoggerHandlerWithId

  import scala.collection.JavaConverters._

  protected[this] implicit val kafkaSettings: KafkaSettings
  protected[this] implicit val executionContext: ExecutionContext

  private lazy val config: Map[String, Object] = Map[String, Object](
    "bootstrap.servers" -> kafkaSettings.bootstrapServers,
    ProducerConfig.RETRIES_CONFIG -> kafkaSettings.retries,
    ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG -> kafkaSettings.retriesBackOff,
    ProducerConfig.RECONNECT_BACKOFF_MAX_MS_CONFIG -> kafkaSettings.retriesBackOffMax,
    CommonClientConfigs.SECURITY_PROTOCOL_CONFIG -> kafkaSettings.protocol,
    ProducerConfig.MAX_BLOCK_MS_CONFIG -> kafkaSettings.maxblock,
    ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG -> kafkaSettings.requestTimeoutMs,
    "ssl.truststore.location" -> kafkaSettings.sslTruststoreLocation,
    "ssl.truststore.password" -> kafkaSettings.sslTruststorePassword,
    "ssl.keystore.location" -> kafkaSettings.sslKeystoreLocation,
    "ssl.keystore.password" -> kafkaSettings.sslKeystorePassword,
    "ssl.key.password" -> kafkaSettings.sslKeyPassword
  )

  private lazy val kafkaProducer: KafkaProducer[String, String] =
    new KafkaProducer(config.asJava, new StringSerializer, new StringSerializer)

  def sendSingleMessage(event: String, topic: String, httpMethod: Option[HttpMethod] = None)(implicit id: RequestId): Future[Done] = {
    kafkaProducer
      .send(new ProducerRecord[String, String](topic, event), (metadata: RecordMetadata, exception: Exception) => {
        exception match {
          case e: Exception =>
            MetricsFactory.incrementKafkaSendErrors
            logger.error("error in sending event {} to topic {}, error={}", event, topic, e)
            throw new Exception(e)
          case _ =>
            httpMethod.map { m => MetricsFactory.incrementKafkaNotificationsSent(m) }
            logger.debug("Message sent {} to kafka, offset {}", event, metadata.offset())
        }
      }) match {
      case _ => Future(Done)
    }
  }
}
Example 29
Source File: KafkaWordCount.scala From AI with Apache License 2.0
package com.bigchange.basic

import java.util

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaWordCount {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount").
      set("spark.streaming.receiver.writeAheadLog.enable", "true").
      set("spark.streaming.kafka.maxRatePerPartition", "1000")
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Enable checkpointing: the windowed operations below require a checkpoint directory.
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap

    // createStream returns a Tuple2 of (key, value); only the value is used here.
    // Note: this is the receiver-based approach (a non-receiver mode is also provided). With the default
    // configuration it can lose data if the receiver dies, so the write-ahead log is enabled above and
    // the storage level can be adjusted accordingly.
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap,
      StorageLevel.MEMORY_AND_DISK_SER).map(_._2)
    val words = lines.flatMap(_.split(" "))

    // Count words over a 10-second window, computed every 2 seconds.
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Seconds(10), Seconds(2), 2).
      filter(x => x._2 > 0)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    // Note: this is the broker list, in host:port,host:port form.
    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new util.HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(100).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
Example 30
Source File: KafkaWordCount.scala From drizzle-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._

object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (broker list and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }
}
// scalastyle:on println
Example 31
Source File: EmbeddedKafkaCustomConfigSpec.scala From scalatest-embedded-kafka with MIT License
package net.manub.embeddedkafka

import kafka.server.KafkaConfig
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig

import scala.language.postfixOps
import scala.util.Random

class EmbeddedKafkaCustomConfigSpec extends EmbeddedKafkaSpecSupport with EmbeddedKafka {
  val TwoMegabytes = 2097152
  val ThreeMegabytes = 3145728

  "the custom config" should {
    "allow pass additional producer parameters" in {
      val customBrokerConfig =
        Map(KafkaConfig.ReplicaFetchMaxBytesProp -> s"$ThreeMegabytes",
          KafkaConfig.MessageMaxBytesProp -> s"$ThreeMegabytes")

      val customProducerConfig =
        Map(ProducerConfig.MAX_REQUEST_SIZE_CONFIG -> s"$ThreeMegabytes")
      val customConsumerConfig =
        Map(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> s"$ThreeMegabytes")

      implicit val customKafkaConfig =
        EmbeddedKafkaConfig(customBrokerProperties = customBrokerConfig,
          customProducerProperties = customProducerConfig,
          customConsumerProperties = customConsumerConfig)

      val bigMessage = generateMessageOfLength(TwoMegabytes)
      val topic = "big-message-topic"

      withRunningKafka {
        publishStringMessageToKafka(topic, bigMessage)
        consumeFirstStringMessageFrom(topic) shouldBe bigMessage
      }
    }
  }

  def generateMessageOfLength(length: Int): String =
    Stream.continually(Random.nextPrintableChar) take length mkString
}