org.apache.kafka.clients.producer.KafkaProducer Scala Examples
The following examples show how to use org.apache.kafka.clients.producer.KafkaProducer from Scala. Each example is an excerpt from an open-source project; the source file, project, and license are noted in the header above each listing.
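Before the project examples, here is a minimal, self-contained sketch of the pattern most of them follow: configure java.util.Properties with bootstrap servers and serializers, create the producer, send a ProducerRecord, and close it. The broker address and topic name are placeholders.

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

object MinimalProducer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") // placeholder broker
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)
    try {
      // send() is asynchronous; get() blocks until the broker acknowledges the record,
      // which surfaces any error immediately in this small demo.
      val metadata = producer.send(new ProducerRecord[String, String]("demo-topic", "key", "value")).get()
      println(s"Wrote to ${metadata.topic()}-${metadata.partition()} at offset ${metadata.offset()}")
    } finally {
      producer.close()
    }
  }
}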
Example 1
Source File: KafkaClient.scala From incubator-retired-gearpump with Apache License 2.0 | 6 votes |
package org.apache.gearpump.streaming.kafka.lib.util import kafka.admin.AdminUtils import kafka.cluster.Broker import kafka.common.TopicAndPartition import kafka.consumer.SimpleConsumer import kafka.utils.{ZKStringSerializer, ZkUtils} import org.I0Itec.zkclient.ZkClient import org.apache.gearpump.streaming.kafka.lib.source.consumer.KafkaConsumer import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.util.LogUtil import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.common.serialization.Serializer object KafkaClient { private val LOG = LogUtil.getLogger(classOf[KafkaClient]) val factory = new KafkaClientFactory class KafkaClientFactory extends java.io.Serializable { def getKafkaClient(config: KafkaConfig): KafkaClient = { val consumerConfig = config.getConsumerConfig val zkClient = new ZkClient(consumerConfig.zkConnect, consumerConfig.zkSessionTimeoutMs, consumerConfig.zkConnectionTimeoutMs, ZKStringSerializer) new KafkaClient(config, zkClient) } } } class KafkaClient(config: KafkaConfig, zkClient: ZkClient) { import org.apache.gearpump.streaming.kafka.lib.util.KafkaClient._ private val consumerConfig = config.getConsumerConfig def getTopicAndPartitions(consumerTopics: List[String]): Array[TopicAndPartition] = { try { ZkUtils.getPartitionsForTopics(zkClient, consumerTopics).flatMap { case (topic, partitions) => partitions.map(TopicAndPartition(topic, _)) }.toArray } catch { case e: Exception => LOG.error(e.getMessage) throw e } } def getBroker(topic: String, partition: Int): Broker = { try { val leader = ZkUtils.getLeaderForPartition(zkClient, topic, partition) .getOrElse(throw new RuntimeException( s"leader not available for TopicAndPartition($topic, $partition)")) ZkUtils.getBrokerInfo(zkClient, leader) .getOrElse(throw new RuntimeException(s"broker info not found for leader $leader")) } catch { case e: Exception => LOG.error(e.getMessage) throw e } } def createConsumer(topic: String, partition: Int, startOffsetTime: Long): KafkaConsumer = { val broker = getBroker(topic, partition) val soTimeout = consumerConfig.socketTimeoutMs val soBufferSize = consumerConfig.socketReceiveBufferBytes val clientId = consumerConfig.clientId val fetchSize = consumerConfig.fetchMessageMaxBytes val consumer = new SimpleConsumer(broker.host, broker.port, soTimeout, soBufferSize, clientId) KafkaConsumer(topic, partition, startOffsetTime, fetchSize, consumer) } def createProducer[K, V](keySerializer: Serializer[K], valueSerializer: Serializer[V]): KafkaProducer[K, V] = { new KafkaProducer[K, V](config.getProducerConfig, keySerializer, valueSerializer) } def createTopic(topic: String, partitions: Int, replicas: Int): Boolean = { try { if (AdminUtils.topicExists(zkClient, topic)) { LOG.info(s"topic $topic exists") true } else { AdminUtils.createTopic(zkClient, topic, partitions, replicas) LOG.info(s"created topic $topic") false } } catch { case e: Exception => LOG.error(e.getMessage) throw e } } def close(): Unit = { zkClient.close() } }
Example 2
Source File: KafkaReporter.scala From Swallow with Apache License 2.0 | 5 votes |
package com.intel.hibench.common.streaming.metrics

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

class KafkaReporter(topic: String, bootstrapServers: String) extends LatencyReporter {

  private val producer = ProducerSingleton.getInstance(bootstrapServers)

  override def report(startTime: Long, endTime: Long): Unit = {
    producer.send(new ProducerRecord[String, String](topic, null, s"$startTime:$endTime"))
  }
}

object ProducerSingleton {
  @volatile private var instance: Option[KafkaProducer[String, String]] = None

  def getInstance(bootstrapServers: String): KafkaProducer[String, String] = synchronized {
    if (!instance.isDefined) {
      synchronized {
        if (!instance.isDefined) {
          val props = new Properties()
          props.put("bootstrap.servers", bootstrapServers)
          instance = Some(new KafkaProducer(props, new StringSerializer, new StringSerializer))
        }
      }
    }
    instance.get
  }
}
Example 3
Source File: TransactionalProducer.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.kafka

import java.util.Properties

import akka.actor.Actor
import akka.actor.Status.{Failure, Success}
import akka.event.Logging
import com.typesafe.config.Config
import io.amient.affinity.Conf
import io.amient.affinity.core.actor.{TransactionAbort, TransactionBegin, TransactionCommit, TransactionalRecord}
import io.amient.affinity.core.config.CfgStruct
import io.amient.affinity.core.storage.StorageConf
import io.amient.affinity.kafka.KafkaStorage.{KafkaConsumerConf, KafkaProducerConf}
import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer

import scala.collection.JavaConverters._

object KafkaConf extends KafkaConf {
  override def apply(config: Config): KafkaConf = new KafkaConf().apply(config)
}

class KafkaConf extends CfgStruct[KafkaConf](classOf[StorageConf]) {
  val BootstrapServers = string("kafka.bootstrap.servers", true)
    .doc("kafka connection string used for consumer and/or producer")
  val Producer = struct("kafka.producer", new KafkaProducerConf, false)
    .doc("any settings that the underlying version of kafka producer client supports")
  val Consumer = struct("kafka.consumer", new KafkaConsumerConf, false)
    .doc("any settings that the underlying version of kafka consumer client supports")
}

class TransactionalProducer extends Actor {

  val logger = Logging.getLogger(context.system, this)

  private[this] var producer: KafkaProducer[Array[Byte], Array[Byte]] = null

  val kafkaConf = KafkaConf(Conf(context.system.settings.config).Affi.Storage)

  val producerConfig = new Properties() {
    if (kafkaConf.Producer.isDefined) {
      val producerConfig = kafkaConf.Producer.toMap()
      if (producerConfig.containsKey("bootstrap.servers"))
        throw new IllegalArgumentException("bootstrap.servers cannot be overridden for KafkaStorage producer")
      if (producerConfig.containsKey("key.serializer"))
        throw new IllegalArgumentException("Binary kafka stream cannot use custom key.serializer")
      if (producerConfig.containsKey("value.serializer"))
        throw new IllegalArgumentException("Binary kafka stream cannot use custom value.serializer")
      producerConfig.entrySet.asScala.filter(_.getValue.isDefined).foreach { case (entry) =>
        put(entry.getKey, entry.getValue.apply.toString)
      }
    }
    put("bootstrap.servers", kafkaConf.BootstrapServers())
    put("value.serializer", classOf[ByteArraySerializer].getName)
    put("key.serializer", classOf[ByteArraySerializer].getName)
  }

  override def receive: Receive = {
    case req@TransactionBegin(transactionalId) => req(sender) ! {
      if (producer == null) {
        producerConfig.put("transactional.id", transactionalId)
        producer = new KafkaProducer[Array[Byte], Array[Byte]](producerConfig)
        logger.debug(s"Transactions.Init(transactional.id = $transactionalId)")
        producer.initTransactions()
      }
      logger.debug("Transactions.Begin()")
      producer.beginTransaction()
    }

    case TransactionalRecord(topic, key, value, timestamp, partition) =>
      val replyto = sender
      val producerRecord = new ProducerRecord(
        topic,
        partition.map(new Integer(_)).getOrElse(null),
        timestamp.map(new java.lang.Long(_)).getOrElse(null),
        key,
        value)
      logger.debug(s"Transactions.Append(topic=$topic)")
      producer.send(producerRecord, new Callback {
        override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
          if (exception != null) {
            replyto ! Failure(exception)
          } else {
            replyto ! Success(metadata.offset())
          }
        }
      })

    case req@TransactionCommit() => req(sender) ! {
      logger.debug("Transactions.Commit()")
      producer.commitTransaction()
    }

    case req@TransactionAbort() => req(sender) ! {
      logger.debug("Transactions.Abort()")
      producer.abortTransaction()
    }
  }
}
Example 4
Source File: KafkaWordCount.scala From spark1.52 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka.KafkaUtils

// Note: the scraped excerpt omitted the enclosing object and main() declaration;
// they are restored here, following the upstream Spark example, so the snippet compiles.
object KafkaWordCount {
  def main(args: Array[String]) {
    StreamingExamples.setStreamingLogLevels()

    // This fork hardcodes the arguments instead of reading them from args.
    val Array(zkQuorum, group, topics, numThreads) =
      Array("localhost:2181", "", "topic1,topic2,topic3,topic4", "1") // args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount").setMaster("local")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces some random words between 1 and 100.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Producer (broker) connection properties
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")
        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }
      Thread.sleep(1000)
    }
  }
}
// scalastyle:on println
Example 5
Source File: SessionKafkaProducer.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.session import java.util.{Properties, Random} import net.liftweb.json.DefaultFormats import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import net.liftweb.json.Serialization.write object SessionKafkaProducer { def main(args:Array[String]): Unit = { implicit val formats = DefaultFormats val kafkaServerURL = args(0) val kafkaServerPort = args(1) val topic = args(2) val numberOfEntities = args(3).toInt val numberOfMessagesPerEntity = args(4).toInt val waitTimeBetweenMessageBatch = args(5).toInt val chancesOfMissing = args(6).toInt val props = new Properties() props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) props.put("acks", "all") props.put("retries", "0") props.put("batch.size", "16384") props.put("linger.ms", "1") props.put("buffer.memory", "33554432") props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) val r = new Random() var sentCount = 0 println("About to send to " + topic) for (j <- 0 to numberOfMessagesPerEntity) { for (i <- 0 to numberOfEntities) { if (r.nextInt(chancesOfMissing) != 0) { val message = write(HeartBeat(i.toString, System.currentTimeMillis())) val producerRecord = new ProducerRecord[String,String](topic, message) producer.send(producerRecord) sentCount += 1 } } println("Sent Count:" + sentCount) Thread.sleep(waitTimeBetweenMessageBatch) } producer.close() } }
Example 6
Source File: KafkaClient.scala From mist with Apache License 2.0 | 5 votes |
package io.hydrosphere.mist.master.interfaces.async.kafka import java.util.UUID import java.util.concurrent.atomic.AtomicBoolean import org.apache.kafka.clients.consumer.KafkaConsumer import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} import scala.collection.JavaConverters._ import scala.concurrent.{Future, Promise} class TopicProducer[K, V]( producer: KafkaProducer[K, V], topic: String ) { def send(key:K, value: V): Unit = { val record = new ProducerRecord(topic, key, value) producer.send(record) } def close(): Unit = { producer.close() } } object TopicProducer { def apply( host: String, port: Int, topic: String): TopicProducer[String, String] = { val props = new java.util.Properties() props.put("bootstrap.servers", s"$host:$port") val producer = new KafkaProducer(props, new StringSerializer, new StringSerializer) new TopicProducer(producer, topic) } } class TopicConsumer[K, V]( consumer: KafkaConsumer[K, V], topic: String, timeout: Long = 100 ) { private val promise = Promise[Unit] private val stopped = new AtomicBoolean(false) def subscribe(f: (K, V) => Unit): Future[Unit] = { run(f) promise.future } private def run(f: (K, V) => Unit): Unit = { consumer.subscribe(Seq(topic).asJava) val thread = new Thread(new Runnable { override def run(): Unit = { while (!stopped.get()) { val records = consumer.poll(timeout).asScala records.foreach(r => f(r.key(), r.value())) } promise.success(()) } }) thread.setName(s"kafka-topic-consumer-$topic") thread.start() } def close(): Future[Unit] = { stopped.set(true) promise.future } } object TopicConsumer { def apply( host: String, port: Int, topic: String): TopicConsumer[String, String] = { val props = new java.util.Properties() props.put("bootstrap.servers", s"$host:$port") props.put("group.id", "mist-" + UUID.randomUUID().toString) props.put("enable.auto.commit", "true") props.put("auto.commit.interval.ms", "1000") props.put("session.timeout.ms", "30000") val consumer = new KafkaConsumer(props, new StringDeserializer, new StringDeserializer) new TopicConsumer(consumer, topic) } }
Example 7
Source File: CachedKafkaProducer.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010 import java.{util => ju} import java.util.concurrent.{ConcurrentMap, ExecutionException, TimeUnit} import com.google.common.cache._ import com.google.common.util.concurrent.{ExecutionError, UncheckedExecutionException} import org.apache.kafka.clients.producer.KafkaProducer import scala.collection.JavaConverters._ import scala.util.control.NonFatal import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging private[kafka010] object CachedKafkaProducer extends Logging { private type Producer = KafkaProducer[Array[Byte], Array[Byte]] private lazy val cacheExpireTimeout: Long = SparkEnv.get.conf.getTimeAsMs("spark.kafka.producer.cache.timeout", "10m") private val cacheLoader = new CacheLoader[Seq[(String, Object)], Producer] { override def load(config: Seq[(String, Object)]): Producer = { val configMap = config.map(x => x._1 -> x._2).toMap.asJava createKafkaProducer(configMap) } } private val removalListener = new RemovalListener[Seq[(String, Object)], Producer]() { override def onRemoval( notification: RemovalNotification[Seq[(String, Object)], Producer]): Unit = { val paramsSeq: Seq[(String, Object)] = notification.getKey val producer: Producer = notification.getValue logDebug( s"Evicting kafka producer $producer params: $paramsSeq, due to ${notification.getCause}") close(paramsSeq, producer) } } private lazy val guavaCache: LoadingCache[Seq[(String, Object)], Producer] = CacheBuilder.newBuilder().expireAfterAccess(cacheExpireTimeout, TimeUnit.MILLISECONDS) .removalListener(removalListener) .build[Seq[(String, Object)], Producer](cacheLoader) private def createKafkaProducer(producerConfiguration: ju.Map[String, Object]): Producer = { val kafkaProducer: Producer = new Producer(producerConfiguration) logDebug(s"Created a new instance of KafkaProducer for $producerConfiguration.") kafkaProducer } private def close(paramsSeq: Seq[(String, Object)], producer: Producer): Unit = { try { logInfo(s"Closing the KafkaProducer with params: ${paramsSeq.mkString("\n")}.") producer.close() } catch { case NonFatal(e) => logWarning("Error while closing kafka producer.", e) } } private def clear(): Unit = { logInfo("Cleaning up guava cache.") guavaCache.invalidateAll() } // Intended for testing purpose only. private def getAsMap: ConcurrentMap[Seq[(String, Object)], Producer] = guavaCache.asMap() }
Example 8
Source File: CachedKafkaProducerSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010 import java.{util => ju} import java.util.concurrent.ConcurrentMap import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.common.serialization.ByteArraySerializer import org.scalatest.PrivateMethodTester import org.apache.spark.sql.test.SharedSQLContext class CachedKafkaProducerSuite extends SharedSQLContext with PrivateMethodTester { type KP = KafkaProducer[Array[Byte], Array[Byte]] protected override def beforeEach(): Unit = { super.beforeEach() val clear = PrivateMethod[Unit]('clear) CachedKafkaProducer.invokePrivate(clear()) } test("Should return the cached instance on calling getOrCreate with same params.") { val kafkaParams = new ju.HashMap[String, Object]() kafkaParams.put("acks", "0") // Here only host should be resolvable, it does not need a running instance of kafka server. kafkaParams.put("bootstrap.servers", "127.0.0.1:9022") kafkaParams.put("key.serializer", classOf[ByteArraySerializer].getName) kafkaParams.put("value.serializer", classOf[ByteArraySerializer].getName) val producer = CachedKafkaProducer.getOrCreate(kafkaParams) val producer2 = CachedKafkaProducer.getOrCreate(kafkaParams) assert(producer == producer2) val cacheMap = PrivateMethod[ConcurrentMap[Seq[(String, Object)], KP]]('getAsMap) val map = CachedKafkaProducer.invokePrivate(cacheMap()) assert(map.size == 1) } test("Should close the correct kafka producer for the given kafkaPrams.") { val kafkaParams = new ju.HashMap[String, Object]() kafkaParams.put("acks", "0") kafkaParams.put("bootstrap.servers", "127.0.0.1:9022") kafkaParams.put("key.serializer", classOf[ByteArraySerializer].getName) kafkaParams.put("value.serializer", classOf[ByteArraySerializer].getName) val producer: KP = CachedKafkaProducer.getOrCreate(kafkaParams) kafkaParams.put("acks", "1") val producer2: KP = CachedKafkaProducer.getOrCreate(kafkaParams) // With updated conf, a new producer instance should be created. assert(producer != producer2) val cacheMap = PrivateMethod[ConcurrentMap[Seq[(String, Object)], KP]]('getAsMap) val map = CachedKafkaProducer.invokePrivate(cacheMap()) assert(map.size == 2) CachedKafkaProducer.close(kafkaParams) val map2 = CachedKafkaProducer.invokePrivate(cacheMap()) assert(map2.size == 1) import scala.collection.JavaConverters._ val (seq: Seq[(String, Object)], _producer: KP) = map2.asScala.toArray.apply(0) assert(_producer == producer) } }
Example 9
Source File: KafkaWordCount.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord} import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ import org.apache.spark.SparkConf object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } } // scalastyle:on println
Example 10
Source File: KafkaBatchProducer.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.kafka2.writer import java.util.Properties import scala.collection.JavaConverters._ import scala.collection.immutable.Map import scala.language.implicitConversions import scala.reflect.runtime.universe._ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame import com.paypal.gimel.kafka2.conf.{KafkaClientConfiguration, KafkaConstants} import com.paypal.gimel.kafka2.utilities.{KafkaOptionsLoaderUtils, KafkaUtilitiesException} def produceToKafka(conf: KafkaClientConfiguration, dataFrame: DataFrame): Unit = { def MethodName: String = new Exception().getStackTrace().apply(1).getMethodName() logger.info(" @Begin --> " + MethodName) val kafkaProps: Properties = conf.kafkaProducerProps logger.info(s"Kafka Props for Producer -> ${kafkaProps.asScala.mkString("\n")}") val kafkaTopic = conf.kafkaTopics val kafkaTopicsOptionsMap : Map[String, Map[String, String]] = KafkaOptionsLoaderUtils.getAllKafkaTopicsOptions(conf) logger.info("Kafka options loaded -> " + kafkaTopicsOptionsMap) val eachKafkaTopicToOptionsMap = KafkaOptionsLoaderUtils.getEachKafkaTopicToOptionsMap(kafkaTopicsOptionsMap) logger.info("Begin Publishing to Kafka....") try { val kafkaTopicOptions = eachKafkaTopicToOptionsMap.get(kafkaTopic) kafkaTopicOptions match { case None => throw new IllegalStateException(s"""Could not load options for the kafka topic -> $kafkaTopic""") case Some(kafkaOptions) => dataFrame .write .format(KafkaConstants.KAFKA_FORMAT) .option(KafkaConstants.KAFKA_TOPIC, kafkaTopic) .options(kafkaOptions) .save() } } catch { case ex: Throwable => { ex.printStackTrace() val msg = s""" |kafkaTopic -> ${kafkaTopic} |kafkaParams --> ${kafkaProps.asScala.mkString("\n")}} """.stripMargin throw new KafkaUtilitiesException(s"Failed While Pushing Data Into Kafka \n ${msg}") } } logger.info("Publish to Kafka - Completed !") } }
Example 11
Source File: Producer.scala From fusion-data with Apache License 2.0 | 5 votes |
package kafkasample.demo

import java.util.Properties
import java.util.concurrent.TimeUnit

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord, RecordMetadata}

object Producer {

  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    val producer = new KafkaProducer[String, String](props)
    try {
      run(producer)
    } finally {
      TimeUnit.SECONDS.sleep(5)
      producer.close()
    }
  }

  private def run[K, V](producer: KafkaProducer[String, String]) {
    val record = new ProducerRecord[String, String]("customerCountries", "羊八井222")
    producer.send(record, (metadata: RecordMetadata, e: Exception) => {
      if (e ne null) {
        e.printStackTrace()
      }
      println(s"metadata: $metadata")
    })
  }
}
Example 12
Source File: package.scala From Waves with MIT License | 5 votes |
package com.wavesplatform.events import java.util import com.wavesplatform.events.protobuf.PBEvents import com.wavesplatform.events.settings.BlockchainUpdatesSettings import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.config.SaslConfigs import org.apache.kafka.common.serialization.{IntegerSerializer, Serializer} package object kafka { private object BlockchainUpdatedSerializer extends Serializer[BlockchainUpdated] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def close(): Unit = {} override def serialize(topic: String, data: BlockchainUpdated): Array[Byte] = PBEvents.protobuf(data).toByteArray } private object IntSerializer extends Serializer[Int] { val integerSerializer = new IntegerSerializer override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = integerSerializer.configure(configs, isKey) override def close(): Unit = integerSerializer.close() override def serialize(topic: String, data: Int): Array[Byte] = integerSerializer.serialize(topic, data) } def createProperties(settings: BlockchainUpdatesSettings): util.Properties = { val props = new util.Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, settings.bootstrapServers) props.put(ProducerConfig.CLIENT_ID_CONFIG, settings.clientId) // props.put(ProducerConfig.RETRIES_CONFIG, "0") // SASL_SSL if (settings.ssl.enabled) { props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SASL_SSL") props.put(SaslConfigs.SASL_MECHANISM, "PLAIN") props.put( SaslConfigs.SASL_JAAS_CONFIG, s"org.apache.kafka.common.security.plain.PlainLoginModule required username = '${settings.ssl.username}' password = '${settings.ssl.password}';" ) } props } def createProducerProperties(settings: BlockchainUpdatesSettings): util.Properties = { val props = createProperties(settings) props.put(ProducerConfig.ACKS_CONFIG, "all") props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, "10485760") // 10MB props } def createProducer(settings: BlockchainUpdatesSettings): KafkaProducer[Int, BlockchainUpdated] = new KafkaProducer[Int, BlockchainUpdated](createProducerProperties(settings), IntSerializer, BlockchainUpdatedSerializer) def createProducerRecord(topic: String, event: BlockchainUpdated): ProducerRecord[Int, BlockchainUpdated] = { val h = event match { case ap: BlockAppended => ap.toHeight case MicroBlockAppended(_, height, _, _, _) => height case RollbackCompleted(_, height) => height case MicroBlockRollbackCompleted(_, height) => height } new ProducerRecord[Int, BlockchainUpdated](topic, h, event) } }
Example 13
Source File: SimpleProducer.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.producer

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object SimpleProducer extends App {
  val topic = "sample_topic"

  private val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092")
  props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

  val producer = new KafkaProducer[String, String](props)

  try {
    for (i <- 0 to 10) {
      producer.send(new ProducerRecord[String, String](topic, "title " + i.toString, "data from topic"))
      println(s"Sent: $i")
    }
    println("Message sent successfully")
    producer.close()
  } catch {
    case ex: Exception => ex.printStackTrace()
  }
}
Example 14
Source File: KafkaMessageSender.scala From kafka-with-akka-streams-kafka-streams-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.scala.kafka

// Excerpt only: the scraped snippet omits the enclosing sender class, its `producer`
// field (a KafkaProducer[Array[Byte], Array[Byte]]) and the imports for
// org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord, RecordMetadata}.
// Example 15 below shows a fuller variant of the same sender pattern.

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
}
Example 15
Source File: KafkaMessageSender.scala From model-serving-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.modelserving.client

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer

class MessageSender(val brokers: String) {

  import MessageSender._ // companion object (which defines providerProperties) is not included in this excerpt

  val producer = new KafkaProducer[Array[Byte], Array[Byte]](
    providerProperties(brokers, classOf[ByteArraySerializer].getName, classOf[ByteArraySerializer].getName))

  def writeKeyValue(topic: String, key: Array[Byte], value: Array[Byte]): Unit = {
    val result = producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, key, value)).get
    producer.flush()
  }

  def writeValue(topic: String, value: Array[Byte]): Unit = {
    val result = producer.send(
      new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get
    producer.flush()
  }

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
}
Example 16
Source File: KafkaWordCount.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord} import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ import org.apache.spark.SparkConf object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } }
Example 17
Source File: KafkaProducerUtils.scala From bigdata-examples with Apache License 2.0 | 5 votes |
package com.timeyang.common.util

import java.util.Properties

import com.timeyang.common.config.BaseConf
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object KafkaProducerUtils {

  @volatile lazy private val producer: KafkaProducer[String, String] = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BaseConf.kafkaBrokerList)
    props.put("acks", "all")
    props.put("retries", 1: Integer)
    props.put("batch.size", 16384: Integer)
    props.put("linger.ms", 1: Integer)
    props.put("buffer.memory", 33554432: Integer)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    new KafkaProducer[String, String](props)
  }

  def sendJsonMessages(topic: String, event: Object, events: Object*): Unit = {
    for (event <- event +: events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, events: List[Object]): Unit = {
    for (event <- events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, event: Object): Unit = {
    val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
    producer.send(record)
  }
}
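A hypothetical call site for the utility above; Order is an illustrative case class that is not part of the project, and the project's JsonUtils.toJson is assumed to serialize it.

// Hypothetical usage of KafkaProducerUtils; topic name and payload are placeholders.
case class Order(id: Int, symbol: String)

KafkaProducerUtils.send("orders", Order(1, "AMZN"))
KafkaProducerUtils.sendJsonMessages("orders", Order(2, "GOOG"), Order(3, "MSFT"))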
Example 18
Source File: CurrentDayMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.dayWindow import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.MINUTE, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i =0; while (true) { // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis())) val map = Map("id"-> i, "createTime"-> getCreateTime(), "amt"-> (MathUtil.random.nextInt(10) +"." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString()) producer.send(msg) producer.flush() Thread.sleep(1000) i = i + 1 // System.exit(-1) } } }
Example 19
Source File: LateDataMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.sideoutput.lateDataProcess import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.SECOND, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i =74540; while (true) { // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis())) val map = Map("id"-> i, "createTime"-> getCreateTime(), "amt"-> (MathUtil.random.nextInt(10) +"." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("late_data", jsonObject.toString()) producer.send(msg) producer.flush() Thread.sleep(200) i = i + 1 // System.exit(-1) } } }
Example 20
Source File: WindowDemoMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.trigger import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.MILLISECOND, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i = 0; while (true) { val map = Map("id" -> i, "createTime" -> getCreateTime(), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString()) producer.send(msg) producer.flush() if (MathUtil.random.nextBoolean()) { Thread.sleep(1500) } else { Thread.sleep(500) } i = i + 1 // System.exit(-1) } } }
Example 21
Source File: FileSinkMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.filesink import java.text.SimpleDateFormat import java.util.Calendar import com.venn.common.Common import com.venn.stream.api.dayWindow.CurrentDayMaker.{calendar, getCreateTime, sdf} import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject object FileSinkMaker { val topic = "async" def main(args: Array[String]): Unit = { while (true) { left("roll_file_sink") Thread.sleep(100) } } val sdf = new SimpleDateFormat("yyyyMMddHHmmss") var idLeft = 0 def left(topic: String) = { val producer = new KafkaProducer[String, String](Common.getProp) idLeft = idLeft + 1 val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), "date" -> getCreateTime) val jsonObject: JSONObject = new JSONObject(map) println("left : " + jsonObject.toString()) val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) // producer.send(msg) // producer.flush() } var minute : Int = 1 val calendar: Calendar = Calendar.getInstance() def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.MINUTE, 10) sdf.format(calendar.getTime) } }
Example 22
Source File: IntervalJoinKafkaKeyMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.intervalJoin import java.text.SimpleDateFormat import com.venn.common.Common import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject object IntervalJoinKafkaKeyMaker { val topic = "async" def main(args: Array[String]): Unit = { while (true) { left("topic_left") right("topic_right") Thread.sleep(500) } } val sdf = new SimpleDateFormat("yyyyMMddHHmmss") var idLeft = 0 def left(topic: String) = { val producer = new KafkaProducer[String, String](Common.getProp) idLeft = idLeft + 1 val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), "date" -> sdf.format(System.currentTimeMillis())) val jsonObject: JSONObject = new JSONObject(map) println("left : " + jsonObject.toString()) val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) producer.send(msg) producer.flush() } var idRight = 0 def right(topic: String) = { val producer = new KafkaProducer[String, String](Common.getProp) idRight = idRight + 1 val map = Map("id" -> idRight, "phone" -> ("17713333333" + idRight), "date" -> sdf.format(System.currentTimeMillis())) val jsonObject: JSONObject = new JSONObject(map) println("right : \t\t\t\t\t\t\t\t" + jsonObject.toString()) val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) producer.send(msg) producer.flush() } }
Example 23
Source File: SlotPartitionMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.demo import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.MILLISECOND, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val prop = Common.getProp prop.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") prop.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i = 0; while (true) { val map = Map("id" -> i, "createTime" -> getCreateTime(), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("slot_partition", jsonObject.toString()) producer.send(msg) producer.flush() if (MathUtil.random.nextBoolean()) { Thread.sleep(1500) } else { Thread.sleep(500) } i = i + 1 // System.exit(-1) } } }
Example 24
Source File: KafkaOffsetRevertTest.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.kafka import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp(true)) var i = 0; while (true) { // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis())) val map = Map("id" -> i, "createTime" -> sdf.format(System.currentTimeMillis()), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("kafka_offset", jsonObject.toString()) producer.send(msg) producer.flush() Thread.sleep(1000) i = i + 1 // System.exit(-1) } } }
Example 25
Source File: KafkaProducerConfig.scala From freestyle-kafka with Apache License 2.0 | 5 votes |
package freestyle
package kafka

import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.common.serialization.Serializer

import collection.JavaConverters._

case class KafkaProducerConfig[K, V](
    configs: Map[String, Any],
    keyValueSerializers: Option[(Serializer[K], Serializer[V])])
    extends UnderlyingKafkaProducer[K, V] {
  override def producer: KafkaProducer[K, V] = KafkaProducerConfig.producerFromConfig(this)
}

object KafkaProducerConfig {

  private def toAnyRefMap(m: Map[String, Any]): java.util.Map[String, AnyRef] =
    m.asInstanceOf[Map[String, AnyRef]].asJava

  def producerFromConfig[K, V](config: KafkaProducerConfig[K, V]): KafkaProducer[K, V] =
    config.keyValueSerializers.fold(new KafkaProducer[K, V](toAnyRefMap(config.configs))) {
      case (ks, vs) => new KafkaProducer[K, V](toAnyRefMap(config.configs), ks, vs)
    }
}
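A sketch of how this configuration might be constructed; the broker address is a placeholder, and the UnderlyingKafkaProducer trait it extends lives elsewhere in the freestyle-kafka module and is not shown here.

import org.apache.kafka.common.serialization.StringSerializer

// Hypothetical construction of the case class defined above.
val config = KafkaProducerConfig[String, String](
  configs = Map("bootstrap.servers" -> "localhost:9092"),
  keyValueSerializers = Some((new StringSerializer, new StringSerializer)))
val producer = config.producer // builds a KafkaProducer[String, String] with the given serializers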
Example 26
Source File: Kafka.scala From event-sourcing-kafka-streams with MIT License | 5 votes |
package org.amitayh.invoices.web import java.time.Duration import java.util.Collections.singletonList import java.util.Properties import cats.effect._ import cats.syntax.apply._ import cats.syntax.functor._ import fs2._ import org.amitayh.invoices.common.Config import org.amitayh.invoices.common.Config.Topics.Topic import org.apache.kafka.clients.consumer._ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata} import org.log4s.{Logger, getLogger} import scala.collection.JavaConverters._ object Kafka { trait Producer[F[_], K, V] { def send(key: K, value: V): F[RecordMetadata] } object Producer { def apply[F[_]: Async, K, V](producer: KafkaProducer[K, V], topic: Topic[K, V]): Producer[F, K, V] = (key: K, value: V) => Async[F].async { cb => val record = new ProducerRecord(topic.name, key, value) producer.send(record, (metadata: RecordMetadata, exception: Exception) => { if (exception != null) cb(Left(exception)) else cb(Right(metadata)) }) } } def producer[F[_]: Async, K, V](topic: Topic[K, V]): Resource[F, Producer[F, K, V]] = Resource { val create = Sync[F].delay { val props = new Properties props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers) new KafkaProducer[K, V](props, topic.keySerializer, topic.valueSerializer) } create.map(producer => (Producer(producer, topic), close(producer))) } def subscribe[F[_]: Sync, K, V](topic: Topic[K, V], groupId: String): Stream[F, (K, V)] = { val create = Sync[F].delay { val props = new Properties props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers) props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId) val consumer = new KafkaConsumer(props, topic.keyDeserializer, topic.valueDeserializer) consumer.subscribe(singletonList(topic.name)) consumer } Stream.bracket(create)(close[F]).flatMap(consume[F, K, V]) } private val logger: Logger = getLogger def log[F[_]: Sync](msg: String): F[Unit] = Sync[F].delay(logger.info(msg)) private def consume[F[_]: Sync, K, V](consumer: KafkaConsumer[K, V]): Stream[F, (K, V)] = for { records <- Stream.repeatEval(Sync[F].delay(consumer.poll(Duration.ofSeconds(1)))) record <- Stream.emits(records.iterator.asScala.toSeq) } yield record.key -> record.value private def close[F[_]: Sync](producer: KafkaProducer[_, _]): F[Unit] = Sync[F].delay(producer.close()) *> log(s"Producer closed") private def close[F[_]: Sync](consumer: KafkaConsumer[_, _]): F[Unit] = Sync[F].delay(consumer.close()) *> log("Consumer closed") }
Example 27
Source File: CsvKafkaPublisher.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.cloudera.sa.taxi360.common import java.io.File import java.util.Random import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.io.Source object CsvKafkaPublisher { var counter = 0 var salts = 0 def main(args:Array[String]): Unit = { if (args.length == 0) { println("<brokerList> " + "<topicName> " + "<dataFolderOrFile> " + "<sleepPerRecord> " + "<acks> " + "<linger.ms> " + "<producer.type> " + "<batch.size> " + "<salts>") return } val kafkaBrokerList = args(0) val kafkaTopicName = args(1) val nyTaxiDataFolder = args(2) val sleepPerRecord = args(3).toInt val acks = args(4).toInt val lingerMs = args(5).toInt val producerType = args(6) //"async" val batchSize = args(7).toInt salts = args(8).toInt val kafkaProducer = KafkaProducerUntil.getNewProducer(kafkaBrokerList, acks, lingerMs, producerType, batchSize) println("--Input:" + nyTaxiDataFolder) val dataFolder = new File(nyTaxiDataFolder) if (dataFolder.isDirectory) { val files = dataFolder.listFiles().iterator files.foreach(f => { println("--Input:" + f) processFile(f, kafkaTopicName, kafkaProducer, sleepPerRecord) }) } else { println("--Input:" + dataFolder) processFile(dataFolder, kafkaTopicName, kafkaProducer, sleepPerRecord) } println("---Done") } def processFile(file:File, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String], sleepPerRecord:Int): Unit = { var counter = 0 val r = new Random() println("-Starting Reading") Source.fromFile(file).getLines().foreach(l => { counter += 1 if (counter % 10000 == 0) { println("{Sent:" + counter + "}") } if (counter % 100 == 0) { print(".") } Thread.sleep(sleepPerRecord) val saltedVender = r.nextInt(salts) + l if (counter > 2) { publishTaxiRecord(saltedVender, kafkaTopicName, kafkaProducer) } }) } def publishTaxiRecord(line:String, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String]): Unit = { if (line.startsWith("vendor_name") || line.length < 10) { println("skip") } else { val message = new ProducerRecord[String, String](kafkaTopicName, line.hashCode.toString, line) kafkaProducer.send(message) } } }
Example 28
Source File: KafkaProducerUntil.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.cloudera.sa.taxi360.common

import java.util.Properties

import org.apache.kafka.clients.producer.KafkaProducer

object KafkaProducerUntil {

  def getNewProducer(brokerList: String,
                     acks: Int,
                     lingerMs: Int,
                     producerType: String,
                     batchSize: Int): KafkaProducer[String, String] = {
    val kafkaProps = new Properties
    kafkaProps.put("bootstrap.servers", brokerList)
    kafkaProps.put("metadata.broker.list", brokerList)
    kafkaProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put("acks", acks.toString)
    kafkaProps.put("retries", "3")
    kafkaProps.put("producer.type", producerType)
    kafkaProps.put("linger.ms", lingerMs.toString)
    kafkaProps.put("batch.size", batchSize.toString)

    println("brokerList:" + brokerList)
    println("acks:" + acks)
    println("lingerMs:" + lingerMs)
    println("batchSize:" + batchSize)
    println("producerType:" + producerType)
    println(kafkaProps)

    return new KafkaProducer[String, String](kafkaProps)
  }
}
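A hypothetical call matching the signature above; the broker address and tuning values are placeholders. Note that metadata.broker.list and producer.type are legacy settings from the old Scala producer; the Java KafkaProducer ignores them and logs an unknown-configuration warning.

// Hypothetical usage of the factory above.
val producer = KafkaProducerUntil.getNewProducer(
  brokerList = "localhost:9092", acks = 1, lingerMs = 5, producerType = "async", batchSize = 16384)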
Example 29
Source File: SimpleProducer.scala From Fast-Data-Processing-Systems-with-SMACK-Stack with MIT License | 5 votes |
package packt.ch05

import java.util.{Date, Properties}

import packt.ch05.SimpleProducer._

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object SimpleProducer {

  private var producer: KafkaProducer[String, String] = _

  def main(args: Array[String]) {
    val argsCount = args.length
    if (argsCount == 0 || argsCount == 1)
      throw new IllegalArgumentException(
        "Provide topic name and Message count as arguments")

    // Topic name and the message count to be published are passed from the command line
    val topic = args(0)
    val count = args(1)
    val messageCount = java.lang.Integer.parseInt(count)
    println("Topic Name - " + topic)
    println("Message Count - " + messageCount)

    val simpleProducer = new SimpleProducer()
    simpleProducer.publishMessage(topic, messageCount)
  }
}

class SimpleProducer {

  val props = new Properties()

  // Set the broker list for requesting metadata to find the lead broker
  props.put("metadata.broker.list",
    "192.168.146.132:9092, 192.168.146.132:9093, 192.168.146.132:9094")

  // This specifies the serializer class for keys
  props.put("serializer.class", "kafka.serializer.StringEncoder")

  // 1 means the producer receives an acknowledgment once the lead replica
  // has received the data. This option provides better durability as the
  // client waits until the server acknowledges the request as successful.
  props.put("request.required.acks", "1")

  producer = new KafkaProducer(props)

  private def publishMessage(topic: String, messageCount: Int) {
    for (mCount <- 0 until messageCount) {
      val runtime = new Date().toString
      val msg = "Message Publishing Time - " + runtime
      println(msg)
      // Create a message
      val data = new ProducerRecord[String, String](topic, msg)
      // Publish the message
      producer.send(data)
    }
    // Close producer connection with broker.
    producer.close()
  }
}
Example 30
Source File: SimplePartitioner.scala From Fast-Data-Processing-Systems-with-SMACK-Stack with MIT License | 5 votes |
package packt.ch05

import java.util

import kafka.utils.VerifiableProperties
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.clients.producer.Partitioner
import org.apache.kafka.common.Cluster

object SimplePartitioner {
  private var producer: KafkaProducer[String, String] = _
}

class SimplePartitioner(props: VerifiableProperties) extends Partitioner {

  def partition(key: AnyRef, a_numPartitions: Int): Int = {
    var partition = 0
    val partitionKey = key.asInstanceOf[String]
    val offset = partitionKey.lastIndexOf('.')
    if (offset > 0) {
      partition = java.lang.Integer.parseInt(partitionKey.substring(offset + 1)) % a_numPartitions
    }
    partition
  }

  override def partition(topic: String,
                         key: AnyRef,
                         keyBytes: Array[Byte],
                         value: AnyRef,
                         valueBytes: Array[Byte],
                         cluster: Cluster): Int =
    partition(key, 10)

  override def close() {
  }

  override def configure(configs: util.Map[String, _]) {
  }
}
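With the new producer API, a custom Partitioner is registered through configuration and instantiated reflectively, which requires a no-argument constructor; the VerifiableProperties constructor above is a holdover from the old Scala producer, so the sketch below assumes a no-arg variant of the class. Broker address and serializers are placeholders.

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig}

// Sketch only: wiring a custom partitioner into the producer config.
val props = new Properties()
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[SimplePartitioner].getName)
val producer = new KafkaProducer[String, String](props)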
Example 31
Source File: StreamStaticDataGenerator.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import knolx.spark.Stock
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random

object StreamStaticDataGenerator extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay")
  val orderTypes = List("buy", "sell")
  val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9)

  implicit val formats = Serialization.formats(NoTypeHints)

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 5 seconds) {
    companyNames.foreach { name =>
      val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head)
      producer.send(new ProducerRecord[String, String](topic, write(stock)))
    }
  }
}
Example 32
Source File: KafkaWordCount.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } } // scalastyle:on println
Example 33
Source File: WriteToKafka.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.bundle.kafka import java.util import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext} import cn.piflow.conf._ import cn.piflow.conf.bean.PropertyDescriptor import cn.piflow.conf.util.{ImageUtil, MapUtil} import java.util.Properties import org.apache.spark.sql.SparkSession import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.clients.producer.Producer import org.apache.kafka.clients.producer.ProducerRecord import scala.collection.mutable class WriteToKafka extends ConfigurableStop{ val description: String = "Write data to kafka" val inportList: List[String] = List(Port.DefaultPort) val outportList: List[String] = List(Port.DefaultPort) var kafka_host:String =_ var topic:String=_ def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = { val spark = pec.get[SparkSession]() val df = in.read() val properties:Properties = new Properties() properties.put("bootstrap.servers", kafka_host) properties.put("acks", "all") //properties.put("retries", 0) //properties.put("batch.size", 16384) //properties.put("linger.ms", 1) //properties.put("buffer.memory", 33554432) properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") var producer:Producer[String,String] = new KafkaProducer[String,String](properties) df.collect().foreach(row=>{ //var hm:util.HashMap[String,String]=new util.HashMap() //row.schema.fields.foreach(f=>(if(!f.name.equals(column_name)&&row.getAs(f.name)!=null)hm.put(f.name,row.getAs(f.name).asInstanceOf[String]))) var res:List[String]=List() row.schema.fields.foreach(f=>{ if(row.getAs(f.name)==null)res="None"::res else{ res=row.getAs(f.name).asInstanceOf[String]::res } }) val s:String=res.reverse.mkString(",") val record=new ProducerRecord[String,String](topic,s) producer.send(record) }) producer.close() } def initialize(ctx: ProcessContext): Unit = { } def setProperties(map: Map[String, Any]): Unit = { kafka_host=MapUtil.get(map,key="kafka_host").asInstanceOf[String] //port=Integer.parseInt(MapUtil.get(map,key="port").toString) topic=MapUtil.get(map,key="topic").asInstanceOf[String] } override def getPropertyDescriptor(): List[PropertyDescriptor] = { var descriptor : List[PropertyDescriptor] = List() val kafka_host = new PropertyDescriptor().name("kafka_host").displayName("KAFKA_HOST").defaultValue("").required(true) val topic = new PropertyDescriptor().name("topic").displayName("TOPIC").defaultValue("").required(true) descriptor = kafka_host :: descriptor descriptor = topic :: descriptor descriptor } override def getIcon(): Array[Byte] = { ImageUtil.getImage("icon/kafka/WriteToKafka.png") } override def getGroup(): List[String] = { List(StopGroup.KafkaGroup.toString) } override val authorEmail: String = "[email protected]" }
Example 34
Source File: KafkaWordCount.scala From AI with Apache License 2.0 | 5 votes |
package com.bigchange.basic import java.util import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.kafka.KafkaUtils import org.apache.spark.streaming.{Seconds, StreamingContext} object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount"). set("spark.streaming.receiver.writeAheadLog.enable", "true"). set("spark.streaming.kafka.maxRatePerPartition", "1000") val ssc = new StreamingContext(sparkConf, Seconds(2)) // Set up checkpointing, since the window operation below generally requires it ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap // createStream returns a Tuple2 of (key, value); only the value is used here. // Note that this is the Receiver-based approach (a non-receiver mode is also provided); with the default configuration it can lose data if the receiver dies, so the Write Ahead Log is enabled above, and the storage level can be adjusted accordingly. val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap, StorageLevel.MEMORY_AND_DISK_SER).map(_._2) val words = lines.flatMap(_.split(" ")) // Count the words seen within a 10-second window, computed every 2 seconds val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Seconds(10), Seconds(2), 2). filter(x => x._2 > 0) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } // Note that this is a broker list, in host:port,host:port form val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Kafka producer connection properties val props = new util.HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while (true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(100).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } }
Example 35
Source File: CsvKafkaPublisher.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.common import java.io.File import java.util.Random import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.io.Source object CsvKafkaPublisher { var counter = 0 var salts = 0 def main(args:Array[String]): Unit = { if (args.length == 0) { println("<brokerList> " + "<topicName> " + "<dataFolderOrFile> " + "<sleepPerRecord> " + "<acks> " + "<linger.ms> " + "<producer.type> " + "<batch.size> " + "<salts>") return } val kafkaBrokerList = args(0) val kafkaTopicName = args(1) val nyTaxiDataFolder = args(2) val sleepPerRecord = args(3).toInt val acks = args(4).toInt val lingerMs = args(5).toInt val producerType = args(6) //"async" val batchSize = args(7).toInt salts = args(8).toInt val kafkaProducer = KafkaProducerUntil.getNewProducer(kafkaBrokerList, acks, lingerMs, producerType, batchSize) println("--Input:" + nyTaxiDataFolder) val dataFolder = new File(nyTaxiDataFolder) if (dataFolder.isDirectory) { val files = dataFolder.listFiles().iterator files.foreach(f => { println("--Input:" + f) processFile(f, kafkaTopicName, kafkaProducer, sleepPerRecord) }) } else { println("--Input:" + dataFolder) processFile(dataFolder, kafkaTopicName, kafkaProducer, sleepPerRecord) } println("---Done") } def processFile(file:File, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String], sleepPerRecord:Int): Unit = { var counter = 0 val r = new Random() println("-Starting Reading") Source.fromFile(file).getLines().foreach(l => { counter += 1 if (counter % 10000 == 0) { println("{Sent:" + counter + "}") } if (counter % 100 == 0) { print(".") } Thread.sleep(sleepPerRecord) val saltedVender = r.nextInt(salts) + l if (counter > 2) { publishTaxiRecord(saltedVender, kafkaTopicName, kafkaProducer) } }) } def publishTaxiRecord(line:String, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String]): Unit = { if (line.startsWith("vendor_name") || line.length < 10) { println("skip") } else { val message = new ProducerRecord[String, String](kafkaTopicName, line.hashCode.toString, line) kafkaProducer.send(message) } } }
Example 36
Source File: KafkaProducerUntil.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.common import java.util.Properties import org.apache.kafka.clients.producer.KafkaProducer object KafkaProducerUntil { def getNewProducer(brokerList:String, acks:Int, lingerMs:Int, producerType:String, batchSize:Int): KafkaProducer[String, String] = { val kafkaProps = new Properties kafkaProps.put("bootstrap.servers", brokerList) kafkaProps.put("metadata.broker.list", brokerList) kafkaProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") kafkaProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") kafkaProps.put("acks", acks.toString) kafkaProps.put("retries", "3") kafkaProps.put("producer.type", producerType) kafkaProps.put("linger.ms", lingerMs.toString) kafkaProps.put("batch.size", batchSize.toString) println("brokerList:" + brokerList) println("acks:" + acks) println("lingerMs:" + lingerMs) println("batchSize:" + batchSize) println("producerType:" + producerType) println(kafkaProps) return new KafkaProducer[String,String](kafkaProps) } }
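The factory above also sets "metadata.broker.list" and "producer.type", which belong to the legacy Scala producer; the new org.apache.kafka.clients.producer.KafkaProducer does not recognise them and typically logs a warning for each. A minimal sketch of the same factory keeping only configs the new producer understands (the producerType parameter is assumed to be unneeded and is dropped):

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig}

object TrimmedProducerFactory {
  def getNewProducer(brokerList: String, acks: Int, lingerMs: Int,
                     batchSize: Int): KafkaProducer[String, String] = {
    val kafkaProps = new Properties
    kafkaProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList)
    kafkaProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put(ProducerConfig.ACKS_CONFIG, acks.toString)
    kafkaProps.put(ProducerConfig.RETRIES_CONFIG, "3")
    kafkaProps.put(ProducerConfig.LINGER_MS_CONFIG, lingerMs.toString)
    kafkaProps.put(ProducerConfig.BATCH_SIZE_CONFIG, batchSize.toString)
    new KafkaProducer[String, String](kafkaProps)
  }
}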
Example 37
Source File: KafkaSink.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.kafka import com.sksamuel.exts.Logging import io.eels.schema.StructType import io.eels.{Row, SinkWriter, Sink} import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} trait KafkaRowConverter[V] { def convert(row: Row): V } object KafkaRowConverter { implicit object NoopRowConverter extends KafkaRowConverter[Row] { override def convert(row: Row): Row = row } } case class KafkaSink[K, V](topic: String, producer: KafkaProducer[K, V]) (implicit partitioner: KafkaPartitioner[V], converter: KafkaRowConverter[V], keygen: KafkaKeyGen[K]) extends Sink with Logging { def open(schema: StructType): SinkWriter = { new SinkWriter { override def write(row: Row): Unit = { val key = keygen.gen(row) val value = converter.convert(row) val record = partitioner.partition(row) match { case Some(part) => new ProducerRecord[K, V](topic, part, key, value) case _ => new ProducerRecord[K, V](topic, key, value) } logger.debug(s"Sending record $record") producer.send(record) producer.flush() } override def close(): Unit = producer.close() } } }
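The writer above calls producer.flush() after every row, which blocks until the record is delivered and effectively disables producer batching. A minimal, self-contained sketch of the more common pattern, sending without per-record flushes and flushing once before close; broker, topic and rows are placeholders:

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object BatchedSendExample {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092") // placeholder broker
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    val producer = new KafkaProducer[String, String](props)

    val rows = Seq("clint eastwood,london", "elton john,paris") // placeholder rows
    rows.foreach(value => producer.send(new ProducerRecord[String, String]("mytopic", value)))

    producer.flush() // one flush for the whole batch instead of one per row
    producer.close()
  }
}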
Example 38
Source File: KafkaSinkTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.kafka import java.util import java.util.{Properties, UUID} import io.eels.Row import io.eels.datastream.DataStream import io.eels.schema.{Field, StringType, StructType} import net.manub.embeddedkafka.{EmbeddedKafka, EmbeddedKafkaConfig} import org.apache.kafka.clients.consumer.KafkaConsumer import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.common.serialization.{Deserializer, Serializer} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import scala.collection.JavaConverters._ import scala.util.Try class KafkaSinkTest extends FlatSpec with Matchers with BeforeAndAfterAll { implicit val kafkaConfig = EmbeddedKafkaConfig( kafkaPort = 6001, zooKeeperPort = 6000 ) Try { EmbeddedKafka.start() } val schema = StructType( Field("name", StringType, nullable = true), Field("location", StringType, nullable = true) ) val ds = DataStream.fromValues( schema, Seq( Vector("clint eastwood", UUID.randomUUID().toString), Vector("elton john", UUID.randomUUID().toString) ) ) "KafkaSink" should "support default implicits" ignore { val topic = "mytopic-" + System.currentTimeMillis() val properties = new Properties() properties.put("bootstrap.servers", s"localhost:${kafkaConfig.kafkaPort}") properties.put("group.id", "test") properties.put("auto.offset.reset", "earliest") val producer = new KafkaProducer[String, Row](properties, StringSerializer, RowSerializer) val sink = KafkaSink(topic, producer) val consumer = new KafkaConsumer[String, String](properties, StringDeserializer, StringDeserializer) consumer.subscribe(util.Arrays.asList(topic)) ds.to(sink) producer.close() val records = consumer.poll(4000) records.iterator().asScala.map(_.value).toList shouldBe ds.collect.map { case Row(_, values) => values.mkString(",") }.toList } } object RowSerializer extends Serializer[Row] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () override def serialize(topic: String, data: Row): Array[Byte] = data.values.mkString(",").getBytes override def close(): Unit = () } object StringSerializer extends Serializer[String] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () override def close(): Unit = () override def serialize(topic: String, data: String): Array[Byte] = data.getBytes } object StringDeserializer extends Deserializer[String] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () override def close(): Unit = () override def deserialize(topic: String, data: Array[Byte]): String = new String(data) }
Example 39
Source File: KafkaMessagingSystem.scala From amadou with Apache License 2.0 | 5 votes |
package com.mediative.amadou package monitoring import java.util.Properties import com.typesafe.config.Config import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata} class KafkaMessagingSystem(config: Config) extends MessagingSystem with Logging { private val properties = KafkaMessagingSystem.readProperties(config) private val producer = new KafkaProducer[String, String](properties) private val topicPrefix = properties.getProperty("topic.prefix") override def publish(topic: String, message: String): Unit = { val topicName = s"$topicPrefix-$topic" logger.info(s"Publishing to $topicName :\n$message\n") producer.send(new ProducerRecord[String, String](topicName, message), new Callback { override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = if (exception != null) { logger .error(s"Cannot publish to $topicName. Caused by: ${exception.getMessage}", exception) } }) () } override def stop(): Unit = producer.close() } object KafkaMessagingSystem { def readProperties(config: Config): Properties = { val propertiesKeys = Seq( "bootstrap.servers", "acks", "retries", "batch.size", "linger.ms", "buffer.memory", "key.serializer", "value.serializer", "topic.prefix") val properties = new Properties() propertiesKeys.foreach(key => properties.setProperty(key, config.getString(key))) properties } }
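readProperties copies a fixed list of keys out of a Typesafe Config, so the supplied config must define every one of them, including the non-Kafka topic.prefix. A minimal usage sketch with an inline config; all values are placeholders and the import path is assumed from the example's package:

import com.typesafe.config.ConfigFactory
import com.mediative.amadou.monitoring.KafkaMessagingSystem // assumed package, as in the example

object KafkaMessagingSystemDemo {
  def main(args: Array[String]): Unit = {
    // Hypothetical config covering every key readProperties expects; values are placeholders.
    val config = ConfigFactory.parseString(
      """
        |bootstrap.servers = "localhost:9092"
        |acks = "all"
        |retries = "3"
        |batch.size = "16384"
        |linger.ms = "5"
        |buffer.memory = "33554432"
        |key.serializer = "org.apache.kafka.common.serialization.StringSerializer"
        |value.serializer = "org.apache.kafka.common.serialization.StringSerializer"
        |topic.prefix = "monitoring"
      """.stripMargin)

    val messaging = new KafkaMessagingSystem(config)
    messaging.publish("jobs", "job started")
    messaging.stop()
  }
}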
Example 40
Source File: EventProducer.scala From rokku with Apache License 2.0 | 5 votes |
package com.ing.wbaa.rokku.proxy.provider.kafka import akka.Done import akka.http.scaladsl.model.HttpMethod import com.ing.wbaa.rokku.proxy.config.KafkaSettings import com.ing.wbaa.rokku.proxy.data.RequestId import com.ing.wbaa.rokku.proxy.handler.LoggerHandlerWithId import com.ing.wbaa.rokku.proxy.metrics.MetricsFactory import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata } import org.apache.kafka.common.serialization.StringSerializer import scala.concurrent.{ ExecutionContext, Future } trait EventProducer { private val logger = new LoggerHandlerWithId import scala.collection.JavaConverters._ protected[this] implicit val kafkaSettings: KafkaSettings protected[this] implicit val executionContext: ExecutionContext private lazy val config: Map[String, Object] = Map[String, Object]( "bootstrap.servers" -> kafkaSettings.bootstrapServers, ProducerConfig.RETRIES_CONFIG -> kafkaSettings.retries, ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG -> kafkaSettings.retriesBackOff, ProducerConfig.RECONNECT_BACKOFF_MAX_MS_CONFIG -> kafkaSettings.retriesBackOffMax, CommonClientConfigs.SECURITY_PROTOCOL_CONFIG -> kafkaSettings.protocol, ProducerConfig.MAX_BLOCK_MS_CONFIG -> kafkaSettings.maxblock, ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG -> kafkaSettings.requestTimeoutMs, "ssl.truststore.location" -> kafkaSettings.sslTruststoreLocation, "ssl.truststore.password" -> kafkaSettings.sslTruststorePassword, "ssl.keystore.location" -> kafkaSettings.sslKeystoreLocation, "ssl.keystore.password" -> kafkaSettings.sslKeystorePassword, "ssl.key.password" -> kafkaSettings.sslKeyPassword ) private lazy val kafkaProducer: KafkaProducer[String, String] = new KafkaProducer(config.asJava, new StringSerializer, new StringSerializer) def sendSingleMessage(event: String, topic: String, httpMethod: Option[HttpMethod] = None)(implicit id: RequestId): Future[Done] = { kafkaProducer .send(new ProducerRecord[String, String](topic, event), (metadata: RecordMetadata, exception: Exception) => { exception match { case e: Exception => MetricsFactory.incrementKafkaSendErrors logger.error("error in sending event {} to topic {}, error={}", event, topic, e) throw new Exception(e) case _ => httpMethod.map { m => MetricsFactory.incrementKafkaNotificationsSent(m) } logger.debug("Message sent {} to kafka, offset {}", event, metadata.offset()) } }) match { case _ => Future(Done) } } }
Example 41
Source File: KafkaTransmitter.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.simulator.transmitters import java.util.Properties import akka.actor.{ActorLogging, Props} import com.orendainx.trucking.simulator.transmitters.DataTransmitter.Transmit import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import scala.sys.SystemProperties import com.typesafe.config.Config object KafkaTransmitter { def props(topic: String)(implicit config: Config) = Props(new KafkaTransmitter(topic)) } class KafkaTransmitter(topic: String)(implicit config: Config) extends DataTransmitter with ActorLogging { private val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.getString("transmitter.kafka.bootstrap-servers")) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.key-serializer")) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.value-serializer")) // Enable settings for a secure environment, if necessary. // See: http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.4/bk_secure-kafka-ambari/content/ch_secure-kafka-produce-events.html val systemProperties = new SystemProperties if (config.getBoolean("transmitter.kafka.security-enabled")) { props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, config.getString("transmitter.kafka.security-protocol")) systemProperties.put("java.security.auth.login.config", config.getString("transmitter.kafka.jaas-file")) } private val producer = new KafkaProducer[String, String](props) def receive = { case Transmit(data) => producer.send(new ProducerRecord(topic, data.toCSV)) } override def postStop(): Unit = { producer.close() log.info("KafkaTransmitter closed its producer.") } }
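KafkaTransmitter reads its connection settings from the transmitter.kafka.* paths of the implicit Config and is created through its Props factory like any other actor. A rough usage sketch under that assumption; the actor system name and topic are placeholders:

import akka.actor.ActorSystem
import com.typesafe.config.{Config, ConfigFactory}
import com.orendainx.trucking.simulator.transmitters.KafkaTransmitter

object TransmitterDemo {
  def main(args: Array[String]): Unit = {
    // application.conf must provide the transmitter.kafka.* keys read in the constructor.
    implicit val config: Config = ConfigFactory.load()
    val system = ActorSystem("simulator") // placeholder actor system name
    val transmitter = system.actorOf(KafkaTransmitter.props("trucking_data")) // placeholder topic
    // In the original project, messages are sent as DataTransmitter.Transmit(data),
    // where data exposes a toCSV method:
    // transmitter ! DataTransmitter.Transmit(truckData)
  }
}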
Example 42
Source File: ActionsHandler.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.sql.execution.streaming.http import java.util.Properties import scala.collection.mutable.ArrayBuffer import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.clients.producer.ProducerRecord import org.apache.spark.internal.Logging import org.apache.spark.sql.Row import java.sql.Timestamp import org.apache.spark.sql.types.StructType import java.util.concurrent.atomic.AtomicInteger def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries; def destroy(); } trait ActionsHandlerFactory { def createInstance(params: Params): ActionsHandler; } abstract class AbstractActionsHandler extends ActionsHandler { def getRequiredParam(requestBody: Map[String, Any], key: String): Any = { val opt = requestBody.get(key); if (opt.isEmpty) { throw new MissingRequiredRequestParameterException(key); } opt.get; } override def destroy() = { } } class NullActionsHandler extends AbstractActionsHandler { override def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries = new ActionHandlerEntries() { def apply(action: String) = Map[String, Any](); //yes, do nothing def isDefinedAt(action: String) = false; }; } //rich row with extra info: id, time stamp, ... case class RowEx(originalRow: Row, batchId: Long, offsetInBatch: Long, timestamp: Timestamp) { def withTimestamp(): Row = Row.fromSeq(originalRow.toSeq :+ timestamp); def withId(): Row = Row.fromSeq(originalRow.toSeq :+ s"$batchId-$offsetInBatch"); def extra: (Long, Long, Timestamp) = { (batchId, offsetInBatch, timestamp) }; } trait SendStreamActionSupport { def onReceiveStream(topic: String, rows: Array[RowEx]); def getRequiredParam(requestBody: Map[String, Any], key: String): Any; val listeners = ArrayBuffer[StreamListener](); def addListener(listener: StreamListener): this.type = { listeners += listener; this; } protected def notifyListeners(topic: String, data: Array[RowEx]) { listeners.foreach { _.onArrive(topic, data); } } def handleSendStream(requestBody: Map[String, Any]): Map[String, Any] = { val topic = getRequiredParam(requestBody, "topic").asInstanceOf[String]; val batchId = getRequiredParam(requestBody, "batchId").asInstanceOf[Long]; val rows = getRequiredParam(requestBody, "rows").asInstanceOf[Array[Row]]; val ts = new Timestamp(System.currentTimeMillis()); var index = -1; val rows2 = rows.map { row ⇒ index += 1; RowEx(Row.fromSeq(row.toSeq), batchId, index, ts) } onReceiveStream(topic, rows2); notifyListeners(topic, rows2); Map("rowsCount" -> rows.size); } }
Example 43
Source File: KafkaAsReceiver.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.sql.execution.streaming.http import java.util.Properties import org.apache.kafka.clients.producer.Callback import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.clients.producer.RecordMetadata import org.apache.spark.internal.Logging class KafkaAsReceiver(bootstrapServers: String) extends AbstractActionsHandler with SendStreamActionSupport with Logging { val props = new Properties(); props.put("bootstrap.servers", bootstrapServers); props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); val producer = new KafkaProducer[String, String](props); override def listActionHandlerEntries(requestBody: Map[String, Any]): PartialFunction[String, Map[String, Any]] = { case "actionSendStream" ⇒ handleSendStream(requestBody); } override def destroy() { producer.close(); } override def onReceiveStream(topic: String, rows: Array[RowEx]) = { var index = -1; for (row ← rows) { index += 1; val key = "" + row.batchId + "-" + row.offsetInBatch; //TODO: send an array instead of a string value? val value = row.originalRow(0).toString(); val record = new ProducerRecord[String, String](topic, key, value); producer.send(record, new Callback() { def onCompletion(metadata: RecordMetadata, e: Exception) = { if (e != null) { e.printStackTrace(); logError(e.getMessage); } else { val offset = metadata.offset(); val partition = metadata.partition(); logDebug(s"record is sent to kafka:key=$key, value=$value, partition=$partition, offset=$offset"); } } }); } } } class KafkaAsReceiverFactory extends ActionsHandlerFactory { def createInstance(params: Params) = new KafkaAsReceiver(params.getRequiredString("bootstrapServers")); }
Example 44
Source File: StreamStreamDataGenerator.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config._ import knolx.KnolXLogger import knolx.spark.Stock import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.json4s.jackson.Serialization.write import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.util.Random object StreamStreamDataGenerator extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay") val orderTypes = List("buy", "sell") val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9) val randomCompanyNames = Random.shuffle(companyNames).drop(Random.shuffle((1 to 3).toList).head) implicit val formats = Serialization.formats(NoTypeHints) info("Streaming companies listed into Kafka...") system.scheduler.schedule(0 seconds, 20 seconds) { randomCompanyNames.foreach { name => producer.send(new ProducerRecord[String, String](companiesTopic, name)) } } info("Streaming stocks data into Kafka...") system.scheduler.schedule(0 seconds, 5 seconds) { companyNames.foreach { name => val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head) producer.send(new ProducerRecord[String, String](stocksTopic, write(stock))) } } }
Example 45
Source File: DataStreamer.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config.{bootstrapServer, topic} import knolx.KnolXLogger import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.util.Random object DataStreamer extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) val someWords = List("about", "above", "after", "again", "against") info("Streaming data into Kafka...") system.scheduler.schedule(0 seconds, 200 milliseconds) { Random.shuffle(someWords).headOption.foreach { word => producer.send(new ProducerRecord[String, String](topic, word)) } } }
Example 46
Source File: ExampleExternalStateSpec.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.example import java.util.Properties import com.typesafe.config.ConfigFactory import io.amient.affinity.core.cluster.Node import io.amient.affinity.core.util.AffinityTestBase import io.amient.affinity.kafka.EmbeddedKafka import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.scalatest.concurrent.TimeLimitedTests import org.scalatest.time.{Millis, Span} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import scala.collection.JavaConverters._ class ExampleExternalStateSpec extends FlatSpec with AffinityTestBase with EmbeddedKafka with Matchers with BeforeAndAfterAll with TimeLimitedTests { override def numPartitions = 2 val config = configure(ConfigFactory.load("example-external-state")) val topic = config.getString("affinity.keyspace.external.state.news.storage.kafka.topic") val node = new Node(configure(config, Some(zkConnect), Some(kafkaBootstrap))) override def beforeAll: Unit = try { createTopic(topic) val externalProducer = createKafkaAvroProducer[String, String]() try { externalProducer.send(new ProducerRecord(topic, "10:30", "the universe is expanding")) externalProducer.send(new ProducerRecord(topic, "11:00", "the universe is still expanding")) externalProducer.send(new ProducerRecord(topic, "11:30", "the universe briefly contracted but is expanding again")) externalProducer.flush() } finally { externalProducer.close() } //the external fixture is produced and the externalProducer is flushed() before the node is started node.start() node.awaitClusterReady() //at this point all stores have loaded everything available in the external topic so the test will be deterministic } finally { super.beforeAll() } override def afterAll: Unit = try { node.shutdown() } finally { super.afterAll() } behavior of "External State" val timeLimit = Span(5000, Millis) //it should be much faster but sometimes many tests are run at the same time it should "start automatically tailing state partitions on startup even when master" in { //we don't need an arbitrary sleep to ensure the tailing state catches up with the writes above //before we fetch the latest news because the watermark is built into the request to make the test fast and deterministic val response = node.get_text(node.http_get(s"/news/latest")) response should include("10:30\tthe universe is expanding") response should include("11:00\tthe universe is still expanding") response should include("11:30\tthe universe briefly contracted but is expanding again") } private def createKafkaAvroProducer[K, V]() = new KafkaProducer[K, V](new Properties { put("bootstrap.servers", kafkaBootstrap) put("acks", "1") put("key.serializer", "io.amient.affinity.kafka.KafkaAvroSerializer") put("value.serializer", "io.amient.affinity.kafka.KafkaAvroSerializer") //this simply adds all configs required by KafkaAvroSerializer config.getConfig("affinity.avro").entrySet().asScala.foreach { case (entry) => put(entry.getKey, entry.getValue.unwrapped()) } }) }
Example 47
Source File: MultiDataStreamer.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config.{bootstrapServer, topic} import knolx.KnolXLogger import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.language.postfixOps import scala.util.Random object MultiDataStreamer extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) info("Streaming data into Kafka...") system.scheduler.schedule(0 seconds, 3000 milliseconds) { (1 to Random.nextInt(100)).foreach { id => producer.send(new ProducerRecord[String, String](topic,s"device$id", (Math.random * 2 + 1).toString)) } } }
Example 48
Source File: KafkaOutput.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.plugin.output.kafka import java.io.{Serializable => JSerializable} import java.util.Properties import com.stratio.sparta.plugin.input.kafka.KafkaBase import com.stratio.sparta.sdk.pipeline.output.Output._ import com.stratio.sparta.sdk.pipeline.output.{Output, OutputFormatEnum, SaveModeEnum} import com.stratio.sparta.sdk.properties.CustomProperties import com.stratio.sparta.sdk.properties.ValidatingPropertyMap._ import org.apache.kafka.clients.producer.ProducerConfig._ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import org.apache.spark.sql._ import scala.collection.mutable class KafkaOutput(name: String, properties: Map[String, JSerializable]) extends Output(name, properties) with KafkaBase with CustomProperties { val DefaultKafkaSerializer = classOf[StringSerializer].getName val DefaultAck = "0" val DefaultBatchNumMessages = "200" val DefaultProducerPort = "9092" override val customKey = "KafkaProperties" override val customPropertyKey = "kafkaPropertyKey" override val customPropertyValue = "kafkaPropertyValue" val outputFormat = OutputFormatEnum.withName(properties.getString("format", "json").toUpperCase) val rowSeparator = properties.getString("rowSeparator", ",") override def supportedSaveModes: Seq[SaveModeEnum.Value] = Seq(SaveModeEnum.Append) override def save(dataFrame: DataFrame, saveMode: SaveModeEnum.Value, options: Map[String, String]): Unit = { val tableName = getTableNameFromOptions(options) validateSaveMode(saveMode) outputFormat match { case OutputFormatEnum.ROW => dataFrame.rdd.foreachPartition(messages => messages.foreach(message => send(tableName, message.mkString(rowSeparator)))) case _ => dataFrame.toJSON.foreachPartition { messages => messages.foreach(message => send(tableName, message)) } } } def send(topic: String, message: String): Unit = { val record = new ProducerRecord[String, String](topic, message) KafkaOutput.getProducer(getProducerConnectionKey, createProducerProps).send(record) } private[kafka] def getProducerConnectionKey: String = getHostPort(BOOTSTRAP_SERVERS_CONFIG, DefaultHost, DefaultProducerPort) .getOrElse(BOOTSTRAP_SERVERS_CONFIG, throw new Exception("Invalid metadata broker list")) private[kafka] def createProducerProps: Properties = { val props = new Properties() properties.filter(_._1 != customKey).foreach { case (key, value) => props.put(key, value.toString) } mandatoryOptions.foreach { case (key, value) => props.put(key, value) } getCustomProperties.foreach { case (key, value) => props.put(key, value) } props } private[kafka] def mandatoryOptions: Map[String, String] = getHostPort(BOOTSTRAP_SERVERS_CONFIG, DefaultHost, DefaultProducerPort) ++ Map( KEY_SERIALIZER_CLASS_CONFIG -> properties.getString(KEY_SERIALIZER_CLASS_CONFIG, DefaultKafkaSerializer), VALUE_SERIALIZER_CLASS_CONFIG -> properties.getString(VALUE_SERIALIZER_CLASS_CONFIG, DefaultKafkaSerializer), ACKS_CONFIG -> properties.getString(ACKS_CONFIG, DefaultAck), BATCH_SIZE_CONFIG -> properties.getString(BATCH_SIZE_CONFIG, DefaultBatchNumMessages) ) override def cleanUp(options: Map[String, String]): Unit = { log.info(s"Closing Kafka producer in Kafka Output: $name") KafkaOutput.closeProducers() } } object KafkaOutput { private val producers: mutable.Map[String, KafkaProducer[String, String]] = mutable.Map.empty def getProducer(producerKey: String, properties: Properties): KafkaProducer[String, String] = { getInstance(producerKey, properties) } def closeProducers(): Unit = { producers.values.foreach(producer => producer.close()) } private[kafka] def getInstance(key: String, properties: Properties): KafkaProducer[String, String] = { producers.getOrElse(key, { val producer = new KafkaProducer[String, String](properties) producers.put(key, producer) producer }) } }
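getInstance above checks and updates a plain mutable.Map without synchronization, so two executor threads sending their first record concurrently can race and build two producers for the same key, one of which is never closed. A minimal sketch of a thread-safe cache with the same shape (standalone, not Sparta's code):

import java.util.Properties
import org.apache.kafka.clients.producer.KafkaProducer
import scala.collection.mutable

object ProducerCache {
  private val producers = mutable.Map.empty[String, KafkaProducer[String, String]]

  // Synchronize the check-then-create so concurrent first sends share one producer per key.
  def getInstance(key: String, properties: Properties): KafkaProducer[String, String] =
    producers.synchronized {
      producers.getOrElseUpdate(key, new KafkaProducer[String, String](properties))
    }

  def closeAll(): Unit = producers.synchronized {
    producers.values.foreach(_.close())
    producers.clear()
  }
}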
Example 49
Source File: KafkaProducerInjector.scala From SparkOnKudu with Apache License 2.0 | 5 votes |
package org.kududb.spark.demo.gamer.aggregates import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} object KafkaProducerInjector { def main(args:Array[String]): Unit = { if (args.length == 0) { println("{brokerList} {topic} {#OfRecords} {sleepTimeEvery10Records} {#OfGamers}") return } val brokerList = args(0) val topic = args(1) val numOfRecords = args(2).toInt val sleepTimeEvery10Records = args(3).toInt val numOfGamers = args(4).toInt val producer = getNewProducer(brokerList) for (i <- 0 until numOfRecords) { val gamerRecord = GamerDataGenerator.makeNewGamerRecord(numOfGamers) val message = new ProducerRecord[String, String](topic, gamerRecord.gamerId.toString, gamerRecord.toString()) producer.send(message) if (i % 10 == 0) { Thread.sleep(sleepTimeEvery10Records) print(".") } if (i % 2000 == 0) { println() println("Records Sent:" + i) println() } } } def getNewProducer(brokerList:String): KafkaProducer[String, String] = { val kafkaProps = new Properties kafkaProps.put("bootstrap.servers", brokerList) kafkaProps.put("metadata.broker.list", brokerList) // This is mandatory, even though we don't send keys kafkaProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") kafkaProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") kafkaProps.put("acks", "0") // how many times to retry when produce request fails? kafkaProps.put("retries", "3") kafkaProps.put("linger.ms", "2") kafkaProps.put("batch.size", "1000") kafkaProps.put("queue.time", "2") new KafkaProducer[String, String](kafkaProps) } }
Example 50
Source File: NumericalDataProducer.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.integrationtest.kafka import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer import org.apache.log4j.Logger import org.apache.gearpump.streaming.serializer.ChillSerializer class NumericalDataProducer(topic: String, bootstrapServers: String) { private val LOG = Logger.getLogger(getClass) private val producer = createProducer private val WRITE_SLEEP_NANOS = 10 private val serializer = new ChillSerializer[Int] var lastWriteNum = 0 def start(): Unit = { produceThread.start() } def stop(): Unit = { if (produceThread.isAlive) { produceThread.interrupt() produceThread.join() } producer.close() } def producedNumbers: Range = { Range(1, lastWriteNum + 1) } private def createProducer: KafkaProducer[Array[Byte], Array[Byte]] = { val properties = new Properties() properties.setProperty("bootstrap.servers", bootstrapServers) new KafkaProducer[Array[Byte], Array[Byte]](properties, new ByteArraySerializer, new ByteArraySerializer) } private val produceThread = new Thread(new Runnable { override def run(): Unit = { try { while (!Thread.currentThread.isInterrupted) { lastWriteNum += 1 val msg = serializer.serialize(lastWriteNum) val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, msg) producer.send(record) Thread.sleep(0, WRITE_SLEEP_NANOS) } } catch { case ex: InterruptedException => LOG.error("message producing is stopped by an interrupt") } } }) }
Example 51
Source File: AbstractKafkaSink.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.lib.sink import java.util.Properties import org.apache.gearpump.Message import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink.KafkaProducerFactory import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.gearpump.streaming.sink.DataSink import org.apache.gearpump.streaming.task.TaskContext import org.apache.gearpump.util.LogUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer object AbstractKafkaSink { private val LOG = LogUtil.getLogger(classOf[AbstractKafkaSink]) val producerFactory = new KafkaProducerFactory { override def getKafkaProducer(config: KafkaConfig): KafkaProducer[Array[Byte], Array[Byte]] = { new KafkaProducer[Array[Byte], Array[Byte]](config.getProducerConfig, new ByteArraySerializer, new ByteArraySerializer) } } trait KafkaProducerFactory extends java.io.Serializable { def getKafkaProducer(config: KafkaConfig): KafkaProducer[Array[Byte], Array[Byte]] } } abstract class AbstractKafkaSink private[kafka]( topic: String, props: Properties, kafkaConfigFactory: KafkaConfigFactory, factory: KafkaProducerFactory) extends DataSink { import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink._ def this(topic: String, props: Properties) = { this(topic, props, new KafkaConfigFactory, AbstractKafkaSink.producerFactory) } private lazy val config = kafkaConfigFactory.getKafkaConfig(props) // Lazily construct producer since KafkaProducer is not serializable private lazy val producer = factory.getKafkaProducer(config) override def open(context: TaskContext): Unit = { LOG.info("KafkaSink opened") } override def write(message: Message): Unit = { message.value match { case (k: Array[Byte], v: Array[Byte]) => val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, k, v) producer.send(record) LOG.debug("KafkaSink sent record {} to Kafka", record) case v: Array[Byte] => val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, v) producer.send(record) LOG.debug("KafkaSink sent record {} to Kafka", record) case m => val errorMsg = s"unexpected message type ${m.getClass}; " + s"Array[Byte] or (Array[Byte], Array[Byte]) required" LOG.error(errorMsg) } } override def close(): Unit = { producer.close() LOG.info("KafkaSink closed") } }
Example 52
Source File: KafkaStore.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.lib.store import java.util.Properties import com.twitter.bijection.Injection import kafka.api.OffsetRequest import org.apache.gearpump.Time.MilliSeconds import org.apache.gearpump.streaming.kafka.lib.source.consumer.KafkaConsumer import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.gearpump.streaming.transaction.api.{CheckpointStore, CheckpointStoreFactory} import org.apache.gearpump.util.LogUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer class KafkaStore private[kafka]( val topic: String, val producer: KafkaProducer[Array[Byte], Array[Byte]], val optConsumer: Option[KafkaConsumer]) extends CheckpointStore { import org.apache.gearpump.streaming.kafka.lib.store.KafkaStore._ private var maxTime: MilliSeconds = 0L override def persist(time: MilliSeconds, checkpoint: Array[Byte]): Unit = { // make sure checkpointed timestamp is monotonically increasing // hence (1, 1), (3, 2), (2, 3) is checkpointed as (1, 1), (3, 2), (3, 3) if (time > maxTime) { maxTime = time } val key = maxTime val value = checkpoint val message = new ProducerRecord[Array[Byte], Array[Byte]]( topic, 0, Injection[Long, Array[Byte]](key), value) producer.send(message) LOG.debug("KafkaStore persisted state ({}, {})", key, value) } override def recover(time: MilliSeconds): Option[Array[Byte]] = { var checkpoint: Option[Array[Byte]] = None optConsumer.foreach { consumer => while (consumer.hasNext && checkpoint.isEmpty) { val kafkaMsg = consumer.next() checkpoint = for { k <- kafkaMsg.key t <- Injection.invert[MilliSeconds, Array[Byte]](k).toOption c = kafkaMsg.msg if t >= time } yield c } consumer.close() } checkpoint match { case Some(c) => LOG.info(s"KafkaStore recovered checkpoint ($time, $c)") case None => LOG.info(s"no checkpoint existing for $time") } checkpoint } override def close(): Unit = { producer.close() LOG.info("KafkaStore closed") } }
Example 53
Source File: KafkaSinkSpec.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka import java.util.Properties import com.twitter.bijection.Injection import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink.KafkaProducerFactory import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.mockito.Mockito._ import org.scalacheck.Gen import org.scalatest.mock.MockitoSugar import org.scalatest.prop.PropertyChecks import org.scalatest.{Matchers, PropSpec} import org.apache.gearpump.Message import org.apache.gearpump.streaming.MockUtil class KafkaSinkSpec extends PropSpec with PropertyChecks with Matchers with MockitoSugar { val dataGen = for { topic <- Gen.alphaStr key <- Gen.alphaStr msg <- Gen.alphaStr } yield (topic, Injection[String, Array[Byte]](key), Injection[String, Array[Byte]](msg)) property("KafkaSink write should send producer record") { forAll(dataGen) { (data: (String, Array[Byte], Array[Byte])) => val props = mock[Properties] val producer = mock[KafkaProducer[Array[Byte], Array[Byte]]] val producerFactory = mock[KafkaProducerFactory] val configFactory = mock[KafkaConfigFactory] val config = mock[KafkaConfig] when(configFactory.getKafkaConfig(props)).thenReturn(config) when(producerFactory.getKafkaProducer(config)).thenReturn(producer) val (topic, key, msg) = data val kafkaSink = new KafkaSink(topic, props, configFactory, producerFactory) kafkaSink.write(Message((key, msg))) verify(producer).send(MockUtil.argMatch[ProducerRecord[Array[Byte], Array[Byte]]]( r => r.topic == topic && (r.key sameElements key) && (r.value sameElements msg))) kafkaSink.write(Message(msg)) verify(producer).send(MockUtil.argMatch[ProducerRecord[Array[Byte], Array[Byte]]]( r => r.topic() == topic && (r.key == null) && (r.value() sameElements msg) )) kafkaSink.close() } } property("KafkaSink close should close kafka producer") { val props = mock[Properties] val producer = mock[KafkaProducer[Array[Byte], Array[Byte]]] val producerFactory = mock[KafkaProducerFactory] val configFactory = mock[KafkaConfigFactory] val config = mock[KafkaConfig] when(configFactory.getKafkaConfig(props)).thenReturn(config) when(producerFactory.getKafkaProducer(config)).thenReturn(producer) val kafkaSink = new KafkaSink("topic", props, configFactory, producerFactory) kafkaSink.close() verify(producer).close() } }
Example 54
Source File: KafkaWordCount.scala From sparkoscope with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } } // scalastyle:on println
Example 55
Source File: KafkaPublisher.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} class KafkaPublisher { val props = new Properties() props.put("bootstrap.servers", "localhost:9092") props.put("partition.assignment.strategy", "range") props.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer") props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer") val producer = new KafkaProducer[Array[Byte], Array[Byte]](props) def send(topic: String, event: Array[Byte]): Unit = { producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, event)) } def send(topic: String, events: List[Array[Byte]]): Unit = { for (event <- events) { producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, event)) } } }
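The property set above mixes consumer-side settings (partition.assignment.strategy and the two *.deserializer entries) into a producer configuration; KafkaProducer ignores them and normally logs a "supplied but isn't a known config" warning for each. A minimal sketch keeping only the keys the producer actually uses:

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

class TrimmedKafkaPublisher {
  private val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092")
  props.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer")

  private val producer = new KafkaProducer[Array[Byte], Array[Byte]](props)

  def send(topic: String, event: Array[Byte]): Unit =
    producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, event))

  def close(): Unit = producer.close()
}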
Example 56
Source File: ExternalKafkaProcessorSupplier.scala From haystack-trends with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trends.kstream.processor import com.expedia.metrics.MetricData import com.expedia.www.haystack.trends.config.entities.KafkaProduceConfiguration import com.expedia.www.haystack.trends.kstream.serde.TrendMetricSerde.metricRegistry import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata} import org.apache.kafka.streams.processor.{AbstractProcessor, Processor, ProcessorContext, ProcessorSupplier} import org.slf4j.LoggerFactory class ExternalKafkaProcessorSupplier(kafkaProduceConfig: KafkaProduceConfiguration) extends ProcessorSupplier[String, MetricData] { private val LOGGER = LoggerFactory.getLogger(this.getClass) private val metricPointExternalKafkaSuccessMeter = metricRegistry.meter("metricpoint.kafka-external.success") private val metricPointExternalKafkaFailureMeter = metricRegistry.meter("metricpoint.kafka-external.failure") def get: Processor[String, MetricData] = { new ExternalKafkaProcessor(kafkaProduceConfig: KafkaProduceConfiguration) } def process(key: String, value: MetricData): Unit = { val kafkaMessage = new ProducerRecord(kafkaProduceTopic, key, value) kafkaProducer.send(kafkaMessage, new Callback { override def onCompletion(recordMetadata: RecordMetadata, e: Exception): Unit = { if (e != null) { LOGGER.error(s"Failed to produce the message to kafka for topic=$kafkaProduceTopic, with reason=", e) metricPointExternalKafkaFailureMeter.mark() } else { metricPointExternalKafkaSuccessMeter.mark() } } }) } } }
Example 57
Source File: BgTestHelpers.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.bg.test import java.util.Properties import cmwell.driver.Dao import cmwell.fts.FTSService import com.typesafe.config.{ConfigFactory, ConfigValueFactory} import org.apache.kafka.clients.producer.KafkaProducer import org.elasticsearch.action.ActionListener import org.elasticsearch.action.admin.indices.create.{CreateIndexRequest, CreateIndexResponse} import org.elasticsearch.action.admin.indices.template.put.PutIndexTemplateRequest import org.elasticsearch.action.support.master.AcknowledgedResponse import org.elasticsearch.common.xcontent.XContentType import concurrent.duration._ import scala.concurrent.{Await, Promise} import scala.io.Source object BgTestHelpers { def kafkaProducer(bootstrapServers: String)= { val producerProperties = new Properties producerProperties.put("bootstrap.servers", bootstrapServers) producerProperties.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") producerProperties.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") new KafkaProducer[Array[Byte], Array[Byte]](producerProperties) } def dao(address: String, port: Int) = { // scalastyle:off val initCommands = Some(List( "CREATE KEYSPACE IF NOT EXISTS data2 WITH REPLICATION = {'class' : 'SimpleStrategy', 'replication_factor' : 1};", "CREATE TABLE IF NOT EXISTS data2.Path ( path text, uuid text, last_modified timestamp, PRIMARY KEY ( path, last_modified, uuid ) ) WITH CLUSTERING ORDER BY (last_modified DESC, uuid ASC) AND compression = { 'class' : 'LZ4Compressor' } AND caching = {'keys':'ALL', 'rows_per_partition':'1'};", "CREATE TABLE IF NOT EXISTS data2.Infoton (uuid text, quad text, field text, value text, data blob, PRIMARY KEY (uuid,quad,field,value)) WITH compression = { 'class' : 'LZ4Compressor' } AND caching = {'keys':'ALL', 'rows_per_partition':'1000'};" )) // scalastyle:on Dao("Test","data2", address, port, initCommands = initCommands) } def ftsOverridesConfig(address: String, port: Int) = { ConfigFactory.load() .withValue("ftsService.clusterName", ConfigValueFactory.fromAnyRef("docker-cluster")) .withValue("ftsService.transportAddress", ConfigValueFactory.fromIterable(java.util.Arrays.asList(address))) .withValue("ftsService.transportPort", ConfigValueFactory.fromAnyRef(port)) } def initFTSService(ftsService: FTSService) = { val putTemplateRequest = new PutIndexTemplateRequest("indices_template") val indicesTemplateStr = { val templateSource = Source.fromURL(this.getClass.getResource("/indices_template.json")) try templateSource.getLines.mkString("\n") finally templateSource.close() } putTemplateRequest.source(indicesTemplateStr, XContentType.JSON) val putTemplatePromise = Promise[AcknowledgedResponse]() ftsService.client.admin().indices().putTemplate(putTemplateRequest, new ActionListener[AcknowledgedResponse] { override def onResponse(response: AcknowledgedResponse): Unit = putTemplatePromise.success(response) override def onFailure(e: Exception): Unit = putTemplatePromise.failure(e) }) val putTemplateAck = Await.result(putTemplatePromise.future, 1.minute) if (!putTemplateAck.isAcknowledged) throw new Exception("ES didn't acknowledge the put template request") val createIndexPromise = Promise[AcknowledgedResponse]() ftsService.client.admin().indices().create(new CreateIndexRequest("cm_well_p0_0"), new ActionListener[CreateIndexResponse] { override def onResponse(response: CreateIndexResponse): Unit = createIndexPromise.success(response) override def onFailure(e: Exception): Unit = createIndexPromise.failure(e) }) val createIndexResponse = Await.result(createIndexPromise.future, 1.minute) if (!createIndexResponse.isAcknowledged) throw new Exception("ES didn't acknowledge the create index request") } }
Example 58
Source File: KafkaWordCount.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.streaming import java.util.HashMap import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.spark.SparkConf import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ object KafkaWordCount { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>") System.exit(1) } StreamingExamples.setStreamingLogLevels() val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2) wordCounts.print() ssc.start() ssc.awaitTermination() } } // Produces some random words between 1 and 100. object KafkaWordCountProducer { def main(args: Array[String]) { if (args.length < 4) { System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " + "<messagesPerSec> <wordsPerMessage>") System.exit(1) } val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties val props = new HashMap[String, Object]() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") val message = new ProducerRecord[String, String](topic, null, str) producer.send(message) } Thread.sleep(1000) } } } // scalastyle:on println
Example 59
Source File: StreamingProducerApp.scala From Scala-Programming-Projects with MIT License | 5 votes |
package coinyser import cats.effect.{ExitCode, IO, IOApp} import com.pusher.client.Pusher import StreamingProducer._ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.collection.JavaConversions._ object StreamingProducerApp extends IOApp { val topic = "transactions" val pusher = new Pusher("de504dc5763aeef9ff52") val props = Map( "bootstrap.servers" -> "localhost:9092", "key.serializer" -> "org.apache.kafka.common.serialization.IntegerSerializer", "value.serializer" -> "org.apache.kafka.common.serialization.StringSerializer") def run(args: List[String]): IO[ExitCode] = { val kafkaProducer = new KafkaProducer[Int, String](props) subscribe(pusher) { wsTx => val tx = convertWsTransaction(deserializeWebsocketTransaction(wsTx)) val jsonTx = serializeTransaction(tx) kafkaProducer.send(new ProducerRecord(topic, tx.tid, jsonTx)) }.flatMap(_ => IO.never) } }
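The example relies on scala.collection.JavaConversions to convert the Scala Map into the java.util.Map accepted by the KafkaProducer constructor; JavaConversions has been deprecated since Scala 2.12. A minimal sketch of the same construction via java.util.Properties, which needs no implicit conversion; the values are copied from the example:

import java.util.Properties
import org.apache.kafka.clients.producer.KafkaProducer

object ProducerFromProperties {
  def kafkaProducer(): KafkaProducer[Int, String] = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.IntegerSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    new KafkaProducer[Int, String](props)
  }
}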