org.apache.kafka.clients.producer.KafkaProducer Scala Examples

The following examples show how to use org.apache.kafka.clients.producer.KafkaProducer in Scala. Each example is drawn from an open-source project; the source file, project name, and license are noted above each snippet.
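Most of the examples follow the same basic pattern: build a java.util.Properties with the bootstrap servers and key/value serializers, construct a KafkaProducer, send one or more ProducerRecords, and close the producer. The sketch below is a minimal, self-contained illustration of that pattern; the broker address and topic name are placeholders rather than values taken from any of the projects.

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object MinimalProducerSketch extends App {
  val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092") // placeholder broker address
  props.put("key.serializer", classOf[StringSerializer].getName)
  props.put("value.serializer", classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)
  try {
    // send() is asynchronous and returns a java.util.concurrent.Future[RecordMetadata];
    // get() blocks here only so the example waits for the broker's acknowledgment.
    producer.send(new ProducerRecord[String, String]("sample_topic", "key", "value")).get()
  } finally {
    producer.close()
  }
}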
Example 1
Source File: KafkaClient.scala    From incubator-retired-gearpump   with Apache License 2.0
package org.apache.gearpump.streaming.kafka.lib.util

import kafka.admin.AdminUtils
import kafka.cluster.Broker
import kafka.common.TopicAndPartition
import kafka.consumer.SimpleConsumer
import kafka.utils.{ZKStringSerializer, ZkUtils}
import org.I0Itec.zkclient.ZkClient
import org.apache.gearpump.streaming.kafka.lib.source.consumer.KafkaConsumer
import org.apache.gearpump.streaming.kafka.util.KafkaConfig
import org.apache.gearpump.util.LogUtil
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.common.serialization.Serializer

object KafkaClient {
  private val LOG = LogUtil.getLogger(classOf[KafkaClient])

  val factory = new KafkaClientFactory

  class KafkaClientFactory extends java.io.Serializable {
    def getKafkaClient(config: KafkaConfig): KafkaClient = {
      val consumerConfig = config.getConsumerConfig
      val zkClient = new ZkClient(consumerConfig.zkConnect, consumerConfig.zkSessionTimeoutMs,
        consumerConfig.zkConnectionTimeoutMs, ZKStringSerializer)
      new KafkaClient(config, zkClient)
    }
  }
}

class KafkaClient(config: KafkaConfig, zkClient: ZkClient) {
  import org.apache.gearpump.streaming.kafka.lib.util.KafkaClient._

  private val consumerConfig = config.getConsumerConfig

  def getTopicAndPartitions(consumerTopics: List[String]): Array[TopicAndPartition] = {
    try {
      ZkUtils.getPartitionsForTopics(zkClient, consumerTopics).flatMap {
        case (topic, partitions) => partitions.map(TopicAndPartition(topic, _))
      }.toArray
    } catch {
      case e: Exception =>
        LOG.error(e.getMessage)
        throw e
    }
  }

  def getBroker(topic: String, partition: Int): Broker = {
    try {
      val leader = ZkUtils.getLeaderForPartition(zkClient, topic, partition)
        .getOrElse(throw new RuntimeException(
          s"leader not available for TopicAndPartition($topic, $partition)"))
      ZkUtils.getBrokerInfo(zkClient, leader)
        .getOrElse(throw new RuntimeException(s"broker info not found for leader $leader"))
    } catch {
      case e: Exception =>
        LOG.error(e.getMessage)
        throw e
    }
  }

  def createConsumer(topic: String, partition: Int, startOffsetTime: Long): KafkaConsumer = {
    val broker = getBroker(topic, partition)
    val soTimeout = consumerConfig.socketTimeoutMs
    val soBufferSize = consumerConfig.socketReceiveBufferBytes
    val clientId = consumerConfig.clientId
    val fetchSize = consumerConfig.fetchMessageMaxBytes
    val consumer = new SimpleConsumer(broker.host, broker.port, soTimeout, soBufferSize, clientId)
    KafkaConsumer(topic, partition, startOffsetTime, fetchSize, consumer)
  }

  def createProducer[K, V](keySerializer: Serializer[K],
      valueSerializer: Serializer[V]): KafkaProducer[K, V] = {
    new KafkaProducer[K, V](config.getProducerConfig, keySerializer, valueSerializer)
  }

  
  def createTopic(topic: String, partitions: Int, replicas: Int): Boolean = {
    try {
      if (AdminUtils.topicExists(zkClient, topic)) {
        LOG.info(s"topic $topic exists")
        true
      } else {
        AdminUtils.createTopic(zkClient, topic, partitions, replicas)
        LOG.info(s"created topic $topic")
        false
      }
    } catch {
      case e: Exception =>
        LOG.error(e.getMessage)
        throw e
    }
  }

  def close(): Unit = {
    zkClient.close()
  }
} 
Example 2
Source File: KafkaReporter.scala    From Swallow   with Apache License 2.0
package com.intel.hibench.common.streaming.metrics

import java.util.Properties

import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer}
import org.apache.kafka.common.serialization.StringSerializer


class KafkaReporter(topic: String, bootstrapServers: String) extends LatencyReporter {

  private val producer = ProducerSingleton.getInstance(bootstrapServers)

  override def report(startTime: Long, endTime: Long): Unit = {
    producer.send(new ProducerRecord[String, String](topic, null, s"$startTime:$endTime"))
  }
}

object ProducerSingleton {
  @volatile private var instance : Option[KafkaProducer[String, String]] = None

  def getInstance(bootstrapServers: String): KafkaProducer[String, String] = synchronized {
    if (!instance.isDefined) {
      synchronized {
        if(!instance.isDefined) {
          val props = new Properties()
          props.put("bootstrap.servers", bootstrapServers)
          instance = Some(new KafkaProducer(props, new StringSerializer, new StringSerializer))
        }
      }
    }
    instance.get
  }
} 
Example 3
Source File: TransactionalProducer.scala    From affinity   with Apache License 2.0
package io.amient.affinity.kafka

import java.util.Properties

import akka.actor.Actor
import akka.actor.Status.{Failure, Success}
import akka.event.Logging
import com.typesafe.config.Config
import io.amient.affinity.Conf
import io.amient.affinity.core.actor.{TransactionAbort, TransactionBegin, TransactionCommit, TransactionalRecord}
import io.amient.affinity.core.config.CfgStruct
import io.amient.affinity.core.storage.StorageConf
import io.amient.affinity.kafka.KafkaStorage.{KafkaConsumerConf, KafkaProducerConf}
import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer

import scala.collection.JavaConverters._

object KafkaConf extends KafkaConf {
  override def apply(config: Config): KafkaConf = new KafkaConf().apply(config)
}

class KafkaConf extends CfgStruct[KafkaConf](classOf[StorageConf]) {
  val BootstrapServers = string("kafka.bootstrap.servers", true).doc("kafka connection string used for consumer and/or producer")
  val Producer = struct("kafka.producer", new KafkaProducerConf, false).doc("any settings that the underlying version of kafka producer client supports")
  val Consumer = struct("kafka.consumer", new KafkaConsumerConf, false).doc("any settings that the underlying version of kafka consumer client supports")
}

class TransactionalProducer extends Actor {

  val logger = Logging.getLogger(context.system, this)

  private[this] var producer: KafkaProducer[Array[Byte], Array[Byte]] = null

  val kafkaConf = KafkaConf(Conf(context.system.settings.config).Affi.Storage)
  val producerConfig = new Properties() {
    if (kafkaConf.Producer.isDefined) {
      val producerConfig = kafkaConf.Producer.toMap()
      if (producerConfig.containsKey("bootstrap.servers")) throw new IllegalArgumentException("bootstrap.servers cannot be overriden for KafkaStroage producer")
      if (producerConfig.containsKey("key.serializer")) throw new IllegalArgumentException("Binary kafka stream cannot use custom key.serializer")
      if (producerConfig.containsKey("value.serializer")) throw new IllegalArgumentException("Binary kafka stream cannot use custom value.serializer")
      producerConfig.entrySet.asScala.filter(_.getValue.isDefined).foreach { case (entry) =>
        put(entry.getKey, entry.getValue.apply.toString)
      }
    }
    put("bootstrap.servers", kafkaConf.BootstrapServers())
    put("value.serializer", classOf[ByteArraySerializer].getName)
    put("key.serializer", classOf[ByteArraySerializer].getName)
  }

  override def receive: Receive = {

    case req@TransactionBegin(transactionalId) => req(sender) ! {
      if (producer == null) {
        producerConfig.put("transactional.id", transactionalId)
        producer = new KafkaProducer[Array[Byte], Array[Byte]](producerConfig)
        logger.debug(s"Transactions.Init(transactional.id = $transactionalId)")
        producer.initTransactions()
      }
      logger.debug("Transactions.Begin()")
      producer.beginTransaction()
    }

    case TransactionalRecord(topic, key, value, timestamp, partition) =>
      val replyto = sender
      val producerRecord = new ProducerRecord(
        topic,
        partition.map(new Integer(_)).getOrElse(null),
        timestamp.map(new java.lang.Long(_)).getOrElse(null),
        key,
        value)
      logger.debug(s"Transactions.Append(topic=$topic)")
      producer.send(producerRecord, new Callback {
        override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
          if (exception != null) {
            replyto ! Failure(exception)
          } else {
            replyto ! Success(metadata.offset())
          }
        }
      })

    case req@TransactionCommit() => req(sender) ! {
      logger.debug("Transactions.Commit()")
      producer.commitTransaction()
    }

    case req@TransactionAbort() => req(sender) ! {
      logger.debug("Transactions.Abort()")
      producer.abortTransaction()
    }
  }
} 
Example 4
Source File: KafkaWordCount.scala    From spark1.52   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka.KafkaUtils



// The enclosing object and main() header were stripped from this excerpt; restored here.
object KafkaWordCount {
  def main(args: Array[String]) {
    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = Array("localhost:2181","","topic1,topic2,topic3,topic4","1")//args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount").setMaster("local")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messagesPerSec messages per second, each made up of wordsPerMessage random single-digit words.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (brokers and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while(true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

}
// scalastyle:on println 
Example 5
Source File: SessionKafkaProducer.scala    From flink_training   with Apache License 2.0
package com.tmalaska.flinktraining.example.session

import java.util.{Properties, Random}

import net.liftweb.json.DefaultFormats
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import net.liftweb.json.Serialization.write

object SessionKafkaProducer {
  def main(args:Array[String]): Unit = {

    implicit val formats = DefaultFormats

    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val topic = args(2)
    val numberOfEntities = args(3).toInt
    val numberOfMessagesPerEntity = args(4).toInt
    val waitTimeBetweenMessageBatch = args(5).toInt
    val chancesOfMissing = args(6).toInt

    val props = new Properties()
    props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    props.put("acks", "all")
    props.put("retries", "0")
    props.put("batch.size", "16384")
    props.put("linger.ms", "1")
    props.put("buffer.memory", "33554432")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    val r = new Random()
    var sentCount = 0

    println("About to send to " + topic)
    for (j <- 0 to numberOfMessagesPerEntity) {
      for (i <- 0 to numberOfEntities) {
        if (r.nextInt(chancesOfMissing) != 0) {
          val message = write(HeartBeat(i.toString, System.currentTimeMillis()))
          val producerRecord = new ProducerRecord[String,String](topic, message)
          producer.send(producerRecord)
          sentCount += 1
        }
      }
      println("Sent Count:" + sentCount)
      Thread.sleep(waitTimeBetweenMessageBatch)
    }

    producer.close()
  }
} 
Example 6
Source File: KafkaClient.scala    From mist   with Apache License 2.0
package io.hydrosphere.mist.master.interfaces.async.kafka

import java.util.UUID
import java.util.concurrent.atomic.AtomicBoolean

import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

import scala.collection.JavaConverters._
import scala.concurrent.{Future, Promise}

class TopicProducer[K, V](
  producer: KafkaProducer[K, V],
  topic: String
) {

  def send(key:K, value: V): Unit = {
    val record = new ProducerRecord(topic, key, value)
    producer.send(record)
  }
  def close(): Unit = {
    producer.close()
  }

}

object TopicProducer {

  def apply(
    host: String,
    port: Int,
    topic: String): TopicProducer[String, String] = {

    val props = new java.util.Properties()
    props.put("bootstrap.servers", s"$host:$port")

    val producer = new KafkaProducer(props, new StringSerializer, new StringSerializer)
    new TopicProducer(producer, topic)
  }
}

class TopicConsumer[K, V](
  consumer: KafkaConsumer[K, V],
  topic: String,
  timeout: Long = 100
) {

  private val promise = Promise[Unit]
  private val stopped = new AtomicBoolean(false)

  def subscribe(f: (K, V) => Unit): Future[Unit] = {
    run(f)
    promise.future
  }

  private def run(f: (K, V) => Unit): Unit = {
    consumer.subscribe(Seq(topic).asJava)
    val thread = new Thread(new Runnable {
      override def run(): Unit = {
        while (!stopped.get()) {
          val records = consumer.poll(timeout).asScala
          records.foreach(r => f(r.key(), r.value()))
        }
        promise.success(())
      }
    })
    thread.setName(s"kafka-topic-consumer-$topic")
    thread.start()
  }

  def close(): Future[Unit] = {
    stopped.set(true)
    promise.future
  }
}

object TopicConsumer {

  def apply(
    host: String,
    port: Int,
    topic: String): TopicConsumer[String, String] = {

    val props = new java.util.Properties()
    props.put("bootstrap.servers", s"$host:$port")
    props.put("group.id", "mist-" + UUID.randomUUID().toString)
    props.put("enable.auto.commit", "true")
    props.put("auto.commit.interval.ms", "1000")
    props.put("session.timeout.ms", "30000")

    val consumer = new KafkaConsumer(props, new StringDeserializer, new StringDeserializer)
    new TopicConsumer(consumer, topic)
  }

} 
Example 7
Source File: CachedKafkaProducer.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.kafka010

import java.{util => ju}
import java.util.concurrent.{ConcurrentMap, ExecutionException, TimeUnit}

import com.google.common.cache._
import com.google.common.util.concurrent.{ExecutionError, UncheckedExecutionException}
import org.apache.kafka.clients.producer.KafkaProducer
import scala.collection.JavaConverters._
import scala.util.control.NonFatal

import org.apache.spark.SparkEnv
import org.apache.spark.internal.Logging

private[kafka010] object CachedKafkaProducer extends Logging {

  private type Producer = KafkaProducer[Array[Byte], Array[Byte]]

  private lazy val cacheExpireTimeout: Long =
    SparkEnv.get.conf.getTimeAsMs("spark.kafka.producer.cache.timeout", "10m")

  private val cacheLoader = new CacheLoader[Seq[(String, Object)], Producer] {
    override def load(config: Seq[(String, Object)]): Producer = {
      val configMap = config.map(x => x._1 -> x._2).toMap.asJava
      createKafkaProducer(configMap)
    }
  }

  private val removalListener = new RemovalListener[Seq[(String, Object)], Producer]() {
    override def onRemoval(
        notification: RemovalNotification[Seq[(String, Object)], Producer]): Unit = {
      val paramsSeq: Seq[(String, Object)] = notification.getKey
      val producer: Producer = notification.getValue
      logDebug(
        s"Evicting kafka producer $producer params: $paramsSeq, due to ${notification.getCause}")
      close(paramsSeq, producer)
    }
  }

  private lazy val guavaCache: LoadingCache[Seq[(String, Object)], Producer] =
    CacheBuilder.newBuilder().expireAfterAccess(cacheExpireTimeout, TimeUnit.MILLISECONDS)
      .removalListener(removalListener)
      .build[Seq[(String, Object)], Producer](cacheLoader)

  private def createKafkaProducer(producerConfiguration: ju.Map[String, Object]): Producer = {
    val kafkaProducer: Producer = new Producer(producerConfiguration)
    logDebug(s"Created a new instance of KafkaProducer for $producerConfiguration.")
    kafkaProducer
  }

  
  private def close(paramsSeq: Seq[(String, Object)], producer: Producer): Unit = {
    try {
      logInfo(s"Closing the KafkaProducer with params: ${paramsSeq.mkString("\n")}.")
      producer.close()
    } catch {
      case NonFatal(e) => logWarning("Error while closing kafka producer.", e)
    }
  }

  private def clear(): Unit = {
    logInfo("Cleaning up guava cache.")
    guavaCache.invalidateAll()
  }

  // Intended for testing purpose only.
  private def getAsMap: ConcurrentMap[Seq[(String, Object)], Producer] = guavaCache.asMap()
} 
Example 8
Source File: CachedKafkaProducerSuite.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.kafka010

import java.{util => ju}
import java.util.concurrent.ConcurrentMap

import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.common.serialization.ByteArraySerializer
import org.scalatest.PrivateMethodTester

import org.apache.spark.sql.test.SharedSQLContext

class CachedKafkaProducerSuite extends SharedSQLContext with PrivateMethodTester {

  type KP = KafkaProducer[Array[Byte], Array[Byte]]

  protected override def beforeEach(): Unit = {
    super.beforeEach()
    val clear = PrivateMethod[Unit]('clear)
    CachedKafkaProducer.invokePrivate(clear())
  }

  test("Should return the cached instance on calling getOrCreate with same params.") {
    val kafkaParams = new ju.HashMap[String, Object]()
    kafkaParams.put("acks", "0")
    // Here only host should be resolvable, it does not need a running instance of kafka server.
    kafkaParams.put("bootstrap.servers", "127.0.0.1:9022")
    kafkaParams.put("key.serializer", classOf[ByteArraySerializer].getName)
    kafkaParams.put("value.serializer", classOf[ByteArraySerializer].getName)
    val producer = CachedKafkaProducer.getOrCreate(kafkaParams)
    val producer2 = CachedKafkaProducer.getOrCreate(kafkaParams)
    assert(producer == producer2)

    val cacheMap = PrivateMethod[ConcurrentMap[Seq[(String, Object)], KP]]('getAsMap)
    val map = CachedKafkaProducer.invokePrivate(cacheMap())
    assert(map.size == 1)
  }

  test("Should close the correct kafka producer for the given kafkaPrams.") {
    val kafkaParams = new ju.HashMap[String, Object]()
    kafkaParams.put("acks", "0")
    kafkaParams.put("bootstrap.servers", "127.0.0.1:9022")
    kafkaParams.put("key.serializer", classOf[ByteArraySerializer].getName)
    kafkaParams.put("value.serializer", classOf[ByteArraySerializer].getName)
    val producer: KP = CachedKafkaProducer.getOrCreate(kafkaParams)
    kafkaParams.put("acks", "1")
    val producer2: KP = CachedKafkaProducer.getOrCreate(kafkaParams)
    // With updated conf, a new producer instance should be created.
    assert(producer != producer2)

    val cacheMap = PrivateMethod[ConcurrentMap[Seq[(String, Object)], KP]]('getAsMap)
    val map = CachedKafkaProducer.invokePrivate(cacheMap())
    assert(map.size == 2)

    CachedKafkaProducer.close(kafkaParams)
    val map2 = CachedKafkaProducer.invokePrivate(cacheMap())
    assert(map2.size == 1)
    import scala.collection.JavaConverters._
    val (seq: Seq[(String, Object)], _producer: KP) = map2.asScala.toArray.apply(0)
    assert(_producer == producer)
  }
} 
Example 9
Source File: KafkaWordCount.scala    From BigDatalog   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord}

import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf


object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messagesPerSec messages per second, each made up of wordsPerMessage random single-digit words.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (brokers and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while(true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

}
// scalastyle:on println 
Example 10
Source File: KafkaBatchProducer.scala    From gimel   with Apache License 2.0
package com.paypal.gimel.kafka2.writer

import java.util.Properties

import scala.collection.JavaConverters._
import scala.collection.immutable.Map
import scala.language.implicitConversions
import scala.reflect.runtime.universe._

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame

import com.paypal.gimel.kafka2.conf.{KafkaClientConfiguration, KafkaConstants}
import com.paypal.gimel.kafka2.utilities.{KafkaOptionsLoaderUtils, KafkaUtilitiesException}


// The enclosing object and the project's `logger` were stripped from this excerpt.
// A plain SLF4J logger is substituted below so the snippet stands alone (the original
// uses Gimel's own logging utility).
object KafkaBatchProducer {

  val logger = org.slf4j.LoggerFactory.getLogger(this.getClass)

  def produceToKafka(conf: KafkaClientConfiguration, dataFrame: DataFrame): Unit = {
    def MethodName: String = new Exception().getStackTrace().apply(1).getMethodName()
    logger.info(" @Begin --> " + MethodName)

    val kafkaProps: Properties = conf.kafkaProducerProps
    logger.info(s"Kafka Props for Producer -> ${kafkaProps.asScala.mkString("\n")}")
    val kafkaTopic = conf.kafkaTopics
    val kafkaTopicsOptionsMap : Map[String, Map[String, String]] =
      KafkaOptionsLoaderUtils.getAllKafkaTopicsOptions(conf)
    logger.info("Kafka options loaded -> " + kafkaTopicsOptionsMap)
    val eachKafkaTopicToOptionsMap = KafkaOptionsLoaderUtils.getEachKafkaTopicToOptionsMap(kafkaTopicsOptionsMap)
    logger.info("Begin Publishing to Kafka....")
    try {
      val kafkaTopicOptions = eachKafkaTopicToOptionsMap.get(kafkaTopic)
      kafkaTopicOptions match {
        case None =>
          throw new IllegalStateException(s"""Could not load options for the kafka topic -> $kafkaTopic""")
        case Some(kafkaOptions) =>
          dataFrame
            .write
            .format(KafkaConstants.KAFKA_FORMAT)
            .option(KafkaConstants.KAFKA_TOPIC, kafkaTopic)
            .options(kafkaOptions)
            .save()
      }
    }
    catch {
      case ex: Throwable => {
        ex.printStackTrace()
        val msg =
          s"""
             |kafkaTopic -> ${kafkaTopic}
             |kafkaParams --> ${kafkaProps.asScala.mkString("\n")}}
          """.stripMargin
        throw new KafkaUtilitiesException(s"Failed While Pushing Data Into Kafka \n ${msg}")
      }
    }
    logger.info("Publish to Kafka - Completed !")
  }
} 
Example 11
Source File: Producer.scala    From fusion-data   with Apache License 2.0
package kafkasample.demo

import java.util.Properties
import java.util.concurrent.TimeUnit

import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerRecord, RecordMetadata }

object Producer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)
    try {
      run(producer)
    } finally {
      TimeUnit.SECONDS.sleep(5)
      producer.close()
    }
  }

  private def run[K, V](producer: KafkaProducer[String, String]) {
    val record =
      new ProducerRecord[String, String]("customerCountries", "羊八井222")
    producer.send(record, (metadata: RecordMetadata, e: Exception) => {
      if (e ne null) {
        e.printStackTrace()
      }
      println(s"metadata: $metadata")
    })
  }
} 
Example 12
Source File: package.scala    From Waves   with MIT License
package com.wavesplatform.events
import java.util

import com.wavesplatform.events.protobuf.PBEvents
import com.wavesplatform.events.settings.BlockchainUpdatesSettings
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.config.SaslConfigs
import org.apache.kafka.common.serialization.{IntegerSerializer, Serializer}

package object kafka {
  private object BlockchainUpdatedSerializer extends Serializer[BlockchainUpdated] {
    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
    override def close(): Unit                                                 = {}

    override def serialize(topic: String, data: BlockchainUpdated): Array[Byte] =
      PBEvents.protobuf(data).toByteArray
  }

  private object IntSerializer extends Serializer[Int] {
    val integerSerializer = new IntegerSerializer

    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = integerSerializer.configure(configs, isKey)
    override def close(): Unit                                                 = integerSerializer.close()

    override def serialize(topic: String, data: Int): Array[Byte] =
      integerSerializer.serialize(topic, data)
  }

  def createProperties(settings: BlockchainUpdatesSettings): util.Properties = {
    val props = new util.Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, settings.bootstrapServers)
    props.put(ProducerConfig.CLIENT_ID_CONFIG, settings.clientId)
    //  props.put(ProducerConfig.RETRIES_CONFIG, "0")

    // SASL_SSL
    if (settings.ssl.enabled) {
      props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SASL_SSL")
      props.put(SaslConfigs.SASL_MECHANISM, "PLAIN")
      props.put(
        SaslConfigs.SASL_JAAS_CONFIG,
        s"org.apache.kafka.common.security.plain.PlainLoginModule required username = '${settings.ssl.username}' password = '${settings.ssl.password}';"
      )
    }
    props
  }

  def createProducerProperties(settings: BlockchainUpdatesSettings): util.Properties = {
    val props = createProperties(settings)
    props.put(ProducerConfig.ACKS_CONFIG, "all")
    props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, "10485760") // 10MB
    props
  }

  def createProducer(settings: BlockchainUpdatesSettings): KafkaProducer[Int, BlockchainUpdated] =
    new KafkaProducer[Int, BlockchainUpdated](createProducerProperties(settings), IntSerializer, BlockchainUpdatedSerializer)

  def createProducerRecord(topic: String, event: BlockchainUpdated): ProducerRecord[Int, BlockchainUpdated] = {
    val h = event match {
      case ap: BlockAppended                      => ap.toHeight
      case MicroBlockAppended(_, height, _, _, _) => height
      case RollbackCompleted(_, height)           => height
      case MicroBlockRollbackCompleted(_, height) => height
    }
    new ProducerRecord[Int, BlockchainUpdated](topic, h, event)
  }
} 
Example 13
Source File: SimpleProducer.scala    From kafka-scala-api   with Apache License 2.0
package com.example.producer

import java.util.{Properties}

import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer}

object SimpleProducer extends App{
  val topic = "sample_topic"
  private val props = new Properties()

  props.put("bootstrap.servers", "localhost:9092")
  props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

  val producer = new KafkaProducer[String,String](props)
  try {
    for(i <- 0 to 10) {
      producer.send(new ProducerRecord[String, String](topic, "title "+i.toString,"data from topic"))
      println(s"Sent: $i")
    }
    println("Message sent successfully")
    producer.close()
  }
  catch {
    case ex: Exception =>
      ex.printStackTrace()
  }
} 
Example 14
Source File: KafkaMessageSender.scala    From kafka-with-akka-streams-kafka-streams-tutorial   with Apache License 2.0
package com.lightbend.scala.kafka


import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer

// The class header and producer setup were stripped from this excerpt; a minimal
// reconstruction is shown here (compare the near-identical MessageSender in Example 15).
class MessageSender(val brokers: String) {
  private val props = new Properties()
  props.put("bootstrap.servers", brokers)
  props.put("key.serializer", classOf[ByteArraySerializer].getName)
  props.put("value.serializer", classOf[ByteArraySerializer].getName)

  val producer = new KafkaProducer[Array[Byte], Array[Byte]](props)

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
} 
Example 15
Source File: KafkaMessageSender.scala    From model-serving-tutorial   with Apache License 2.0
package com.lightbend.modelserving.client

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer


class MessageSender(val brokers: String) {

  import MessageSender._
  val producer = new KafkaProducer[Array[Byte], Array[Byte]](
    providerProperties(brokers, classOf[ByteArraySerializer].getName, classOf[ByteArraySerializer].getName))

  def writeKeyValue(topic: String, key: Array[Byte], value: Array[Byte]): Unit = {
    val result = producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, key, value)).get
    producer.flush()
  }

  def writeValue(topic: String, value: Array[Byte]): Unit = {
    val result = producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get
    producer.flush()
  }

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
} 
Example 16
Source File: KafkaWordCount.scala    From iolap   with Apache License 2.0
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord}

import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf


object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messagesPerSec messages per second, each made up of wordsPerMessage random single-digit words.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer properties (brokers and serializers)
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while(true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

} 
Example 17
Source File: KafkaProducerUtils.scala    From bigdata-examples   with Apache License 2.0
package com.timeyang.common.util

import java.util.Properties

import com.timeyang.common.config.BaseConf
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object KafkaProducerUtils {

  @volatile lazy private val producer: KafkaProducer[String, String] = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BaseConf.kafkaBrokerList)
    props.put("acks", "all")
    props.put("retries", 1: Integer)
    props.put("batch.size", 16384: Integer)
    props.put("linger.ms", 1: Integer)
    props.put("buffer.memory", 33554432: Integer)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])

    new KafkaProducer[String, String](props)
  }

  def sendJsonMessages(topic: String, event: Object, events: Object*): Unit = {
    for (event <- event +: events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, events: List[Object]): Unit = {
    for (event <- events) {
      val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
      producer.send(record)
    }
  }

  def send(topic: String, event: Object): Unit = {
    val record = new ProducerRecord[String, String](topic, null, JsonUtils.toJson(event))
    producer.send(record)
  }

} 
Example 18
Source File: CurrentDayMaker.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.dayWindow

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject


// The enclosing object and its `calendar` field were stripped from this excerpt
// (FileSinkMaker in Example 21 imports them); restored here.
object CurrentDayMaker {

  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
//    minute = minute + 1
    calendar.add(Calendar.MINUTE, 10)
    sdf.format(calendar.getTime)
  }
  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i =0;
    while (true) {

//      val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis()))
      val map = Map("id"-> i, "createTime"-> getCreateTime(), "amt"-> (MathUtil.random.nextInt(10) +"." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      Thread.sleep(1000)
      i = i + 1
//      System.exit(-1)
    }
  }

} 
Example 19
Source File: LateDataMaker.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.sideoutput.lateDataProcess

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject


// The enclosing object and its `calendar` field were stripped from this excerpt; restored here.
object LateDataMaker {

  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
//    minute = minute + 1
    calendar.add(Calendar.SECOND, 10)
    sdf.format(calendar.getTime)
  }
  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i =74540;
    while (true) {

//      val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis()))
      val map = Map("id"-> i, "createTime"-> getCreateTime(), "amt"-> (MathUtil.random.nextInt(10) +"." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("late_data", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      Thread.sleep(200)
      i = i + 1
//      System.exit(-1)
    }
  }

} 
Example 20
Source File: WindowDemoMaker.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.trigger

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject


// The enclosing object and its `calendar` field were stripped from this excerpt; restored here.
object WindowDemoMaker {

  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
    //    minute = minute + 1
    calendar.add(Calendar.MILLISECOND, 10)
    sdf.format(calendar.getTime)
  }

  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i = 0;
    while (true) {
      val map = Map("id" -> i, "createTime" -> getCreateTime(), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      if (MathUtil.random.nextBoolean()) {
        Thread.sleep(1500)
      } else {
        Thread.sleep(500)

      }
      i = i + 1
      //      System.exit(-1)
    }
  }

} 
Example 21
Source File: FileSinkMaker.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.filesink

import java.text.SimpleDateFormat
import java.util.Calendar

import com.venn.common.Common
import com.venn.stream.api.dayWindow.CurrentDayMaker.{calendar, getCreateTime, sdf}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject



object FileSinkMaker {
  val topic = "async"

  def main(args: Array[String]): Unit = {

    while (true) {

      left("roll_file_sink")
      Thread.sleep(100)
    }
  }

  val sdf = new SimpleDateFormat("yyyyMMddHHmmss")

  var idLeft = 0

  def left(topic: String) = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    idLeft = idLeft + 1
    val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), "date" -> getCreateTime)
    val jsonObject: JSONObject = new JSONObject(map)
    println("left : " + jsonObject.toString())
    val msg = new ProducerRecord[String, String](topic, jsonObject.toString())
//    producer.send(msg)
//    producer.flush()
  }

  var minute : Int = 1
  val calendar: Calendar = Calendar.getInstance()
  def getCreateTime(): String = {
    //    minute = minute + 1
    calendar.add(Calendar.MINUTE, 10)
    sdf.format(calendar.getTime)
  }

} 
Example 22
Source File: IntervalJoinKafkaKeyMaker.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.intervalJoin

import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject



object IntervalJoinKafkaKeyMaker {
  val topic = "async"

  def main(args: Array[String]): Unit = {

    while (true) {

      left("topic_left")
      right("topic_right")
      Thread.sleep(500)
    }
  }

  val sdf = new SimpleDateFormat("yyyyMMddHHmmss")

  var idLeft = 0

  def left(topic: String) = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    idLeft = idLeft + 1
    val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), "date" -> sdf.format(System.currentTimeMillis()))
    val jsonObject: JSONObject = new JSONObject(map)
    println("left : " + jsonObject.toString())
    val msg = new ProducerRecord[String, String](topic, jsonObject.toString())
    producer.send(msg)
    producer.flush()
  }

  var idRight = 0

  def right(topic: String) = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    idRight = idRight + 1
    val map = Map("id" -> idRight,  "phone" -> ("17713333333" + idRight), "date" -> sdf.format(System.currentTimeMillis()))
    val jsonObject: JSONObject = new JSONObject(map)
    println("right : \t\t\t\t\t\t\t\t" + jsonObject.toString())
    val msg = new ProducerRecord[String, String](topic, jsonObject.toString())
    producer.send(msg)
    producer.flush()
  }

} 
Example 23
Source File: SlotPartitionMaker.scala    From flink-rookie   with Apache License 2.0
package com.venn.demo

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject


// The enclosing object and its `calendar` field were stripped from this excerpt; restored here.
object SlotPartitionMaker {

  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
    //    minute = minute + 1
    calendar.add(Calendar.MILLISECOND, 10)
    sdf.format(calendar.getTime)
  }

  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {

    val prop = Common.getProp
    prop.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    prop.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    // Use the configured `prop` (the original passed Common.getProp again, which drops
    // the serializer settings just added above).
    val producer = new KafkaProducer[String, String](prop)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i = 0;
    while (true) {
      val map = Map("id" -> i, "createTime" -> getCreateTime(), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("slot_partition", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      if (MathUtil.random.nextBoolean()) {
        Thread.sleep(1500)
      } else {
        Thread.sleep(500)

      }
      i = i + 1
      //      System.exit(-1)
    }
  }

} 
Example 24
Source File: KafkaOffsetRevertTest.scala    From flink-rookie   with Apache License 2.0
package com.venn.kafka

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject


// The enclosing object header was stripped from this excerpt; restored here.
object KafkaOffsetRevertTest {

  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp(true))
    var i = 0;
    while (true) {

      //      val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis()))
      val map = Map("id" -> i, "createTime" -> sdf.format(System.currentTimeMillis()), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("kafka_offset", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      Thread.sleep(1000)
      i = i + 1
      //      System.exit(-1)
    }
  }

} 
Example 25
Source File: KafkaProducerConfig.scala    From freestyle-kafka   with Apache License 2.0
package freestyle
package kafka

import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.common.serialization.Serializer
import collection.JavaConverters._

case class KafkaProducerConfig[K, V](
    configs: Map[String, Any],
    keyValueSerializers: Option[(Serializer[K], Serializer[V])])
    extends UnderlyingKafkaProducer[K, V] {
  override def producer: KafkaProducer[K, V] = KafkaProducerConfig.producerFromConfig(this)
}

object KafkaProducerConfig {

  private def toAnyRefMap(m: Map[String, Any]): java.util.Map[String, AnyRef] =
    m.asInstanceOf[Map[String, AnyRef]].asJava

  def producerFromConfig[K, V](config: KafkaProducerConfig[K, V]): KafkaProducer[K, V] =
    config.keyValueSerializers.fold(new KafkaProducer[K, V](toAnyRefMap(config.configs))) {
      case (ks, vs) =>
        new KafkaProducer[K, V](toAnyRefMap(config.configs), ks, vs)
    }

} 
Example 26
Source File: Kafka.scala    From event-sourcing-kafka-streams   with MIT License
package org.amitayh.invoices.web

import java.time.Duration
import java.util.Collections.singletonList
import java.util.Properties

import cats.effect._
import cats.syntax.apply._
import cats.syntax.functor._
import fs2._
import org.amitayh.invoices.common.Config
import org.amitayh.invoices.common.Config.Topics.Topic
import org.apache.kafka.clients.consumer._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.log4s.{Logger, getLogger}

import scala.collection.JavaConverters._

object Kafka {

  trait Producer[F[_], K, V] {
    def send(key: K, value: V): F[RecordMetadata]
  }

  object Producer {
    def apply[F[_]: Async, K, V](producer: KafkaProducer[K, V], topic: Topic[K, V]): Producer[F, K, V] =
      (key: K, value: V) => Async[F].async { cb =>
        val record = new ProducerRecord(topic.name, key, value)
        producer.send(record, (metadata: RecordMetadata, exception: Exception) => {
          if (exception != null) cb(Left(exception))
          else cb(Right(metadata))
        })
      }
  }

  def producer[F[_]: Async, K, V](topic: Topic[K, V]): Resource[F, Producer[F, K, V]] = Resource {
    val create = Sync[F].delay {
      val props = new Properties
      props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers)
      new KafkaProducer[K, V](props, topic.keySerializer, topic.valueSerializer)
    }
    create.map(producer => (Producer(producer, topic), close(producer)))
  }

  def subscribe[F[_]: Sync, K, V](topic: Topic[K, V], groupId: String): Stream[F, (K, V)] = {
    val create = Sync[F].delay {
      val props = new Properties
      props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, Config.BootstrapServers)
      props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId)
      val consumer = new KafkaConsumer(props, topic.keyDeserializer, topic.valueDeserializer)
      consumer.subscribe(singletonList(topic.name))
      consumer
    }
    Stream.bracket(create)(close[F]).flatMap(consume[F, K, V])
  }

  private val logger: Logger = getLogger

  def log[F[_]: Sync](msg: String): F[Unit] = Sync[F].delay(logger.info(msg))

  private def consume[F[_]: Sync, K, V](consumer: KafkaConsumer[K, V]): Stream[F, (K, V)] = for {
    records <- Stream.repeatEval(Sync[F].delay(consumer.poll(Duration.ofSeconds(1))))
    record <- Stream.emits(records.iterator.asScala.toSeq)
  } yield record.key -> record.value

  private def close[F[_]: Sync](producer: KafkaProducer[_, _]): F[Unit] =
    Sync[F].delay(producer.close()) *> log(s"Producer closed")

  private def close[F[_]: Sync](consumer: KafkaConsumer[_, _]): F[Unit] =
    Sync[F].delay(consumer.close()) *> log("Consumer closed")

} 
Example 27
Source File: CsvKafkaPublisher.scala    From Taxi360   with Apache License 2.0
package com.cloudera.sa.taxi360.common

import java.io.File
import java.util.Random

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.io.Source

object CsvKafkaPublisher {

  var counter = 0
  var salts = 0

  def main(args:Array[String]): Unit = {
    if (args.length == 0) {
      println("<brokerList> " +
        "<topicName> " +
        "<dataFolderOrFile> " +
        "<sleepPerRecord> " +
        "<acks> " +
        "<linger.ms> " +
        "<producer.type> " +
        "<batch.size> " +
        "<salts>")
      return
    }

    val kafkaBrokerList = args(0)
    val kafkaTopicName = args(1)
    val nyTaxiDataFolder = args(2)
    val sleepPerRecord = args(3).toInt
    val acks = args(4).toInt
    val lingerMs = args(5).toInt
    val producerType = args(6) //"async"
    val batchSize = args(7).toInt
    salts = args(8).toInt

    val kafkaProducer = KafkaProducerUntil.getNewProducer(kafkaBrokerList, acks, lingerMs, producerType, batchSize)

    println("--Input:" + nyTaxiDataFolder)

    val dataFolder = new File(nyTaxiDataFolder)
    if (dataFolder.isDirectory) {
      val files = dataFolder.listFiles().iterator
      files.foreach(f => {
        println("--Input:" + f)
        processFile(f, kafkaTopicName, kafkaProducer, sleepPerRecord)
      })
    } else {
      println("--Input:" + dataFolder)
      processFile(dataFolder, kafkaTopicName, kafkaProducer, sleepPerRecord)
    }
    println("---Done")
  }

  def processFile(file:File, kafkaTopicName:String,
                  kafkaProducer: KafkaProducer[String, String], sleepPerRecord:Int): Unit = {
    var counter = 0
    val r = new Random()

    println("-Starting Reading")
    Source.fromFile(file).getLines().foreach(l => {
      counter += 1
      if (counter % 10000 == 0) {
        println("{Sent:" + counter + "}")
      }
      if (counter % 100 == 0) {
        print(".")
      }
      Thread.sleep(sleepPerRecord)

      val saltedVender = r.nextInt(salts) + l

      if (counter > 2) {
        publishTaxiRecord(saltedVender, kafkaTopicName, kafkaProducer)
      }
    })
  }

  def publishTaxiRecord(line:String, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String]): Unit = {

    if (line.startsWith("vendor_name") || line.length < 10) {
      println("skip")
    } else {
      val message = new ProducerRecord[String, String](kafkaTopicName, line.hashCode.toString, line)
      kafkaProducer.send(message)
    }
  }


} 
Example 28
Source File: KafkaProducerUntil.scala    From Taxi360   with Apache License 2.0
package com.cloudera.sa.taxi360.common

import java.util.Properties

import org.apache.kafka.clients.producer.KafkaProducer

object KafkaProducerUntil {
  def getNewProducer(brokerList:String,
                     acks:Int,
                     lingerMs:Int,
                     producerType:String,
                     batchSize:Int): KafkaProducer[String, String] = {
    val kafkaProps = new Properties
    kafkaProps.put("bootstrap.servers", brokerList)
    kafkaProps.put("metadata.broker.list", brokerList)
    kafkaProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put("acks", acks.toString)
    kafkaProps.put("retries", "3")
    kafkaProps.put("producer.type", producerType)
    kafkaProps.put("linger.ms", lingerMs.toString)
    kafkaProps.put("batch.size", batchSize.toString)

    println("brokerList:" + brokerList)
    println("acks:" + acks)
    println("lingerMs:" + lingerMs)
    println("batchSize:" + batchSize)
    println("producerType:" + producerType)
    println(kafkaProps)

    return new KafkaProducer[String,String](kafkaProps)
  }
} 
Example 29
package packt.ch05

import java.util.{Date, Properties}

import packt.ch05.SimpleProducer._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object SimpleProducer {

  private var producer: KafkaProducer[String, String] = _

  def main(args: Array[String]) {
    val argsCount = args.length
    if (argsCount == 0 || argsCount == 1)
      throw new IllegalArgumentException(
        "Provide topic name and Message count as arguments")

    // Topic name and the message count to be published is passed from the
    // command line
    val topic = args(0)
    val count = args(1)

    val messageCount = java.lang.Integer.parseInt(count)
    println("Topic Name - " + topic)
    println("Message Count - " + messageCount)
    val simpleProducer = new SimpleProducer()
    simpleProducer.publishMessage(topic, messageCount)
  }
}

class SimpleProducer {

  val props = new Properties()

  // Broker list used to bootstrap cluster metadata
  props.put("bootstrap.servers",
    "192.168.146.132:9092,192.168.146.132:9093,192.168.146.132:9094")

  // Serializer classes for record keys and values (required by the KafkaProducer API)
  props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

  // acks=1 means the producer receives an acknowledgment once the lead replica
  // has received the data. This option provides better durability as the
  // client waits until the server acknowledges the request as successful.
  props.put("acks", "1")

  producer = new KafkaProducer[String, String](props)

  private def publishMessage(topic: String, messageCount: Int) {
    for (mCount <- 0 until messageCount) {
      val runtime = new Date().toString
      val msg = "Message Publishing Time - " + runtime
      println(msg)

      // Create a message
      val data = new ProducerRecord[String, String](topic, msg)

      // Publish the message
      producer.send(data)
    }

    // Close producer connection with broker.
    producer.close()
  }
} 
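Note that the three properties set here (metadata.broker.list, serializer.class, request.required.acks) belong to the old Scala producer API, while the object being constructed is the new KafkaProducer, which requires bootstrap.servers plus key and value serializers and fails at construction with a ConfigException when they are missing. A hedged sketch of the minimal properties this constructor actually needs, reusing the book's broker address:

import java.util.Properties
import org.apache.kafka.clients.producer.KafkaProducer

val props = new Properties()
props.put("bootstrap.servers", "192.168.146.132:9092")  // first broker from the list above
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
props.put("acks", "1")  // new-producer equivalent of request.required.acks=1
val producer = new KafkaProducer[String, String](props)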
Example 30
package packt.ch05

import java.util

import kafka.utils.VerifiableProperties
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.clients.producer.Partitioner
import org.apache.kafka.common.Cluster

object SimplePartitioner {

  private var producer: KafkaProducer[String, String] = _
}

class SimplePartitioner(props: VerifiableProperties) extends Partitioner {

  def partition(key: AnyRef, a_numPartitions: Int): Int = {
    var partition = 0
    val partitionKey = key.asInstanceOf[String]
    val offset = partitionKey.lastIndexOf('.')
    if (offset > 0) {
      partition = java.lang.Integer.parseInt(partitionKey.substring(offset + 1)) %
        a_numPartitions
    }
    partition
  }

  override def partition(topic: String,
                         key: AnyRef,
                         keyBytes: Array[Byte],
                         value: AnyRef,
                         valueBytes: Array[Byte],
                         cluster: Cluster): Int = partition(key, 10)

  override def close() {
  }

  override def configure(configs: util.Map[String, _]) {
  }
} 
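Two hedged notes on this partitioner. First, the new producer instantiates partitioners reflectively, so registering this class assumes it is given a no-argument constructor (the VerifiableProperties parameter is a leftover from the old API). Second, the override hard-codes 10 partitions, while the Cluster argument can supply the topic's real partition count. A sketch of both, assuming a props object with the usual serializer settings already in place:

// Register the custom partitioner on the producer (assumes a no-arg constructor).
props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[SimplePartitioner].getName)

// Inside SimplePartitioner, use the live partition count instead of the hard-coded 10:
override def partition(topic: String,
                       key: AnyRef,
                       keyBytes: Array[Byte],
                       value: AnyRef,
                       valueBytes: Array[Byte],
                       cluster: Cluster): Int = partition(key, cluster.partitionsForTopic(topic).size())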
Example 31
Source File: StreamStaticDataGenerator.scala    From structured-streaming-application   with Apache License 2.0 5 votes vote down vote up
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import knolx.spark.Stock
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random


object StreamStaticDataGenerator extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay")
  val orderTypes = List("buy", "sell")
  val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9)

  implicit val formats = Serialization.formats(NoTypeHints)
  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 5 seconds) {
    companyNames.foreach { name =>
      val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head)
      producer.send(new ProducerRecord[String, String](topic, write(stock)))
    }
  }
} 
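The scheduled task above runs until the JVM exits and the producer is never closed, so records still buffered in the client can be lost on shutdown. A small hedged addition (assuming an Akka version with ActorSystem#terminate) that cleans up on exit:

sys.addShutdownHook {
  producer.flush()    // push out anything still buffered in the client
  producer.close()
  system.terminate()  // stop the scheduler as well
}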
Example 32
Source File: KafkaWordCount.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._


object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messages of random single-digit words (0-9) at a configurable rate.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer connection properties
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while(true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

}
// scalastyle:on println 
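producer.send in this loop is fire-and-forget: the java.util.concurrent.Future[RecordMetadata] it returns is dropped, so broker errors go unnoticed. A hedged variant for this demo that blocks on the future and reports where each record landed (fine for an example, though blocking per record reduces throughput):

// Synchronous send: wait for the broker acknowledgement, then log the placement.
val metadata = producer.send(message).get()
println(s"sent to ${metadata.topic()}-${metadata.partition()} @ offset ${metadata.offset()}")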
Example 33
Source File: WriteToKafka.scala    From piflow   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package cn.piflow.bundle.kafka

import java.util

import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import cn.piflow.conf._
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import java.util.Properties

import org.apache.spark.sql.SparkSession
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.clients.producer.Producer
import org.apache.kafka.clients.producer.ProducerRecord

import scala.collection.mutable

class WriteToKafka extends ConfigurableStop{
  val description: String = "Write data to kafka"
  val inportList: List[String] = List(Port.DefaultPort)
  val outportList: List[String] = List(Port.DefaultPort)
  var kafka_host: String = _
  var topic: String = _

  def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val spark = pec.get[SparkSession]()
    val df = in.read()
    val properties:Properties  = new Properties()
    properties.put("bootstrap.servers", kafka_host)
    properties.put("acks", "all")
    //properties.put("retries", 0)
    //properties.put("batch.size", 16384)
    //properties.put("linger.ms", 1)
    //properties.put("buffer.memory", 33554432)
    properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    var producer:Producer[String,String]  = new KafkaProducer[String,String](properties)

    df.collect().foreach(row=>{
      //var hm:util.HashMap[String,String]=new util.HashMap()
      //row.schema.fields.foreach(f=>(if(!f.name.equals(column_name)&&row.getAs(f.name)!=null)hm.put(f.name,row.getAs(f.name).asInstanceOf[String])))
      var res:List[String]=List()
      row.schema.fields.foreach(f=>{
          if(row.getAs(f.name)==null)res="None"::res
          else{
            res=row.getAs(f.name).asInstanceOf[String]::res
          }
        })
      val s:String=res.reverse.mkString(",")
      val record=new ProducerRecord[String,String](topic,s)
      producer.send(record)
    })
    producer.close()
  }


  def initialize(ctx: ProcessContext): Unit = {

  }


  def setProperties(map: Map[String, Any]): Unit = {
    kafka_host=MapUtil.get(map,key="kafka_host").asInstanceOf[String]
    //port=Integer.parseInt(MapUtil.get(map,key="port").toString)
    topic=MapUtil.get(map,key="topic").asInstanceOf[String]
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor : List[PropertyDescriptor] = List()
    val kafka_host = new PropertyDescriptor().name("kafka_host").displayName("KAFKA_HOST").defaultValue("").required(true)
    val topic = new PropertyDescriptor().name("topic").displayName("TOPIC").defaultValue("").required(true)
    descriptor = kafka_host :: descriptor
    descriptor = topic :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/kafka/WriteToKafka.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.KafkaGroup.toString)
  }

  override val authorEmail: String = "[email protected]"
} 
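df.collect() pulls every row to the driver and publishes from a single producer there, so this stop only scales to data that fits in driver memory. A hedged sketch of a distributed variant of the same loop, creating one producer per partition on the executors (field-joining logic kept as above; the local capture of topic avoids serialising the whole stop):

val kafkaTopic = topic
df.rdd.foreachPartition { rows =>
  // One producer per partition, created on the executor.
  val partitionProducer = new KafkaProducer[String, String](properties)
  rows.foreach { row =>
    val s = row.schema.fields
      .map(f => Option(row.getAs[String](f.name)).getOrElse("None"))
      .mkString(",")
    partitionProducer.send(new ProducerRecord[String, String](kafkaTopic, s))
  }
  partitionProducer.close()
}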
Example 34
Source File: KafkaWordCount.scala    From AI   with Apache License 2.0 5 votes vote down vote up
package com.bigchange.basic

import java.util

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}


object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount").
      set("spark.streaming.receiver.writeAheadLog.enable", "true").
      set("spark.streaming.kafka.maxRatePerPartition", "1000")
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Set up checkpointing; window operations are used below and they generally require a checkpoint directory.
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap

    // createStream returns a Tuple2 of (key, value); here we only care about the value.
    // Note this is the receiver-based approach (a non-receiver mode is also available). With the default
    // configuration it can lose data if the receiver dies, so the write-ahead log must be enabled (done
    // above), and the storage level can be adjusted accordingly.
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap, StorageLevel.MEMORY_AND_DISK_SER).map(_._2)
    val words = lines.flatMap(_.split(" "))

    // Count words over the last 10 seconds, recomputed every 2 seconds
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Seconds(10), Seconds(2), 2).
      filter(x => x._2 > 0)

    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messages of random words drawn from 0-99 at a configurable rate.
object KafkaWordCountProducer {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    // Note that this is the broker list, in host:port,host:port form.
    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer connection properties
    val props = new util.HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while (true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(100).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

} 
Example 35
Source File: CsvKafkaPublisher.scala    From Taxi360   with Apache License 2.0 5 votes vote down vote up
package com.hadooparchitecturebook.taxi360.common

import java.io.File
import java.util.Random

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.io.Source

object CsvKafkaPublisher {

  var counter = 0
  var salts = 0

  def main(args:Array[String]): Unit = {
    if (args.length == 0) {
      println("<brokerList> " +
        "<topicName> " +
        "<dataFolderOrFile> " +
        "<sleepPerRecord> " +
        "<acks> " +
        "<linger.ms> " +
        "<producer.type> " +
        "<batch.size> " +
        "<salts>")
      return
    }

    val kafkaBrokerList = args(0)
    val kafkaTopicName = args(1)
    val nyTaxiDataFolder = args(2)
    val sleepPerRecord = args(3).toInt
    val acks = args(4).toInt
    val lingerMs = args(5).toInt
    val producerType = args(6) //"async"
    val batchSize = args(7).toInt
    salts = args(8).toInt

    val kafkaProducer = KafkaProducerUntil.getNewProducer(kafkaBrokerList, acks, lingerMs, producerType, batchSize)

    println("--Input:" + nyTaxiDataFolder)

    val dataFolder = new File(nyTaxiDataFolder)
    if (dataFolder.isDirectory) {
      val files = dataFolder.listFiles().iterator
      files.foreach(f => {
        println("--Input:" + f)
        processFile(f, kafkaTopicName, kafkaProducer, sleepPerRecord)
      })
    } else {
      println("--Input:" + dataFolder)
      processFile(dataFolder, kafkaTopicName, kafkaProducer, sleepPerRecord)
    }
    println("---Done")
  }

  def processFile(file:File, kafkaTopicName:String,
                  kafkaProducer: KafkaProducer[String, String], sleepPerRecord:Int): Unit = {
    var counter = 0
    val r = new Random()

    println("-Starting Reading")
    Source.fromFile(file).getLines().foreach(l => {
      counter += 1
      if (counter % 10000 == 0) {
        println("{Sent:" + counter + "}")
      }
      if (counter % 100 == 0) {
        print(".")
      }
      Thread.sleep(sleepPerRecord)

      val saltedVender = r.nextInt(salts) + l

      if (counter > 2) {
        publishTaxiRecord(saltedVender, kafkaTopicName, kafkaProducer)
      }
    })
  }

  def publishTaxiRecord(line:String, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String]): Unit = {

    if (line.startsWith("vendor_name") || line.length < 10) {
      println("skip")
    } else {
      val message = new ProducerRecord[String, String](kafkaTopicName, line.hashCode.toString, line)
      kafkaProducer.send(message)
    }
  }


} 
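main returns without flushing or closing the producer, so records still sitting in the client buffer when "---Done" prints may be dropped. A hedged two-line addition just before that final println:

kafkaProducer.flush()  // drain the client buffer before the JVM exits
kafkaProducer.close()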
Example 36
Source File: KafkaProducerUntil.scala    From Taxi360   with Apache License 2.0 5 votes vote down vote up
package com.hadooparchitecturebook.taxi360.common

import java.util.Properties

import org.apache.kafka.clients.producer.KafkaProducer

object KafkaProducerUntil {
  def getNewProducer(brokerList:String,
                     acks:Int,
                     lingerMs:Int,
                     producerType:String,
                     batchSize:Int): KafkaProducer[String, String] = {
    val kafkaProps = new Properties
    kafkaProps.put("bootstrap.servers", brokerList)
    kafkaProps.put("metadata.broker.list", brokerList)
    kafkaProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put("acks", acks.toString)
    kafkaProps.put("retries", "3")
    kafkaProps.put("producer.type", producerType)
    kafkaProps.put("linger.ms", lingerMs.toString)
    kafkaProps.put("batch.size", batchSize.toString)

    println("brokerList:" + brokerList)
    println("acks:" + acks)
    println("lingerMs:" + lingerMs)
    println("batchSize:" + batchSize)
    println("producerType:" + producerType)
    println(kafkaProps)

    return new KafkaProducer[String,String](kafkaProps)
  }
} 
Example 37
Source File: KafkaSink.scala    From eel-sdk   with Apache License 2.0 5 votes vote down vote up
package io.eels.component.kafka

import com.sksamuel.exts.Logging
import io.eels.schema.StructType
import io.eels.{Row, SinkWriter, Sink}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}


trait KafkaRowConverter[V] {
  def convert(row: Row): V
}

object KafkaRowConverter {
  implicit object NoopRowConverter extends KafkaRowConverter[Row] {
    override def convert(row: Row): Row = row
  }
}

case class KafkaSink[K, V](topic: String,
                           producer: KafkaProducer[K, V])
                          (implicit partitioner: KafkaPartitioner[V],
                           converter: KafkaRowConverter[V],
                           keygen: KafkaKeyGen[K]) extends Sink with Logging {

  def open(schema: StructType): SinkWriter = {

    new SinkWriter {
      override def write(row: Row): Unit = {
        val key = keygen.gen(row)
        val value = converter.convert(row)
        val record = partitioner.partition(row) match {
          case Some(part) => new ProducerRecord[K, V](topic, part, key, value)
          case _ => new ProducerRecord[K, V](topic, key, value)
        }
        logger.debug(s"Sending record $record")
        producer.send(record)
        producer.flush()
      }
      override def close(): Unit = producer.close()
    }
  }
} 
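KafkaSink resolves three implicits at construction: KafkaPartitioner[V], KafkaRowConverter[V] and KafkaKeyGen[K]. Only KafkaRowConverter is defined in this file; the other two are assumed to have instances in scope elsewhere in the component. A hedged sketch of a custom converter that renders each Row as a comma-separated line (mirroring the serialisation used in the test below):

implicit object CsvRowConverter extends KafkaRowConverter[String] {
  // Flatten the row's values into a single CSV line.
  override def convert(row: Row): String = row.values.mkString(",")
}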
Example 38
Source File: KafkaSinkTest.scala    From eel-sdk   with Apache License 2.0 5 votes vote down vote up
package io.eels.component.kafka

import java.util
import java.util.{Properties, UUID}

import io.eels.Row
import io.eels.datastream.DataStream
import io.eels.schema.{Field, StringType, StructType}
import net.manub.embeddedkafka.{EmbeddedKafka, EmbeddedKafkaConfig}
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.common.serialization.{Deserializer, Serializer}
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}

import scala.collection.JavaConverters._
import scala.util.Try

class KafkaSinkTest extends FlatSpec with Matchers with BeforeAndAfterAll {

  implicit val kafkaConfig = EmbeddedKafkaConfig(
    kafkaPort = 6001,
    zooKeeperPort = 6000
  )
  Try {
    EmbeddedKafka.start()
  }

  val schema = StructType(
    Field("name", StringType, nullable = true),
    Field("location", StringType, nullable = true)
  )

  val ds = DataStream.fromValues(
    schema,
    Seq(
      Vector("clint eastwood", UUID.randomUUID().toString),
      Vector("elton john", UUID.randomUUID().toString)
    )
  )

  "KafkaSink" should "support default implicits" ignore {

    val topic = "mytopic-" + System.currentTimeMillis()

    val properties = new Properties()
    properties.put("bootstrap.servers", s"localhost:${kafkaConfig.kafkaPort}")
    properties.put("group.id", "test")
    properties.put("auto.offset.reset", "earliest")

    val producer = new KafkaProducer[String, Row](properties, StringSerializer, RowSerializer)
    val sink = KafkaSink(topic, producer)

    val consumer = new KafkaConsumer[String, String](properties, StringDeserializer, StringDeserializer)
    consumer.subscribe(util.Arrays.asList(topic))

    ds.to(sink)
    producer.close()

    val records = consumer.poll(4000)
    records.iterator().asScala.map(_.value).toList shouldBe ds.collect.map {
      case Row(_, values) => values.mkString(",")
    }.toList
  }
}

object RowSerializer extends Serializer[Row] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()
  override def serialize(topic: String, data: Row): Array[Byte] = data.values.mkString(",").getBytes
  override def close(): Unit = ()
}

object StringSerializer extends Serializer[String] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()
  override def close(): Unit = ()
  override def serialize(topic: String, data: String): Array[Byte] = data.getBytes
}

object StringDeserializer extends Deserializer[String] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()
  override def close(): Unit = ()
  override def deserialize(topic: String, data: Array[Byte]): String = new String(data)
} 
Example 39
Source File: KafkaMessagingSystem.scala    From amadou   with Apache License 2.0 5 votes vote down vote up
package com.mediative.amadou
package monitoring

import java.util.Properties
import com.typesafe.config.Config
import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata}

class KafkaMessagingSystem(config: Config) extends MessagingSystem with Logging {
  private val properties  = KafkaMessagingSystem.readProperties(config)
  private val producer    = new KafkaProducer[String, String](properties)
  private val topicPrefix = properties.getProperty("topic.prefix")

  override def publish(topic: String, message: String): Unit = {
    val topicName = s"$topicPrefix-$topic"

    logger.info(s"Publishing to $topicName :\n$message\n")

    producer.send(new ProducerRecord[String, String](topicName, message), new Callback {
      override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit =
        if (exception != null) {
          logger
            .error(s"Cannot publish to $topicName. Caused by: ${exception.getMessage}", exception)
        }
    })
    ()
  }

  override def stop(): Unit =
    producer.close()
}

object KafkaMessagingSystem {
  def readProperties(config: Config): Properties = {
    val propertiesKeys = Seq(
      "bootstrap.servers",
      "acks",
      "retries",
      "batch.size",
      "linger.ms",
      "buffer.memory",
      "key.serializer",
      "value.serializer",
      "topic.prefix")

    val properties = new Properties()
    propertiesKeys.foreach(key => properties.setProperty(key, config.getString(key)))

    properties
  }
} 
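readProperties looks up all nine keys with config.getString, so every one of them must be present or a ConfigException.Missing is thrown. A hedged sketch of building such a Config inline for a local test; all values, the "monitoring" prefix and the "jobs" topic are placeholders:

import com.typesafe.config.ConfigFactory

val config = ConfigFactory.parseString(
  """
    |bootstrap.servers = "localhost:9092"
    |acks = "all"
    |retries = "3"
    |batch.size = "16384"
    |linger.ms = "5"
    |buffer.memory = "33554432"
    |key.serializer = "org.apache.kafka.common.serialization.StringSerializer"
    |value.serializer = "org.apache.kafka.common.serialization.StringSerializer"
    |topic.prefix = "monitoring"
  """.stripMargin)

val messaging = new KafkaMessagingSystem(config)
messaging.publish("jobs", """{"status": "started"}""")  // goes to topic "monitoring-jobs"
messaging.stop()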
Example 40
Source File: EventProducer.scala    From rokku   with Apache License 2.0 5 votes vote down vote up
package com.ing.wbaa.rokku.proxy.provider.kafka

import akka.Done
import akka.http.scaladsl.model.HttpMethod
import com.ing.wbaa.rokku.proxy.config.KafkaSettings
import com.ing.wbaa.rokku.proxy.data.RequestId
import com.ing.wbaa.rokku.proxy.handler.LoggerHandlerWithId
import com.ing.wbaa.rokku.proxy.metrics.MetricsFactory
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata }
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.{ ExecutionContext, Future }

trait EventProducer {

  private val logger = new LoggerHandlerWithId

  import scala.collection.JavaConverters._

  protected[this] implicit val kafkaSettings: KafkaSettings

  protected[this] implicit val executionContext: ExecutionContext

  private lazy val config: Map[String, Object] =
    Map[String, Object](
      "bootstrap.servers" -> kafkaSettings.bootstrapServers,
      ProducerConfig.RETRIES_CONFIG -> kafkaSettings.retries,
      ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG -> kafkaSettings.retriesBackOff,
      ProducerConfig.RECONNECT_BACKOFF_MAX_MS_CONFIG -> kafkaSettings.retriesBackOffMax,
      CommonClientConfigs.SECURITY_PROTOCOL_CONFIG -> kafkaSettings.protocol,
      ProducerConfig.MAX_BLOCK_MS_CONFIG -> kafkaSettings.maxblock,
      ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG -> kafkaSettings.requestTimeoutMs,
      "ssl.truststore.location" -> kafkaSettings.sslTruststoreLocation,
      "ssl.truststore.password" -> kafkaSettings.sslTruststorePassword,
      "ssl.keystore.location" -> kafkaSettings.sslKeystoreLocation,
      "ssl.keystore.password" -> kafkaSettings.sslKeystorePassword,
      "ssl.key.password" -> kafkaSettings.sslKeyPassword
    )

  private lazy val kafkaProducer: KafkaProducer[String, String] = new KafkaProducer(config.asJava, new StringSerializer, new StringSerializer)

  def sendSingleMessage(event: String, topic: String, httpMethod: Option[HttpMethod] = None)(implicit id: RequestId): Future[Done] = {
    kafkaProducer
      .send(new ProducerRecord[String, String](topic, event), (metadata: RecordMetadata, exception: Exception) => {
        exception match {
          case e: Exception =>
            MetricsFactory.incrementKafkaSendErrors
            logger.error("error in sending event {} to topic {}, error={}", event, topic, e)
            throw new Exception(e)
          case _ =>
            httpMethod.map { m => MetricsFactory.incrementKafkaNotificationsSent(m) }
            logger.debug("Message sent {} to kafka, offset {}", event, metadata.offset())
        }
      }) match {
        case _ => Future(Done)
      }
  }
} 
Example 41
Source File: KafkaTransmitter.scala    From trucking-iot   with Apache License 2.0 5 votes vote down vote up
package com.orendainx.trucking.simulator.transmitters

import java.util.Properties

import akka.actor.{ActorLogging, Props}
import com.orendainx.trucking.simulator.transmitters.DataTransmitter.Transmit
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

import scala.sys.SystemProperties
import com.typesafe.config.Config


object KafkaTransmitter {
  def props(topic: String)(implicit config: Config) = Props(new KafkaTransmitter(topic))
}

class KafkaTransmitter(topic: String)(implicit config: Config) extends DataTransmitter with ActorLogging {

  private val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.getString("transmitter.kafka.bootstrap-servers"))
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.key-serializer"))
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, config.getString("transmitter.kafka.value-serializer"))

  // Enable settings for a secure environment, if necessary.
  // See: http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.4/bk_secure-kafka-ambari/content/ch_secure-kafka-produce-events.html
  val systemProperties = new SystemProperties
  if (config.getBoolean("transmitter.kafka.security-enabled")) {
    props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, config.getString("transmitter.kafka.security-protocol"))
    systemProperties.put("java.security.auth.login.config", config.getString("transmitter.kafka.jaas-file"))
  }

  private val producer = new KafkaProducer[String, String](props)

  def receive = {
    case Transmit(data) => producer.send(new ProducerRecord(topic, data.toCSV))
  }

  override def postStop(): Unit = {
    producer.close()
    log.info("KafkaTransmitter closed its producer.")
  }
} 
Example 42
Source File: ActionsHandler.scala    From spark-http-stream   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming.http

import java.util.Properties
import scala.collection.mutable.ArrayBuffer
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.spark.internal.Logging
import org.apache.spark.sql.Row
import java.sql.Timestamp
import org.apache.spark.sql.types.StructType
import java.util.concurrent.atomic.AtomicInteger


trait ActionsHandler {
	def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries;
	def destroy();
}

trait ActionsHandlerFactory {
	def createInstance(params: Params): ActionsHandler;
}

abstract class AbstractActionsHandler extends ActionsHandler {
	def getRequiredParam(requestBody: Map[String, Any], key: String): Any = {
		val opt = requestBody.get(key);
		if (opt.isEmpty) {
			throw new MissingRequiredRequestParameterException(key);
		}

		opt.get;
	}

	override def destroy() = {
	}
}

class NullActionsHandler extends AbstractActionsHandler {
	override def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries = new ActionHandlerEntries() {
		def apply(action: String) = Map[String, Any]();
		//yes, do nothing
		def isDefinedAt(action: String) = false;
	};
}

//rich row with extra info: id, time stamp, ...
case class RowEx(originalRow: Row, batchId: Long, offsetInBatch: Long, timestamp: Timestamp) {
	def withTimestamp(): Row = Row.fromSeq(originalRow.toSeq :+ timestamp);
	def withId(): Row = Row.fromSeq(originalRow.toSeq :+ s"$batchId-$offsetInBatch");
	def extra: (Long, Long, Timestamp) = { (batchId, offsetInBatch, timestamp) };
}

trait SendStreamActionSupport {
	def onReceiveStream(topic: String, rows: Array[RowEx]);
	def getRequiredParam(requestBody: Map[String, Any], key: String): Any;

	val listeners = ArrayBuffer[StreamListener]();

	def addListener(listener: StreamListener): this.type = {
		listeners += listener;
		this;
	}

	protected def notifyListeners(topic: String, data: Array[RowEx]) {
		listeners.foreach { _.onArrive(topic, data); }
	}

	def handleSendStream(requestBody: Map[String, Any]): Map[String, Any] = {
		val topic = getRequiredParam(requestBody, "topic").asInstanceOf[String];
		val batchId = getRequiredParam(requestBody, "batchId").asInstanceOf[Long];
		val rows = getRequiredParam(requestBody, "rows").asInstanceOf[Array[Row]];
		val ts = new Timestamp(System.currentTimeMillis());
		var index = -1;
		val rows2 = rows.map { row ⇒
			index += 1;
			RowEx(Row.fromSeq(row.toSeq), batchId, index, ts)
		}

		onReceiveStream(topic, rows2);
		notifyListeners(topic, rows2);
		Map("rowsCount" -> rows.size);
	}
} 
Example 43
Source File: KafkaAsReceiver.scala    From spark-http-stream   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming.http

import java.util.Properties

import org.apache.kafka.clients.producer.Callback
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.producer.RecordMetadata
import org.apache.spark.internal.Logging


class KafkaAsReceiver(bootstrapServers: String) extends AbstractActionsHandler with SendStreamActionSupport with Logging {
	val props = new Properties();
	props.put("bootstrap.servers", bootstrapServers);
	props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
	props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
	val producer = new KafkaProducer[String, String](props);

	override def listActionHandlerEntries(requestBody: Map[String, Any]): PartialFunction[String, Map[String, Any]] = {
		case "actionSendStream" ⇒ handleSendStream(requestBody);
	}

	override def destroy() {
		producer.close();
	}

	override def onReceiveStream(topic: String, rows: Array[RowEx]) = {
		var index = -1;
		for (row ← rows) {
			index += 1;
			val key = "" + row.batchId + "-" + row.offsetInBatch;
			//TODO: send an array instead of a string value?
			val value = row.originalRow(0).toString();
			val record = new ProducerRecord[String, String](topic, key, value);
			producer.send(record, new Callback() {
				def onCompletion(metadata: RecordMetadata, e: Exception) = {
					if (e != null) {
						e.printStackTrace();
						logError(e.getMessage);
					}
					else {
						val offset = metadata.offset();
						val partition = metadata.partition();
						logDebug(s"record is sent to kafka:key=$key, value=$value, partition=$partition, offset=$offset");
					}
				}
			});
		}
	}
}

class KafkaAsReceiverFactory extends ActionsHandlerFactory {
	def createInstance(params: Params) = new KafkaAsReceiver(params.getRequiredString("bootstrapServers"));
} 
Example 44
Source File: StreamStreamDataGenerator.scala    From structured-streaming-application   with Apache License 2.0 5 votes vote down vote up
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config._
import knolx.KnolXLogger
import knolx.spark.Stock
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random


object StreamStreamDataGenerator extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay")
  val orderTypes = List("buy", "sell")
  val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9)
  val randomCompanyNames = Random.shuffle(companyNames).drop(Random.shuffle((1 to 3).toList).head)

  implicit val formats = Serialization.formats(NoTypeHints)

  info("Streaming companies listed into Kafka...")
  system.scheduler.schedule(0 seconds, 20 seconds) {
    randomCompanyNames.foreach { name =>
      producer.send(new ProducerRecord[String, String](companiesTopic, name))
    }
  }

  info("Streaming stocks data into Kafka...")
  system.scheduler.schedule(0 seconds, 5 seconds) {
    companyNames.foreach { name =>
      val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head)
      producer.send(new ProducerRecord[String, String](stocksTopic, write(stock)))
    }
  }
} 
Example 45
Source File: DataStreamer.scala    From structured-streaming-application   with Apache License 2.0 5 votes vote down vote up
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.util.Random


object DataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")
  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  val someWords = List("about", "above", "after", "again", "against")

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 200 milliseconds) {
    Random.shuffle(someWords).headOption.foreach { word =>
      producer.send(new ProducerRecord[String, String](topic, word))
    }
  }
} 
Example 46
Source File: ExampleExternalStateSpec.scala    From affinity   with Apache License 2.0 5 votes vote down vote up
package io.amient.affinity.example

import java.util.Properties

import com.typesafe.config.ConfigFactory
import io.amient.affinity.core.cluster.Node
import io.amient.affinity.core.util.AffinityTestBase
import io.amient.affinity.kafka.EmbeddedKafka
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.scalatest.concurrent.TimeLimitedTests
import org.scalatest.time.{Millis, Span}
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}

import scala.collection.JavaConverters._

class ExampleExternalStateSpec extends FlatSpec with AffinityTestBase with EmbeddedKafka with Matchers with BeforeAndAfterAll
  with TimeLimitedTests {

  override def numPartitions = 2

  val config = configure(ConfigFactory.load("example-external-state"))

  val topic = config.getString("affinity.keyspace.external.state.news.storage.kafka.topic")

  val node = new Node(configure(config, Some(zkConnect), Some(kafkaBootstrap)))

  override def beforeAll: Unit = try {
    createTopic(topic)
    val externalProducer = createKafkaAvroProducer[String, String]()
    try {
      externalProducer.send(new ProducerRecord(topic, "10:30", "the universe is expanding"))
      externalProducer.send(new ProducerRecord(topic, "11:00", "the universe is still expanding"))
      externalProducer.send(new ProducerRecord(topic, "11:30", "the universe briefly contracted but is expanding again"))
      externalProducer.flush()
    } finally {
      externalProducer.close()
    }
    //the external fixture is produced and the externalProducer is flushed() before the node is started
    node.start()
    node.awaitClusterReady()
    //at this point all stores have loaded everything available in the external topic so the test will be deterministic
  } finally {
    super.beforeAll()
  }

  override def afterAll: Unit = try {
    node.shutdown()
  } finally {
    super.afterAll()
  }

  behavior of "External State"

  val timeLimit = Span(5000, Millis) //it should be much faster but sometimes many tests are run at the same time

  it should "start automatically tailing state partitions on startup even when master" in {
    //we don't need an arbitrary sleep to ensure the tailing state catches up with the writes above
    //before we fetch the latest news because the watermark is built into the request to make the test fast and deterministic
    val response = node.get_text(node.http_get(s"/news/latest"))
    response should include("10:30\tthe universe is expanding")
    response should include("11:00\tthe universe is still expanding")
    response should include("11:30\tthe universe briefly contracted but is expanding again")

  }

  private def createKafkaAvroProducer[K, V]() = new KafkaProducer[K, V](new Properties {
    put("bootstrap.servers", kafkaBootstrap)
    put("acks", "1")
    put("key.serializer", "io.amient.affinity.kafka.KafkaAvroSerializer")
    put("value.serializer", "io.amient.affinity.kafka.KafkaAvroSerializer")
    //this simply adds all configs required by KafkaAvroSerializer
    config.getConfig("affinity.avro").entrySet().asScala.foreach { case (entry) =>
      put(entry.getKey, entry.getValue.unwrapped())
    }
  })


} 
Example 47
Source File: MultiDataStreamer.scala    From structured-streaming-application   with Apache License 2.0 5 votes vote down vote up
package knolx.kafka

import java.util.Properties

import akka.actor.ActorSystem
import knolx.Config.{bootstrapServer, topic}
import knolx.KnolXLogger
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
import scala.language.postfixOps
import scala.util.Random


object MultiDataStreamer extends App with KnolXLogger {
  val system = ActorSystem("DataStreamer")

  val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer)
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)

  val producer = new KafkaProducer[String, String](props)

  info("Streaming data into Kafka...")
  system.scheduler.schedule(0 seconds, 3000 milliseconds) {
    (1 to Random.nextInt(100)).foreach { id =>
      producer.send(new ProducerRecord[String, String](topic,s"device$id", (Math.random * 2 + 1).toString))
    }
  }
} 
Example 48
Source File: KafkaOutput.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.plugin.output.kafka

import java.io.{Serializable => JSerializable}
import java.util.Properties

import com.stratio.sparta.plugin.input.kafka.KafkaBase
import com.stratio.sparta.sdk.pipeline.output.Output._
import com.stratio.sparta.sdk.pipeline.output.{Output, OutputFormatEnum, SaveModeEnum}
import com.stratio.sparta.sdk.properties.CustomProperties
import com.stratio.sparta.sdk.properties.ValidatingPropertyMap._
import org.apache.kafka.clients.producer.ProducerConfig._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.spark.sql._

import scala.collection.mutable

class KafkaOutput(name: String, properties: Map[String, JSerializable])
  extends Output(name, properties) with KafkaBase with CustomProperties {

  val DefaultKafkaSerializer = classOf[StringSerializer].getName
  val DefaultAck = "0"
  val DefaultBatchNumMessages = "200"
  val DefaultProducerPort = "9092"

  override val customKey = "KafkaProperties"
  override val customPropertyKey = "kafkaPropertyKey"
  override val customPropertyValue = "kafkaPropertyValue"

  val outputFormat = OutputFormatEnum.withName(properties.getString("format", "json").toUpperCase)
  val rowSeparator = properties.getString("rowSeparator", ",")

  override def supportedSaveModes: Seq[SaveModeEnum.Value] = Seq(SaveModeEnum.Append)

  override def save(dataFrame: DataFrame, saveMode: SaveModeEnum.Value, options: Map[String, String]): Unit = {
    val tableName = getTableNameFromOptions(options)

    validateSaveMode(saveMode)

    outputFormat match {
      case OutputFormatEnum.ROW => dataFrame.rdd.foreachPartition(messages =>
        messages.foreach(message => send(tableName, message.mkString(rowSeparator))))
      case _ => dataFrame.toJSON.foreachPartition { messages =>
        messages.foreach(message => send(tableName, message))
      }
    }
  }

  def send(topic: String, message: String): Unit = {
    val record = new ProducerRecord[String, String](topic, message)
    KafkaOutput.getProducer(getProducerConnectionKey, createProducerProps).send(record)
  }

  private[kafka] def getProducerConnectionKey: String =
    getHostPort(BOOTSTRAP_SERVERS_CONFIG, DefaultHost, DefaultProducerPort)
      .getOrElse(BOOTSTRAP_SERVERS_CONFIG, throw new Exception("Invalid metadata broker list"))

  private[kafka] def createProducerProps: Properties = {
    val props = new Properties()
    properties.filter(_._1 != customKey).foreach { case (key, value) => props.put(key, value.toString) }
    mandatoryOptions.foreach { case (key, value) => props.put(key, value) }
    getCustomProperties.foreach { case (key, value) => props.put(key, value) }
    props
  }

  private[kafka] def mandatoryOptions: Map[String, String] =
    getHostPort(BOOTSTRAP_SERVERS_CONFIG, DefaultHost, DefaultProducerPort) ++
      Map(
        KEY_SERIALIZER_CLASS_CONFIG -> properties.getString(KEY_SERIALIZER_CLASS_CONFIG, DefaultKafkaSerializer),
        VALUE_SERIALIZER_CLASS_CONFIG -> properties.getString(VALUE_SERIALIZER_CLASS_CONFIG, DefaultKafkaSerializer),
        ACKS_CONFIG -> properties.getString(ACKS_CONFIG, DefaultAck),
        BATCH_SIZE_CONFIG -> properties.getString(BATCH_SIZE_CONFIG, DefaultBatchNumMessages)
      )

  override def cleanUp(options: Map[String, String]): Unit = {
    log.info(s"Closing Kafka producer in Kafka Output: $name")
    KafkaOutput.closeProducers()
  }
}

object KafkaOutput {

  private val producers: mutable.Map[String, KafkaProducer[String, String]] = mutable.Map.empty

  def getProducer(producerKey: String, properties: Properties): KafkaProducer[String, String] = {
    getInstance(producerKey, properties)
  }

  def closeProducers(): Unit = {
    producers.values.foreach(producer => producer.close())
  }

  private[kafka] def getInstance(key: String, properties: Properties): KafkaProducer[String, String] = {
    producers.getOrElse(key, {
      val producer = new KafkaProducer[String, String](properties)
      producers.put(key, producer)
      producer
    })
  }
} 
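getInstance caches one producer per connection key in a plain mutable.Map, which is not thread-safe if several outputs initialise concurrently on the same executor. A hedged variant that serialises access and uses getOrElseUpdate (same fields as above):

private[kafka] def getInstance(key: String, properties: Properties): KafkaProducer[String, String] =
  producers.synchronized {
    // Create and cache the producer only on the first request for this key.
    producers.getOrElseUpdate(key, new KafkaProducer[String, String](properties))
  }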
Example 49
Source File: KafkaProducerInjector.scala    From SparkOnKudu   with Apache License 2.0 5 votes vote down vote up
package org.kududb.spark.demo.gamer.aggregates

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}


object KafkaProducerInjector {


  def main(args:Array[String]): Unit = {
    if (args.length == 0) {
      println("{brokerList} {topic} {#OfRecords} {sleepTimeEvery10Records} {#OfGamers}")
      return
    }

    val brokerList = args(0)
    val topic = args(1)
    val numOfRecords = args(2).toInt
    val sleepTimeEvery10Records = args(3).toInt
    val numOfGamers = args(4).toInt

    val producer = getNewProducer(brokerList)

    for (i <- 0 until numOfRecords) {

      val gamerRecord = GamerDataGenerator.makeNewGamerRecord(numOfGamers)

      val message = new ProducerRecord[String, String](topic, gamerRecord.gamerId.toString,  gamerRecord.toString())

      producer.send(message)

      if (i % 10 == 0) {
        Thread.sleep(sleepTimeEvery10Records)
        print(".")
      }
      if (i % 2000 == 0) {
        println()
        println("Records Sent:" + i)
        println()
      }
    }
  }

  def getNewProducer(brokerList:String): KafkaProducer[String, String] = {
    val kafkaProps = new Properties
    kafkaProps.put("bootstrap.servers", brokerList)
    kafkaProps.put("metadata.broker.list", brokerList)

    // Serializers are mandatory (records in this example are keyed by gamerId)
    kafkaProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    kafkaProps.put("acks", "0")

    // how many times to retry when produce request fails?
    kafkaProps.put("retries", "3")
    kafkaProps.put("linger.ms", "2")
    kafkaProps.put("batch.size", "1000")
    kafkaProps.put("queue.time", "2")

    new KafkaProducer[String, String](kafkaProps)
  }


} 
Example 50
Source File: NumericalDataProducer.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.integrationtest.kafka

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.ByteArraySerializer
import org.apache.log4j.Logger

import org.apache.gearpump.streaming.serializer.ChillSerializer

class NumericalDataProducer(topic: String, bootstrapServers: String) {

  private val LOG = Logger.getLogger(getClass)
  private val producer = createProducer
  private val WRITE_SLEEP_NANOS = 10
  private val serializer = new ChillSerializer[Int]
  var lastWriteNum = 0

  def start(): Unit = {
    produceThread.start()
  }

  def stop(): Unit = {
    if (produceThread.isAlive) {
      produceThread.interrupt()
      produceThread.join()
    }
    producer.close()
  }

  
  def producedNumbers: Range = {
    Range(1, lastWriteNum + 1)
  }

  private def createProducer: KafkaProducer[Array[Byte], Array[Byte]] = {
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", bootstrapServers)
    new KafkaProducer[Array[Byte], Array[Byte]](properties,
      new ByteArraySerializer, new ByteArraySerializer)
  }

  private val produceThread = new Thread(new Runnable {
    override def run(): Unit = {
      try {
        while (!Thread.currentThread.isInterrupted) {
          lastWriteNum += 1
          val msg = serializer.serialize(lastWriteNum)
          val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, msg)
          producer.send(record)
          Thread.sleep(0, WRITE_SLEEP_NANOS)
        }
      } catch {
        case ex: InterruptedException =>
          LOG.error("message producing is stopped by an interrupt")
      }
    }
  })
} 
Example 51
Source File: AbstractKafkaSink.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.kafka.lib.sink

import java.util.Properties

import org.apache.gearpump.Message
import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink.KafkaProducerFactory
import org.apache.gearpump.streaming.kafka.util.KafkaConfig
import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory
import org.apache.gearpump.streaming.sink.DataSink
import org.apache.gearpump.streaming.task.TaskContext
import org.apache.gearpump.util.LogUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.ByteArraySerializer

object AbstractKafkaSink {
  private val LOG = LogUtil.getLogger(classOf[AbstractKafkaSink])

  val producerFactory = new KafkaProducerFactory {
    override def getKafkaProducer(config: KafkaConfig): KafkaProducer[Array[Byte], Array[Byte]] = {
      new KafkaProducer[Array[Byte], Array[Byte]](config.getProducerConfig,
        new ByteArraySerializer, new ByteArraySerializer)
    }
  }

  trait KafkaProducerFactory extends java.io.Serializable {
    def getKafkaProducer(config: KafkaConfig): KafkaProducer[Array[Byte], Array[Byte]]
  }
}

abstract class AbstractKafkaSink private[kafka](
    topic: String,
    props: Properties,
    kafkaConfigFactory: KafkaConfigFactory,
    factory: KafkaProducerFactory) extends DataSink {
  import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink._

  def this(topic: String, props: Properties) = {
    this(topic, props, new KafkaConfigFactory, AbstractKafkaSink.producerFactory)
  }

  private lazy val config = kafkaConfigFactory.getKafkaConfig(props)
  // Lazily construct producer since KafkaProducer is not serializable
  private lazy val producer = factory.getKafkaProducer(config)

  override def open(context: TaskContext): Unit = {
    LOG.info("KafkaSink opened")
  }

  override def write(message: Message): Unit = {
    message.value match {
      case (k: Array[Byte], v: Array[Byte]) =>
        val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, k, v)
        producer.send(record)
        LOG.debug("KafkaSink sent record {} to Kafka", record)
      case v: Array[Byte] =>
        val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, v)
        producer.send(record)
        LOG.debug("KafkaSink sent record {} to Kafka", record)
      case m =>
        val errorMsg = s"unexpected message type ${m.getClass}; " +
          s"Array[Byte] or (Array[Byte], Array[Byte]) required"
        LOG.error(errorMsg)
    }
  }

  override def close(): Unit = {
    producer.close()
    LOG.info("KafkaSink closed")
  }
} 
Example 52
Source File: KafkaStore.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.kafka.lib.store

import java.util.Properties

import com.twitter.bijection.Injection
import kafka.api.OffsetRequest
import org.apache.gearpump.Time.MilliSeconds
import org.apache.gearpump.streaming.kafka.lib.source.consumer.KafkaConsumer
import org.apache.gearpump.streaming.kafka.util.KafkaConfig
import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory
import org.apache.gearpump.streaming.transaction.api.{CheckpointStore, CheckpointStoreFactory}
import org.apache.gearpump.util.LogUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.ByteArraySerializer


class KafkaStore private[kafka](
    val topic: String,
    val producer: KafkaProducer[Array[Byte], Array[Byte]],
    val optConsumer: Option[KafkaConsumer])
  extends CheckpointStore {
  import org.apache.gearpump.streaming.kafka.lib.store.KafkaStore._

  private var maxTime: MilliSeconds = 0L

  override def persist(time: MilliSeconds, checkpoint: Array[Byte]): Unit = {
    // make sure checkpointed timestamp is monotonically increasing
    // hence (1, 1), (3, 2), (2, 3) is checkpointed as (1, 1), (3, 2), (3, 3)
    if (time > maxTime) {
      maxTime = time
    }
    val key = maxTime
    val value = checkpoint
    val message = new ProducerRecord[Array[Byte], Array[Byte]](
      topic, 0, Injection[Long, Array[Byte]](key), value)
    producer.send(message)
    LOG.debug("KafkaStore persisted state ({}, {})", key, value)
  }

  override def recover(time: MilliSeconds): Option[Array[Byte]] = {
    var checkpoint: Option[Array[Byte]] = None
    optConsumer.foreach { consumer =>
      while (consumer.hasNext && checkpoint.isEmpty) {
        val kafkaMsg = consumer.next()
        checkpoint = for {
          k <- kafkaMsg.key
          t <- Injection.invert[MilliSeconds, Array[Byte]](k).toOption
          c = kafkaMsg.msg if t >= time
        } yield c
      }
      consumer.close()
    }
    checkpoint match {
      case Some(c) =>
        LOG.info(s"KafkaStore recovered checkpoint ($time, $c)")
      case None =>
        LOG.info(s"no checkpoint existing for $time")
    }
    checkpoint
  }

  override def close(): Unit = {
    producer.close()
    LOG.info("KafkaStore closed")
  }
} 
Example 53
Source File: KafkaSinkSpec.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.kafka

import java.util.Properties

import com.twitter.bijection.Injection
import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink.KafkaProducerFactory
import org.apache.gearpump.streaming.kafka.util.KafkaConfig
import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.mockito.Mockito._
import org.scalacheck.Gen
import org.scalatest.mock.MockitoSugar
import org.scalatest.prop.PropertyChecks
import org.scalatest.{Matchers, PropSpec}

import org.apache.gearpump.Message
import org.apache.gearpump.streaming.MockUtil

class KafkaSinkSpec extends PropSpec with PropertyChecks with Matchers with MockitoSugar {

  val dataGen = for {
    topic <- Gen.alphaStr
    key <- Gen.alphaStr
    msg <- Gen.alphaStr
  } yield (topic, Injection[String, Array[Byte]](key), Injection[String, Array[Byte]](msg))

  property("KafkaSink write should send producer record") {
    forAll(dataGen) {
      (data: (String, Array[Byte], Array[Byte])) =>
        val props = mock[Properties]
        val producer = mock[KafkaProducer[Array[Byte], Array[Byte]]]
        val producerFactory = mock[KafkaProducerFactory]
        val configFactory = mock[KafkaConfigFactory]
        val config = mock[KafkaConfig]

        when(configFactory.getKafkaConfig(props)).thenReturn(config)
        when(producerFactory.getKafkaProducer(config)).thenReturn(producer)

        val (topic, key, msg) = data
        val kafkaSink = new KafkaSink(topic, props, configFactory, producerFactory)
        kafkaSink.write(Message((key, msg)))
        verify(producer).send(MockUtil.argMatch[ProducerRecord[Array[Byte], Array[Byte]]](
          r => r.topic == topic && (r.key sameElements key) && (r.value sameElements msg)))
        kafkaSink.write(Message(msg))
        verify(producer).send(MockUtil.argMatch[ProducerRecord[Array[Byte], Array[Byte]]](
          r => r.topic() == topic && (r.key == null) && (r.value() sameElements msg)
        ))
        kafkaSink.close()
    }
  }

  property("KafkaSink close should close kafka producer") {
    val props = mock[Properties]
    val producer = mock[KafkaProducer[Array[Byte], Array[Byte]]]
    val producerFactory = mock[KafkaProducerFactory]
    val configFactory = mock[KafkaConfigFactory]
    val config = mock[KafkaConfig]

    when(configFactory.getKafkaConfig(props)).thenReturn(config)
    when(producerFactory.getKafkaProducer(config)).thenReturn(producer)

    val kafkaSink = new KafkaSink("topic", props, configFactory, producerFactory)
    kafkaSink.close()
    verify(producer).close()
  }
} 
Example 54
Source File: KafkaWordCount.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._


object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messages of random single-digit words (0-9) at a configurable rate.
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer connection properties
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while(true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

}
// scalastyle:on println 
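The producer loop above is fire-and-forget: send failures are silently dropped and the producer is never closed. The sketch below is not part of the original example; it shows the standard Callback overload of KafkaProducer.send being used to log failures, plus a shutdown hook to close the producer.

import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata}

object LoggingSend {
  // Sends a record and logs the outcome instead of ignoring it.
  def sendLogged(producer: KafkaProducer[String, String], topic: String, value: String): Unit = {
    producer.send(new ProducerRecord[String, String](topic, null, value), new Callback {
      override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
        if (exception != null) {
          System.err.println(s"send to $topic failed: ${exception.getMessage}")
        }
      }
    })
  }
}

// In KafkaWordCountProducer one would also close the producer on exit, e.g.:
// sys.addShutdownHook(producer.close())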
Example 55
Source File: KafkaPublisher.scala    From etl-light   with MIT License 5 votes vote down vote up
package yamrcraft.etlite

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

class KafkaPublisher {

  val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092")
  props.put("partition.assignment.strategy", "range")
  props.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer")
  props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer")
  props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer")

  val producer = new KafkaProducer[Array[Byte], Array[Byte]](props)

  def send(topic: String, event: Array[Byte]): Unit = {
    producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, event))
  }

  def send(topic: String, events: List[Array[Byte]]): Unit = {
    for (event <- events) {
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, event))
    }
  }

} 
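KafkaProducer.send is asynchronous, so events handed to KafkaPublisher may still be sitting in the client's buffer when the JVM exits. A hedged extension (not part of the original class) adds flush and close pass-throughs so callers can drain the buffer deterministically; both flush() and close() are standard KafkaProducer API.

import yamrcraft.etlite.KafkaPublisher

// Sketch only: these methods are not in the original KafkaPublisher.
class FlushableKafkaPublisher extends KafkaPublisher {
  def flush(): Unit = producer.flush() // block until buffered records are sent
  def close(): Unit = producer.close() // flush and release client resources
}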
Example 56
Source File: ExternalKafkaProcessorSupplier.scala    From haystack-trends   with Apache License 2.0 5 votes vote down vote up
package com.expedia.www.haystack.trends.kstream.processor

import com.expedia.metrics.MetricData
import com.expedia.www.haystack.trends.config.entities.KafkaProduceConfiguration
import com.expedia.www.haystack.trends.kstream.serde.TrendMetricSerde.metricRegistry
import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata}
import org.apache.kafka.streams.processor.{AbstractProcessor, Processor, ProcessorContext, ProcessorSupplier}
import org.slf4j.LoggerFactory

class ExternalKafkaProcessorSupplier(kafkaProduceConfig: KafkaProduceConfiguration) extends ProcessorSupplier[String, MetricData] {

  private val LOGGER = LoggerFactory.getLogger(this.getClass)
  private val metricPointExternalKafkaSuccessMeter = metricRegistry.meter("metricpoint.kafka-external.success")
  private val metricPointExternalKafkaFailureMeter = metricRegistry.meter("metricpoint.kafka-external.failure")

  def get: Processor[String, MetricData] = {
    new ExternalKafkaProcessor(kafkaProduceConfig)
  }

  // The inner class declaration was truncated in the original listing; the header below
  // restores it so the snippet parses. How KafkaProduceConfiguration exposes the target
  // topic and the producer properties is an assumption, not the project's verified API.
  private class ExternalKafkaProcessor(kafkaProduceConfig: KafkaProduceConfiguration)
    extends AbstractProcessor[String, MetricData] {

    private val kafkaProduceTopic = kafkaProduceConfig.topic // assumed accessor
    private val kafkaProducer =
      new KafkaProducer[String, MetricData](kafkaProduceConfig.props) // assumed accessor

    def process(key: String, value: MetricData): Unit = {

      val kafkaMessage = new ProducerRecord(kafkaProduceTopic,
        key, value)
      kafkaProducer.send(kafkaMessage, new Callback {
        override def onCompletion(recordMetadata: RecordMetadata, e: Exception): Unit = {
          if (e != null) {
            LOGGER.error(s"Failed to produce the message to kafka for topic=$kafkaProduceTopic, with reason=", e)
            metricPointExternalKafkaFailureMeter.mark()
          } else {
            metricPointExternalKafkaSuccessMeter.mark()
          }
        }
      })
    }
  }
} 
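The supplier only becomes useful once it is attached to a topology. Below is a minimal sketch of wiring it in with the low-level Processor API; the source topic and node names are placeholders chosen for illustration, not values taken from the haystack-trends project.

import com.expedia.www.haystack.trends.config.entities.KafkaProduceConfiguration
import com.expedia.www.haystack.trends.kstream.processor.ExternalKafkaProcessorSupplier
import org.apache.kafka.streams.Topology

object ExternalKafkaTopologySketch {
  // "metric-data-points" and the node names below are assumptions for illustration.
  def build(kafkaProduceConfig: KafkaProduceConfiguration): Topology = {
    val topology = new Topology()
    topology.addSource("metric-source", "metric-data-points")
    topology.addProcessor("external-kafka-sink",
      new ExternalKafkaProcessorSupplier(kafkaProduceConfig), "metric-source")
    topology
  }
}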
Example 57
Source File: BgTestHelpers.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.bg.test

import java.util.Properties

import cmwell.driver.Dao
import cmwell.fts.FTSService
import com.typesafe.config.{ConfigFactory, ConfigValueFactory}
import org.apache.kafka.clients.producer.KafkaProducer
import org.elasticsearch.action.ActionListener
import org.elasticsearch.action.admin.indices.create.{CreateIndexRequest, CreateIndexResponse}
import org.elasticsearch.action.admin.indices.template.put.PutIndexTemplateRequest
import org.elasticsearch.action.support.master.AcknowledgedResponse
import org.elasticsearch.common.xcontent.XContentType
import scala.concurrent.duration._
import scala.concurrent.{Await, Promise}
import scala.io.Source

object BgTestHelpers {
  def kafkaProducer(bootstrapServers: String): KafkaProducer[Array[Byte], Array[Byte]] = {
    val producerProperties = new Properties
    producerProperties.put("bootstrap.servers", bootstrapServers)
    producerProperties.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer")
    producerProperties.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer")
    new KafkaProducer[Array[Byte], Array[Byte]](producerProperties)
  }

  def dao(address: String, port: Int) = {
    // scalastyle:off
    val initCommands = Some(List(
      "CREATE KEYSPACE IF NOT EXISTS data2 WITH REPLICATION = {'class' : 'SimpleStrategy', 'replication_factor' : 1};",
      "CREATE TABLE IF NOT EXISTS data2.Path ( path text, uuid text, last_modified timestamp, PRIMARY KEY ( path, last_modified, uuid ) ) WITH CLUSTERING ORDER BY (last_modified DESC, uuid ASC) AND compression = { 'class' : 'LZ4Compressor' } AND caching = {'keys':'ALL', 'rows_per_partition':'1'};",
      "CREATE TABLE IF NOT EXISTS data2.Infoton (uuid text, quad text, field text, value text, data blob, PRIMARY KEY (uuid,quad,field,value)) WITH compression = { 'class' : 'LZ4Compressor' } AND caching = {'keys':'ALL', 'rows_per_partition':'1000'};"
    ))
    // scalastyle:on
    Dao("Test","data2", address, port, initCommands = initCommands)
  }

  def ftsOverridesConfig(address: String, port: Int) = {
    ConfigFactory.load()
      .withValue("ftsService.clusterName", ConfigValueFactory.fromAnyRef("docker-cluster"))
      .withValue("ftsService.transportAddress", ConfigValueFactory.fromIterable(java.util.Arrays.asList(address)))
      .withValue("ftsService.transportPort", ConfigValueFactory.fromAnyRef(port))
  }

  def initFTSService(ftsService: FTSService) = {
    val putTemplateRequest = new PutIndexTemplateRequest("indices_template")
    val indicesTemplateStr = {
      val templateSource = Source.fromURL(this.getClass.getResource("/indices_template.json"))
      try templateSource.getLines.mkString("\n") finally templateSource.close()
    }
    putTemplateRequest.source(indicesTemplateStr, XContentType.JSON)
    val putTemplatePromise = Promise[AcknowledgedResponse]()
    ftsService.client.admin().indices().putTemplate(putTemplateRequest, new ActionListener[AcknowledgedResponse] {
      override def onResponse(response: AcknowledgedResponse): Unit = putTemplatePromise.success(response)
      override def onFailure(e: Exception): Unit = putTemplatePromise.failure(e)
    })
    val putTemplateAck = Await.result(putTemplatePromise.future, 1.minute)
    if (!putTemplateAck.isAcknowledged)
      throw new Exception("ES didn't acknowledge the put template request")
    val createIndexPromise = Promise[AcknowledgedResponse]()
    ftsService.client.admin().indices().create(new CreateIndexRequest("cm_well_p0_0"), new ActionListener[CreateIndexResponse] {
      override def onResponse(response: CreateIndexResponse): Unit = createIndexPromise.success(response)
      override def onFailure(e: Exception): Unit = createIndexPromise.failure(e)
    })
    val createIndexResponse = Await.result(createIndexPromise.future, 1.minute)
    if (!createIndexResponse.isAcknowledged)
      throw new Exception("ES didn't acknowledge the create index request")
  }

} 
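For completeness, a short sketch of how the kafkaProducer helper above might be exercised in a test, assuming a locally running broker; "persist_topic" is a hypothetical topic name, while send, get and close are standard KafkaProducer API.

import cmwell.bg.test.BgTestHelpers
import org.apache.kafka.clients.producer.ProducerRecord

object BgTestProducerSketch extends App {
  // Push one payload through the helper and wait for the broker ack.
  val producer = BgTestHelpers.kafkaProducer("localhost:9092")
  val record = new ProducerRecord[Array[Byte], Array[Byte]]("persist_topic", "payload".getBytes("UTF-8"))
  producer.send(record).get() // send returns a java.util.concurrent.Future; get() waits for the ack
  producer.close()
}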
Example 58
Source File: KafkaWordCount.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._


object KafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setAppName("KafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// Produces messagesPerSec messages per second, each containing wordsPerMessage random single-digit words (0-9).
object KafkaWordCountProducer {

  def main(args: Array[String]) {
    if (args.length < 4) {
      System.err.println("Usage: KafkaWordCountProducer <metadataBrokerList> <topic> " +
        "<messagesPerSec> <wordsPerMessage>")
      System.exit(1)
    }

    val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args

    // Kafka producer connection properties
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Send some messages
    while(true) {
      (1 to messagesPerSec.toInt).foreach { messageNum =>
        val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString)
          .mkString(" ")

        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }

      Thread.sleep(1000)
    }
  }

}
// scalastyle:on println 
Example 59
Source File: StreamingProducerApp.scala    From Scala-Programming-Projects   with MIT License 5 votes vote down vote up
package coinyser

import cats.effect.{ExitCode, IO, IOApp}
import com.pusher.client.Pusher
import StreamingProducer._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import scala.collection.JavaConversions._

object StreamingProducerApp extends IOApp {
  val topic = "transactions"

  val pusher = new Pusher("de504dc5763aeef9ff52")

  val props = Map(
    "bootstrap.servers" -> "localhost:9092",
    "key.serializer" -> "org.apache.kafka.common.serialization.IntegerSerializer",
    "value.serializer" -> "org.apache.kafka.common.serialization.StringSerializer")

  def run(args: List[String]): IO[ExitCode] = {
    val kafkaProducer = new KafkaProducer[Int, String](props)

    subscribe(pusher) { wsTx =>
      val tx = convertWsTransaction(deserializeWebsocketTransaction(wsTx))
      val jsonTx = serializeTransaction(tx)
      kafkaProducer.send(new ProducerRecord(topic, tx.tid, jsonTx))
    }.flatMap(_ => IO.never)
  }
}
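The Scala-to-Java map conversion above leans on the deprecated scala.collection.JavaConversions implicits. A hedged alternative sketch builds a java.util.Properties explicitly, which the KafkaProducer constructor accepts directly, and registers a shutdown hook so buffered transactions are flushed before the JVM exits.

import java.util.Properties

import org.apache.kafka.clients.producer.KafkaProducer

object ProducerConfigSketch {
  // Same settings as StreamingProducerApp.props, expressed as Properties so no
  // implicit collection conversion is needed.
  def kafkaProducer(): KafkaProducer[Int, String] = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.IntegerSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    val producer = new KafkaProducer[Int, String](props)
    sys.addShutdownHook(producer.close()) // flush pending records on JVM shutdown
    producer
  }
}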