org.apache.kafka.common.serialization.StringDeserializer Scala Examples
The following examples show how to use org.apache.kafka.common.serialization.StringDeserializer.
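Before the project-specific examples, here is a minimal, self-contained sketch of the two common ways a StringDeserializer is supplied to a consumer: passed as instances to the KafkaConsumer constructor, or configured by class name. The broker address, group id, and topic name below are placeholders, not values taken from any of the examples.

import java.time.Duration
import java.util.Properties

import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConverters._

object MinimalStringConsumer extends App {
  val props = new Properties()
  props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") // placeholder broker
  props.put(ConsumerConfig.GROUP_ID_CONFIG, "example-group")           // placeholder group id
  props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  // Option 1: pass deserializer instances directly to the constructor
  val consumer = new KafkaConsumer[String, String](props, new StringDeserializer, new StringDeserializer)

  // Option 2 (alternative): configure the deserializers by class name instead
  // props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer].getName)
  // props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer].getName)

  consumer.subscribe(List("example-topic").asJava) // placeholder topic
  val records = consumer.poll(Duration.ofSeconds(1))
  records.asScala.foreach(r => println(s"${r.key} -> ${r.value}"))
  consumer.close()
}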
Example 1
Source File: StreamingKafka10.scala From BigData-News with Apache License 2.0 | 7 votes |
package com.vita.spark

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe

object StreamingKafka10 {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[2]")
      .appName("streaming")
      .getOrCreate()

    val sc = spark.sparkContext
    val ssc = new StreamingContext(sc, Seconds(5))

    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "node6:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "0001",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val topics = Array("weblogs")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    val lines = stream.map(x => x.value())
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L)).reduceByKey(_ + _)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
Example 2
Source File: KafkaTopicInfo.scala From matcher with MIT License | 7 votes |
package tools import java.io.File import akka.actor.ActorSystem import com.typesafe.config.ConfigFactory import com.wavesplatform.dex.queue.KafkaMatcherQueue.eventDeserializer import com.wavesplatform.dex.queue.{QueueEvent, QueueEventWithMeta} import com.wavesplatform.dex.settings.toConfigOps import org.apache.kafka.clients.consumer.KafkaConsumer import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.serialization.StringDeserializer import scala.collection.JavaConverters._ import scala.concurrent.duration.DurationInt object KafkaTopicInfo extends App { implicit val system: ActorSystem = ActorSystem() val configFile = new File(args(0)) val topic = args(1) val from = args(2).toLong val max = args(3).toInt println(s"""configFile: ${configFile.getAbsolutePath} |topic: $topic |from: $from |max: $max""".stripMargin) val requestTimeout = java.time.Duration.ofNanos(5.seconds.toNanos) val config = ConfigFactory .parseString("""waves.dex.events-queue.kafka.consumer.client { | client.id = "kafka-topics-info" | enable.auto.commit = false | auto.offset.reset = earliest |} | |""".stripMargin) .withFallback { ConfigFactory .parseFile(configFile) .withFallback(ConfigFactory.defaultApplication()) .withFallback(ConfigFactory.defaultReference()) .resolve() .getConfig("waves.dex.events-queue.kafka") } val consumer = new KafkaConsumer[String, QueueEvent]( config.getConfig("waves.dex.events-queue.kafka.consumer.client").toProperties, new StringDeserializer, eventDeserializer ) try { val topicPartition = new TopicPartition(topic, 0) val topicPartitions = java.util.Collections.singletonList(topicPartition) consumer.assign(topicPartitions) { val r = consumer.partitionsFor(topic, requestTimeout) println(s"Partitions:\n${r.asScala.mkString("\n")}") } { val r = consumer.endOffsets(topicPartitions, requestTimeout) println(s"End offsets for $topicPartition: ${r.asScala.mkString(", ")}") } consumer.seek(topicPartition, from) val pollDuriation = java.time.Duration.ofNanos(1.seconds.toNanos) val lastOffset = from + max var continue = true while (continue) { println(s"Reading from Kafka") val xs = consumer.poll(pollDuriation).asScala.toVector xs.foreach { msg => println(QueueEventWithMeta(msg.offset(), msg.timestamp(), msg.value())) } xs.lastOption.foreach { x => if (x.offset() == lastOffset) continue = false } } } finally { consumer.close() } }
Example 3
Source File: IntegrationTest.scala From kmq with Apache License 2.0 | 6 votes |
package com.softwaremill.kmq.redelivery import java.time.Duration import java.util.Random import akka.actor.ActorSystem import akka.kafka.scaladsl.{Consumer, Producer} import akka.kafka.{ConsumerSettings, ProducerMessage, ProducerSettings, Subscriptions} import akka.stream.ActorMaterializer import akka.testkit.TestKit import com.softwaremill.kmq._ import com.softwaremill.kmq.redelivery.infrastructure.KafkaSpec import org.apache.kafka.clients.consumer.ConsumerConfig import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringDeserializer import org.scalatest.concurrent.Eventually import org.scalatest.time.{Seconds, Span} import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers} import scala.collection.mutable.ArrayBuffer class IntegrationTest extends TestKit(ActorSystem("test-system")) with FlatSpecLike with KafkaSpec with BeforeAndAfterAll with Eventually with Matchers { implicit val materializer = ActorMaterializer() import system.dispatcher "KMQ" should "resend message if not committed" in { val bootstrapServer = s"localhost:${testKafkaConfig.kafkaPort}" val kmqConfig = new KmqConfig("queue", "markers", "kmq_client", "kmq_redelivery", Duration.ofSeconds(1).toMillis, 1000) val consumerSettings = ConsumerSettings(system, new StringDeserializer, new StringDeserializer) .withBootstrapServers(bootstrapServer) .withGroupId(kmqConfig.getMsgConsumerGroupId) .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") val markerProducerSettings = ProducerSettings(system, new MarkerKey.MarkerKeySerializer(), new MarkerValue.MarkerValueSerializer()) .withBootstrapServers(bootstrapServer) .withProperty(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[ParititionFromMarkerKey].getName) val markerProducer = markerProducerSettings.createKafkaProducer() val random = new Random() lazy val processedMessages = ArrayBuffer[String]() lazy val receivedMessages = ArrayBuffer[String]() val control = Consumer.committableSource(consumerSettings, Subscriptions.topics(kmqConfig.getMsgTopic)) // 1. get messages from topic .map { msg => ProducerMessage.Message( new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic, MarkerKey.fromRecord(msg.record), new StartMarker(kmqConfig.getMsgTimeoutMs)), msg) } .via(Producer.flow(markerProducerSettings, markerProducer)) // 2. write the "start" marker .map(_.message.passThrough) .mapAsync(1) { msg => msg.committableOffset.commitScaladsl().map(_ => msg.record) // this should be batched } .map { msg => receivedMessages += msg.value msg } .filter(_ => random.nextInt(5) != 0) .map { processedMessage => processedMessages += processedMessage.value new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic, MarkerKey.fromRecord(processedMessage), EndMarker.INSTANCE) } .to(Producer.plainSink(markerProducerSettings, markerProducer)) // 5. write "end" markers .run() val redeliveryHook = RedeliveryTracker.start(new KafkaClients(bootstrapServer), kmqConfig) val messages = (0 to 20).map(_.toString) messages.foreach(msg => sendToKafka(kmqConfig.getMsgTopic,msg)) eventually { receivedMessages.size should be > processedMessages.size processedMessages.sortBy(_.toInt).distinct shouldBe messages }(PatienceConfig(timeout = Span(15, Seconds)), implicitly) redeliveryHook.close() control.shutdown() } override def afterAll(): Unit = { super.afterAll() TestKit.shutdownActorSystem(system) } }
Example 4
Source File: KafkaSpec.scala From kmq with Apache License 2.0 | 6 votes |
package com.softwaremill.kmq.redelivery.infrastructure

import net.manub.embeddedkafka.{EmbeddedKafka, EmbeddedKafkaConfig}
import org.apache.kafka.common.serialization.StringDeserializer
import org.scalatest.{BeforeAndAfterEach, Suite}

trait KafkaSpec extends BeforeAndAfterEach { self: Suite =>

  val testKafkaConfig = EmbeddedKafkaConfig(9092, 2182)
  private implicit val stringDeserializer = new StringDeserializer()

  def sendToKafka(topic: String, message: String): Unit = {
    EmbeddedKafka.publishStringMessageToKafka(topic, message)(testKafkaConfig)
  }

  def consumeFromKafka(topic: String): String = {
    EmbeddedKafka.consumeFirstStringMessageFrom(topic)(testKafkaConfig)
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
    EmbeddedKafka.start()(testKafkaConfig)
  }

  override def afterEach(): Unit = {
    super.afterEach()
    EmbeddedKafka.stop()
  }
}
Example 5
Source File: ConsumerBuilder.scala From asura with MIT License | 5 votes |
package asura.kafka import akka.actor.ActorSystem import akka.kafka.scaladsl.Consumer import akka.kafka.scaladsl.Consumer.Control import akka.kafka.{ConsumerSettings, Subscriptions} import akka.stream.scaladsl.Source import io.confluent.kafka.serializers.{AbstractKafkaAvroSerDeConfig, KafkaAvroDeserializer, KafkaAvroDeserializerConfig} import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord} import org.apache.kafka.common.serialization.{Deserializer, StringDeserializer} import scala.collection.JavaConverters._ object ConsumerBuilder { def buildAvroSource[V]( brokerUrl: String, schemaRegisterUrl: String, group: String, topics: Set[String], resetType: String = "latest", )(implicit system: ActorSystem): Source[ConsumerRecord[String, V], Control] = { val kafkaAvroSerDeConfig = Map[String, Any]( AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG -> schemaRegisterUrl, KafkaAvroDeserializerConfig.SPECIFIC_AVRO_READER_CONFIG -> true.toString ) val consumerSettings: ConsumerSettings[String, V] = { val kafkaAvroDeserializer = new KafkaAvroDeserializer() kafkaAvroDeserializer.configure(kafkaAvroSerDeConfig.asJava, false) val deserializer = kafkaAvroDeserializer.asInstanceOf[Deserializer[V]] ConsumerSettings(system, new StringDeserializer, deserializer) .withBootstrapServers(brokerUrl) .withGroupId(group) .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, resetType) } Consumer.plainSource(consumerSettings, Subscriptions.topics(topics)) } }
Example 6
Source File: Consumers.scala From Fast-Data-Processing-Systems-with-SMACK-Stack with MIT License | 5 votes |
import akka.actor.ActorRef
import akka.kafka._
import akka.kafka.ConsumerMessage.CommittableOffsetBatch
import akka.kafka.scaladsl._
import akka.stream.scaladsl.{Keep, Sink}
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.ByteArrayDeserializer
import org.apache.kafka.common.serialization.StringDeserializer

// Note: `system`, `db`, `rocket`, `business`, `producerSettings` and `maxPartitions`
// are placeholders defined elsewhere in the original example.

val consumerSettings = ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
  .withBootstrapServers("localhost:9092")
  .withGroupId("group1")
  .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

// Consume messages and store a representation, including offset, in DB example:
db.loadOffset().foreach { fromOffset =>
  val subscription = Subscriptions.assignmentWithOffset(new TopicPartition("topic1", 1) -> fromOffset)
  Consumer.plainSource(consumerSettings, subscription)
    .mapAsync(1)(db.save)
}

// Consume messages at-most-once example:
Consumer.atMostOnceSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))
  .mapAsync(1) { record => rocket.launch(record.value) }

// Consume messages at-least-once example:
Consumer.committableSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))
  .mapAsync(1) { msg =>
    db.update(msg.value).flatMap(_ => msg.committableOffset.commitScaladsl())
  }

// Connect a Consumer to Producer example:
Consumer.committableSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))
  .map(msg => ProducerMessage.Message(
    new ProducerRecord[Array[Byte], String]("topic2", msg.value), msg.committableOffset))
  .to(Producer.commitableSink(producerSettings))

// Consume messages at-least-once, and commit in batches example:
Consumer.committableSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))
  .mapAsync(1) { msg =>
    db.update(msg.value).map(_ => msg.committableOffset)
  }
  .batch(max = 10, first => CommittableOffsetBatch.empty.updated(first)) { (batch, elem) =>
    batch.updated(elem)
  }
  .mapAsync(1)(_.commitScaladsl())

// A reusable Kafka consumer example:
val consumer: ActorRef = system.actorOf(KafkaConsumerActor.props(consumerSettings))

// Manually assign topic partition to it
val stream1 = Consumer
  .plainExternalSource[Array[Byte], String](consumer, Subscriptions.assignment(new TopicPartition("topic1", 1)))
  .via(business)
  .to(Sink.ignore)

// Manually assign another topic partition
val stream2 = Consumer
  .plainExternalSource[Array[Byte], String](consumer, Subscriptions.assignment(new TopicPartition("topic1", 2)))
  .via(business)
  .to(Sink.ignore)

// Consumer group example:
val consumerGroup = Consumer.committablePartitionedSource(
  consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))

// Process each assigned partition separately
consumerGroup.map { case (topicPartition, source) =>
  source
    .via(business)
    .toMat(Sink.ignore)(Keep.both)
    .run()
}.mapAsyncUnordered(maxPartitions)(_._2)
Example 7
Source File: TestConsumer.scala From asura with MIT License | 5 votes |
package asura.kafka.consumer import akka.actor.ActorSystem import akka.kafka.scaladsl.Consumer import akka.kafka.{ConsumerSettings, Subscriptions} import akka.stream.ActorMaterializer import akka.stream.scaladsl.Sink import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.common.serialization.StringDeserializer object TestConsumer extends StrictLogging { def main(args: Array[String]): Unit = { logger.info("Start consumer") implicit val system = ActorSystem("consumer") implicit val materializer = ActorMaterializer() implicit val ec = system.dispatcher val consumerSettings = ConsumerSettings(system, new StringDeserializer, new StringDeserializer) .withGroupId("test-group1") val done = Consumer .plainSource(consumerSettings, Subscriptions.topics("test-topic")) .runWith(Sink.foreach(record => logger.info(s"topic:${record.topic()}, partition:${record.partition()}, offset:${record.offset()}, key:${record.key()}, value: ${record.value()}")) ) done onComplete { case scala.util.Success(_) => logger.info("Done"); system.terminate() case scala.util.Failure(err) => logger.error(err.toString); system.terminate() } } }
Example 8
Source File: CountIntByStreaming.scala From wow-spark with MIT License | 5 votes |
package com.sev7e0.wow.kafka

import com.sev7e0.wow.spark_streaming.StreamingLogger
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object CountIntByStreaming {

  val brokerList = "localhost:9092"
  val topic = "randomCount"
  val groupId = "group"
  val path = "temp/checkpoint/CountIntBySS"
  val master = "local"

  def main(args: Array[String]): Unit = {
    val prop = initProperties()
    val topics = Array(topic)

    // Set the logging level for console output
    StreamingLogger.setLoggerLevel()

    val sparkConf = new SparkConf()
      .setAppName(CountIntByStreaming.getClass.getName)
      .setMaster(master)

    // Create the StreamingContext with a two-second batch interval
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Set the checkpoint path
    ssc.checkpoint(path)

    // Obtain the DStream via KafkaUtils
    val kafkaDS = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topics, prop))

    kafkaDS.map(record => {
      val value = record.value().toLong
      value
    }).reduce(_ + _).print()

    // Start the streaming computation and block until it terminates
    ssc.start()
    ssc.awaitTermination()

    def initProperties(): Map[String, Object] = Map[String, Object](
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokerList,
      ConsumerConfig.GROUP_ID_CONFIG -> groupId,
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest",
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean)
    )
  }
}
Example 9
Source File: package.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com import org.apache.kafka.common.serialization.StringDeserializer import org.apache.spark.streaming.{Seconds, StreamingContext} package object example { def setupLogging(): Unit = { import org.apache.log4j.{Level, Logger} val rootLogger = Logger.getRootLogger rootLogger.setLevel(Level.ERROR) } def kafkaParams = Map[String, Object]( "bootstrap.servers" -> "127.0.0.1:9092", "key.deserializer" -> classOf[StringDeserializer], "value.deserializer" -> classOf[StringDeserializer], "group.id" -> "mygroup1", "auto.offset.reset" -> "latest", "enable.auto.commit" -> (false: java.lang.Boolean) ) def launchWithCheckpointing(logic: StreamingContext => Unit, appName:String, checkpointPath:String): Unit = { val streamingContext = new StreamingContext("local[*]", appName, Seconds(2)) setupLogging() logic.apply(streamingContext) streamingContext.checkpoint(checkpointPath) streamingContext.start() streamingContext.awaitTermination() } def launchWithItself(logic: StreamingContext => Unit, appName:String): Unit = { val streamingContext = new StreamingContext("local[*]", appName, Seconds(2)) setupLogging() logic.apply(streamingContext) streamingContext.start() streamingContext.awaitTermination() } }
Example 10
Source File: package.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com import java.util.Collections import cakesolutions.kafka.{KafkaConsumer, KafkaProducer} import cakesolutions.kafka.KafkaProducer.Conf import org.apache.kafka.clients.consumer.OffsetResetStrategy import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} package object example { val topic = "sample_topic" val kafkaProducer = KafkaProducer( Conf(new StringSerializer(), new StringSerializer(), bootstrapServers = "localhost:9092") ) val kafkaProducerConf = KafkaProducer.Conf( new StringSerializer, new StringSerializer, bootstrapServers = "localhost:9092" ) val kafkaConsumerConf = KafkaConsumer.Conf( new StringDeserializer, new StringDeserializer, groupId = "test_group", enableAutoCommit = false, autoOffsetReset = OffsetResetStrategy.EARLIEST, bootstrapServers = "localhost:9092") }
Example 11
Source File: package.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com import akka.actor.ActorSystem import akka.kafka.{ConsumerSettings, ProducerSettings} import akka.stream.ActorMaterializer import org.apache.kafka.clients.consumer.ConsumerConfig import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer} package object example { implicit val system = ActorSystem("FlowProducerMain") implicit val materializer = ActorMaterializer() val producerSettings = ProducerSettings(system, new ByteArraySerializer, new StringSerializer) .withBootstrapServers("localhost:9092") val topic = "sample_topic" val topic1 = "topic1" val topic2 = "topic2" val consumerSettings = ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer) .withBootstrapServers("localhost:9092") .withGroupId("group1") .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest") }
Example 12
Source File: ConsumerSelfManaged.scala From scala-kafka-client with MIT License | 5 votes |
package cakesolutions.kafka.examples import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props} import cakesolutions.kafka.KafkaConsumer import cakesolutions.kafka.akka.KafkaConsumerActor.{Confirm, Subscribe} import cakesolutions.kafka.akka.{ConsumerRecords, Extractor, KafkaConsumerActor, Offsets} import com.typesafe.config.Config import org.apache.kafka.clients.consumer.OffsetResetStrategy import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.serialization.StringDeserializer import scala.concurrent.duration._ def apply(config: Config): ActorRef = { val consumerConf = KafkaConsumer.Conf( new StringDeserializer, new StringDeserializer, groupId = "groupId", enableAutoCommit = false, autoOffsetReset = OffsetResetStrategy.EARLIEST) .withConf(config) val actorConf = KafkaConsumerActor.Conf(1.seconds, 3.seconds) val system = ActorSystem() system.actorOf(Props(new ConsumerSelfManaged(consumerConf, actorConf))) } } class ConsumerSelfManaged( kafkaConfig: KafkaConsumer.Conf[String, String], actorConfig: KafkaConsumerActor.Conf) extends Actor with ActorLogging { val recordsExt: Extractor[Any, ConsumerRecords[String, String]] = ConsumerRecords.extractor[String, String] val consumer: ActorRef = context.actorOf( KafkaConsumerActor.props(kafkaConfig, actorConfig, self) ) consumer ! Subscribe.ManualOffset(Offsets(Map((new TopicPartition("topic1", 0), 1)))) override def receive: Receive = { // Records from Kafka case recordsExt(records) => processRecords(records) sender() ! Confirm(records.offsets) } private def processRecords(records: ConsumerRecords[String, String]) = { records.pairs.foreach { case (key, value) => log.info(s"Received [$key,$value]") } log.info(s"Batch complete, offsets: ${records.offsets}") } }
Example 13
Source File: AutoPartitionConsumer.scala From scala-kafka-client with MIT License | 5 votes |
package cakesolutions.kafka.examples import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props} import cakesolutions.kafka.KafkaConsumer import cakesolutions.kafka.akka.KafkaConsumerActor.{Confirm, Subscribe} import cakesolutions.kafka.akka.{ConsumerRecords, KafkaConsumerActor} import com.typesafe.config.{Config, ConfigFactory} import org.apache.kafka.clients.consumer.OffsetResetStrategy import org.apache.kafka.common.serialization.StringDeserializer import scala.concurrent.duration._ def apply(config: Config): ActorRef = { val consumerConf = KafkaConsumer.Conf( new StringDeserializer, new StringDeserializer, groupId = "test_group", enableAutoCommit = false, autoOffsetReset = OffsetResetStrategy.EARLIEST) .withConf(config) val actorConf = KafkaConsumerActor.Conf(1.seconds, 3.seconds) val system = ActorSystem() system.actorOf(Props(new AutoPartitionConsumer(consumerConf, actorConf))) } } class AutoPartitionConsumer( kafkaConfig: KafkaConsumer.Conf[String, String], actorConfig: KafkaConsumerActor.Conf) extends Actor with ActorLogging { private val recordsExt = ConsumerRecords.extractor[String, String] private val consumer = context.actorOf( KafkaConsumerActor.props(kafkaConfig, actorConfig, self) ) context.watch(consumer) consumer ! Subscribe.AutoPartition(List("topic1")) override def receive: Receive = { // Records from Kafka case recordsExt(records) => processRecords(records.pairs) sender() ! Confirm(records.offsets, commit = true) } private def processRecords(records: Seq[(Option[String], String)]) = records.foreach { case (key, value) => log.info(s"Received [$key,$value]") } }
Example 14
Source File: ConsumerToProducer.scala From scala-kafka-client with MIT License | 5 votes |
package cakesolutions.kafka.examples import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props} import cakesolutions.kafka.akka.KafkaConsumerActor.{Confirm, Subscribe} import cakesolutions.kafka.akka._ import cakesolutions.kafka.{KafkaConsumer, KafkaProducer} import com.typesafe.config.{Config, ConfigFactory} import org.apache.kafka.clients.consumer.OffsetResetStrategy import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} import scala.concurrent.duration._ def apply(consumerConfig: Config, producerConfig: Config): ActorRef = { // Create KafkaConsumerActor config with bootstrap.servers specified in Typesafe config val consumerConf = KafkaConsumer.Conf( new StringDeserializer, new StringDeserializer, groupId = "test_group", enableAutoCommit = false, autoOffsetReset = OffsetResetStrategy.EARLIEST) .withConf(consumerConfig) val actorConf = KafkaConsumerActor.Conf(1.seconds, 3.seconds, 5) // Create KafkaProducerActor config with defaults and bootstrap.servers specified in Typesafe config val producerConf = KafkaProducer.Conf(new StringSerializer, new StringSerializer).withConf(producerConfig) val system = ActorSystem() system.actorOf(Props(new ConsumerToProducer(consumerConf, actorConf, producerConf))) } } class ConsumerToProducer( kafkaConfig: KafkaConsumer.Conf[String, String], actorConfig: KafkaConsumerActor.Conf, producerConf: KafkaProducer.Conf[String, String]) extends Actor with ActorLogging { private val recordsExt = ConsumerRecords.extractor[String, String] // The KafkaConsumerActor private val consumer = context.actorOf( KafkaConsumerActor.props(kafkaConfig, actorConfig, self) ) context.watch(consumer) // The KafkaProducerActor private val producer = context.actorOf(KafkaProducerActor.props(producerConf)) consumer ! Subscribe.AutoPartition(List("topic1")) override def receive: Receive = { // Records from Kafka case recordsExt(records) => processRecords(records) // Confirmed Offsets from KafkaProducer case o: Offsets => consumer ! Confirm(o, commit = true) } // Demonstrates some transformation of the messages before forwarding to KafkaProducer private def processRecords(records: ConsumerRecords[String, String]) = { val transformedRecords = records.pairs.map { case (key, value) => (key, value + ".") } // Send records to Topic2. Offsets will be sent back to this actor once confirmed. producer ! ProducerRecords.fromKeyValues[String, String]("topic2", transformedRecords, Some(records.offsets), None) // Could have sent them like this if we didn't first transform: // producer ! ProducerRecords.fromConsumerRecords("topic2", records, None) } }
Example 15
Source File: ConsumerRecovery.scala From scala-kafka-client with MIT License | 5 votes |
package cakesolutions.kafka.examples import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, OneForOneStrategy, Props, SupervisorStrategy} import cakesolutions.kafka.KafkaConsumer import cakesolutions.kafka.akka.KafkaConsumerActor.{Confirm, Subscribe} import cakesolutions.kafka.akka.{ConsumerRecords, Extractor, KafkaConsumerActor} import com.typesafe.config.{Config, ConfigFactory} import org.apache.kafka.clients.consumer.OffsetResetStrategy import org.apache.kafka.common.serialization.StringDeserializer import scala.concurrent.duration._ override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy(maxNrOfRetries = 10) { case _: KafkaConsumerActor.ConsumerException => log.info("Consumer exception caught. Restarting consumer.") SupervisorStrategy.Restart case _ => SupervisorStrategy.Escalate } val recordsExt: Extractor[Any, ConsumerRecords[String, String]] = ConsumerRecords.extractor[String, String] val consumer: ActorRef = context.actorOf( KafkaConsumerActor.props(kafkaConfig, actorConfig, self) ) consumer ! Subscribe.AutoPartition(List("topic1")) override def receive: Receive = { // Records from Kafka case recordsExt(records) => processRecords(records.pairs) sender() ! Confirm(records.offsets, commit = true) } private def processRecords(records: Seq[(Option[String], String)]) = records.foreach { case (key, value) => log.info(s"Received [$key,$value]") } }
Example 16
Source File: AutoPartitionConsumerWithManualOffset.scala From scala-kafka-client with MIT License | 5 votes |
package cakesolutions.kafka.examples import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props} import cakesolutions.kafka.KafkaConsumer import cakesolutions.kafka.akka.KafkaConsumerActor._ import cakesolutions.kafka.akka.{ConsumerRecords, KafkaConsumerActor, Offsets} import com.typesafe.config.{Config, ConfigFactory} import org.apache.kafka.clients.consumer.OffsetResetStrategy import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.serialization.StringDeserializer import scala.concurrent.duration._ def apply(config: Config): ActorRef = { val consumerConf = KafkaConsumer.Conf( new StringDeserializer, new StringDeserializer, groupId = "test_group", enableAutoCommit = false, autoOffsetReset = OffsetResetStrategy.EARLIEST) .withConf(config) val actorConf = KafkaConsumerActor.Conf(1.seconds, 3.seconds) val system = ActorSystem() system.actorOf(Props(new AutoPartitionConsumerWithManualOffset(consumerConf, actorConf))) } } class AutoPartitionConsumerWithManualOffset( kafkaConfig: KafkaConsumer.Conf[String, String], actorConfig: KafkaConsumerActor.Conf) extends Actor with ActorLogging { private val recordsExt = ConsumerRecords.extractor[String, String] private val consumer = context.actorOf( KafkaConsumerActor.props(kafkaConfig, actorConfig, self) ) consumer ! Subscribe.AutoPartitionWithManualOffset(List("topic1"), assignedListener, revokedListener) override def receive: Receive = { // Records from Kafka case recordsExt(records) => processRecords(records.pairs) sender() ! Confirm(records.offsets) } private def processRecords(records: Seq[(Option[String], String)]) = records.foreach { case (key, value) => log.info(s"Received [$key,$value]") } private def assignedListener(tps: List[TopicPartition]): Offsets = { log.info("Partitions have been assigned" + tps.toString()) // Should load the offsets from a persistent store and any related state val offsetMap = tps.map{ tp => tp -> 0l }.toMap // Return the required offsets for the assigned partitions Offsets(offsetMap) } private def revokedListener(tps: List[TopicPartition]): Unit = { log.info("Partitions have been revoked" + tps.toString()) // Opportunity to clear any state for the revoked partitions () } }
Example 17
Source File: KafkaConsumerPerfSpec.scala From scala-kafka-client with MIT License | 5 votes |
package cakesolutions.kafka import cakesolutions.kafka.KafkaConsumer.Conf import com.typesafe.config.ConfigFactory import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers} import org.slf4j.LoggerFactory import scala.collection.JavaConverters._ import scala.util.Random class KafkaConsumerPerfSpec extends FlatSpecLike with Matchers with BeforeAndAfterAll { val log = LoggerFactory.getLogger(getClass) val config = ConfigFactory.load() val msg1k = scala.io.Source.fromInputStream(getClass.getResourceAsStream("/1k.txt")).mkString val consumer = KafkaConsumer( Conf(config.getConfig("consumer"), new StringDeserializer, new StringDeserializer) ) private def randomString: String = Random.alphanumeric.take(5).mkString("") "Kafka Consumer with single partition topic" should "perform" in { val topic = randomString val producerConf = KafkaProducer.Conf(config.getConfig("producer"), new StringSerializer, new StringSerializer) val producer = KafkaProducer[String, String](producerConf) 1 to 100000 foreach { n => producer.send(KafkaProducerRecord(topic, None, msg1k)) } producer.flush() log.info("Delivered 100000 msg to topic {}", topic) consumer.subscribe(List(topic).asJava) var start = 0l var total = 0 while (total < 100000) { if(total == 0) start = System.currentTimeMillis() val count = consumer.poll(1000).count() total += count } val totalTime = System.currentTimeMillis() - start val messagesPerSec = 100000 / totalTime * 1000 log.info("Total Time millis : {}", totalTime) log.info("Messages per sec : {}", messagesPerSec) totalTime should be < 4000L consumer.close() producer.close() } }
Example 18
Source File: IdempotentProducerSpec.scala From scala-kafka-client with MIT License | 5 votes |
package cakesolutions.kafka import org.apache.kafka.clients.consumer.ConsumerRecords import org.apache.kafka.common.KafkaException import org.apache.kafka.common.requests.IsolationLevel import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} import org.slf4j.LoggerFactory import scala.collection.JavaConverters._ import scala.util.Random class IdempotentProducerSpec extends KafkaIntSpec { private val log = LoggerFactory.getLogger(getClass) private def randomString: String = Random.alphanumeric.take(5).mkString("") val idempotentProducerConfig: KafkaProducer.Conf[String, String] = KafkaProducer.Conf(new StringSerializer(), new StringSerializer(), bootstrapServers = s"localhost:$kafkaPort", enableIdempotence = true) val transactionalProducerConfig: KafkaProducer.Conf[String, String] = KafkaProducer.Conf(new StringSerializer(), new StringSerializer(), bootstrapServers = s"localhost:$kafkaPort", transactionalId = Some("t1"), enableIdempotence = true) val consumerConfig: KafkaConsumer.Conf[String, String] = KafkaConsumer.Conf(new StringDeserializer(), new StringDeserializer(), bootstrapServers = s"localhost:$kafkaPort", groupId = randomString, enableAutoCommit = false) val transactionConsumerConfig: KafkaConsumer.Conf[String, String] = KafkaConsumer.Conf(new StringDeserializer(), new StringDeserializer(), bootstrapServers = s"localhost:$kafkaPort", groupId = randomString, enableAutoCommit = false, isolationLevel = IsolationLevel.READ_COMMITTED) "Producer with idempotent config" should "deliver batch" in { val topic = randomString log.info(s"Using topic [$topic] and kafka port [$kafkaPort]") val producer = KafkaProducer(idempotentProducerConfig) val consumer = KafkaConsumer(consumerConfig) consumer.subscribe(List(topic).asJava) val records1 = consumer.poll(1000) records1.count() shouldEqual 0 log.info("Kafka producer connecting on port: [{}]", kafkaPort) producer.send(KafkaProducerRecord(topic, Some("key"), "value")) producer.flush() val records2: ConsumerRecords[String, String] = consumer.poll(1000) records2.count() shouldEqual 1 producer.close() consumer.close() } "Producer with transaction" should "deliver batch" in { val topic = randomString log.info(s"Using topic [$topic] and kafka port [$kafkaPort]") val producer = KafkaProducer(transactionalProducerConfig) val consumer = KafkaConsumer(transactionConsumerConfig) consumer.subscribe(List(topic).asJava) val records1 = consumer.poll(1000) records1.count() shouldEqual 0 log.info("Kafka producer connecting on port: [{}]", kafkaPort) producer.initTransactions() try { producer.beginTransaction() producer.send(KafkaProducerRecord(topic, Some("key"), "value")) producer.commitTransaction() } catch { case ex: KafkaException => log.error(ex.getMessage, ex) producer.abortTransaction() } val records2: ConsumerRecords[String, String] = consumer.poll(1000) records2.count() shouldEqual 1 producer.close() consumer.close() } }
Example 19
Source File: KafkaProducerActorSpec.scala From scala-kafka-client with MIT License | 5 votes |
package cakesolutions.kafka.akka import akka.actor.ActorSystem import akka.testkit.TestProbe import cakesolutions.kafka.{KafkaConsumer, KafkaProducer, KafkaProducerRecord} import org.apache.kafka.clients.consumer.OffsetResetStrategy import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} import scala.util.Random class KafkaProducerActorSpec(system_ : ActorSystem) extends KafkaIntSpec(system_) { def this() = this(ActorSystem("KafkaProducerActorSpec")) private def randomString: String = Random.alphanumeric.take(5).mkString("") val deserializer = new StringDeserializer val consumerConf = KafkaConsumer.Conf( deserializer, deserializer, bootstrapServers = s"localhost:$kafkaPort", groupId = "test", enableAutoCommit = false, autoOffsetReset = OffsetResetStrategy.EARLIEST ) val serializer = new StringSerializer val producerConf = KafkaProducer.Conf(serializer, serializer, bootstrapServers = s"localhost:$kafkaPort") "KafkaProducerActor" should "write a given batch to Kafka" in { val topic = randomString val probe = TestProbe() val producer = system.actorOf(KafkaProducerActor.props(producerConf)) val batch: Seq[ProducerRecord[String, String]] = Seq( KafkaProducerRecord(topic, "foo"), KafkaProducerRecord(topic, "key", "value"), KafkaProducerRecord(topic, "bar")) val message = ProducerRecords(batch, Some('response)) probe.send(producer, message) probe.expectMsg('response) val results = consumeFromTopic(topic, 3, 10000) results(0) shouldEqual ((None, "foo")) results(1) shouldEqual ((Some("key"), "value")) results(2) shouldEqual ((None, "bar")) } "KafkaProducerActor" should "write a given batch to Kafka, requiring no response" in { import scala.concurrent.duration._ val topic = randomString val probe = TestProbe() val producer = system.actorOf(KafkaProducerActor.props(producerConf)) val batch: Seq[ProducerRecord[String, String]] = Seq( KafkaProducerRecord(topic, "foo"), KafkaProducerRecord(topic, "key", "value"), KafkaProducerRecord(topic, "bar") ) val message = ProducerRecords(batch) probe.send(producer, message) probe.expectNoMessage(3.seconds) val results = consumeFromTopic(topic, 3, 10000) results(0) shouldEqual ((None, "foo")) results(1) shouldEqual ((Some("key"), "value")) results(2) shouldEqual ((None, "bar")) } private def consumeFromTopic(topic: String, expectedNumOfMessages: Int, timeout: Long) = kafkaServer.consume(topic, expectedNumOfMessages, timeout, deserializer, deserializer) }
Example 20
Source File: Kafka2OdpsDemo.scala From MaxCompute-Spark with Apache License 2.0 | 5 votes |
package com.aliyun.odps.spark.examples.streaming.kafka

import com.aliyun.odps.spark.examples.streaming.common.SparkSessionSingleton
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

object Kafka2OdpsDemo {

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("test")
    val ssc = new StreamingContext(sparkConf, Seconds(10))

    // Use OSS for checkpoint storage and replace this with a valid OSS path.
    // See the OSS access documentation: https://github.com/aliyun/MaxCompute-Spark/wiki/08.-Oss-Access%E6%96%87%E6%A1%A3%E8%AF%B4%E6%98%8E
    ssc.checkpoint("oss://bucket/checkpointdir")

    // Kafka configuration parameters
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "testGroupId",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    // Create the Kafka DStream
    val topics = Set("test")
    val recordDstream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
      )
    val dstream = recordDstream.map(f => (f.key(), f.value()))

    // Parse the Kafka data and write it to MaxCompute (ODPS)
    val data: DStream[String] = dstream.map(_._2)
    val wordsDStream: DStream[String] = data.flatMap(_.split(" "))
    wordsDStream.foreachRDD(rdd => {
      val spark = SparkSessionSingleton.getInstance(rdd.sparkContext.getConf)
      import spark.implicits._
      rdd.toDF("id").write.mode("append").saveAsTable("test_table")
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
Example 21
Source File: KafkaStreamingDemo.scala From MaxCompute-Spark with Apache License 2.0 | 5 votes |
package com.aliyun.odps.spark.examples.streaming.kafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaStreamingDemo {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("KafkaStreamingDemo")
      .getOrCreate()

    val ssc = new StreamingContext(spark.sparkContext, Seconds(5))

    // Use OSS as checkpoint storage
    ssc.checkpoint("oss://bucket/checkpointDir/")

    // Kafka configuration parameters
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "192.168.1.1:9200,192.168.1.2:9200,192.168.1.3:9200",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "testGroupId",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val topics = Set("event_topic")
    val recordDstream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
      )

    val dstream = recordDstream.map(f => (f.key(), f.value()))
    val data: DStream[String] = dstream.map(_._2)
    val wordsDStream: DStream[String] = data.flatMap(_.split(" "))
    val wordAndOneDstream: DStream[(String, Int)] = wordsDStream.map((_, 1))
    val result: DStream[(String, Int)] = wordAndOneDstream.reduceByKey(_ + _)
    result.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
Example 22
Source File: UseCase.scala From Fast-Data-Processing-Systems-with-SMACK-Stack with MIT License | 5 votes |
import akka.actor.ActorSystem import akka.stream.ActorMaterializer import akka.stream.scaladsl.{Sink, Source} import com.softwaremill.react.kafka.KafkaMessages._ import org.apache.kafka.common.serialization.{StringSerializer, StringDeserializer} import com.softwaremill.react.kafka.{ProducerMessage, ConsumerProperties, ProducerProperties, ReactiveKafka} import org.reactivestreams.{ Publisher, Subscriber } implicit val actorSystem = ActorSystem("ReactiveKafka") implicit val materializer = ActorMaterializer() val kafka = new ReactiveKafka() val publisher: Publisher[StringConsumerRecord] = kafka.consume(ConsumerProperties( bootstrapServers = "localhost:9092", topic = "lowercaseStrings", groupId = "groupName", valueDeserializer = new StringDeserializer() )) val subscriber: Subscriber[StringProducerMessage] = kafka.publish(ProducerProperties( bootstrapServers = "localhost:9092", topic = "uppercaseStrings", valueSerializer = new StringSerializer() )) Source.fromPublisher(publisher).map(m => ProducerMessage(m.value().toUpperCase)) .to(Sink.fromSubscriber(subscriber)).run()
Example 23
Source File: EventAggregationSpec.scala From spark-summit-2018 with GNU General Public License v3.0 | 5 votes |
package com.twilio.open.streaming.trend.discovery import java.util import com.twilio.open.protocol.Calls.CallEvent import com.twilio.open.protocol.Metrics import com.twilio.open.streaming.trend.discovery.streams.EventAggregation import org.apache.kafka.common.serialization.{Deserializer, Serializer, StringDeserializer, StringSerializer} import org.apache.spark.sql.streaming.{OutputMode, Trigger} import org.apache.spark.sql._ import org.apache.spark.sql.kafka010.KafkaTestUtils import org.apache.spark.{SparkConf, SparkContext} import org.slf4j.{Logger, LoggerFactory} class EventAggregationSpec extends KafkaBackedTest[String, CallEvent] { override val testUtils = new KafkaTestUtils[String, CallEvent] { override val keySerializer: Serializer[String] = new StringSerializer override val keyDeserializer: Deserializer[String] = new StringDeserializer override val valueSerializer: Serializer[CallEvent] = new CallEventSerializer override val valueDeserializer: Deserializer[CallEvent] = new CallEventDeserializer } override protected val kafkaTopic = "spark.summit.call.events" override protected val partitions = 8 private val pathToTestScenarios = "src/test/resources/scenarios" val log: Logger = LoggerFactory.getLogger(classOf[EventAggregation]) lazy val session: SparkSession = sparkSql override def conf: SparkConf = { new SparkConf() .setMaster("local[*]") .setAppName("aggregation-test-app") .set("spark.ui.enabled", "false") .set("spark.app.id", appID) .set("spark.driver.host", "localhost") .set("spark.sql.shuffle.partitions", "32") .set("spark.executor.cores", "4") .set("spark.executor.memory", "1g") .set("spark.ui.enabled", "false") .setJars(SparkContext.jarOfClass(classOf[EventAggregation]).toList) } test("Should aggregate call events") { import session.implicits._ val appConfig = appConfigForTest() val scenario = TestHelper.loadScenario[CallEvent](s"$pathToTestScenarios/pdd_events.json") val scenarioIter = scenario.toIterator scenario.nonEmpty shouldBe true testUtils.createTopic(kafkaTopic, partitions, overwrite = true) sendNextMessages(scenarioIter, 30, _.getEventId, _.getLoggedEventTime) val trendDiscoveryApp = new TrendDiscoveryApp(appConfigForTest(), session) val eventAggregation = EventAggregation(appConfig) eventAggregation.process(trendDiscoveryApp.readKafkaStream())(session) .writeStream .queryName("calleventaggs") .format("memory") .outputMode(eventAggregation.outputMode) .start() .processAllAvailable() val df = session.sql("select * from calleventaggs") df.printSchema() df.show val res = session .sql("select avg(stats.p99) from calleventaggs") .collect() .map { r => r.getAs[Double](0) } .head DiscoveryUtils.round(res) shouldEqual 7.13 } } class CallEventSerializer extends Serializer[CallEvent] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def serialize(topic: String, data: CallEvent): Array[Byte] = data.toByteArray override def close(): Unit = {} } class CallEventDeserializer extends Deserializer[CallEvent] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def deserialize(topic: String, data: Array[Byte]): CallEvent = CallEvent.parseFrom(data) override def close(): Unit = {} }
Example 24
Source File: KafkaStream.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.bundle.streaming import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext} import cn.piflow.conf.{ConfigurableStreamingStop, Port, StopGroup} import cn.piflow.conf.bean.PropertyDescriptor import cn.piflow.conf.util.{ImageUtil, MapUtil} import org.apache.kafka.common.serialization.StringDeserializer import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.dstream.DStream import org.apache.spark.streaming.kafka010.KafkaUtils import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe class KafkaStream extends ConfigurableStreamingStop{ override var batchDuration: Int = _ override val authorEmail: String = "[email protected]" override val description: String = "Read data from kafka" override val inportList: List[String] = List(Port.DefaultPort) override val outportList: List[String] = List(Port.DefaultPort) var brokers:String = _ var groupId:String = _ var topics:Array[String] = _ override def setProperties(map: Map[String, Any]): Unit = { brokers=MapUtil.get(map,key="brokers").asInstanceOf[String] groupId=MapUtil.get(map,key="groupId").asInstanceOf[String] topics=MapUtil.get(map,key="topics").asInstanceOf[String].split(",").map(x => x.trim) val timing = MapUtil.get(map,key="batchDuration") batchDuration=if(timing == None) new Integer(1) else timing.asInstanceOf[String].toInt } override def getPropertyDescriptor(): List[PropertyDescriptor] = { var descriptor : List[PropertyDescriptor] = List() val brokers = new PropertyDescriptor().name("brokers").displayName("brokers").description("kafka brokers, seperated by ','").defaultValue("").required(true) val groupId = new PropertyDescriptor().name("groupId").displayName("groupId").description("kafka consumer group").defaultValue("group").required(true) val topics = new PropertyDescriptor().name("topics").displayName("topics").description("kafka topics").defaultValue("").required(true) val batchDuration = new PropertyDescriptor().name("batchDuration").displayName("batchDuration").description("the streaming batch duration").defaultValue("1").required(true) descriptor = brokers :: descriptor descriptor = groupId :: descriptor descriptor = topics :: descriptor descriptor = batchDuration :: descriptor descriptor } override def getIcon(): Array[Byte] = { ImageUtil.getImage("icon/streaming/KafkaStream.png") } override def getGroup(): List[String] = { List(StopGroup.StreamingGroup) } override def getDStream(ssc: StreamingContext): DStream[String] = { val kafkaParams = Map[String, Object]( "bootstrap.servers" -> brokers, "key.deserializer" -> classOf[StringDeserializer], "value.deserializer" -> classOf[StringDeserializer], "group.id" -> groupId, "auto.offset.reset" -> "latest", "enable.auto.commit" -> (false:java.lang.Boolean) ) val stream = KafkaUtils.createDirectStream[String,String]( ssc, PreferConsistent, Subscribe[String, String](topics, kafkaParams) ) stream.map(record => record.key() + "," + record.value()) //stream.asInstanceOf[DStream[ConsumerRecord]] } override def initialize(ctx: ProcessContext): Unit = {} override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {} }
Example 25
Source File: ConfManager.scala From HadoopLearning with MIT License | 5 votes |
package com.utils

import java.util.regex.Pattern

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.mutable.HashMap

/**
 * Spark Streaming configuration helper.
 *
 * @author liumm
 * @since 2018-07-27 20:27
 */
object ConfManager {

  /**
   * Maximum number of records written to storage per batch
   */
  val maxRecords = 1000

  /**
   * Build the Kafka configuration
   *
   * @param streamConf
   * @return
   */
  def kafkaParam(streamConf: StreamConf): (Map[String, Object], Pattern) = {
    (getConsumerConfig(streamConf.brokers, streamConf.groupId), Pattern.compile(streamConf.topics))
  }

  def kafkaParamForMetadata(streamConf: StreamConf): Map[String, String] = {
    val kafkaParams = new HashMap[String, String]()
    kafkaParams += (ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> streamConf.brokers)
    kafkaParams += ("metadata.broker.list" -> streamConf.brokers)
    kafkaParams += (ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "smallest")
    kafkaParams += (ConsumerConfig.GROUP_ID_CONFIG -> streamConf.groupId)
    kafkaParams.toMap
  }

  /**
   * Build the Kafka consumer configuration
   *
   * @return the Kafka consumer configuration
   */
  private def getConsumerConfig(brokers: String, groupId: String): Map[String, Object] = {
    val kafkaParams = new HashMap[String, Object]()
    kafkaParams += (ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers)
    kafkaParams += (ConsumerConfig.GROUP_ID_CONFIG -> groupId)
    kafkaParams += (ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])
    kafkaParams += (ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])
    kafkaParams += (ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> new Integer(3 * 1024 * 1024))
    kafkaParams += (ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> new Integer(100))
    kafkaParams += (ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest")
    // Disable Kafka's automatic offset commits
    kafkaParams += (ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean))
    kafkaParams.toMap
  }

  def newStreamConf() = {
    val conf = new StreamConf()
    conf.zkUrl = "hdp01:2181"
    conf.brokers = "hdp01:9092"
    conf.groupId = "liumm_group"
    conf.topics = "i57_.*"
    conf
  }
}
Example 26
Source File: WordCountConsumer.scala From akka_streams_tutorial with MIT License | 5 votes |
package alpakka.kafka import akka.Done import akka.actor.{ActorSystem, Props} import akka.kafka.scaladsl.Consumer.DrainingControl import akka.kafka.scaladsl.{Committer, Consumer} import akka.kafka.{CommitterSettings, ConsumerSettings, Subscriptions} import akka.stream.scaladsl.Sink import akka.util.Timeout import alpakka.kafka.TotalFake.{IncrementMessage, IncrementWord} import org.apache.kafka.clients.consumer.ConsumerConfig import org.apache.kafka.common.serialization.{LongDeserializer, StringDeserializer} import scala.concurrent.Future import scala.concurrent.duration._ object WordCountConsumer extends App { implicit val system = ActorSystem("WordCountConsumer") implicit val ec = system.dispatcher val total = system.actorOf(Props[TotalFake], "totalFake") val committerSettings = CommitterSettings(system).withMaxBatch(1) def createConsumerSettings(group: String): ConsumerSettings[String, java.lang.Long] = { ConsumerSettings(system, new StringDeserializer , new LongDeserializer) .withBootstrapServers("localhost:9092") .withGroupId(group) //Define consumer behavior upon starting to read a partition for which it does not have a committed offset or if the committed offset it has is invalid .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") } def createAndRunConsumerWordCount(id: String) = { Consumer.committableSource(createConsumerSettings("wordcount consumer group"), Subscriptions.topics("wordcount-output")) .mapAsync(1) { msg => //println(s"$id - Offset: ${msg.record.offset()} - Partition: ${msg.record.partition()} Consume msg with key: ${msg.record.key()} and value: ${msg.record.value()}") if (msg.record.key().equalsIgnoreCase("fakeNews")) { //hardcoded because WordCountProducer.fakeNewsKeyword does not work import akka.pattern.ask implicit val askTimeout: Timeout = Timeout(3.seconds) (total ? IncrementWord(msg.record.value.toInt, id)) .mapTo[Done] .map(_ => msg.committableOffset) } else { Future(msg).map(_ => msg.committableOffset) } } .via(Committer.flow(committerSettings)) .toMat(Sink.seq)(DrainingControl.apply) .run() } def createAndRunConsumerMessageCount(id: String) = { Consumer.committableSource(createConsumerSettings("messagecount consumer group"), Subscriptions.topics("messagecount-output")) .mapAsync(1) { msg => //println(s"$id - Offset: ${msg.record.offset()} - Partition: ${msg.record.partition()} Consume msg with key: ${msg.record.key()} and value: ${msg.record.value()}") import akka.pattern.ask implicit val askTimeout: Timeout = Timeout(3.seconds) (total ? IncrementMessage(msg.record.value.toInt, id)) .mapTo[Done] .map(_ => msg.committableOffset) } .via(Committer.flow(committerSettings)) .toMat(Sink.seq)(DrainingControl.apply) .run() } val drainingControlW1 = createAndRunConsumerWordCount("W.1") val drainingControlW2 = createAndRunConsumerWordCount("W.2") val drainingControlM = createAndRunConsumerMessageCount("M") sys.addShutdownHook{ println("Got control-c cmd from shell, about to shutdown...") drainingControlW1.drainAndShutdown() drainingControlW2.drainAndShutdown() drainingControlM.drainAndShutdown() } }
Example 27
Source File: SKRSpec.scala From spark-kafka-writer with Apache License 2.0 | 5 votes |
package com.github.benfradet.spark.kafka.writer import java.util.concurrent.atomic.AtomicInteger import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies} import org.apache.spark.streaming.{Seconds, StreamingContext} import org.scalatest.concurrent.Eventually import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} import scala.collection.mutable.ArrayBuffer import scala.util.Random import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec case class Foo(a: Int, b: String) trait SKRSpec extends AnyWordSpec with Matchers with BeforeAndAfterEach with BeforeAndAfterAll with Eventually { val sparkConf = new SparkConf() .setMaster("local[1]") .setAppName(getClass.getSimpleName) var ktu: KafkaTestUtils = _ override def beforeAll(): Unit = { ktu = new KafkaTestUtils ktu.setup() } override def afterAll(): Unit = { SKRSpec.callbackTriggerCount.set(0) if (ktu != null) { ktu.tearDown() ktu = null } } var topic: String = _ var ssc: StreamingContext = _ var spark: SparkSession = _ override def afterEach(): Unit = { if (ssc != null) { ssc.stop() ssc = null } if (spark != null) { spark.stop() spark = null } } override def beforeEach(): Unit = { ssc = new StreamingContext(sparkConf, Seconds(1)) spark = SparkSession.builder .config(sparkConf) .getOrCreate() topic = s"topic-${Random.nextInt()}" ktu.createTopics(topic) } def collect(ssc: StreamingContext, topic: String): ArrayBuffer[String] = { val kafkaParams = Map( "bootstrap.servers" -> ktu.brokerAddress, "auto.offset.reset" -> "earliest", "key.deserializer" -> classOf[StringDeserializer], "value.deserializer" -> classOf[StringDeserializer], "group.id" -> "test-collect" ) val results = new ArrayBuffer[String] KafkaUtils.createDirectStream[String, String]( ssc, LocationStrategies.PreferConsistent, ConsumerStrategies.Subscribe[String, String](Set(topic), kafkaParams) ).map(_.value()) .foreachRDD { rdd => results ++= rdd.collect() () } results } val producerConfig = Map( "bootstrap.servers" -> "127.0.0.1:9092", "key.serializer" -> classOf[StringSerializer].getName, "value.serializer" -> classOf[StringSerializer].getName ) } object SKRSpec { val callbackTriggerCount = new AtomicInteger() }
Example 28
Source File: WebSocket.scala From trucking-iot with Apache License 2.0 | 5 votes |
package controllers import javax.inject.{Inject, Singleton} import akka.actor.{Actor, ActorRef, ActorSystem, Props} import akka.kafka.scaladsl.Consumer import akka.kafka.{ConsumerSettings, Subscriptions} import akka.stream.scaladsl.Sink import akka.stream.{Materializer, ThrottleMode} import com.typesafe.config.ConfigFactory import org.apache.kafka.clients.consumer.ConsumerConfig import org.apache.kafka.common.serialization.{ByteArrayDeserializer, StringDeserializer} import play.api.libs.streams.ActorFlow import play.api.mvc.{Controller, WebSocket} import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future import scala.concurrent.duration._ //@Singleton class KafkaWebSocket @Inject() (implicit system: ActorSystem, materializer: Materializer) extends Controller { def kafkaWS = WebSocket.accept[String, String] { request => ActorFlow.actorRef(out => KafkaWSActor.props(out)) } object KafkaWSActor { def props(outRef: ActorRef) = Props(new KafkaWSActor(outRef)) } class KafkaWSActor(outRef: ActorRef) extends Actor { val config = ConfigFactory.load() val combinedConfig = ConfigFactory.defaultOverrides() .withFallback(config) .withFallback(ConfigFactory.defaultApplication()) .getConfig("trucking-web-application.backend") val consumerSettings = ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer) //.withBootstrapServers("sandbox-hdf.hortonworks.com:6667") .withBootstrapServers(combinedConfig.getString("kafka.bootstrap-servers")) .withGroupId("group1") .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") Consumer.committableSource(consumerSettings, Subscriptions.topics("trucking_data_joined")) .mapAsync(1) { msg => Future(outRef ! msg.record.value).map(_ => msg) } //.mapAsync(1) { msg => msg.committableOffset.commitScaladsl() } // TODO: Disabling commits for debug .throttle(1, 250.milliseconds, 1, ThrottleMode.Shaping) .runWith(Sink.ignore) def receive = { case msg: String => outRef ! s"Ack: $msg" } } }
Example 29
Source File: ConsumerStream.scala From reactive-kafka-microservice-template with Apache License 2.0 | 5 votes |
package com.omearac.consumers

import akka.actor.{ActorRef, ActorSystem}
import akka.kafka.ConsumerMessage.CommittableOffsetBatch
import akka.kafka.scaladsl.Consumer
import akka.kafka.{ConsumerMessage, ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.{Flow, Sink}
import com.omearac.shared.EventMessages.FailedMessageConversion
import com.omearac.shared.JsonMessageConversion.Conversion
import com.omearac.shared.{AkkaStreams, EventSourcing}
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, StringDeserializer}

import scala.collection.mutable.ArrayBuffer
import scala.concurrent.Future

trait ConsumerStream extends AkkaStreams with EventSourcing {
  implicit val system: ActorSystem
  def self: ActorRef

  def createStreamSink(consumerActorSink: ActorRef) = {
    Sink.actorRefWithAck(consumerActorSink, "STREAM_INIT", "OK", "STREAM_DONE")
  }

  def createStreamSource(consumerProperties: Map[String, String]) = {
    val kafkaMBAddress = consumerProperties("bootstrap-servers")
    val groupID = consumerProperties("groupId")
    val topicSubscription = consumerProperties("subscription-topic")

    val consumerSettings = ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
      .withBootstrapServers(kafkaMBAddress)
      .withGroupId(groupID)
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

    Consumer.committableSource(consumerSettings, Subscriptions.topics(topicSubscription))
  }

  def createStreamFlow[msgType: Conversion] = {
    Flow[ConsumerMessage.CommittableMessage[Array[Byte], String]]
      .map(msg => (msg.committableOffset, Conversion[msgType].convertFromJson(msg.record.value)))
      //Publish the conversion error event messages returned from the JSONConversion
      .map(tuple => publishConversionErrors[msgType](tuple))
      .filter(result => result.isRight)
      .map(test => test.right.get)
      //Group the commit offsets and correctly converted messages for more efficient Kafka commits
      .batch(max = 20, tuple => (CommittableOffsetBatch.empty.updated(tuple._1), ArrayBuffer[msgType](tuple._2))) {
        (tupleOfCommitOffsetAndMsgs, tuple) =>
          (tupleOfCommitOffsetAndMsgs._1.updated(tuple._1), tupleOfCommitOffsetAndMsgs._2 :+ tuple._2)
      }
      //Take the first element of the tuple (set of commit numbers) to add to kafka commit log and then return the collection of grouped case class messages
      .mapAsync(4)(tupleOfCommitOffsetAndMsgs => commitOffsetsToKafka[msgType](tupleOfCommitOffsetAndMsgs))
      .map(msgGroup => msgGroup._2)
  }

  def commitOffsetsToKafka[msgType](tupleOfCommitOffsetAndMsgs: (ConsumerMessage.CommittableOffsetBatch, ArrayBuffer[msgType])) = Future {
    (tupleOfCommitOffsetAndMsgs._1.commitScaladsl(), tupleOfCommitOffsetAndMsgs._2)
  }

  def publishConversionErrors[msgType](tupleOfCommitOffsetAndConversionResults: (ConsumerMessage.CommittableOffset, Either[FailedMessageConversion, msgType]))
  : Either[Unit, (ConsumerMessage.CommittableOffset, msgType)] = {

    if (tupleOfCommitOffsetAndConversionResults._2.isLeft) {
      //Publish a local event that there was a failure in conversion
      publishLocalEvent(tupleOfCommitOffsetAndConversionResults._2.left.get)
      //Commit the Kafka Offset to acknowledge that the message was consumed
      Left(tupleOfCommitOffsetAndConversionResults._1.commitScaladsl())
    } else
      Right(tupleOfCommitOffsetAndConversionResults._1, tupleOfCommitOffsetAndConversionResults._2.right.get)
  }
}
Example 30
Source File: OpenWhiskEvents.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.monitoring.metrics

import akka.actor.{ActorSystem, CoordinatedShutdown}
import akka.event.slf4j.SLF4JLogging
import akka.http.scaladsl.Http
import akka.kafka.ConsumerSettings
import akka.stream.ActorMaterializer
import com.typesafe.config.Config
import kamon.Kamon
import kamon.prometheus.PrometheusReporter
import org.apache.kafka.common.serialization.StringDeserializer
import pureconfig._
import pureconfig.generic.auto._

import scala.concurrent.duration.FiniteDuration
import scala.concurrent.{ExecutionContext, Future}

object OpenWhiskEvents extends SLF4JLogging {

  case class MetricConfig(port: Int,
                          enableKamon: Boolean,
                          ignoredNamespaces: Set[String],
                          renameTags: Map[String, String],
                          retry: RetryConfig)

  case class RetryConfig(minBackoff: FiniteDuration,
                         maxBackoff: FiniteDuration,
                         randomFactor: Double,
                         maxRestarts: Int)

  def start(config: Config)(implicit system: ActorSystem,
                            materializer: ActorMaterializer): Future[Http.ServerBinding] = {
    implicit val ec: ExecutionContext = system.dispatcher

    val prometheusReporter = new PrometheusReporter()
    Kamon.registerModule("prometheus", prometheusReporter)
    Kamon.init(config)

    val metricConfig = loadConfigOrThrow[MetricConfig](config, "whisk.user-events")

    val prometheusRecorder = PrometheusRecorder(prometheusReporter, metricConfig)
    val recorders = if (metricConfig.enableKamon) Seq(prometheusRecorder, KamonRecorder) else Seq(prometheusRecorder)
    val eventConsumer = EventConsumer(eventConsumerSettings(defaultConsumerConfig(config)), recorders, metricConfig)

    CoordinatedShutdown(system).addTask(CoordinatedShutdown.PhaseBeforeServiceUnbind, "shutdownConsumer") { () =>
      eventConsumer.shutdown()
    }

    val port = metricConfig.port
    val api = new PrometheusEventsApi(eventConsumer, prometheusRecorder)
    val httpBinding = Http().bindAndHandle(api.routes, "0.0.0.0", port)
    httpBinding.foreach(_ => log.info(s"Started the http server on http://localhost:$port"))(system.dispatcher)
    httpBinding
  }

  def eventConsumerSettings(config: Config): ConsumerSettings[String, String] =
    ConsumerSettings(config, new StringDeserializer, new StringDeserializer)

  def defaultConsumerConfig(globalConfig: Config): Config = globalConfig.getConfig("akka.kafka.consumer")
}
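A minimal sketch of what the `start()` signature above needs at the call site: an ActorSystem, an ActorMaterializer and a loaded Typesafe Config. This is not how OpenWhisk itself bootstraps the service; the object name is hypothetical and the snippet assumes it lives in the same package as OpenWhiskEvents.

    import akka.actor.ActorSystem
    import akka.stream.ActorMaterializer
    import com.typesafe.config.ConfigFactory

    object OpenWhiskEventsExample extends App {
      implicit val system: ActorSystem = ActorSystem("user-events")
      implicit val materializer: ActorMaterializer = ActorMaterializer()

      // Binds the Prometheus HTTP endpoint and starts the Kafka event consumer
      // using the String key/value deserializers configured above.
      OpenWhiskEvents.start(ConfigFactory.load())
    }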
Example 31
Source File: ProcessingKafkaApplication.scala From Akka-Cookbook with MIT License | 5 votes |
package com.packt.chapter8

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerSettings, Subscriptions}
import akka.stream.{ActorMaterializer, ClosedShape}
import akka.stream.scaladsl.{Flow, GraphDSL, RunnableGraph, Sink, Source}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}

import scala.concurrent.duration._

object ProcessingKafkaApplication extends App {
  implicit val actorSystem = ActorSystem("SimpleStream")
  implicit val actorMaterializer = ActorMaterializer()

  val bootstrapServers = "localhost:9092"
  val kafkaTopic = "akka_streams_topic"
  val partition = 0
  val subscription = Subscriptions.assignment(new TopicPartition(kafkaTopic, partition))

  val consumerSettings = ConsumerSettings(actorSystem, new ByteArrayDeserializer, new StringDeserializer)
    .withBootstrapServers(bootstrapServers)
    .withGroupId("akka_streams_group")
    .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  val producerSettings = ProducerSettings(actorSystem, new ByteArraySerializer, new StringSerializer)
    .withBootstrapServers(bootstrapServers)

  val runnableGraph = RunnableGraph.fromGraph(GraphDSL.create() { implicit builder =>
    import GraphDSL.Implicits._

    val tickSource = Source.tick(0 seconds, 5 seconds, "Hello from Akka Streams using Kafka!")
    val kafkaSource = Consumer.plainSource(consumerSettings, subscription)
    val kafkaSink = Producer.plainSink(producerSettings)
    val printlnSink = Sink.foreach(println)

    val mapToProducerRecord = Flow[String].map(elem => new ProducerRecord[Array[Byte], String](kafkaTopic, elem))
    val mapFromConsumerRecord = Flow[ConsumerRecord[Array[Byte], String]].map(record => record.value())

    tickSource ~> mapToProducerRecord ~> kafkaSink
    kafkaSource ~> mapFromConsumerRecord ~> printlnSink

    ClosedShape
  })

  runnableGraph.run()
}
Example 32
Source File: KafkaClient.scala From mist with Apache License 2.0 | 5 votes |
package io.hydrosphere.mist.master.interfaces.async.kafka

import java.util.UUID
import java.util.concurrent.atomic.AtomicBoolean

import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

import scala.collection.JavaConverters._
import scala.concurrent.{Future, Promise}

class TopicProducer[K, V](
  producer: KafkaProducer[K, V],
  topic: String
) {

  def send(key: K, value: V): Unit = {
    val record = new ProducerRecord(topic, key, value)
    producer.send(record)
  }

  def close(): Unit = {
    producer.close()
  }
}

object TopicProducer {

  def apply(
    host: String,
    port: Int,
    topic: String): TopicProducer[String, String] = {

    val props = new java.util.Properties()
    props.put("bootstrap.servers", s"$host:$port")

    val producer = new KafkaProducer(props, new StringSerializer, new StringSerializer)
    new TopicProducer(producer, topic)
  }
}

class TopicConsumer[K, V](
  consumer: KafkaConsumer[K, V],
  topic: String,
  timeout: Long = 100
) {

  private val promise = Promise[Unit]
  private val stopped = new AtomicBoolean(false)

  def subscribe(f: (K, V) => Unit): Future[Unit] = {
    run(f)
    promise.future
  }

  private def run(f: (K, V) => Unit): Unit = {
    consumer.subscribe(Seq(topic).asJava)
    val thread = new Thread(new Runnable {
      override def run(): Unit = {
        while (!stopped.get()) {
          val records = consumer.poll(timeout).asScala
          records.foreach(r => f(r.key(), r.value()))
        }
        promise.success(())
      }
    })
    thread.setName(s"kafka-topic-consumer-$topic")
    thread.start()
  }

  def close(): Future[Unit] = {
    stopped.set(true)
    promise.future
  }
}

object TopicConsumer {

  def apply(
    host: String,
    port: Int,
    topic: String): TopicConsumer[String, String] = {

    val props = new java.util.Properties()
    props.put("bootstrap.servers", s"$host:$port")
    props.put("group.id", "mist-" + UUID.randomUUID().toString)
    props.put("enable.auto.commit", "true")
    props.put("auto.commit.interval.ms", "1000")
    props.put("session.timeout.ms", "30000")

    val consumer = new KafkaConsumer(props, new StringDeserializer, new StringDeserializer)
    new TopicConsumer(consumer, topic)
  }
}
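A minimal usage sketch, not part of the mist sources: it assumes a broker reachable on localhost:9092, an arbitrary topic name, and the TopicProducer/TopicConsumer wrappers defined above (same package). Because the consumer uses Kafka's default offset reset and joins its group asynchronously, the sleeps only make the demo likely to see the record, they are not a synchronization mechanism.

    object KafkaClientExample extends App {
      val topic = "mist-demo"                                  // hypothetical topic name
      val consumer = TopicConsumer("localhost", 9092, topic)   // String/String consumer with a random group.id
      val producer = TopicProducer("localhost", 9092, topic)

      // Print every record until close() is called; subscribe returns a Future completed on shutdown.
      consumer.subscribe((key, value) => println(s"$key -> $value"))

      Thread.sleep(3000)   // crude wait for the consumer group to join before producing
      producer.send("greeting", "hello from the TopicProducer")

      Thread.sleep(2000)   // give the background poll loop a moment to deliver the record
      producer.close()
      consumer.close()
    }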
Example 33
Source File: InitialSpec.scala From embedded-kafka with Apache License 2.0 | 5 votes |
package com.tuplejump.embedded.kafka

import java.util.concurrent.{TimeUnit, CountDownLatch}

import org.apache.kafka.common.serialization.StringDeserializer
import org.scalatest.concurrent.Eventually
import org.scalatest.concurrent.PatienceConfiguration.Timeout
import org.scalatest.time.{Millis, Span}

class InitialSpec extends AbstractSpec with Eventually with Logging {

  private val timeout = Timeout(Span(10000, Millis))

  "Initially, EmbeddedKafka" must {
    val kafka = new EmbeddedKafka()
    val topic = "test"
    val total = 1000
    val latch = new CountDownLatch(total)

    "start embedded zookeeper and embedded kafka" in {
      kafka.isRunning should be (false)
      kafka.start()
      eventually(timeout)(kafka.isRunning)
    }
    "create a topic" in {
      kafka.createTopic(topic, 1, 1)
    }
    "publish messages to the embedded kafka instance" in {
      val config = kafka.consumerConfig(
        group = "some.group",
        kafkaConnect = kafka.kafkaConfig.hostName + ":" + kafka.kafkaConfig.port,
        zkConnect = kafka.kafkaConfig.zkConnect,
        offsetPolicy = "largest", //latest with new consumer
        autoCommitEnabled = true,
        kDeserializer = classOf[StringDeserializer],
        vDeserializer = classOf[StringDeserializer])
      val consumer = new SimpleConsumer(latch, config, topic, "consumer.group", 1, 1)

      val batch1 = for (n <- 0 until total) yield s"message-test-$n"

      logger.info(s"Publishing ${batch1.size} messages...")
      kafka.sendMessages(topic, batch1)
      latch.await(10000, TimeUnit.MILLISECONDS)
      latch.getCount should be (0)

      consumer.shutdown()
    }
    "shut down relatively cleanly for now" in {
      kafka.shutdown()
      eventually(timeout)(!kafka.isRunning)
    }
  }
}
Example 34
Source File: KafkaUtility.scala From real-time-stream-processing-engine with Apache License 2.0 | 5 votes |
package com.knoldus.streaming.kafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

object KafkaUtility {

  //TODO It should read from config
  private val kafkaParams = Map(
    "bootstrap.servers" -> "localhost:9092",
    "key.deserializer" -> classOf[StringDeserializer],
    "value.deserializer" -> classOf[StringDeserializer],
    "auto.offset.reset" -> "earliest",
    "group.id" -> "tweet-consumer"
  )

  private val preferredHosts = LocationStrategies.PreferConsistent

  def createDStreamFromKafka(ssc: StreamingContext, topics: List[String]): InputDStream[ConsumerRecord[String, String]] =
    KafkaUtils.createDirectStream[String, String](
      ssc,
      preferredHosts,
      ConsumerStrategies.Subscribe[String, String](topics.distinct, kafkaParams)
    )
}
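The `//TODO` above notes that the hard-coded kafkaParams should come from configuration. A possible sketch using Typesafe Config (already used elsewhere in these examples); the `kafka.*` keys and the object name are assumptions, not something this project defines.

    import com.typesafe.config.ConfigFactory
    import org.apache.kafka.common.serialization.StringDeserializer

    object KafkaConfigLoader {
      private val config = ConfigFactory.load()

      // e.g. in application.conf:
      //   kafka.bootstrap-servers = "localhost:9092"
      //   kafka.auto-offset-reset = "earliest"
      //   kafka.group-id          = "tweet-consumer"
      val kafkaParams: Map[String, Object] = Map(
        "bootstrap.servers" -> config.getString("kafka.bootstrap-servers"),
        "key.deserializer" -> classOf[StringDeserializer],
        "value.deserializer" -> classOf[StringDeserializer],
        "auto.offset.reset" -> config.getString("kafka.auto-offset-reset"),
        "group.id" -> config.getString("kafka.group-id")
      )
    }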
Example 35
Source File: Streams.scala From haystack-trends with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trends.kstream

import java.util.function.Supplier

import com.expedia.metrics.MetricData
import com.expedia.www.haystack.commons.kstreams.serde.metricdata.{MetricDataSerde, MetricTankSerde}
import com.expedia.www.haystack.trends.aggregation.TrendMetric
import com.expedia.www.haystack.trends.config.AppConfiguration
import com.expedia.www.haystack.trends.kstream.processor.{AdditionalTagsProcessorSupplier, ExternalKafkaProcessorSupplier, MetricAggProcessorSupplier}
import com.expedia.www.haystack.trends.kstream.store.HaystackStoreBuilder
import org.apache.kafka.common.serialization.{Serde, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.Topology
import org.apache.kafka.streams.state.{KeyValueStore, StoreBuilder}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters

class Streams(appConfiguration: AppConfiguration) extends Supplier[Topology] {

  private val LOGGER = LoggerFactory.getLogger(classOf[Streams])
  private val TOPOLOGY_SOURCE_NAME = "metricpoint-source"
  private val TOPOLOGY_EXTERNAL_SINK_NAME = "metricpoint-aggegated-sink-external"
  private val TOPOLOGY_INTERNAL_SINK_NAME = "metric-data-aggegated-sink-internal"
  private val TOPOLOGY_AGGREGATOR_PROCESSOR_NAME = "metricpoint-aggregator-process"
  private val TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME = "additional-tags-process"
  private val TOPOLOGY_AGGREGATOR_TREND_METRIC_STORE_NAME = "trend-metric-store"
  private val kafkaConfig = appConfiguration.kafkaConfig

  private def initialize(topology: Topology): Topology = {

    //add source - topic where the raw metricpoints are pushed by the span-timeseries-transformer
    topology.addSource(
      kafkaConfig.autoOffsetReset,
      TOPOLOGY_SOURCE_NAME,
      kafkaConfig.timestampExtractor,
      new StringDeserializer,
      new MetricTankSerde().deserializer(),
      kafkaConfig.consumeTopic)

    //The processor which performs aggregations on the metrics
    topology.addProcessor(
      TOPOLOGY_AGGREGATOR_PROCESSOR_NAME,
      new MetricAggProcessorSupplier(TOPOLOGY_AGGREGATOR_TREND_METRIC_STORE_NAME, appConfiguration.encoder),
      TOPOLOGY_SOURCE_NAME)

    //key-value, state store associated with each kstreams task(partition)
    // which keeps the trend-metrics which are currently being computed in memory
    topology.addStateStore(createTrendMetricStateStore(), TOPOLOGY_AGGREGATOR_PROCESSOR_NAME)

    // topology to add additional tags if any
    topology.addProcessor(TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME, new AdditionalTagsProcessorSupplier(appConfiguration.additionalTags), TOPOLOGY_AGGREGATOR_PROCESSOR_NAME)

    if (appConfiguration.kafkaConfig.producerConfig.enableExternalKafka) {
      topology.addProcessor(
        TOPOLOGY_EXTERNAL_SINK_NAME,
        new ExternalKafkaProcessorSupplier(appConfiguration.kafkaConfig.producerConfig),
        TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME
      )
    }

    // adding sinks
    appConfiguration.kafkaConfig.producerConfig.kafkaSinkTopics.foreach(sinkTopic => {
      if (sinkTopic.enabled) {
        val serde = Class.forName(sinkTopic.serdeClassName).newInstance().asInstanceOf[Serde[MetricData]]
        topology.addSink(
          s"${TOPOLOGY_INTERNAL_SINK_NAME}-${sinkTopic.topic}",
          sinkTopic.topic,
          new StringSerializer,
          serde.serializer(),
          TOPOLOGY_ADDITIONAL_TAGS_PROCESSOR_NAME)
      }
    })

    topology
  }

  private def createTrendMetricStateStore(): StoreBuilder[KeyValueStore[String, TrendMetric]] = {

    val stateStoreConfiguration = appConfiguration.stateStoreConfig

    val storeBuilder = new HaystackStoreBuilder(TOPOLOGY_AGGREGATOR_TREND_METRIC_STORE_NAME, stateStoreConfiguration.stateStoreCacheSize)

    if (stateStoreConfiguration.enableChangeLogging) {
      storeBuilder
        .withLoggingEnabled(JavaConverters.mapAsJavaMap(stateStoreConfiguration.changeLogTopicConfiguration))
    } else {
      storeBuilder
        .withLoggingDisabled()
    }
  }

  override def get(): Topology = {
    val topology = new Topology
    initialize(topology)
  }
}
Example 36
Source File: WordCountTestableSpec.scala From kafka-streams with Apache License 2.0 | 5 votes |
package com.supergloo.examples

import com.supergloo.WordCountTestable
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.apache.kafka.streams.TopologyTestDriver
import org.apache.kafka.streams.state.KeyValueStore
import org.apache.kafka.streams.test.ConsumerRecordFactory
import org.scalatest.{FlatSpec, Matchers}

class WordCountTestableSpec extends FlatSpec with Matchers with KafkaTestSetup {

  val wordCountApplication = new WordCountTestable

  "Convert streaming data into lowercase and publish into output topic" should "push lower text to kafka" in {
    val driver = new TopologyTestDriver(wordCountApplication.toLowerCaseStream("input-topic", "output-topic"), config)
    val recordFactory = new ConsumerRecordFactory("input-topic", new StringSerializer(), new StringSerializer())
    val words = "Hello, WORLDY, World worlD Test"
    driver.pipeInput(recordFactory.create(words))
    val record: ProducerRecord[String, String] = driver.readOutput("output-topic", new StringDeserializer(), new StringDeserializer())
    record.value() shouldBe words.toLowerCase
    driver.close()
  }

  "WordCountTestable" should "count number of words" in {
    val driver = new TopologyTestDriver(wordCountApplication.countNumberOfWords("input-topic", "output-topic", "counts-store"), config)
    val recordFactory = new ConsumerRecordFactory("input-topic", new StringSerializer(), new StringSerializer())
    val words = "Hello Kafka Streams, All streams lead to Kafka"
    driver.pipeInput(recordFactory.create(words))
    val store: KeyValueStore[String, java.lang.Long] = driver.getKeyValueStore("counts-store")
    store.get("hello") shouldBe 1
    store.get("kafka") shouldBe 2
    store.get("streams") shouldBe 2
    store.get("lead") shouldBe 1
    store.get("to") shouldBe 1
    driver.close()
  }
}
Example 37
Source File: TheFlashTweetsConsumer.scala From KafkaPlayground with GNU General Public License v3.0 | 5 votes |
package com.github.pedrovgs.kafkaplayground.flash

import cakesolutions.kafka.KafkaConsumer.Conf
import com.github.pedrovgs.kafkaplayground.flash.elasticsearch.ElasticClient
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConverters._
import scala.concurrent.duration._

object TheFlashTweetsConsumer {
  private val groupId = "kafka-elasticsearch-consumer"
}

class TheFlashTweetsConsumer(private val brokerAddress: String,
                             private val topic: String,
                             private val elasticClient: ElasticClient) {

  import TheFlashTweetsConsumer._

  private val consumer = cakesolutions.kafka.KafkaConsumer(
    Conf(
      bootstrapServers = brokerAddress,
      keyDeserializer = new StringDeserializer(),
      valueDeserializer = new StringDeserializer(),
      groupId = s"$topic-$groupId",
      autoOffsetReset = OffsetResetStrategy.EARLIEST
    )
  )

  consumer.subscribe(List(topic).asJava)

  def poll(): Unit = {
    println(s"Polling messages from the kafka consumer at topic: $topic.")
    val records = consumer.poll(10.seconds.toMillis)
    println(s"We've fetched ${records.count()} records.")
    records.forEach { record =>
      val id = s"${record.topic()}_${record.partition()}_${record.offset()}"
      val content = record.value()
      println(s"Saving content from the topic $topic content into elastic: $content")
      elasticClient.insertOrUpdate(id, content)
    }
  }
}
Example 38
Source File: EmbeddedKafkaServer.scala From KafkaPlayground with GNU General Public License v3.0 | 5 votes |
package com.github.pedrovgs.kafkaplayground.utils

import cakesolutions.kafka.KafkaProducerRecord
import cakesolutions.kafka.testkit.KafkaServer
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.scalatest.{BeforeAndAfter, Suite}

import scala.concurrent.duration._

trait EmbeddedKafkaServer extends BeforeAndAfter {
  this: Suite =>

  private var kafkaServer: KafkaServer = _

  before {
    kafkaServer = new KafkaServer
    startKafkaServer()
  }

  after {
    stopKafkaServer()
  }

  def startKafkaServer(): Unit = kafkaServer.startup()

  def stopKafkaServer(): Unit = kafkaServer.close()

  def kafkaServerAddress(): String = s"localhost:${kafkaServer.kafkaPort}"

  def zookeeperServerAddress(): String = s"localhost:${kafkaServer.zookeeperPort}"

  def recordsForTopic(topic: String, expectedNumberOfRecords: Int = 1): Iterable[String] =
    kafkaServer
      .consume[String, String](
        topic = topic,
        keyDeserializer = new StringDeserializer,
        valueDeserializer = new StringDeserializer,
        expectedNumOfRecords = expectedNumberOfRecords,
        timeout = 10.seconds.toMillis
      )
      .map(_._2)

  def produceMessage(topic: String, content: String): Unit =
    kafkaServer.produce(
      topic = topic,
      records = Seq(KafkaProducerRecord[String, String](topic = topic, value = content)),
      keySerializer = new StringSerializer(),
      valueSerializer = new StringSerializer()
    )
}
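A minimal sketch, not part of KafkaPlayground, of a test mixing in the trait above. The spec name, topic and message are arbitrary; it relies only on `produceMessage` and `recordsForTopic` as defined in the trait, with `recordsForTopic` blocking until the expected number of records arrives or its timeout expires.

    import org.scalatest.{FlatSpec, Matchers}

    class EmbeddedKafkaServerSpec extends FlatSpec with Matchers with EmbeddedKafkaServer {

      "EmbeddedKafkaServer" should "return the records previously produced to a topic" in {
        val topic = "embedded-kafka-demo"   // hypothetical topic name
        produceMessage(topic, "hello embedded kafka")

        recordsForTopic(topic, expectedNumberOfRecords = 1) should contain only "hello embedded kafka"
      }
    }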
Example 39
Source File: KafkaWirings.scala From ticket-booking-aecor with Apache License 2.0 | 5 votes |
package ru.pavkin.booking

import aecor.data.{Committable, ConsumerId}
import cats.data.NonEmptyList
import cats.effect._
import fs2.kafka.{AutoOffsetReset, ConsumerSettings, _}
import org.apache.kafka.common.serialization.StringDeserializer
import ru.pavkin.payment.event.PaymentReceived
import ru.pavkin.payment.kafka.PaymentReceivedEventDeserializer

import scala.concurrent.ExecutionContext

final class KafkaWirings[F[_]](
  val paymentReceivedEventStream: ConsumerId => fs2.Stream[F, Committable[F, PaymentReceived]]
)

object KafkaWirings {

  def apply[F[_]: ConcurrentEffect: ContextShift: Timer]: KafkaWirings[F] = {

    def paymentReceivedEventStream(
      consumerId: ConsumerId
    ): fs2.Stream[F, Committable[F, PaymentReceived]] =
      for {
        executionContext <- consumerExecutionContextStream[F]
        settings = bookingPaymentProcessSourceSettings(executionContext).withGroupId(consumerId.value)
        consumer <- consumerStream[F].using(settings)
        _ <- consumer.subscribe(paymentReceivedTopic)
        stream <- consumer.stream.map(m => Committable(m.committableOffset.commit, m.record.value()))
      } yield stream

    new KafkaWirings[F](paymentReceivedEventStream)
  }

  def bookingPaymentProcessSourceSettings(
    ec: ExecutionContext
  ): ConsumerSettings[String, PaymentReceived] =
    ConsumerSettings(
      keyDeserializer = new StringDeserializer,
      valueDeserializer = new PaymentReceivedEventDeserializer,
      executionContext = ec
    ).withAutoOffsetReset(AutoOffsetReset.Earliest).withBootstrapServers("0.0.0.0:9092")

  val paymentReceivedTopic: NonEmptyList[String] = NonEmptyList.one("PaymentReceived")
}
Example 40
Source File: KafkaTestClient.scala From haystack-traces with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trace.indexer.integration.clients

import java.util.Properties

import com.expedia.www.haystack.trace.indexer.config.entities.KafkaConfiguration
import com.expedia.www.haystack.trace.indexer.integration.serdes.{SnappyCompressedSpanBufferProtoDeserializer, SpanProtoSerializer}
import com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster

object KafkaTestClient {
  val KAFKA_CLUSTER = new EmbeddedKafkaCluster(1)
  KAFKA_CLUSTER.start()
}

class KafkaTestClient {

  import KafkaTestClient._

  val INPUT_TOPIC = "spans"
  val OUTPUT_TOPIC = "span-buffer"

  val APP_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[ByteArraySerializer])
    props
  }

  val APP_CONSUMER_CONFIG: Properties = new Properties()

  val TEST_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[SpanProtoSerializer])
    props
  }

  val RESULT_CONSUMER_CONFIG = new Properties()

  def buildConfig = KafkaConfiguration(numStreamThreads = 1,
    pollTimeoutMs = 100,
    APP_CONSUMER_CONFIG,
    APP_PRODUCER_CONFIG,
    OUTPUT_TOPIC,
    INPUT_TOPIC,
    consumerCloseTimeoutInMillis = 3000,
    commitOffsetRetries = 3,
    commitBackoffInMillis = 250,
    maxWakeups = 5,
    wakeupTimeoutInMillis = 3000)

  def prepare(appId: String): Unit = {
    APP_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    APP_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-app-consumer")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SpanDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")

    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-result-consumer")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SnappyCompressedSpanBufferProtoDeserializer])

    deleteTopics(INPUT_TOPIC, OUTPUT_TOPIC)
    KAFKA_CLUSTER.createTopic(INPUT_TOPIC, 2, 1)
    KAFKA_CLUSTER.createTopic(OUTPUT_TOPIC)
  }

  private def deleteTopics(topics: String*): Unit = KAFKA_CLUSTER.deleteTopicsAndWait(topics: _*)
}
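A short usage sketch, not part of the haystack-traces sources; it assumes the same package and imports as the KafkaTestClient above. An integration test would typically prepare the embedded cluster once per suite and hand `buildConfig` to the indexer under test.

    object KafkaTestClientUsage {
      def embeddedKafkaConfig(appId: String): KafkaConfiguration = {
        val client = new KafkaTestClient
        client.prepare(appId)   // recreates the "spans" and "span-buffer" topics on the embedded cluster
        client.buildConfig      // KafkaConfiguration wired to the embedded cluster's consumer/producer properties
      }
    }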