org.apache.kafka.clients.consumer.ConsumerRecord Scala Examples
The following examples show how to use org.apache.kafka.clients.consumer.ConsumerRecord.
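A ConsumerRecord carries a single key/value pair received from Kafka together with the topic, partition, offset, and timestamp it was read from. Before the project examples below, here is a minimal standalone sketch of those accessors, assuming a broker at localhost:9092, a placeholder topic named my-topic, and a Kafka client version that supports poll(Duration) (2.0+); it is an illustration, not taken from any of the projects listed here.

import java.time.Duration
import java.util.{Collections, Properties}

import org.apache.kafka.clients.consumer.{ConsumerRecord, KafkaConsumer}

object ConsumerRecordSketch extends App {
  val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092") // assumption: local broker
  props.put("group.id", "sketch-group")            // assumption: placeholder group id
  props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
  props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")

  val consumer = new KafkaConsumer[String, String](props)
  consumer.subscribe(Collections.singletonList("my-topic")) // assumption: placeholder topic

  val records = consumer.poll(Duration.ofMillis(1000))
  val it = records.iterator()
  while (it.hasNext) {
    val record: ConsumerRecord[String, String] = it.next()
    // Each ConsumerRecord exposes its coordinates (topic, partition, offset),
    // the timestamp, and the deserialized key/value pair.
    println(s"${record.topic()}-${record.partition()}@${record.offset()}: " +
      s"key=${record.key()}, value=${record.value()}, timestamp=${record.timestamp()}")
  }
  consumer.close()
}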
Example 1
Source File: BasicConsumerExample.scala From kafka_training with Apache License 2.0
package com.malaska.kafka.training

import java.util
import java.util.{Collections, Properties}

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRebalanceListener, ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.TopicPartition

object BasicConsumerExample {
  def main(args: Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val topic = args(2)

    println("Setting up parameters")
    val props = new Properties()
    props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "TrainingConsumer")
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
    props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000")
    props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000")
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")

    println("Creating Consumer")
    val consumer = new KafkaConsumer[String, String](props)
    val listener = new RebalanceListener
    consumer.subscribe(Collections.singletonList(topic), listener)

    println("Starting Consumer")
    while (true) {
      val records = consumer.poll(1000)
      val it = records.iterator()
      while (it.hasNext) {
        val record = it.next()
        println("Received message: (" + record.key() + ", " + record.value() + ") at offset " + record.offset())
      }
    }
  }
}

class RebalanceListener extends ConsumerRebalanceListener {
  override def onPartitionsAssigned(collection: util.Collection[TopicPartition]): Unit = {
    print("Assigned Partitions:")
    val it = collection.iterator()
    while (it.hasNext) {
      print(it.next().partition() + ",")
    }
    println
  }

  override def onPartitionsRevoked(collection: util.Collection[TopicPartition]): Unit = {
    print("Revoked Partitions:")
    val it = collection.iterator()
    while (it.hasNext) {
      print(it.next().partition() + ",")
    }
    println
  }
}
Example 2
Source File: KafkaStreamingDemo.scala From MaxCompute-Spark with Apache License 2.0
package com.aliyun.odps.spark.examples.streaming.kafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaStreamingDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("KafkaStreamingDemo")
      .getOrCreate()

    val ssc = new StreamingContext(spark.sparkContext, Seconds(5))
    // Use OSS as the checkpoint storage
    ssc.checkpoint("oss://bucket/checkpointDir/")

    // Kafka configuration parameters
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "192.168.1.1:9200,192.168.1.2:9200,192.168.1.3:9200",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "testGroupId",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val topics = Set("event_topic")
    val recordDstream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
      )

    val dstream = recordDstream.map(f => (f.key(), f.value()))
    val data: DStream[String] = dstream.map(_._2)
    val wordsDStream: DStream[String] = data.flatMap(_.split(" "))
    val wordAndOneDstream: DStream[(String, Int)] = wordsDStream.map((_, 1))
    val result: DStream[(String, Int)] = wordAndOneDstream.reduceByKey(_ + _)
    result.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
Example 3
Source File: Kafka2OdpsDemo.scala From MaxCompute-Spark with Apache License 2.0
package com.aliyun.odps.spark.examples.streaming.kafka

import com.aliyun.odps.spark.examples.streaming.common.SparkSessionSingleton
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

object Kafka2OdpsDemo {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("test")
    val ssc = new StreamingContext(sparkConf, Seconds(10))
    // Use OSS as the checkpoint storage and change this to a valid OSS path.
    // For OSS access, see https://github.com/aliyun/MaxCompute-Spark/wiki/08.-Oss-Access%E6%96%87%E6%A1%A3%E8%AF%B4%E6%98%8E
    ssc.checkpoint("oss://bucket/checkpointdir")

    // Kafka configuration parameters
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "testGroupId",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    // Create the Kafka DStream
    val topics = Set("test")
    val recordDstream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
      )
    val dstream = recordDstream.map(f => (f.key(), f.value()))

    // Parse the Kafka data and write it to ODPS
    val data: DStream[String] = dstream.map(_._2)
    val wordsDStream: DStream[String] = data.flatMap(_.split(" "))
    wordsDStream.foreachRDD(rdd => {
      val spark = SparkSessionSingleton.getInstance(rdd.sparkContext.getConf)
      import spark.implicits._
      rdd.toDF("id").write.mode("append").saveAsTable("test_table")
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
Example 4
Source File: Codecs.scala From embedded-kafka-schema-registry with MIT License
package net.manub.embeddedkafka.schemaregistry

import org.apache.kafka.clients.consumer.ConsumerRecord

object Codecs {
  implicit def stringKeyGenericValueCrDecoder[V]
      : ConsumerRecord[String, V] => (String, V) =
    cr => (cr.key, cr.value)

  implicit def genericValueCrDecoder[V]: ConsumerRecord[String, V] => V =
    _.value

  implicit def stringKeyGenericValueTopicCrDecoder[V]
      : ConsumerRecord[String, V] => (String, String, V) =
    cr => (cr.topic, cr.key, cr.value)
}
Example 5
Source File: Codecs.scala From embedded-kafka-schema-registry with MIT License
package net.manub.embeddedkafka.schemaregistry.avro

import org.apache.avro.generic.GenericRecord
import org.apache.avro.specific.SpecificRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

@deprecated(
  "Avro-related classes will be removed soon",
  since = "5.5.0"
)
object Codecs {
  implicit def stringKeyAvroValueCrDecoder[V <: SpecificRecord]
      : ConsumerRecord[String, V] => (String, V) =
    cr => (cr.key, cr.value)

  implicit def avroValueCrDecoder[V <: SpecificRecord]
      : ConsumerRecord[String, V] => V =
    _.value

  implicit def stringKeyAvroValueTopicCrDecoder[V <: SpecificRecord]
      : ConsumerRecord[String, V] => (String, String, V) =
    cr => (cr.topic, cr.key, cr.value)

  implicit def stringKeyGenericValueCrDecoder
      : ConsumerRecord[String, GenericRecord] => (String, GenericRecord) =
    cr => (cr.key, cr.value)

  implicit def genericKeyGenericValueCrDecoder
      : ConsumerRecord[GenericRecord, GenericRecord] => (GenericRecord, GenericRecord) =
    cr => (cr.key, cr.value)
}
Example 6
Source File: RecordProcessor.scala From kafka-with-akka-streams-kafka-streams-tutorial with Apache License 2.0
package com.lightbend.scala.kafka.client

import com.lightbend.scala.kafka.RecordProcessorTrait
import org.apache.kafka.clients.consumer.ConsumerRecord

class RecordProcessor extends RecordProcessorTrait[Array[Byte], Array[Byte]] {
  override def processRecord(record: ConsumerRecord[Array[Byte], Array[Byte]]): Unit = {
    RecordProcessor.count += 1
    val key = record.key()
    val value = record.value()
    println(s"Retrieved message #${RecordProcessor.count}: " + mkString("key", key) + ", " + mkString("value", value))
  }

  private def mkString(label: String, array: Array[Byte]) = {
    if (array == null) s"${label} = ${array}"
    else s"${label} = ${array}, size = ${array.size}, first 5 elements = ${array.take(5).mkString("[", ",", "]")}"
  }
}

object RecordProcessor {
  var count = 0L
}
Example 7
Source File: PlainSourceConsumer.scala From kafka-scala-api with Apache License 2.0
package com.example.consumer

import java.util.concurrent.atomic.AtomicLong

import akka.Done
import akka.kafka.Subscriptions
import akka.kafka.scaladsl.Consumer
import akka.stream.scaladsl.Sink
import com.example._
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

object PlainSourceConsumer extends App {

  val db = new DB

  db.loadOffset().foreach { fromOffset =>
    val partition = 0
    val subscription = Subscriptions.assignmentWithOffset(
      new TopicPartition(topic, partition) -> fromOffset
    )
    val done = Consumer.plainSource(consumerSettings, subscription)
      .mapAsync(1)(db.save)
      .runWith(Sink.ignore)
  }
}

//Zookeeper or DB storage mock
class DB {

  private val offset = new AtomicLong(2)

  def save(record: ConsumerRecord[Array[Byte], String]): Future[Done] = {
    println(s"DB.save: ${record.value}")
    offset.set(record.offset)
    Future.successful(Done)
  }

  def loadOffset(): Future[Long] =
    Future.successful(offset.get)

  def update(data: String): Future[Done] = {
    println(s"DB.update: $data")
    Future.successful(Done)
  }
}
Example 8
Source File: KafkaStreamingLatestExample.scala From kafka-scala-api with Apache License 2.0
package com.example.kafka010

import java.{util => ju}

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010._
import org.apache.spark.{SparkContext, TaskContext}

import scala.collection.JavaConversions._
import com.example._

object KafkaStreamingLatestExample {
  def main(args: Array[String]): Unit = {
    kafkaStream010Checkpointing()
  }

  def kafkaStream010Itself() =
    launchWithItself(kafkaStreaming010, appName = "Kafka010_DirectStream")

  private def kafkaStreaming010(streamingContext: StreamingContext): Unit = {
    val topics = Array("sample_topic")
    val stream = KafkaUtils.createDirectStream[String, String](
      streamingContext,
      PreferConsistent, //It will consistently distribute partitions across all executors.
      Subscribe[String, String](topics, kafkaParams)
    )

    stream.map(record => (record.key, record.value)).print()

    stream.foreachRDD { rdd =>
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      rdd.foreachPartition { _ =>
        val o: OffsetRange = offsetRanges(TaskContext.get.partitionId)
        println(s"${o.topic} ${o.partition} ${o.fromOffset} ${o.untilOffset}")
      }
    }

    storingOffsetsItself(stream)
  }

  private def storingOffsetsItself(stream: InputDStream[ConsumerRecord[String, String]]) = {
    stream.foreachRDD { rdd =>
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }
  }

  private def kafkaRdd010() = {
    val sparkContext = new SparkContext("local[*]", "kafkaRdd010")

    val offsetRanges = Array(
      // topic, partition, inclusive starting offset, exclusive ending offset
      OffsetRange("sample_topic", 0, 10, 20),
      OffsetRange("sample_topic", 1, 10, 20)
    )

    val params = new ju.HashMap[String, Object](kafkaParams)
    val kafkaRDD = KafkaUtils.createRDD[String, String](sparkContext, params, offsetRanges, PreferConsistent)
    println(kafkaRDD.map(_.value()).first())
  }
}
Example 9
Source File: FlinkKafkaCodecSerde.scala From cloudflow with Apache License 2.0
package cloudflow.flink

import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.streaming.connectors.kafka._

import cloudflow.streamlets.{ CodecInlet, CodecOutlet }

private[flink] class FlinkKafkaCodecSerializationSchema[T: TypeInformation](outlet: CodecOutlet[T], topic: String)
    extends KafkaSerializationSchema[T] {
  override def serialize(value: T, timestamp: java.lang.Long): ProducerRecord[Array[Byte], Array[Byte]] =
    new ProducerRecord(topic, outlet.codec.encode(value))
}

private[flink] class FlinkKafkaCodecDeserializationSchema[T: TypeInformation](inlet: CodecInlet[T])
    extends KafkaDeserializationSchema[T] {
  override def deserialize(record: ConsumerRecord[Array[Byte], Array[Byte]]): T = inlet.codec.decode(record.value)
  override def isEndOfStream(value: T): Boolean = false
  override def getProducedType: TypeInformation[T] = implicitly[TypeInformation[T]]
}
Example 10
Source File: ConsumerBuilder.scala From asura with MIT License
package asura.kafka

import akka.actor.ActorSystem
import akka.kafka.scaladsl.Consumer
import akka.kafka.scaladsl.Consumer.Control
import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.Source
import io.confluent.kafka.serializers.{AbstractKafkaAvroSerDeConfig, KafkaAvroDeserializer, KafkaAvroDeserializerConfig}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.common.serialization.{Deserializer, StringDeserializer}

import scala.collection.JavaConverters._

object ConsumerBuilder {

  def buildAvroSource[V](
      brokerUrl: String,
      schemaRegisterUrl: String,
      group: String,
      topics: Set[String],
      resetType: String = "latest"
  )(implicit system: ActorSystem): Source[ConsumerRecord[String, V], Control] = {

    val kafkaAvroSerDeConfig = Map[String, Any](
      AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG -> schemaRegisterUrl,
      KafkaAvroDeserializerConfig.SPECIFIC_AVRO_READER_CONFIG -> true.toString
    )
    val consumerSettings: ConsumerSettings[String, V] = {
      val kafkaAvroDeserializer = new KafkaAvroDeserializer()
      kafkaAvroDeserializer.configure(kafkaAvroSerDeConfig.asJava, false)
      val deserializer = kafkaAvroDeserializer.asInstanceOf[Deserializer[V]]
      ConsumerSettings(system, new StringDeserializer, deserializer)
        .withBootstrapServers(brokerUrl)
        .withGroupId(group)
        .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, resetType)
    }
    Consumer.plainSource(consumerSettings, Subscriptions.topics(topics))
  }
}
Example 11
Source File: package.scala From kafka4s with Apache License 2.0
package com.banno.kafka

import org.scalacheck.{Arbitrary, Cogen, Gen}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

package object test {

  implicit def arbitraryProducerRecord[K: Arbitrary, V: Arbitrary]
      : Arbitrary[ProducerRecord[K, V]] =
    Arbitrary {
      for {
        t <- Gen.identifier
        k <- Arbitrary.arbitrary[K]
        v <- Arbitrary.arbitrary[V]
      } yield new ProducerRecord(t, k, v)
    }

  implicit def arbitraryConsumerRecord[K: Arbitrary, V: Arbitrary]
      : Arbitrary[ConsumerRecord[K, V]] =
    Arbitrary {
      for {
        t <- Gen.identifier
        p <- Gen.posNum[Int]
        o <- Gen.posNum[Long]
        k <- Arbitrary.arbitrary[K]
        v <- Arbitrary.arbitrary[V]
      } yield new ConsumerRecord(t, p, o, k, v)
    }

  //these things are necessary for EqSpec
  implicit def producerRecordCogen[K, V]: Cogen[ProducerRecord[K, V]] =
    Cogen(pr => pr.key.toString.length.toLong + pr.value.toString.length.toLong) // ¯\_(ツ)_/¯

  implicit def consumerRecordCogen[K, V]: Cogen[ConsumerRecord[K, V]] =
    Cogen(cr => cr.key.toString.length.toLong + cr.value.toString.length.toLong) // ¯\_(ツ)_/¯
}
Example 12
Source File: CurrentOffsetCounter.scala From kafka4s with Apache License 2.0
package com.banno.kafka.metrics.prometheus

import cats.effect.Sync
import cats.implicits._
import org.apache.kafka.clients.consumer.ConsumerRecord
import io.prometheus.client._

import scala.math.max

object CurrentOffsetCounter {
  def apply[F[_]](cr: CollectorRegistry, prefix: String, clientId: String)(
      implicit F: Sync[F]
  ): F[ConsumerRecord[_, _] => F[Unit]] =
    F.delay {
        Counter
          .build()
          .name(prefix + "_current_offset")
          .help("Counter for last consumed (not necessarily committed) offset of topic partition.")
          .labelNames("client_id", "topic", "partition")
          .register(cr)
      }
      .map { counter => (record: ConsumerRecord[_, _]) =>
        for {
          value <- F.delay(counter.labels(clientId, record.topic, record.partition.toString).get)
          delta = max(0, record.offset.toDouble - value)
          _ <- F.delay(counter.labels(clientId, record.topic, record.partition.toString).inc(delta))
        } yield ()
      }
}
Example 13
Source File: DirectKafkaWordCount.scala From spark-secure-kafka-app with Apache License 2.0
package com.cloudera.spark.examples

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, LocationStrategies, KafkaUtils}
import org.apache.spark.streaming._

object DirectKafkaWordCount {
  def main(args: Array[String]) {
    // Three arguments are required: brokers, topics, and the ssl flag.
    if (args.length < 3) {
      System.err.println(s"""
        |Usage: DirectKafkaWordCount <brokers> <topics> <ssl>
        |  <brokers> is a list of one or more Kafka brokers
        |  <topics> is a list of one or more kafka topics to consume from
        |  <ssl> true if using SSL, false otherwise.
        |
        """.stripMargin)
      System.exit(1)
    }

    val Array(brokers, topics, ssl) = args

    // Create context with 2 second batch interval
    val sparkConf = new SparkConf().setAppName("DirectKafkaWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    val isUsingSsl = ssl.toBoolean

    // Create direct kafka stream with brokers and topics
    val topicsSet = topics.split(",").toSet
    val commonParams = Map[String, Object](
      "bootstrap.servers" -> brokers,
      "security.protocol" -> (if (isUsingSsl) "SASL_SSL" else "SASL_PLAINTEXT"),
      "sasl.kerberos.service.name" -> "kafka",
      "auto.offset.reset" -> "earliest",
      "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
      "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
      "group.id" -> "default",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val additionalSslParams = if (isUsingSsl) {
      Map(
        "ssl.truststore.location" -> "/etc/cdep-ssl-conf/CA_STANDARD/truststore.jks",
        "ssl.truststore.password" -> "cloudera"
      )
    } else {
      Map.empty
    }

    val kafkaParams = commonParams ++ additionalSslParams

    val messages: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topicsSet, kafkaParams)
      )

    // Get the lines, split them into words, count the words and print
    val lines = messages.map(_.value())
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L)).reduceByKey(_ + _)
    wordCounts.print()

    // Start the computation
    ssc.start()
    ssc.awaitTermination()
  }
}
Example 14
Source File: KafkaUtility.scala From real-time-stream-processing-engine with Apache License 2.0
package com.knoldus.streaming.kafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

object KafkaUtility {

  //TODO It should read from config
  private val kafkaParams = Map(
    "bootstrap.servers" -> "localhost:9092",
    "key.deserializer" -> classOf[StringDeserializer],
    "value.deserializer" -> classOf[StringDeserializer],
    "auto.offset.reset" -> "earliest",
    "group.id" -> "tweet-consumer"
  )

  private val preferredHosts = LocationStrategies.PreferConsistent

  def createDStreamFromKafka(ssc: StreamingContext, topics: List[String]): InputDStream[ConsumerRecord[String, String]] =
    KafkaUtils.createDirectStream[String, String](
      ssc,
      preferredHosts,
      ConsumerStrategies.Subscribe[String, String](topics.distinct, kafkaParams)
    )
}
Example 15
Source File: ProcessingKafkaApplication.scala From Akka-Cookbook with MIT License
package com.packt.chapter8

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerSettings, Subscriptions}
import akka.stream.{ActorMaterializer, ClosedShape}
import akka.stream.scaladsl.{Flow, GraphDSL, RunnableGraph, Sink, Source}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}

import scala.concurrent.duration._

object ProcessingKafkaApplication extends App {
  implicit val actorSystem = ActorSystem("SimpleStream")
  implicit val actorMaterializer = ActorMaterializer()

  val bootstrapServers = "localhost:9092"
  val kafkaTopic = "akka_streams_topic"
  val partition = 0
  val subscription = Subscriptions.assignment(new TopicPartition(kafkaTopic, partition))

  val consumerSettings = ConsumerSettings(actorSystem, new ByteArrayDeserializer, new StringDeserializer)
    .withBootstrapServers(bootstrapServers)
    .withGroupId("akka_streams_group")
    .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  val producerSettings = ProducerSettings(actorSystem, new ByteArraySerializer, new StringSerializer)
    .withBootstrapServers(bootstrapServers)

  val runnableGraph = RunnableGraph.fromGraph(GraphDSL.create() { implicit builder =>
    import GraphDSL.Implicits._

    val tickSource = Source.tick(0 seconds, 5 seconds, "Hello from Akka Streams using Kafka!")
    val kafkaSource = Consumer.plainSource(consumerSettings, subscription)
    val kafkaSink = Producer.plainSink(producerSettings)
    val printlnSink = Sink.foreach(println)

    val mapToProducerRecord = Flow[String].map(elem => new ProducerRecord[Array[Byte], String](kafkaTopic, elem))
    val mapFromConsumerRecord = Flow[ConsumerRecord[Array[Byte], String]].map(record => record.value())

    tickSource ~> mapToProducerRecord ~> kafkaSink
    kafkaSource ~> mapFromConsumerRecord ~> printlnSink

    ClosedShape
  })

  runnableGraph.run()
}
Example 16
Source File: Codecs.scala From embedded-kafka with MIT License
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization._

object Codecs {
  implicit val stringSerializer: Serializer[String] = new StringSerializer()
  implicit val nullSerializer: Serializer[Array[Byte]] = new ByteArraySerializer()

  implicit val stringDeserializer: Deserializer[String] = new StringDeserializer()
  implicit val nullDeserializer: Deserializer[Array[Byte]] = new ByteArrayDeserializer()

  implicit val stringKeyValueCrDecoder
      : ConsumerRecord[String, String] => (String, String) =
    cr => (cr.key(), cr.value)
  implicit val stringValueCrDecoder: ConsumerRecord[String, String] => String =
    _.value()
  implicit val stringKeyValueTopicCrDecoder
      : ConsumerRecord[String, String] => (String, String, String) =
    cr => (cr.topic(), cr.key(), cr.value())

  implicit val keyNullValueCrDecoder
      : ConsumerRecord[String, Array[Byte]] => (String, Array[Byte]) =
    cr => (cr.key(), cr.value)
  implicit val nullValueCrDecoder
      : ConsumerRecord[String, Array[Byte]] => Array[Byte] =
    _.value()
  implicit val keyNullValueTopicCrDecoder
      : ConsumerRecord[String, Array[Byte]] => (String, String, Array[Byte]) =
    cr => (cr.topic(), cr.key(), cr.value())
}
Example 17
Source File: ConsumerExtensions.scala From embedded-kafka with MIT License
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.{ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.KafkaException

import scala.jdk.CollectionConverters._
import scala.concurrent.duration._
import scala.util.Try

// Excerpt: the enclosing extension class that wraps a KafkaConsumer[K, V] as `consumer`
// is omitted in this listing; only the batch-polling helper is shown.
  private def getNextBatch[T](poll: FiniteDuration, topics: Seq[String])(
      implicit decoder: ConsumerRecord[K, V] => T
  ): Seq[T] =
    Try {
      consumer.subscribe(topics.asJava)
      topics.foreach(consumer.partitionsFor)
      val records = consumer.poll(duration2JavaDuration(poll))
      // use toList to force eager evaluation. toSeq is lazy
      records.iterator().asScala.toList.map(decoder(_))
    }.recover {
      case ex: KafkaException => throw new KafkaUnavailableException(ex)
    }.get
Example 18
Source File: CommittableRecord.scala From zio-kafka with Apache License 2.0
package zio.kafka.consumer

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import zio.{ RIO, Task }
import zio.kafka.serde.Deserializer

final case class CommittableRecord[K, V](record: ConsumerRecord[K, V], offset: Offset) {
  def deserializeWith[R, K1, V1](
    keyDeserializer: Deserializer[R, K1],
    valueDeserializer: Deserializer[R, V1]
  )(implicit ev1: K <:< Array[Byte], ev2: V <:< Array[Byte]): RIO[R, CommittableRecord[K1, V1]] =
    for {
      key   <- keyDeserializer.deserialize(record.topic(), record.headers(), record.key())
      value <- valueDeserializer.deserialize(record.topic(), record.headers(), record.value())
    } yield {
      copy(
        record = new ConsumerRecord[K1, V1](
          record.topic(),
          record.partition(),
          record.offset(),
          record.timestamp(),
          record.timestampType(),
          ConsumerRecord.NULL_CHECKSUM, // Checksum is deprecated
          record.serializedKeySize(),
          record.serializedValueSize(),
          key,
          value,
          record.headers()
        )
      )
    }

  def key: K = record.key
  def value: V = record.value()
  def partition: Int = record.partition()
  def timestamp: Long = record.timestamp()
}

object CommittableRecord {
  def apply[K, V](
    record: ConsumerRecord[K, V],
    commitHandle: Map[TopicPartition, Long] => Task[Unit]
  ): CommittableRecord[K, V] =
    CommittableRecord(
      record,
      OffsetImpl(new TopicPartition(record.topic(), record.partition()), record.offset(), commitHandle)
    )
}
Example 19
Source File: TopicsEndpoint.scala From hydra with Apache License 2.0
package hydra.kafka.endpoints

import akka.actor.ActorSelection
import akka.http.scaladsl.common.EntityStreamingSupport
import akka.kafka.Subscriptions
import akka.kafka.scaladsl.Consumer
import akka.pattern.ask
import akka.util.Timeout
import hydra.core.http.RouteSupport
import hydra.kafka.consumer.KafkaConsumerProxy.{GetLatestOffsets, LatestOffsetsResponse}
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition

import scala.collection.immutable.Map
import scala.concurrent.duration._
import scala.concurrent.{Await, ExecutionContext, Future}

class TopicsEndpoint(consumerProxy: ActorSelection)(implicit ec: ExecutionContext) extends RouteSupport {

  import hydra.kafka.util.KafkaUtils._

  implicit val jsonStreamingSupport = EntityStreamingSupport.json()

  override val route =
    path("transports" / "kafka" / "consumer" / "topics" / Segment) { topicName =>
      get {
        extractRequestContext { ctx =>
          parameters('format.?, 'group.?, 'n ? 10, 'start ? "earliest") {
            (format, groupId, n, startOffset) =>
              val settings = loadConsumerSettings[Any, Any](
                format.getOrElse("avro"),
                groupId.getOrElse("hydra"),
                startOffset
              )
              val offsets = latestOffsets(topicName)
              val source = Consumer
                .plainSource(settings, Subscriptions.topics(topicName))
                .initialTimeout(5.seconds)
                .zipWithIndex
                .takeWhile(rec => rec._2 <= n && !shouldCancel(offsets, rec._1))
                .map(rec => rec._1.value().toString)
                .watchTermination()((_, termination) =>
                  termination.failed.foreach {
                    case cause => ctx.fail(cause)
                  }
                )
              complete(source)
          }
        }
      }
    }

  def shouldCancel(
      fpartitions: Future[Map[TopicPartition, Long]],
      record: ConsumerRecord[Any, Any]
  ): Boolean = {
    if (fpartitions.isCompleted) {
      val partitions = Await.result(fpartitions, 1.millis)
      val tp = new TopicPartition(record.topic(), record.partition())
      partitions.get(tp) match {
        case Some(offset) => record.offset() >= offset
        case None => false
      }
    } else {
      false
    }
  }

  private def latestOffsets(topic: String): Future[Map[TopicPartition, Long]] = {
    implicit val timeout = Timeout(5 seconds)
    (consumerProxy ? GetLatestOffsets(topic))
      .mapTo[LatestOffsetsResponse]
      .map(_.offsets)
  }
}
Example 20
Source File: Codecs.scala From scalatest-embedded-kafka with MIT License
package net.manub.embeddedkafka.schemaregistry.avro

import org.apache.avro.specific.SpecificRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

object Codecs {
  implicit def stringKeyAvroValueCrDecoder[V <: SpecificRecord]
      : ConsumerRecord[String, V] => (String, V) =
    cr => (cr.key(), cr.value)
  implicit def avroValueCrDecoder[V <: SpecificRecord]
      : ConsumerRecord[String, V] => V =
    _.value()
  implicit def stringKeyAvroValueTopicCrDecoder[V <: SpecificRecord]
      : ConsumerRecord[String, V] => (String, String, V) =
    cr => (cr.topic(), cr.key(), cr.value())
}
Example 21
Source File: Codecs.scala From scalatest-embedded-kafka with MIT License
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization._

object Codecs {
  implicit val stringSerializer: Serializer[String] = new StringSerializer()
  implicit val nullSerializer: Serializer[Array[Byte]] = new ByteArraySerializer()

  implicit val stringDeserializer: Deserializer[String] = new StringDeserializer()
  implicit val nullDeserializer: Deserializer[Array[Byte]] = new ByteArrayDeserializer()

  implicit val stringKeyValueCrDecoder
      : ConsumerRecord[String, String] => (String, String) =
    cr => (cr.key(), cr.value)
  implicit val stringValueCrDecoder: ConsumerRecord[String, String] => String =
    _.value()
  implicit val stringKeyValueTopicCrDecoder
      : ConsumerRecord[String, String] => (String, String, String) =
    cr => (cr.topic(), cr.key(), cr.value())

  implicit val keyNullValueCrDecoder
      : ConsumerRecord[String, Array[Byte]] => (String, Array[Byte]) =
    cr => (cr.key(), cr.value)
  implicit val nullValueCrDecoder
      : ConsumerRecord[String, Array[Byte]] => Array[Byte] =
    _.value()
  implicit val keyNullValueTopicCrDecoder
      : ConsumerRecord[String, Array[Byte]] => (String, String, Array[Byte]) =
    cr => (cr.topic(), cr.key(), cr.value())
}
Example 22
Source File: ConsumerExtensions.scala From scalatest-embedded-kafka with MIT License
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.{ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.KafkaException

import scala.util.Try

// Excerpt: the enclosing extension class that wraps a KafkaConsumer[K, V] as `consumer`
// is omitted in this listing; only the batch-polling helper is shown.
  private def getNextBatch[T](poll: Long, topics: Seq[String])(
      implicit decoder: ConsumerRecord[K, V] => T): Seq[T] =
    Try {
      import scala.collection.JavaConverters._
      consumer.subscribe(topics.asJava)
      topics.foreach(consumer.partitionsFor)
      val records = consumer.poll(java.time.Duration.ofMillis(poll))
      // use toList to force eager evaluation. toSeq is lazy
      records.iterator().asScala.toList.map(decoder(_))
    }.recover {
      case ex: KafkaException => throw new KafkaUnavailableException(ex)
    }.get
Example 23
Source File: CachedKafkaConsumer.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.kafka010

import java.{util => ju}

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.TopicPartition

import org.apache.spark.{SparkEnv, SparkException, TaskContext}
import org.apache.spark.internal.Logging

// Excerpt: the CachedKafkaConsumer class body and the companion object's `cache` and
// `CacheKey` definitions are omitted in this listing; only getOrCreate is shown.
  def getOrCreate(
      topic: String,
      partition: Int,
      kafkaParams: ju.Map[String, Object]): CachedKafkaConsumer = synchronized {
    val groupId = kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String]
    val topicPartition = new TopicPartition(topic, partition)
    val key = CacheKey(groupId, topicPartition)

    // If this is reattempt at running the task, then invalidate cache and start with
    // a new consumer
    if (TaskContext.get != null && TaskContext.get.attemptNumber > 1) {
      cache.remove(key)
      new CachedKafkaConsumer(topicPartition, kafkaParams)
    } else {
      if (!cache.containsKey(key)) {
        cache.put(key, new CachedKafkaConsumer(topicPartition, kafkaParams))
      }
      cache.get(key)
    }
  }