org.apache.kafka.common.TopicPartition Scala Examples

The following examples show how to use org.apache.kafka.common.TopicPartition in Scala. Each example is taken from an open-source project; the project, source file, and license are noted above the code.
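Before the project examples, here is a minimal, self-contained sketch (the topic name "orders" and the surrounding object are placeholders, not taken from any of the projects below). It shows what a TopicPartition is: an immutable (topic, partition) pair with value-based equality, which is why the examples below routinely use it as a map key for offsets.

import org.apache.kafka.common.TopicPartition

object TopicPartitionBasics extends App {
  // A TopicPartition is just an immutable (topic, partition) pair.
  val tp = new TopicPartition("orders", 0)
  println(s"topic=${tp.topic()}, partition=${tp.partition()}")

  // equals/hashCode are value-based, so TopicPartition works well as a map key,
  // e.g. for tracking per-partition offsets.
  val offsets: Map[TopicPartition, Long] = Map(tp -> 42L)
  println(offsets(new TopicPartition("orders", 0))) // prints 42
}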
Example 1
Source File: KafkaTopicInfo.scala    From matcher   with MIT License
package tools

import java.io.File

import akka.actor.ActorSystem
import com.typesafe.config.ConfigFactory
import com.wavesplatform.dex.queue.KafkaMatcherQueue.eventDeserializer
import com.wavesplatform.dex.queue.{QueueEvent, QueueEventWithMeta}
import com.wavesplatform.dex.settings.toConfigOps
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConverters._
import scala.concurrent.duration.DurationInt

object KafkaTopicInfo extends App {
  implicit val system: ActorSystem = ActorSystem()

  val configFile = new File(args(0))
  val topic      = args(1)
  val from       = args(2).toLong
  val max        = args(3).toInt

  println(s"""configFile: ${configFile.getAbsolutePath}
             |topic: $topic
             |from: $from
             |max: $max""".stripMargin)

  val requestTimeout = java.time.Duration.ofNanos(5.seconds.toNanos)

  val config = ConfigFactory
    .parseString("""waves.dex.events-queue.kafka.consumer.client {
                   |  client.id = "kafka-topics-info"
                   |  enable.auto.commit = false
                   |  auto.offset.reset = earliest
                   |}
                   |
                   |""".stripMargin)
    .withFallback {
      ConfigFactory
        .parseFile(configFile)
        .withFallback(ConfigFactory.defaultApplication())
        .withFallback(ConfigFactory.defaultReference())
        .resolve()
        .getConfig("waves.dex.events-queue.kafka")
    }

  val consumer = new KafkaConsumer[String, QueueEvent](
    config.getConfig("waves.dex.events-queue.kafka.consumer.client").toProperties,
    new StringDeserializer,
    eventDeserializer
  )

  try {
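    // Manual partition assignment (rather than subscribe) lets us seek to an absolute offset below.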
    val topicPartition  = new TopicPartition(topic, 0)
    val topicPartitions = java.util.Collections.singletonList(topicPartition)
    consumer.assign(topicPartitions)

    {
      val r = consumer.partitionsFor(topic, requestTimeout)
      println(s"Partitions:\n${r.asScala.mkString("\n")}")
    }

    {
      val r = consumer.endOffsets(topicPartitions, requestTimeout)
      println(s"End offsets for $topicPartition: ${r.asScala.mkString(", ")}")
    }

    consumer.seek(topicPartition, from)

    val pollDuration = java.time.Duration.ofNanos(1.seconds.toNanos)
    val lastOffset    = from + max
    var continue      = true
    while (continue) {
      println(s"Reading from Kafka")

      val xs = consumer.poll(pollDuriation).asScala.toVector
      xs.foreach { msg =>
        println(QueueEventWithMeta(msg.offset(), msg.timestamp(), msg.value()))
      }

      xs.lastOption.foreach { x =>
        if (x.offset() >= lastOffset) continue = false // stop once the requested range has been read
      }
    }
  } finally {
    consumer.close()
  }
} 
Example 2
Source File: PulsarSinkTask.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.pulsar.sink

import java.util
import java.util.UUID

import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.pulsar.config.{PulsarConfigConstants, PulsarSinkConfig, PulsarSinkSettings}
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._



  override def stop(): Unit = {
    logger.info("Stopping Pulsar sink.")
    writer.foreach(w => w.close)
    progressCounter.empty
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {
    require(writer.nonEmpty, "Writer is not set!")
    writer.foreach(w => w.flush)
  }

  override def version: String = manifest.version()
} 
Example 3
Source File: JMSSinkTask.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.jms.sink

import java.util

import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.jms.config.{JMSConfig, JMSConfigConstants, JMSSettings}
import com.datamountaineer.streamreactor.connect.jms.sink.writer.JMSWriter
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._


  override def stop(): Unit = {
    logger.info("Stopping JMS sink.")
    writer.foreach(w => w.close())
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {
    //TODO
    //have the writer expose a is busy; can expose an await using a countdownlatch internally
  }

  override def version: String = manifest.version()
} 
Example 4
Source File: KuduSinkTask.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.kudu.sink

import java.util

import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.kudu.config.{KuduConfig, KuduConfigConstants, KuduSettings}
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._


  override def stop(): Unit = {
    logger.info("Stopping Kudu sink.")
    writer.foreach(w => w.close())
    progressCounter.empty
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {
    require(writer.nonEmpty, "Writer is not set!")
    writer.foreach(w => w.flush())
  }

  override def version: String = manifest.version()
} 
Example 5
Source File: ElasticSinkTask.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.elastic6

import java.util

import com.datamountaineer.streamreactor.connect.elastic6.config.{ElasticConfig, ElasticConfigConstants, ElasticSettings}
import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._

class ElasticSinkTask extends SinkTask with StrictLogging {
  private var writer: Option[ElasticJsonWriter] = None
  private val progressCounter = new ProgressCounter
  private var enableProgress: Boolean = false
  private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation)

  
  override def stop(): Unit = {
    logger.info("Stopping Elastic sink.")
    writer.foreach(w => w.close())
    progressCounter.empty
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {
    logger.info("Flushing Elastic Sink")
  }

  override def version: String = manifest.version()
} 
Example 6
Source File: PulsarSinkTaskTest.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.pulsar.sink

import java.util

import com.datamountaineer.streamreactor.connect.pulsar.config.PulsarConfigConstants
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.SinkTaskContext
import org.mockito.MockitoSugar
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import scala.collection.JavaConverters._


class PulsarSinkTaskTest extends AnyWordSpec with Matchers with MockitoSugar {

  val pulsarTopic = "persistent://landoop/standalone/connect/kafka-topic"

  "should start a Sink" in {
    val props = Map(
      PulsarConfigConstants.HOSTS_CONFIG -> "pulsar://localhost:6650",
      PulsarConfigConstants.KCQL_CONFIG -> s"INSERT INTO $pulsarTopic SELECT * FROM kafka_topic BATCH = 10 WITHPARTITIONER = SinglePartition WITHCOMPRESSION = ZLIB WITHDELAY = 1000"
    ).asJava


    val assignment: util.Set[TopicPartition] = new util.HashSet[TopicPartition]
    val partition: TopicPartition = new TopicPartition("kafka_topic", 1)
    //Set topic assignments
    assignment.add(partition)
    val context = mock[SinkTaskContext]
    when(context.assignment()).thenReturn(assignment)
    when(context.configs()).thenReturn(props)
    val task = new PulsarSinkTask()
    task.initialize(context)
    task.start(props)
  }
} 
Example 7
Source File: TwitterSinkTask.scala    From kafka-tweet-producer   with Apache License 2.0
package com.eneco.trading.kafka.connect.twitter

import java.util
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}
import scala.collection.JavaConverters._
import scala.util.{Success, Failure}

class TwitterSinkTask extends SinkTask with Logging {
  var writer: Option[SimpleTwitterWriter] = None

  override def start(props: util.Map[String, String]): Unit = {
    val sinkConfig = new TwitterSinkConfig(props)
    writer = Some(new TwitterWriter(
      sinkConfig.getString(TwitterSinkConfig.CONSUMER_KEY_CONFIG),
      sinkConfig.getPassword(TwitterSinkConfig.CONSUMER_SECRET_CONFIG).value,
      sinkConfig.getString(TwitterSinkConfig.TOKEN_CONFIG),
      sinkConfig.getPassword(TwitterSinkConfig.SECRET_CONFIG).value))
  }

  override def put(records: util.Collection[SinkRecord]): Unit =
    records.asScala
      .map(_.value.toString)
      .map(text => (text, writer match {
        case Some(writer) => writer.updateStatus(text)
        case None => Failure(new IllegalStateException("twitter writer is not set"))
      }))
      .foreach {
        case (text, result) => result match {
          case Success(id) => log.info(s"successfully tweeted `${text}`; got assigned id ${id}")
          case Failure(err) => log.warn(s"tweeting `${text}` failed: ${err.getMessage}")
        }
      }

  override def stop(): Unit = {
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]) = {
  }
  override def version(): String = ""
} 
Example 8
Source File: MetadataService.scala    From kafka-with-akka-streams-kafka-streams-tutorial   with Apache License 2.0
package com.lightbend.scala.kafkastreams.queriablestate

import java.net.InetAddress
import java.util

import com.lightbend.java.configuration.kafka.ApplicationKafkaParameters
import com.lightbend.scala.kafkastreams.store.HostStoreInfo
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.streams.KafkaStreams
import org.apache.kafka.streams.state.{HostInfo, StreamsMetadata}

import scala.collection.JavaConverters._


  def streamsMetadataForStore(store: String, port: Int): util.List[HostStoreInfo] = { // Get metadata for all of the instances of this Kafka Streams application hosting the store
    val metadata = streams.allMetadataForStore(store).asScala.toSeq match{
      case list if !list.isEmpty => list
      case _ => Seq(new StreamsMetadata(
        new HostInfo("localhost", port),
        new util.HashSet[String](util.Arrays.asList(ApplicationKafkaParameters.STORE_NAME)), util.Collections.emptySet[TopicPartition]))
    }
    mapInstancesToHostStoreInfo(metadata)
  }


  private def mapInstancesToHostStoreInfo(metadatas: Seq[StreamsMetadata]) = metadatas.map(convertMetadata(_)).asJava

  private def convertMetadata(metadata: StreamsMetadata) : HostStoreInfo = {
    val currentHost = metadata.host match{
      case host if host equalsIgnoreCase("localhost") =>
        try{InetAddress.getLocalHost.getHostAddress}
        catch {case t: Throwable => ""}
      case host => host
    }
     new HostStoreInfo(currentHost, metadata.port, metadata.stateStoreNames.asScala.toSeq)
  }
} 
Example 9
Source File: PlainSourceConsumer.scala    From kafka-scala-api   with Apache License 2.0
package com.example.consumer

import java.util.concurrent.atomic.AtomicLong

import akka.Done
import akka.kafka.Subscriptions
import akka.kafka.scaladsl.Consumer
import akka.stream.scaladsl.Sink
import com.example._
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

object PlainSourceConsumer extends App {

  val db = new DB
  db.loadOffset().foreach { fromOffset =>
    val partition = 0
    val subscription = Subscriptions.assignmentWithOffset(
      new TopicPartition(topic, partition) -> fromOffset
    )

    val done =
      Consumer.plainSource(consumerSettings, subscription)
        .mapAsync(1)(db.save)
        .runWith(Sink.ignore)
  }

}

//Zookeeper or DB storage mock
class DB {

  private val offset = new AtomicLong(2)

  def save(record: ConsumerRecord[Array[Byte], String]): Future[Done] = {
    println(s"DB.save: ${record.value}")
    offset.set(record.offset)
    Future.successful(Done)
  }

  def loadOffset(): Future[Long] =
    Future.successful(offset.get)

  def update(data: String): Future[Done] = {
    println(s"DB.update: $data")
    Future.successful(Done)
  }
} 
Example 10
Source File: CoapSinkTask.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.coap.sink

import java.util

import com.datamountaineer.streamreactor.connect.coap.configs.{CoapConstants, CoapSettings, CoapSinkConfig}
import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._
import scala.collection.mutable


class CoapSinkTask extends SinkTask with StrictLogging {
  private val writers = mutable.Map.empty[String, CoapWriter]
  private val progressCounter = new ProgressCounter
  private var enableProgress: Boolean = false
  private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation)

  override def start(props: util.Map[String, String]): Unit = {
    logger.info(scala.io.Source.fromInputStream(getClass.getResourceAsStream("/coap-sink-ascii.txt")).mkString + s" $version")
    logger.info(manifest.printManifest())

    val conf = if (context.configs().isEmpty) props else context.configs()

    val sinkConfig = CoapSinkConfig(conf)
    enableProgress = sinkConfig.getBoolean(CoapConstants.PROGRESS_COUNTER_ENABLED)
    val settings = CoapSettings(sinkConfig)

    //if error policy is retry set retry interval
    if (settings.head.errorPolicy.getOrElse(ErrorPolicyEnum.THROW).equals(ErrorPolicyEnum.RETRY)) {
      context.timeout(sinkConfig.getString(CoapConstants.ERROR_RETRY_INTERVAL).toLong)
    }
    settings.map(s => (s.kcql.getSource, CoapWriter(s))).map({ case (k, v) => writers.put(k, v) })
  }

  override def put(records: util.Collection[SinkRecord]): Unit = {
    records.asScala.map(r => writers(r.topic()).write(List(r)))
    val seq = records.asScala.toVector
    if (enableProgress) {
      progressCounter.update(seq)
    }
  }

  override def stop(): Unit = {
    writers.foreach({ case (t, w) =>
      logger.info(s"Shutting down writer for $t")
      w.stop()
    })
    progressCounter.empty
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {}

  override def version: String = manifest.version()

} 
Example 11
Source File: HazelCastSinkTask.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.hazelcast.sink

import java.util

import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.hazelcast.config.{HazelCastSinkConfig, HazelCastSinkConfigConstants, HazelCastSinkSettings}
import com.datamountaineer.streamreactor.connect.hazelcast.writers.HazelCastWriter
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._


  override def stop(): Unit = {
    logger.info("Stopping Hazelcast sink.")
    writer.foreach(w => w.close())
    progressCounter.empty
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {
    require(writer.nonEmpty, "Writer is not set!")
    writer.foreach(w => w.flush())
  }

  override def version: String = manifest.version()
} 
Example 12
Source File: MqttSinkTask.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.mqtt.sink

import java.util

import com.datamountaineer.streamreactor.connect.converters.sink.Converter
import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.mqtt.config.{MqttConfigConstants, MqttSinkConfig, MqttSinkSettings}
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.config.ConfigException
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}



  override def stop(): Unit = {
    logger.info("Stopping Mqtt sink.")
    writer.foreach(w => w.close)
    progressCounter.empty
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {
    require(writer.nonEmpty, "Writer is not set!")
    writer.foreach(w => w.flush)
  }

  override def version: String = manifest.version()
} 
Example 13
Source File: CassandraSinkTask.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.cassandra.sink

import java.util

import com.datamountaineer.streamreactor.connect.cassandra.config.{CassandraConfigSink, CassandraSettings}
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.errors.ConnectException
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}



  override def stop(): Unit = {
    logger.info("Stopping Cassandra sink.")
    writer.foreach(w => w.close())
    if (enableProgress) {
      progressCounter.empty
    }
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {}

  override def version: String = manifest.version()
} 
Example 14
Source File: MongoSinkTask.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.mongodb.sink

import java.util

import com.datamountaineer.streamreactor.connect.mongodb.config.{MongoConfig, MongoConfigConstants}
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.errors.ConnectException
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}


  override def put(records: util.Collection[SinkRecord]): Unit = {
    require(writer.nonEmpty, "Writer is not set!")
    val seq = records.asScala.toVector
    writer.foreach(w => w.write(seq))

    if (enableProgress) {
      progressCounter.update(seq)
    }
  }

  override def stop(): Unit = {
    logger.info("Stopping Mongo Database sink.")
    writer.foreach(w => w.close())
    progressCounter.empty
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {}

  override def version: String = manifest.version()
} 
Example 15
Source File: DocumentDbSinkTask.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.azure.documentdb.sink

import java.util

import com.datamountaineer.streamreactor.connect.azure.documentdb.DocumentClientProvider
import com.datamountaineer.streamreactor.connect.azure.documentdb.config.{DocumentDbConfig, DocumentDbConfigConstants, DocumentDbSinkSettings}
import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.microsoft.azure.documentdb.DocumentClient
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.errors.ConnectException
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}


  override def put(records: util.Collection[SinkRecord]): Unit = {
    require(writer.nonEmpty, "Writer is not set!")
    val seq = records.asScala.toVector
    writer.foreach(w => w.write(seq))

    if (enableProgress) {
      progressCounter.update(seq)
    }
  }

  override def stop(): Unit = {
    logger.info("Stopping Azure Document DB sink.")
    writer.foreach(w => w.close())
    progressCounter.empty()
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {}

  override def version: String = manifest.version()
} 
Example 16
Source File: GenericSinkTask.scala    From kafka-connect-sap   with Apache License 2.0
package com.sap.kafka.connect.sink

import java.util

import com.sap.kafka.connect.config.BaseConfig
import com.sap.kafka.utils.ConnectorException
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}
import org.slf4j.Logger


abstract class GenericSinkTask extends SinkTask with SinkWriter   {
    
    override def put(records: util.Collection[SinkRecord]): Unit = {
      log.info(s"PHASE - 1 - get records from kafka, Started for task with assigned " +
        s"partitions ${this.context.assignment().toString} ")
      log.info(s"Number of Records read for Sink: ${records.size}")
      retriesLeft = config.maxRetries
      if (records.isEmpty) {
        return
      }
      val recordsCount: Int = records.size
      log.trace("Received {} records for Sink", recordsCount)
      try {
        writer.write(records)
      } catch  {
        case exception : ConnectorException =>
          log.error("Write of {} records failed, remainingRetries={}", records.size(), retriesLeft)
          while (retriesLeft > 0) {
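            // A successful retry sets retriesLeft to -1, which exits this loop and skips the
            // rethrow below; only exhausting all retries (retriesLeft == 0) propagates the
            // original exception.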
            try {
              retriesLeft = retriesLeft - 1
              writer.close()
              writer = initWriter(config)
              writer.write(records)
              retriesLeft = -1
            } catch {
              case exception: ConnectorException =>
                // ignore
            }
          }

          if (retriesLeft == 0)
            throw exception
      } finally {
        log.info(s"PHASE - 1 ended for task, with assigned partitions ${this.context.assignment().toString}")
      }
    }


    override def stop(): Unit = {
      log.info("Stopping task")
      writer.close()
    }

    override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]) : Unit = {

    }

    override def version(): String = getClass.getPackage.getImplementationVersion


} 
Example 17
Source File: PrometheusMetricsReporterApiSpec.scala    From kafka4s   with Apache License 2.0
package com.banno.kafka.metrics.prometheus

import scala.collection.compat._
import cats.implicits._
import cats.effect.IO
import com.banno.kafka._
import com.banno.kafka.producer._
import com.banno.kafka.consumer._
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import io.prometheus.client.CollectorRegistry
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

import scala.jdk.CollectionConverters._
import scala.concurrent.ExecutionContext
import scala.concurrent.duration._

class PrometheusMetricsReporterApiSpec extends AnyFlatSpec with Matchers with InMemoryKafka {
  implicit val defaultContextShift = IO.contextShift(ExecutionContext.global)
  implicit val defaultConcurrent = IO.ioConcurrentEffect(defaultContextShift)
  implicit val defaultTimer = IO.timer(ExecutionContext.global)

  //when kafka clients change their metrics, this test will help identify the changes we need to make
  "Prometheus reporter" should "register Prometheus collectors for all known Kafka metrics" in {
    val topic = createTopic(2)
    val records =
      List(new ProducerRecord(topic, 0, "a", "a"), new ProducerRecord(topic, 1, "b", "b"))
    ProducerApi
      .resource[IO, String, String](
        BootstrapServers(bootstrapServer),
        MetricReporters[ProducerPrometheusReporter]
      )
      .use(
        p =>
          ConsumerApi
            .resource[IO, String, String](
              BootstrapServers(bootstrapServer),
              ClientId("c1"),
              MetricReporters[ConsumerPrometheusReporter]
            )
            .use(
              c1 =>
                ConsumerApi
                  .resource[IO, String, String](
                    BootstrapServers(bootstrapServer),
                    ClientId("c2"),
                    MetricReporters[ConsumerPrometheusReporter]
                  )
                  .use(
                    c2 =>
                      for {
                        _ <- p.sendSyncBatch(records)

                        _ <- c1.assign(topic, Map.empty[TopicPartition, Long])
                        _ <- c1.poll(1 second)
                        _ <- c1.poll(1 second)

                        _ <- c2.assign(topic, Map.empty[TopicPartition, Long])
                        _ <- c2.poll(1 second)
                        _ <- c2.poll(1 second)

                        _ <- IO.sleep(PrometheusMetricsReporterApi.defaultUpdatePeriod + (1 second))
                        _ <- p.close
                        _ <- c1.close
                        _ <- c2.close
                      } yield {
                        val registry = CollectorRegistry.defaultRegistry
                        registry.metricFamilySamples.asScala
                          .count(_.name.startsWith("kafka_producer")) should ===(56)
                        registry.metricFamilySamples.asScala
                          .find(_.name == "kafka_producer_record_send_total")
                          .map(_.samples.asScala.map(_.value)) should ===(Some(List(2)))

                        registry.metricFamilySamples.asScala
                          .count(_.name.startsWith("kafka_consumer")) should ===(50)
                        registry.metricFamilySamples.asScala
                          .find(_.name == "kafka_consumer_records_consumed_total")
                          .map(_.samples.asScala.map(_.value)) should ===(Some(List(2, 2)))
                        registry.metricFamilySamples.asScala
                          .find(_.name == "kafka_consumer_topic_records_consumed_total")
                          .map(_.samples.asScala.map(_.value)) should ===(Some(List(2, 2)))
                      }
                  )
            )
      )
      .unsafeRunSync()
  }

} 
Example 18
Source File: JsonUtilsSuite.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.kafka010

import org.apache.kafka.common.TopicPartition

import org.apache.spark.SparkFunSuite

class JsonUtilsSuite extends SparkFunSuite {

  test("parsing partitions") {
    val parsed = JsonUtils.partitions("""{"topicA":[0,1],"topicB":[4,6]}""")
    val expected = Array(
      new TopicPartition("topicA", 0),
      new TopicPartition("topicA", 1),
      new TopicPartition("topicB", 4),
      new TopicPartition("topicB", 6)
    )
    assert(parsed.toSeq === expected.toSeq)
  }

  test("parsing partitionOffsets") {
    val parsed = JsonUtils.partitionOffsets(
      """{"topicA":{"0":23,"1":-1},"topicB":{"0":-2}}""")

    assert(parsed(new TopicPartition("topicA", 0)) === 23)
    assert(parsed(new TopicPartition("topicA", 1)) === -1)
    assert(parsed(new TopicPartition("topicB", 0)) === -2)
  }
} 
Example 19
Source File: TwitterSinkTask.scala    From kafka-connect-twitter   with Apache License 2.0
package com.eneco.trading.kafka.connect.twitter

import java.util
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}
import scala.collection.JavaConverters._
import scala.util.{Success, Failure}

class TwitterSinkTask extends SinkTask with Logging {
  var writer: Option[SimpleTwitterWriter] = None

  override def start(props: util.Map[String, String]): Unit = {
    val sinkConfig = new TwitterSinkConfig(props)
    writer = Some(new TwitterWriter(
      sinkConfig.getString(TwitterSinkConfig.CONSUMER_KEY_CONFIG),
      sinkConfig.getPassword(TwitterSinkConfig.CONSUMER_SECRET_CONFIG).value,
      sinkConfig.getString(TwitterSinkConfig.TOKEN_CONFIG),
      sinkConfig.getPassword(TwitterSinkConfig.SECRET_CONFIG).value))
  }

  override def put(records: util.Collection[SinkRecord]): Unit =
    records.asScala
      .map(_.value.toString)
      .map(text => (text, writer match {
        case Some(writer) => writer.updateStatus(text)
        case None => Failure(new IllegalStateException("twitter writer is not set"))
      }))
      .foreach {
        case (text, result) => result match {
          case Success(id) => log.info(s"successfully tweeted `${text}`; got assigned id ${id}")
          case Failure(err) => log.warn(s"tweeting `${text}` failed: ${err.getMessage}")
        }
      }

  override def stop(): Unit = {
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]) = {
  }
  override def version(): String = ""
} 
Example 20
Source File: ConsumerSelfManaged.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka.examples

import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props}
import cakesolutions.kafka.KafkaConsumer
import cakesolutions.kafka.akka.KafkaConsumerActor.{Confirm, Subscribe}
import cakesolutions.kafka.akka.{ConsumerRecords, Extractor, KafkaConsumerActor, Offsets}
import com.typesafe.config.Config
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer

import scala.concurrent.duration._


  def apply(config: Config): ActorRef = {
    val consumerConf = KafkaConsumer.Conf(
      new StringDeserializer,
      new StringDeserializer,
      groupId = "groupId",
      enableAutoCommit = false,
      autoOffsetReset = OffsetResetStrategy.EARLIEST)
      .withConf(config)

    val actorConf = KafkaConsumerActor.Conf(1.seconds, 3.seconds)

    val system = ActorSystem()
    system.actorOf(Props(new ConsumerSelfManaged(consumerConf, actorConf)))
  }
}

class ConsumerSelfManaged(
  kafkaConfig: KafkaConsumer.Conf[String, String],
  actorConfig: KafkaConsumerActor.Conf) extends Actor with ActorLogging {

  val recordsExt: Extractor[Any, ConsumerRecords[String, String]] = ConsumerRecords.extractor[String, String]

  val consumer: ActorRef = context.actorOf(
    KafkaConsumerActor.props(kafkaConfig, actorConfig, self)
  )

  consumer ! Subscribe.ManualOffset(Offsets(Map((new TopicPartition("topic1", 0), 1))))

  override def receive: Receive = {

    // Records from Kafka
    case recordsExt(records) =>
      processRecords(records)
      sender() ! Confirm(records.offsets)
  }

  private def processRecords(records: ConsumerRecords[String, String]) = {
    records.pairs.foreach { case (key, value) =>
      log.info(s"Received [$key,$value]")
    }
    log.info(s"Batch complete, offsets: ${records.offsets}")
  }
} 
Example 21
Source File: AutoPartitionConsumerWithManualOffset.scala    From scala-kafka-client   with MIT License
package cakesolutions.kafka.examples

import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props}
import cakesolutions.kafka.KafkaConsumer
import cakesolutions.kafka.akka.KafkaConsumerActor._
import cakesolutions.kafka.akka.{ConsumerRecords, KafkaConsumerActor, Offsets}
import com.typesafe.config.{Config, ConfigFactory}
import org.apache.kafka.clients.consumer.OffsetResetStrategy
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer

import scala.concurrent.duration._


  def apply(config: Config): ActorRef = {
    val consumerConf = KafkaConsumer.Conf(
      new StringDeserializer,
      new StringDeserializer,
      groupId = "test_group",
      enableAutoCommit = false,
      autoOffsetReset = OffsetResetStrategy.EARLIEST)
      .withConf(config)

    val actorConf = KafkaConsumerActor.Conf(1.seconds, 3.seconds)

    val system = ActorSystem()
    system.actorOf(Props(new AutoPartitionConsumerWithManualOffset(consumerConf, actorConf)))
  }
}

class AutoPartitionConsumerWithManualOffset(
  kafkaConfig: KafkaConsumer.Conf[String, String],
  actorConfig: KafkaConsumerActor.Conf) extends Actor with ActorLogging {

  private val recordsExt = ConsumerRecords.extractor[String, String]

  private val consumer = context.actorOf(
    KafkaConsumerActor.props(kafkaConfig, actorConfig, self)
  )

  consumer ! Subscribe.AutoPartitionWithManualOffset(List("topic1"), assignedListener, revokedListener)

  override def receive: Receive = {

    // Records from Kafka
    case recordsExt(records) =>
      processRecords(records.pairs)
      sender() ! Confirm(records.offsets)
  }

  private def processRecords(records: Seq[(Option[String], String)]) =
    records.foreach { case (key, value) =>
      log.info(s"Received [$key,$value]")
    }

  private def assignedListener(tps: List[TopicPartition]): Offsets = {
    log.info("Partitions have been assigned" + tps.toString())

    // Should load the offsets from a persistent store and any related state
    val offsetMap = tps.map{ tp =>
      tp -> 0L
    }.toMap

    // Return the required offsets for the assigned partitions
    Offsets(offsetMap)
  }

  private def revokedListener(tps: List[TopicPartition]): Unit = {
    log.info("Partitions have been revoked" + tps.toString())
    // Opportunity to clear any state for the revoked partitions
    ()
  }
} 
Example 22
Source File: Commit.scala    From monix-kafka   with Apache License 2.0
package monix.kafka

import monix.eval.Task
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.clients.consumer.OffsetCommitCallback


trait Commit {
  def commitBatchSync(batch: Map[TopicPartition, Long]): Task[Unit]
  def commitBatchAsync(batch: Map[TopicPartition, Long], callback: OffsetCommitCallback): Task[Unit]
  final def commitBatchAsync(batch: Map[TopicPartition, Long]): Task[Unit] = commitBatchAsync(batch, null)
}

private[kafka] object Commit {

  val empty: Commit = new Commit {
    override def commitBatchSync(batch: Map[TopicPartition, Long]): Task[Unit] = Task.unit

    override def commitBatchAsync(batch: Map[TopicPartition, Long], callback: OffsetCommitCallback): Task[Unit] =
      Task.unit
  }
} 
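For context, the Commit trait above abstracts offset commits over a Map[TopicPartition, Long]. The following is only a rough sketch of what an implementation backed by a plain KafkaConsumer could look like; it is not monix-kafka's actual implementation (the class name ConsumerCommit is made up), and it glosses over the fact that KafkaConsumer is not thread-safe.

import java.util.{Map => JMap}

import monix.eval.Task
import org.apache.kafka.clients.consumer.{KafkaConsumer, OffsetAndMetadata, OffsetCommitCallback}
import org.apache.kafka.common.TopicPartition

import scala.collection.JavaConverters._

// Hypothetical example: delegate commits to a raw KafkaConsumer, converting the
// Long offsets into the OffsetAndMetadata values the consumer API expects.
final class ConsumerCommit[K, V](consumer: KafkaConsumer[K, V]) extends Commit {

  private def toMetadata(batch: Map[TopicPartition, Long]): JMap[TopicPartition, OffsetAndMetadata] =
    batch.map { case (tp, offset) => tp -> new OffsetAndMetadata(offset) }.asJava

  override def commitBatchSync(batch: Map[TopicPartition, Long]): Task[Unit] =
    Task(consumer.commitSync(toMetadata(batch)))

  override def commitBatchAsync(batch: Map[TopicPartition, Long], callback: OffsetCommitCallback): Task[Unit] =
    Task(consumer.commitAsync(toMetadata(batch), callback))
}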
Example 23
Source File: CommittableOffsetBatch.scala    From monix-kafka   with Apache License 2.0
package monix.kafka

import monix.eval.Task
import org.apache.kafka.clients.consumer.OffsetCommitCallback
import org.apache.kafka.common.TopicPartition


  def mergeByCommitCallback(committableOffsets: Seq[CommittableOffset]): List[CommittableOffsetBatch] = {
    if (committableOffsets.nonEmpty) {
      committableOffsets
        .groupBy(_.commitCallback)
        .mapValues(CommittableOffsetBatch(_))
        .values
        .toList
    } else {
      List.empty
    }
  }
} 
Example 24
Source File: MergeByCommitCallbackTest.scala    From monix-kafka   with Apache License 2.0
package monix.kafka

import monix.eval.Task
import monix.kafka.config.AutoOffsetReset
import monix.reactive.Observable
import org.apache.kafka.clients.producer.ProducerRecord
import org.scalatest.{FunSuite, Matchers}

import scala.concurrent.duration._
import scala.concurrent.Await
import monix.execution.Scheduler.Implicits.global
import org.apache.kafka.clients.consumer.OffsetCommitCallback
import org.apache.kafka.common.TopicPartition
import org.scalacheck.Gen
import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks

class MergeByCommitCallbackTest extends FunSuite with KafkaTestKit with ScalaCheckDrivenPropertyChecks with Matchers {

  val commitCallbacks: List[Commit] = List.fill(4)(new Commit {
    override def commitBatchSync(batch: Map[TopicPartition, Long]): Task[Unit] = Task.unit

    override def commitBatchAsync(batch: Map[TopicPartition, Long], callback: OffsetCommitCallback): Task[Unit] =
      Task.unit
  })

  val committableOffsetsGen: Gen[CommittableOffset] = for {
    partition <- Gen.posNum[Int]
    offset <- Gen.posNum[Long]
    commit <- Gen.oneOf(commitCallbacks)
  } yield CommittableOffset(new TopicPartition("topic", partition), offset, commit)

  test("merge by commit callback works") {
    forAll(Gen.nonEmptyListOf(committableOffsetsGen)) { offsets =>
      val partitions = offsets.map(_.topicPartition)
      val received: List[CommittableOffsetBatch] = CommittableOffsetBatch.mergeByCommitCallback(offsets)

      received.foreach { batch => partitions should contain allElementsOf batch.offsets.keys }

      received.size should be <= 4
    }
  }

  test("merge by commit callback for multiple consumers") {
    withRunningKafka {
      val count = 10000
      val topicName = "monix-kafka-merge-by-commit"

      val producerCfg = KafkaProducerConfig.default.copy(
        bootstrapServers = List("127.0.0.1:6001"),
        clientId = "monix-kafka-1-0-producer-test"
      )

      val producer = KafkaProducerSink[String, String](producerCfg, io)

      val pushT = Observable
        .range(0, count)
        .map(msg => new ProducerRecord(topicName, "obs", msg.toString))
        .bufferIntrospective(1024)
        .consumeWith(producer)

      val listT = Observable
        .range(0, 4)
        .mergeMap(i => createConsumer(i.toInt, topicName).take(500))
        .bufferTumbling(2000)
        .map(CommittableOffsetBatch.mergeByCommitCallback)
        .map { offsetBatches => assert(offsetBatches.length == 4) }
        .completedL

      Await.result(Task.parZip2(listT, pushT).runToFuture, 60.seconds)
    }
  }

  private def createConsumer(i: Int, topicName: String): Observable[CommittableOffset] = {
    val cfg = KafkaConsumerConfig.default.copy(
      bootstrapServers = List("127.0.0.1:6001"),
      groupId = s"kafka-tests-$i",
      autoOffsetReset = AutoOffsetReset.Earliest
    )

    KafkaConsumerObservable
      .manualCommit[String, String](cfg, List(topicName))
      .executeOn(io)
      .map(_.committableOffset)
  }
} 
Example 25
Source File: TestPreferredReplicaLeaderElection.scala    From CMAK   with Apache License 2.0
package kafka.manager.utils

import kafka.manager.utils.zero81.{PreferredLeaderElectionErrors, PreferredReplicaLeaderElectionCommand}
import org.apache.kafka.common.TopicPartition


class TestPreferredReplicaLeaderElection extends CuratorAwareTest {
  import PreferredLeaderElectionErrors._

  test("preferred replica leader election with empty set") {
    checkError[ElectionSetEmptyOnWrite] {
      withCurator { curator =>
        PreferredReplicaLeaderElectionCommand.writePreferredReplicaElectionData(curator,Set.empty)
      }
    }
  }

  test("preferred replica leader election") {
    withCurator { curator =>
      val set = Set(new TopicPartition("mytopic",1),new TopicPartition("mytopic",2),new TopicPartition("mytopic",3))
      PreferredReplicaLeaderElectionCommand.writePreferredReplicaElectionData(curator,set)
      val json: String = curator.getData.forPath(ZkUtils.PreferredReplicaLeaderElectionPath)
      assert(json == "{\"version\":1,\"partitions\":[{\"topic\":\"mytopic\",\"partition\":1},{\"topic\":\"mytopic\",\"partition\":2},{\"topic\":\"mytopic\",\"partition\":3}]}")
    }
  }

  test("preferred replica leader election already running") {
    checkError[ElectionAlreadyInProgress] {
      withCurator { curator =>
        val set = Set(new TopicPartition("mytopic", 1), new TopicPartition("mytopic", 2), new TopicPartition("mytopic", 3))
        PreferredReplicaLeaderElectionCommand.writePreferredReplicaElectionData(curator, set)
        val json: String = curator.getData.forPath(ZkUtils.PreferredReplicaLeaderElectionPath)
        assert(json == "{\"version\":1,\"partitions\":[{\"topic\":\"mytopic\",\"partition\":1},{\"topic\":\"mytopic\",\"partition\":2},{\"topic\":\"mytopic\",\"partition\":3}]}")
      }
    }
  }
} 
Example 26
Source File: CachedKafkaConsumer.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.kafka010

import java.{util => ju}

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.TopicPartition

import org.apache.spark.{SparkEnv, SparkException, TaskContext}
import org.apache.spark.internal.Logging



  def getOrCreate(
      topic: String,
      partition: Int,
      kafkaParams: ju.Map[String, Object]): CachedKafkaConsumer = synchronized {
    val groupId = kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String]
    val topicPartition = new TopicPartition(topic, partition)
    val key = CacheKey(groupId, topicPartition)

    // If this is reattempt at running the task, then invalidate cache and start with
    // a new consumer
    if (TaskContext.get != null && TaskContext.get.attemptNumber > 1) {
      cache.remove(key)
      new CachedKafkaConsumer(topicPartition, kafkaParams)
    } else {
      if (!cache.containsKey(key)) {
        cache.put(key, new CachedKafkaConsumer(topicPartition, kafkaParams))
      }
      cache.get(key)
    }
  }
} 
Example 27
Source File: JsonUtils.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.kafka010

import java.io.Writer

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.kafka.common.TopicPartition
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization


  def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = {
    val result = new HashMap[String, HashMap[Int, Long]]()
    partitionOffsets.foreach { case (tp, off) =>
        val parts = result.getOrElse(tp.topic, new HashMap[Int, Long])
        parts += tp.partition -> off
        result += tp.topic -> parts
    }
    Serialization.write(result)
  }
} 
Example 28
Source File: JsonUtilsSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.kafka010

import org.apache.kafka.common.TopicPartition

import org.apache.spark.SparkFunSuite

class JsonUtilsSuite extends SparkFunSuite {

  test("parsing partitions") {
    val parsed = JsonUtils.partitions("""{"topicA":[0,1],"topicB":[4,6]}""")
    val expected = Array(
      new TopicPartition("topicA", 0),
      new TopicPartition("topicA", 1),
      new TopicPartition("topicB", 4),
      new TopicPartition("topicB", 6)
    )
    assert(parsed.toSeq === expected.toSeq)
  }

  test("parsing partitionOffsets") {
    val parsed = JsonUtils.partitionOffsets(
      """{"topicA":{"0":23,"1":-1},"topicB":{"0":-2}}""")

    assert(parsed(new TopicPartition("topicA", 0)) === 23)
    assert(parsed(new TopicPartition("topicA", 1)) === -1)
    assert(parsed(new TopicPartition("topicB", 0)) === -2)
  }
} 
Example 29
Source File: KafkaConsumerProxy.scala    From hydra   with Apache License 2.0
package hydra.kafka.consumer

import akka.actor.Actor
import akka.pattern.pipe
import hydra.kafka.consumer.KafkaConsumerProxy._
import hydra.kafka.util.KafkaUtils
import org.apache.kafka.clients.consumer.Consumer
import org.apache.kafka.common.{PartitionInfo, TopicPartition}

import scala.collection.JavaConverters._
import scala.collection.immutable.Map
import scala.concurrent.Future

class KafkaConsumerProxy extends Actor {

  private var _defaultConsumer: Consumer[String, String] = _

  private implicit val ec = context.dispatcher

  override def preStart(): Unit = {
    _defaultConsumer = KafkaUtils.stringConsumerSettings.createKafkaConsumer()
  }

  override def receive: Receive = {
    case GetLatestOffsets(topic) =>
      val requestor = sender
      pipe(latestOffsets(topic).map(LatestOffsetsResponse(topic, _))) to requestor

    case GetPartitionInfo(topic) =>
      val requestor = sender
      pipe(partitionInfo(topic).map(PartitionInfoResponse(topic, _))) to requestor

    case ListTopics =>
      val requestor = sender
      pipe(listTopics().map(ListTopicsResponse(_))) to requestor
  }

  override def postStop(): Unit = {
    _defaultConsumer.close()
  }

  private def latestOffsets(
      topic: String
  ): Future[Map[TopicPartition, Long]] = {
    Future {
      val ts = _defaultConsumer
        .partitionsFor(topic)
        .asScala
        .map(pi => new TopicPartition(topic, pi.partition()))
      _defaultConsumer
        .endOffsets(ts.asJava)
        .asScala
        .map(tp => tp._1 -> tp._2.toLong)
        .toMap
    }
  }

  private def partitionInfo(topic: String): Future[Seq[PartitionInfo]] =
    Future(_defaultConsumer.partitionsFor(topic).asScala)

  private def listTopics(): Future[Map[String, Seq[PartitionInfo]]] = {
    Future(_defaultConsumer.listTopics().asScala.toMap)
      .map(res => res.mapValues(_.asScala.toSeq))
  }

}

object KafkaConsumerProxy {

  case class GetLatestOffsets(topic: String)

  case class LatestOffsetsResponse(
      topic: String,
      offsets: Map[TopicPartition, Long]
  )

  case class GetPartitionInfo(topic: String)

  case class PartitionInfoResponse(
      topic: String,
      partitionInfo: Seq[PartitionInfo]
  )

  case object ListTopics

  case class ListTopicsResponse(topics: Map[String, Seq[PartitionInfo]])

} 
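
A minimal sketch of talking to the proxy with Akka's ask pattern; the actor system name, topic and timeout are illustrative, and the message types are the ones defined in the companion object above.

import akka.actor.{ActorSystem, Props}
import akka.pattern.ask
import akka.util.Timeout
import hydra.kafka.consumer.KafkaConsumerProxy._
import org.apache.kafka.common.TopicPartition

import scala.concurrent.Future
import scala.concurrent.duration._

implicit val system: ActorSystem = ActorSystem("proxy-example")
implicit val timeout: Timeout = Timeout(5.seconds)
import system.dispatcher

val proxy = system.actorOf(Props[KafkaConsumerProxy], "kafka-consumer-proxy")

// Ask for the end offsets of every partition of a topic.
val latest: Future[Map[TopicPartition, Long]] =
  (proxy ? GetLatestOffsets("my-topic")).mapTo[LatestOffsetsResponse].map(_.offsets)
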
Example 38
Source File: TopicsEndpoint.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.kafka.endpoints

import akka.actor.ActorSelection
import akka.http.scaladsl.common.EntityStreamingSupport
import akka.kafka.Subscriptions
import akka.kafka.scaladsl.Consumer
import akka.pattern.ask
import akka.util.Timeout
import hydra.core.http.RouteSupport
import hydra.kafka.consumer.KafkaConsumerProxy.{GetLatestOffsets, LatestOffsetsResponse}
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition

import scala.collection.immutable.Map
import scala.concurrent.duration._
import scala.concurrent.{Await, ExecutionContext, Future}


class TopicsEndpoint(consumerProxy:ActorSelection)(implicit ec:ExecutionContext) extends RouteSupport {

  import hydra.kafka.util.KafkaUtils._

  implicit val jsonStreamingSupport = EntityStreamingSupport.json()

  override val route =
    path("transports" / "kafka" / "consumer" / "topics" / Segment) {
      topicName =>
        get {
          extractRequestContext { ctx =>
            parameters('format.?, 'group.?, 'n ? 10, 'start ? "earliest") {
              (format, groupId, n, startOffset) =>
                val settings = loadConsumerSettings[Any, Any](
                  format.getOrElse("avro"),
                  groupId.getOrElse("hydra"),
                  startOffset
                )
                val offsets = latestOffsets(topicName)
                val source = Consumer
                  .plainSource(settings, Subscriptions.topics(topicName))
                  .initialTimeout(5.seconds)
                  .zipWithIndex
                  .takeWhile(rec =>
                    rec._2 <= n && !shouldCancel(offsets, rec._1)
                  )
                  .map(rec => rec._1.value().toString)
                  .watchTermination()((_, termination) =>
                    termination.failed.foreach {
                      case cause => ctx.fail(cause)
                    }
                  )
                complete(source)

            }
          }
        }
    }

  def shouldCancel(
      fpartitions: Future[Map[TopicPartition, Long]],
      record: ConsumerRecord[Any, Any]
  ): Boolean = {
    if (fpartitions.isCompleted) {
      val partitions = Await.result(fpartitions, 1.millis)
      val tp = new TopicPartition(record.topic(), record.partition())
      partitions.get(tp) match {
        case Some(offset) => record.offset() >= offset
        case None         => false
      }
    } else {
      false
    }

  }

  private def latestOffsets(
      topic: String
  ): Future[Map[TopicPartition, Long]] = {
    implicit val timeout = Timeout(5 seconds)
    (consumerProxy ? GetLatestOffsets(topic))
      .mapTo[LatestOffsetsResponse]
      .map(_.offsets)
  }

} 
Example 39
Source File: KafkaConsumerProxySpec.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.kafka.consumer

import akka.actor.{ActorSystem, Props}
import akka.testkit.{ImplicitSender, TestKit}
import hydra.kafka.consumer.KafkaConsumerProxy._
import net.manub.embeddedkafka.{EmbeddedKafka, EmbeddedKafkaConfig}
import org.apache.kafka.common.TopicPartition
import org.scalatest.matchers.should.Matchers
import org.scalatest.funspec.AnyFunSpecLike
import org.scalatest.BeforeAndAfterAll

import scala.concurrent.duration._


class KafkaConsumerProxySpec
    extends TestKit(ActorSystem("test"))
    with Matchers
    with AnyFunSpecLike
    with BeforeAndAfterAll
    with ImplicitSender {

  implicit val config =
    EmbeddedKafkaConfig(kafkaPort = 8092, zooKeeperPort = 3181)

  override def beforeAll() = {
    super.beforeAll()
    EmbeddedKafka.start()
    EmbeddedKafka.createCustomTopic("test-consumer1")
    EmbeddedKafka.createCustomTopic("test-consumer2")
  }

  override def afterAll() = {
    super.afterAll()
    EmbeddedKafka.stop()
    TestKit.shutdownActorSystem(system, verifySystemShutdown = true)
  }

  lazy val kafkaProxy = system.actorOf(Props[KafkaConsumerProxy])

  describe("When using KafkaConsumerProxy") {
    it("gets latest offsets for a topic") {
      kafkaProxy ! GetLatestOffsets("test-consumer1")
      expectMsg(
        10.seconds,
        LatestOffsetsResponse(
          "test-consumer1",
          Map(new TopicPartition("test-consumer1", 0) -> 0L)
        )
      )
    }

    it("lists topics") {
      kafkaProxy ! ListTopics
      expectMsgPF(10.seconds) {
        case ListTopicsResponse(topics) =>
          topics.keys should contain allOf ("test-consumer1", "test-consumer2")
      }
    }

    it("gets partition info") {
      kafkaProxy ! GetPartitionInfo("test-consumer2")
      expectMsgPF(10.seconds) {
        case PartitionInfoResponse(topic, response) =>
          topic shouldBe "test-consumer2"
          response.map(p => p.partition()) shouldBe Seq(0)
      }
    }

    it("handles errors") {
      kafkaProxy ! GetPartitionInfo("test-consumer-unknown")
      expectMsgPF(10.seconds) {
        case PartitionInfoResponse(topic, response) =>
          response(0).leader().idString shouldBe "0"
          topic should startWith("test-consumer-unknown")
      }
    }
  }
} 
Example 40
Source File: TestConnector.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.core.connector.test

import java.util.ArrayList
import java.util.concurrent.LinkedBlockingQueue

import scala.concurrent.Future
import scala.concurrent.duration._
import scala.collection.JavaConverters._

import org.apache.kafka.clients.producer.RecordMetadata
import org.apache.kafka.common.TopicPartition
import common.StreamLogging

import org.apache.openwhisk.common.Counter
import org.apache.openwhisk.core.connector.Message
import org.apache.openwhisk.core.connector.MessageConsumer
import org.apache.openwhisk.core.connector.MessageProducer

class TestConnector(topic: String, override val maxPeek: Int, allowMoreThanMax: Boolean)
    extends MessageConsumer
    with StreamLogging {

  override def peek(duration: FiniteDuration, retry: Int = 0) = {
    val msgs = new ArrayList[Message]
    queue.synchronized {
      queue.drainTo(msgs, if (allowMoreThanMax) Int.MaxValue else maxPeek)
      msgs.asScala map { m =>
        offset += 1
        (topic, -1, offset, m.serialize.getBytes)
      }
    }
  }

  override def commit(retry: Int = 0) = {
    if (throwCommitException) {
      throw new Exception("commit failed")
    } else {
      // nothing to do
    }
  }

  def occupancy = queue.size

  def send(msg: Message): Future[RecordMetadata] = {
    producer.send(topic, msg)
  }

  def send(msgs: Seq[Message]): Future[RecordMetadata] = {
    import scala.language.reflectiveCalls
    producer.sendBulk(topic, msgs)
  }

  def close() = {
    closed = true
    producer.close()
  }

  private val producer = new MessageProducer {
    def send(topic: String, msg: Message, retry: Int = 0): Future[RecordMetadata] = {
      queue.synchronized {
        if (queue.offer(msg)) {
          logging.info(this, s"put: $msg")
          Future.successful(new RecordMetadata(new TopicPartition(topic, 0), 0, queue.size, -1, Long.box(-1L), -1, -1))
        } else {
          logging.error(this, s"put failed: $msg")
          Future.failed(new IllegalStateException("failed to write msg"))
        }
      }
    }

    def sendBulk(topic: String, msgs: Seq[Message]): Future[RecordMetadata] = {
      queue.synchronized {
        if (queue.addAll(msgs.asJava)) {
          logging.info(this, s"put: ${msgs.length} messages")
          Future.successful(new RecordMetadata(new TopicPartition(topic, 0), 0, queue.size, -1, Long.box(-1L), -1, -1))
        } else {
          logging.error(this, s"put failed: ${msgs.length} messages")
          Future.failed(new IllegalStateException("failed to write msg"))
        }
      }
    }

    def close() = {}
    def sentCount() = counter.next()
    val counter = new Counter()
  }

  var throwCommitException = false
  private val queue = new LinkedBlockingQueue[Message]()
  @volatile private var closed = false
  private var offset = -1L
} 
Example 41
Source File: SubscribedConsumer.scala    From zio-kafka   with Apache License 2.0 5 votes vote down vote up
package zio.kafka.consumer

import org.apache.kafka.common.TopicPartition
import zio.RIO
import zio.blocking.Blocking
import zio.clock.Clock
import zio.stream.ZStream
import zio.kafka.serde.Deserializer

class SubscribedConsumer(
  private val underlying: RIO[Blocking, Consumer.Service]
) {

  def partitionedStream[R, K, V](keyDeserializer: Deserializer[R, K], valueDeserializer: Deserializer[R, V]): ZStream[
    Clock with Blocking,
    Throwable,
    (TopicPartition, ZStream[R, Throwable, CommittableRecord[K, V]])
  ] =
    ZStream.fromEffect(underlying).flatMap(_.partitionedStream(keyDeserializer, valueDeserializer))

  def plainStream[R, K, V](
    keyDeserializer: Deserializer[R, K],
    valueDeserializer: Deserializer[R, V]
  ): ZStream[R with Clock with Blocking, Throwable, CommittableRecord[K, V]] =
    partitionedStream(keyDeserializer, valueDeserializer).flatMapPar(n = Int.MaxValue)(_._2)
}

class SubscribedConsumerFromEnvironment(
  private val underlying: RIO[Blocking with Consumer, Consumer.Service]
) {

  def partitionedStream[R, K, V](keyDeserializer: Deserializer[R, K], valueDeserializer: Deserializer[R, V]): ZStream[
    Clock with Blocking with Consumer,
    Throwable,
    (TopicPartition, ZStream[R, Throwable, CommittableRecord[K, V]])
  ] =
    ZStream.fromEffect(underlying).flatMap(_.partitionedStream(keyDeserializer, valueDeserializer))

  def plainStream[R, K, V](
    keyDeserializer: Deserializer[R, K],
    valueDeserializer: Deserializer[R, V]
  ): ZStream[R with Clock with Blocking with Consumer, Throwable, CommittableRecord[K, V]] =
    partitionedStream(keyDeserializer, valueDeserializer).flatMapPar(n = Int.MaxValue)(_._2)
} 
Example 42
Source File: RebalanceListener.scala    From zio-kafka   with Apache License 2.0 5 votes vote down vote up
package zio.kafka.consumer.internal
import org.apache.kafka.clients.consumer.ConsumerRebalanceListener
import org.apache.kafka.common.TopicPartition
import zio.{ Runtime, Task }
import scala.jdk.CollectionConverters._


final case class RebalanceListener(
  onAssigned: Set[TopicPartition] => Task[Unit],
  onRevoked: Set[TopicPartition] => Task[Unit]
) {

  def ++(that: RebalanceListener) =
    RebalanceListener(
      assigned => onAssigned(assigned) *> that.onAssigned(assigned),
      revoked => onRevoked(revoked) *> that.onRevoked(revoked)
    )

  def toKafka(runtime: Runtime[Any]): ConsumerRebalanceListener =
    new ConsumerRebalanceListener {
      override def onPartitionsRevoked(partitions: java.util.Collection[TopicPartition]): Unit = {
        runtime.unsafeRun(onRevoked(partitions.asScala.toSet))
        ()
      }

      override def onPartitionsAssigned(partitions: java.util.Collection[TopicPartition]): Unit = {
        runtime.unsafeRun(onAssigned(partitions.asScala.toSet))
        ()
      }
    }

}

object RebalanceListener {
  def onRevoked(action: Set[TopicPartition] => Task[Unit]): RebalanceListener =
    RebalanceListener(_ => Task.unit, action)
} 
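
A small sketch of composing listeners: the two listeners below only log, and the combined one could be converted with toKafka and handed to a raw KafkaConsumer subscription (the zio Runtime value is assumed to be in scope).

import zio.Task

val logAssignments = RebalanceListener(
  assigned => Task(println(s"assigned: $assigned")),
  revoked => Task(println(s"revoked: $revoked"))
)
val cleanupOnRevoke = RebalanceListener.onRevoked(tps => Task(println(s"cleaning up $tps")))

// Runs both listeners' effects on every rebalance callback.
val combined: RebalanceListener = logAssignments ++ cleanupOnRevoke

// val kafkaListener = combined.toKafka(runtime)  // runtime: zio.Runtime[Any], assumed in scope
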
Example 43
Source File: DiagnosticEvent.scala    From zio-kafka   with Apache License 2.0 5 votes vote down vote up
package zio.kafka.consumer.diagnostics

import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition

sealed trait DiagnosticEvent
object DiagnosticEvent {
  case class Poll(tpRequested: Set[TopicPartition], tpWithData: Set[TopicPartition], tpWithoutData: Set[TopicPartition])
      extends DiagnosticEvent
  case class Request(partition: TopicPartition) extends DiagnosticEvent

  sealed trait Commit extends DiagnosticEvent
  object Commit {
    case class Started(offsets: Map[TopicPartition, Long])                                extends Commit
    case class Success(offsets: Map[TopicPartition, OffsetAndMetadata])                   extends Commit
    case class Failure(offsets: Map[TopicPartition, OffsetAndMetadata], cause: Throwable) extends Commit
  }

  sealed trait Rebalance extends DiagnosticEvent
  object Rebalance {
    case class Revoked(partitions: Set[TopicPartition])  extends Rebalance
    case class Assigned(partitions: Set[TopicPartition]) extends Rebalance
  }
} 
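
For illustration, a consumer of these events might pattern match on the ADT above, e.g. inside a diagnostics callback; the logging is just a placeholder.

def logEvent(event: DiagnosticEvent): Unit = event match {
  case DiagnosticEvent.Poll(requested, withData, withoutData) =>
    println(s"poll: requested=$requested withData=$withData idle=$withoutData")
  case DiagnosticEvent.Commit.Failure(offsets, cause) =>
    println(s"commit of $offsets failed: ${cause.getMessage}")
  case DiagnosticEvent.Rebalance.Revoked(partitions) =>
    println(s"partitions revoked: $partitions")
  case other =>
    println(s"diagnostic event: $other")
}
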
Example 44
Source File: OffsetBatch.scala    From zio-kafka   with Apache License 2.0 5 votes vote down vote up
package zio.kafka.consumer

import org.apache.kafka.common.TopicPartition
import zio.{ Schedule, Task, ZIO }

sealed trait OffsetBatch {
  def offsets: Map[TopicPartition, Long]
  def commit: Task[Unit]
  def merge(offset: Offset): OffsetBatch
  def merge(offsets: OffsetBatch): OffsetBatch

  
  def commitOrRetry[R](policy: Schedule[R, Throwable, Any]): ZIO[R, Throwable, Unit] =
    Offset.commitOrRetry(commit, policy)
}

object OffsetBatch {
  val empty: OffsetBatch = EmptyOffsetBatch

  def apply(offsets: Iterable[Offset]): OffsetBatch = offsets.foldLeft(empty)(_ merge _)
}

private final case class OffsetBatchImpl(
  offsets: Map[TopicPartition, Long],
  commitHandle: Map[TopicPartition, Long] => Task[Unit]
) extends OffsetBatch {
  def commit: Task[Unit] = commitHandle(offsets)

  def merge(offset: Offset) =
    copy(
      offsets = offsets + (offset.topicPartition -> (offsets
        .getOrElse(offset.topicPartition, -1L) max offset.offset))
    )

  def merge(otherOffsets: OffsetBatch) = {
    val newOffsets = Map.newBuilder[TopicPartition, Long]
    newOffsets ++= offsets
    otherOffsets.offsets.foreach {
      case (tp, offset) =>
        val existing = offsets.getOrElse(tp, -1L)
        if (existing < offset)
          newOffsets += tp -> offset
    }

    copy(offsets = newOffsets.result())
  }
}

case object EmptyOffsetBatch extends OffsetBatch {
  val offsets: Map[TopicPartition, Long]       = Map()
  val commit                                   = Task.unit
  def merge(offset: Offset): OffsetBatch       = offset.batch
  def merge(offsets: OffsetBatch): OffsetBatch = offsets
} 
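
As a usage sketch, per-record offsets are typically folded into a single batch and committed once per chunk; CommittableRecord is shown in a later example, and the record list here is assumed.

import zio.Task

// One commit covering the highest offset seen per topic-partition in the chunk.
def commitAll(records: List[CommittableRecord[String, String]]): Task[Unit] =
  OffsetBatch(records.map(_.offset)).commit
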
Example 45
Source File: Offset.scala    From zio-kafka   with Apache License 2.0 5 votes vote down vote up
package zio.kafka.consumer

import org.apache.kafka.clients.consumer.RetriableCommitFailedException
import org.apache.kafka.common.TopicPartition
import zio.{ Schedule, Task, ZIO }

sealed trait Offset {
  def topicPartition: TopicPartition
  def offset: Long
  def commit: Task[Unit]
  def batch: OffsetBatch

  
  def commitOrRetry[R](policy: Schedule[R, Throwable, Any]): ZIO[R, Throwable, Unit] =
    Offset.commitOrRetry(commit, policy)
}

object Offset {
  private[consumer] def commitOrRetry[R, B](
    commit: Task[Unit],
    policy: Schedule[R, Throwable, B]
  ): ZIO[R, Throwable, Unit] =
    commit.retry(
      Schedule.doWhile[Throwable] {
        case _: RetriableCommitFailedException => true
        case _                                 => false
      } && policy
    )
}

private final case class OffsetImpl(
  topicPartition: TopicPartition,
  offset: Long,
  commitHandle: Map[TopicPartition, Long] => Task[Unit]
) extends Offset {
  def commit: Task[Unit] = commitHandle(Map(topicPartition    -> offset))
  def batch: OffsetBatch = OffsetBatchImpl(Map(topicPartition -> offset), commitHandle)
} 
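
A minimal sketch of the retrying commit: the retry count is illustrative, and a delay could be added by intersecting the schedule with Schedule.exponential or Schedule.spaced.

import zio.{Schedule, Task}

// Retries only on RetriableCommitFailedException (see commitOrRetry above), up to 5 times.
def commitWithRetry(offset: Offset): Task[Unit] =
  offset.commitOrRetry(Schedule.recurs(5))
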
Example 46
Source File: CommittableRecord.scala    From zio-kafka   with Apache License 2.0 5 votes vote down vote up
package zio.kafka.consumer

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import zio.{ RIO, Task }
import zio.kafka.serde.Deserializer

final case class CommittableRecord[K, V](record: ConsumerRecord[K, V], offset: Offset) {
  def deserializeWith[R, K1, V1](
    keyDeserializer: Deserializer[R, K1],
    valueDeserializer: Deserializer[R, V1]
  )(implicit ev1: K <:< Array[Byte], ev2: V <:< Array[Byte]): RIO[R, CommittableRecord[K1, V1]] =
    for {
      key   <- keyDeserializer.deserialize(record.topic(), record.headers(), record.key())
      value <- valueDeserializer.deserialize(record.topic(), record.headers(), record.value())
    } yield {
      copy(
        record = new ConsumerRecord[K1, V1](
          record.topic(),
          record.partition(),
          record.offset(),
          record.timestamp(),
          record.timestampType(),
          ConsumerRecord.NULL_CHECKSUM, // Checksum is deprecated
          record.serializedKeySize(),
          record.serializedValueSize(),
          key,
          value,
          record.headers()
        )
      )
    }

  def key: K          = record.key
  def value: V        = record.value()
  def partition: Int  = record.partition()
  def timestamp: Long = record.timestamp()
}

object CommittableRecord {
  def apply[K, V](
    record: ConsumerRecord[K, V],
    commitHandle: Map[TopicPartition, Long] => Task[Unit]
  ): CommittableRecord[K, V] =
    CommittableRecord(
      record,
      OffsetImpl(new TopicPartition(record.topic(), record.partition()), record.offset(), commitHandle)
    )
} 
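
As a sketch, a raw byte-array record can be turned into a typed one with deserializeWith; Serde.string from zio.kafka.serde is assumed to be available as both key and value deserializer.

import zio.RIO
import zio.kafka.serde.Serde

def asStrings(
  raw: CommittableRecord[Array[Byte], Array[Byte]]
): RIO[Any, CommittableRecord[String, String]] =
  raw.deserializeWith(Serde.string, Serde.string)
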
Example 47
Source File: ZkUtils.scala    From CMAK   with Apache License 2.0 5 votes vote down vote up
package kafka.manager.utils

import java.nio.charset.StandardCharsets

import org.apache.curator.framework.CuratorFramework
import org.apache.kafka.common.TopicPartition
import org.apache.zookeeper.CreateMode
import org.apache.zookeeper.KeeperException.{NoNodeException, NodeExistsException}
import org.apache.zookeeper.data.Stat


object ZkUtils {
  // toJson and the ZK path constants used by callers are defined elsewhere in
  // the kafka.manager.utils sources.

  def replicaAssignmentZkData(map: Map[String, Seq[Int]]): String = {
    toJson(Map("version" -> 1, "partitions" -> map))
  }

  def readData(curator: CuratorFramework, path: String): (String, Stat) = {
    val stat: Stat = new Stat()
    val dataStr: String = curator.getData.storingStatIn(stat).forPath(path)
    (dataStr, stat)
  }
  
  def readDataMaybeNull(curator: CuratorFramework, path: String): (Option[String], Stat) = {
    val stat: Stat = new Stat()
    try {
      val dataStr: String = curator.getData.storingStatIn(stat).forPath(path)
      (Option(dataStr), stat)
    } catch {
      case e: NoNodeException => {
        (None, stat)
      }
      case e2: Throwable => throw e2
    }
  }


  def getPartitionReassignmentZkData(partitionsToBeReassigned: Map[TopicPartition, Seq[Int]]): String = {
    toJson(Map("version" -> 1, "partitions" -> partitionsToBeReassigned.map(e => Map("topic" -> e._1.topic, "partition" -> e._1.partition,
      "replicas" -> e._2))))
  }
} 
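
For reference, a rough sketch of the JSON these helpers produce (topic names and replica ids are illustrative; exact field order depends on the underlying JSON library):

val assignmentJson = ZkUtils.replicaAssignmentZkData(Map("0" -> Seq(1, 2, 3)))
// roughly: {"version":1,"partitions":{"0":[1,2,3]}}

val reassignmentJson = ZkUtils.getPartitionReassignmentZkData(
  Map(new TopicPartition("events", 0) -> Seq(1, 2))
)
// roughly: {"version":1,"partitions":[{"topic":"events","partition":0,"replicas":[1,2]}]}
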
Example 48
Source File: PreferredReplicaLeaderElectionCommand.scala    From CMAK   with Apache License 2.0 5 votes vote down vote up
package kafka.manager.utils.zero81

import grizzled.slf4j.Logging
import kafka.manager.utils._
import org.apache.curator.framework.CuratorFramework
import org.apache.kafka.common.TopicPartition
import org.apache.zookeeper.KeeperException.NodeExistsException
import org.json4s.JsonAST._


object PreferredReplicaLeaderElectionCommand extends Logging {

  def parsePreferredReplicaElectionData(jsonString: String): Set[TopicPartition] = {
    parseJson(jsonString).findField(_._1 == "partitions") match {
      case Some((_, arr)) =>
        val result: List[TopicPartition] = for {
          JArray(elements) <- arr
          JObject(children) <- elements
          JField("topic", JString(t)) <- children
          JField("partition", JInt(p)) <- children
        } yield new TopicPartition(t, p.toInt)
        checkCondition(result.nonEmpty, PreferredLeaderElectionErrors.ElectionSetEmptyOnRead(jsonString))
        result.toSet
      case None =>
        throwError(PreferredLeaderElectionErrors.ElectionSetEmptyOnRead(jsonString))
    }
  }


  def writePreferredReplicaElectionData(curator: CuratorFramework,
                                        partitionsUndergoingPreferredReplicaElection: Set[TopicPartition]) {
    checkCondition(partitionsUndergoingPreferredReplicaElection.nonEmpty,PreferredLeaderElectionErrors.ElectionSetEmptyOnWrite)
    val zkPath = ZkUtils.PreferredReplicaLeaderElectionPath
    val partitionsList : Set[Map[String,Any]] =
      partitionsUndergoingPreferredReplicaElection.map(e => Map[String,Any]("topic" -> e.topic, "partition" -> e.partition))
    val jsonData = toJson(Map("version" -> 1, "partitions" -> partitionsList))
    try {
      ZkUtils.createPersistentPath(curator, zkPath, jsonData)
      logger.info("Created preferred replica election path with %s".format(jsonData))
    } catch {
      case nee: NodeExistsException =>
        val partitionsUndergoingPreferredReplicaElection =
          PreferredReplicaLeaderElectionCommand.parsePreferredReplicaElectionData(ZkUtils.readData(curator, zkPath)._1)
        throwError(PreferredLeaderElectionErrors.ElectionAlreadyInProgress(partitionsUndergoingPreferredReplicaElection))
      case e2: Throwable =>
        throwError(PreferredLeaderElectionErrors.UnhandledException)
    }
  }
}

object PreferredLeaderElectionErrors {
  class ElectionSetEmptyOnWrite private[PreferredLeaderElectionErrors] extends UtilError("Preferred replica election data is empty")
  class ElectionSetEmptyOnRead private[PreferredLeaderElectionErrors] (json: String) extends UtilError(s"Preferred replica election data is empty on read : $json")
  class ElectionAlreadyInProgress private[PreferredLeaderElectionErrors] (partitionsUndergoingPreferredReplicaElection: Set[TopicPartition]) extends UtilError(
    "Preferred replica leader election currently in progress for " +
    "%s. Aborting operation".format(partitionsUndergoingPreferredReplicaElection))
  class UnhandledException private[PreferredLeaderElectionErrors] extends UtilError("Unhandled exception")

  def ElectionSetEmptyOnRead(json: String) = new ElectionSetEmptyOnRead(json)
  val ElectionSetEmptyOnWrite = new ElectionSetEmptyOnWrite
  def ElectionAlreadyInProgress(set: Set[TopicPartition]) = new ElectionAlreadyInProgress(set)
  val UnhandledException = new UnhandledException
} 
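
A short sketch of parsing the election payload with the command above; the JSON value is illustrative but matches the shape written by writePreferredReplicaElectionData.

import org.apache.kafka.common.TopicPartition

val json =
  """{"version":1,"partitions":[{"topic":"events","partition":0},{"topic":"events","partition":1}]}"""

// Yields Set(events-0, events-1); an empty partition list raises ElectionSetEmptyOnRead.
val toElect: Set[TopicPartition] =
  PreferredReplicaLeaderElectionCommand.parsePreferredReplicaElectionData(json)
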
Example 49
Source File: ExampleApp.scala    From kafka4s   with Apache License 2.0 5 votes vote down vote up
package example1

import cats.effect._
import cats.implicits._
import com.banno.kafka._
import com.banno.kafka.admin._
import com.banno.kafka.schemaregistry._
import com.banno.kafka.consumer._
import com.banno.kafka.producer._
import com.sksamuel.avro4s.RecordFormat
import org.apache.kafka.clients.admin.NewTopic
import org.apache.kafka.clients.producer.ProducerRecord
import scala.concurrent.duration._
import org.apache.kafka.common.TopicPartition

final class ExampleApp[F[_]: Async: ContextShift] {
  import ExampleApp._

  // Change these for your environment as needed
  val topic = new NewTopic(s"example1.customers.v1", 1, 3.toShort)
  val kafkaBootstrapServers = "kafka.local:9092,kafka.local:9093"
  val schemaRegistryUri = "http://kafka.local:8081"

  val producerRecords: Vector[ProducerRecord[CustomerId, Customer]] = (1 to 10)
    .map(
      a =>
        new ProducerRecord(
          topic.name,
          CustomerId(a.toString),
          Customer(s"name-${a}", s"address-${a}")
        )
    )
    .toVector

  val producerResource: Resource[F, ProducerApi[F, CustomerId, Customer]] =
    ProducerApi.Avro4s.resource[F, CustomerId, Customer](
      BootstrapServers(kafkaBootstrapServers),
      SchemaRegistryUrl(schemaRegistryUri),
      ClientId("producer-example")
    )

  val consumerResource =
    ConsumerApi.Avro4s.resource[F, CustomerId, Customer](
      BootstrapServers(kafkaBootstrapServers),
      SchemaRegistryUrl(schemaRegistryUri),
      ClientId("consumer-example"),
      GroupId("consumer-example-group"),
      EnableAutoCommit(false)
    )

  val example: F[Unit] =
    for {
      _ <- Sync[F].delay(println("Starting kafka4s example"))

      _ <- AdminApi.createTopicsIdempotent[F](kafkaBootstrapServers, topic)
      _ <- Sync[F].delay(println(s"Created topic ${topic.name}"))

      schemaRegistry <- SchemaRegistryApi(schemaRegistryUri)
      _ <- schemaRegistry.registerKey[CustomerId](topic.name)
      _ <- Sync[F].delay(println(s"Registered key schema for topic ${topic.name}"))

      _ <- schemaRegistry.registerValue[Customer](topic.name)
      _ <- Sync[F].delay(println(s"Registered value schema for topic ${topic.name}"))

      _ <- producerResource.use(
        producer =>
          producerRecords.traverse_(
            pr =>
              producer.sendSync(pr) *> Sync[F]
                .delay(println(s"Wrote producer record: key ${pr.key} and value ${pr.value}"))
          )
      )

      _ <- consumerResource.use(
        consumer =>
          consumer.assign(topic.name, Map.empty[TopicPartition, Long]) *>
            consumer
              .recordStream(1.second)
              .take(producerRecords.size.toLong)
              .evalMap(
                cr =>
                  Sync[F]
                    .delay(println(s"Read consumer record: key ${cr.key} and value ${cr.value}"))
              )
              .compile
              .drain
      )

      _ <- Sync[F].delay(println("Finished kafka4s example"))
    } yield ()
}

object ExampleApp {
  case class CustomerId(id: String)
  case class Customer(name: String, address: String)
  implicit def customerIdRecordFormat = RecordFormat[CustomerId]
  implicit def customerRecordFormat = RecordFormat[Customer]

  def apply[F[_]: Async: ContextShift] = new ExampleApp[F]
} 
Example 50
Source File: ConsumerExtensionsSpec.scala    From embedded-kafka   with MIT License 5 votes vote down vote up
package net.manub.embeddedkafka

import net.manub.embeddedkafka.Codecs.stringValueCrDecoder
import net.manub.embeddedkafka.ConsumerExtensions._
import org.apache.kafka.clients.consumer.{
  ConsumerRecord,
  ConsumerRecords,
  KafkaConsumer
}
import org.apache.kafka.common.TopicPartition
import org.mockito.Mockito.{times, verify, when}
import org.scalatestplus.mockito.MockitoSugar

import scala.jdk.CollectionConverters._
import scala.concurrent.duration._

class ConsumerExtensionsSpec
    extends EmbeddedKafkaSpecSupport
    with MockitoSugar {

  "consumeLazily" should {
    "retry to get messages with the configured maximum number of attempts when poll fails" in {
      implicit val retryConf: ConsumerRetryConfig =
        ConsumerRetryConfig(2, 1.millis)

      val consumer = mock[KafkaConsumer[String, String]]
      val consumerRecords =
        new ConsumerRecords[String, String](
          Map
            .empty[TopicPartition, java.util.List[
              ConsumerRecord[String, String]
            ]]
            .asJava
        )

      when(consumer.poll(duration2JavaDuration(retryConf.poll)))
        .thenReturn(consumerRecords)

      consumer.consumeLazily[String]("topic")

      verify(consumer, times(retryConf.maximumAttempts))
        .poll(duration2JavaDuration(retryConf.poll))
    }

    "not retry to get messages with the configured maximum number of attempts when poll succeeds" in {
      implicit val retryConf: ConsumerRetryConfig =
        ConsumerRetryConfig(2, 1.millis)

      val consumer       = mock[KafkaConsumer[String, String]]
      val consumerRecord = mock[ConsumerRecord[String, String]]
      val consumerRecords = new ConsumerRecords[String, String](
        Map[TopicPartition, java.util.List[ConsumerRecord[String, String]]](
          new TopicPartition("topic", 1) -> List(consumerRecord).asJava
        ).asJava
      )

      when(consumer.poll(duration2JavaDuration(retryConf.poll)))
        .thenReturn(consumerRecords)

      consumer.consumeLazily[String]("topic")

      verify(consumer).poll(duration2JavaDuration(retryConf.poll))
    }

    "poll to get messages with the configured poll timeout" in {
      implicit val retryConf: ConsumerRetryConfig =
        ConsumerRetryConfig(1, 10.millis)

      val consumer = mock[KafkaConsumer[String, String]]
      val consumerRecords =
        new ConsumerRecords[String, String](
          Map
            .empty[TopicPartition, java.util.List[
              ConsumerRecord[String, String]
            ]]
            .asJava
        )

      when(consumer.poll(duration2JavaDuration(retryConf.poll)))
        .thenReturn(consumerRecords)

      consumer.consumeLazily[String]("topic")

      verify(consumer).poll(duration2JavaDuration(retryConf.poll))
    }
  }
} 
Example 51
Source File: ProcessingKafkaApplication.scala    From Akka-Cookbook   with MIT License 5 votes vote down vote up
package com.packt.chapter8

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerSettings, Subscriptions}
import akka.stream.{ActorMaterializer, ClosedShape}
import akka.stream.scaladsl.{Flow, GraphDSL, RunnableGraph, Sink, Source}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}

import scala.concurrent.duration._

object ProcessingKafkaApplication extends App {
  implicit val actorSystem = ActorSystem("SimpleStream")
  implicit val actorMaterializer = ActorMaterializer()

  val bootstrapServers = "localhost:9092"
  val kafkaTopic = "akka_streams_topic"
  val partition = 0
  val subscription = Subscriptions.assignment(new TopicPartition(kafkaTopic, partition))

  val consumerSettings = ConsumerSettings(actorSystem, new ByteArrayDeserializer, new StringDeserializer)
    .withBootstrapServers(bootstrapServers)
    .withGroupId("akka_streams_group")
    .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  val producerSettings = ProducerSettings(actorSystem, new ByteArraySerializer, new StringSerializer)
    .withBootstrapServers(bootstrapServers)

  val runnableGraph = RunnableGraph.fromGraph(GraphDSL.create() { implicit builder =>
    import GraphDSL.Implicits._

    val tickSource = Source.tick(0 seconds, 5 seconds, "Hello from Akka Streams using Kafka!")
    val kafkaSource = Consumer.plainSource(consumerSettings, subscription)
    val kafkaSink = Producer.plainSink(producerSettings)
    val printlnSink = Sink.foreach(println)

    val mapToProducerRecord = Flow[String].map(elem => new ProducerRecord[Array[Byte], String](kafkaTopic, elem))
    val mapFromConsumerRecord = Flow[ConsumerRecord[Array[Byte], String]].map(record => record.value())

    tickSource  ~> mapToProducerRecord   ~> kafkaSink
    kafkaSource ~> mapFromConsumerRecord ~> printlnSink

    ClosedShape
  })

  runnableGraph.run()
} 
Example 52
Source File: JsonUtils.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.kafka010

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.kafka.common.TopicPartition
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization


private object JsonUtils {
  private implicit val formats = Serialization.formats(NoTypeHints)

  def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = {
    val result = new HashMap[String, HashMap[Int, Long]]()
    implicit val ordering = new Ordering[TopicPartition] {
      override def compare(x: TopicPartition, y: TopicPartition): Int = {
        Ordering.Tuple2[String, Int].compare((x.topic, x.partition), (y.topic, y.partition))
      }
    }
    val partitions = partitionOffsets.keySet.toSeq.sorted  // sort for more determinism
    partitions.foreach { tp =>
        val off = partitionOffsets(tp)
        val parts = result.getOrElse(tp.topic, new HashMap[Int, Long])
        parts += tp.partition -> off
        result += tp.topic -> parts
    }
    Serialization.write(result)
  }
} 
Example 53
Source File: JsonUtilsSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.kafka010

import org.apache.kafka.common.TopicPartition

import org.apache.spark.SparkFunSuite

class JsonUtilsSuite extends SparkFunSuite {

  test("parsing partitions") {
    val parsed = JsonUtils.partitions("""{"topicA":[0,1],"topicB":[4,6]}""")
    val expected = Array(
      new TopicPartition("topicA", 0),
      new TopicPartition("topicA", 1),
      new TopicPartition("topicB", 4),
      new TopicPartition("topicB", 6)
    )
    assert(parsed.toSeq === expected.toSeq)
  }

  test("parsing partitionOffsets") {
    val parsed = JsonUtils.partitionOffsets(
      """{"topicA":{"0":23,"1":-1},"topicB":{"0":-2}}""")

    assert(parsed(new TopicPartition("topicA", 0)) === 23)
    assert(parsed(new TopicPartition("topicA", 1)) === -1)
    assert(parsed(new TopicPartition("topicB", 0)) === -2)
  }
} 
Example 54
Source File: KafkaEventLogSpec.scala    From akka-stream-eventsourcing   with Apache License 2.0 5 votes vote down vote up
package com.github.krasserm.ases.log

import akka.actor.ActorSystem
import akka.stream.scaladsl.{Sink, Source}
import akka.testkit.TestKit
import com.github.krasserm.ases._
import org.apache.kafka.common.TopicPartition
import org.scalatest.concurrent.ScalaFutures
import org.scalatest.time.{Millis, Seconds, Span}
import org.scalatest.{Matchers, WordSpecLike}

import scala.collection.immutable.Seq

class KafkaEventLogSpec extends TestKit(ActorSystem("test")) with WordSpecLike with Matchers with ScalaFutures with StreamSpec with KafkaSpec {
  implicit val pc = PatienceConfig(timeout = Span(5, Seconds), interval = Span(10, Millis))

  val kafkaEventLog: KafkaEventLog = new KafkaEventLog(host, port)

  "A Kafka event log" must {
    "provide a sink for writing events and a source for delivering replayed events" in {
      val topicPartition = new TopicPartition("p-1", 0)
      val events = Seq("a", "b", "c").map(Emitted(_, emitterId))
      val expected = durables(events).map(Delivered(_)) :+ Recovered

      Source(events).runWith(kafkaEventLog.sink(topicPartition)).futureValue
      kafkaEventLog.source[String](topicPartition).take(4).runWith(Sink.seq).futureValue should be(expected)
    }
    "provide a flow with an input port for writing events and and output port for delivering replayed and live events" in {
      val topicPartition = new TopicPartition("p-2", 0)
      val events1 = Seq("a", "b", "c").map(Emitted(_, emitterId))
      val events2 = Seq("d", "e", "f").map(Emitted(_, emitterId))
      val expected = (durables(events1).map(Delivered(_)) :+ Recovered) ++ durables(events2, offset = 3).map(Delivered(_))

      Source(events1).runWith(kafkaEventLog.sink(topicPartition)).futureValue
      Source(events2).via(kafkaEventLog.flow(topicPartition)).take(7).runWith(Sink.seq).futureValue should be(expected)
    }
    "provide a source that only delivers events of compatible types" in {
      val topicPartition = new TopicPartition("p-3", 0)
      val events = Seq("a", "b", 1, 2).map(Emitted(_, emitterId))
      val expected = durables(events).drop(2).map(Delivered(_)) :+ Recovered

      Source(events).runWith(kafkaEventLog.sink(topicPartition)).futureValue
      kafkaEventLog.source[Int](topicPartition).take(3).runWith(Sink.seq).futureValue should be(expected)
    }
  }
} 
Example 55
Source File: EventCollaborationSpec.scala    From akka-stream-eventsourcing   with Apache License 2.0 5 votes vote down vote up
package com.github.krasserm.ases

import akka.NotUsed
import akka.actor.ActorSystem
import akka.stream.scaladsl.{Flow, Sink}
import akka.testkit.TestKit
import com.github.krasserm.ases.log.{KafkaEventLog, KafkaSpec}
import org.apache.kafka.common.TopicPartition
import org.scalatest.concurrent.ScalaFutures
import org.scalatest.time.{Millis, Seconds, Span}
import org.scalatest.{Matchers, WordSpecLike}

import scala.collection.immutable.Seq

class EventCollaborationSpec extends TestKit(ActorSystem("test")) with WordSpecLike with Matchers with ScalaFutures with StreamSpec with KafkaSpec {
  import EventSourcingSpec._

  implicit val pc = PatienceConfig(timeout = Span(5, Seconds), interval = Span(10, Millis))

  val emitterId1 = "processor1"
  val emitterId2 = "processor2"

  val kafkaEventLog: KafkaEventLog =
    new log.KafkaEventLog(host, port)

  def processor(emitterId: String, topicPartition: TopicPartition): Flow[Request, Response, NotUsed] =
    EventSourcing(emitterId, 0, requestHandler, eventHandler).join(kafkaEventLog.flow(topicPartition))

  "A group of EventSourcing stages" when {
    "joined with a shared event log" can {
      "collaborate via publish-subscribe" in {
        val topicPartition = new TopicPartition("p-1", 0)    // shared topic partition
        val (pub1, sub1) = probes(processor(emitterId1, topicPartition)) // processor 1
        val (pub2, sub2) = probes(processor(emitterId2, topicPartition)) // processor 2

        pub1.sendNext(Increment(3))
        // Both processors receive event but
        // only processor 1 creates response
        sub1.requestNext(Response(3))

        pub2.sendNext(Increment(-4))
        // Both processors receive event but
        // only processor 2 creates response
        sub2.requestNext(Response(-1))

        // consume and verify events emitted by both processors
        kafkaEventLog.source[Incremented](topicPartition).via(log.replayed).map {
          case Durable(event, eid, _, sequenceNr) => (event, eid, sequenceNr)
        }.runWith(Sink.seq).futureValue should be(Seq(
          (Incremented(3), emitterId1, 0L),
          (Incremented(-4), emitterId2, 1L)
        ))
      }
    }
  }
} 
Example 56
Source File: BasicConsumerExample.scala    From kafka_training   with Apache License 2.0 5 votes vote down vote up
package com.malaska.kafka.training

import java.util
import java.util.{Collections, Properties}

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRebalanceListener, ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.TopicPartition


object BasicConsumerExample {
  def main(args:Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val topic = args(2)

    println("Setting up parameters")
    val props = new Properties()
    props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "TrainingConsumer")
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
    props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000")
    props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000")
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")

    println("Creating Consumer")
    val consumer = new KafkaConsumer[String,String](props)

    val listener = new RebalanceListener

    consumer.subscribe(Collections.singletonList(topic), listener)


    println("Starting Consumer")
    while (true) {
      val records = consumer.poll(1000)
      val it = records.iterator()
      while (it.hasNext) {
        val record = it.next()
        println("Received message: (" + record.key() + ", " + record.value() + ") at offset " + record.offset())
      }
    }
  }
}

class RebalanceListener extends ConsumerRebalanceListener {
  override def onPartitionsAssigned(collection: util.Collection[TopicPartition]): Unit = {
    print("Assigned Partitions:")
    val it = collection.iterator()
    while (it.hasNext) {
      print(it.next().partition() + ",")
    }
    println
  }

  override def onPartitionsRevoked(collection: util.Collection[TopicPartition]): Unit = {
    print("Revoked Partitions:")
    val it = collection.iterator()
    while (it.hasNext) {
      print(it.next().partition() + ",")
    }
    println
  }
} 
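
As a contrast to the subscribe-plus-rebalance-listener approach above, a hedged sketch of manually assigning a single partition (no consumer group rebalancing, so the listener is not involved); the starting offset is illustrative.

import java.util.{Collections, Properties}

import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition

def assignedConsumer(props: Properties, topic: String): KafkaConsumer[String, String] = {
  val consumer = new KafkaConsumer[String, String](props)
  val tp = new TopicPartition(topic, 0)
  consumer.assign(Collections.singletonList(tp)) // fixed assignment, no rebalance callbacks
  consumer.seek(tp, 0L)                          // start from offset 0 of that partition
  consumer
}
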
Example 57
Source File: IotHubSinkTask.scala    From toketi-kafka-connect-iothub   with MIT License 5 votes vote down vote up
package com.microsoft.azure.iot.kafka.connect.sink

import java.util

import com.microsoft.azure.iot.kafka.connect.source.JsonSerialization
import com.microsoft.azure.sdk.iot.service.{DeliveryAcknowledgement, Message}
import com.typesafe.scalalogging.LazyLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._

class IotHubSinkTask extends SinkTask with LazyLogging with JsonSerialization {

  // Protected for testing purposes
  protected     var messageSender     : Option[MessageSender]                = None
  protected     var acknowledgement   : DeliveryAcknowledgement              = DeliveryAcknowledgement.None
  private[this] var isClosing         : Boolean                              = false

  override def stop(): Unit = {
    logger.info("Stopping IotHubSink Task")
    if (this.messageSender.isDefined && !this.isClosing) {
      this.messageSender.synchronized {
        if (!this.isClosing) {
          this.isClosing = true
          logger.info("Closing IotHub clients")
          this.messageSender.get.close()
        }
      }
    }
  }

  override def put(records: util.Collection[SinkRecord]): Unit = {
    if (this.messageSender.isDefined && !this.isClosing) {
      this.messageSender.synchronized {
        if (!this.isClosing) {
          logger.info(s"Received ${records.size()} messages to be sent to devices. ")
          for (record: SinkRecord ← records.asScala) {
            val c2DMessage = C2DMessageConverter.validateSchemaAndGetMessage(record)
            this.sendMessage(c2DMessage)
          }
          logger.info(s"Started tasks to send ${records.size()} messages to devices.")
        }
      }
    } else {
      logger.info(s"Unable to send messages to devices - MessageSender is undefined " +
        s"= ${messageSender.isEmpty.toString}, isClosing = ${this.isClosing.toString}")
    }
  }

  private def sendMessage(c2DMessage: C2DMessage): Unit = {
    logger.info(s"Sending c2d message ${c2DMessage.toString}")
    val message = new Message(c2DMessage.message)
    message.setMessageId(c2DMessage.messageId)
    message.setDeliveryAcknowledgement(acknowledgement)
    if (c2DMessage.expiryTime.isDefined) {
      message.setExpiryTimeUtc(c2DMessage.expiryTime.get)
    }
    this.messageSender.get.sendMessage(c2DMessage.deviceId, message)
  }

  override def flush(offsets: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {}

  override def start(props: util.Map[String, String]): Unit = {
    logger.info("Starting IotHub Sink")
    val connectionString = props.get(IotHubSinkConfig.IotHubConnectionString)
    this.messageSender = Some(this.getMessageSender(connectionString))
    this.acknowledgement =
      DeliveryAcknowledgement.valueOf(props.get(IotHubSinkConfig.IotHubMessageDeliveryAcknowledgement))
  }

  protected def getMessageSender(connectionString: String): MessageSender = {
    new IotHubMessageSender(connectionString)
  }

  override def version(): String = getClass.getPackage.getImplementationVersion
} 
Example 58
Source File: JsonUtils.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.kafka010

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.kafka.common.TopicPartition
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization


private object JsonUtils {
  private implicit val formats = Serialization.formats(NoTypeHints)

  def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = {
    val result = new HashMap[String, HashMap[Int, Long]]()
    implicit val ordering = new Ordering[TopicPartition] {
      override def compare(x: TopicPartition, y: TopicPartition): Int = {
        Ordering.Tuple2[String, Int].compare((x.topic, x.partition), (y.topic, y.partition))
      }
    }
    val partitions = partitionOffsets.keySet.toSeq.sorted  // sort for more determinism
    partitions.foreach { tp =>
        val off = partitionOffsets(tp)
        val parts = result.getOrElse(tp.topic, new HashMap[Int, Long])
        parts += tp.partition -> off
        result += tp.topic -> parts
    }
    Serialization.write(result)
  }
} 
Example 59
Source File: JsonUtilsSuite.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.kafka010

import org.apache.kafka.common.TopicPartition

import org.apache.spark.SparkFunSuite

class JsonUtilsSuite extends SparkFunSuite {

  test("parsing partitions") {
    val parsed = JsonUtils.partitions("""{"topicA":[0,1],"topicB":[4,6]}""")
    val expected = Array(
      new TopicPartition("topicA", 0),
      new TopicPartition("topicA", 1),
      new TopicPartition("topicB", 4),
      new TopicPartition("topicB", 6)
    )
    assert(parsed.toSeq === expected.toSeq)
  }

  test("parsing partitionOffsets") {
    val parsed = JsonUtils.partitionOffsets(
      """{"topicA":{"0":23,"1":-1},"topicB":{"0":-2}}""")

    assert(parsed(new TopicPartition("topicA", 0)) === 23)
    assert(parsed(new TopicPartition("topicA", 1)) === -1)
    assert(parsed(new TopicPartition("topicB", 0)) === -2)
  }
} 
Example 60
Source File: RebalanceEvents.scala    From aecor   with MIT License 5 votes vote down vote up
package aecor.kafkadistributedprocessing.internal

import java.util

import aecor.data.Committable
import cats.effect.concurrent.Deferred
import cats.effect.implicits._
import cats.effect.{ ConcurrentEffect, Effect }
import cats.implicits._
import fs2.Stream
import fs2.concurrent.Queue
import org.apache.kafka.clients.consumer.ConsumerRebalanceListener
import org.apache.kafka.common
import org.apache.kafka.common.TopicPartition

import scala.collection.JavaConverters._

private[kafkadistributedprocessing] object RebalanceEvents {
  final class UsePartiallyApplied[F[_]] {
    def subscribe[A](
      f: ConsumerRebalanceListener => F[Unit]
    )(implicit F: ConcurrentEffect[F]): F[Stream[F, Committable[F, RebalanceEvent]]] =
      for {
        queue <- Queue.unbounded[F, Committable[F, RebalanceEvent]]
        listener = new Listener[F](
          event =>
            Deferred[F, Unit]
              .flatMap { completion =>
                queue.enqueue1(Committable(completion.complete(()), event)) >> completion.get
            }
        )
        _ <- f(listener)
      } yield queue.dequeue
  }

  def apply[F[_]]: UsePartiallyApplied[F] = new UsePartiallyApplied[F]

  sealed abstract class RebalanceEvent
  object RebalanceEvent {
    final case class PartitionsRevoked(partitions: Set[TopicPartition]) extends RebalanceEvent
    final case class PartitionsAssigned(partitions: Set[TopicPartition]) extends RebalanceEvent
  }

  private final class Listener[F[_]: Effect](processEvent: RebalanceEvent => F[Unit])
      extends ConsumerRebalanceListener {

    override def onPartitionsRevoked(partitions: util.Collection[common.TopicPartition]): Unit =
      processEvent(RebalanceEvent.PartitionsRevoked(partitions.asScala.toSet)).toIO
        .unsafeRunSync()

    override def onPartitionsAssigned(partitions: util.Collection[common.TopicPartition]): Unit =
      processEvent(RebalanceEvent.PartitionsAssigned(partitions.asScala.toSet)).toIO
        .unsafeRunSync()
  }
} 
Example 61
Source File: ConsumerStrategy.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.kafka010

import java.{util => ju}

import scala.collection.JavaConverters._

import org.apache.kafka.clients.consumer.{Consumer, KafkaConsumer}
import org.apache.kafka.clients.consumer.internals.NoOpConsumerRebalanceListener
import org.apache.kafka.common.TopicPartition


sealed trait ConsumerStrategy {
  def createConsumer(kafkaParams: ju.Map[String, Object]): Consumer[Array[Byte], Array[Byte]]
}

case class SubscribePatternStrategy(topicPattern: String) extends ConsumerStrategy {
  override def createConsumer(
      kafkaParams: ju.Map[String, Object]): Consumer[Array[Byte], Array[Byte]] = {
    val consumer = new KafkaConsumer[Array[Byte], Array[Byte]](kafkaParams)
    consumer.subscribe(
      ju.regex.Pattern.compile(topicPattern),
      new NoOpConsumerRebalanceListener())
    consumer
  }

  override def toString: String = s"SubscribePattern[$topicPattern]"
} 
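
For comparison, a hedged sketch of a fixed-assignment strategy in the same shape, written as if it sat next to the sealed trait in the same file; this is an illustration, not necessarily identical to Spark's own AssignStrategy.

case class AssignStrategy(partitions: Array[TopicPartition]) extends ConsumerStrategy {
  override def createConsumer(
      kafkaParams: ju.Map[String, Object]): Consumer[Array[Byte], Array[Byte]] = {
    val consumer = new KafkaConsumer[Array[Byte], Array[Byte]](kafkaParams)
    consumer.assign(ju.Arrays.asList(partitions: _*))
    consumer
  }

  override def toString: String = s"Assign[${partitions.mkString(", ")}]"
}
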
Example 62
Source File: JsonUtils.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.kafka010

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.kafka.common.TopicPartition
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization


private object JsonUtils {
  private implicit val formats = Serialization.formats(NoTypeHints)

  def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = {
    val result = new HashMap[String, HashMap[Int, Long]]()
    implicit val ordering = new Ordering[TopicPartition] {
      override def compare(x: TopicPartition, y: TopicPartition): Int = {
        Ordering.Tuple2[String, Int].compare((x.topic, x.partition), (y.topic, y.partition))
      }
    }
    val partitions = partitionOffsets.keySet.toSeq.sorted  // sort for more determinism
    partitions.foreach { tp =>
        val off = partitionOffsets(tp)
        val parts = result.getOrElse(tp.topic, new HashMap[Int, Long])
        parts += tp.partition -> off
        result += tp.topic -> parts
    }
    Serialization.write(result)
  }
} 
Example 63
Source File: CommitMarkerOffsetsActor.scala    From kmq   with Apache License 2.0 5 votes vote down vote up
package com.softwaremill.kmq.redelivery

import akka.actor.Actor
import com.softwaremill.kmq.KafkaClients
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.ByteArrayDeserializer

import scala.collection.JavaConverters._
import scala.concurrent.duration._

class CommitMarkerOffsetsActor(markerTopic: String, clients: KafkaClients) extends Actor with StrictLogging {

  private val consumer = clients.createConsumer(null, classOf[ByteArrayDeserializer], classOf[ByteArrayDeserializer])

  private var toCommit: Map[Partition, Offset] = Map()

  import context.dispatcher

  override def preStart(): Unit = {
    logger.info("Started commit marker offsets actor")
  }

  override def postStop(): Unit = {
    try consumer.close()
    catch {
      case e: Exception => logger.error("Cannot close commit offsets consumer", e)
    }

    logger.info("Stopped commit marker offsets actor")
  }

  override def receive: Receive = {
    case CommitOffset(p, o) =>
      // only updating if the current offset is smaller
      if (toCommit.get(p).fold(true)(_ < o))
        toCommit += p -> o

    case DoCommit =>
      try {
        commitOffsets()
        toCommit = Map()
      } finally context.system.scheduler.scheduleOnce(1.second, self, DoCommit)
  }

  private def commitOffsets(): Unit = if (toCommit.nonEmpty) {
    consumer.commitSync(toCommit.map { case (partition, offset) =>
      (new TopicPartition(markerTopic, partition), new OffsetAndMetadata(offset))
    }.asJava)

    logger.debug(s"Committed marker offsets: $toCommit")
  }
}
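
Finally, a hedged wiring sketch: the actor needs an initial DoCommit to start its self-rescheduling loop, and processors then report the highest safe offset per partition with CommitOffset (clients: KafkaClients and the CommitOffset/DoCommit messages with the Partition/Offset aliases are assumed to come from the surrounding kmq packages).

import akka.actor.{ActorSystem, Props}

val system = ActorSystem("kmq-example")
val committer = system.actorOf(
  Props(new CommitMarkerOffsetsActor("markers-topic", clients)), "commit-marker-offsets")

committer ! DoCommit              // kick off the periodic commit loop
committer ! CommitOffset(0, 42L)  // highest processed marker offset for partition 0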