org.apache.kafka.connect.source.SourceRecord Scala Examples
The following examples show how to use org.apache.kafka.connect.source.SourceRecord.
Each example notes the source file it was taken from and the open-source project it belongs to.
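For orientation, here is a minimal, self-contained sketch of the two SourceRecord constructors that appear most often in the examples below; the partition map, offset map, topic name, and payload are illustrative placeholders rather than values from any particular connector.

import java.util.Collections
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

object SourceRecordSketch extends App {
  // Hypothetical source partition and offset maps; real connectors derive these
  // from the external system (file path, queue name, row offset, etc.).
  val sourcePartition = Collections.singletonMap("source", "example-source")
  val sourceOffset    = Collections.singletonMap("position", Long.box(42L))

  // Value-only record: (sourcePartition, sourceOffset, topic, valueSchema, value).
  val valueOnly = new SourceRecord(
    sourcePartition, sourceOffset, "example-topic",
    Schema.STRING_SCHEMA, "some payload")

  // Keyed record: (sourcePartition, sourceOffset, topic, kafkaPartition,
  // keySchema, key, valueSchema, value); a null kafkaPartition lets Kafka choose.
  val keyed = new SourceRecord(
    sourcePartition, sourceOffset, "example-topic", null,
    Schema.STRING_SCHEMA, "some-key",
    Schema.STRING_SCHEMA, "some payload")

  println(valueOnly)
  println(keyed)
}

The source partition identifies where the data came from, and the source offset is what Kafka Connect persists so a task can resume from the last committed position after a restart.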
Example 1
Source File: TwitterStatusReader.scala From kafka-tweet-producer with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util
import java.util.concurrent.{TimeUnit, LinkedBlockingQueue, Executors}
import com.eneco.trading.kafka.connect.twitter.domain.TwitterStatus
import com.twitter.hbc.httpclient.BasicClient
import com.twitter.hbc.twitter4j.Twitter4jStatusClient
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord
import twitter4j._
import scala.collection.JavaConverters._
import Extensions._

class StatusEnqueuer(queue: LinkedBlockingQueue[Status]) extends StatusListener with Logging {
  override def onStallWarning(stallWarning: StallWarning) = log.warn("onStallWarning")

  override def onDeletionNotice(statusDeletionNotice: StatusDeletionNotice) = log.info("onDeletionNotice")

  override def onScrubGeo(l: Long, l1: Long) = {
    log.debug(s"onScrubGeo $l $l1")
  }

  override def onStatus(status: Status) = {
    log.debug("onStatus")
    queue.put(status)
  }

  override def onTrackLimitationNotice(i: Int) = log.info(s"onTrackLimitationNotice $i")

  override def onException(e: Exception) = log.warn("onException " + e.toString)
}

trait StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord
}

object StatusToStringKeyValue extends StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord = {
    new SourceRecord(
      Map("tweetSource" -> status.getSource).asJava, //source partitions?
      Map("tweetId" -> status.getId).asJava, //source offsets?
      topic,
      null,
      Schema.STRING_SCHEMA,
      status.getUser.getScreenName,
      Schema.STRING_SCHEMA,
      status.getText)
  }
}

object StatusToTwitterStatusStructure extends StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord = {
    //val ts = TwitterStatus.struct(TwitterStatus(status))
    new SourceRecord(
      Map("tweetSource" -> status.getSource).asJava, //source partitions?
      Map("tweetId" -> status.getId).asJava, //source offsets?
      topic,
      TwitterStatus.schema,
      TwitterStatus.struct(status))
  }
}

  def stop() = {
    log.info("Stop Twitter client")
    client.stop()
  }
}
Example 2
Source File: HiveSource.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.source

import com.landoop.streamreactor.connect.hive
import com.landoop.streamreactor.connect.hive._
import com.landoop.streamreactor.connect.hive.formats.{HiveFormat, HiveReader, Record}
import com.landoop.streamreactor.connect.hive.source.config.HiveSourceConfig
import com.landoop.streamreactor.connect.hive.source.mapper.{PartitionValueMapper, ProjectionMapper}
import com.landoop.streamreactor.connect.hive.source.offset.HiveSourceOffsetStorageReader
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.hive.metastore.IMetaStoreClient
import org.apache.kafka.connect.data.Struct
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._

class HiveSource(db: DatabaseName,
                 tableName: TableName,
                 topic: Topic,
                 offsetReader: HiveSourceOffsetStorageReader,
                 config: HiveSourceConfig)
                (implicit client: IMetaStoreClient, fs: FileSystem) extends Iterator[SourceRecord] {

  val tableConfig = config.tableOptions.filter(_.tableName == tableName).find(_.topic == topic)
    .getOrElse(sys.error(s"Cannot find table configuration for ${db.value}.${tableName.value} => ${topic.value}"))

  private val table = client.getTable(db.value, tableName.value)
  private val format = HiveFormat(hive.serde(table))
  private val metastoreSchema = HiveSchemas.toKafka(table)
  private val parts = TableFileScanner.scan(db, tableName)

  private val readers = parts.map { case (path, partition) =>
    val fns: Seq[Struct => Struct] = Seq(
      partition.map(new PartitionValueMapper(_).map _),
      tableConfig.projection.map(new ProjectionMapper(_).map _)
    ).flatten
    val mapper: Struct => Struct = Function.chain(fns)

    val sourceOffset = offsetReader.offset(SourcePartition(db, tableName, topic, path)).getOrElse(SourceOffset(0))

    new HiveReader {
      lazy val reader = format.reader(path, sourceOffset.rowNumber, metastoreSchema)

      override def iterator: Iterator[Record] = reader.iterator.map { record =>
        Record(mapper(record.struct), record.path, record.offset)
      }

      override def close(): Unit = reader.close()
    }
  }

  private val iterator: Iterator[Record] = readers.map(_.iterator).reduce(_ ++ _).take(tableConfig.limit)

  override def hasNext: Boolean = iterator.hasNext

  override def next(): SourceRecord = {
    val record = iterator.next
    val sourcePartition = SourcePartition(db, tableName, topic, record.path)
    val offset = SourceOffset(record.offset)

    new SourceRecord(
      fromSourcePartition(sourcePartition).asJava,
      fromSourceOffset(offset).asJava,
      topic.value,
      record.struct.schema,
      record.struct
    )
  }

  def close(): Unit = {
    readers.foreach(_.close())
  }
}
Example 3
Source File: CoapReaderFactory.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.coap.source

import java.util
import java.util.concurrent.LinkedBlockingQueue

import com.datamountaineer.streamreactor.connect.coap.configs.CoapSetting
import com.datamountaineer.streamreactor.connect.coap.connection.CoapManager
import com.datamountaineer.streamreactor.connect.coap.domain.CoapMessageConverter
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.source.SourceRecord
import org.eclipse.californium.core.{CoapHandler, CoapObserveRelation, CoapResponse, WebLink}

class MessageHandler(resource: String, topic: String, queue: LinkedBlockingQueue[SourceRecord])
  extends CoapHandler with StrictLogging {

  val converter = CoapMessageConverter()

  override def onError(): Unit = {
    logger.warn(s"Message dropped for $topic!")
  }

  override def onLoad(response: CoapResponse): Unit = {
    val records = converter.convert(resource, topic, response.advanced())
    logger.debug(s"Received ${response.advanced().toString} for $topic")
    logger.debug(s"Records in queue ${queue.size()} for $topic")
    queue.put(records)
  }
}
Example 4
Source File: CoapSourceTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.coap.source

import java.util
import java.util.concurrent.LinkedBlockingQueue

import com.datamountaineer.streamreactor.connect.coap.configs.{CoapConstants, CoapSettings, CoapSourceConfig}
import com.datamountaineer.streamreactor.connect.queues.QueueHelpers
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}

import scala.collection.JavaConverters._

class CoapSourceTask extends SourceTask with StrictLogging {
  private var readers: Set[CoapReader] = _
  private val progressCounter = new ProgressCounter
  private var enableProgress: Boolean = false
  private val queue = new LinkedBlockingQueue[SourceRecord]()
  private var batchSize: Int = CoapConstants.BATCH_SIZE_DEFAULT
  private var lingerTimeout = CoapConstants.SOURCE_LINGER_MS_DEFAULT
  private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation)

  override def start(props: util.Map[String, String]): Unit = {
    logger.info(scala.io.Source.fromInputStream(getClass.getResourceAsStream("/coap-source-ascii.txt")).mkString + s" $version")
    logger.info(manifest.printManifest())

    val conf = if (context.configs().isEmpty) props else context.configs()

    val config = CoapSourceConfig(conf)
    enableProgress = config.getBoolean(CoapConstants.PROGRESS_COUNTER_ENABLED)
    val settings = CoapSettings(config)
    batchSize = config.getInt(CoapConstants.BATCH_SIZE)
    lingerTimeout = config.getInt(CoapConstants.SOURCE_LINGER_MS)
    enableProgress = config.getBoolean(CoapConstants.PROGRESS_COUNTER_ENABLED)
    readers = CoapReaderFactory(settings, queue)
  }

  override def poll(): util.List[SourceRecord] = {
    val records = new util.ArrayList[SourceRecord]()
    QueueHelpers.drainWithTimeoutNoGauva(records, batchSize, lingerTimeout * 1000000, queue)

    if (enableProgress) {
      progressCounter.update(records.asScala.toVector)
    }
    records
  }

  override def stop(): Unit = {
    logger.info("Stopping Coap source and closing connections.")
    readers.foreach(_.stop())
    progressCounter.empty
  }

  override def version: String = manifest.version()
}
Example 5
Source File: SimpleFileConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source

import java.util

import com.datamountaineer.streamreactor.connect.ftp.source.SourceRecordProducers.SourceRecordProducer
import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.storage.OffsetStorageReader

import scala.collection.JavaConverters._

class SimpleFileConverter(props: util.Map[String, String], offsetStorageReader: OffsetStorageReader)
  extends FileConverter(props, offsetStorageReader) {

  val cfg = new FtpSourceConfig(props)
  val metaStore = new ConnectFileMetaDataStore(offsetStorageReader)
  val recordConverter: SourceRecordConverter = cfg.sourceRecordConverter
  val recordMaker: SourceRecordProducer = cfg.keyStyle match {
    case KeyStyle.String => SourceRecordProducers.stringKeyRecord
    case KeyStyle.Struct => SourceRecordProducers.structKeyRecord
  }

  override def convert(topic: String, meta: FileMetaData, body: FileBody): Seq[SourceRecord] = {
    metaStore.set(meta.attribs.path, meta)
    recordConverter.convert(recordMaker(metaStore, topic, meta, body)).asScala
  }

  override def getFileOffset(path: String): Option[FileMetaData] = metaStore.get(path)
}
Example 6
Source File: FileConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source

import java.util

import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.storage.OffsetStorageReader

import scala.util.{Failure, Success, Try}

abstract class FileConverter(props: util.Map[String, String], offsetStorageReader: OffsetStorageReader) {
  def convert(topic: String, meta: FileMetaData, body: FileBody): Seq[SourceRecord]

  def getFileOffset(path: String): Option[FileMetaData]
}

object FileConverter {
  def apply(klass: Class[_], props: util.Map[String, String], offsetStorageReader: OffsetStorageReader): FileConverter = {
    Try(klass.getDeclaredConstructor(classOf[util.Map[String, String]], classOf[OffsetStorageReader])
      .newInstance(props, offsetStorageReader).asInstanceOf[FileConverter]) match {
      case Success(fc) => fc
      case Failure(err) => throw new Exception(s"Failed to create ${klass} as instance of ${classOf[FileConverter]}", err)
    }
  }
}
Example 7
Source File: MaxLinesFileConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source

import java.util

import com.datamountaineer.streamreactor.connect.ftp.source.SourceRecordProducers.SourceRecordProducer
import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.storage.OffsetStorageReader

import scala.collection.JavaConverters._

class MaxLinesFileConverter(props: util.Map[String, String], offsetStorageReader: OffsetStorageReader)
  extends FileConverter(props, offsetStorageReader) {

  val cfg = new FtpSourceConfig(props)
  val metaStore = new ConnectFileMetaDataStore(offsetStorageReader)
  val recordConverter: SourceRecordConverter = cfg.sourceRecordConverter
  val recordMaker: SourceRecordProducer = cfg.keyStyle match {
    case KeyStyle.String => SourceRecordProducers.stringKeyRecord
    case KeyStyle.Struct => SourceRecordProducers.structKeyRecord
  }
  val lineSep = System.getProperty("line.separator").getBytes

  override def convert(topic: String, meta: FileMetaData, body: FileBody): Seq[SourceRecord] = {
    if (meta.attribs.size == meta.offset) {
      // Last slice of the file: there may be no line separator at the end of the file
      metaStore.set(meta.attribs.path, meta)
      recordConverter.convert(recordMaker(metaStore, topic, meta, body)).asScala
    } else {
      val offsetInSlice = findEndPositionOfLastMatch(lineSep, body.bytes)
      // TODO: warn that no line separator was found; suggest that the line size may exceed the slice size
      val offset = meta.offset - (body.bytes.size - offsetInSlice)
      metaStore.set(meta.attribs.path, meta.offset(offset))
      val trimmedBody = FileBody(util.Arrays.copyOfRange(body.bytes, 0, offsetInSlice), 0)
      recordConverter.convert(recordMaker(metaStore, topic, meta, trimmedBody)).asScala
    }
  }

  def findEndPositionOfLastMatch(bytesToMatch: Array[Byte], content: Array[Byte]): Int = {
    for (pos <- content.size to bytesToMatch.size by -1) {
      val window = util.Arrays.copyOfRange(content, pos - bytesToMatch.size, pos)
      if (window.deep == bytesToMatch.deep) return pos
    }
    -1
  }

  override def getFileOffset(path: String): Option[FileMetaData] = {
    metaStore.get(path)
  }
}
Example 8
Source File: SourceRecordProducers.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.source.SourceRecord

object SourceRecordProducers {
  type SourceRecordProducer = (ConnectFileMetaDataStore, String, FileMetaData, FileBody) => SourceRecord

  val fileInfoSchema = SchemaBuilder.struct()
    .field("name", Schema.STRING_SCHEMA)
    .field("offset", Schema.INT64_SCHEMA)
    .build()

  def stringKeyRecord(store: ConnectFileMetaDataStore, topic: String, meta: FileMetaData, body: FileBody): SourceRecord =
    new SourceRecord(
      store.fileMetasToConnectPartition(meta), // source part
      store.fileMetasToConnectOffset(meta),    // source off
      topic,                                   // topic
      Schema.STRING_SCHEMA,                    // key sch
      meta.attribs.path,                       // key
      Schema.BYTES_SCHEMA,                     // val sch
      body.bytes                               // val
    )

  def structKeyRecord(store: ConnectFileMetaDataStore, topic: String, meta: FileMetaData, body: FileBody): SourceRecord = {
    new SourceRecord(
      store.fileMetasToConnectPartition(meta), // source part
      store.fileMetasToConnectOffset(meta),    // source off
      topic,                                   // topic
      fileInfoSchema,                          // key sch
      new Struct(fileInfoSchema)
        .put("name", meta.attribs.path)
        .put("offset", body.offset),
      Schema.BYTES_SCHEMA,                     // val sch
      body.bytes                               // val
    )
  }
}
Example 9
Source File: TwitterReader.scala From kafka-tweet-producer with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util.concurrent.LinkedBlockingQueue

import com.twitter.hbc.ClientBuilder
import com.twitter.hbc.core.Constants
import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint
import com.twitter.hbc.core.endpoint.StatusesSampleEndpoint
import com.twitter.hbc.core.endpoint.DefaultStreamingEndpoint
import com.twitter.hbc.core.processor.StringDelimitedProcessor
import com.twitter.hbc.core.endpoint.Location
import com.twitter.hbc.httpclient.auth.OAuth1
import org.apache.kafka.connect.source.{SourceRecord, SourceTaskContext}
import twitter4j.Status

import scala.collection.JavaConversions._
import scala.collection.JavaConverters._

object TwitterReader {
  def apply(config: TwitterSourceConfig, context: SourceTaskContext) = {
    //endpoints
    val endpoint: DefaultStreamingEndpoint =
      if (config.getString(TwitterSourceConfig.STREAM_TYPE).equals(TwitterSourceConfig.STREAM_TYPE_SAMPLE)) {
        new StatusesSampleEndpoint()
      } else {
        val trackEndpoint = new StatusesFilterEndpoint()

        val terms = config.getList(TwitterSourceConfig.TRACK_TERMS)
        if (!terms.isEmpty) {
          trackEndpoint.trackTerms(terms)
        }

        val locs = config.getList(TwitterSourceConfig.TRACK_LOCATIONS)
        if (!locs.isEmpty) {
          val locations = locs.toList.map({ x => Double.box(x.toDouble) }).grouped(4).toList
            .map({ l => new Location(new Location.Coordinate(l(0), l(1)), new Location.Coordinate(l(2), l(3))) })
            .asJava
          trackEndpoint.locations(locations)
        }

        val follow = config.getList(TwitterSourceConfig.TRACK_FOLLOW)
        if (!follow.isEmpty) {
          val users = follow.toList.map({ x => Long.box(x.trim.toLong) }).asJava
          trackEndpoint.followings(users)
        }

        trackEndpoint
      }

    endpoint.stallWarnings(false)

    val language = config.getList(TwitterSourceConfig.LANGUAGE)
    if (!language.isEmpty) {
      // endpoint.languages(language) doesn't work as intended!
      endpoint.addQueryParameter(TwitterSourceConfig.LANGUAGE, language.toList.mkString(","))
    }

    //twitter auth stuff
    val auth = new OAuth1(
      config.getString(TwitterSourceConfig.CONSUMER_KEY_CONFIG),
      config.getPassword(TwitterSourceConfig.CONSUMER_SECRET_CONFIG).value,
      config.getString(TwitterSourceConfig.TOKEN_CONFIG),
      config.getPassword(TwitterSourceConfig.SECRET_CONFIG).value)

    //batch size to take from the queue
    val batchSize = config.getInt(TwitterSourceConfig.BATCH_SIZE)
    val batchTimeout = config.getDouble(TwitterSourceConfig.BATCH_TIMEOUT)

    //The Kafka topic to append to
    val topic = config.getString(TwitterSourceConfig.TOPIC)

    //queue for client to buffer to
    val queue = new LinkedBlockingQueue[String](10000)

    //how the output is formatted
    val statusConverter = config.getString(TwitterSourceConfig.OUTPUT_FORMAT) match {
      case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRING => StatusToStringKeyValue
      case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRUCTURED => StatusToTwitterStatusStructure
    }

    //build basic client
    val client = new ClientBuilder()
      .name(config.getString(TwitterSourceConfig.TWITTER_APP_NAME))
      .hosts(Constants.STREAM_HOST)
      .endpoint(endpoint)
      .authentication(auth)
      .processor(new StringDelimitedProcessor(queue))
      .build()

    new TwitterStatusReader(client = client, rawQueue = queue, batchSize = batchSize,
      batchTimeout = batchTimeout, topic = topic, statusConverter = statusConverter)
  }
}
Example 10
Source File: TwitterSourceTask.scala From kafka-tweet-producer with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util

import org.apache.kafka.connect.source.{SourceRecord, SourceTask}

class TwitterSourceTask extends SourceTask with Logging {
  private var reader: Option[TwitterStatusReader] = null

  override def poll(): util.List[SourceRecord] = {
    require(reader.isDefined, "Twitter client not initialized!")
    reader.get.poll()
  }

  override def start(props: util.Map[String, String]): Unit = {
    val sourceConfig = new TwitterSourceConfig(props)
    reader = Some(TwitterReader(config = sourceConfig, context = context))
  }

  override def stop() = {
    reader.foreach(r => r.stop())
  }

  override def version(): String = ""
}
Example 11
Source File: MqttSourceTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.mqtt.source

import java.io.File
import java.util

import com.datamountaineer.streamreactor.connect.converters.source.Converter
import com.datamountaineer.streamreactor.connect.mqtt.config.{MqttConfigConstants, MqttSourceConfig, MqttSourceSettings}
import com.datamountaineer.streamreactor.connect.mqtt.connection.MqttClientConnectionFn
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.common.config.ConfigException
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}

import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}

class MqttSourceTask extends SourceTask with StrictLogging {
  private val progressCounter = new ProgressCounter
  private var enableProgress: Boolean = false
  private var mqttManager: Option[MqttManager] = None
  private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation)

  override def start(props: util.Map[String, String]): Unit = {
    logger.info(scala.io.Source.fromInputStream(this.getClass.getResourceAsStream("/mqtt-source-ascii.txt")).mkString + s" $version")
    logger.info(manifest.printManifest())

    val conf = if (context.configs().isEmpty) props else context.configs()

    val settings = MqttSourceSettings(MqttSourceConfig(conf))

    settings.sslCACertFile.foreach { file =>
      if (!new File(file).exists()) {
        throw new ConfigException(s"${MqttConfigConstants.SSL_CA_CERT_CONFIG} is invalid. Can't locate $file")
      }
    }

    settings.sslCertFile.foreach { file =>
      if (!new File(file).exists()) {
        throw new ConfigException(s"${MqttConfigConstants.SSL_CERT_CONFIG} is invalid. Can't locate $file")
      }
    }

    settings.sslCertKeyFile.foreach { file =>
      if (!new File(file).exists()) {
        throw new ConfigException(s"${MqttConfigConstants.SSL_CERT_KEY_CONFIG} is invalid. Can't locate $file")
      }
    }

    val convertersMap = settings.sourcesToConverters.map { case (topic, clazz) =>
      logger.info(s"Creating converter instance for $clazz")
      val converter = Try(Class.forName(clazz).newInstance()) match {
        case Success(value) => value.asInstanceOf[Converter]
        case Failure(_) => throw new ConfigException(s"Invalid ${MqttConfigConstants.KCQL_CONFIG} is invalid. $clazz should have an empty ctor!")
      }
      import scala.collection.JavaConverters._
      converter.initialize(conf.asScala.toMap)
      topic -> converter
    }

    logger.info("Starting Mqtt source...")
    mqttManager = Some(new MqttManager(MqttClientConnectionFn.apply, convertersMap, settings))
    enableProgress = settings.enableProgress
  }

  override def stop(): Unit = {
    logger.info("Stopping Mqtt source.")
    mqttManager.foreach(_.close())
    progressCounter.empty
  }

  override def version: String = manifest.version()
}
Example 12
Source File: TwitterReader.scala From kafka-connect-twitter with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util.concurrent.LinkedBlockingQueue

import com.twitter.hbc.ClientBuilder
import com.twitter.hbc.core.Constants
import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint
import com.twitter.hbc.core.endpoint.StatusesSampleEndpoint
import com.twitter.hbc.core.endpoint.DefaultStreamingEndpoint
import com.twitter.hbc.core.processor.StringDelimitedProcessor
import com.twitter.hbc.core.endpoint.Location
import com.twitter.hbc.httpclient.auth.OAuth1
import org.apache.kafka.connect.source.{SourceRecord, SourceTaskContext}
import twitter4j.Status

import scala.collection.JavaConversions._
import scala.collection.JavaConverters._

object TwitterReader {
  def apply(config: TwitterSourceConfig, context: SourceTaskContext) = {
    //endpoints
    val endpoint: DefaultStreamingEndpoint =
      if (config.getString(TwitterSourceConfig.STREAM_TYPE).equals(TwitterSourceConfig.STREAM_TYPE_SAMPLE)) {
        new StatusesSampleEndpoint()
      } else {
        val trackEndpoint = new StatusesFilterEndpoint()

        val terms = config.getList(TwitterSourceConfig.TRACK_TERMS)
        if (!terms.isEmpty) {
          trackEndpoint.trackTerms(terms)
        }

        val locs = config.getList(TwitterSourceConfig.TRACK_LOCATIONS)
        if (!locs.isEmpty) {
          val locations = locs.toList.map({ x => Double.box(x.toDouble) }).grouped(4).toList
            .map({ l => new Location(new Location.Coordinate(l(0), l(1)), new Location.Coordinate(l(2), l(3))) })
            .asJava
          trackEndpoint.locations(locations)
        }

        val follow = config.getList(TwitterSourceConfig.TRACK_FOLLOW)
        if (!follow.isEmpty) {
          val users = follow.toList.map({ x => Long.box(x.trim.toLong) }).asJava
          trackEndpoint.followings(users)
        }

        trackEndpoint
      }

    endpoint.stallWarnings(false)

    val language = config.getList(TwitterSourceConfig.LANGUAGE)
    if (!language.isEmpty) {
      // endpoint.languages(language) doesn't work as intended!
      endpoint.addQueryParameter(TwitterSourceConfig.LANGUAGE, language.toList.mkString(","))
    }

    //twitter auth stuff
    val auth = new OAuth1(
      config.getString(TwitterSourceConfig.CONSUMER_KEY_CONFIG),
      config.getPassword(TwitterSourceConfig.CONSUMER_SECRET_CONFIG).value,
      config.getString(TwitterSourceConfig.TOKEN_CONFIG),
      config.getPassword(TwitterSourceConfig.SECRET_CONFIG).value)

    //batch size to take from the queue
    val batchSize = config.getInt(TwitterSourceConfig.BATCH_SIZE)
    val batchTimeout = config.getDouble(TwitterSourceConfig.BATCH_TIMEOUT)

    //The Kafka topic to append to
    val topic = config.getString(TwitterSourceConfig.TOPIC)

    //queue for client to buffer to
    val queue = new LinkedBlockingQueue[String](10000)

    //how the output is formatted
    val statusConverter = config.getString(TwitterSourceConfig.OUTPUT_FORMAT) match {
      case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRING => StatusToStringKeyValue
      case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRUCTURED => StatusToTwitterStatusStructure
    }

    //build basic client
    val client = new ClientBuilder()
      .name(config.getString(TwitterSourceConfig.TWITTER_APP_NAME))
      .hosts(Constants.STREAM_HOST)
      .endpoint(endpoint)
      .authentication(auth)
      .processor(new StringDelimitedProcessor(queue))
      .build()

    new TwitterStatusReader(client = client, rawQueue = queue, batchSize = batchSize,
      batchTimeout = batchTimeout, topic = topic, statusConverter = statusConverter)
  }
}
Example 13
Source File: TwitterSourceTask.scala From kafka-connect-twitter with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util

import org.apache.kafka.connect.source.{SourceRecord, SourceTask}

class TwitterSourceTask extends SourceTask with Logging {
  private var reader: Option[TwitterStatusReader] = null

  override def poll(): util.List[SourceRecord] = {
    require(reader.isDefined, "Twitter client not initialized!")
    reader.get.poll()
  }

  override def start(props: util.Map[String, String]): Unit = {
    val sourceConfig = new TwitterSourceConfig(props)
    reader = Some(TwitterReader(config = sourceConfig, context = context))
  }

  override def stop() = {
    reader.foreach(r => r.stop())
  }

  override def version(): String = ""
}
Example 14
Source File: TwitterStatusReader.scala From kafka-connect-twitter with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util
import java.util.concurrent.{TimeUnit, LinkedBlockingQueue, Executors}
import com.eneco.trading.kafka.connect.twitter.domain.TwitterStatus
import com.twitter.hbc.httpclient.BasicClient
import com.twitter.hbc.twitter4j.Twitter4jStatusClient
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord
import twitter4j._
import scala.collection.JavaConverters._
import Extensions._

class StatusEnqueuer(queue: LinkedBlockingQueue[Status]) extends StatusListener with Logging {
  override def onStallWarning(stallWarning: StallWarning) = log.warn("onStallWarning")

  override def onDeletionNotice(statusDeletionNotice: StatusDeletionNotice) = log.info("onDeletionNotice")

  override def onScrubGeo(l: Long, l1: Long) = {
    log.debug(s"onScrubGeo $l $l1")
  }

  override def onStatus(status: Status) = {
    log.debug("onStatus")
    queue.put(status)
  }

  override def onTrackLimitationNotice(i: Int) = log.info(s"onTrackLimitationNotice $i")

  override def onException(e: Exception) = log.warn("onException " + e.toString)
}

trait StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord
}

object StatusToStringKeyValue extends StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord = {
    new SourceRecord(
      Map("tweetSource" -> status.getSource).asJava, //source partitions?
      Map("tweetId" -> status.getId).asJava, //source offsets?
      topic,
      null,
      Schema.STRING_SCHEMA,
      status.getUser.getScreenName,
      Schema.STRING_SCHEMA,
      status.getText,
      status.getCreatedAt.getTime)
  }
}

object StatusToTwitterStatusStructure extends StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord = {
    //val ts = TwitterStatus.struct(TwitterStatus(status))
    new SourceRecord(
      Map("tweetSource" -> status.getSource).asJava, //source partitions?
      Map("tweetId" -> status.getId).asJava, //source offsets?
      topic,
      null,
      Schema.STRING_SCHEMA,
      status.getUser.getScreenName,
      TwitterStatus.schema,
      TwitterStatus.struct(status),
      status.getCreatedAt.getTime)
  }
}

  def stop() = {
    log.info("Stop Twitter client")
    client.stop()
  }
}
Example 15
Source File: JsonPassThroughConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source

import java.util.Collections

import com.landoop.json.sql.JacksonJson
import org.apache.kafka.connect.source.SourceRecord

class JsonPassThroughConverter extends Converter {
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    require(bytes != null, s"Invalid $bytes parameter")
    val json = new String(bytes, "utf-8")
    val jsonNode = JacksonJson.asJson(json)
    var keysValue = keys.flatMap { key =>
      Option(KeyExtractor.extract(jsonNode, key.split('.').toVector)).map(_.toString)
    }.mkString(keyDelimiter)

    // If keys are not provided, default one will be constructed
    if (keysValue == "") {
      keysValue = s"$sourceTopic$keyDelimiter$messageId"
    }

    new SourceRecord(
      Collections.singletonMap(Converter.TopicKey, sourceTopic),
      null,
      kafkaTopic,
      null,
      keysValue,
      null,
      json)
  }
}
Example 16
Source File: AvroConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source

import java.io.File
import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import io.confluent.connect.avro.AvroData
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.io.DecoderFactory
import org.apache.avro.{Schema => AvroSchema}
import org.apache.kafka.connect.data.{Schema, Struct}
import org.apache.kafka.connect.source.SourceRecord
import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException

class AvroConverter extends Converter {
  private val avroData = new AvroData(8)
  private var sourceToSchemaMap: Map[String, AvroSchema] = Map.empty
  private var avroReadersMap: Map[String, GenericDatumReader[GenericRecord]] = Map.empty

  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    Option(bytes) match {
      case None =>
        new SourceRecord(
          Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          avroData.toConnectSchema(sourceToSchemaMap(sourceTopic)),
          null)
      case Some(_) =>
        val reader = avroReadersMap.getOrElse(sourceTopic.toLowerCase,
          throw new ConfigException(s"Invalid ${AvroConverter.SCHEMA_CONFIG} is not configured for $sourceTopic"))
        val decoder = DecoderFactory.get().binaryDecoder(bytes, null)
        val record = reader.read(null, decoder)
        val schemaAndValue = avroData.toConnectData(sourceToSchemaMap(sourceTopic.toLowerCase), record)
        val value = schemaAndValue.value()
        value match {
          case s: Struct if keys.nonEmpty =>
            val keysValue = keys.flatMap { key =>
              Option(KeyExtractor.extract(s, key.split('.').toVector)).map(_.toString)
            }.mkString(keyDelimiter)

            new SourceRecord(
              Collections.singletonMap(Converter.TopicKey, sourceTopic),
              null,
              kafkaTopic,
              Schema.STRING_SCHEMA,
              keysValue,
              schemaAndValue.schema(),
              schemaAndValue.value())
          case _ =>
            new SourceRecord(
              Collections.singletonMap(Converter.TopicKey, sourceTopic),
              null,
              kafkaTopic,
              MsgKey.schema,
              MsgKey.getStruct(sourceTopic, messageId),
              schemaAndValue.schema(),
              schemaAndValue.value())
        }
    }
  }

  override def initialize(config: Map[String, String]): Unit = {
    sourceToSchemaMap = AvroConverter.getSchemas(config)
    avroReadersMap = sourceToSchemaMap.map { case (key, schema) =>
      key -> new GenericDatumReader[GenericRecord](schema)
    }
  }
}

object AvroConverter {
  val SCHEMA_CONFIG = "connect.source.converter.avro.schemas"

  def getSchemas(config: Map[String, String]): Map[String, AvroSchema] = {
    config.getOrElse(SCHEMA_CONFIG, throw new ConfigException(s"$SCHEMA_CONFIG is not provided"))
      .toString
      .split(';')
      .filter(_.trim.nonEmpty)
      .map(_.split("="))
      .map {
        case Array(source, path) =>
          val file = new File(path)
          if (!file.exists()) {
            throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The file $path doesn't exist!")
          }
          val s = source.trim.toLowerCase()
          if (s.isEmpty) {
            throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The topic is not valid for entry containing $path")
          }
          s -> new AvroSchema.Parser().parse(file)
        case other => throw new ConfigException(s"$SCHEMA_CONFIG is not properly set. The format is Mqtt_Source->AVRO_FILE")
      }.toMap
  }
}
Example 17
Source File: JsonSimpleConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source

import java.nio.charset.Charset
import java.util
import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import org.apache.kafka.connect.data._
import org.apache.kafka.connect.source.SourceRecord

class JsonSimpleConverter extends Converter {
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    require(bytes != null, s"Invalid $bytes parameter")
    val json = new String(bytes, Charset.defaultCharset)
    val schemaAndValue = JsonSimpleConverter.convert(sourceTopic, json)
    val value = schemaAndValue.value()
    value match {
      case s: Struct if keys.nonEmpty =>
        val keysValue = keys.flatMap { key =>
          Option(KeyExtractor.extract(s, key.split('.').toVector)).map(_.toString)
        }.mkString(keyDelimiter)
        new SourceRecord(
          Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          Schema.STRING_SCHEMA,
          keysValue,
          schemaAndValue.schema(),
          schemaAndValue.value())
      case _ =>
        new SourceRecord(
          Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          MsgKey.schema,
          MsgKey.getStruct(sourceTopic, messageId),
          schemaAndValue.schema(),
          schemaAndValue.value())
    }
  }
}

object JsonSimpleConverter {
  import org.json4s._
  import org.json4s.native.JsonMethods._

  def convert(name: String, str: String): SchemaAndValue = convert(name, parse(str))

  def convert(name: String, value: JValue): SchemaAndValue = {
    value match {
      case JArray(arr) =>
        val values = new util.ArrayList[AnyRef]()
        val sv = convert(name, arr.head)
        values.add(sv.value())
        arr.tail.foreach { v => values.add(convert(name, v).value()) }

        val schema = SchemaBuilder.array(sv.schema()).optional().build()
        new SchemaAndValue(schema, values)
      case JBool(b) => new SchemaAndValue(Schema.BOOLEAN_SCHEMA, b)
      case JDecimal(d) =>
        val schema = Decimal.builder(d.scale).optional().build()
        new SchemaAndValue(schema, Decimal.fromLogical(schema, d.bigDecimal))
      case JDouble(d) => new SchemaAndValue(Schema.FLOAT64_SCHEMA, d)
      case JInt(i) => new SchemaAndValue(Schema.INT64_SCHEMA, i.toLong) //on purpose! LONG (we might get later records with long entries)
      case JLong(l) => new SchemaAndValue(Schema.INT64_SCHEMA, l)
      case JNull | JNothing => new SchemaAndValue(Schema.STRING_SCHEMA, null)
      case JString(s) => new SchemaAndValue(Schema.STRING_SCHEMA, s)
      case JObject(values) =>
        val builder = SchemaBuilder.struct().name(name.replace("/", "_"))

        val fields = values.map { case (n, v) =>
          val schemaAndValue = convert(n, v)
          builder.field(n, schemaAndValue.schema())
          n -> schemaAndValue.value()
        }.toMap

        val schema = builder.build()
        val struct = new Struct(schema)
        fields.foreach { case (field, v) => struct.put(field, v) }
        new SchemaAndValue(schema, struct)
    }
  }
}
Example 18
Source File: JsonOptNullConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source

import java.nio.charset.Charset
import java.util
import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import org.apache.kafka.connect.data._
import org.apache.kafka.connect.source.SourceRecord

class JsonOptNullConverter extends Converter {
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    require(bytes != null, s"Invalid $bytes parameter")
    val json = new String(bytes, Charset.defaultCharset)
    val schemaAndValue = JsonOptNullConverter.convert(sourceTopic, json)
    val value = schemaAndValue.value()
    value match {
      case s: Struct if keys.nonEmpty =>
        val keysValue = keys.flatMap { key =>
          Option(KeyExtractor.extract(s, key.split('.').toVector)).map(_.toString)
        }.mkString(keyDelimiter)
        new SourceRecord(
          Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          Schema.STRING_SCHEMA,
          keysValue,
          schemaAndValue.schema(),
          schemaAndValue.value())
      case _ =>
        new SourceRecord(
          Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          MsgKey.schema,
          MsgKey.getStruct(sourceTopic, messageId),
          schemaAndValue.schema(),
          schemaAndValue.value())
    }
  }
}

object JsonOptNullConverter {
  import org.json4s._
  import org.json4s.native.JsonMethods._

  def convert(name: String, str: String): SchemaAndValue = convert(name, parse(str))

  def convert(name: String, value: JValue): SchemaAndValue = {
    value match {
      case JArray(arr) =>
        val values = new util.ArrayList[AnyRef]()
        val sv = convert(name, arr.head)
        values.add(sv.value())
        arr.tail.foreach { v => values.add(convert(name, v).value()) }

        val schema = SchemaBuilder.array(sv.schema()).optional().build()
        new SchemaAndValue(schema, values)
      case JBool(b) => new SchemaAndValue(Schema.BOOLEAN_SCHEMA, b)
      case JDecimal(d) =>
        val schema = Decimal.builder(d.scale).optional().build()
        new SchemaAndValue(schema, Decimal.fromLogical(schema, d.bigDecimal))
      case JDouble(d) => new SchemaAndValue(Schema.FLOAT64_SCHEMA, d)
      case JInt(i) => new SchemaAndValue(Schema.INT64_SCHEMA, i.toLong) //on purpose! LONG (we might get later records with long entries)
      case JLong(l) => new SchemaAndValue(Schema.INT64_SCHEMA, l)
      case JNull | JNothing => new SchemaAndValue(Schema.OPTIONAL_STRING_SCHEMA, null)
      case JString(s) => new SchemaAndValue(Schema.STRING_SCHEMA, s)
      case JObject(values) =>
        val builder = SchemaBuilder.struct().name(name.replace("/", "_"))

        val fields = values.map { case (n, v) =>
          val schemaAndValue = convert(n, v)
          builder.field(n, schemaAndValue.schema())
          n -> schemaAndValue.value()
        }.toMap

        val schema = builder.build()
        val struct = new Struct(schema)
        fields.foreach { case (field, v) => struct.put(field, v) }
        new SchemaAndValue(schema, struct)
    }
  }
}
Example 19
Source File: BytesConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source

import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

class BytesConverter extends Converter {
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    new SourceRecord(
      Collections.singletonMap(Converter.TopicKey, sourceTopic),
      null,
      kafkaTopic,
      MsgKey.schema,
      MsgKey.getStruct(sourceTopic, messageId),
      Schema.BYTES_SCHEMA,
      bytes)
  }
}
Example 20
Source File: TableQuerier.scala From kafka-connect-sap with Apache License 2.0 | 5 votes |
package com.sap.kafka.connect.source.querier

import com.sap.kafka.client.hana.HANAJdbcClient
import com.sap.kafka.connect.config.{BaseConfig, BaseConfigConstants}
import com.sap.kafka.connect.config.hana.HANAConfig
import com.sap.kafka.utils.hana.HANAJdbcTypeConverter
import org.apache.kafka.connect.data.{Schema, Struct}
import org.apache.kafka.connect.source.SourceRecord
import org.slf4j.LoggerFactory

import scala.util.Random

abstract class TableQuerier(mode: String, tableOrQuery: String,
                            topic: String, config: BaseConfig,
                            var jdbcClient: Option[HANAJdbcClient])
  extends Comparable[TableQuerier] {

  var tableName: String = if (mode.equals(BaseConfigConstants.QUERY_MODE_TABLE)) tableOrQuery else null
  var query: String = if (mode.equals(BaseConfigConstants.QUERY_MODE_SQL)) tableOrQuery else null

  var lastUpdate: Long = 0
  var schema: Schema = _
  var queryString: Option[String] = None
  var resultList: Option[List[Struct]] = None

  val log = LoggerFactory.getLogger(getClass)

  def getLastUpdate(): Long = lastUpdate

  def getOrCreateQueryString(): Option[String] = {
    createQueryString()
    queryString
  }

  def createQueryString(): Unit

  def querying(): Boolean = resultList.isDefined

  def maybeStartQuery(): Unit = {
    if (resultList.isEmpty) {
      schema = getSchema()
      queryString = getOrCreateQueryString()
      val batchMaxRows = config.batchMaxRows
      resultList = getOrCreateJdbcClient().get.executeQuery(schema, queryString.get, 0, batchMaxRows)
      log.info(resultList.size.toString)
    }
  }

  def extractRecords(): List[SourceRecord]

  def close(now: Long): Unit = {
    resultList = None
    schema = null

    lastUpdate = now
  }

  protected def getOrCreateJdbcClient(): Option[HANAJdbcClient] = {
    if (jdbcClient.isDefined) {
      return jdbcClient
    }

    config match {
      case hanaConfig: HANAConfig => Some(HANAJdbcClient(hanaConfig))
      case _ => throw new RuntimeException("Cannot create Jdbc Client")
    }
  }

  private def getSchema(): Schema = {
    mode match {
      case BaseConfigConstants.QUERY_MODE_TABLE =>
        if (getOrCreateJdbcClient().get.isInstanceOf[HANAJdbcClient]) {
          val metadata = getOrCreateJdbcClient().get.getMetaData(tableOrQuery, None)
          HANAJdbcTypeConverter.convertHANAMetadataToSchema(tableName, metadata)
        } else {
          throw new RuntimeException("Jdbc Client is not available")
        }
      case BaseConfigConstants.QUERY_MODE_SQL =>
        if (getOrCreateJdbcClient().get.isInstanceOf[HANAJdbcClient]) {
          val metadata = getOrCreateJdbcClient().get.getMetadata(tableOrQuery)
          HANAJdbcTypeConverter.convertHANAMetadataToSchema("Query" + Random.nextInt, metadata)
        } else {
          throw new RuntimeException("Jdbc Client is not available")
        }
      case _ => throw new RuntimeException("Other Query modes are not supported")
    }
  }

  override def compareTo(other: TableQuerier): Int = {
    if (this.lastUpdate < other.lastUpdate) {
      -1
    } else if (this.lastUpdate > other.lastUpdate) {
      0
    } else {
      this.tableName.compareTo(other.tableName)
    }
  }
}
Example 21
Source File: TestMapKeyToString.scala From kafka-connect-transformers with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.transforms

import java.util

import com.datamountaineer.streamreactor.connect.transforms.MapKeyToString.Value
import org.apache.kafka.connect.data._
import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.transforms.util.Requirements.requireStruct
import org.scalatest.{Matchers, WordSpec}

import scala.collection.JavaConversions._

class TestMapKeyToString extends WordSpec with Matchers {
  val MAP_SCHEMA = SchemaBuilder.map(Schema.OPTIONAL_INT64_SCHEMA, Schema.OPTIONAL_STRING_SCHEMA).optional().build();
  val FIELDS_CONFIG = "fields"

  "should transform all map key to string schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(FIELDS_CONFIG -> "map1, map2"))

    val transformedRecord = transform.apply(mockRecord(true));
    val value = requireStruct(transformedRecord.value, null)
    val schema = transformedRecord.valueSchema

    schema.field("map1").schema().keySchema().`type`().getName shouldBe "string"
    value.getMap("map1").get("1").toString shouldBe "value1-1"

    schema.field("map2").schema().keySchema().`type`().getName shouldBe "string"
    value.getMap("map2").get("1").toString shouldBe "value2-1"
  }

  "should transform only one map key to string schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(FIELDS_CONFIG -> "map1"))

    val transformedRecord = transform.apply(mockRecord(true));
    val value = requireStruct(transformedRecord.value, null)
    val schema = transformedRecord.valueSchema

    schema.field("map1").schema().keySchema().`type`().getName shouldBe "string"
    value.getMap("map1").get("1").toString shouldBe "value1-1"

    schema.field("map2").schema().keySchema().`type`().getName shouldBe "int64"
    value.getMap("map2").get(1L).toString shouldBe "value2-1"
  }

  private def mockRecord(withSchema: Boolean) = {
    val simpleStructSchema = SchemaBuilder.struct.name("name").version(1).doc("doc")
      .field("magic", Schema.OPTIONAL_INT64_SCHEMA)
      .field("map1", MAP_SCHEMA)
      .field("map2", MAP_SCHEMA)
      .build

    val simpleStruct = new Struct(simpleStructSchema)
      .put("magic", 42L)
      .put("map1", new util.HashMap[Long, String] {
        put(1L, "value1-1")
        put(2L, "value1-2")
      })
      .put("map2", new util.HashMap[Long, String] {
        put(1L, "value2-1")
        put(2L, "value2-2")
      })

    new SourceRecord(null, null, "test", 0, if (withSchema) simpleStructSchema else null, simpleStruct)
  }
}
Example 22
Source File: TestNestingFields.scala From kafka-connect-transformers with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.transforms

import java.util.Date

import com.datamountaineer.streamreactor.connect.transforms.NestingFields.Value
import org.apache.kafka.connect.data._
import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.transforms.util.Requirements.requireStruct
import org.scalatest.{Matchers, WordSpec}

import scala.collection.JavaConversions._

class TestNestingFields extends WordSpec with Matchers {
  val OPTIONAL_TIMESTAMP_SCHEMA = Timestamp.builder().optional().build()
  val OPTIONAL_DECIMAL_SCHEMA = Decimal.builder(18).optional().build()
  private val NESTED_NAME_CONFIG = "nested.name"
  private val FIELDS_CONFIG = "fields"

  "should append another field with two nested fields when have schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(
      NESTED_NAME_CONFIG -> "id",
      FIELDS_CONFIG -> "dateValue1, decimalValue1")
    )

    val transformedRecord = transform.apply(mockRecord(true));
    val value = requireStruct(transformedRecord.value, null)
    val schema = transformedRecord.valueSchema
    val nestedSchema = schema.field("id").schema()
    val nestedValue = requireStruct(value.get("id"), null)

    nestedSchema.field("dateValue1").schema().`type`() shouldBe schema.field("dateValue1").schema().`type`()
    nestedValue.get("dateValue1") shouldBe value.get("dateValue1")

    nestedSchema.field("decimalValue1").schema().`type`() shouldBe schema.field("decimalValue1").schema().`type`()
    nestedValue.get("decimalValue1") shouldBe value.get("decimalValue1")
  }

  "should append another field with one nested fields when have schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(
      NESTED_NAME_CONFIG -> "id",
      FIELDS_CONFIG -> "decimalValue1")
    )

    val transformedRecord = transform.apply(mockRecord(true));
    val value = requireStruct(transformedRecord.value, null)
    val schema = transformedRecord.valueSchema
    val nestedSchema = schema.field("id").schema()
    val nestedValue = requireStruct(value.get("id"), null)

    nestedSchema.field("decimalValue1").schema().`type`() shouldBe schema.field("decimalValue1").schema().`type`()
    nestedValue.get("decimalValue1") shouldBe value.get("decimalValue1")
  }

  "should append another field with one nested fields when don't have schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(
      NESTED_NAME_CONFIG -> "id",
      FIELDS_CONFIG -> "decimalValue1")
    )

    val transformedRecord = transform.apply(mockRecord(true));
    val value = requireStruct(transformedRecord.value, null)
    val nestedValue = requireStruct(value.get("id"), null)

    nestedValue.get("decimalValue1") shouldBe value.get("decimalValue1")
  }

  private def mockRecord(withSchema: Boolean) = {
    val simpleStructSchema = SchemaBuilder.struct.name("name").version(1).doc("doc")
      .field("magic", Schema.OPTIONAL_INT64_SCHEMA)
      .field("dateValue1", OPTIONAL_TIMESTAMP_SCHEMA)
      .field("decimalValue1", OPTIONAL_DECIMAL_SCHEMA)
      .build

    val simpleStruct = new Struct(simpleStructSchema)
      .put("magic", 42L)
      .put("dateValue1", new Date())
      .put("decimalValue1", BigDecimal(10.6).bigDecimal.setScale(18))

    new SourceRecord(null, null, "test", 0, if (withSchema) simpleStructSchema else null, simpleStruct)
  }
}
Example 23
Source File: CassandraSourceTask.scala From kafka-connect-cassandra with Apache License 2.0 | 5 votes |
package com.tuplejump.kafka.connect.cassandra

import java.util.{List => JList, Map => JMap, ArrayList => JArrayList}

import org.apache.kafka.connect.connector.Task
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}

  override def poll: JList[SourceRecord] = {
    val records = new JArrayList[SourceRecord]()
    val offset = EmptyJMap //context.offsetStorageReader.offset(EmptyJMap) //TODO
    val partition = EmptyJMap //TODO

    for {
      sc <- taskConfig.source
      iterator <- page(sc)
      row <- iterator
    } {
      val record = row.as(sc.schema.route.topic, partition, offset)
      records.add(record)
      if (iterator.done) checkpoint = None //TODO record
    }
    records
  }

  private def page(sc: SourceConfig): Option[AsyncPagingSourceIterator] = {
    //TODO need CDC: https://github.com/tuplejump/kafka-connector/issues/9
    val query = sc.query match {
      case q if q.hasPatternT =>
        //TODO remove Thread.sleep with better option like timestamp.fromNow...etc
        Thread.sleep(sc.query.pollInterval)
        sc.query.slide
      case q =>
        // TODO typed: https://tuplejump.atlassian.net/browse/DB-56 timeuuid,timestamp...
        // by type: WHERE {columnToMove} > checkpoint.value with columnType
        sc.query
    }

    val rs = session.execute(query.cql)
    if (rs.getAvailableWithoutFetching > 0) Some(new AsyncPagingSourceIterator(rs, sc.options.fetchSize))
    else None
  }
}
Example 24
Source File: TimeBasedDataService.scala From kafka-jdbc-connector with Apache License 2.0 | 5 votes |
package com.agoda.kafka.connector.jdbc.services

import java.sql.{Connection, PreparedStatement, ResultSet, Timestamp}
import java.util.{Date, GregorianCalendar, TimeZone}

import com.agoda.kafka.connector.jdbc.JdbcSourceConnectorConstants
import com.agoda.kafka.connector.jdbc.models.DatabaseProduct
import com.agoda.kafka.connector.jdbc.models.DatabaseProduct.{MsSQL, MySQL}
import com.agoda.kafka.connector.jdbc.models.Mode.TimestampMode
import com.agoda.kafka.connector.jdbc.utils.DataConverter
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer
import scala.util.Try

case class TimeBasedDataService(databaseProduct: DatabaseProduct,
                                storedProcedureName: String,
                                batchSize: Int,
                                batchSizeVariableName: String,
                                timestampVariableName: String,
                                var timestampOffset: Long,
                                timestampFieldName: String,
                                topic: String,
                                keyFieldOpt: Option[String],
                                dataConverter: DataConverter,
                                calendar: GregorianCalendar = new GregorianCalendar(TimeZone.getTimeZone("UTC"))
                               ) extends DataService {

  override def createPreparedStatement(connection: Connection): Try[PreparedStatement] = Try {
    val preparedStatement = databaseProduct match {
      case MsSQL => connection.prepareStatement(s"EXECUTE $storedProcedureName @$timestampVariableName = ?, @$batchSizeVariableName = ?")
      case MySQL => connection.prepareStatement(s"CALL $storedProcedureName (@$timestampVariableName := ?, @$batchSizeVariableName := ?)")
    }
    preparedStatement.setTimestamp(1, new Timestamp(timestampOffset), calendar)
    preparedStatement.setObject(2, batchSize)
    preparedStatement
  }

  override def extractRecords(resultSet: ResultSet, schema: Schema): Try[Seq[SourceRecord]] = Try {
    val sourceRecords = ListBuffer.empty[SourceRecord]
    var max = timestampOffset
    while (resultSet.next()) {
      dataConverter.convertRecord(schema, resultSet) map { record =>
        val time = record.get(timestampFieldName).asInstanceOf[Date].getTime
        max = if (time > max) {
          keyFieldOpt match {
            case Some(keyField) =>
              sourceRecords += new SourceRecord(
                Map(JdbcSourceConnectorConstants.STORED_PROCEDURE_NAME_KEY -> storedProcedureName).asJava,
                Map(TimestampMode.entryName -> time).asJava,
                topic, null, schema, record.get(keyField), schema, record
              )
            case None =>
              sourceRecords += new SourceRecord(
                Map(JdbcSourceConnectorConstants.STORED_PROCEDURE_NAME_KEY -> storedProcedureName).asJava,
                Map(TimestampMode.entryName -> time).asJava,
                topic, schema, record
              )
          }
          time
        } else max
      }
    }
    timestampOffset = max
    sourceRecords
  }

  override def toString: String = {
    s"""
       |{
       |  "name" : "${this.getClass.getSimpleName}"
       |  "mode" : "${TimestampMode.entryName}"
       |  "stored-procedure.name" : "$storedProcedureName"
       |}
     """.stripMargin
  }
}
Example 25
Source File: DataService.scala From kafka-jdbc-connector with Apache License 2.0 | 5 votes |
package com.agoda.kafka.connector.jdbc.services

import java.sql.{Connection, PreparedStatement, ResultSet}

import com.agoda.kafka.connector.jdbc.utils.DataConverter
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

import scala.concurrent.duration.Duration
import scala.util.Try

trait DataService {

  def getRecords(connection: Connection, timeout: Duration): Try[Seq[SourceRecord]] = {
    for {
      preparedStatement <- createPreparedStatement(connection)
      resultSet <- executeStoredProcedure(preparedStatement, timeout)
      schema <- dataConverter.convertSchema(storedProcedureName, resultSet.getMetaData)
      records <- extractRecords(resultSet, schema)
    } yield records
  }

  protected def createPreparedStatement(connection: Connection): Try[PreparedStatement]

  protected def extractRecords(resultSet: ResultSet, schema: Schema): Try[Seq[SourceRecord]]

  private def executeStoredProcedure(preparedStatement: PreparedStatement, timeout: Duration): Try[ResultSet] = Try {
    preparedStatement.setQueryTimeout(timeout.toSeconds.toInt)
    preparedStatement.executeQuery
  }
}
Example 26
Source File: DataServiceTest.scala From kafka-jdbc-connector with Apache License 2.0 | 5 votes |
package com.agoda.kafka.connector.jdbc.services

import java.sql.{Connection, PreparedStatement, ResultSet, ResultSetMetaData}

import com.agoda.kafka.connector.jdbc.utils.DataConverter
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord
import org.scalatest.mockito.MockitoSugar
import org.mockito.Mockito._
import org.scalatest.{Matchers, WordSpec}

import scala.concurrent.duration._
import scala.util.Success

class DataServiceTest extends WordSpec with Matchers with MockitoSugar {

  "Data Service" should {

    val spName = "stored-procedure"
    val connection = mock[Connection]
    val converter = mock[DataConverter]
    val sourceRecord1 = mock[SourceRecord]
    val sourceRecord2 = mock[SourceRecord]
    val resultSet = mock[ResultSet]
    val resultSetMetadata = mock[ResultSetMetaData]
    val preparedStatement = mock[PreparedStatement]
    val schema = mock[Schema]

    val dataService = new DataService {
      override def storedProcedureName: String = spName

      override protected def createPreparedStatement(connection: Connection) = Success(preparedStatement)

      override protected def extractRecords(resultSet: ResultSet, schema: Schema) = Success(Seq(sourceRecord1, sourceRecord2))

      override def dataConverter: DataConverter = converter
    }

    "get records" in {
      doNothing().when(preparedStatement).setQueryTimeout(1)
      when(preparedStatement.executeQuery).thenReturn(resultSet)
      when(resultSet.getMetaData).thenReturn(resultSetMetadata)
      when(converter.convertSchema(spName, resultSetMetadata)).thenReturn(Success(schema))

      dataService.getRecords(connection, 1.second) shouldBe Success(Seq(sourceRecord1, sourceRecord2))

      verify(preparedStatement).setQueryTimeout(1)
      verify(preparedStatement).executeQuery
      verify(resultSet).getMetaData
      verify(converter).convertSchema(spName, resultSetMetadata)
    }
  }
}
Example 27
Source File: IotHubPartitionSource.scala From toketi-kafka-connect-iothub with MIT License | 5 votes |
// Copyright (c) Microsoft. All rights reserved.

package com.microsoft.azure.iot.kafka.connect.source

import java.util.{Collections, Map}

import com.typesafe.scalalogging.LazyLogging
import org.apache.kafka.connect.data.Struct
import org.apache.kafka.connect.errors.ConnectException
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.mutable.ListBuffer
import scala.util.control.NonFatal

class IotHubPartitionSource(val dataReceiver: DataReceiver,
                            val partition: String,
                            val topic: String,
                            val batchSize: Int,
                            val eventHubName: String,
                            val sourcePartition: Map[String, String])
  extends LazyLogging
    with JsonSerialization {

  def getRecords: List[SourceRecord] = {

    logger.debug(s"Polling for data from eventHub $eventHubName partition $partition")
    val list = ListBuffer.empty[SourceRecord]
    try {
      val messages: Iterable[IotMessage] = this.dataReceiver.receiveData(batchSize)

      if (messages.isEmpty) {
        logger.debug(s"Finished processing all messages from eventHub $eventHubName " +
          s"partition ${this.partition}")
      } else {
        logger.debug(s"Received ${messages.size} messages from eventHub $eventHubName " +
          s"partition ${this.partition} (requested $batchSize batch)")

        for (msg: IotMessage <- messages) {
          val kafkaMessage: Struct = IotMessageConverter.getIotMessageStruct(msg)
          val sourceOffset = Collections.singletonMap("EventHubOffset",
            kafkaMessage.getString(IotMessageConverter.offsetKey))
          val sourceRecord = new SourceRecord(sourcePartition, sourceOffset, this.topic, kafkaMessage.schema(),
            kafkaMessage)
          list += sourceRecord
        }
      }
    } catch {
      case NonFatal(e) =>
        val errorMsg = s"Error while getting SourceRecords for eventHub $eventHubName " +
          s"partition $partition. Exception - ${e.toString} Stack trace - ${e.printStackTrace()}"
        logger.error(errorMsg)
        throw new ConnectException(errorMsg, e)
    }
    logger.debug(s"Obtained ${list.length} SourceRecords from IotHub")
    list.toList
  }
}
Example 28
Source File: SQSSourceTask.scala From sqs-kafka-connect with Apache License 2.0 | 5 votes |
package com.hivehome.kafka.connect.sqs

import java.util.{List => JList, Map => JMap}
import javax.jms._

import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.util.Try
import scala.util.control.NonFatal

object SQSSourceTask {
  private val SqsQueueField: String = "queue"
  private val MessageId: String = "messageId"
  private val ValueSchema = Schema.STRING_SCHEMA
}

class SQSSourceTask extends SourceTask {
  val logger = LoggerFactory.getLogger(getClass.getName)
  private var conf: Conf = _
  private var consumer: MessageConsumer = null
  // MessageId to MessageHandle used to ack the message on the commitRecord method invocation
  private var unAcknowledgedMessages = Map[String, Message]()

  def version: String = Version()

  def start(props: JMap[String, String]): Unit = {
    conf = Conf.parse(props.asScala.toMap).get

    logger.debug("Creating consumer...")
    synchronized {
      try {
        consumer = SQSConsumer(conf)
        logger.info("Created consumer to SQS topic {} for reading", conf.queueName)
      } catch {
        case NonFatal(e) => logger.error("Exception", e)
      }
    }
  }

  import com.hivehome.kafka.connect.sqs.SQSSourceTask._

  @throws(classOf[InterruptedException])
  def poll: JList[SourceRecord] = {
    def toRecord(msg: Message): SourceRecord = {
      val extracted = MessageExtractor(msg)
      val key = Map(SqsQueueField -> conf.queueName.get).asJava
      val value = Map(MessageId -> msg.getJMSMessageID).asJava
      new SourceRecord(key, value, conf.topicName.get, ValueSchema, extracted)
    }

    assert(consumer != null) // should be initialised as part of start()
    Try {
      Option(consumer.receive).map { msg =>
        logger.info("Received message {}", msg)
        // This operation is not threadsafe as a result the plugin is not threadsafe.
        // However KafkaConnect assigns a single thread to each task and the poll
        // method is always called by a single thread.
        unAcknowledgedMessages = unAcknowledgedMessages.updated(msg.getJMSMessageID, msg)
        toRecord(msg)
      }.toSeq
    }.recover {
      case NonFatal(e) =>
        logger.error("Exception while processing message", e)
        List.empty
    }.get.asJava
  }

  @throws(classOf[InterruptedException])
  override def commitRecord(record: SourceRecord): Unit = {
    val msgId = record.sourceOffset().get(MessageId).asInstanceOf[String]
    val maybeMsg = unAcknowledgedMessages.get(msgId)
    maybeMsg.foreach(_.acknowledge())
    unAcknowledgedMessages = unAcknowledgedMessages - msgId
  }

  def stop() {
    logger.debug("Stopping task")
    synchronized {
      unAcknowledgedMessages = Map()
      try {
        if (consumer != null) {
          consumer.close()
          logger.debug("Closed input stream")
        }
      } catch {
        case NonFatal(e) => logger.error("Failed to close consumer stream: ", e)
      }
      this.notify()
    }
  }
}
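The task above gets at-least-once delivery by holding un-acknowledged JMS messages until Connect calls commitRecord with a record whose source offset carries the message id. A stripped-down, hypothetical sketch of that pattern, with a String standing in for javax.jms.Message:

import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._

// Minimal ack-by-offset tracker: the source offset carries an id, and commit()
// uses it to find and drop the pending message (where acknowledge() would happen).
class AckTracker(queueName: String, topic: String) {
  private var pending = Map.empty[String, String] // messageId -> payload (stand-in for the JMS handle)

  def toRecord(messageId: String, payload: String): SourceRecord = {
    pending = pending.updated(messageId, payload)
    new SourceRecord(
      Map("queue" -> queueName).asJava,     // source partition
      Map("messageId" -> messageId).asJava, // source offset
      topic, Schema.STRING_SCHEMA, payload)
  }

  def commit(record: SourceRecord): Unit = {
    val id = record.sourceOffset().get("messageId").asInstanceOf[String]
    pending -= id // in the real task this is where msg.acknowledge() happens
  }
}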
Example 29
Source File: ValidatorTask.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.connector.validation

import java.util
import java.util.concurrent.TimeUnit

import oharastream.ohara.client.configurator.InspectApi.{RdbInfo, RdbQuery}
import oharastream.ohara.client.configurator.{ErrorApi, InspectApi}
import oharastream.ohara.client.database.DatabaseClient
import oharastream.ohara.common.data.Serializer
import oharastream.ohara.common.util.VersionUtils
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}
import spray.json.{JsObject, _}

import scala.jdk.CollectionConverters._

class ValidatorTask extends SourceTask {
  private[this] var done = false
  private[this] var props: Map[String, String] = _
  private[this] val topic: String = InspectApi.INTERNAL_TOPIC_KEY.topicNameOnKafka
  private[this] var requestId: String = _

  override def start(props: util.Map[String, String]): Unit = {
    this.props = props.asScala.toMap
    requestId = require(InspectApi.REQUEST_ID)
  }

  override def poll(): util.List[SourceRecord] =
    if (done) {
      // just wait the configurator to close this connector
      TimeUnit.SECONDS.sleep(2)
      null
    } else
      try information match {
        case query: RdbQuery => toSourceRecord(validate(query))
      } catch {
        case e: Throwable => toSourceRecord(ErrorApi.of(e))
      } finally done = true

  override def stop(): Unit = {
    // do nothing
  }

  override def version(): String = VersionUtils.VERSION

  private[this] def validate(query: RdbQuery): RdbInfo = {
    val client = DatabaseClient.builder.url(query.url).user(query.user).password(query.password).build
    try RdbInfo(
      name = client.databaseType,
      tables = client.tableQuery
        .catalog(query.catalogPattern.orNull)
        .schema(query.schemaPattern.orNull)
        .tableName(query.tableName.orNull)
        .execute()
    )
    finally client.close()
  }

  private[this] def toJsObject: JsObject = props(InspectApi.SETTINGS_KEY).parseJson.asJsObject

  private[this] def information = require(InspectApi.TARGET_KEY) match {
    case InspectApi.RDB_KIND => InspectApi.RDB_QUERY_FORMAT.read(toJsObject)
    case other: String =>
      throw new IllegalArgumentException(
        s"valid targets are ${InspectApi.RDB_KIND}. current is $other"
      )
  }

  private[this] def toSourceRecord(data: Object): util.List[SourceRecord] =
    util.Arrays.asList(
      new SourceRecord(
        null,
        null,
        topic,
        Schema.BYTES_SCHEMA,
        Serializer.STRING.to(requestId),
        Schema.BYTES_SCHEMA,
        Serializer.OBJECT.to(data)
      )
    )

  private[this] def require(key: String): String =
    props.getOrElse(key, throw new IllegalArgumentException(s"the $key is required"))
}
Example 30
Source File: BulkTableQuerier.scala From kafka-connect-sap with Apache License 2.0 | 5 votes |
package com.sap.kafka.connect.source.querier

import com.sap.kafka.client.hana.HANAJdbcClient
import com.sap.kafka.connect.config.{BaseConfig, BaseConfigConstants}
import com.sap.kafka.connect.source.SourceConnectorConstants
import org.apache.kafka.common.config.ConfigException
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._

class BulkTableQuerier(mode: String, tableOrQuery: String, tablePartition: Int,
                       topic: String, config: BaseConfig,
                       jdbcClient: Option[HANAJdbcClient])
  extends TableQuerier(mode, tableOrQuery, topic, config, jdbcClient) {

  override def createQueryString(): Unit = {
    mode match {
      case BaseConfigConstants.QUERY_MODE_TABLE =>
        if (tablePartition > 0) {
          queryString = Some(s"select * from $tableName PARTITION($tablePartition)")
        } else {
          queryString = Some(s"select * from $tableName")
        }
      case BaseConfigConstants.QUERY_MODE_SQL =>
        queryString = Some(query)
    }
  }

  override def extractRecords(): List[SourceRecord] = {
    if (resultList.isDefined) {
      resultList.get.map(record => {
        var partition: Map[String, String] = null
        mode match {
          case BaseConfigConstants.QUERY_MODE_TABLE =>
            partition = Map(SourceConnectorConstants.TABLE_NAME_KEY -> tableName)
          case BaseConfigConstants.QUERY_MODE_SQL =>
            val partitionName = "Query"
            partition = Map(SourceConnectorConstants.QUERY_NAME_KEY -> partitionName)
          case _ =>
            throw new ConfigException(s"Unexpected query mode: $mode")
        }

        new SourceRecord(partition.asJava, null, topic,
          getPartition(tablePartition, topic), record.schema(), record)
      })
    } else List()
  }

  override def toString: String = "BulkTableQuerier{" +
    "name='" + tableOrQuery + '\'' +
    ", topic='" + topic + '\'' +
    '}'

  private def getPartition(tablePartition: Int, topic: String): Int = {
    val topicProperties = config.topicProperties(topic)
    val maxPartitions = topicProperties("partition.count").toInt
    tablePartition % maxPartitions
  }
}
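getPartition above spreads HANA table partitions over the topic's Kafka partitions with a simple modulo on the configured partition.count. A minimal equivalent with the count passed in directly, assuming partition.count = 3:

// Same mapping as getPartition above, without the config lookup.
def mapToKafkaPartition(tablePartition: Int, maxPartitions: Int): Int =
  tablePartition % maxPartitions

// mapToKafkaPartition(4, 3) == 1, so HANA partitions 0..5 land on Kafka partitions 0,1,2,0,1,2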
Example 31
Source File: HANASourceTaskConversionTest.scala From kafka-connect-sap with Apache License 2.0 | 5 votes |
package com.sap.kafka.connect.source

import com.sap.kafka.client.MetaSchema
import org.apache.kafka.connect.data.Schema.Type
import org.apache.kafka.connect.data.{Field, Schema, Struct}
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._

class HANASourceTaskConversionTest extends HANASourceTaskTestBase {

  override def beforeAll(): Unit = {
    super.beforeAll()
    task.start(singleTableConfig())
  }

  override def afterAll(): Unit = {
    task.stop()
    super.afterAll()
  }

  test("boolean type") {
    typeConversion(Schema.BOOLEAN_SCHEMA, true, java.lang.Boolean.FALSE,
      Schema.BOOLEAN_SCHEMA, java.lang.Boolean.FALSE)
  }

  test("int type") {
    typeConversion(Schema.INT32_SCHEMA, true, new java.lang.Integer(1),
      Schema.INT32_SCHEMA, new Integer(1))
  }

  test("long type") {
    typeConversion(Schema.INT64_SCHEMA, true, new java.lang.Long(1),
      Schema.INT64_SCHEMA, new java.lang.Long(1))
  }

  test("double type") {
    typeConversion(Schema.FLOAT64_SCHEMA, true, new java.lang.Double(1.0),
      Schema.FLOAT64_SCHEMA, new java.lang.Double(1.0))
  }

  test("string type") {
    typeConversion(Schema.STRING_SCHEMA, true, "'a'",
      Schema.STRING_SCHEMA, "a")
  }

  private def typeConversion(sqlType: Schema, nullable: Boolean, sqlValue: Object,
                             convertedSchema: Schema, convertedValue: Object): Unit = {
    val fields = Seq(new Field("id", 1, sqlType))
    jdbcClient.createTable(Some("TEST"), "EMPLOYEES_SOURCE", MetaSchema(null, fields), 3000)
    val connection = jdbcClient.getConnection
    val stmt = connection.createStatement()
    stmt.execute("insert into \"TEST\".\"EMPLOYEES_SOURCE\" values(" + sqlValue.toString + ")")
    val records = task.poll()
    validateRecords(records.asScala.toList, convertedSchema, convertedValue)
    stmt.execute("drop table \"TEST\".\"EMPLOYEES_SOURCE\"")
  }

  private def validateRecords(records: List[SourceRecord], expectedFieldSchema: Schema,
                              expectedValue: Object): Unit = {
    assert(records.size === 1)
    val objValue = records.head.value()
    assert(objValue.isInstanceOf[Struct])
    val value = objValue.asInstanceOf[Struct]
    val schema = value.schema()
    assert(Type.STRUCT === schema.`type`())
    val fields = schema.fields()
    assert(fields.size() === 1)
    val fieldSchema = fields.get(0).schema()
    assert(expectedFieldSchema === fieldSchema)
    assert(expectedValue === value.get(fields.get(0)))
  }
}
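Each assertion above checks that one inserted row comes back from task.poll() as a single-field Struct. Roughly, the expected shape for the INT32 case looks like the hand-built value below; the real schema produced by the connector may carry extra metadata such as a name.

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

// The single-column row the test expects back, built by hand for illustration.
object ExpectedRowShape {
  val rowSchema: Schema = SchemaBuilder.struct().field("id", Schema.INT32_SCHEMA).build()
  val row: Struct = new Struct(rowSchema).put("id", 1)
}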
Example 32
Source File: Transaction.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.blockchain.data

import java.util

import com.datamountaineer.streamreactor.connect.blockchain.data.Input._
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.source.SourceRecord

case class Transaction(lock_time: Long,
                       ver: Int,
                       size: Long,
                       inputs: Seq[Input],
                       rbf: Option[Boolean],
                       time: Long,
                       tx_index: Long,
                       vin_sz: Int,
                       hash: String,
                       vout_sz: Int,
                       relayed_by: String,
                       out: Seq[Output])

object Transaction {
  val ConnectSchema: Schema = SchemaBuilder.struct
    .name("datamountaineer.blockchain.transaction")
    .field("lock_time", Schema.INT64_SCHEMA)
    .field("ver", Schema.INT32_SCHEMA)
    .field("size", Schema.INT64_SCHEMA)
    .field("inputs", SchemaBuilder.array(Input.ConnectSchema).optional().build())
    .field("rbf", Schema.OPTIONAL_BOOLEAN_SCHEMA)
    .field("time", Schema.INT64_SCHEMA)
    .field("tx_index", Schema.INT64_SCHEMA)
    .field("vin_sz", Schema.INT32_SCHEMA)
    .field("hash", Schema.STRING_SCHEMA)
    .field("vout_sz", Schema.INT32_SCHEMA)
    .field("relayed_by", Schema.STRING_SCHEMA)
    .field("out", SchemaBuilder.array(Output.ConnectSchema).optional().build())
    .build()

  implicit class TransactionToSourceRecordConverter(val tx: Transaction) extends AnyVal {
    def toSourceRecord(topic: String, partition: Int, key: Option[String]): SourceRecord = {
      new SourceRecord(
        null,
        null,
        topic,
        partition,
        key.map(_ => Schema.STRING_SCHEMA).orNull,
        key.orNull,
        ConnectSchema,
        tx.toStruct()
      )
    }

    //private def getOffset() = Collections.singletonMap("position", System.currentTimeMillis())

    def toStruct(): Struct = {
      val struct = new Struct(ConnectSchema)
        .put("lock_time", tx.lock_time)
        .put("ver", tx.ver)
        .put("size", tx.size)
        .put("time", tx.time)
        .put("tx_index", tx.tx_index)
        .put("vin_sz", tx.vin_sz)
        .put("hash", tx.hash)
        .put("vout_sz", tx.vout_sz)
        .put("relayed_by", tx.relayed_by)

      tx.out.headOption.foreach { _ =>
        import scala.collection.JavaConverters._
        struct.put("out", tx.out.map(_.toStruct()).asJava)
      }
      tx.rbf.foreach(struct.put("rbf", _))
      tx.inputs.headOption.foreach { _ =>
        val inputs = new util.ArrayList[Struct]
        tx.inputs.foreach(i => inputs.add(i.toStruct()))
        struct.put("inputs", inputs)
      }
      tx.out.headOption.foreach { _ =>
        val outputs = new util.ArrayList[Struct]
        tx.out.foreach(output => outputs.add(output.toStruct()))
      }
      struct
    }
  }
}
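toSourceRecord above uses the keyed eight-argument SourceRecord constructor and passes null for both source partition and offset, so Connect does not track offsets for these records. The same constructor shown in isolation with plain string key and value; the topic name and values are illustrative only.

import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

// Keyed record with an explicit Kafka partition and no source partition/offset.
object KeyedRecordExample {
  def main(args: Array[String]): Unit = {
    val record = new SourceRecord(
      null, null,                      // no source partition / offset
      "blockchain", 0,                 // topic and Kafka partition (illustrative)
      Schema.STRING_SCHEMA, "tx-hash", // key schema and key
      Schema.STRING_SCHEMA, "payload") // value schema and value
    println(record)
  }
}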
Example 33
Source File: JMSReader.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.jms.source.readers

import com.datamountaineer.streamreactor.connect.converters.source.Converter
import com.datamountaineer.streamreactor.connect.jms.JMSSessionProvider
import com.datamountaineer.streamreactor.connect.jms.config.JMSSettings
import com.datamountaineer.streamreactor.connect.jms.source.domain.JMSStructMessage
import com.typesafe.scalalogging.StrictLogging
import javax.jms.{Message, MessageConsumer}
import org.apache.kafka.connect.source.SourceRecord

import scala.util.Try

class JMSReader(settings: JMSSettings) extends StrictLogging {

  val provider = JMSSessionProvider(settings)
  provider.start()

  val consumers: Vector[(String, MessageConsumer)] = (provider.queueConsumers ++ provider.topicsConsumers).toVector
  val convertersMap: Map[String, Option[Converter]] = settings.settings.map(s => (s.source, s.sourceConverters)).toMap
  val topicsMap: Map[String, String] = settings.settings.map(s => (s.source, s.target)).toMap

  def poll(): Vector[(Message, SourceRecord)] = {
    val messages = consumers
      .flatMap({ case (source, consumer) =>
        (0 to settings.batchSize)
          .flatMap(_ => Option(consumer.receiveNoWait()))
          .map(m => (m, convert(source, topicsMap(source), m)))
      })
    messages
  }

  def convert(source: String, target: String, message: Message): SourceRecord = {
    convertersMap(source).getOrElse(None) match {
      case c: Converter => c.convert(target, source, message.getJMSMessageID, JMSStructMessage.getPayload(message))
      case None => JMSStructMessage.getStruct(target, message)
    }
  }

  def stop: Try[Unit] = provider.close()
}

object JMSReader {
  def apply(settings: JMSSettings): JMSReader = new JMSReader(settings)
}
Example 35
Source File: ReThinkSourceReadersFactory.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.rethink.source

import java.util
import java.util.concurrent.LinkedBlockingQueue
import java.util.concurrent.atomic.AtomicBoolean

import com.datamountaineer.streamreactor.connect.rethink.ReThinkConnection
import com.datamountaineer.streamreactor.connect.rethink.config.{ReThinkSourceConfig, ReThinkSourceSetting, ReThinkSourceSettings}
import com.rethinkdb.RethinkDB
import com.rethinkdb.net.{Connection, Cursor}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.data.SchemaBuilder
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

object ReThinkSourceReadersFactory {

  def apply(config: ReThinkSourceConfig, r: RethinkDB): Set[ReThinkSourceReader] = {
    val conn = Some(ReThinkConnection(r, config))
    val settings = ReThinkSourceSettings(config)
    settings.map(s => new ReThinkSourceReader(r, conn.get, s))
  }
}

class ReThinkSourceReader(rethink: RethinkDB, conn: Connection, setting: ReThinkSourceSetting)
  extends StrictLogging {

  logger.info(s"Initialising ReThink Reader for ${setting.source}")
  private val keySchema = SchemaBuilder.string().optional().build()
  private val valueSchema = ChangeFeedStructBuilder.schema
  private val sourcePartition = Map.empty[String, String]
  private val offset = Map.empty[String, String]
  private val stopFeed = new AtomicBoolean(false)
  private val handlingFeed = new AtomicBoolean(false)
  private var feed: Cursor[util.HashMap[String, String]] = _
  val queue = new LinkedBlockingQueue[SourceRecord]()
  val batchSize = setting.batchSize

  def start() = {
    feed = getChangeFeed()
    startFeed(feed)
  }

  def stop() = {
    logger.info(s"Closing change feed for ${setting.source}")
    stopFeed.set(true)
    while (handlingFeed.get()) {
      logger.debug("Waiting for feed to shutdown...")
      Thread.sleep(1000)
    }
    feed.close()
    logger.info(s"Change feed closed for ${setting.source}")
  }

  private def handleFeed(feed: Cursor[util.HashMap[String, String]]) = {
    handlingFeed.set(true)

    //feed.next is blocking
    while (!stopFeed.get()) {
      logger.debug(s"Waiting for next change feed event for ${setting.source}")
      val cdc = convert(feed.next().asScala.toMap)
      queue.put(cdc)
    }
    handlingFeed.set(false)
  }

  private def getChangeFeed(): Cursor[util.HashMap[String, String]] = {
    logger.info(s"Initialising change feed for ${setting.source}")
    rethink
      .db(setting.db)
      .table(setting.source)
      .changes()
      .optArg("include_states", true)
      .optArg("include_initial", setting.initialise)
      .optArg("include_types", true)
      .run(conn)
  }

  private def convert(feed: Map[String, String]) = {
    new SourceRecord(sourcePartition.asJava, offset.asJava, setting.target, keySchema,
      setting.source, valueSchema, ChangeFeedStructBuilder(feed))
  }
}
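The reader above only fills a LinkedBlockingQueue; the SourceTask that owns it (not shown on this page) would drain that queue from poll(). A hypothetical sketch of such a drain, with illustrative names:

import java.util
import java.util.concurrent.LinkedBlockingQueue

import org.apache.kafka.connect.source.SourceRecord

// Drains whatever the change-feed thread has buffered, up to batchSize records.
class QueueDrainingPoll(queue: LinkedBlockingQueue[SourceRecord], batchSize: Int) {
  def poll(): util.List[SourceRecord] = {
    val records = new util.ArrayList[SourceRecord]()
    queue.drainTo(records, batchSize)      // non-blocking; takes only what is already queued
    if (records.isEmpty) null else records // Connect treats null as "nothing this round"
  }
}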
Example 36
Source File: PulsarSourceTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.pulsar.source

import java.util
import java.util.UUID

import com.datamountaineer.streamreactor.connect.converters.source.Converter
import com.datamountaineer.streamreactor.connect.pulsar.config.{PulsarConfigConstants, PulsarSourceConfig, PulsarSourceSettings}
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}
import org.apache.pulsar.client.api.{ClientConfiguration, PulsarClient}
import org.apache.pulsar.client.impl.auth.AuthenticationTls
import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException

import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}

class PulsarSourceTask extends SourceTask with StrictLogging {
  private val progressCounter = new ProgressCounter
  private var enableProgress: Boolean = false
  private var pulsarManager: Option[PulsarManager] = None
  private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation)

  override def start(props: util.Map[String, String]): Unit = {
    logger.info(scala.io.Source.fromInputStream(this.getClass.getResourceAsStream("/pulsar-source-ascii.txt")).mkString + s" $version")
    logger.info(manifest.printManifest())

    val conf = if (context.configs().isEmpty) props else context.configs()

    implicit val settings = PulsarSourceSettings(PulsarSourceConfig(conf), props.getOrDefault("tasks.max", "1").toInt)

    val name = conf.getOrDefault("name", s"kafka-connect-pulsar-source-${UUID.randomUUID().toString}")
    val convertersMap = buildConvertersMap(conf, settings)

    val messageConverter = PulsarMessageConverter(
      convertersMap,
      settings.kcql,
      settings.throwOnConversion,
      settings.pollingTimeout,
      settings.batchSize)

    val clientConf = new ClientConfiguration()

    settings.sslCACertFile.foreach(f => {
      clientConf.setUseTls(true)
      clientConf.setTlsTrustCertsFilePath(f)

      val authParams = settings.sslCertFile.map(f => ("tlsCertFile", f)).toMap ++
        settings.sslCertKeyFile.map(f => ("tlsKeyFile", f)).toMap
      clientConf.setAuthentication(classOf[AuthenticationTls].getName, authParams.asJava)
    })

    pulsarManager = Some(new PulsarManager(PulsarClient.create(settings.connection, clientConf), name, settings.kcql, messageConverter))
    enableProgress = settings.enableProgress
  }

  def buildConvertersMap(props: util.Map[String, String], settings: PulsarSourceSettings): Map[String, Converter] = {
    settings.sourcesToConverters.map { case (topic, clazz) =>
      logger.info(s"Creating converter instance for $clazz")
      val converter = Try(Class.forName(clazz).newInstance()) match {
        case Success(value) => value.asInstanceOf[Converter]
        case Failure(_) =>
          throw new ConfigException(s"${PulsarConfigConstants.KCQL_CONFIG} is invalid. $clazz should have an empty ctor!")
      }
      import scala.collection.JavaConverters._
      converter.initialize(props.asScala.toMap)
      topic -> converter
    }
  }

  override def stop(): Unit = {
    logger.info("Stopping Pulsar source.")
    pulsarManager.foreach(_.close())
    progressCounter.empty
  }

  override def version: String = manifest.version()
}
Example 37
Source File: PulsarMessageConverterTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.pulsar.source

import java.util

import com.datamountaineer.streamreactor.connect.pulsar.config.{PulsarConfigConstants, PulsarSourceConfig, PulsarSourceSettings}
import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil
import org.apache.kafka.connect.source.SourceRecord
import org.apache.pulsar.client.api.MessageBuilder
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import scala.collection.JavaConverters._

class PulsarMessageConverterTest extends AnyWordSpec with Matchers with ConverterUtil {

  val pulsarTopic = "persistent://landoop/standalone/connect/kafka-topic"
  val jsonMessage = "{\"int8\":12,\"int16\":12,\"int32\":12,\"int64\":12,\"float32\":12.2,\"float64\":12.2,\"boolean\":true,\"string\":\"foo\"}"

  "should convert messages" in {

    val props = Map(
      PulsarConfigConstants.HOSTS_CONFIG -> "pulsar://localhost:6650",
      PulsarConfigConstants.KCQL_CONFIG -> s"INSERT INTO kafka_topic SELECT * FROM $pulsarTopic BATCH = 10",
      PulsarConfigConstants.THROW_ON_CONVERT_ERRORS_CONFIG -> "true",
      PulsarConfigConstants.POLLING_TIMEOUT_CONFIG -> "500"
    ).asJava

    val config = PulsarSourceConfig(props)
    val settings = PulsarSourceSettings(config, 1)

    // test part of the task here as well
    val task = new PulsarSourceTask()
    val convertersMap = task.buildConvertersMap(props, settings)

    val converter = PulsarMessageConverter(convertersMap, settings.kcql, false, 100, 100)

    val message = MessageBuilder
      .create
      .setContent(jsonMessage.getBytes)
      .setKey("landoop")
      .setSequenceId(1)
      .build() // pulsar message

    converter.convertMessages(message, pulsarTopic)

    val list = new util.ArrayList[SourceRecord]()
    converter.getRecords(list)
    list.size shouldBe 1
    val record = list.get(0)
    record.key().toString shouldBe "landoop"
    record.value().asInstanceOf[Array[Byte]].map(_.toChar).mkString shouldBe jsonMessage
  }
}