org.apache.kafka.connect.source.SourceRecord Scala Examples
The following examples show how to use org.apache.kafka.connect.source.SourceRecord.
Each example notes the source file it was taken from and the open-source project it belongs to.
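For orientation, here is a minimal, self-contained sketch of the two SourceRecord constructors that appear most often in the examples below; the partition map, offset map, topic name, and payload are illustrative placeholders rather than values from any particular connector.

import java.util.Collections
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

object SourceRecordSketch extends App {
  // Hypothetical source partition and offset maps; real connectors derive these
  // from the external system (file path, queue name, row offset, etc.).
  val sourcePartition = Collections.singletonMap("source", "example-source")
  val sourceOffset    = Collections.singletonMap("position", Long.box(42L))

  // Value-only record: (sourcePartition, sourceOffset, topic, valueSchema, value).
  val valueOnly = new SourceRecord(
    sourcePartition, sourceOffset, "example-topic",
    Schema.STRING_SCHEMA, "some payload")

  // Keyed record: (sourcePartition, sourceOffset, topic, kafkaPartition,
  // keySchema, key, valueSchema, value); a null kafkaPartition lets Kafka choose.
  val keyed = new SourceRecord(
    sourcePartition, sourceOffset, "example-topic", null,
    Schema.STRING_SCHEMA, "some-key",
    Schema.STRING_SCHEMA, "some payload")

  println(valueOnly)
  println(keyed)
}

The source partition identifies where the data came from, and the source offset is what Kafka Connect persists so a task can resume from the last committed position after a restart.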
Example 1
Source File: TwitterStatusReader.scala From kafka-tweet-producer with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util
import java.util.concurrent.{TimeUnit, LinkedBlockingQueue, Executors}
import com.eneco.trading.kafka.connect.twitter.domain.TwitterStatus
import com.twitter.hbc.httpclient.BasicClient
import com.twitter.hbc.twitter4j.Twitter4jStatusClient
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord
import twitter4j._
import scala.collection.JavaConverters._
import Extensions._

class StatusEnqueuer(queue: LinkedBlockingQueue[Status]) extends StatusListener with Logging {
  override def onStallWarning(stallWarning: StallWarning) = log.warn("onStallWarning")

  override def onDeletionNotice(statusDeletionNotice: StatusDeletionNotice) = log.info("onDeletionNotice")

  override def onScrubGeo(l: Long, l1: Long) = {
    log.debug(s"onScrubGeo $l $l1")
  }

  override def onStatus(status: Status) = {
    log.debug("onStatus")
    queue.put(status)
  }

  override def onTrackLimitationNotice(i: Int) = log.info(s"onTrackLimitationNotice $i")

  override def onException(e: Exception) = log.warn("onException " + e.toString)
}

trait StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord
}

object StatusToStringKeyValue extends StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord = {
    new SourceRecord(
      Map("tweetSource" -> status.getSource).asJava, //source partitions?
      Map("tweetId" -> status.getId).asJava, //source offsets?
      topic,
      null,
      Schema.STRING_SCHEMA,
      status.getUser.getScreenName,
      Schema.STRING_SCHEMA,
      status.getText)
  }
}

object StatusToTwitterStatusStructure extends StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord = {
    //val ts = TwitterStatus.struct(TwitterStatus(status))
    new SourceRecord(
      Map("tweetSource" -> status.getSource).asJava, //source partitions?
      Map("tweetId" -> status.getId).asJava, //source offsets?
      topic,
      TwitterStatus.schema,
      TwitterStatus.struct(status))
  }
}

  def stop() = {
    log.info("Stop Twitter client")
    client.stop()
  }
}
Example 2
Source File: HiveSource.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.source

import com.landoop.streamreactor.connect.hive
import com.landoop.streamreactor.connect.hive._
import com.landoop.streamreactor.connect.hive.formats.{HiveFormat, HiveReader, Record}
import com.landoop.streamreactor.connect.hive.source.config.HiveSourceConfig
import com.landoop.streamreactor.connect.hive.source.mapper.{PartitionValueMapper, ProjectionMapper}
import com.landoop.streamreactor.connect.hive.source.offset.HiveSourceOffsetStorageReader
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.hive.metastore.IMetaStoreClient
import org.apache.kafka.connect.data.Struct
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._

class HiveSource(db: DatabaseName,
                 tableName: TableName,
                 topic: Topic,
                 offsetReader: HiveSourceOffsetStorageReader,
                 config: HiveSourceConfig)
                (implicit client: IMetaStoreClient, fs: FileSystem) extends Iterator[SourceRecord] {

  val tableConfig = config.tableOptions.filter(_.tableName == tableName).find(_.topic == topic)
    .getOrElse(sys.error(s"Cannot find table configuration for ${db.value}.${tableName.value} => ${topic.value}"))

  private val table = client.getTable(db.value, tableName.value)
  private val format = HiveFormat(hive.serde(table))
  private val metastoreSchema = HiveSchemas.toKafka(table)
  private val parts = TableFileScanner.scan(db, tableName)

  private val readers = parts.map { case (path, partition) =>
    val fns: Seq[Struct => Struct] = Seq(
      partition.map(new PartitionValueMapper(_).map _),
      tableConfig.projection.map(new ProjectionMapper(_).map _)
    ).flatten
    val mapper: Struct => Struct = Function.chain(fns)

    val sourceOffset = offsetReader.offset(SourcePartition(db, tableName, topic, path)).getOrElse(SourceOffset(0))

    new HiveReader {
      lazy val reader = format.reader(path, sourceOffset.rowNumber, metastoreSchema)

      override def iterator: Iterator[Record] = reader.iterator.map { record =>
        Record(mapper(record.struct), record.path, record.offset)
      }

      override def close(): Unit = reader.close()
    }
  }

  private val iterator: Iterator[Record] = readers.map(_.iterator).reduce(_ ++ _).take(tableConfig.limit)

  override def hasNext: Boolean = iterator.hasNext

  override def next(): SourceRecord = {
    val record = iterator.next
    val sourcePartition = SourcePartition(db, tableName, topic, record.path)
    val offset = SourceOffset(record.offset)

    new SourceRecord(
      fromSourcePartition(sourcePartition).asJava,
      fromSourceOffset(offset).asJava,
      topic.value,
      record.struct.schema,
      record.struct
    )
  }

  def close(): Unit = {
    readers.foreach(_.close())
  }
}
Example 3
Source File: CoapReaderFactory.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.coap.source

import java.util
import java.util.concurrent.LinkedBlockingQueue

import com.datamountaineer.streamreactor.connect.coap.configs.CoapSetting
import com.datamountaineer.streamreactor.connect.coap.connection.CoapManager
import com.datamountaineer.streamreactor.connect.coap.domain.CoapMessageConverter
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.source.SourceRecord
import org.eclipse.californium.core.{CoapHandler, CoapObserveRelation, CoapResponse, WebLink}

class MessageHandler(resource: String, topic: String, queue: LinkedBlockingQueue[SourceRecord])
  extends CoapHandler with StrictLogging {

  val converter = CoapMessageConverter()

  override def onError(): Unit = {
    logger.warn(s"Message dropped for $topic!")
  }

  override def onLoad(response: CoapResponse): Unit = {
    val records = converter.convert(resource, topic, response.advanced())
    logger.debug(s"Received ${response.advanced().toString} for $topic")
    logger.debug(s"Records in queue ${queue.size()} for $topic")
    queue.put(records)
  }
}
Example 4
Source File: CoapSourceTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.coap.source

import java.util
import java.util.concurrent.LinkedBlockingQueue

import com.datamountaineer.streamreactor.connect.coap.configs.{CoapConstants, CoapSettings, CoapSourceConfig}
import com.datamountaineer.streamreactor.connect.queues.QueueHelpers
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}

import scala.collection.JavaConverters._

class CoapSourceTask extends SourceTask with StrictLogging {
  private var readers: Set[CoapReader] = _
  private val progressCounter = new ProgressCounter
  private var enableProgress: Boolean = false
  private val queue = new LinkedBlockingQueue[SourceRecord]()
  private var batchSize: Int = CoapConstants.BATCH_SIZE_DEFAULT
  private var lingerTimeout = CoapConstants.SOURCE_LINGER_MS_DEFAULT
  private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation)

  override def start(props: util.Map[String, String]): Unit = {
    logger.info(scala.io.Source.fromInputStream(getClass.getResourceAsStream("/coap-source-ascii.txt")).mkString + s" $version")
    logger.info(manifest.printManifest())

    val conf = if (context.configs().isEmpty) props else context.configs()

    val config = CoapSourceConfig(conf)
    enableProgress = config.getBoolean(CoapConstants.PROGRESS_COUNTER_ENABLED)
    val settings = CoapSettings(config)
    batchSize = config.getInt(CoapConstants.BATCH_SIZE)
    lingerTimeout = config.getInt(CoapConstants.SOURCE_LINGER_MS)
    enableProgress = config.getBoolean(CoapConstants.PROGRESS_COUNTER_ENABLED)
    readers = CoapReaderFactory(settings, queue)
  }

  override def poll(): util.List[SourceRecord] = {
    val records = new util.ArrayList[SourceRecord]()
    QueueHelpers.drainWithTimeoutNoGauva(records, batchSize, lingerTimeout * 1000000, queue)

    if (enableProgress) {
      progressCounter.update(records.asScala.toVector)
    }
    records
  }

  override def stop(): Unit = {
    logger.info("Stopping Coap source and closing connections.")
    readers.foreach(_.stop())
    progressCounter.empty
  }

  override def version: String = manifest.version()
}
Example 5
Source File: SimpleFileConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source

import java.util

import com.datamountaineer.streamreactor.connect.ftp.source.SourceRecordProducers.SourceRecordProducer
import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.storage.OffsetStorageReader

import scala.collection.JavaConverters._

class SimpleFileConverter(props: util.Map[String, String], offsetStorageReader: OffsetStorageReader)
  extends FileConverter(props, offsetStorageReader) {

  val cfg = new FtpSourceConfig(props)
  val metaStore = new ConnectFileMetaDataStore(offsetStorageReader)
  val recordConverter: SourceRecordConverter = cfg.sourceRecordConverter
  val recordMaker: SourceRecordProducer = cfg.keyStyle match {
    case KeyStyle.String => SourceRecordProducers.stringKeyRecord
    case KeyStyle.Struct => SourceRecordProducers.structKeyRecord
  }

  override def convert(topic: String, meta: FileMetaData, body: FileBody): Seq[SourceRecord] = {
    metaStore.set(meta.attribs.path, meta)
    recordConverter.convert(recordMaker(metaStore, topic, meta, body)).asScala
  }

  override def getFileOffset(path: String): Option[FileMetaData] = metaStore.get(path)
}
Example 6
Source File: FileConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source

import java.util

import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.storage.OffsetStorageReader

import scala.util.{Failure, Success, Try}

abstract class FileConverter(props: util.Map[String, String], offsetStorageReader: OffsetStorageReader) {
  def convert(topic: String, meta: FileMetaData, body: FileBody): Seq[SourceRecord]

  def getFileOffset(path: String): Option[FileMetaData]
}

object FileConverter {
  def apply(klass: Class[_], props: util.Map[String, String], offsetStorageReader: OffsetStorageReader): FileConverter = {
    Try(klass.getDeclaredConstructor(classOf[util.Map[String, String]], classOf[OffsetStorageReader])
      .newInstance(props, offsetStorageReader).asInstanceOf[FileConverter]) match {
      case Success(fc) => fc
      case Failure(err) => throw new Exception(s"Failed to create ${klass} as instance of ${classOf[FileConverter]}", err)
    }
  }
}
Example 7
Source File: MaxLinesFileConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source

import java.util

import com.datamountaineer.streamreactor.connect.ftp.source.SourceRecordProducers.SourceRecordProducer
import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.storage.OffsetStorageReader

import scala.collection.JavaConverters._

class MaxLinesFileConverter(props: util.Map[String, String], offsetStorageReader: OffsetStorageReader)
  extends FileConverter(props, offsetStorageReader) {

  val cfg = new FtpSourceConfig(props)
  val metaStore = new ConnectFileMetaDataStore(offsetStorageReader)
  val recordConverter: SourceRecordConverter = cfg.sourceRecordConverter
  val recordMaker: SourceRecordProducer = cfg.keyStyle match {
    case KeyStyle.String => SourceRecordProducers.stringKeyRecord
    case KeyStyle.Struct => SourceRecordProducers.structKeyRecord
  }
  val lineSep = System.getProperty("line.separator").getBytes

  override def convert(topic: String, meta: FileMetaData, body: FileBody): Seq[SourceRecord] = {
    if (meta.attribs.size == meta.offset) {
      // Last slice of the file: there may be no line separator at the end of the file
      metaStore.set(meta.attribs.path, meta)
      recordConverter.convert(recordMaker(metaStore, topic, meta, body)).asScala
    } else {
      val offsetInSlice = findEndPositionOfLastMatch(lineSep, body.bytes)
      // TODO: warn that no line separator was found; suggest that the line size may exceed the slice size
      val offset = meta.offset - (body.bytes.size - offsetInSlice)
      metaStore.set(meta.attribs.path, meta.offset(offset))
      val trimmedBody = FileBody(util.Arrays.copyOfRange(body.bytes, 0, offsetInSlice), 0)
      recordConverter.convert(recordMaker(metaStore, topic, meta, trimmedBody)).asScala
    }
  }

  def findEndPositionOfLastMatch(bytesToMatch: Array[Byte], content: Array[Byte]): Int = {
    for (pos <- content.size to bytesToMatch.size by -1) {
      val window = util.Arrays.copyOfRange(content, pos - bytesToMatch.size, pos)
      if (window.deep == bytesToMatch.deep) return pos
    }
    -1
  }

  override def getFileOffset(path: String): Option[FileMetaData] = {
    metaStore.get(path)
  }
}
Example 8
Source File: SourceRecordProducers.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.source.SourceRecord

object SourceRecordProducers {
  type SourceRecordProducer = (ConnectFileMetaDataStore, String, FileMetaData, FileBody) => SourceRecord

  val fileInfoSchema = SchemaBuilder.struct()
    .field("name", Schema.STRING_SCHEMA)
    .field("offset", Schema.INT64_SCHEMA)
    .build()

  def stringKeyRecord(store: ConnectFileMetaDataStore, topic: String, meta: FileMetaData, body: FileBody): SourceRecord =
    new SourceRecord(
      store.fileMetasToConnectPartition(meta), // source part
      store.fileMetasToConnectOffset(meta),    // source off
      topic,                                   // topic
      Schema.STRING_SCHEMA,                    // key sch
      meta.attribs.path,                       // key
      Schema.BYTES_SCHEMA,                     // val sch
      body.bytes                               // val
    )

  def structKeyRecord(store: ConnectFileMetaDataStore, topic: String, meta: FileMetaData, body: FileBody): SourceRecord = {
    new SourceRecord(
      store.fileMetasToConnectPartition(meta), // source part
      store.fileMetasToConnectOffset(meta),    // source off
      topic,                                   // topic
      fileInfoSchema,                          // key sch
      new Struct(fileInfoSchema)
        .put("name", meta.attribs.path)
        .put("offset", body.offset),
      Schema.BYTES_SCHEMA,                     // val sch
      body.bytes                               // val
    )
  }
}
Example 9
Source File: TwitterReader.scala From kafka-tweet-producer with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util.concurrent.LinkedBlockingQueue

import com.twitter.hbc.ClientBuilder
import com.twitter.hbc.core.Constants
import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint
import com.twitter.hbc.core.endpoint.StatusesSampleEndpoint
import com.twitter.hbc.core.endpoint.DefaultStreamingEndpoint
import com.twitter.hbc.core.processor.StringDelimitedProcessor
import com.twitter.hbc.core.endpoint.Location
import com.twitter.hbc.httpclient.auth.OAuth1
import org.apache.kafka.connect.source.{SourceRecord, SourceTaskContext}
import twitter4j.Status

import scala.collection.JavaConversions._
import scala.collection.JavaConverters._

object TwitterReader {
  def apply(config: TwitterSourceConfig, context: SourceTaskContext) = {
    //endpoints
    val endpoint: DefaultStreamingEndpoint =
      if (config.getString(TwitterSourceConfig.STREAM_TYPE).equals(TwitterSourceConfig.STREAM_TYPE_SAMPLE)) {
        new StatusesSampleEndpoint()
      } else {
        val trackEndpoint = new StatusesFilterEndpoint()

        val terms = config.getList(TwitterSourceConfig.TRACK_TERMS)
        if (!terms.isEmpty) {
          trackEndpoint.trackTerms(terms)
        }

        val locs = config.getList(TwitterSourceConfig.TRACK_LOCATIONS)
        if (!locs.isEmpty) {
          val locations = locs.toList.map({ x => Double.box(x.toDouble) }).grouped(4).toList
            .map({ l => new Location(new Location.Coordinate(l(0), l(1)), new Location.Coordinate(l(2), l(3))) })
            .asJava
          trackEndpoint.locations(locations)
        }

        val follow = config.getList(TwitterSourceConfig.TRACK_FOLLOW)
        if (!follow.isEmpty) {
          val users = follow.toList.map({ x => Long.box(x.trim.toLong) }).asJava
          trackEndpoint.followings(users)
        }

        trackEndpoint
      }

    endpoint.stallWarnings(false)

    val language = config.getList(TwitterSourceConfig.LANGUAGE)
    if (!language.isEmpty) {
      // endpoint.languages(language) doesn't work as intended!
      endpoint.addQueryParameter(TwitterSourceConfig.LANGUAGE, language.toList.mkString(","))
    }

    //twitter auth stuff
    val auth = new OAuth1(
      config.getString(TwitterSourceConfig.CONSUMER_KEY_CONFIG),
      config.getPassword(TwitterSourceConfig.CONSUMER_SECRET_CONFIG).value,
      config.getString(TwitterSourceConfig.TOKEN_CONFIG),
      config.getPassword(TwitterSourceConfig.SECRET_CONFIG).value)

    //batch size to take from the queue
    val batchSize = config.getInt(TwitterSourceConfig.BATCH_SIZE)
    val batchTimeout = config.getDouble(TwitterSourceConfig.BATCH_TIMEOUT)

    //The Kafka topic to append to
    val topic = config.getString(TwitterSourceConfig.TOPIC)

    //queue for client to buffer to
    val queue = new LinkedBlockingQueue[String](10000)

    //how the output is formatted
    val statusConverter = config.getString(TwitterSourceConfig.OUTPUT_FORMAT) match {
      case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRING => StatusToStringKeyValue
      case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRUCTURED => StatusToTwitterStatusStructure
    }

    //build basic client
    val client = new ClientBuilder()
      .name(config.getString(TwitterSourceConfig.TWITTER_APP_NAME))
      .hosts(Constants.STREAM_HOST)
      .endpoint(endpoint)
      .authentication(auth)
      .processor(new StringDelimitedProcessor(queue))
      .build()

    new TwitterStatusReader(client = client, rawQueue = queue, batchSize = batchSize,
      batchTimeout = batchTimeout, topic = topic, statusConverter = statusConverter)
  }
}
Example 10
Source File: TwitterSourceTask.scala From kafka-tweet-producer with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util

import org.apache.kafka.connect.source.{SourceRecord, SourceTask}

class TwitterSourceTask extends SourceTask with Logging {
  private var reader: Option[TwitterStatusReader] = null

  override def poll(): util.List[SourceRecord] = {
    require(reader.isDefined, "Twitter client not initialized!")
    reader.get.poll()
  }

  override def start(props: util.Map[String, String]): Unit = {
    val sourceConfig = new TwitterSourceConfig(props)
    reader = Some(TwitterReader(config = sourceConfig, context = context))
  }

  override def stop() = {
    reader.foreach(r => r.stop())
  }

  override def version(): String = ""
}
Example 11
Source File: MqttSourceTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.mqtt.source

import java.io.File
import java.util

import com.datamountaineer.streamreactor.connect.converters.source.Converter
import com.datamountaineer.streamreactor.connect.mqtt.config.{MqttConfigConstants, MqttSourceConfig, MqttSourceSettings}
import com.datamountaineer.streamreactor.connect.mqtt.connection.MqttClientConnectionFn
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.common.config.ConfigException
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}

import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}

class MqttSourceTask extends SourceTask with StrictLogging {
  private val progressCounter = new ProgressCounter
  private var enableProgress: Boolean = false
  private var mqttManager: Option[MqttManager] = None
  private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation)

  override def start(props: util.Map[String, String]): Unit = {
    logger.info(scala.io.Source.fromInputStream(this.getClass.getResourceAsStream("/mqtt-source-ascii.txt")).mkString + s" $version")
    logger.info(manifest.printManifest())

    val conf = if (context.configs().isEmpty) props else context.configs()

    val settings = MqttSourceSettings(MqttSourceConfig(conf))

    settings.sslCACertFile.foreach { file =>
      if (!new File(file).exists()) {
        throw new ConfigException(s"${MqttConfigConstants.SSL_CA_CERT_CONFIG} is invalid. Can't locate $file")
      }
    }

    settings.sslCertFile.foreach { file =>
      if (!new File(file).exists()) {
        throw new ConfigException(s"${MqttConfigConstants.SSL_CERT_CONFIG} is invalid. Can't locate $file")
      }
    }

    settings.sslCertKeyFile.foreach { file =>
      if (!new File(file).exists()) {
        throw new ConfigException(s"${MqttConfigConstants.SSL_CERT_KEY_CONFIG} is invalid. Can't locate $file")
      }
    }

    val convertersMap = settings.sourcesToConverters.map { case (topic, clazz) =>
      logger.info(s"Creating converter instance for $clazz")
      val converter = Try(Class.forName(clazz).newInstance()) match {
        case Success(value) => value.asInstanceOf[Converter]
        case Failure(_) => throw new ConfigException(s"Invalid ${MqttConfigConstants.KCQL_CONFIG} is invalid. $clazz should have an empty ctor!")
      }
      import scala.collection.JavaConverters._
      converter.initialize(conf.asScala.toMap)
      topic -> converter
    }

    logger.info("Starting Mqtt source...")
    mqttManager = Some(new MqttManager(MqttClientConnectionFn.apply, convertersMap, settings))
    enableProgress = settings.enableProgress
  }

  override def stop(): Unit = {
    logger.info("Stopping Mqtt source.")
    mqttManager.foreach(_.close())
    progressCounter.empty
  }

  override def version: String = manifest.version()
}
Example 12
Source File: TwitterReader.scala From kafka-connect-twitter with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util.concurrent.LinkedBlockingQueue

import com.twitter.hbc.ClientBuilder
import com.twitter.hbc.core.Constants
import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint
import com.twitter.hbc.core.endpoint.StatusesSampleEndpoint
import com.twitter.hbc.core.endpoint.DefaultStreamingEndpoint
import com.twitter.hbc.core.processor.StringDelimitedProcessor
import com.twitter.hbc.core.endpoint.Location
import com.twitter.hbc.httpclient.auth.OAuth1
import org.apache.kafka.connect.source.{SourceRecord, SourceTaskContext}
import twitter4j.Status

import scala.collection.JavaConversions._
import scala.collection.JavaConverters._

object TwitterReader {
  def apply(config: TwitterSourceConfig, context: SourceTaskContext) = {
    //endpoints
    val endpoint: DefaultStreamingEndpoint =
      if (config.getString(TwitterSourceConfig.STREAM_TYPE).equals(TwitterSourceConfig.STREAM_TYPE_SAMPLE)) {
        new StatusesSampleEndpoint()
      } else {
        val trackEndpoint = new StatusesFilterEndpoint()

        val terms = config.getList(TwitterSourceConfig.TRACK_TERMS)
        if (!terms.isEmpty) {
          trackEndpoint.trackTerms(terms)
        }

        val locs = config.getList(TwitterSourceConfig.TRACK_LOCATIONS)
        if (!locs.isEmpty) {
          val locations = locs.toList.map({ x => Double.box(x.toDouble) }).grouped(4).toList
            .map({ l => new Location(new Location.Coordinate(l(0), l(1)), new Location.Coordinate(l(2), l(3))) })
            .asJava
          trackEndpoint.locations(locations)
        }

        val follow = config.getList(TwitterSourceConfig.TRACK_FOLLOW)
        if (!follow.isEmpty) {
          val users = follow.toList.map({ x => Long.box(x.trim.toLong) }).asJava
          trackEndpoint.followings(users)
        }

        trackEndpoint
      }

    endpoint.stallWarnings(false)

    val language = config.getList(TwitterSourceConfig.LANGUAGE)
    if (!language.isEmpty) {
      // endpoint.languages(language) doesn't work as intended!
      endpoint.addQueryParameter(TwitterSourceConfig.LANGUAGE, language.toList.mkString(","))
    }

    //twitter auth stuff
    val auth = new OAuth1(
      config.getString(TwitterSourceConfig.CONSUMER_KEY_CONFIG),
      config.getPassword(TwitterSourceConfig.CONSUMER_SECRET_CONFIG).value,
      config.getString(TwitterSourceConfig.TOKEN_CONFIG),
      config.getPassword(TwitterSourceConfig.SECRET_CONFIG).value)

    //batch size to take from the queue
    val batchSize = config.getInt(TwitterSourceConfig.BATCH_SIZE)
    val batchTimeout = config.getDouble(TwitterSourceConfig.BATCH_TIMEOUT)

    //The Kafka topic to append to
    val topic = config.getString(TwitterSourceConfig.TOPIC)

    //queue for client to buffer to
    val queue = new LinkedBlockingQueue[String](10000)

    //how the output is formatted
    val statusConverter = config.getString(TwitterSourceConfig.OUTPUT_FORMAT) match {
      case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRING => StatusToStringKeyValue
      case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRUCTURED => StatusToTwitterStatusStructure
    }

    //build basic client
    val client = new ClientBuilder()
      .name(config.getString(TwitterSourceConfig.TWITTER_APP_NAME))
      .hosts(Constants.STREAM_HOST)
      .endpoint(endpoint)
      .authentication(auth)
      .processor(new StringDelimitedProcessor(queue))
      .build()

    new TwitterStatusReader(client = client, rawQueue = queue, batchSize = batchSize,
      batchTimeout = batchTimeout, topic = topic, statusConverter = statusConverter)
  }
}
Example 13
Source File: TwitterSourceTask.scala From kafka-connect-twitter with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util

import org.apache.kafka.connect.source.{SourceRecord, SourceTask}

class TwitterSourceTask extends SourceTask with Logging {
  private var reader: Option[TwitterStatusReader] = null

  override def poll(): util.List[SourceRecord] = {
    require(reader.isDefined, "Twitter client not initialized!")
    reader.get.poll()
  }

  override def start(props: util.Map[String, String]): Unit = {
    val sourceConfig = new TwitterSourceConfig(props)
    reader = Some(TwitterReader(config = sourceConfig, context = context))
  }

  override def stop() = {
    reader.foreach(r => r.stop())
  }

  override def version(): String = ""
}
Example 14
Source File: TwitterStatusReader.scala From kafka-connect-twitter with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util
import java.util.concurrent.{TimeUnit, LinkedBlockingQueue, Executors}
import com.eneco.trading.kafka.connect.twitter.domain.TwitterStatus
import com.twitter.hbc.httpclient.BasicClient
import com.twitter.hbc.twitter4j.Twitter4jStatusClient
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord
import twitter4j._
import scala.collection.JavaConverters._
import Extensions._

class StatusEnqueuer(queue: LinkedBlockingQueue[Status]) extends StatusListener with Logging {
  override def onStallWarning(stallWarning: StallWarning) = log.warn("onStallWarning")

  override def onDeletionNotice(statusDeletionNotice: StatusDeletionNotice) = log.info("onDeletionNotice")

  override def onScrubGeo(l: Long, l1: Long) = {
    log.debug(s"onScrubGeo $l $l1")
  }

  override def onStatus(status: Status) = {
    log.debug("onStatus")
    queue.put(status)
  }

  override def onTrackLimitationNotice(i: Int) = log.info(s"onTrackLimitationNotice $i")

  override def onException(e: Exception) = log.warn("onException " + e.toString)
}

trait StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord
}

object StatusToStringKeyValue extends StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord = {
    new SourceRecord(
      Map("tweetSource" -> status.getSource).asJava, //source partitions?
      Map("tweetId" -> status.getId).asJava, //source offsets?
      topic,
      null,
      Schema.STRING_SCHEMA,
      status.getUser.getScreenName,
      Schema.STRING_SCHEMA,
      status.getText,
      status.getCreatedAt.getTime)
  }
}

object StatusToTwitterStatusStructure extends StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord = {
    //val ts = TwitterStatus.struct(TwitterStatus(status))
    new SourceRecord(
      Map("tweetSource" -> status.getSource).asJava, //source partitions?
      Map("tweetId" -> status.getId).asJava, //source offsets?
      topic,
      null,
      Schema.STRING_SCHEMA,
      status.getUser.getScreenName,
      TwitterStatus.schema,
      TwitterStatus.struct(status),
      status.getCreatedAt.getTime)
  }
}

  def stop() = {
    log.info("Stop Twitter client")
    client.stop()
  }
}
Example 15
Source File: JsonPassThroughConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source

import java.util.Collections

import com.landoop.json.sql.JacksonJson
import org.apache.kafka.connect.source.SourceRecord

class JsonPassThroughConverter extends Converter {
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    require(bytes != null, s"Invalid $bytes parameter")
    val json = new String(bytes, "utf-8")
    val jsonNode = JacksonJson.asJson(json)
    var keysValue = keys.flatMap { key =>
      Option(KeyExtractor.extract(jsonNode, key.split('.').toVector)).map(_.toString)
    }.mkString(keyDelimiter)

    // If keys are not provided, default one will be constructed
    if (keysValue == "") {
      keysValue = s"$sourceTopic$keyDelimiter$messageId"
    }

    new SourceRecord(
      Collections.singletonMap(Converter.TopicKey, sourceTopic),
      null,
      kafkaTopic,
      null,
      keysValue,
      null,
      json)
  }
}
Example 16
Source File: AvroConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source

import java.io.File
import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import io.confluent.connect.avro.AvroData
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.io.DecoderFactory
import org.apache.avro.{Schema => AvroSchema}
import org.apache.kafka.connect.data.{Schema, Struct}
import org.apache.kafka.connect.source.SourceRecord
import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException

class AvroConverter extends Converter {
  private val avroData = new AvroData(8)
  private var sourceToSchemaMap: Map[String, AvroSchema] = Map.empty
  private var avroReadersMap: Map[String, GenericDatumReader[GenericRecord]] = Map.empty

  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    Option(bytes) match {
      case None =>
        new SourceRecord(
          Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          avroData.toConnectSchema(sourceToSchemaMap(sourceTopic)),
          null)
      case Some(_) =>
        val reader = avroReadersMap.getOrElse(sourceTopic.toLowerCase,
          throw new ConfigException(s"Invalid ${AvroConverter.SCHEMA_CONFIG} is not configured for $sourceTopic"))
        val decoder = DecoderFactory.get().binaryDecoder(bytes, null)
        val record = reader.read(null, decoder)
        val schemaAndValue = avroData.toConnectData(sourceToSchemaMap(sourceTopic.toLowerCase), record)
        val value = schemaAndValue.value()
        value match {
          case s: Struct if keys.nonEmpty =>
            val keysValue = keys.flatMap { key =>
              Option(KeyExtractor.extract(s, key.split('.').toVector)).map(_.toString)
            }.mkString(keyDelimiter)

            new SourceRecord(
              Collections.singletonMap(Converter.TopicKey, sourceTopic),
              null,
              kafkaTopic,
              Schema.STRING_SCHEMA,
              keysValue,
              schemaAndValue.schema(),
              schemaAndValue.value())
          case _ =>
            new SourceRecord(
              Collections.singletonMap(Converter.TopicKey, sourceTopic),
              null,
              kafkaTopic,
              MsgKey.schema,
              MsgKey.getStruct(sourceTopic, messageId),
              schemaAndValue.schema(),
              schemaAndValue.value())
        }
    }
  }

  override def initialize(config: Map[String, String]): Unit = {
    sourceToSchemaMap = AvroConverter.getSchemas(config)
    avroReadersMap = sourceToSchemaMap.map { case (key, schema) =>
      key -> new GenericDatumReader[GenericRecord](schema)
    }
  }
}

object AvroConverter {
  val SCHEMA_CONFIG = "connect.source.converter.avro.schemas"

  def getSchemas(config: Map[String, String]): Map[String, AvroSchema] = {
    config.getOrElse(SCHEMA_CONFIG, throw new ConfigException(s"$SCHEMA_CONFIG is not provided"))
      .toString
      .split(';')
      .filter(_.trim.nonEmpty)
      .map(_.split("="))
      .map {
        case Array(source, path) =>
          val file = new File(path)
          if (!file.exists()) {
            throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The file $path doesn't exist!")
          }
          val s = source.trim.toLowerCase()
          if (s.isEmpty) {
            throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The topic is not valid for entry containing $path")
          }
          s -> new AvroSchema.Parser().parse(file)
        case other => throw new ConfigException(s"$SCHEMA_CONFIG is not properly set. The format is Mqtt_Source->AVRO_FILE")
      }.toMap
  }
}
Example 17
Source File: JsonSimpleConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source

import java.nio.charset.Charset
import java.util
import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import org.apache.kafka.connect.data._
import org.apache.kafka.connect.source.SourceRecord

class JsonSimpleConverter extends Converter {
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    require(bytes != null, s"Invalid $bytes parameter")
    val json = new String(bytes, Charset.defaultCharset)
    val schemaAndValue = JsonSimpleConverter.convert(sourceTopic, json)
    val value = schemaAndValue.value()
    value match {
      case s: Struct if keys.nonEmpty =>
        val keysValue = keys.flatMap { key =>
          Option(KeyExtractor.extract(s, key.split('.').toVector)).map(_.toString)
        }.mkString(keyDelimiter)
        new SourceRecord(
          Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          Schema.STRING_SCHEMA,
          keysValue,
          schemaAndValue.schema(),
          schemaAndValue.value())
      case _ =>
        new SourceRecord(
          Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          MsgKey.schema,
          MsgKey.getStruct(sourceTopic, messageId),
          schemaAndValue.schema(),
          schemaAndValue.value())
    }
  }
}

object JsonSimpleConverter {
  import org.json4s._
  import org.json4s.native.JsonMethods._

  def convert(name: String, str: String): SchemaAndValue = convert(name, parse(str))

  def convert(name: String, value: JValue): SchemaAndValue = {
    value match {
      case JArray(arr) =>
        val values = new util.ArrayList[AnyRef]()
        val sv = convert(name, arr.head)
        values.add(sv.value())
        arr.tail.foreach { v => values.add(convert(name, v).value()) }

        val schema = SchemaBuilder.array(sv.schema()).optional().build()
        new SchemaAndValue(schema, values)
      case JBool(b) => new SchemaAndValue(Schema.BOOLEAN_SCHEMA, b)
      case JDecimal(d) =>
        val schema = Decimal.builder(d.scale).optional().build()
        new SchemaAndValue(schema, Decimal.fromLogical(schema, d.bigDecimal))
      case JDouble(d) => new SchemaAndValue(Schema.FLOAT64_SCHEMA, d)
      case JInt(i) => new SchemaAndValue(Schema.INT64_SCHEMA, i.toLong) //on purpose! LONG (we might get later records with long entries)
      case JLong(l) => new SchemaAndValue(Schema.INT64_SCHEMA, l)
      case JNull | JNothing => new SchemaAndValue(Schema.STRING_SCHEMA, null)
      case JString(s) => new SchemaAndValue(Schema.STRING_SCHEMA, s)
      case JObject(values) =>
        val builder = SchemaBuilder.struct().name(name.replace("/", "_"))

        val fields = values.map { case (n, v) =>
          val schemaAndValue = convert(n, v)
          builder.field(n, schemaAndValue.schema())
          n -> schemaAndValue.value()
        }.toMap

        val schema = builder.build()
        val struct = new Struct(schema)
        fields.foreach { case (field, v) => struct.put(field, v) }
        new SchemaAndValue(schema, struct)
    }
  }
}
Example 18
Source File: JsonOptNullConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source

import java.nio.charset.Charset
import java.util
import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import org.apache.kafka.connect.data._
import org.apache.kafka.connect.source.SourceRecord

class JsonOptNullConverter extends Converter {
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    require(bytes != null, s"Invalid $bytes parameter")
    val json = new String(bytes, Charset.defaultCharset)
    val schemaAndValue = JsonOptNullConverter.convert(sourceTopic, json)
    val value = schemaAndValue.value()
    value match {
      case s: Struct if keys.nonEmpty =>
        val keysValue = keys.flatMap { key =>
          Option(KeyExtractor.extract(s, key.split('.').toVector)).map(_.toString)
        }.mkString(keyDelimiter)
        new SourceRecord(
          Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          Schema.STRING_SCHEMA,
          keysValue,
          schemaAndValue.schema(),
          schemaAndValue.value())
      case _ =>
        new SourceRecord(
          Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          MsgKey.schema,
          MsgKey.getStruct(sourceTopic, messageId),
          schemaAndValue.schema(),
          schemaAndValue.value())
    }
  }
}

object JsonOptNullConverter {
  import org.json4s._
  import org.json4s.native.JsonMethods._

  def convert(name: String, str: String): SchemaAndValue = convert(name, parse(str))

  def convert(name: String, value: JValue): SchemaAndValue = {
    value match {
      case JArray(arr) =>
        val values = new util.ArrayList[AnyRef]()
        val sv = convert(name, arr.head)
        values.add(sv.value())
        arr.tail.foreach { v => values.add(convert(name, v).value()) }

        val schema = SchemaBuilder.array(sv.schema()).optional().build()
        new SchemaAndValue(schema, values)
      case JBool(b) => new SchemaAndValue(Schema.BOOLEAN_SCHEMA, b)
      case JDecimal(d) =>
        val schema = Decimal.builder(d.scale).optional().build()
        new SchemaAndValue(schema, Decimal.fromLogical(schema, d.bigDecimal))
      case JDouble(d) => new SchemaAndValue(Schema.FLOAT64_SCHEMA, d)
      case JInt(i) => new SchemaAndValue(Schema.INT64_SCHEMA, i.toLong) //on purpose! LONG (we might get later records with long entries)
      case JLong(l) => new SchemaAndValue(Schema.INT64_SCHEMA, l)
      case JNull | JNothing => new SchemaAndValue(Schema.OPTIONAL_STRING_SCHEMA, null)
      case JString(s) => new SchemaAndValue(Schema.STRING_SCHEMA, s)
      case JObject(values) =>
        val builder = SchemaBuilder.struct().name(name.replace("/", "_"))

        val fields = values.map { case (n, v) =>
          val schemaAndValue = convert(n, v)
          builder.field(n, schemaAndValue.schema())
          n -> schemaAndValue.value()
        }.toMap

        val schema = builder.build()
        val struct = new Struct(schema)
        fields.foreach { case (field, v) => struct.put(field, v) }
        new SchemaAndValue(schema, struct)
    }
  }
}
Example 19
Source File: BytesConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source

import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

class BytesConverter extends Converter {
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    new SourceRecord(
      Collections.singletonMap(Converter.TopicKey, sourceTopic),
      null,
      kafkaTopic,
      MsgKey.schema,
      MsgKey.getStruct(sourceTopic, messageId),
      Schema.BYTES_SCHEMA,
      bytes)
  }
}
Example 20
Source File: TableQuerier.scala From kafka-connect-sap with Apache License 2.0 | 5 votes |
package com.sap.kafka.connect.source.querier

import com.sap.kafka.client.hana.HANAJdbcClient
import com.sap.kafka.connect.config.{BaseConfig, BaseConfigConstants}
import com.sap.kafka.connect.config.hana.HANAConfig
import com.sap.kafka.utils.hana.HANAJdbcTypeConverter
import org.apache.kafka.connect.data.{Schema, Struct}
import org.apache.kafka.connect.source.SourceRecord
import org.slf4j.LoggerFactory

import scala.util.Random

abstract class TableQuerier(mode: String, tableOrQuery: String,
                            topic: String, config: BaseConfig,
                            var jdbcClient: Option[HANAJdbcClient])
  extends Comparable[TableQuerier] {

  var tableName: String = if (mode.equals(BaseConfigConstants.QUERY_MODE_TABLE)) tableOrQuery else null
  var query: String = if (mode.equals(BaseConfigConstants.QUERY_MODE_SQL)) tableOrQuery else null

  var lastUpdate: Long = 0
  var schema: Schema = _
  var queryString: Option[String] = None
  var resultList: Option[List[Struct]] = None

  val log = LoggerFactory.getLogger(getClass)

  def getLastUpdate(): Long = lastUpdate

  def getOrCreateQueryString(): Option[String] = {
    createQueryString()
    queryString
  }

  def createQueryString(): Unit

  def querying(): Boolean = resultList.isDefined

  def maybeStartQuery(): Unit = {
    if (resultList.isEmpty) {
      schema = getSchema()
      queryString = getOrCreateQueryString()
      val batchMaxRows = config.batchMaxRows
      resultList = getOrCreateJdbcClient().get.executeQuery(schema, queryString.get, 0, batchMaxRows)
      log.info(resultList.size.toString)
    }
  }

  def extractRecords(): List[SourceRecord]

  def close(now: Long): Unit = {
    resultList = None
    schema = null

    lastUpdate = now
  }

  protected def getOrCreateJdbcClient(): Option[HANAJdbcClient] = {
    if (jdbcClient.isDefined) {
      return jdbcClient
    }

    config match {
      case hanaConfig: HANAConfig => Some(HANAJdbcClient(hanaConfig))
      case _ => throw new RuntimeException("Cannot create Jdbc Client")
    }
  }

  private def getSchema(): Schema = {
    mode match {
      case BaseConfigConstants.QUERY_MODE_TABLE =>
        if (getOrCreateJdbcClient().get.isInstanceOf[HANAJdbcClient]) {
          val metadata = getOrCreateJdbcClient().get.getMetaData(tableOrQuery, None)
          HANAJdbcTypeConverter.convertHANAMetadataToSchema(tableName, metadata)
        } else {
          throw new RuntimeException("Jdbc Client is not available")
        }
      case BaseConfigConstants.QUERY_MODE_SQL =>
        if (getOrCreateJdbcClient().get.isInstanceOf[HANAJdbcClient]) {
          val metadata = getOrCreateJdbcClient().get.getMetadata(tableOrQuery)
          HANAJdbcTypeConverter.convertHANAMetadataToSchema("Query" + Random.nextInt, metadata)
        } else {
          throw new RuntimeException("Jdbc Client is not available")
        }
      case _ => throw new RuntimeException("Other Query modes are not supported")
    }
  }

  override def compareTo(other: TableQuerier): Int = {
    if (this.lastUpdate < other.lastUpdate) {
      -1
    } else if (this.lastUpdate > other.lastUpdate) {
      0
    } else {
      this.tableName.compareTo(other.tableName)
    }
  }
}
Example 21
Source File: TestMapKeyToString.scala From kafka-connect-transformers with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.transforms

import java.util

import com.datamountaineer.streamreactor.connect.transforms.MapKeyToString.Value
import org.apache.kafka.connect.data._
import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.transforms.util.Requirements.requireStruct
import org.scalatest.{Matchers, WordSpec}

import scala.collection.JavaConversions._

class TestMapKeyToString extends WordSpec with Matchers {
  val MAP_SCHEMA = SchemaBuilder.map(Schema.OPTIONAL_INT64_SCHEMA, Schema.OPTIONAL_STRING_SCHEMA).optional().build();
  val FIELDS_CONFIG = "fields"

  "should transform all map key to string schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(FIELDS_CONFIG -> "map1, map2"))

    val transformedRecord = transform.apply(mockRecord(true));
    val value = requireStruct(transformedRecord.value, null)
    val schema = transformedRecord.valueSchema

    schema.field("map1").schema().keySchema().`type`().getName shouldBe "string"
    value.getMap("map1").get("1").toString shouldBe "value1-1"

    schema.field("map2").schema().keySchema().`type`().getName shouldBe "string"
    value.getMap("map2").get("1").toString shouldBe "value2-1"
  }

  "should transform only one map key to string schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(FIELDS_CONFIG -> "map1"))

    val transformedRecord = transform.apply(mockRecord(true));
    val value = requireStruct(transformedRecord.value, null)
    val schema = transformedRecord.valueSchema

    schema.field("map1").schema().keySchema().`type`().getName shouldBe "string"
    value.getMap("map1").get("1").toString shouldBe "value1-1"

    schema.field("map2").schema().keySchema().`type`().getName shouldBe "int64"
    value.getMap("map2").get(1L).toString shouldBe "value2-1"
  }

  private def mockRecord(withSchema: Boolean) = {
    val simpleStructSchema = SchemaBuilder.struct.name("name").version(1).doc("doc")
      .field("magic", Schema.OPTIONAL_INT64_SCHEMA)
      .field("map1", MAP_SCHEMA)
      .field("map2", MAP_SCHEMA)
      .build

    val simpleStruct = new Struct(simpleStructSchema)
      .put("magic", 42L)
      .put("map1", new util.HashMap[Long, String] {
        put(1L, "value1-1")
        put(2L, "value1-2")
      })
      .put("map2", new util.HashMap[Long, String] {
        put(1L, "value2-1")
        put(2L, "value2-2")
      })

    new SourceRecord(null, null, "test", 0, if (withSchema) simpleStructSchema else null, simpleStruct)
  }
}
Example 22
Source File: TestNestingFields.scala From kafka-connect-transformers with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.transforms

import java.util.Date

import com.datamountaineer.streamreactor.connect.transforms.NestingFields.Value
import org.apache.kafka.connect.data._
import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.transforms.util.Requirements.requireStruct
import org.scalatest.{Matchers, WordSpec}

import scala.collection.JavaConversions._

class TestNestingFields extends WordSpec with Matchers {
  val OPTIONAL_TIMESTAMP_SCHEMA = Timestamp.builder().optional().build()
  val OPTIONAL_DECIMAL_SCHEMA = Decimal.builder(18).optional().build()
  private val NESTED_NAME_CONFIG = "nested.name"
  private val FIELDS_CONFIG = "fields"

  "should append another field with two nested fields when have schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(
      NESTED_NAME_CONFIG -> "id",
      FIELDS_CONFIG -> "dateValue1, decimalValue1")
    )

    val transformedRecord = transform.apply(mockRecord(true));
    val value = requireStruct(transformedRecord.value, null)
    val schema = transformedRecord.valueSchema
    val nestedSchema = schema.field("id").schema()
    val nestedValue = requireStruct(value.get("id"), null)

    nestedSchema.field("dateValue1").schema().`type`() shouldBe schema.field("dateValue1").schema().`type`()
    nestedValue.get("dateValue1") shouldBe value.get("dateValue1")

    nestedSchema.field("decimalValue1").schema().`type`() shouldBe schema.field("decimalValue1").schema().`type`()
    nestedValue.get("decimalValue1") shouldBe value.get("decimalValue1")
  }

  "should append another field with one nested fields when have schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(
      NESTED_NAME_CONFIG -> "id",
      FIELDS_CONFIG -> "decimalValue1")
    )

    val transformedRecord = transform.apply(mockRecord(true));
    val value = requireStruct(transformedRecord.value, null)
    val schema = transformedRecord.valueSchema
    val nestedSchema = schema.field("id").schema()
    val nestedValue = requireStruct(value.get("id"), null)

    nestedSchema.field("decimalValue1").schema().`type`() shouldBe schema.field("decimalValue1").schema().`type`()
    nestedValue.get("decimalValue1") shouldBe value.get("decimalValue1")
  }

  "should append another field with one nested fields when don't have schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(
      NESTED_NAME_CONFIG -> "id",
      FIELDS_CONFIG -> "decimalValue1")
    )

    val transformedRecord = transform.apply(mockRecord(true));
    val value = requireStruct(transformedRecord.value, null)
    val nestedValue = requireStruct(value.get("id"), null)

    nestedValue.get("decimalValue1") shouldBe value.get("decimalValue1")
  }

  private def mockRecord(withSchema: Boolean) = {
    val simpleStructSchema = SchemaBuilder.struct.name("name").version(1).doc("doc")
      .field("magic", Schema.OPTIONAL_INT64_SCHEMA)
      .field("dateValue1", OPTIONAL_TIMESTAMP_SCHEMA)
      .field("decimalValue1", OPTIONAL_DECIMAL_SCHEMA)
      .build

    val simpleStruct = new Struct(simpleStructSchema)
      .put("magic", 42L)
      .put("dateValue1", new Date())
      .put("decimalValue1", BigDecimal(10.6).bigDecimal.setScale(18))

    new SourceRecord(null, null, "test", 0, if (withSchema) simpleStructSchema else null, simpleStruct)
  }
}
Example 23
Source File: CassandraSourceTask.scala From kafka-connect-cassandra with Apache License 2.0 | 5 votes |
package com.tuplejump.kafka.connect.cassandra

import java.util.{List => JList, Map => JMap, ArrayList => JArrayList}

import org.apache.kafka.connect.connector.Task
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}

  override def poll: JList[SourceRecord] = {
    val records = new JArrayList[SourceRecord]()
    val offset = EmptyJMap //context.offsetStorageReader.offset(EmptyJMap) //TODO
    val partition = EmptyJMap //TODO

    for {
      sc <- taskConfig.source
      iterator <- page(sc)
      row <- iterator
    } {
      val record = row.as(sc.schema.route.topic, partition, offset)
      records.add(record)
      if (iterator.done) checkpoint = None //TODO record
    }
    records
  }

  private def page(sc: SourceConfig): Option[AsyncPagingSourceIterator] = {
    //TODO need CDC: https://github.com/tuplejump/kafka-connector/issues/9
    val query = sc.query match {
      case q if q.hasPatternT =>
        //TODO remove Thread.sleep with better option like timestamp.fromNow...etc
        Thread.sleep(sc.query.pollInterval)
        sc.query.slide
      case q =>
        // TODO typed: https://tuplejump.atlassian.net/browse/DB-56 timeuuid,timestamp...
        // by type: WHERE {columnToMove} > checkpoint.value with columnType
        sc.query
    }

    val rs = session.execute(query.cql)
    if (rs.getAvailableWithoutFetching > 0) Some(new AsyncPagingSourceIterator(rs, sc.options.fetchSize))
    else None
  }
}
Example 24
Source File: TimeBasedDataService.scala From kafka-jdbc-connector with Apache License 2.0 | 5 votes |
package com.agoda.kafka.connector.jdbc.services

import java.sql.{Connection, PreparedStatement, ResultSet, Timestamp}
import java.util.{Date, GregorianCalendar, TimeZone}

import com.agoda.kafka.connector.jdbc.JdbcSourceConnectorConstants
import com.agoda.kafka.connector.jdbc.models.DatabaseProduct
import com.agoda.kafka.connector.jdbc.models.DatabaseProduct.{MsSQL, MySQL}
import com.agoda.kafka.connector.jdbc.models.Mode.TimestampMode
import com.agoda.kafka.connector.jdbc.utils.DataConverter
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer
import scala.util.Try

case class TimeBasedDataService(databaseProduct: DatabaseProduct,
                                storedProcedureName: String,
                                batchSize: Int,
                                batchSizeVariableName: String,
                                timestampVariableName: String,
                                var timestampOffset: Long,
                                timestampFieldName: String,
                                topic: String,
                                keyFieldOpt: Option[String],
                                dataConverter: DataConverter,
                                calendar: GregorianCalendar = new GregorianCalendar(TimeZone.getTimeZone("UTC"))
                               ) extends DataService {

  override def createPreparedStatement(connection: Connection): Try[PreparedStatement] = Try {
    val preparedStatement = databaseProduct match {
      case MsSQL => connection.prepareStatement(s"EXECUTE $storedProcedureName @$timestampVariableName = ?, @$batchSizeVariableName = ?")
      case MySQL => connection.prepareStatement(s"CALL $storedProcedureName (@$timestampVariableName := ?, @$batchSizeVariableName := ?)")
    }
    preparedStatement.setTimestamp(1, new Timestamp(timestampOffset), calendar)
    preparedStatement.setObject(2, batchSize)
    preparedStatement
  }

  override def extractRecords(resultSet: ResultSet, schema: Schema): Try[Seq[SourceRecord]] = Try {
    val sourceRecords = ListBuffer.empty[SourceRecord]
    var max = timestampOffset
    while (resultSet.next()) {
      dataConverter.convertRecord(schema, resultSet) map { record =>
        val time = record.get(timestampFieldName).asInstanceOf[Date].getTime
        max = if (time > max) {
          keyFieldOpt match {
            case Some(keyField) =>
              sourceRecords += new SourceRecord(
                Map(JdbcSourceConnectorConstants.STORED_PROCEDURE_NAME_KEY -> storedProcedureName).asJava,
                Map(TimestampMode.entryName -> time).asJava,
                topic, null, schema, record.get(keyField), schema, record
              )
            case None =>
              sourceRecords += new SourceRecord(
                Map(JdbcSourceConnectorConstants.STORED_PROCEDURE_NAME_KEY -> storedProcedureName).asJava,
                Map(TimestampMode.entryName -> time).asJava,
                topic, schema, record
              )
          }
          time
        } else max
      }
    }
    timestampOffset = max
    sourceRecords
  }

  override def toString: String = {
    s"""
       |{
       |  "name" : "${this.getClass.getSimpleName}"
       |  "mode" : "${TimestampMode.entryName}"
       |  "stored-procedure.name" : "$storedProcedureName"
       |}
     """.stripMargin
  }
}
Example 25
Source File: DataService.scala From kafka-jdbc-connector with Apache License 2.0 | 5 votes |
package com.agoda.kafka.connector.jdbc.services

import java.sql.{Connection, PreparedStatement, ResultSet}

import com.agoda.kafka.connector.jdbc.utils.DataConverter
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

import scala.concurrent.duration.Duration
import scala.util.Try

trait DataService {

  def getRecords(connection: Connection, timeout: Duration): Try[Seq[SourceRecord]] = {
    for {
      preparedStatement <- createPreparedStatement(connection)
      resultSet <- executeStoredProcedure(preparedStatement, timeout)
      schema <- dataConverter.convertSchema(storedProcedureName, resultSet.getMetaData)
      records <- extractRecords(resultSet, schema)
    } yield records
  }

  protected def createPreparedStatement(connection: Connection): Try[PreparedStatement]

  protected def extractRecords(resultSet: ResultSet, schema: Schema): Try[Seq[SourceRecord]]

  private def executeStoredProcedure(preparedStatement: PreparedStatement, timeout: Duration): Try[ResultSet] = Try {
    preparedStatement.setQueryTimeout(timeout.toSeconds.toInt)
    preparedStatement.executeQuery
  }
}
Example 26
Source File: DataServiceTest.scala From kafka-jdbc-connector with Apache License 2.0 | 5 votes |
package com.agoda.kafka.connector.jdbc.services

import java.sql.{Connection, PreparedStatement, ResultSet, ResultSetMetaData}

import com.agoda.kafka.connector.jdbc.utils.DataConverter
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord
import org.scalatest.mockito.MockitoSugar
import org.mockito.Mockito._
import org.scalatest.{Matchers, WordSpec}

import scala.concurrent.duration._
import scala.util.Success

class DataServiceTest extends WordSpec with Matchers with MockitoSugar {

  "Data Service" should {

    val spName = "stored-procedure"
    val connection = mock[Connection]
    val converter = mock[DataConverter]
    val sourceRecord1 = mock[SourceRecord]
    val sourceRecord2 = mock[SourceRecord]
    val resultSet = mock[ResultSet]
    val resultSetMetadata = mock[ResultSetMetaData]
    val preparedStatement = mock[PreparedStatement]
    val schema = mock[Schema]

    val dataService = new DataService {
      override def storedProcedureName: String = spName

      override protected def createPreparedStatement(connection: Connection) = Success(preparedStatement)

      override protected def extractRecords(resultSet: ResultSet, schema: Schema) = Success(Seq(sourceRecord1, sourceRecord2))

      override def dataConverter: DataConverter = converter
    }

    "get records" in {
      doNothing().when(preparedStatement).setQueryTimeout(1)
      when(preparedStatement.executeQuery).thenReturn(resultSet)
      when(resultSet.getMetaData).thenReturn(resultSetMetadata)
      when(converter.convertSchema(spName, resultSetMetadata)).thenReturn(Success(schema))

      dataService.getRecords(connection, 1.second) shouldBe Success(Seq(sourceRecord1, sourceRecord2))

      verify(preparedStatement).setQueryTimeout(1)
      verify(preparedStatement).executeQuery
      verify(resultSet).getMetaData
      verify(converter).convertSchema(spName, resultSetMetadata)
    }
  }
}
Example 27
Source File: IotHubPartitionSource.scala From toketi-kafka-connect-iothub with MIT License | 5 votes |
// Copyright (c) Microsoft. All rights reserved.

package com.microsoft.azure.iot.kafka.connect.source

import java.util.{Collections, Map}

import com.typesafe.scalalogging.LazyLogging
import org.apache.kafka.connect.data.Struct
import org.apache.kafka.connect.errors.ConnectException
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.mutable.ListBuffer
import scala.util.control.NonFatal

class IotHubPartitionSource(val dataReceiver: DataReceiver,
                            val partition: String,
                            val topic: String,
                            val batchSize: Int,
                            val eventHubName: String,
                            val sourcePartition: Map[String, String])
  extends LazyLogging
    with JsonSerialization {

  def getRecords: List[SourceRecord] = {

    logger.debug(s"Polling for data from eventHub $eventHubName partition $partition")
    val list = ListBuffer.empty[SourceRecord]
    try {
      val messages: Iterable[IotMessage] = this.dataReceiver.receiveData(batchSize)

      if (messages.isEmpty) {
        logger.debug(s"Finished processing all messages from eventHub $eventHubName " +
          s"partition ${this.partition}")
      } else {
        logger.debug(s"Received ${messages.size} messages from eventHub $eventHubName " +
          s"partition ${this.partition} (requested $batchSize batch)")

        for (msg: IotMessage <- messages) {
          val kafkaMessage: Struct = IotMessageConverter.getIotMessageStruct(msg)
          val sourceOffset = Collections.singletonMap("EventHubOffset",
            kafkaMessage.getString(IotMessageConverter.offsetKey))
          val sourceRecord = new SourceRecord(sourcePartition, sourceOffset, this.topic, kafkaMessage.schema(),
            kafkaMessage)
          list += sourceRecord
        }
      }
    } catch {
      case NonFatal(e) =>
        val errorMsg = s"Error while getting SourceRecords for eventHub $eventHubName " +
          s"partition $partition. Exception - ${e.toString} Stack trace - ${e.printStackTrace()}"
        logger.error(errorMsg)
        throw new ConnectException(errorMsg, e)
    }
    logger.debug(s"Obtained ${list.length} SourceRecords from IotHub")
    list.toList
  }
}
Example 28
Source File: SQSSourceTask.scala From sqs-kafka-connect with Apache License 2.0 | 5 votes |
package com.hivehome.kafka.connect.sqs

import java.util.{List => JList, Map => JMap}
import javax.jms._

import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.util.Try
import scala.util.control.NonFatal

object SQSSourceTask {
  private val SqsQueueField: String = "queue"
  private val MessageId: String = "messageId"
  private val ValueSchema = Schema.STRING_SCHEMA
}

class SQSSourceTask extends SourceTask {
  val logger = LoggerFactory.getLogger(getClass.getName)
  private var conf: Conf = _
  private var consumer: MessageConsumer = null
  // MessageId to MessageHandle used to ack the message on the commitRecord method invocation
  private var unAcknowledgedMessages = Map[String, Message]()

  def version: String = Version()

  def start(props: JMap[String, String]): Unit = {
    conf = Conf.parse(props.asScala.toMap).get

    logger.debug("Creating consumer...")
    synchronized {
      try {
        consumer = SQSConsumer(conf)
        logger.info("Created consumer to SQS topic {} for reading", conf.queueName)
      } catch {
        case NonFatal(e) => logger.error("Exception", e)
      }
    }
  }

  import com.hivehome.kafka.connect.sqs.SQSSourceTask._

  @throws(classOf[InterruptedException])
  def poll: JList[SourceRecord] = {
    def toRecord(msg: Message): SourceRecord = {
      val extracted = MessageExtractor(msg)
      val key = Map(SqsQueueField -> conf.queueName.get).asJava
      val value = Map(MessageId -> msg.getJMSMessageID).asJava
      new SourceRecord(key, value, conf.topicName.get, ValueSchema, extracted)
    }

    assert(consumer != null) // should be initialised as part of start()
    Try {
      Option(consumer.receive).map { msg =>
        logger.info("Received message {}", msg)
        // This operation is not threadsafe as a result the plugin is not threadsafe.
        // However KafkaConnect assigns a single thread to each task and the poll
        // method is always called by a single thread.
        unAcknowledgedMessages = unAcknowledgedMessages.updated(msg.getJMSMessageID, msg)
        toRecord(msg)
      }.toSeq
    }.recover {
      case NonFatal(e) =>
        logger.error("Exception while processing message", e)
        List.empty
    }.get.asJava
  }

  @throws(classOf[InterruptedException])
  override def commitRecord(record: SourceRecord): Unit = {
    val msgId = record.sourceOffset().get(MessageId).asInstanceOf[String]
    val maybeMsg = unAcknowledgedMessages.get(msgId)
    maybeMsg.foreach(_.acknowledge())
    unAcknowledgedMessages = unAcknowledgedMessages - msgId
  }

  def stop() {
    logger.debug("Stopping task")
    synchronized {
      unAcknowledgedMessages = Map()
      try {
        if (consumer != null) {
          consumer.close()
          logger.debug("Closed input stream")
        }
      } catch {
        case NonFatal(e) => logger.error("Failed to close consumer stream: ", e)
      }
      this.notify()
    }
  }
}
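The task above gets at-least-once delivery by holding un-acknowledged JMS messages until Connect calls commitRecord with a record whose source offset carries the message id. A stripped-down, hypothetical sketch of that pattern, with a String standing in for javax.jms.Message:

import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._

// Minimal ack-by-offset tracker: the source offset carries an id, and commit()
// uses it to find and drop the pending message (where acknowledge() would happen).
class AckTracker(queueName: String, topic: String) {
  private var pending = Map.empty[String, String] // messageId -> payload (stand-in for the JMS handle)

  def toRecord(messageId: String, payload: String): SourceRecord = {
    pending = pending.updated(messageId, payload)
    new SourceRecord(
      Map("queue" -> queueName).asJava,     // source partition
      Map("messageId" -> messageId).asJava, // source offset
      topic, Schema.STRING_SCHEMA, payload)
  }

  def commit(record: SourceRecord): Unit = {
    val id = record.sourceOffset().get("messageId").asInstanceOf[String]
    pending -= id // in the real task this is where msg.acknowledge() happens
  }
}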
Example 29
Source File: ValidatorTask.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.connector.validation

import java.util
import java.util.concurrent.TimeUnit

import oharastream.ohara.client.configurator.InspectApi.{RdbInfo, RdbQuery}
import oharastream.ohara.client.configurator.{ErrorApi, InspectApi}
import oharastream.ohara.client.database.DatabaseClient
import oharastream.ohara.common.data.Serializer
import oharastream.ohara.common.util.VersionUtils
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}
import spray.json.{JsObject, _}

import scala.jdk.CollectionConverters._

class ValidatorTask extends SourceTask {
  private[this] var done = false
  private[this] var props: Map[String, String] = _
  private[this] val topic: String = InspectApi.INTERNAL_TOPIC_KEY.topicNameOnKafka
  private[this] var requestId: String = _

  override def start(props: util.Map[String, String]): Unit = {
    this.props = props.asScala.toMap
    requestId = require(InspectApi.REQUEST_ID)
  }

  override def poll(): util.List[SourceRecord] =
    if (done) {
      // just wait the configurator to close this connector
      TimeUnit.SECONDS.sleep(2)
      null
    } else
      try information match {
        case query: RdbQuery => toSourceRecord(validate(query))
      } catch {
        case e: Throwable => toSourceRecord(ErrorApi.of(e))
      } finally done = true

  override def stop(): Unit = {
    // do nothing
  }

  override def version(): String = VersionUtils.VERSION

  private[this] def validate(query: RdbQuery): RdbInfo = {
    val client = DatabaseClient.builder.url(query.url).user(query.user).password(query.password).build
    try RdbInfo(
      name = client.databaseType,
      tables = client.tableQuery
        .catalog(query.catalogPattern.orNull)
        .schema(query.schemaPattern.orNull)
        .tableName(query.tableName.orNull)
        .execute()
    )
    finally client.close()
  }

  private[this] def toJsObject: JsObject = props(InspectApi.SETTINGS_KEY).parseJson.asJsObject

  private[this] def information = require(InspectApi.TARGET_KEY) match {
    case InspectApi.RDB_KIND => InspectApi.RDB_QUERY_FORMAT.read(toJsObject)
    case other: String =>
      throw new IllegalArgumentException(
        s"valid targets are ${InspectApi.RDB_KIND}. current is $other"
      )
  }

  private[this] def toSourceRecord(data: Object): util.List[SourceRecord] =
    util.Arrays.asList(
      new SourceRecord(
        null,
        null,
        topic,
        Schema.BYTES_SCHEMA,
        Serializer.STRING.to(requestId),
        Schema.BYTES_SCHEMA,
        Serializer.OBJECT.to(data)
      )
    )

  private[this] def require(key: String): String =
    props.getOrElse(key, throw new IllegalArgumentException(s"the $key is required"))
}
Example 30
Source File: BulkTableQuerier.scala From kafka-connect-sap with Apache License 2.0 | 5 votes |
package com.sap.kafka.connect.source.querier

import com.sap.kafka.client.hana.HANAJdbcClient
import com.sap.kafka.connect.config.{BaseConfig, BaseConfigConstants}
import com.sap.kafka.connect.source.SourceConnectorConstants
import org.apache.kafka.common.config.ConfigException
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._

class BulkTableQuerier(mode: String, tableOrQuery: String, tablePartition: Int,
                       topic: String, config: BaseConfig,
                       jdbcClient: Option[HANAJdbcClient])
  extends TableQuerier(mode, tableOrQuery, topic, config, jdbcClient) {

  override def createQueryString(): Unit = {
    mode match {
      case BaseConfigConstants.QUERY_MODE_TABLE =>
        if (tablePartition > 0) {
          queryString = Some(s"select * from $tableName PARTITION($tablePartition)")
        } else {
          queryString = Some(s"select * from $tableName")
        }
      case BaseConfigConstants.QUERY_MODE_SQL =>
        queryString = Some(query)
    }
  }

  override def extractRecords(): List[SourceRecord] = {
    if (resultList.isDefined) {
      resultList.get.map(record => {
        var partition: Map[String, String] = null
        mode match {
          case BaseConfigConstants.QUERY_MODE_TABLE =>
            partition = Map(SourceConnectorConstants.TABLE_NAME_KEY -> tableName)
          case BaseConfigConstants.QUERY_MODE_SQL =>
            val partitionName = "Query"
            partition = Map(SourceConnectorConstants.QUERY_NAME_KEY -> partitionName)
          case _ =>
            throw new ConfigException(s"Unexpected query mode: $mode")
        }

        new SourceRecord(partition.asJava, null, topic,
          getPartition(tablePartition, topic), record.schema(), record)
      })
    } else List()
  }

  override def toString: String = "BulkTableQuerier{" +
    "name='" + tableOrQuery + '\'' +
    ", topic='" + topic + '\'' +
    '}'

  private def getPartition(tablePartition: Int, topic: String): Int = {
    val topicProperties = config.topicProperties(topic)
    val maxPartitions = topicProperties("partition.count").toInt
    tablePartition % maxPartitions
  }
}
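getPartition above spreads HANA table partitions over the topic's Kafka partitions with a simple modulo on the configured partition.count. A minimal equivalent with the count passed in directly, assuming partition.count = 3:

// Same mapping as getPartition above, without the config lookup.
def mapToKafkaPartition(tablePartition: Int, maxPartitions: Int): Int =
  tablePartition % maxPartitions

// mapToKafkaPartition(4, 3) == 1, so HANA partitions 0..5 land on Kafka partitions 0,1,2,0,1,2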
Example 31
Source File: HANASourceTaskConversionTest.scala From kafka-connect-sap with Apache License 2.0 | 5 votes |
package com.sap.kafka.connect.source

import com.sap.kafka.client.MetaSchema
import org.apache.kafka.connect.data.Schema.Type
import org.apache.kafka.connect.data.{Field, Schema, Struct}
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._

class HANASourceTaskConversionTest extends HANASourceTaskTestBase {

  override def beforeAll(): Unit = {
    super.beforeAll()
    task.start(singleTableConfig())
  }

  override def afterAll(): Unit = {
    task.stop()
    super.afterAll()
  }

  test("boolean type") {
    typeConversion(Schema.BOOLEAN_SCHEMA, true, java.lang.Boolean.FALSE,
      Schema.BOOLEAN_SCHEMA, java.lang.Boolean.FALSE)
  }

  test("int type") {
    typeConversion(Schema.INT32_SCHEMA, true, new java.lang.Integer(1),
      Schema.INT32_SCHEMA, new Integer(1))
  }

  test("long type") {
    typeConversion(Schema.INT64_SCHEMA, true, new java.lang.Long(1),
      Schema.INT64_SCHEMA, new java.lang.Long(1))
  }

  test("double type") {
    typeConversion(Schema.FLOAT64_SCHEMA, true, new java.lang.Double(1.0),
      Schema.FLOAT64_SCHEMA, new java.lang.Double(1.0))
  }

  test("string type") {
    typeConversion(Schema.STRING_SCHEMA, true, "'a'",
      Schema.STRING_SCHEMA, "a")
  }

  private def typeConversion(sqlType: Schema, nullable: Boolean, sqlValue: Object,
                             convertedSchema: Schema, convertedValue: Object): Unit = {
    val fields = Seq(new Field("id", 1, sqlType))
    jdbcClient.createTable(Some("TEST"), "EMPLOYEES_SOURCE", MetaSchema(null, fields), 3000)
    val connection = jdbcClient.getConnection
    val stmt = connection.createStatement()
    stmt.execute("insert into \"TEST\".\"EMPLOYEES_SOURCE\" values(" + sqlValue.toString + ")")
    val records = task.poll()
    validateRecords(records.asScala.toList, convertedSchema, convertedValue)
    stmt.execute("drop table \"TEST\".\"EMPLOYEES_SOURCE\"")
  }

  private def validateRecords(records: List[SourceRecord], expectedFieldSchema: Schema,
                              expectedValue: Object): Unit = {
    assert(records.size === 1)
    val objValue = records.head.value()
    assert(objValue.isInstanceOf[Struct])
    val value = objValue.asInstanceOf[Struct]
    val schema = value.schema()
    assert(Type.STRUCT === schema.`type`())
    val fields = schema.fields()
    assert(fields.size() === 1)
    val fieldSchema = fields.get(0).schema()
    assert(expectedFieldSchema === fieldSchema)
    assert(expectedValue === value.get(fields.get(0)))
  }
}
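Each assertion above checks that one inserted row comes back from task.poll() as a single-field Struct. Roughly, the expected shape for the INT32 case looks like the hand-built value below; the real schema produced by the connector may carry extra metadata such as a name.

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

// The single-column row the test expects back, built by hand for illustration.
object ExpectedRowShape {
  val rowSchema: Schema = SchemaBuilder.struct().field("id", Schema.INT32_SCHEMA).build()
  val row: Struct = new Struct(rowSchema).put("id", 1)
}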
Example 32
Source File: Transaction.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.blockchain.data

import java.util

import com.datamountaineer.streamreactor.connect.blockchain.data.Input._
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.source.SourceRecord

case class Transaction(lock_time: Long,
                       ver: Int,
                       size: Long,
                       inputs: Seq[Input],
                       rbf: Option[Boolean],
                       time: Long,
                       tx_index: Long,
                       vin_sz: Int,
                       hash: String,
                       vout_sz: Int,
                       relayed_by: String,
                       out: Seq[Output])

object Transaction {
  val ConnectSchema: Schema = SchemaBuilder.struct
    .name("datamountaineer.blockchain.transaction")
    .field("lock_time", Schema.INT64_SCHEMA)
    .field("ver", Schema.INT32_SCHEMA)
    .field("size", Schema.INT64_SCHEMA)
    .field("inputs", SchemaBuilder.array(Input.ConnectSchema).optional().build())
    .field("rbf", Schema.OPTIONAL_BOOLEAN_SCHEMA)
    .field("time", Schema.INT64_SCHEMA)
    .field("tx_index", Schema.INT64_SCHEMA)
    .field("vin_sz", Schema.INT32_SCHEMA)
    .field("hash", Schema.STRING_SCHEMA)
    .field("vout_sz", Schema.INT32_SCHEMA)
    .field("relayed_by", Schema.STRING_SCHEMA)
    .field("out", SchemaBuilder.array(Output.ConnectSchema).optional().build())
    .build()

  implicit class TransactionToSourceRecordConverter(val tx: Transaction) extends AnyVal {
    def toSourceRecord(topic: String, partition: Int, key: Option[String]): SourceRecord = {
      new SourceRecord(
        null,
        null,
        topic,
        partition,
        key.map(_ => Schema.STRING_SCHEMA).orNull,
        key.orNull,
        ConnectSchema,
        tx.toStruct()
      )
    }

    //private def getOffset() = Collections.singletonMap("position", System.currentTimeMillis())

    def toStruct(): Struct = {
      val struct = new Struct(ConnectSchema)
        .put("lock_time", tx.lock_time)
        .put("ver", tx.ver)
        .put("size", tx.size)
        .put("time", tx.time)
        .put("tx_index", tx.tx_index)
        .put("vin_sz", tx.vin_sz)
        .put("hash", tx.hash)
        .put("vout_sz", tx.vout_sz)
        .put("relayed_by", tx.relayed_by)

      tx.out.headOption.foreach { _ =>
        import scala.collection.JavaConverters._
        struct.put("out", tx.out.map(_.toStruct()).asJava)
      }
      tx.rbf.foreach(struct.put("rbf", _))
      tx.inputs.headOption.foreach { _ =>
        val inputs = new util.ArrayList[Struct]
        tx.inputs.foreach(i => inputs.add(i.toStruct()))
        struct.put("inputs", inputs)
      }
      tx.out.headOption.foreach { _ =>
        val outputs = new util.ArrayList[Struct]
        tx.out.foreach(output => outputs.add(output.toStruct()))
      }
      struct
    }
  }
}
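toSourceRecord above uses the keyed eight-argument SourceRecord constructor and passes null for both source partition and offset, so Connect does not track offsets for these records. The same constructor shown in isolation with plain string key and value; the topic name and values are illustrative only.

import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

// Keyed record with an explicit Kafka partition and no source partition/offset.
object KeyedRecordExample {
  def main(args: Array[String]): Unit = {
    val record = new SourceRecord(
      null, null,                      // no source partition / offset
      "blockchain", 0,                 // topic and Kafka partition (illustrative)
      Schema.STRING_SCHEMA, "tx-hash", // key schema and key
      Schema.STRING_SCHEMA, "payload") // value schema and value
    println(record)
  }
}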
Example 33
Source File: JMSReader.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.jms.source.readers

import com.datamountaineer.streamreactor.connect.converters.source.Converter
import com.datamountaineer.streamreactor.connect.jms.JMSSessionProvider
import com.datamountaineer.streamreactor.connect.jms.config.JMSSettings
import com.datamountaineer.streamreactor.connect.jms.source.domain.JMSStructMessage
import com.typesafe.scalalogging.StrictLogging
import javax.jms.{Message, MessageConsumer}
import org.apache.kafka.connect.source.SourceRecord

import scala.util.Try

class JMSReader(settings: JMSSettings) extends StrictLogging {

  val provider = JMSSessionProvider(settings)
  provider.start()

  val consumers: Vector[(String, MessageConsumer)] = (provider.queueConsumers ++ provider.topicsConsumers).toVector
  val convertersMap: Map[String, Option[Converter]] = settings.settings.map(s => (s.source, s.sourceConverters)).toMap
  val topicsMap: Map[String, String] = settings.settings.map(s => (s.source, s.target)).toMap

  def poll(): Vector[(Message, SourceRecord)] = {
    val messages = consumers
      .flatMap({ case (source, consumer) =>
        (0 to settings.batchSize)
          .flatMap(_ => Option(consumer.receiveNoWait()))
          .map(m => (m, convert(source, topicsMap(source), m)))
      })
    messages
  }

  def convert(source: String, target: String, message: Message): SourceRecord = {
    convertersMap(source).getOrElse(None) match {
      case c: Converter => c.convert(target, source, message.getJMSMessageID, JMSStructMessage.getPayload(message))
      case None => JMSStructMessage.getStruct(target, message)
    }
  }

  def stop: Try[Unit] = provider.close()
}

object JMSReader {
  def apply(settings: JMSSettings): JMSReader = new JMSReader(settings)
}
Example 35
Source File: ReThinkSourceReadersFactory.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.rethink.source

import java.util
import java.util.concurrent.LinkedBlockingQueue
import java.util.concurrent.atomic.AtomicBoolean

import com.datamountaineer.streamreactor.connect.rethink.ReThinkConnection
import com.datamountaineer.streamreactor.connect.rethink.config.{ReThinkSourceConfig, ReThinkSourceSetting, ReThinkSourceSettings}
import com.rethinkdb.RethinkDB
import com.rethinkdb.net.{Connection, Cursor}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.data.SchemaBuilder
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

object ReThinkSourceReadersFactory {

  def apply(config: ReThinkSourceConfig, r: RethinkDB): Set[ReThinkSourceReader] = {
    val conn = Some(ReThinkConnection(r, config))
    val settings = ReThinkSourceSettings(config)
    settings.map(s => new ReThinkSourceReader(r, conn.get, s))
  }
}

class ReThinkSourceReader(rethink: RethinkDB, conn: Connection, setting: ReThinkSourceSetting)
  extends StrictLogging {

  logger.info(s"Initialising ReThink Reader for ${setting.source}")
  private val keySchema = SchemaBuilder.string().optional().build()
  private val valueSchema = ChangeFeedStructBuilder.schema
  private val sourcePartition = Map.empty[String, String]
  private val offset = Map.empty[String, String]
  private val stopFeed = new AtomicBoolean(false)
  private val handlingFeed = new AtomicBoolean(false)
  private var feed: Cursor[util.HashMap[String, String]] = _
  val queue = new LinkedBlockingQueue[SourceRecord]()
  val batchSize = setting.batchSize

  def start() = {
    feed = getChangeFeed()
    startFeed(feed)
  }

  def stop() = {
    logger.info(s"Closing change feed for ${setting.source}")
    stopFeed.set(true)
    while (handlingFeed.get()) {
      logger.debug("Waiting for feed to shutdown...")
      Thread.sleep(1000)
    }
    feed.close()
    logger.info(s"Change feed closed for ${setting.source}")
  }

  private def handleFeed(feed: Cursor[util.HashMap[String, String]]) = {
    handlingFeed.set(true)

    //feed.next is blocking
    while (!stopFeed.get()) {
      logger.debug(s"Waiting for next change feed event for ${setting.source}")
      val cdc = convert(feed.next().asScala.toMap)
      queue.put(cdc)
    }
    handlingFeed.set(false)
  }

  private def getChangeFeed(): Cursor[util.HashMap[String, String]] = {
    logger.info(s"Initialising change feed for ${setting.source}")
    rethink
      .db(setting.db)
      .table(setting.source)
      .changes()
      .optArg("include_states", true)
      .optArg("include_initial", setting.initialise)
      .optArg("include_types", true)
      .run(conn)
  }

  private def convert(feed: Map[String, String]) = {
    new SourceRecord(sourcePartition.asJava, offset.asJava, setting.target, keySchema,
      setting.source, valueSchema, ChangeFeedStructBuilder(feed))
  }
}
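The reader above only fills a LinkedBlockingQueue; the SourceTask that owns it (not shown on this page) would drain that queue from poll(). A hypothetical sketch of such a drain, with illustrative names:

import java.util
import java.util.concurrent.LinkedBlockingQueue

import org.apache.kafka.connect.source.SourceRecord

// Drains whatever the change-feed thread has buffered, up to batchSize records.
class QueueDrainingPoll(queue: LinkedBlockingQueue[SourceRecord], batchSize: Int) {
  def poll(): util.List[SourceRecord] = {
    val records = new util.ArrayList[SourceRecord]()
    queue.drainTo(records, batchSize)      // non-blocking; takes only what is already queued
    if (records.isEmpty) null else records // Connect treats null as "nothing this round"
  }
}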
Example 36
Source File: PulsarSourceTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.pulsar.source

import java.util
import java.util.UUID

import com.datamountaineer.streamreactor.connect.converters.source.Converter
import com.datamountaineer.streamreactor.connect.pulsar.config.{PulsarConfigConstants, PulsarSourceConfig, PulsarSourceSettings}
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}
import org.apache.pulsar.client.api.{ClientConfiguration, PulsarClient}
import org.apache.pulsar.client.impl.auth.AuthenticationTls
import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException

import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}

class PulsarSourceTask extends SourceTask with StrictLogging {
  private val progressCounter = new ProgressCounter
  private var enableProgress: Boolean = false
  private var pulsarManager: Option[PulsarManager] = None
  private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation)

  override def start(props: util.Map[String, String]): Unit = {
    logger.info(scala.io.Source.fromInputStream(this.getClass.getResourceAsStream("/pulsar-source-ascii.txt")).mkString + s" $version")
    logger.info(manifest.printManifest())

    val conf = if (context.configs().isEmpty) props else context.configs()

    implicit val settings = PulsarSourceSettings(PulsarSourceConfig(conf), props.getOrDefault("tasks.max", "1").toInt)

    val name = conf.getOrDefault("name", s"kafka-connect-pulsar-source-${UUID.randomUUID().toString}")
    val convertersMap = buildConvertersMap(conf, settings)

    val messageConverter = PulsarMessageConverter(
      convertersMap,
      settings.kcql,
      settings.throwOnConversion,
      settings.pollingTimeout,
      settings.batchSize)

    val clientConf = new ClientConfiguration()

    settings.sslCACertFile.foreach(f => {
      clientConf.setUseTls(true)
      clientConf.setTlsTrustCertsFilePath(f)

      val authParams = settings.sslCertFile.map(f => ("tlsCertFile", f)).toMap ++
        settings.sslCertKeyFile.map(f => ("tlsKeyFile", f)).toMap
      clientConf.setAuthentication(classOf[AuthenticationTls].getName, authParams.asJava)
    })

    pulsarManager = Some(new PulsarManager(PulsarClient.create(settings.connection, clientConf), name, settings.kcql, messageConverter))
    enableProgress = settings.enableProgress
  }

  def buildConvertersMap(props: util.Map[String, String], settings: PulsarSourceSettings): Map[String, Converter] = {
    settings.sourcesToConverters.map { case (topic, clazz) =>
      logger.info(s"Creating converter instance for $clazz")
      val converter = Try(Class.forName(clazz).newInstance()) match {
        case Success(value) => value.asInstanceOf[Converter]
        case Failure(_) =>
          throw new ConfigException(s"${PulsarConfigConstants.KCQL_CONFIG} is invalid. $clazz should have an empty ctor!")
      }
      import scala.collection.JavaConverters._
      converter.initialize(props.asScala.toMap)
      topic -> converter
    }
  }

  override def stop(): Unit = {
    logger.info("Stopping Pulsar source.")
    pulsarManager.foreach(_.close())
    progressCounter.empty
  }

  override def version: String = manifest.version()
}
Example 37
Source File: PulsarMessageConverterTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.pulsar.source

import java.util

import com.datamountaineer.streamreactor.connect.pulsar.config.{PulsarConfigConstants, PulsarSourceConfig, PulsarSourceSettings}
import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil
import org.apache.kafka.connect.source.SourceRecord
import org.apache.pulsar.client.api.MessageBuilder
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import scala.collection.JavaConverters._

class PulsarMessageConverterTest extends AnyWordSpec with Matchers with ConverterUtil {

  val pulsarTopic = "persistent://landoop/standalone/connect/kafka-topic"
  val jsonMessage = "{\"int8\":12,\"int16\":12,\"int32\":12,\"int64\":12,\"float32\":12.2,\"float64\":12.2,\"boolean\":true,\"string\":\"foo\"}"

  "should convert messages" in {

    val props = Map(
      PulsarConfigConstants.HOSTS_CONFIG -> "pulsar://localhost:6650",
      PulsarConfigConstants.KCQL_CONFIG -> s"INSERT INTO kafka_topic SELECT * FROM $pulsarTopic BATCH = 10",
      PulsarConfigConstants.THROW_ON_CONVERT_ERRORS_CONFIG -> "true",
      PulsarConfigConstants.POLLING_TIMEOUT_CONFIG -> "500"
    ).asJava

    val config = PulsarSourceConfig(props)
    val settings = PulsarSourceSettings(config, 1)

    // test part of the task here as well
    val task = new PulsarSourceTask()
    val convertersMap = task.buildConvertersMap(props, settings)

    val converter = PulsarMessageConverter(convertersMap, settings.kcql, false, 100, 100)

    val message = MessageBuilder
      .create
      .setContent(jsonMessage.getBytes)
      .setKey("landoop")
      .setSequenceId(1)
      .build() // pulsar message

    converter.convertMessages(message, pulsarTopic)

    val list = new util.ArrayList[SourceRecord]()
    converter.getRecords(list)
    list.size shouldBe 1
    val record = list.get(0)
    record.key().toString shouldBe "landoop"
    record.value().asInstanceOf[Array[Byte]].map(_.toChar).mkString shouldBe jsonMessage
  }
}